I am running the following code, which gives me the tweets that contain the word cat, however at some points, I get an error
The code is:
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sentmod as s
ckey= "xxxxx"
csecret="xxxx"
atoken="xxxxx"
asecret="xxxxx"
class listener(StreamListener):
def on_data(self, data):
all_data = json.loads(data)
tweet = all_data["text"]
sentiment_value, confidence = s.sentiment(tweet)
tweet.encode('utf-8', 'ignore')
if "RT" in tweet:
pass
else:
tweets=open("tweets.txt","a",encoding="utf-8")
tweets.write(tweet)
tweets.write('\n')
tweets.write(str(sentiment_value))
tweets.write('\n')
tweets.write(str(confidence))
tweets.write('\n\n\n')
tweets.close()
print(tweet, sentiment_value, confidence)
if confidence*100 >= 60:
output = open("twitter-out.txt","a")
output.write(sentiment_value)
output.write('\n')
output.close()
return True
def on_error(self, status):
print(status)
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=['Cat'],languages=['en'])
I get the following error after a few tweets:
print(tweet, sentiment_value, confidence)
UnicodeEncodeError: 'UCS-2' codec can't encode characters in position 44-44: Non-BMP character not supported in Tk
What I have tried:
decoding and encoding with utf-8 and utf-16, but did not work