0
트위터 데이터를 스크래핑하려고 하는데 아래와 같은 오류가 발생합니다. 어떻게 해결할 수 있습니까? 인터넷에서 몇 가지 관련 질문을 찾아보았지만 제 프로그램에는 해당되지 않았습니다. TweepError: JSON 페이로드를 구문 분석하지 못했습니다 (Failed to parse JSON payload).
오류 없이 동작하도록 코드를 수정하는 방법에 대해 도움을 구합니다. 코드:
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import pandas as pd
import json
import csv
import sys
import time
# Python 2 only: switch the implicit str<->unicode codec to UTF-8
# (presumably so non-ASCII tweet fields can be written out later).
# Python 3 has no reload() builtin and no setdefaultencoding(), so the
# hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Twitter application credentials (placeholder values).
ckey = 'abc'
csecret = 'abc'
atoken = 'abc'
asecret = 'abc'
OAUTH_KEYS = {
    'consumer_key': ckey,
    'consumer_secret': csecret,
    'access_token_key': atoken,
    'access_token_secret': asecret,
}

auth = tweepy.OAuthHandler(OAUTH_KEYS['consumer_key'],
                           OAUTH_KEYS['consumer_secret'])
# The access token pair was stored in OAUTH_KEYS but never handed to the
# handler; without it requests are not signed on behalf of a user.
auth.set_access_token(OAUTH_KEYS['access_token_key'],
                      OAUTH_KEYS['access_token_secret'])
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

if not api:
    print("Can't Authenticate")
    sys.exit(-1)
else:
    print(" Scraping data now")

# Enter latitude & longitude and then radius in Kms, e.g.
# q='hello', geocode="19.9974533,73.7898023,1000km"
# count is the page size; the standard search endpoint documents a maximum
# of 100 per request, so the previous value of 1000 could not be honoured.
cursor = tweepy.Cursor(api.search, q='olympics', since='2016-08-18',
                       until='2016-08-19', lang='en', count=100)

MAX_ITEMS = 10000          # upper bound on the number of tweets collected
MAX_RETRIES = 3            # consecutive failed fetches tolerated
RETRY_DELAY_SECONDS = 5    # pause before asking for the same page again

results = []
item_iter = cursor.items(MAX_ITEMS)
consecutive_failures = 0
while True:
    try:
        item = next(item_iter)
    except StopIteration:
        break
    except tweepy.TweepError as err:
        # A truncated response surfaces as "Failed to parse JSON payload".
        # Retry a few times instead of losing everything gathered so far.
        # NOTE(review): assumes the tweepy iterator can be advanced again
        # after a failed page fetch -- confirm for the installed version.
        consecutive_failures += 1
        if consecutive_failures > MAX_RETRIES:
            print("Giving up after repeated errors, keeping %d tweets: %s"
                  % (len(results), err))
            break
        print("Fetch failed (%s); retrying in %d seconds"
              % (err, RETRY_DELAY_SECONDS))
        time.sleep(RETRY_DELAY_SECONDS)
        continue
    consecutive_failures = 0
    results.append(item)
def toDataFrame(tweets):
    """Flatten tweet objects into a pandas DataFrame, one row per tweet.

    Args:
        tweets: iterable of status objects exposing the attributes read
            below (``id``, ``text``, ``retweet_count``, ``favorite_count``,
            ``source``, ``created_at``, ``coordinates``, ``place`` and a
            ``user`` object with its own fields).

    Returns:
        A DataFrame whose columns appear in the order listed in
        ``columns``. ``tweetText`` holds the UTF-8 encoded text and
        ``TweetPlace`` holds ``place.full_name`` or the string ``'null'``
        when the tweet has no place.
    """
    # Materialise once: the input is walked once per column, which would
    # silently produce empty columns for a one-shot iterator.
    tweets = list(tweets)

    # (column name, extractor) pairs; list order defines column order.
    columns = [
        ('tweetID', lambda t: t.id),
        ('tweetText', lambda t: t.text.encode('utf-8')),
        ('tweetRetweetCt', lambda t: t.retweet_count),
        ('tweetFavoriteCt', lambda t: t.favorite_count),
        ('tweetSource', lambda t: t.source),
        ('tweetCreated', lambda t: t.created_at),
        ('userID', lambda t: t.user.id),
        ('userScreen', lambda t: t.user.screen_name),
        ('userName', lambda t: t.user.name),
        ('userCreateDt', lambda t: t.user.created_at),
        ('userDesc', lambda t: t.user.description),
        ('userFollowerCt', lambda t: t.user.followers_count),
        ('userFriendsCt', lambda t: t.user.friends_count),
        ('userLocation', lambda t: t.user.location),
        ('userTimezone', lambda t: t.user.time_zone),
        ('Coordinates', lambda t: t.coordinates),
        ('GeoEnabled', lambda t: t.user.geo_enabled),
        ('Language', lambda t: t.user.lang),
        # Tweets without a place are recorded with the literal 'null'.
        ('TweetPlace', lambda t: t.place.full_name if t.place else 'null'),
    ]

    DataSet = pd.DataFrame()
    for column, extract in columns:
        DataSet[column] = [extract(tweet) for tweet in tweets]
    return DataSet
# Flatten the collected tweets and dump them to a CSV file in the current
# working directory. Parenthesised single-argument prints behave the same
# on Python 2 and Python 3.
print("started writing the output")
DataSet = toDataFrame(results)
DataSet.to_csv('olympics_18_8.csv', index=False)
print("Download Completed")
tweepy 자체에 문제가 있는 것이라면 twython을 대신 사용할 수 있습니까? 그렇다면 오류를 피하고 덤프를 다운로드하려면 코드를 어떻게 변경해야 합니까?
오류 :
Traceback (most recent call last):
File "Scrape_lat_lon.py", line 30, in <module>
for item in cursor.items(10000): # Remove the limit to 1000
File "/usr/local/lib/python2.7/dist-packages/tweepy/cursor.py", line 197, in next
self.current_page = self.page_iterator.next()
File "/usr/local/lib/python2.7/dist-packages/tweepy/cursor.py", line 117, in next
model = ModelParser().parse(self.method(create=True), data)
File "/usr/local/lib/python2.7/dist-packages/tweepy/parsers.py", line 95, in parse
json = JSONParser.parse(self, method, payload)
File "/usr/local/lib/python2.7/dist-packages/tweepy/parsers.py", line 54, in parse
raise TweepError('Failed to parse JSON payload: %s' % e)
tweepy.error.TweepError: Failed to parse JSON payload: Unterminated string starting at: line 1 column 467050 (char 467049)
도와주셔서 미리 감사드립니다.
게시물에 오류 로그를 추가해 주실 수 있습니까? –
@KostasPelelis 업데이트했습니다. –