Skip to content

Commit

Permalink
Added scraper and updated readme
Browse files Browse the repository at this point in the history
  • Loading branch information
avidLearnerInProgress committed May 11, 2018
1 parent 4bcc841 commit aea09f4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@
28. Ecommerce Scraper: Scrapes product data from ecommerce websites and displays it to user in CLI.
29. Lyrics Scraper: Scrape lyrics from atozlyrics website by specifying artist name.
30. Walmart Scraper: Scrape data from walmart website and store it in database using MySQLdb.
31. Twitter Scraper: Scrapes tweets from popular hashtags and saves them to a CSV file.

Binary file added twitter-scraper/myfile.csv
Binary file not shown.
14 changes: 7 additions & 7 deletions twitter-scraper/twitter_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@
#This code is using AppAuthHandler, not OAuthHandler to get higher limits, 2.5 times.
auth = tweepy.AppAuthHandler('j2UAZfXuk6iitAjnLjbFcmn0y', 'Q9X7g4eAhyElO8u5VI183QwRCUF1sXrZs8m9poGt6Q1pmN4cOw')
api = tweepy.API(auth, wait_on_rate_limit=True,
wait_on_rate_limit_notify=True)
wait_on_rate_limit_notify=True)


if (not api):
print ("Can't Authenticate")
sys.exit(-1)
def clean(val):
clean = ""
if val:
clean = val.encode('utf-8')
return clean
clean = ""
if val:
clean = val.encode('utf-8')
return clean

searchQuery = '' #This is for your hashtag(s), separated by commas
searchQuery = '#techsytalk' #This is for your hashtag(s), separated by commas
maxTweets = 80000 # Large max nr
tweetsPerQry = 100 # the max the API permits
fName = 'myfile.csv' #The CSV file where your tweets will be stored
Expand Down Expand Up @@ -62,7 +62,7 @@ def clean(val):
print("No more tweets found")
break
for tweet in new_tweets:
csvwriter.writerow([tweet.created_at, clean(tweet.user.screen_name), clean(tweet.text), tweet.user.created_at, tweet.user.followers_count, tweet.user.friends_count, tweet.user.statuses_count, clean(tweet.user.location), tweet.user.geo_enabled, tweet.user.lang, clean(tweet.user.time_zone), tweet.retweet_count]);
csvwriter.writerow([tweet.created_at, clean(tweet.user.screen_name), clean(tweet.text), tweet.user.created_at, tweet.user.followers_count, tweet.user.friends_count, tweet.user.statuses_count, clean(tweet.user.location), tweet.user.geo_enabled, tweet.user.lang, clean(tweet.user.time_zone), tweet.retweet_count]);

tweetCount += len(new_tweets)
#print("Downloaded {0} tweets".format(tweetCount))
Expand Down

0 comments on commit aea09f4

Please sign in to comment.