python code
Please email mamoon@quake.vc or mik279@nyu.edu with questions about how to make modifications.
Primary code credit: @ZeroVariance (Twitter handle).
TeamStation AI System Report LATAM IT Salaries 2024
Twitter sentiment analysis for cryptoassets
from urllib.request import urlopen
from bs4 import BeautifulSoup as soup
def getTwitterHandles():
    """Scrape the Crypto Weekly "100" page and return the Twitter handles on it.

    The page is downloaded, every ``div`` with class ``testimonial-wrapper``
    is treated as one profile, and the text of the first link inside the
    profile's first ``div`` with class ``author`` is taken as the handle.

    Returns:
        list of str: the link texts with their first character removed
        (presumably the leading "@" -- confirm against the live page), in
        page order. Profiles without an author block or link are skipped.
    """
    # Fill in with url of page which is to be scraped
    url = "https://cryptoweekly.co/100/"

    # Retrieve the page html; the context manager closes the connection
    # even if the read raises.
    with urlopen(url) as client:
        pageHtml = client.read()
    pageSoup = soup(pageHtml, "html.parser")

    # Collect one handle per profile
    twitterHandles = []
    for person in pageSoup.findAll("div", {"class": "testimonial-wrapper"}):
        authors = person.findAll("div", {"class": "author"})
        if not authors:
            # No author block: skip instead of aborting the whole scrape
            continue
        links = authors[0].findAll("a")
        if not links:
            continue
        twitterHandles.append(links[0].text[1:])
    return twitterHandles
# Script entry point: run the scrape when this file is executed directly
if __name__ == "__main__":
    getTwitterHandles()
# Modified from: https://gist.github.com/yanofsky/5436496
import tweepy #https://github.com/tweepy/tweepy
import csv
import sys
from getTwitterHandles import getTwitterHandles
# Twitter API credentials (the keys originally published here were expired
# and have been blanked out). Each value is read from the environment first
# so real secrets do not have to be written into this file; when a variable
# is unset the value falls back to the empty string.
import os

consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
access_key = os.environ.get("TWITTER_ACCESS_KEY", "")
access_secret = os.environ.get("TWITTER_ACCESS_SECRET", "")
def get_all_tweets(screen_name):
    """Download a user's available timeline and save it as a CSV file.

    Pages backwards through the timeline 200 tweets per request and writes
    ``./Tweets/<screen_name>_tweets.csv`` with the columns ``id``,
    ``created_at`` and ``text``. A user with no tweets yields a file that
    contains only the header row.

    Args:
        screen_name: Twitter handle whose tweets are fetched.

    Returns:
        None. The result is the CSV file written to disk.
    """
    import os  # local import: only needed to create the output directory

    print("Getting tweets from @" + str(screen_name))

    # Twitter only allows access to a user's most recent 3240 tweets with this method
    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # list holding every tweepy Tweet collected so far
    alltweets = []

    # initial request for the most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # keep grabbing tweets until a request comes back empty; an empty first
    # page simply skips the loop instead of indexing into an empty list
    while new_tweets:
        alltweets.extend(new_tweets)
        print("...%s tweets downloaded so far" % (len(alltweets)))

        # one less than the oldest id seen, so the next page has no duplicates
        oldest = new_tweets[-1].id - 1
        print("Getting tweets before %s" % (oldest))
        new_tweets = api.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest
        )

    # transform the tweepy tweets into a 2D array that will populate the csv
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text] for tweet in alltweets]

    # write the csv; newline="" is what the csv module expects, and an
    # explicit utf-8 encoding keeps emoji and non-ASCII text writable
    os.makedirs("./Tweets", exist_ok=True)
    with open("./Tweets/%s_tweets.csv" % screen_name, "w",
              newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)
# Script entry point: scrape the handle list, then download each timeline
if __name__ == "__main__":
    handles = getTwitterHandles()
    for handle in map(str, handles):
        get_all_tweets(handle)
# Import modules,set styles
from helperScripts import *
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pickle
%matplotlib inline
plt.style.use('fivethirtyeight')

# Creates a dataframe with columns: |Name|Twitter Handle|Path To Tweets|
# NOTE(security): pickle.load can execute arbitrary code from the file, so
# only load a handleNamePair.pickle that you created yourself.
with open("handleNamePair.pickle", "rb") as pickleFile:
    handleNameDict = pickle.load(pickleFile)

# One row per dictionary item: key -> "Name", value -> "Twitter Handle".
# reshape(-1, 2) keeps the two columns even when the dictionary is empty.
arrayRep = np.array(list(handleNameDict.items())).reshape(-1, 2)
df = pd.DataFrame(arrayRep, columns=["Name", "Twitter Handle"])

# Location of each person's downloaded tweets
pathToTweets = [
    "./Tweets/" + str(person) + "_tweets.csv" for person in df["Twitter Handle"]
]
df["Path To Tweets"] = pathToTweets
# Measures animosity towards Bitcoin Cash (0 to 1, 0=low animosity, 1=high animosity)
def bchAnimosityIndex(df):
    """Score how often Bitcoin Cash is referred to as "bcash".

    Every entry of ``df["text"]`` is searched case-insensitively. An entry
    containing "bitcoincash" or "bitcoin cash" counts as a Bitcoin Cash
    mention; otherwise an entry containing "bcash" counts as a bcash
    mention. Each entry is counted at most once, and the full name takes
    precedence when both forms appear.

    Args:
        df: any object whose ``["text"]`` item is an iterable of tweet
            texts. Entries that are not strings (for example the NaN that
            ``read_csv`` produces for an empty cell) are ignored.

    Returns:
        list: ``[index, totalMentions]`` where ``index`` is
        ``bcash mentions / total mentions``, or ``0`` when there are no
        mentions at all.
    """
    # Search terms and counters
    bitcoinCashTerms = ("bitcoincash", "bitcoin cash")
    bcashTerm = "bcash"
    bitcoinCashCounter = 0
    bcashCounter = 0

    # Iterates over all tweets
    for tweet in df["text"]:
        if not isinstance(tweet, str):
            # Missing or non-text cell: nothing to search
            continue
        text = tweet.lower()  # lower-case once, reuse for every search
        if any(term in text for term in bitcoinCashTerms):
            bitcoinCashCounter += 1
        elif bcashTerm in text:
            bcashCounter += 1

    # Total number of mentions of BCH
    totalMentions = bcashCounter + bitcoinCashCounter

    # If the individual hasn't mentioned BCH, the index is zero
    if totalMentions == 0:
        return [0, totalMentions]
    return [bcashCounter / totalMentions, totalMentions]
# Adds columns to dataframe: |BCH Animosity|BCH Mentions|
# Each person's saved tweets are scored with bchAnimosityIndex. The CSV path
# is read by column label, because integer position lookups on a
# label-indexed row are deprecated in recent pandas.
bchData = [
    bchAnimosityIndex(pd.read_csv(path)) for path in df["Path To Tweets"]
]

# Split the [index, mentions] pairs into one list per new column
indexValue = [d[0] for d in bchData]
bchMentions = [d[1] for d in bchData]
df["BCH Animosity"] = indexValue
df["BCH Mentions"] = bchMentions
# Rank everyone by animosity score, highest first
ranked = df.sort_values("BCH Animosity", ascending=False)

# Only people with more than 30 BCH mentions qualify; keep at most ten.
# The filter is computed once and reused for every column below.
qualified = ranked[ranked["BCH Mentions"] > 30].head(n=10)

topTen = pd.DataFrame()
# Fills out Rank column; sized to the rows that actually qualified so the
# table still builds when fewer than ten people pass the filter
num = np.arange(1, len(qualified) + 1)
topTen["Rank"] = num
# Fills out BCH Animosity score column
# NOTE(review): the column name below has no spaces, unlike the other
# columns -- confirm whether "BCH Animosity Score" was intended.
topTen["BCHAnimosityScore"] = np.array(qualified["BCH Animosity"])
# Fills out Name column
topTen["Name"] = np.array(qualified["Name"])
# Fills out Twitter Handle column
topTen["Twitter Handle"] = np.array(qualified["Twitter Handle"])