Sentiment analysis using NLTK SentimentIntensityAnalyzer and NRC Lexicon
Mohamad's interests are in programming (mobile, web, database, and machine learning). He is studying at the Center for Artificial Intelligence Technology (CAIT), Universiti Kebangsaan Malaysia (UKM).
Download lexicon:
# Make data directory if it doesn't exist
!mkdir -p data
# Fetch the NRC emotion lexicon into ./data; -nc skips the download when the
# file is already present, -P sets the target directory.
!wget -nc https://nyc3.digitaloceanspaces.com/ml-files-distro/v1/upshot-trump-emolex/data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt -P data
Define processing task:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
#nltk.download('vader_lexicon')
def load_emolex_lexicon(
    path='/content/data/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt',
):
    """Load word polarities from an NRC emotion lexicon word-level file.

    Each usable line holds three tab-separated fields: ``word``,
    ``emotion`` and ``value``. Only rows whose emotion is ``positive`` or
    ``negative`` are used, and only when their value is greater than zero.
    Lines that do not have exactly three fields, or whose value is not a
    number, are skipped.

    Args:
        path: Location of the lexicon file. Defaults to the file under
            ``/content/data``.

    Returns:
        A dict mapping each word to ``1`` (positive) or ``-1`` (negative).
        If a word is flagged both ways, the row appearing later in the
        file wins.
    """
    polarity_by_emotion = {'positive': 1, 'negative': -1}
    emolex_lexicon = {}
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            fields = line.strip().split('\t')
            if len(fields) != 3:
                # Blank lines, headers and malformed rows carry no polarity.
                continue
            word, emotion, value = fields
            polarity = polarity_by_emotion.get(emotion)
            if polarity is None:
                # Other emotions (e.g. anger, joy) are not used here.
                continue
            try:
                flagged = float(value) > 0.0
            except ValueError:
                # Best-effort load: ignore rows with a non-numeric value.
                continue
            if flagged:
                emolex_lexicon[word] = polarity
    return emolex_lexicon
def analyze_sentiment(text):
    """Score ``text`` with VADER and the NRC emotion lexicon.

    The words of ``text`` are looked up in the lexicon returned by
    ``load_emolex_lexicon()``; the VADER analyzer supplies the overall
    compound score.

    Args:
        text: The string to analyse.

    Returns:
        A dict with four keys:
            ``pos``: number of tokens the lexicon marks positive.
            ``neg``: number of tokens the lexicon marks negative.
            ``neu``: number of tokens the lexicon does not list.
            ``compound``: the ``compound`` value from
                ``SentimentIntensityAnalyzer.polarity_scores(text)``.
    """
    sia = SentimentIntensityAnalyzer()
    sentiment_scores = sia.polarity_scores(text)
    emolex_lexicon = load_emolex_lexicon()

    # Look up the words of the text itself. Tokens are lowercased and
    # stripped of surrounding punctuation before the lookup (assumes the
    # lexicon entries are lowercase words -- TODO confirm).
    punctuation = '.,!?;:"\'()[]{}<>-_/\\*`~'
    tokens = [raw.strip(punctuation) for raw in text.lower().split()]
    polarities = [emolex_lexicon.get(token, 0) for token in tokens if token]

    emolex_scores = {
        'pos': sum(1 for polarity in polarities if polarity > 0),
        'neg': sum(1 for polarity in polarities if polarity < 0),
        'neu': sum(1 for polarity in polarities if polarity == 0),
        'compound': sentiment_scores['compound'],
    }
    return emolex_scores
# Demo: run the analysis on one sample sentence and print the returned
# score dictionary.
text = "This is a great day!"
scores = analyze_sentiment(text)
print(scores)

![Text Representation Basics for Natural Language Processing [Interactive Simulation]](/_next/image?url=https%3A%2F%2Fcdn.hashnode.com%2Fres%2Fhashnode%2Fimage%2Fupload%2Fv1769996097089%2Fbe2ca449-7145-4a87-ae0b-c1ccd57960f2.webp&w=3840&q=75)
