In my previous article, I showed you how to analyse the sentiment of tweets using Node.js. This time, we're going to shift our focus to news media - specifically, the Reuters news feed. Why? Because analyzing news sentiment can give us fascinating insights into media bias, trends, and public perception of current events.
Getting started
Before we dive into the code, you'll need to install a few dependencies:
npm install sentiment feedparser request moment --save
- sentiment: The same sentiment analysis library we used in the previous article
- feedparser: A library to parse RSS feeds
- request: To fetch the feeds from Reuters
- moment: For formatting dates and times
Parsing the Reuters RSS feeds
Reuters organizes its news into different categories via RSS feeds. For this example, we'll look at categories like 'businessNews', 'technologyNews', and 'worldNews' to get a varied analysis.
First, let's create a function to fetch and parse the RSS feeds:
var request = require('request'),
FeedParser = require('feedparser'),
sentiment = require('sentiment'),
moment = require('moment');
/**
 * Fetch and parse a Reuters RSS feed.
 *
 * Requests the feed for `category`, pipes a 200 response into a FeedParser
 * instance and collects every item the parser yields. On a request error,
 * a non-200 status or a parser error, the error is logged and the callback
 * receives an empty array. The callback is invoked at most once per call,
 * with whichever outcome is reported first.
 *
 * @param {String} category The news category to analyze
 * @param {Function} callback Function to call with results (an Array of items)
 */
function fetchReutersFeed(category, callback) {
  var req = request('http://feeds.reuters.com/reuters/' + category);
  var feedparser = new FeedParser();
  var items = [];
  var finished = false;

  // More than one of the handlers below can fire for a single fetch (for
  // example a parser 'error' and a later 'end'). Only the first outcome is
  // passed on, so callers that count completions are not notified twice.
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  req.on('error', function(error) {
    console.error('Request error:', error);
    finish([]);
  });

  req.on('response', function(res) {
    if (res.statusCode !== 200) {
      // Route a bad status through the request's own 'error' handler above.
      this.emit('error', new Error('Bad status code'));
      return;
    }
    this.pipe(feedparser);
  });

  feedparser.on('error', function(error) {
    console.error('Feedparser error:', error);
    finish([]);
  });

  feedparser.on('readable', function() {
    var item;
    // Drain everything currently buffered; the loop stops on a falsy read().
    while ((item = this.read())) {
      items.push(item);
    }
  });

  feedparser.on('end', function() {
    finish(items);
  });
}
This function makes a request to the Reuters RSS feed for a specific category, parses the feed, and collects all the news items in an array.
Analysing the sentiment
Now that we have our news items, we need to analyze their sentiment. Let's create a function for this:
/**
 * Analyze sentiment of news items.
 *
 * Each item's score is the sum of the sentiment scores of its title and its
 * description. Items are counted as positive (score > 0), negative
 * (score < 0) or neutral (score === 0).
 *
 * @param {Array} items Array of news items; `title`, `description`, `link`
 *   and `pubDate` are read from each one
 * @return {Object} Sentiment analysis results: `totalScore`, `averageScore`
 *   (0 for an empty input), `positiveCount`, `negativeCount`, `neutralCount`
 *   and `items`, an array with one detail record per input item
 */
function analyzeNewsSentiment(items) {
  var results = {
    totalScore: 0,
    averageScore: 0,
    positiveCount: 0,
    negativeCount: 0,
    neutralCount: 0,
    items: []
  };

  items.forEach(function(item) {
    // Analyze both the title and description. A feed entry may have no
    // title or description (null/undefined); score the missing text as an
    // empty string instead of handing a non-string to the analyzer.
    var titleAnalysis = sentiment(item.title || '');
    var descAnalysis = sentiment(item.description || '');

    // Combined score
    var score = titleAnalysis.score + descAnalysis.score;

    // Determine sentiment category
    var category = 'neutral';
    if (score > 0) {
      results.positiveCount++;
      category = 'positive';
    } else if (score < 0) {
      results.negativeCount++;
      category = 'negative';
    } else {
      results.neutralCount++;
    }

    results.totalScore += score;

    // Store detailed analysis for this item
    results.items.push({
      title: item.title,
      link: item.link,
      pubDate: moment(item.pubDate).format('MMMM Do YYYY, h:mm:ss a'),
      score: score,
      category: category,
      titleScore: titleAnalysis.score,
      descriptionScore: descAnalysis.score
    });
  });

  // Calculate average sentiment score (left at 0 when there are no items)
  if (items.length > 0) {
    results.averageScore = results.totalScore / items.length;
  }

  return results;
}
This function calculates an overall sentiment score for each news item by analyzing both the title and description. It then categorizes each item as positive, negative, or neutral based on its score, and calculates aggregate statistics.
Putting it all together
Now let's create a function to fetch and analyze sentiment for multiple news categories:
/**
 * Analyze sentiment across multiple news categories.
 *
 * Starts a fetch for every category and stores the analysis of each feed
 * under its category name. The callback is invoked once, after every
 * category has reported; for an empty `categories` array it is invoked on
 * the next tick with an empty object.
 *
 * @param {Array} categories Array of news categories to analyze
 * @param {Function} callback Function to call with results (an Object keyed
 *   by category name)
 */
function analyzeMultipleCategories(categories, callback) {
  var results = {};
  var remaining = categories.length;

  // Handle the case where categories is empty
  if (remaining === 0) {
    return process.nextTick(function() {
      callback(results);
    });
  }

  categories.forEach(function(category) {
    var settled = false;

    fetchReutersFeed(category, function(items) {
      // Count each fetch only once. A fetch that reports more than once
      // would otherwise be mistaken for another category finishing and
      // trigger the final callback early with categories missing.
      if (settled) {
        return;
      }
      settled = true;

      results[category] = analyzeNewsSentiment(items);
      remaining--;
      if (remaining === 0) {
        callback(results);
      }
    });
  });
}
This function processes multiple categories and collects the results in a single object. Let's use it to analyze a few different news categories:
// Define the categories we want to analyze
var categories = ['businessNews', 'technologyNews', 'worldNews', 'healthNews'];

// Analyze all categories and print a summary for each one
analyzeMultipleCategories(categories, function(results) {
  // Format `count` as a percentage of `total` with one decimal place.
  // A category with no articles (e.g. its feed failed) reports 0.0 rather
  // than the NaN produced by dividing by zero.
  function formatShare(count, total) {
    if (total === 0) {
      return '0.0';
    }
    return ((count / total) * 100).toFixed(1);
  }

  // Log summary results
  Object.keys(results).forEach(function(category) {
    var data = results[category];
    var total = data.items.length;

    console.log('\n---------------------------------------');
    console.log('Category:', category);
    console.log('---------------------------------------');
    console.log('Total articles:', total);
    console.log('Average sentiment score:', data.averageScore.toFixed(2));
    console.log('Positive articles:', data.positiveCount,
      '(' + formatShare(data.positiveCount, total) + '%)');
    console.log('Negative articles:', data.negativeCount,
      '(' + formatShare(data.negativeCount, total) + '%)');
    console.log('Neutral articles:', data.neutralCount,
      '(' + formatShare(data.neutralCount, total) + '%)');

    if (total === 0) {
      return;
    }

    // Display the most positive and most negative articles. Rank a copy,
    // highest score first, so the results object keeps its original order.
    var ranked = data.items.slice().sort(function(a, b) {
      return b.score - a.score;
    });
    var mostPositive = ranked[0];
    var mostNegative = ranked[ranked.length - 1];

    console.log('\nMost positive article:');
    console.log('Title:', mostPositive.title);
    console.log('Score:', mostPositive.score);
    console.log('Link:', mostPositive.link);

    console.log('\nMost negative article:');
    console.log('Title:', mostNegative.title);
    console.log('Score:', mostNegative.score);
    console.log('Link:', mostNegative.link);
  });
});
Save the code as reuters-sentiment.js and run the analysis with: node reuters-sentiment.js
This code logs a summary of the sentiment analysis for each category, including the average sentiment score, the distribution of positive, negative, and neutral articles, and the most positive and most negative articles.
Final thoughts
This sentiment analysis approach can provide interesting insights into how different news topics are being presented. You might find, for example, that technology news tends to be more positive than world news, or that business news fluctuates with market conditions.
You could extend this project in many ways:
- Track sentiment over time to see how it changes
- Compare sentiment across different news sources
- Use more sophisticated NLP techniques for deeper analysis
- Store the results in a database for longer-term trend analysis
Sentiment analysis isn't perfect. It doesn't understand context, sarcasm, or nuance in the way people do. But it can still give you a broad overview of general sentiment trends across different news categories.
What did you think of this tutorial? Please let me know in the comments :)