Stream Tweets in MongoDB with Node.JS
Suppose we want to store all our “mongodb” tweets in a MongoDB database.
We need 2 additional node packages:
1) ntwitter (Asynchronous Twitter REST/stream/search client API for Node.js)
2) mongodb (A Node.js driver for MongoDB). Of course there are more MongoDB drivers.
Create a Node.js project “twitterstream” and add the 2 packages with the following commands:
$ npm install ntwitter
$ npm install mongodb
We need an existing Twitter account, and we create a credentials file, for example credentials.js.
/**
 * Twitter API credentials used by the stream client.
 *
 * The values are read from environment variables instead of being written
 * into this file, so the secrets never end up in version control or in a
 * published copy of the code. Set these before starting the application:
 *
 *   TWITTER_CONSUMER_KEY
 *   TWITTER_CONSUMER_SECRET
 *   TWITTER_ACCESS_TOKEN_KEY
 *   TWITTER_ACCESS_TOKEN_SECRET
 *
 * The exported object has the four properties consumer_key, consumer_secret,
 * access_token_key and access_token_secret.
 */
var credentials = {
  consumer_key: process.env.TWITTER_CONSUMER_KEY,
  consumer_secret: process.env.TWITTER_CONSUMER_SECRET,
  access_token_key: process.env.TWITTER_ACCESS_TOKEN_KEY,
  access_token_secret: process.env.TWITTER_ACCESS_TOKEN_SECRET
};

// Fail early with a clear message instead of an opaque authentication
// error once the stream is opened.
Object.keys(credentials).forEach(function (name) {
  if (!credentials[name]) {
    throw new Error('Missing Twitter credential: set the environment variable TWITTER_' + name.toUpperCase());
  }
});

module.exports = credentials;
And now we create the main file twitter.js with the following code:
// twitter.js - stores the text of every tweet matching "mongodb" in MongoDB.

var twitter = require('ntwitter');
var credentials = require('./credentials.js');
var mongo = require('mongodb');
var assert = require('assert');

// Twitter client authenticated with the keys exported by credentials.js.
var t = new twitter({
  consumer_key: credentials.consumer_key,
  consumer_secret: credentials.consumer_secret,
  access_token_key: credentials.access_token_key,
  access_token_secret: credentials.access_token_secret
});

var Server = mongo.Server;
var Db = mongo.Db;

// Database "twitterstream" on the local MongoDB server.
var server = new Server('localhost', 27017, {auto_reconnect: true});
var db = new Db('twitterstream', server);

// Open the database first; the stream is only started once the connection
// and the target collection are available.
db.open(function(err, db) {
  // Abort on a failed connection instead of streaming into nothing.
  assert.ifError(err);

  // Look the collection up once rather than again for every incoming tweet.
  db.collection('streamadams', function(err, collection) {
    assert.ifError(err);

    t.stream(
      'statuses/filter',
      { track: ['mongodb'] },
      function(stream) {
        stream.on('data', function(tweet) {
          collection.insert({'tweet': tweet.text}, {safe: true}, function(err) {
            // Report failed writes; a single bad insert should not stop the stream.
            if (err) {
              console.error('Could not store tweet:', err);
            }
          });
        });
      }
    );
  });
});
Simply start the twitter.js with:
$ node twitter.js
Success with Node.JS and MongoDB!
How to Fetch RSS feeds into MongoDB with Groovy
Suppose we want to fetch some Amazon AWS news into a MongoDB database. These few lines make that possible using Groovy and the GMongo module:
// Grape fetches GMongo at run time and adds it to the classpath
@Grab(group='com.gmongo', module='gmongo', version='1.0')
import com.gmongo.GMongo

// com.gmongo.GMongo is used in place of com.mongodb.Mongo;
// it offers the same constructors and methods
def mongo = new GMongo("127.0.0.1", 27017)

// Database that receives the news items
def db = mongo.getDB("amazonnews")

// Location of the RSS feed
def feedUrl = "http://aws.amazon.com/rss/whats-new.rss"

// Download and parse the feed with XmlSlurper
def feed = new XmlSlurper().parse(feedUrl)

// Store the title and link of every feed item in the "news" collection
feed.channel.item.each { entry ->
    db.news.insert([title: entry.title.text(), link: entry.link.text()])
}
Connect to the MongoDB database and check whether the documents are there:
> use amazonnews switched to db amazonnews > db.news.find({}) { "_id" : ObjectId("519a2e980364e3901f41827d"), "title" : "Amazon Elastic Transcoder Announces Seven New Enhancements, Including HLS Support", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/16/amazon-elastic-transcoder-announces-seven-new-features/" } { "_id" : ObjectId("519a2e980364e3901f41827e"), "title" : "Amazon DynamoDB Announces Parallel Scan and Lower-Cost Reads", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/15/dynamodb-announces-parallel-scan-and-lower-cost-reads/" } { "_id" : ObjectId("519a2e990364e3901f41827f"), "title" : "AWS Management Console in AWS GovCloud (US) adds support for Amazon SWF", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/14/aws-management-console-in-aws-govcloud-us-adds-support-for-amazon-swf/" } { "_id" : ObjectId("519a2e990364e3901f418280"), "title" : "AWS OpsWorks launches Amazon CloudWatch metrics view", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/14/aws-opsworks-cloudwatch-view/" } { "_id" : ObjectId("519a2e990364e3901f418281"), "title" : "AWS OpsWorks supports Elastic Load Balancing", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/14/aws-opsworks-supports-elb/" } { "_id" : ObjectId("519a2e990364e3901f418282"), "title" : "AWS Direct Connect location in Seattle and access to AWS GovCloud (US) now available", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/08/aws-direct-connect-location-seattle-and-access-govcloud/" } { "_id" : ObjectId("519a2e990364e3901f418283"), "title" : "Announcing AWS Management Pack for Microsoft System Center ", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/08/aws-management-pack-for-microsoft-system-center-2012/" } { "_id" : ObjectId("519a2e990364e3901f418284"), "title" : "Raising the bar: Amazon announces 4,000 IOPS per EBS Volume and Provisioned IOPS products on AWS Marketplace", "link" : 
"http://aws.amazon.com/about-aws/whats-new/2013/05/07/announcing-4000-iops-per-piops-volume-and-marketplace-support/" } { "_id" : ObjectId("519a2e990364e3901f418285"), "title" : "Announcing General Availability of the AWS SDK for Node.js", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/06/announcing-general-availability-of-the-aws-sdk-for-node-js/" } { "_id" : ObjectId("519a2e990364e3901f418286"), "title" : "Amazon Elastic MapReduce (EMR) now supports S3 Server Side Encryption", "link" : "http://aws.amazon.com/about-aws/whats-new/2013/05/01/amazon-elastic-mapreduce-now-supports-S3-server-side-encryption/" }
Works!
More info:
Groovy http://groovy.codehaus.org/
Gmongo https://github.com/poiati/gmongo
Implement MongoDB replication in 3 simple steps
Now that we have seen how replication works with MySQL, let’s look at MongoDB.
Use the following steps to implement mongoDB Replication:
1) Create the data directories
2) Create the replication set and instances
3) Configure the primary, the secondaries and an arbiter
Download MongoDB? Go to the download site.
Step 1) Create the data directories
Start by creating a data directory for each replica set member: one for the primary and one for the secondary. We also add an arbiter. The arbiter does not replicate data, but it helps elect a new primary in case of an outage of the existing primary.
mkdir /data/node1
mkdir /data/node2
mkdir /data/arbiter
Step 2) Create the replication set and instances
Next, start each member as a separate mongod. Since you’ll be running each process on the same machine, it’s probably easiest to start each mongod in a separate terminal window:
mongod --replSet person --dbpath /data/node1 --port 40001
mongod --replSet person --dbpath /data/node2 --port 40002
mongod --replSet person --dbpath /data/arbiter --port 40003
Step 3) Configure the primary, the secondaries and an arbiter
Log on to the primary node to proceed. You need to configure the replica set: if you examine the mongod log output, the first thing you’ll notice are error messages saying that the configuration can’t be found.
mongo localhost:40001 MongoDB shell version: 2.2.0 connecting to: localhost:40001/test
> rs.initiate() { "info2" : "no configuration explicitly specified -- making one", "me" : "Computername.local:40001", "info" : "Config now saved locally. Should come online in about a minute.", "ok" : 1 }
Now connect to the primary node again and add the secondary node and the arbiter node:
person:PRIMARY> rs.add("Computername.local:40002")
{ "ok" : 1 }
person:PRIMARY> rs.add("Computername.local:40003", {arbiterOnly:true})
{ "ok" : 1 }
Check if the configuration is ok, with rs.status():
person:PRIMARY> rs.status() { "set" : "person", "date" : ISODate("2012-10-28T19:50:52Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "Computername.local:40001", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 1266, "optime" : Timestamp(1351453811000, 1), "optimeDate" : ISODate("2012-10-28T19:50:11Z"), "self" : true }, { "_id" : 1, "name" : "Computername.local:40002", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 41, "optime" : Timestamp(1351453811000, 1), "optimeDate" : ISODate("2012-10-28T19:50:11Z"), "lastHeartbeat" : ISODate("2012-10-28T19:50:51Z"), "pingMs" : 0 }, { "_id" : 2, "name" : "Computername.local:40003", "health" : 1, "state" : 7, "stateStr" : "ARBITER", "uptime" : 14, "optime" : Timestamp(1351453811000, 1), "optimeDate" : ISODate("2012-10-28T19:50:11Z"), "lastHeartbeat" : ISODate("2012-10-28T19:50:51Z"), "pingMs" : 0 } ], "ok" : 1 }
And now it’s time to check whether it works. We put a person in our primary database:
person:PRIMARY> use portraitGallery switched to db portraitGallery person:PRIMARY> db.person.save( { "name" : "Maikel", "group" : [ "Oracle", "ExaData", "Big Data"], } )
Log on to the secondary and check whether the data is there. Don’t forget to enable reads with rs.slaveOk() or db.getMongo().setSlaveOk():
mongo localhost:40002 MongoDB shell version: 2.2.0 connecting to: localhost:40002/test person:SECONDARY> rs.slaveOk() person:SECONDARY> use portraitGallery switched to db portraitGallery person:SECONDARY> db.person.find() { "_id" : ObjectId("508d971dda0730903bcbb612"), "name" : "Maikel", "group" : [ "Oracle", "ExaData", "Big Data" ] }
Now we can test it with a filler script. Type in the primary something like:
person:PRIMARY> for(i=0; i<1000000; i++) { db.person.save({person: i}); }
And in the secondary check if the collection is filled:
person:SECONDARY> db.person.find() { "_id" : ObjectId("508f95e9e38917f43ae20db3"), "person" : 0 } { "_id" : ObjectId("508f95e9e38917f43ae20db4"), "person" : 1 } { "_id" : ObjectId("508f95e9e38917f43ae20db5"), "person" : 2 } { "_id" : ObjectId("508f95e9e38917f43ae20db6"), "person" : 3 } { "_id" : ObjectId("508f95e9e38917f43ae20db7"), "person" : 4 } { "_id" : ObjectId("508f95e9e38917f43ae20db8"), "person" : 5 } { "_id" : ObjectId("508f95e9e38917f43ae20db9"), "person" : 6 } { "_id" : ObjectId("508f95e9e38917f43ae20dba"), "person" : 7 } { "_id" : ObjectId("508f95e9e38917f43ae20dbb"), "person" : 8 } { "_id" : ObjectId("508f95e9e38917f43ae20dbc"), "person" : 9 } { "_id" : ObjectId("508f95e9e38917f43ae20dbd"), "person" : 10 } { "_id" : ObjectId("508f95e9e38917f43ae20dbe"), "person" : 11 } { "_id" : ObjectId("508f95e9e38917f43ae20dbf"), "person" : 12 } { "_id" : ObjectId("508f95e9e38917f43ae20dc0"), "person" : 13 } { "_id" : ObjectId("508f95e9e38917f43ae20dc1"), "person" : 14 } { "_id" : ObjectId("508f95e9e38917f43ae20dc2"), "person" : 15 } { "_id" : ObjectId("508f95e9e38917f43ae20dc3"), "person" : 16 } { "_id" : ObjectId("508f95e9e38917f43ae20dc4"), "person" : 17 } { "_id" : ObjectId("508f95e9e38917f43ae20dc5"), "person" : 18 } { "_id" : ObjectId("508f95e9e38917f43ae20dc6"), "person" : 19 } Type "it" for more person:SECONDARY> db.person.count() 194079 person:SECONDARY> db.person.count() 215657 person:SECONDARY> db.person.count() 228488 person:SECONDARY> db.person.count() 239528 person:SECONDARY>
Works! Success with MongoDB!
If you want to do the MongoDB intro lab, go to mongodb.info