To Infinity & Beyond!
   Protocols & Lazy Sequences in Node
      Part Deux – Sh*t Just Got Real

Bahul Neel Upadhyaya (@bahulneel)
BraveNewTalent
http://github.com/bahulneel
http://www.bravenewtalent.com
cosy.lang
    Library                           Protocols
●   Protocols                     ●   ISeq
●   Sequences Lazy & Async        ●   ISync
●   Argument length dispatch      ●   IStream
●   Tail recursion                ●   IPromise
●   Object Identity
●   Object Metadata
                npm install cosy-lang
An Example (TF-IDF)



“Tf–idf, term frequency–inverse document frequency,
is a numerical statistic which reflects how important a word is to a
document in a collection or corpus. It is often used as a weighting
factor in information retrieval and text mining.”
                                                         - Wikipedia
Words
/**
 * Normalises a raw token: removes every character that is not a letter,
 * digit, underscore or hyphen, then lower-cases what is left.
 *
 * @param {string} word - Raw token, e.g. one piece of a space-split string.
 * @returns {string} The cleaned, lower-cased token (possibly empty).
 */
function stripWord(word) {
    // The `g` flag matters: without it only the FIRST run of disallowed
    // characters is removed, so "(hello)" would come out as "hello)".
    return word.replace(/[^-a-zA-Z_0-9]+/g, '').toLowerCase();
}
/**
 * Checks whether a token is made up entirely of letters, digits,
 * underscores and hyphens (at least one character).
 *
 * @param {string} word - Token to test.
 * @returns {Array|null} The RegExp match array (truthy) when the whole
 *     token matches, otherwise null.
 */
function isWord(word) {
    var wordPattern = /^[-a-zA-Z_0-9]+$/;
    var match = wordPattern.exec(word);
    return match;
}
/**
 * Splits a string on single spaces, cleans each piece with stripWord and
 * keeps only the pieces that isWord accepts.
 *
 * `map`, `filter` and `vec` are not defined in this snippet; presumably they
 * are the cosy-lang sequence helpers (verify against the library).
 *
 * @param {string} string - Text to tokenise.
 * @returns {*} Whatever `vec` produces from the filtered sequence.
 */
function words(string) {
    var tokens = string.split(/ /);
    var stripped = map(stripWord, tokens);
    var accepted = filter(isWord, stripped);
    return vec(accepted);
}
Term Frequencies
/**
 * Computes normalised term frequencies for one document: each word's
 * occurrence count divided by the count of the most frequent word.
 *
 * `reduce` and `clone` are not defined in this snippet; presumably they are
 * cosy-lang helpers (verify against the library).
 *
 * @param {*} words - Sequence of words, consumed through `reduce`.
 * @returns {Object} Map of word -> count / highest count. When no word was
 *     counted the reduced result is returned unscaled.
 */
function tf(words) {
    var highest = 0;
    var totals;
    var key;

    // Reducer step: works on a clone of the accumulator rather than the
    // accumulator it was handed, and tracks the largest count seen so far.
    function tally(soFar, term) {
        var next = clone(soFar);
        var seen = typeof next[term] === 'undefined' ? 0 : next[term];
        next[term] = seen + 1;
        if (next[term] > highest) {
            highest = next[term];
        }
        return next;
    }

    totals = reduce(tally, {}, words);

    // Nothing counted: skip the scaling pass (also avoids dividing by zero).
    if (!highest) {
        return totals;
    }

    for (key in totals) {
        if (totals.hasOwnProperty(key)) {
            totals[key] = totals[key] / highest;
        }
    }
    return totals;
}
Inverse Document Frequency
// Builds a sequence with one frequency map per input document.
//
// `fn$` is handed an object keyed by 1 and 3; presumably it dispatches on
// argument count (confirm against cosy-lang):
//   idf(terms)                 -> starts with an empty map and count 1
//   idf(freq, docCount, terms) -> the worker form
//
// For the first document in `terms`, the worker merges its map into `freq`
// and emits, for every word in the merged map,
//   docCount / (1 + mergedValue)
// followed by the result of recursing on the remaining documents with the
// merged map and docCount + 1. `lazy`, `cons`, `first`, `rest` and `merge`
// are not defined in this snippet; presumably they are cosy-lang helpers.
idf = fn$({
      1: function (terms) {
           return idf({}, 1, terms);
      },
      3: function (freq, docCount, terms) {
           // Produces the head element for `remaining` plus the rest of
           // the sequence; handed to `lazy` below.
           function calcIdf(remaining) {
               var docFreq = merge(freq, first(remaining));
               var invDocFreq = {};
               var word;
               for (word in docFreq) {
                   if (!docFreq.hasOwnProperty(word)) {
                       continue;
                   }
                   invDocFreq[word] = docCount / (1 + docFreq[word]);
               }
               return cons(invDocFreq, idf(docFreq, docCount + 1, rest(remaining)));
           }

           // End of input: a null first element terminates the sequence.
           if (first(terms) === null) {
               return null;
           }
           return lazy(terms, calcIdf);
      }
});
TF-IDF
/**
 * Computes per-document TF-IDF weights.
 *
 * Each document is tokenised with `words` and turned into term frequencies
 * with `tf`; `idf` is then run over those term-frequency maps, and the two
 * sequences are combined element by element.
 *
 * `map` is not defined in this snippet; presumably it is the cosy-lang
 * sequence map, which accepts several sequences (verify against the library).
 *
 * @param {*} documents - Sequence of document strings.
 * @returns {*} Sequence of objects mapping word -> tf * idf.
 */
function tfIdf(documents) {
    var terms = map(tf, map(words, documents));
    var freq = idf(terms);

    // Parameters are named docTf/docIdf so they no longer shadow the outer
    // tf() and idf() functions.
    function calcTfIdf(docTf, docIdf) {
        var word, weights = {};
        for (word in docTf) {
            if (docTf.hasOwnProperty(word)) weights[word] = docTf[word] * docIdf[word];
        }
        return weights;
    }
    return map(calcTfIdf, terms, freq);
}
Making Sequences Asynchronous
    Source                          Sink
●   Takes an ISeq & ISync as    ●   Takes an IStream as its
    its argument                    argument
●   Extends IStream             ●   Extends ISeq & ISync
●   Registers a tick callback   ●   First returns stream.skip
    using the ISync interface       until stream emits
●   Emits first element when
                                ●   Calls tick callback when
    callback is called              stream emits
Socket IO - Server
// Registers "tap" and "emit" implementations of lang.stream.IStream for
// socketServer.Socket. Messages travel over the socket as JSON text.
lang.protocol.extend(
       lang.stream.IStream,
       socketServer.Socket,
       ["tap", function (socket, fn) {
             // Hand every incoming message to fn, decoded from JSON.
             function onMessage(data) {
                   fn(JSON.parse(data));
             }
             socket.on("message", onMessage);
       }],
       ["emit", function (socket, val) {
             // Encode the value as JSON before sending it.
             var payload = JSON.stringify(val);
             socket.send(payload);
       }]
  );
  /**
   * Starts listening on the given port and registers `callback` as the
   * handler for the 'connection' event.
   *
   * @param {number} port - Port passed to socketServer.listen.
   * @param {Function} callback - 'connection' event handler.
   */
  function server(port, callback) {
       socketServer.listen(port).sockets.on('connection', callback);
  }
Socket IO - Server
// Server entry point: listens on port 1234 and, for every connection,
// pipes tfIdf(socket) back into that same socket.
(function () {
  var lang = require('cosy-lang');
  var tfIdf = require('./lib/tf-idf');
  var server = require('./lib/socket-server').server;

  function onConnection(socket) {
        lang.stream.pipe(tfIdf(socket), socket);
  }

  server(1234, onConnection);
}());
Socket IO - Client
// Registers "tap" and "emit" implementations of lang.stream.IStream for
// socketClient.SocketNamespace. Messages travel over the socket as JSON text.
lang.protocol.extend(
       lang.stream.IStream,
       socketClient.SocketNamespace,
       ["tap", function (socket, fn) {
             // Hand every incoming message to fn, decoded from JSON.
             function onMessage(data) {
                   fn(JSON.parse(data));
             }
             socket.on("message", onMessage);
       }],
       ["emit", function (socket, val) {
             // Encode the value as JSON before sending it.
             var payload = JSON.stringify(val);
             socket.send(payload);
       }]
  );
  /**
   * Connects to `addr` and, when the 'connect' event fires, calls
   * `callback` with the object returned by socketClient.connect.
   *
   * @param {string} addr - Address passed to socketClient.connect.
   * @param {Function} callback - Called with the connection object.
   */
  function client(addr, callback) {
       var connection = socketClient.connect(addr);

       function onConnect() {
             callback(connection);
       }

       connection.on('connect', onConnect);
  }
Socket IO - Client
// Client entry point: connects to the local server, logs every value
// received on the socket, and pipes `documents` (defined outside this
// snippet) into it.
client("http://localhost:1234", function (socket) {
      lang.stream.tap(socket, function (val) {
            // Label corrected from the misspelled 'td-idf'.
            console.log('tf-idf', val);
      });
      lang.stream.pipe(documents, socket);
});
Demo



 #!
Future work
●   Queues
●   Persistent Data Structures
●   Performance
●   Graphs
●   Persistence
Links
●   Cosy
     getcosy.org
     github.com/organizations/getcosy
●   Demo
     github.com/bahulneel/cosy-lang-demo
●   Me
     @bahulneel
     github.com/bahulneel
Fin



Questions

To Infinity & Beyond: Protocols & sequences in Node - Part 2

  • 1.
    To Infinity & Beyond! Protocols & Lazy Sequences in Node Part Deux – Sh*t Just Got Real Bahul Neel Upadhyaya (@bahulneel) BraveNewTalent http://github.com/bahulneel http://www.bravenewtalent.com
  • 2.
    cosy.lang Library Protocols ● Protocols ● ISeq ● Sequences Lazy & Async ● ISync ● Argument length dispatch ● IStream ● Tail recursion ● IPromise ● Object Identity ● Object Metadata npm install cosy-lang
  • 3.
    An Example (TF-IDF) “Tf–idf, term frequency–inverse document frequency, is a numerical statistic which reflects how important a word is to a document in a collection or corpus. It is often used as a weighting factor in information retrieval and text mining.” - Wikipedia
  • 4.
    Words function stripWord(word) { return word.replace(/[^-a-zA-Z_0-9]+/, '').toLowerCase(); } function isWord(word) { return /^[-a-zA-Z_0-9]+$/.exec(word) } function words(string) { return vec(filter(isWord, map(stripWord, string.split(/ /)))); }
  • 5.
    Term Frequencies function tf(words){ var max = 0, counts, word; function countFeq(counts, word) { var newCounts = clone(counts); if ('undefined' === typeof newCounts[word]) newCounts[word] = 0; newCounts[word] += 1; if (newCounts[word] > max) max = newCounts[word]; return newCounts; } counts = reduce(countFeq, {}, words); if (max) { for (word in counts) { if (counts.hasOwnProperty(word)) counts[word] /= max; } } return counts; }
  • 6.
    Inverse Document Frequency idf= fn$({ 1: function (terms) { return idf({}, 1, terms); }, 3: function (freq, docCount, terms) { if (null === first(terms)) return null; function calcIdf(terms) { var docFreq, invDocFreq = {}, word; docFreq = merge(freq, first(terms)); for (word in docFreq) { if (docFreq.hasOwnProperty(word)) invDocFreq[word] = docCount/(1+docFreq[word]); } return cons(invDocFreq, idf(docFreq, docCount + 1, rest(terms))); } return lazy(terms, calcIdf); } });
  • 7.
    TF-IDF function tfIdf(documents) { var theWords, terms, freq; terms = map(tf, map(words, documents)); freq = idf(terms); function calcTfIdf(tf, idf) { var word, tfIdf = {}; for (word in tf) { if (tf.hasOwnProperty(word)) tfIdf[word] = tf[word] * idf[word]; } return tfIdf; } return map(calcTfIdf, terms, freq); }
  • 8.
    Making Sequences Asynchronous Source ● Takes an ISeq & ISync as its argument ● Extends IStream ● Registers a tick callback using the ISync interface ● Emits first element when callback is called Sink ● Takes an IStream as its argument ● Extends ISeq & ISync ● First returns stream.skip until stream emits ● Calls tick callback when stream emits
  • 9.
    Socket IO -Server lang.protocol.extend(lang.stream.IStream, socketServer.Socket, ["tap", function (socket, fn) { socket.on("message", function (data) { fn(JSON.parse(data)); }); }], ["emit", function (socket, val) { socket.send(JSON.stringify(val)); }] ); function server(port, callback) { var io = socketServer.listen(port); io.sockets.on('connection', callback); }
  • 10.
    Socket IO - Server (function (lang, tfIdf, server) { server(1234, function (socket) { lang.stream.pipe(tfIdf(socket), socket); }); })(require('cosy-lang'), require('./lib/tf-idf'), require('./lib/socket-server').server);
  • 11.
    SocketIO - Client lang.protocol.extend(lang.stream.IStream,socketClient.SocketNamespace, ["tap", function (socket, fn) { socket.on("message", function (data) { fn(JSON.parse(data)); }); }], ["emit", function (socket, val) { socket.send(JSON.stringify(val)); }] ); function client(addr, callback) { var io = socketClient.connect(addr); io.on('connect', function () { callback(io); }); }
  • 12.
    Socket IO -Client client("http://localhost:1234", function (socket) { lang.stream.tap(socket, function (val) { console.log('td-idf', val); }); lang.stream.pipe(documents, socket); });
  • 13.
  • 14.
    Future work ● Queues ● Persistent Data Structures ● Performance ● Graphs ● Persistence
  • 15.
    Links ● Cosy getcosy.org github.com/organizations/getcosy ● Demo github.com/bahulneel/cosy-lang-demo ● Me @bahulneel github.com/bahulneel
  • 16.