进程管理的利器



  node-cluster
      张轩丞(朋春)
       @我是aleafs
项目地址


• npm install node-cluster
• https://github.com/aleafs/node-cluster
千万pv的服务
千万pv的服务

• 多进程协同提供服务
千万pv的服务

• 多进程协同提供服务
• 进程“死”掉时的容灾
千万pv的服务

• 多进程协同提供服务
• 进程“死”掉时的容灾
• 服务的平滑重启
千万pv的服务

• 多进程协同提供服务
• 进程“死”掉时的容灾
• 服务的平滑重启
• 监控、管理接口
进程
进程

|-- master
进程

|-- master
   |-- n * core service
进程

|-- master
   |-- n * core service
   |-- analysis daemon
进程

|-- master
   |-- n * core service
   |-- analysis daemon
   |-- control service
Usage
// Create the master with the pid file and status file kept under ./run.
var master = require('node-cluster').Master({
  'pidfile' : __dirname + '/run/app.pid',
  'statusfile' : __dirname + '/run/status.log',
});

// Register a worker group named 'core' that runs ./http.js.
// `port1` / `socket1` are placeholders on the slide and must be supplied by
// the caller; `children` is presumably the number of worker processes to
// spawn for this group -- confirm against the node-cluster README.
master.register('core', __dirname + '/http.js', {
  'listen' : [ port1, socket1, /** ... */ ],
  'children' : 4,
});
master.dispatch();
Usage
var http = require('http');
// Plain HTTP server; note that this snippet never calls server.listen().
var server = http.createServer(function (req, res) {
  res.end('hello world');
});

// Every socket passed to the ready() callback is injected into the HTTP
// server by emitting its 'connection' event.
// NOTE(review): the two intervals look like milliseconds -- confirm against
// the node-cluster documentation.
require('node-cluster').Worker({
   'heartbeat_interval' : 2000,
   'terminate_timeout' : 1000,
}).ready(function (socket) {
  server.emit('connection', socket);
});
协同服务


• 多进程“共用”同一个端口
• 请求的负载均衡
端口“共享”
 master
       listen
1.request                          worker 1

            2.child_process.send   worker 2
                   handle
                                   worker 3
    3.response
                                   worker 4
worker分配算法

          master推          worker拉

       master选择一个worker, master将请求投入队列,
 原理           让它干活             等worker来抢


          轮询 / 权重 /
分配算法      最空闲worker
                            worker来抢



 劣势    worker“死”掉,影响一批   多一次进程通信
// Message handling excerpt (the enclosing switch is not shown on the slide).
// WAKEUP: while this process is in the RUNNING state, send a GET_FD message.
case MESSAGE.WAKEUP:
      if (STATUS.RUNNING === mstat.status) {
        _send(MESSAGE.GET_FD);
      }
      break;

// REQ_FD: count the request in mstat.scores and hand `handle` to _accept().
case MESSAGE.REQ_FD:
      mstat.scores++;
      _accept(handle, callback);
      // Still RUNNING and msg.data is truthy: send another GET_FD on the next
      // tick. NOTE(review): msg.data presumably signals that more handles are
      // queued -- confirm against the sender.
      if (STATUS.RUNNING === mstat.status && msg.data) {
        process.nextTick(function () {
          _send(MESSAGE.GET_FD);
        });
      }
      break;
// Queue-length threshold: twice the configured number of children.
var usepush = 2 * _options.children;
// Create one listener per configured `listen` entry, stored in _listener by entry.
_options.listen.forEach(function (item) {
  // ...
  _listener[item] = Listen(item, function (handle) {
    // Queue the handle. While the value returned by push() is within the
    // threshold, send a WAKEUP message to one entry of _pobject.
    // NOTE(review): assumes _fdqueue is an Array, so push() returns the new
    // length -- confirm.
    if (_fdqueue.push(handle) <= usepush) {
      // Advance the index cyclically over _pobject.
      _wakeups = (_wakeups + 1) % _pobject.length;
      try {
         _pobject[_wakeups].send({
           'type' : MESSAGE.WAKEUP,
         });
      } catch (e) {
        // Errors thrown while sending are deliberately ignored here.
      }
    }
  });
});
进程容灾

// Handler for the 'exit' event of `sub`; receives the exit code and signal.
sub.on('exit', function (code, signal) {
 // start a new child process
 // max_fatal_restart
});
进程容灾

// Every 30000 ms, compare sub.last_hb_time against a threshold.
// The `?` is a placeholder left on the slide; the real threshold is not shown.
setInterval(function () {
  if (sub.last_hb_time < ?) {
      // start a new child process and then kill this
  }
}, 30000);
平滑重启
       restart         reload

                     新worker开始工作了
原理   stop && start     停掉旧worker


信号    SIGTERM          SIGUSR1

区别   master有退出       master无退出
reload用在哪里?
// 配置、资源的“热”加载

// The worker is created inside the app.init() callback, i.e. only after
// app.init() has invoked it.
app.init(function () {
  require('node-cluster').Worker({
    'heartbeat_interval' : 2000,
    'terminate_timeout' : 1000,
  }).ready(function (socket) {
    // Officially start serving requests
  });
});
监控接口
// Handler for the master's 'giveup' event; receives (name, fatals).
master.on('giveup', function (name, fatals) {
  // XXX: alert
  // Temporarily giving up on trying to restart a child process
});

// Handler for the master's 'state' event; receives (name, current, before).
master.on('state', function (name, current, before) {
  // XXX: alert
  // The number of working child processes has changed
});
statusfile
905:	

 daemon	

 908	

 {"status":2,"scores":0,"mem":{"rss":
22204416,"heapTotal":5306944,"heapUsed":2775704},"_time":
1341026802149}
905:	

 http	

 910	

 {"status":2,"scores":1831,"mem":{"rss":
27009024,"heapTotal":9085760,"heapUsed":4811288},"_time":
1341026802157}
905:	

 daemon	

 907	

 {"status":2,"scores":0,"mem":{"rss":
22200320,"heapTotal":5315072,"heapUsed":2756800},"_time":
1341026804137}
905:	

 daemon	

 908	

 {"status":2,"scores":0,"mem":{"rss":
22290432,"heapTotal":5306944,"heapUsed":2787056},"_time":
1341026804149}
贡献者
    aleafs       @我是aleafs

  fengmk2       @Python发烧友

 Jackson Tian      @朴灵

Will Wen Gunn    @Wen-小问
Node cluster

Node cluster