Merge branch 'scaling-index' into soon

pull/1/head
ansuz 5 years ago
commit ed63d98ab0

@@ -0,0 +1,173 @@
/* jshint esversion: 6 */
/* global process */
const HK = require("./hk-util");
const Store = require("./storage/file");
const Util = require("./common-util");
const nThen = require("nthen");

const Env = {};

var ready = false;
var store;
const init = function (config, cb) {
    if (!config) {
        return void cb('E_INVALID_CONFIG');
    }
    Store.create(config, function (_store) {
        store = _store;
        cb();
    });
};

const tryParse = function (Env, str) {
    try {
        return JSON.parse(str);
    } catch (err) {
        // XXX
    }
};
/*  computeIndex
    can call back with an error or a computed index which includes:
    * cpIndex:
        * array including any checkpoints pushed within the last 100 messages
        * processed by 'sliceCpIndex(cpIndex, line)'
    * offsetByHash:
        * a map containing message offsets by their hash
        * this is for every message in history, so it could be very large...
          except we remove offsets from the map if they occur before the oldest relevant checkpoint
    * size: in bytes
    * metadata:
        * validationKey
        * expiration time
        * owners
        * ??? (anything else we might add in the future)
    * line
        * the number of messages in history
        * including the initial metadata line, if it exists
*/
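/*  For illustration only (the values below are invented and the hashes
    truncated), a computed index for a short channel might look like:

    {
        cpIndex: [
            { offset: 2048, line: 14 },
            { offset: 5120, line: 53 }
        ],
        offsetByHash: {
            '2BX27lnsd8': 5120,
            'V3mB36AXkB': 5376
        },
        size: 5632,
        line: 60
    }
*/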
const computeIndex = function (channelName, cb) {
    const cpIndex = [];
    let messageBuf = [];
    let i = 0;

    const CB = Util.once(cb);

    const offsetByHash = {};
    let size = 0;
    nThen(function (w) {
        // iterate over all messages in the channel log
        // old channels can contain metadata as the first message of the log
        // skip over metadata as that is handled elsewhere
        // otherwise index important messages in the log
        store.readMessagesBin(channelName, 0, (msgObj, readMore) => {
            let msg;
            // keep an eye out for the metadata line if you haven't already seen it
            // but only check for metadata on the first line
            if (!i && msgObj.buff.indexOf('{') === 0) {
                i++; // always increment the message counter
                msg = tryParse(Env, msgObj.buff.toString('utf8'));
                if (typeof msg === "undefined") { return readMore(); }
                // validate that the current line really is metadata before storing it as such
                // skip this, as you already have metadata...
                if (HK.isMetadataMessage(msg)) { return readMore(); }
            }
            i++;
            if (msgObj.buff.indexOf('cp|') > -1) {
                msg = msg || tryParse(Env, msgObj.buff.toString('utf8'));
                if (typeof msg === "undefined") { return readMore(); }
                // cache the offsets of checkpoints if they can be parsed
                if (msg[2] === 'MSG' && msg[4].indexOf('cp|') === 0) {
                    cpIndex.push({
                        offset: msgObj.offset,
                        line: i
                    });
                    // we only want to store messages since the latest checkpoint
                    // so clear the buffer every time you see a new one
                    messageBuf = [];
                }
            }
            // if it's not metadata or a checkpoint then it should be a regular message
            // store it in the buffer
            messageBuf.push(msgObj);
            return readMore();
        }, w((err) => {
            if (err && err.code !== 'ENOENT') {
                w.abort();
                return void CB(err);
            }
            // once indexing is complete you should have a buffer of messages since the latest checkpoint
            // map the 'hash' of each message to its byte offset in the log, to be used for reconnecting clients
            messageBuf.forEach((msgObj) => {
                const msg = tryParse(Env, msgObj.buff.toString('utf8'));
                if (typeof msg === "undefined") { return; }
                if (msg[0] === 0 && msg[2] === 'MSG' && typeof(msg[4]) === 'string') {
                    // msgObj.offset is API guaranteed by our storage module
                    // it should always be a valid positive integer
                    offsetByHash[HK.getHash(msg[4])] = msgObj.offset;
                }
                // There is a trailing \n at the end of the file
                size = msgObj.offset + msgObj.buff.length + 1;
            });
        }));
    }).nThen(function () {
        // return the computed index
        CB(null, {
            // Only keep the checkpoints included in the last 100 messages
            cpIndex: HK.sliceCpIndex(cpIndex, i),
            offsetByHash: offsetByHash,
            size: size,
            //metadata: metadata,
            line: i
        });
    });
};
process.on('message', function (data) {
    if (!data || !data.txid) {
        return void process.send({
            error: 'E_INVAL'
        });
    }
    const txid = data.txid;

    // the first message must carry a config so the worker can open its own store
    if (!ready) {
        return void init(data.config, function (err) {
            if (err) {
                return void process.send({
                    txid: txid,
                    error: err,
                });
            }
            ready = true;
            process.send({txid: txid,});
        });
    }

    const channel = data.args;
    if (!channel) {
        return void process.send({
            error: 'E_NO_CHANNEL',
        });
    }

    // computeIndex
    computeIndex(channel, function (err, index) {
        if (err) {
            return void process.send({
                txid: txid,
                error: err,
            });
        }
        return void process.send({
            txid: txid,
            value: index,
        });
    });
});
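For reference, the worker above speaks a small txid-based protocol over process.send: the first message must carry a config (consumed by init), and every later message names a channel in args. A minimal sketch of driving it by hand; the txid values, path, and config fields are illustrative:

const { fork } = require('child_process');
const worker = fork('lib/compute-index');

// send one message and wait for the reply bearing the same txid
const send = function (msg, cb) {
    const handler = function (res) {
        if (!res || res.txid !== msg.txid) { return; }
        worker.removeListener('message', handler);
        cb(res.error, res.value);
    };
    worker.on('message', handler);
    worker.send(msg);
};

// initialize first, then request an index for one channel
send({ txid: 'init-1', config: { filePath: './datastore' } }, function (err) {
    if (err) { throw new Error(err); }
    send({ txid: 'idx-1', args: 'abcdef0123456789abcdef0123456789' }, function (err, index) {
        if (err) { throw new Error(err); }
        console.log(index.line, index.size);
    });
});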

@@ -238,6 +238,19 @@ module.exports.create = function (config, cb) {
             if (err) { throw new Error(err); }
             Env.blobStore = blob;
         }));
+    }).nThen(function (w) {
+        HK.initializeIndexWorkers(Env, {
+            filePath: config.filePath,
+            archivePath: config.archivePath,
+            channelExpirationMs: config.channelExpirationMs,
+            verbose: config.verbose,
+            openFileLimit: config.openFileLimit,
+        }, w(function (err, computeIndex) {
+            if (err) {
+                throw new Error(err);
+            }
+            Env.computeIndex = computeIndex;
+        }));
     }).nThen(function (w) {
         // create a task store
         require("./storage/tasks").create(config, w(function (e, tasks) {

@@ -7,7 +7,8 @@ const Util = require("./common-util");
 const MetaRPC = require("./commands/metadata");
 const Nacl = require('tweetnacl/nacl-fast');
 const { fork } = require('child_process');
-const numCPUs = require('os').cpus().length;
+const OS = require("os");
+const numCPUs = OS.cpus().length;
 
 const now = function () { return (new Date()).getTime(); };
 const ONE_DAY = 1000 * 60 * 60 * 24; // one day in milliseconds

@@ -62,7 +63,7 @@ const tryParse = function (Env, str) {
     clients from forking on checkpoints and dropping forked history.
 */
-const sliceCpIndex = function (cpIndex, line) {
+const sliceCpIndex = HK.sliceCpIndex = function (cpIndex, line) {
     // Remove "old" checkpoints (cp sent before 100 messages ago)
     const minLine = Math.max(0, (line - 100));
     let start = cpIndex.slice(0, -2);
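Only the first lines of sliceCpIndex appear in this hunk, but the shape of the policy is visible: the newest two checkpoints are split off by slice(0, -2) and kept unconditionally, while older entries are dropped once they fall more than 100 messages behind. A worked example under that assumption (offsets invented):

// cpIndex gathered while indexing a 250-line log
var cpIndex = [
    { offset: 512,  line: 40  },  // more than 100 lines old: dropped
    { offset: 2048, line: 160 },  // within the last 100 lines: kept
    { offset: 4096, line: 230 },  // newest two: always kept
    { offset: 6144, line: 245 }
];
// sliceCpIndex(cpIndex, 250) => entries at lines 160, 230, 245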
@@ -203,108 +204,6 @@ const getMetadata = HK.getMetadata = function (Env, channelName, _cb) {
     });
 };
 
-/*  computeIndex
-    can call back with an error or a computed index which includes:
-    * cpIndex:
-        * array including any checkpoints pushed within the last 100 messages
-        * processed by 'sliceCpIndex(cpIndex, line)'
-    * offsetByHash:
-        * a map containing message offsets by their hash
-        * this is for every message in history, so it could be very large...
-          except we remove offsets from the map if they occur before the oldest relevant checkpoint
-    * size: in bytes
-    * metadata:
-        * validationKey
-        * expiration time
-        * owners
-        * ??? (anything else we might add in the future)
-    * line
-        * the number of messages in history
-        * including the initial metadata line, if it exists
-*/
-const computeIndex = function (Env, channelName, cb) {
-    const store = Env.store;
-    const Log = Env.Log;
-
-    const cpIndex = [];
-    let messageBuf = [];
-    let i = 0;
-
-    const CB = Util.once(cb);
-
-    const offsetByHash = {};
-    let size = 0;
-    nThen(function (w) {
-        // iterate over all messages in the channel log
-        // old channels can contain metadata as the first message of the log
-        // skip over metadata as that is handled elsewhere
-        // otherwise index important messages in the log
-        store.readMessagesBin(channelName, 0, (msgObj, readMore) => {
-            let msg;
-            // keep an eye out for the metadata line if you haven't already seen it
-            // but only check for metadata on the first line
-            if (!i && msgObj.buff.indexOf('{') === 0) {
-                i++; // always increment the message counter
-                msg = tryParse(Env, msgObj.buff.toString('utf8'));
-                if (typeof msg === "undefined") { return readMore(); }
-                // validate that the current line really is metadata before storing it as such
-                // skip this, as you already have metadata...
-                if (isMetadataMessage(msg)) { return readMore(); }
-            }
-            i++;
-            if (msgObj.buff.indexOf('cp|') > -1) {
-                msg = msg || tryParse(Env, msgObj.buff.toString('utf8'));
-                if (typeof msg === "undefined") { return readMore(); }
-                // cache the offsets of checkpoints if they can be parsed
-                if (msg[2] === 'MSG' && msg[4].indexOf('cp|') === 0) {
-                    cpIndex.push({
-                        offset: msgObj.offset,
-                        line: i
-                    });
-                    // we only want to store messages since the latest checkpoint
-                    // so clear the buffer every time you see a new one
-                    messageBuf = [];
-                }
-            }
-            // if it's not metadata or a checkpoint then it should be a regular message
-            // store it in the buffer
-            messageBuf.push(msgObj);
-            return readMore();
-        }, w((err) => {
-            if (err && err.code !== 'ENOENT') {
-                w.abort();
-                return void CB(err);
-            }
-            // once indexing is complete you should have a buffer of messages since the latest checkpoint
-            // map the 'hash' of each message to its byte offset in the log, to be used for reconnecting clients
-            messageBuf.forEach((msgObj) => {
-                const msg = tryParse(Env, msgObj.buff.toString('utf8'));
-                if (typeof msg === "undefined") { return; }
-                if (msg[0] === 0 && msg[2] === 'MSG' && typeof(msg[4]) === 'string') {
-                    // msgObj.offset is API guaranteed by our storage module
-                    // it should always be a valid positive integer
-                    offsetByHash[getHash(msg[4], Log)] = msgObj.offset;
-                }
-                // There is a trailing \n at the end of the file
-                size = msgObj.offset + msgObj.buff.length + 1;
-            });
-        }));
-    }).nThen(function () {
-        // return the computed index
-        CB(null, {
-            // Only keep the checkpoints included in the last 100 messages
-            cpIndex: sliceCpIndex(cpIndex, i),
-            offsetByHash: offsetByHash,
-            size: size,
-            //metadata: metadata,
-            line: i
-        });
-    });
-};
 
 /* getIndex
     calls back with an error if anything goes wrong
     or with a cached index for a channel if it exists
@@ -326,7 +225,7 @@ const getIndex = (Env, channelName, cb) => {
     }
 
     Env.batchIndexReads(channelName, cb, function (done) {
-        computeIndex(Env, channelName, (err, ret) => {
+        Env.computeIndex(Env, channelName, (err, ret) => {
             // this is most likely an unrecoverable filesystem error
             if (err) { return void done(err); }
             // cache the computed result if possible
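Note that the call shape is preserved: initializeIndexWorkers (below) installs Env.computeIndex with the same (Env, channelName, cb) signature the local computeIndex had, so getIndex needs only this one-line change.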
@@ -912,17 +811,77 @@ HK.onDirectMessage = function (Env, Server, seq, userId, json) {
     });
 };
 
-/*  onChannelMessage
-    Determine what we should store when a message a broadcasted to a channel"
-    * ignores ephemeral channels
-    * ignores messages sent to expired channels
-    * rejects duplicated checkpoints
-    * validates messages to channels that have validation keys
-    * caches the id of the last saved checkpoint
-    * adds timestamps to incoming messages
-    * writes messages to the store
-*/
+HK.initializeIndexWorkers = function (Env, config, _cb) {
+    var cb = Util.once(Util.mkAsync(_cb));
+    const workers = [];
+
+    const response = Util.response();
+    const initWorker = function (worker, cb) {
+        //console.log("initializing index worker");
+        const txid = Util.uid();
+        response.expect(txid, function (err) {
+            if (err) { return void cb(err); }
+            //console.log("worker initialized");
+            workers.push(worker);
+            cb();
+        }, 15000);
+        worker.send({
+            txid: txid,
+            config: config,
+        });
+        worker.on('message', function (res) {
+            if (!res || !res.txid) { return; }
+            //console.log(res);
+            response.handle(res.txid, [res.error, res.value]);
+        });
+        worker.on('exit', function () {
+            var idx = workers.indexOf(worker);
+            if (idx !== -1) {
+                workers.splice(idx, 1);
+            }
+            var w = fork('lib/compute-index');
+            initWorker(w, function (err) {
+                if (err) {
+                    throw new Error(err);
+                }
+                workers.push(w);
+            });
+        });
+    };
+
+    var workerIndex = 0;
+    var sendCommand = function (Env, channel, cb) {
+        workerIndex = (workerIndex + 1) % workers.length;
+        if (workers.length === 0 ||
+            typeof(workers[workerIndex].send) !== 'function') {
+            return void cb("NO_WORKERS");
+        }
+        Env.store.getWeakLock(channel, function (next) {
+            const txid = Util.uid();
+            response.expect(txid, Util.both(next, cb), 45000);
+            workers[workerIndex].send({
+                txid: txid,
+                args: channel,
+            });
+        });
+    };
+
+    nThen(function (w) {
+        OS.cpus().forEach(function () {
+            initWorker(fork('lib/compute-index'), w(function (err) {
+                if (!err) { return; }
+                w.abort();
+                return void cb(err);
+            }));
+        });
+    }).nThen(function () {
+        //console.log("index workers ready");
+        cb(void 0, sendCommand);
+    });
+};
 
 HK.initializeValidationWorkers = function (Env) {
     if (typeof(Env.validateMessage) !== 'undefined') {
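Util.response is not shown in this diff, but the calls above imply its contract: expect(txid, cb, timeout) registers a callback under a transaction id, and handle(txid, args) fires it with the worker's reply. A minimal sketch of a registry with those semantics, assuming timeouts reject with an error string (this is not CryptPad's actual implementation):

var makeResponse = function () {
    var pending = {};
    return {
        expect: function (txid, cb, timeout) {
            // fail the transaction if no reply arrives in time
            var to = setTimeout(function () {
                delete pending[txid];
                cb('TIMEOUT');
            }, timeout);
            pending[txid] = function (args) {
                clearTimeout(to);
                delete pending[txid];
                cb.apply(null, args);
            };
        },
        handle: function (txid, args) {
            if (pending[txid]) { pending[txid](args); }
        },
    };
};

One quirk worth noting in the hunk above: the 'exit' handler's respawn path pushes the replacement worker a second time, since initWorker's success callback already does workers.push(worker), so a respawned worker lands in the pool twice.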
@@ -983,6 +942,17 @@ HK.initializeValidationWorkers = function (Env) {
     };
 };
 
+/*  onChannelMessage
+    Determine what we should store when a message is broadcast to a channel:
+    * ignores ephemeral channels
+    * ignores messages sent to expired channels
+    * rejects duplicated checkpoints
+    * validates messages to channels that have validation keys
+    * caches the id of the last saved checkpoint
+    * adds timestamps to incoming messages
+    * writes messages to the store
+*/
 HK.onChannelMessage = function (Env, Server, channel, msgStruct) {
     //console.log(+new Date(), "onChannelMessage");
     const Log = Env.Log;

@@ -1027,16 +1027,17 @@ module.exports.create = function (conf, cb) {
         // make sure the store's directory exists
         Fse.mkdirp(env.root, PERMISSIVE, w(function (err) {
             if (err && err.code !== 'EEXIST') {
-                throw err;
+                throw err; // XXX
             }
         }));
         // make sure the cold storage directory exists
         Fse.mkdirp(env.archiveRoot, PERMISSIVE, w(function (err) {
             if (err && err.code !== 'EEXIST') {
-                throw err;
+                throw err; // XXX
             }
         }));
     }).nThen(function () {
+        // XXX leave a place for an error
         cb({
             // OLDER METHODS
             // write a new message to a log
@@ -1081,6 +1082,12 @@ module.exports.create = function (conf, cb) {
             });
         },
+        getWeakLock: function (channelName, _cb) {
+            var cb = Util.once(Util.mkAsync(_cb));
+            if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
+            schedule.unordered(channelName, cb);
+        },
+
         // METHODS for deleting data
         // remove a channel and its associated metadata log if present
         removeChannel: function (channelName, cb) {
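getWeakLock defers to schedule.unordered, presumably so that concurrent index computations on the same channel can interleave rather than queue behind one another while still coordinating with the channel's ordered tasks. Callers appear to release the lock by invoking the function they are handed, as sendCommand does via Util.both(next, cb). A usage sketch with an illustrative channel name:

Env.store.getWeakLock('abcdef0123456789abcdef0123456789', function (next) {
    // ... read the channel's log, compute an index, etc. ...
    next(); // release the weak lock when finished
});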
