You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
621 lines
21 KiB
JavaScript
621 lines
21 KiB
JavaScript
/*@flow*/
|
|
/* jshint esversion: 6 */
|
|
/* global Buffer */
|
|
var Fs = require("fs");
|
|
var Fse = require("fs-extra");
|
|
var Path = require("path");
|
|
var nThen = require("nthen");
|
|
var Semaphore = require("saferphore");
|
|
const ToPull = require('stream-to-pull-stream');
|
|
const Pull = require('pull-stream');
|
|
|
|
const isValidChannelId = function (id) {
|
|
return typeof(id) === 'string' &&
|
|
id.length >= 32 && id.length < 50 &&
|
|
/^[a-zA-Z0-9=+-]*$/.test(id);
|
|
};
|
|
|
|
// 511 -> octal 777
|
|
// read, write, execute permissions flag
|
|
const PERMISSIVE = 511;
|
|
|
|
var mkPath = function (env, channelId) {
|
|
return Path.join(env.root, channelId.slice(0, 2), channelId) + '.ndjson';
|
|
};
|
|
|
|
var mkArchivePath = function (env, channelId) {
|
|
return Path.join(env.archiveRoot, 'datastore', channelId.slice(0, 2), channelId) + '.ndjson';
|
|
};
|
|
|
|
var getMetadataAtPath = function (Env, path, cb) {
|
|
var remainder = '';
|
|
var stream = Fs.createReadStream(path, { encoding: 'utf8' });
|
|
var complete = function (err, data) {
|
|
var _cb = cb;
|
|
cb = undefined;
|
|
if (_cb) { _cb(err, data); }
|
|
};
|
|
stream.on('data', function (chunk) {
|
|
if (!/\n/.test(chunk)) {
|
|
remainder += chunk;
|
|
return;
|
|
}
|
|
stream.close();
|
|
var metadata = chunk.split('\n')[0];
|
|
|
|
var parsed = null;
|
|
try {
|
|
parsed = JSON.parse(metadata);
|
|
complete(undefined, parsed);
|
|
}
|
|
catch (e) {
|
|
console.log("getMetadataAtPath");
|
|
console.error(e);
|
|
complete('INVALID_METADATA');
|
|
}
|
|
});
|
|
stream.on('end', function () {
|
|
complete();
|
|
});
|
|
stream.on('error', function (e) { complete(e); });
|
|
};
|
|
|
|
var getChannelMetadata = function (Env, channelId, cb) {
|
|
var path = mkPath(Env, channelId);
|
|
getMetadataAtPath(Env, path, cb);
|
|
};
|
|
|
|
var closeChannel = function (env, channelName, cb) {
|
|
if (!env.channels[channelName]) { return void cb(); }
|
|
try {
|
|
env.channels[channelName].writeStream.close();
|
|
delete env.channels[channelName];
|
|
env.openFiles--;
|
|
cb();
|
|
} catch (err) {
|
|
cb(err);
|
|
}
|
|
};
|
|
|
|
var clearChannel = function (env, channelId, cb) {
|
|
var path = mkPath(env, channelId);
|
|
getMetadataAtPath(env, path, function (e, metadata) {
|
|
if (e) { return cb(new Error(e)); }
|
|
if (!metadata) {
|
|
return void Fs.truncate(path, 0, function (err) {
|
|
if (err) {
|
|
return cb(err);
|
|
}
|
|
cb(void 0);
|
|
});
|
|
}
|
|
|
|
var len = JSON.stringify(metadata).length + 1;
|
|
|
|
// as long as closeChannel is synchronous, this should not cause
|
|
// any race conditions. truncate ought to return faster than a channel
|
|
// can be opened and read by another user. if that turns out not to be
|
|
// the case, we'll need to implement locking.
|
|
closeChannel(env, channelId, function (err) {
|
|
if (err) { cb(err); }
|
|
Fs.truncate(path, len, function (err) {
|
|
if (err) { return cb(err); }
|
|
cb();
|
|
});
|
|
});
|
|
});
|
|
};
|
|
|
|
var readMessages = function (path, msgHandler, cb) {
|
|
var remainder = '';
|
|
var stream = Fs.createReadStream(path, { encoding: 'utf8' });
|
|
var complete = function (err) {
|
|
var _cb = cb;
|
|
cb = undefined;
|
|
if (_cb) { _cb(err); }
|
|
};
|
|
stream.on('data', function (chunk) {
|
|
var lines = chunk.split('\n');
|
|
lines[0] = remainder + lines[0];
|
|
remainder = lines.pop();
|
|
lines.forEach(msgHandler);
|
|
});
|
|
stream.on('end', function () {
|
|
msgHandler(remainder);
|
|
complete();
|
|
});
|
|
stream.on('error', function (e) { complete(e); });
|
|
};
|
|
|
|
const NEWLINE_CHR = ('\n').charCodeAt(0);
|
|
const mkBufferSplit = () => {
|
|
let remainder = null;
|
|
return Pull((read) => {
|
|
return (abort, cb) => {
|
|
read(abort, function (end, data) {
|
|
if (end) {
|
|
if (data) { console.log("mkBufferSplit() Data at the end"); }
|
|
cb(end, remainder ? [remainder, data] : [data]);
|
|
remainder = null;
|
|
return;
|
|
}
|
|
const queue = [];
|
|
for (;;) {
|
|
const offset = data.indexOf(NEWLINE_CHR);
|
|
if (offset < 0) {
|
|
remainder = remainder ? Buffer.concat([remainder, data]) : data;
|
|
break;
|
|
}
|
|
let subArray = data.slice(0, offset);
|
|
if (remainder) {
|
|
subArray = Buffer.concat([remainder, subArray]);
|
|
remainder = null;
|
|
}
|
|
queue.push(subArray);
|
|
data = data.slice(offset + 1);
|
|
}
|
|
cb(end, queue);
|
|
});
|
|
};
|
|
}, Pull.flatten());
|
|
};
|
|
|
|
const mkOffsetCounter = () => {
|
|
let offset = 0;
|
|
return Pull.map((buff) => {
|
|
const out = { offset: offset, buff: buff };
|
|
// +1 for the eaten newline
|
|
offset += buff.length + 1;
|
|
return out;
|
|
});
|
|
};
|
|
|
|
const readMessagesBin = (env, id, start, msgHandler, cb) => {
|
|
const stream = Fs.createReadStream(mkPath(env, id), { start: start });
|
|
// TODO get the channel and add the atime
|
|
let keepReading = true;
|
|
Pull(
|
|
ToPull.read(stream),
|
|
mkBufferSplit(),
|
|
mkOffsetCounter(),
|
|
Pull.asyncMap((data, moreCb) => {
|
|
msgHandler(data, moreCb, () => { keepReading = false; moreCb(); });
|
|
}),
|
|
Pull.drain(() => (keepReading), (err) => {
|
|
cb((keepReading) ? err : undefined);
|
|
})
|
|
);
|
|
};
|
|
|
|
var checkPath = function (path, callback) {
|
|
// TODO check if we actually need to use stat at all
|
|
Fs.stat(path, function (err) {
|
|
if (!err) {
|
|
callback(undefined, true);
|
|
return;
|
|
}
|
|
if (err.code !== 'ENOENT') {
|
|
callback(err);
|
|
return;
|
|
}
|
|
Fse.mkdirp(Path.dirname(path), PERMISSIVE, function (err) {
|
|
if (err && err.code !== 'EEXIST') {
|
|
callback(err);
|
|
return;
|
|
}
|
|
callback(undefined, false);
|
|
});
|
|
});
|
|
};
|
|
|
|
var removeChannel = function (env, channelName, cb) {
|
|
var filename = mkPath(env, channelName);
|
|
Fs.unlink(filename, cb);
|
|
};
|
|
|
|
var removeArchivedChannel = function (env, channelName, cb) {
|
|
var filename = mkArchivePath(env, channelName);
|
|
Fs.unlink(filename, cb);
|
|
};
|
|
|
|
var listChannels = function (root, handler, cb) {
|
|
// do twenty things at a time
|
|
var sema = Semaphore.create(20);
|
|
|
|
var dirList = [];
|
|
|
|
nThen(function (w) {
|
|
// the root of your datastore contains nested directories...
|
|
Fs.readdir(root, w(function (err, list) {
|
|
if (err) {
|
|
w.abort();
|
|
// TODO check if we normally return strings or errors
|
|
return void cb(err);
|
|
}
|
|
dirList = list;
|
|
}));
|
|
}).nThen(function (w) {
|
|
// search inside the nested directories
|
|
// stream it so you don't put unnecessary data in memory
|
|
var wait = w();
|
|
dirList.forEach(function (dir) {
|
|
sema.take(function (give) {
|
|
var nestedDirPath = Path.join(root, dir);
|
|
Fs.readdir(nestedDirPath, w(give(function (err, list) {
|
|
if (err) { return void handler(err); } // Is this correct?
|
|
|
|
list.forEach(function (item) {
|
|
// ignore things that don't match the naming pattern
|
|
if (/^\./.test(item) || !/[0-9a-fA-F]{32,}\.ndjson$/.test(item)) { return; }
|
|
var filepath = Path.join(nestedDirPath, item);
|
|
var channel = filepath.replace(/\.ndjson$/, '').replace(/.*\//, '');
|
|
if ([32, 34].indexOf(channel.length) === -1) { return; }
|
|
|
|
// otherwise throw it on the pile
|
|
sema.take(function (give) {
|
|
Fs.stat(filepath, w(give(function (err, stats) {
|
|
if (err) {
|
|
return void handler(err);
|
|
}
|
|
|
|
handler(void 0, {
|
|
channel: channel,
|
|
atime: stats.atime,
|
|
mtime: stats.mtime,
|
|
ctime: stats.ctime,
|
|
size: stats.size,
|
|
});
|
|
})));
|
|
});
|
|
});
|
|
})));
|
|
});
|
|
});
|
|
wait();
|
|
}).nThen(function () {
|
|
cb();
|
|
});
|
|
};
|
|
|
|
// move a channel's log file from its current location
|
|
// to an equivalent location in the cold storage directory
|
|
var archiveChannel = function (env, channelName, cb) {
|
|
if (!env.retainData) {
|
|
return void cb("ARCHIVES_DISABLED");
|
|
}
|
|
|
|
// ctime is the most reliable indicator of when a file was archived
|
|
// because it is used to indicate changes to the files metadata
|
|
// and not its contents
|
|
// if we find that this is not reliable in production, we can update it manually
|
|
// https://nodejs.org/api/fs.html#fs_fs_utimes_path_atime_mtime_callback
|
|
|
|
// check what the channel's path should be (in its current location)
|
|
var currentPath = mkPath(env, channelName);
|
|
|
|
// construct a parallel path in the new location
|
|
var archivePath = mkArchivePath(env, channelName);
|
|
|
|
// use Fse.move to move it, Fse makes paths to the directory when you use it.
|
|
// https://github.com/jprichardson/node-fs-extra/blob/HEAD/docs/move.md
|
|
Fse.move(currentPath, archivePath, { overwrite: true }, cb);
|
|
};
|
|
|
|
var unarchiveChannel = function (env, channelName, cb) {
|
|
// very much like 'archiveChannel' but in the opposite direction
|
|
|
|
// the file is currently archived
|
|
var currentPath = mkArchivePath(env, channelName);
|
|
var unarchivedPath = mkPath(env, channelName);
|
|
|
|
// if a file exists in the unarchived path, you probably don't want to clobber its data
|
|
// so unlike 'archiveChannel' we won't overwrite.
|
|
// Fse.move will call back with EEXIST in such a situation
|
|
Fse.move(currentPath, unarchivedPath, cb);
|
|
};
|
|
|
|
var flushUnusedChannels = function (env, cb, frame) {
|
|
var currentTime = +new Date();
|
|
|
|
var expiration = typeof(frame) === 'undefined'? env.channelExpirationMs: frame;
|
|
Object.keys(env.channels).forEach(function (chanId) {
|
|
var chan = env.channels[chanId];
|
|
if (typeof(chan.atime) !== 'number') { return; }
|
|
if (currentTime >= expiration + chan.atime) {
|
|
closeChannel(env, chanId, function (err) {
|
|
if (err) {
|
|
console.error(err);
|
|
return;
|
|
}
|
|
if (env.verbose) {
|
|
console.log("Closed channel [%s]", chanId);
|
|
}
|
|
});
|
|
}
|
|
});
|
|
cb();
|
|
};
|
|
|
|
var channelBytes = function (env, chanName, cb) {
|
|
var path = mkPath(env, chanName);
|
|
Fs.stat(path, function (err, stats) {
|
|
if (err) { return void cb(err); }
|
|
cb(undefined, stats.size);
|
|
});
|
|
};
|
|
|
|
/*::
|
|
export type ChainPadServer_ChannelInternal_t = {
|
|
atime: number,
|
|
writeStream: typeof(process.stdout),
|
|
whenLoaded: ?Array<(err:?Error, chan:?ChainPadServer_ChannelInternal_t)=>void>,
|
|
onError: Array<(?Error)=>void>,
|
|
path: string
|
|
};
|
|
*/
|
|
var getChannel = function (
|
|
env,
|
|
id,
|
|
callback /*:(err:?Error, chan:?ChainPadServer_ChannelInternal_t)=>void*/
|
|
) {
|
|
if (env.channels[id]) {
|
|
var chan = env.channels[id];
|
|
chan.atime = +new Date();
|
|
if (chan.whenLoaded) {
|
|
chan.whenLoaded.push(callback);
|
|
} else {
|
|
callback(undefined, chan);
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (env.openFiles >= env.openFileLimit) {
|
|
// if you're running out of open files, asynchronously clean up expired files
|
|
// do it on a shorter timeframe, though (half of normal)
|
|
setTimeout(function () {
|
|
flushUnusedChannels(env, function () {
|
|
if (env.verbose) {
|
|
console.log("Approaching open file descriptor limit. Cleaning up");
|
|
}
|
|
}, env.channelExpirationMs / 2);
|
|
});
|
|
}
|
|
var path = mkPath(env, id);
|
|
var channel /*:ChainPadServer_ChannelInternal_t*/ = env.channels[id] = {
|
|
atime: +new Date(),
|
|
writeStream: (undefined /*:any*/),
|
|
whenLoaded: [ callback ],
|
|
onError: [ ],
|
|
path: path
|
|
};
|
|
var complete = function (err) {
|
|
var whenLoaded = channel.whenLoaded;
|
|
// no guarantee stream.on('error') will not cause this to be called multiple times
|
|
if (!whenLoaded) { return; }
|
|
channel.whenLoaded = undefined;
|
|
if (err) {
|
|
delete env.channels[id];
|
|
}
|
|
if (!channel.writeStream) {
|
|
throw new Error("getChannel() complete called without channel writeStream");
|
|
}
|
|
whenLoaded.forEach(function (wl) { wl(err, (err) ? undefined : channel); });
|
|
};
|
|
var fileExists;
|
|
var errorState;
|
|
nThen(function (waitFor) {
|
|
checkPath(path, waitFor(function (err, exists) {
|
|
if (err) {
|
|
errorState = true;
|
|
complete(err);
|
|
return;
|
|
}
|
|
fileExists = exists;
|
|
}));
|
|
}).nThen(function (waitFor) {
|
|
if (errorState) { return; }
|
|
var stream = channel.writeStream = Fs.createWriteStream(path, { flags: 'a' });
|
|
env.openFiles++;
|
|
stream.on('open', waitFor());
|
|
stream.on('error', function (err /*:?Error*/) {
|
|
env.openFiles--;
|
|
// this might be called after this nThen block closes.
|
|
if (channel.whenLoaded) {
|
|
complete(err);
|
|
} else {
|
|
channel.onError.forEach(function (handler) {
|
|
handler(err);
|
|
});
|
|
}
|
|
});
|
|
}).nThen(function () {
|
|
if (errorState) { return; }
|
|
complete();
|
|
});
|
|
};
|
|
|
|
const messageBin = (env, chanName, msgBin, cb) => {
|
|
getChannel(env, chanName, function (err, chan) {
|
|
if (!chan) {
|
|
cb(err);
|
|
return;
|
|
}
|
|
let called = false;
|
|
var complete = function (err) {
|
|
if (called) { return; }
|
|
called = true;
|
|
cb(err);
|
|
};
|
|
chan.onError.push(complete);
|
|
chan.writeStream.write(msgBin, function () {
|
|
/*::if (!chan) { throw new Error("Flow unreachable"); }*/
|
|
chan.onError.splice(chan.onError.indexOf(complete), 1);
|
|
if (!cb) { return; }
|
|
//chan.messages.push(msg);
|
|
chan.atime = +new Date(); // FIXME seems like odd behaviour that not passing a callback would result in not updating atime...
|
|
complete();
|
|
});
|
|
});
|
|
};
|
|
|
|
var message = function (env, chanName, msg, cb) {
|
|
messageBin(env, chanName, new Buffer(msg + '\n', 'utf8'), cb);
|
|
};
|
|
|
|
var getMessages = function (env, chanName, handler, cb) {
|
|
getChannel(env, chanName, function (err, chan) {
|
|
if (!chan) {
|
|
cb(err);
|
|
return;
|
|
}
|
|
var errorState = false;
|
|
readMessages(chan.path, function (msg) {
|
|
if (!msg || errorState) { return; }
|
|
//console.log(msg);
|
|
try {
|
|
handler(msg);
|
|
} catch (e) {
|
|
errorState = true;
|
|
return void cb(err);
|
|
}
|
|
}, function (err) {
|
|
if (err) {
|
|
errorState = true;
|
|
return void cb(err);
|
|
}
|
|
if (!chan) { throw new Error("impossible, flow checking"); }
|
|
chan.atime = +new Date();
|
|
cb();
|
|
});
|
|
});
|
|
};
|
|
|
|
/*::
|
|
export type ChainPadServer_MessageObj_t = { buff: Buffer, offset: number };
|
|
export type ChainPadServer_Storage_t = {
|
|
readMessagesBin: (
|
|
channelName:string,
|
|
start:number,
|
|
asyncMsgHandler:(msg:ChainPadServer_MessageObj_t, moreCb:()=>void, abortCb:()=>void)=>void,
|
|
cb:(err:?Error)=>void
|
|
)=>void,
|
|
message: (channelName:string, content:string, cb:(err:?Error)=>void)=>void,
|
|
messageBin: (channelName:string, content:Buffer, cb:(err:?Error)=>void)=>void,
|
|
getMessages: (channelName:string, msgHandler:(msg:string)=>void, cb:(err:?Error)=>void)=>void,
|
|
removeChannel: (channelName:string, cb:(err:?Error)=>void)=>void,
|
|
closeChannel: (channelName:string, cb:(err:?Error)=>void)=>void,
|
|
flushUnusedChannels: (cb:()=>void)=>void,
|
|
getChannelSize: (channelName:string, cb:(err:?Error, size:?number)=>void)=>void,
|
|
getChannelMetadata: (channelName:string, cb:(err:?Error|string, data:?any)=>void)=>void,
|
|
clearChannel: (channelName:string, (err:?Error)=>void)=>void
|
|
};
|
|
export type ChainPadServer_Config_t = {
|
|
verbose?: boolean,
|
|
filePath?: string,
|
|
channelExpirationMs?: number,
|
|
openFileLimit?: number
|
|
};
|
|
*/
|
|
module.exports.create = function (
|
|
conf /*:ChainPadServer_Config_t*/,
|
|
cb /*:(store:ChainPadServer_Storage_t)=>void*/
|
|
) {
|
|
var env = {
|
|
root: conf.filePath || './datastore',
|
|
archiveRoot: conf.archivePath || './data/archive',
|
|
retainData: conf.retainData,
|
|
channels: { },
|
|
channelExpirationMs: conf.channelExpirationMs || 30000,
|
|
verbose: conf.verbose,
|
|
openFiles: 0,
|
|
openFileLimit: conf.openFileLimit || 2048,
|
|
};
|
|
var it;
|
|
|
|
nThen(function (w) {
|
|
// make sure the store's directory exists
|
|
Fse.mkdirp(env.root, PERMISSIVE, w(function (err) {
|
|
if (err && err.code !== 'EEXIST') {
|
|
throw err;
|
|
}
|
|
}));
|
|
// make sure the cold storage directory exists
|
|
Fse.mkdirp(env.archiveRoot, PERMISSIVE, w(function (err) {
|
|
if (err && err.code !== 'EEXIST') {
|
|
throw err;
|
|
}
|
|
}));
|
|
}).nThen(function () {
|
|
cb({
|
|
readMessagesBin: (channelName, start, asyncMsgHandler, cb) => {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
readMessagesBin(env, channelName, start, asyncMsgHandler, cb);
|
|
},
|
|
message: function (channelName, content, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
message(env, channelName, content, cb);
|
|
},
|
|
messageBin: (channelName, content, cb) => {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
messageBin(env, channelName, content, cb);
|
|
},
|
|
getMessages: function (channelName, msgHandler, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
getMessages(env, channelName, msgHandler, cb);
|
|
},
|
|
removeChannel: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
removeChannel(env, channelName, function (err) {
|
|
cb(err);
|
|
});
|
|
},
|
|
removeArchivedChannel: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
removeArchivedChannel(env, channelName, cb);
|
|
},
|
|
closeChannel: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
closeChannel(env, channelName, cb);
|
|
},
|
|
flushUnusedChannels: function (cb) {
|
|
flushUnusedChannels(env, cb);
|
|
},
|
|
getChannelSize: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
channelBytes(env, channelName, cb);
|
|
},
|
|
getChannelMetadata: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
getChannelMetadata(env, channelName, cb);
|
|
},
|
|
clearChannel: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
clearChannel(env, channelName, cb);
|
|
},
|
|
listChannels: function (handler, cb) {
|
|
listChannels(env.root, handler, cb);
|
|
},
|
|
listArchivedChannels: function (handler, cb) {
|
|
listChannels(env.archiveRoot, handler, cb);
|
|
},
|
|
archiveChannel: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
archiveChannel(env, channelName, cb);
|
|
},
|
|
unarchiveChannel: function (channelName, cb) {
|
|
if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); }
|
|
unarchiveChannel(env, channelName, cb);
|
|
},
|
|
log: function (channelName, content, cb) {
|
|
message(env, channelName, content, cb);
|
|
},
|
|
shutdown: function () {
|
|
clearInterval(it);
|
|
}
|
|
});
|
|
});
|
|
it = setInterval(function () {
|
|
flushUnusedChannels(env, function () { });
|
|
}, 5000);
|
|
};
|