From 2337fbdc6bc66eb60f61f9882dcd0f5786b1d671 Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 6 Jun 2019 15:51:44 +0200 Subject: [PATCH 01/23] update chainpad-server dependency --- package-lock.json | 37 +++++++------------------------------ package.json | 2 +- 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/package-lock.json b/package-lock.json index daafab1d5..30b43f03e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -99,26 +99,15 @@ "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=" }, "chainpad-server": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/chainpad-server/-/chainpad-server-3.0.1.tgz", - "integrity": "sha512-1r53gYvPlrnZg0vf91gP3pqHILfi67oSo3cnj7kcvC4Y/n4t6wS3QCCjXeNArOuZv/sIByuKkeo1929osr1/KA==", + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/chainpad-server/-/chainpad-server-3.0.2.tgz", + "integrity": "sha512-c5aEljVAapDKKs0+Rt2jymKAszm8X4ZeLFNJj1yxflwBqoh0jr8OANYvbfjtNaYFe2Wdflp/1i4gibYX4IMc+g==", "requires": { "nthen": "^0.1.8", "pull-stream": "^3.6.9", "stream-to-pull-stream": "^1.7.3", "tweetnacl": "~0.12.2", - "ws": "^1.0.1" - }, - "dependencies": { - "ws": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/ws/-/ws-1.1.5.tgz", - "integrity": "sha512-o3KqipXNUdS7wpQzBHSe180lBGO60SoK0yVo3CYJgb2MkobuWuBX6dhkYP5ORCLd55y+SaflMOV5fqAB53ux4w==", - "requires": { - "options": ">=0.0.5", - "ultron": "1.0.x" - } - } + "ws": "^3.3.1" } }, "chalk": { @@ -827,11 +816,6 @@ "wrappy": "1" } }, - "options": { - "version": "0.0.6", - "resolved": "https://registry.npmjs.org/options/-/options-0.0.6.tgz", - "integrity": "sha1-7CLTEoBrtT5zF3Pnza788cZDEo8=" - }, "os-tmpdir": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", @@ -1248,9 +1232,9 @@ } }, "ultron": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/ultron/-/ultron-1.0.2.tgz", - "integrity": "sha1-rOEWq1V80Zc4ak6I9GhTeMiy5Po=" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ultron/-/ultron-1.1.1.tgz", + "integrity": "sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og==" }, "uniq": { "version": "1.0.1", @@ -1298,13 +1282,6 @@ "async-limiter": "~1.0.0", "safe-buffer": "~5.1.0", "ultron": "~1.1.0" - }, - "dependencies": { - "ultron": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/ultron/-/ultron-1.1.1.tgz", - "integrity": "sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og==" - } } }, "xml2js": { diff --git a/package.json b/package.json index 12a3dd15c..06c187f11 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "url": "git://github.com/xwiki-labs/cryptpad.git" }, "dependencies": { - "chainpad-server": "~3.0.0", + "chainpad-server": "~3.0.2", "express": "~4.16.0", "fs-extra": "^7.0.0", "nthen": "~0.1.0", From 7647f7c68a9aabd13e357df57baf0c8e4c3d7da0 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:05:18 +0200 Subject: [PATCH 02/23] implement four new storage APIs * listChannels * listArchivedChannels * archiveChannel * removeArchivedChannel --- storage/file.js | 141 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 131 insertions(+), 10 deletions(-) diff --git a/storage/file.js b/storage/file.js index ba092a14e..84d95a266 100644 --- a/storage/file.js +++ b/storage/file.js @@ -5,6 +5,7 @@ var Fs = require("fs"); var Fse = require("fs-extra"); var Path = require("path"); var nThen = require("nthen"); +var Semaphore = require("saferphore"); const ToPull = require('stream-to-pull-stream'); const Pull = require('pull-stream'); @@ -14,10 +15,18 @@ const isValidChannelId = function (id) { /^[a-zA-Z0-9=+-]*$/.test(id); }; +// 511 -> octal 777 +// read, write, execute permissions flag +const PERMISSIVE = 511; + var mkPath = function (env, channelId) { return Path.join(env.root, channelId.slice(0, 2), channelId) + '.ndjson'; }; +var mkArchivePath = function (env, channelId) { + return Path.join(env.archiveRoot, channelId.slice(0, 2), channelId) + '.ndjson'; +}; + var getMetadataAtPath = function (Env, path, cb) { var remainder = ''; var stream = Fs.createReadStream(path, { encoding: 'utf8' }); @@ -68,7 +77,7 @@ var closeChannel = function (env, channelName, cb) { } }; -var clearChannel = function (env, channelId, cb) { // FIXME deletion +var clearChannel = function (env, channelId, cb) { var path = mkPath(env, channelId); getMetadataAtPath(env, path, function (e, metadata) { if (e) { return cb(new Error(e)); } @@ -189,8 +198,7 @@ var checkPath = function (path, callback) { callback(err); return; } - // 511 -> octal 777 - Fse.mkdirp(Path.dirname(path), 511, function (err) { + Fse.mkdirp(Path.dirname(path), PERMISSIVE, function (err) { if (err && err.code !== 'EEXIST') { callback(err); return; @@ -200,11 +208,99 @@ var checkPath = function (path, callback) { }); }; -var removeChannel = function (env, channelName, cb) { // FIXME deletion +var removeChannel = function (env, channelName, cb) { var filename = mkPath(env, channelName); Fs.unlink(filename, cb); }; +var removeArchivedChannel = function (env, channelName, cb) { + var filename = mkArchivePath(env, channelName); + Fs.unlink(filename, cb); +}; + +var listChannels = function (root, handler, cb) { + // do twenty things at a time + var sema = Semaphore.create(20); + + var dirList = []; + + nThen(function (w) { + // the root of your datastore contains nested directories... + Fs.readdir(root, w(function (err, list) { + if (err) { + w.abort(); + // TODO check if we normally return strings or errors + return void cb(err); + } + dirList = list; + })); + }).nThen(function (w) { + // search inside the nested directories + // stream it so you don't put unnecessary data in memory + var wait = w(); + dirList.forEach(function (dir) { + sema.take(function (give) { + var nestedDirPath = Path.join(root, dir); + Fs.readdir(nestedDirPath, w(give(function (err, list) { + if (err) { return void handler(err); } // Is this correct? + + list.forEach(function (item) { + // ignore things that don't match the naming pattern + if (/^\./.test(item) || !/[0-9a-fA-F]{32,}\.ndjson$/.test(item)) { return; } + var filepath = Path.join(nestedDirPath, item); + var channel = filepath.replace(/\.ndjson$/, '').replace(/.*\//, ''); + if ([32, 34].indexOf(channel.length) === -1) { return; } + + // otherwise throw it on the pile + sema.take(function (give) { + Fs.stat(filepath, w(give(function (err, stats) { + if (err) { + return void handler(err); + } + + handler(void 0, { + channel: channel, + atime: stats.atime, + mtime: stats.mtime, + ctime: stats.ctime, + size: stats.size, + }); + }))); + }); + }); + }))); + }); + }); + wait(); + }).nThen(function () { + cb(); + }); +}; + +// move a channel's log file from its current location +// to an equivalent location in the cold storage directory +var archiveChannel = function (env, channelName, cb) { + if (!env.retainData) { + return void cb("ARCHIVES_DISABLED"); + } + + // ctime is the most reliable indicator of when a file was archived + // because it is used to indicate changes to the files metadata + // and not its contents + // if we find that this is not reliable in production, we can update it manually + // https://nodejs.org/api/fs.html#fs_fs_utimes_path_atime_mtime_callback + + // check what the channel's path should be (in its current location) + var currentPath = mkPath(env, channelName); + + // construct a parallel path in the new location + var archivePath = mkArchivePath(env, channelName); + + // use Fse.move to move it, Fse makes paths to the directory when you use it. + // https://github.com/jprichardson/node-fs-extra/blob/HEAD/docs/move.md + Fse.move(currentPath, archivePath, { overwrite: true }, cb); +}; + var flushUnusedChannels = function (env, cb, frame) { var currentTime = +new Date(); @@ -413,19 +509,30 @@ module.exports.create = function ( ) { var env = { root: conf.filePath || './datastore', + archiveRoot: conf.archivePath || './data/archive', + retainData: conf.retainData, channels: { }, channelExpirationMs: conf.channelExpirationMs || 30000, verbose: conf.verbose, openFiles: 0, openFileLimit: conf.openFileLimit || 2048, }; - // 0x1ff -> 777 var it; - Fse.mkdirp(env.root, 0x1ff, function (err) { - if (err && err.code !== 'EEXIST') { - // TODO: somehow return a nice error - throw err; - } + + nThen(function (w) { + // make sure the store's directory exists + Fse.mkdirp(env.root, PERMISSIVE, w(function (err) { + if (err && err.code !== 'EEXIST') { + throw err; + } + })); + // make sure the cold storage directory exists + Fse.mkdirp(env.archiveRoot, PERMISSIVE, w(function (err) { + if (err && err.code !== 'EEXIST') { + throw err; + } + })); + }).nThen(function () { cb({ readMessagesBin: (channelName, start, asyncMsgHandler, cb) => { if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } @@ -449,6 +556,10 @@ module.exports.create = function ( cb(err); }); }, + removeArchivedChannel: function (channelName, cb) { + if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } + removeArchivedChannel(env, channelName, cb); + }, closeChannel: function (channelName, cb) { if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } closeChannel(env, channelName, cb); @@ -468,6 +579,16 @@ module.exports.create = function ( if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } clearChannel(env, channelName, cb); }, + listChannels: function (handler, cb) { + listChannels(env.root, handler, cb); + }, + listArchivedChannels: function (handler, cb) { + listChannels(env.archiveRoot, handler, cb); + }, + archiveChannel: function (channelName, cb) { + if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } + archiveChannel(env, channelName, cb); + }, log: function (channelName, content, cb) { message(env, channelName, content, cb); }, From 17ce25b666988b996e70db51eb319d0fc2f453af Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:18:04 +0200 Subject: [PATCH 03/23] add a warning to delete-inactive, which we know has bugs --- scripts/delete-inactive.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/delete-inactive.js b/scripts/delete-inactive.js index 9dbfb0707..1cbe799d4 100644 --- a/scripts/delete-inactive.js +++ b/scripts/delete-inactive.js @@ -7,6 +7,14 @@ const config = require("../lib/load-config"); if (!config.inactiveTime || typeof(config.inactiveTime) !== "number") { return; } +/* Instead of this script you should probably use + evict-inactive.js which moves things to an archive directory + in case the data that would have been deleted turns out to be important. + it also handles removing that archived data after a set period of time + + it only works for channels at the moment, though, and nothing else. +*/ + let inactiveTime = +new Date() - (config.inactiveTime * 24 * 3600 * 1000); let inactiveConfig = { unpinned: true, From ede929f4e48daea3e5dac6fe11fe3e4aa3ac8048 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:18:58 +0200 Subject: [PATCH 04/23] implement inactive channel archival and archival expiration --- scripts/evict-inactive.js | 150 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 scripts/evict-inactive.js diff --git a/scripts/evict-inactive.js b/scripts/evict-inactive.js new file mode 100644 index 000000000..51c7eacdc --- /dev/null +++ b/scripts/evict-inactive.js @@ -0,0 +1,150 @@ +var nThen = require("nthen"); + +var Store = require("../storage/file"); +var Pinned = require("./pinned"); +var config = require("../lib/load-config"); + +// the administrator should have set an 'inactiveTime' in their config +// if they didn't, just exit. +if (!config.inactiveTime || typeof(config.inactiveTime) !== "number") { return; } + +// files which have not been changed since before this date can be considered inactive +var inactiveTime = +new Date() - (config.inactiveTime * 24 * 3600 * 1000); + +// files which were archived before this date can be considered safe to remove +var retentionTime = +new Date() - (config.archiveRetentionTime * 24 * 3600 * 1000); + +var store; +var pins; +var Log; +nThen(function (w) { + // load the store which will be used for iterating over channels + // and performing operations like archival and deletion + Store.create(config, w(function (_) { + store = _; + })); // load the list of pinned files so you know which files + // should not be archived or deleted + Pinned.load(function (err, _) { + if (err) { + w.abort(); + return void console.error(err); + } + pins = _; + }, { + pinPath: config.pinPath, + }); + + // load the logging module so that you have a record of which + // files were archived or deleted at what time + var Logger = require("../lib/log"); + Logger.create(config, w(function (_) { + Log = _; + })); +}).nThen(function (w) { + // this block will iterate over archived channels and remove them + // if they've been in cold storage for longer than your configured archive time + + // if the admin has not set an 'archiveRetentionTime', this block makes no sense + // so just skip it + if (typeof(config.archiveRetentionTime) !== "number") { return; } + + // count the number of files which have been removed in this run + var removed = 0; + + var handler = function (err, item) { + if (err) { return void Log.error('EVICT_ARCHIVED_CHANNEL_ITERATION', err); } + // don't mess with files that are freshly stored in cold storage + // based on ctime because that's changed when the file is moved... + if (+new Date(item.ctime) > retentionTime) { return; } + + // but if it's been stored for the configured time... + // expire it + store.removeArchivedChannel(item.channel, w(function (err) { + if (err) { + return void Log.error('EVICT_ARCHIVED_CHANNEL_REMOVAL_ERROR', { + error: err, + channel: item.channel, + }); + } + console.log("removed %s from cold storage", item.channel); + Log.info('EVICT_ARCHIVED_CHANNEL_REMOVAL', item.channel); + removed++; + })); + }; + + // if you hit an error, log it + // otherwise, when there are no more channels to process + // log some stats about how many were removed + var cb = function (err) { + if (err) { + return Log.error('EVICT_ARCHIVED_FINAL_ERROR', err); + } + Log.info('EVICT_ARCHIVED_CHANNELS_REMOVED', removed); + }; + + store.listArchivedChannels(handler, w(cb)); +}).nThen(function (w) { + var removed = 0; + var channels = 0; + var archived = 0; + + var handler = function (err, item) { + channels++; + if (err) { return void Log.error('EVICT_CHANNEL_ITERATION', err); } + // check if the database has any ephemeral channels + // if it does it's because of a bug, and they should be removed + if (item.channel.length === 34) { + return void store.removeChannel(item.channel, w(function (err) { + if (err) { + return void Log.error('EVICT_EPHEMERAL_CHANNEL_REMOVAL_ERROR', { + error: err, + channel: item.channel, + }); + } + Log.info('EVICT_EPHEMERAL_CHANNEL_REMOVAL', item.channel); + })); + } + + // bail out if the channel was modified recently + if (+new Date(item.mtime) > inactiveTime) { return; } + + // ignore the channel if it's pinned + if (pins[item.channel]) { return; } + + // if the server is configured to retain data, archive the channel + if (config.retainData) { + store.archiveChannel(item.channel, w(function (err) { + if (err) { return void Log.error('EVICT_CHANNEL_ARCHIVAL_ERROR', { + error: err, + channel: item.channel, + }); } + Log.info('EVICT_CHANNEL_ARCHIVAL', item.channel); + archived++; + })); + return; + } + + // otherwise remove it + store.removeChannel(item.channel, w(function (err) { + if (err) { return void Log.error('EVICT_CHANNEL_REMOVAL_ERROR', { + error: err, + channel: item.channel, + }); } + Log.info('EVICT_CHANNEL_REMOVAL', item.channel); + removed++; + })); + }; + + var cb = function () { + if (config.retainData) { + return void Log.info('EVICT_CHANNELS_ARCHIVED', archived); + } + return void Log.info('EVICT_CHANNELS_REMOVED', removed); + }; + + store.listChannels(handler, w(cb)); +}).nThen(function () { + // the store will keep this script running if you don't shut it down + store.shutdown(); +}); + From 8ed725f566f4e859930541da0e1e4a54827003a6 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:33:14 +0200 Subject: [PATCH 05/23] update the example config to support cold storage --- config/config.example.js | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/config/config.example.js b/config/config.example.js index b896a0363..eb134591c 100644 --- a/config/config.example.js +++ b/config/config.example.js @@ -219,6 +219,29 @@ module.exports = { */ inactiveTime: 90, // days + /* CryptPad can be configured to remove inactive data which has not been pinned. + * Deletion of data is always risky and as an operator you have the choice to + * archive data instead of deleting it outright. Set this value to true if + * you want your server to archive files and false if you want to keep using + * the old behaviour of simply removing files. + * + * WARNING: this is not implemented universally, so at the moment this will + * only apply to the removal of 'channels' due to inactivity. + */ + retainData: true, + + /* As described above, CryptPad offers the ability to archive some data + * instead of deleting it outright. This archived data still takes up space + * and so you'll probably still want to remove these files after a brief period. + * The intent with this feature is to provide a safety net in case of accidental + * deletion. Set this value to the number of days you'd like to retain + * archived data before it's removed permanently. + * + * If 'retainData' is set to false, there will never be any archived data + * to remove. + */ + archiveRetentionTime: 15, + /* Max Upload Size (bytes) * this sets the maximum size of any one file uploaded to the server. * anything larger than this size will be rejected @@ -245,12 +268,21 @@ module.exports = { * ===================== */ /* - CryptPad stores each document in an individual file on your hard drive. - Specify a directory where files should be stored. - It will be created automatically if it does not already exist. - */ + * CryptPad stores each document in an individual file on your hard drive. + * Specify a directory where files should be stored. + * It will be created automatically if it does not already exist. + */ filePath: './datastore/', + /* CryptPad offers the ability to archive data for a configurable period + * before deleting it, allowing a means of recovering data in the event + * that it was deleted accidentally. + * + * To set the location of this archive directory to a custom value, change + * the path below: + */ + archivePath: './data/archive', + /* CryptPad allows logged in users to request that particular documents be * stored by the server indefinitely. This is called 'pinning'. * Pin requests are stored in a pin-store. The location of this store is From dd375a2a220dc9e11af4f8d71c9267828ab2a41f Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:33:52 +0200 Subject: [PATCH 06/23] every type of storage should have its own nested directory within the archive --- storage/file.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/file.js b/storage/file.js index 84d95a266..86aea935c 100644 --- a/storage/file.js +++ b/storage/file.js @@ -24,7 +24,7 @@ var mkPath = function (env, channelId) { }; var mkArchivePath = function (env, channelId) { - return Path.join(env.archiveRoot, channelId.slice(0, 2), channelId) + '.ndjson'; + return Path.join(env.archiveRoot, 'datastore', channelId.slice(0, 2), channelId) + '.ndjson'; }; var getMetadataAtPath = function (Env, path, cb) { From 11976b262eb92e8e1e4939d4dbb14e0153418570 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:34:25 +0200 Subject: [PATCH 07/23] add a note for later --- storage/tasks.js | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/tasks.js b/storage/tasks.js index 0e09d0ad0..22f89d7e2 100644 --- a/storage/tasks.js +++ b/storage/tasks.js @@ -83,6 +83,7 @@ var write = function (env, task, cb) { }; var remove = function (env, path, cb) { + // FIXME COLDSTORAGE? Fs.unlink(path, cb); }; From 4fd490f07f2e4095fcb38e8ddce4888106018e12 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 12 Jun 2019 16:35:08 +0200 Subject: [PATCH 08/23] leave some notes for later --- rpc.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rpc.js b/rpc.js index adc869432..19d482368 100644 --- a/rpc.js +++ b/rpc.js @@ -814,6 +814,7 @@ var clearOwnedChannel = function (Env, channelId, unsafeKey, cb) { return void cb('INSUFFICIENT_PERMISSIONS'); } + // FIXME COLDSTORAGE return void Env.msgStore.clearChannel(channelId, function (e) { cb(e); }); @@ -900,6 +901,10 @@ var removeOwnedChannel = function (Env, channelId, unsafeKey, cb) { if (metadata.owners.indexOf(unsafeKey) === -1) { return void cb('INSUFFICIENT_PERMISSIONS'); } + + // FIXME COLDSTORAGE + // XXX check if 'config.retainData' is set to true + // if so, use msgStore.archiveChannel instead return void Env.msgStore.removeChannel(channelId, function (e) { Log.info('DELETION_CHANNEL_BY_OWNER_RPC', { unsafeKey: unsafeKey, @@ -1430,6 +1435,7 @@ var removeLoginBlock = function (Env, msg, cb) { return void cb('E_INVALID_BLOCK_PATH'); } + // FIXME COLDSTORAGE Fs.unlink(path, function (err) { Log.info('DELETION_BLOCK_BY_OWNER_RPC', { publicKey: publicKey, From c0d908af4df09c05ac8432ab421fdb0470a2c41e Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 11:20:53 +0200 Subject: [PATCH 09/23] implement 'unarchiveChannel' --- storage/file.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/storage/file.js b/storage/file.js index 86aea935c..1f8c00a0c 100644 --- a/storage/file.js +++ b/storage/file.js @@ -301,6 +301,19 @@ var archiveChannel = function (env, channelName, cb) { Fse.move(currentPath, archivePath, { overwrite: true }, cb); }; +var unarchiveChannel = function (env, channelName, cb) { + // very much like 'archiveChannel' but in the opposite direction + + // the file is currently archived + var currentPath = mkArchivePath(env, channelName); + var unarchivedPath = mkPath(env, channelName); + + // if a file exists in the unarchived path, you probably don't want to clobber its data + // so unlike 'archiveChannel' we won't overwrite. + // Fse.move will call back with EEXIST in such a situation + Fse.move(currentPath, unarchivedPath, cb); +}; + var flushUnusedChannels = function (env, cb, frame) { var currentTime = +new Date(); @@ -589,6 +602,10 @@ module.exports.create = function ( if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } archiveChannel(env, channelName, cb); }, + unarchiveChannel: function (channelName, cb) { + if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } + unarchiveChannel(env, channelName, cb); + }, log: function (channelName, content, cb) { message(env, channelName, content, cb); }, From 5a1c25a0dfca12fb3b4836cfb40476290ad638ca Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 11:22:20 +0200 Subject: [PATCH 10/23] fix a race condition, shutdown the log when you're done --- scripts/evict-inactive.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/evict-inactive.js b/scripts/evict-inactive.js index 51c7eacdc..1ebec6983 100644 --- a/scripts/evict-inactive.js +++ b/scripts/evict-inactive.js @@ -24,7 +24,7 @@ nThen(function (w) { store = _; })); // load the list of pinned files so you know which files // should not be archived or deleted - Pinned.load(function (err, _) { + Pinned.load(w(function (err, _) { if (err) { w.abort(); return void console.error(err); @@ -32,7 +32,7 @@ nThen(function (w) { pins = _; }, { pinPath: config.pinPath, - }); + })); // load the logging module so that you have a record of which // files were archived or deleted at what time @@ -66,7 +66,6 @@ nThen(function (w) { channel: item.channel, }); } - console.log("removed %s from cold storage", item.channel); Log.info('EVICT_ARCHIVED_CHANNEL_REMOVAL', item.channel); removed++; })); @@ -146,5 +145,6 @@ nThen(function (w) { }).nThen(function () { // the store will keep this script running if you don't shut it down store.shutdown(); + Log.shutdown(); }); From 2605d27e3f20b2ddc09557593facf1304bf0112c Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 11:24:01 +0200 Subject: [PATCH 11/23] fix a completely wrong function signature --- scripts/evict-inactive.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/evict-inactive.js b/scripts/evict-inactive.js index 1ebec6983..f23cbd7cd 100644 --- a/scripts/evict-inactive.js +++ b/scripts/evict-inactive.js @@ -30,9 +30,9 @@ nThen(function (w) { return void console.error(err); } pins = _; - }, { + }), { pinPath: config.pinPath, - })); + }); // load the logging module so that you have a record of which // files were archived or deleted at what time From cb0c4ee9448e4a1cd6fe7f812edf208a9b3e35c7 Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 11:54:42 +0200 Subject: [PATCH 12/23] don't continue iterating in listChannels until a the current task has called back --- scripts/evict-inactive.js | 64 +++++++++++++++++++----------- scripts/restored-archived.js | 75 ++++++++++++++++++++++++++++++++++++ storage/file.js | 7 ++-- 3 files changed, 120 insertions(+), 26 deletions(-) create mode 100644 scripts/restored-archived.js diff --git a/scripts/evict-inactive.js b/scripts/evict-inactive.js index f23cbd7cd..d1026fd64 100644 --- a/scripts/evict-inactive.js +++ b/scripts/evict-inactive.js @@ -51,97 +51,115 @@ nThen(function (w) { // count the number of files which have been removed in this run var removed = 0; - var handler = function (err, item) { - if (err) { return void Log.error('EVICT_ARCHIVED_CHANNEL_ITERATION', err); } + var handler = function (err, item, cb) { + if (err) { + Log.error('EVICT_ARCHIVED_CHANNEL_ITERATION', err); + return void cb(); + } // don't mess with files that are freshly stored in cold storage // based on ctime because that's changed when the file is moved... - if (+new Date(item.ctime) > retentionTime) { return; } + if (+new Date(item.ctime) > retentionTime) { + return void cb(); + } // but if it's been stored for the configured time... // expire it store.removeArchivedChannel(item.channel, w(function (err) { if (err) { - return void Log.error('EVICT_ARCHIVED_CHANNEL_REMOVAL_ERROR', { + Log.error('EVICT_ARCHIVED_CHANNEL_REMOVAL_ERROR', { error: err, channel: item.channel, }); + return void cb(); } Log.info('EVICT_ARCHIVED_CHANNEL_REMOVAL', item.channel); removed++; + cb(); })); }; // if you hit an error, log it // otherwise, when there are no more channels to process // log some stats about how many were removed - var cb = function (err) { + var done = function (err) { if (err) { return Log.error('EVICT_ARCHIVED_FINAL_ERROR', err); } Log.info('EVICT_ARCHIVED_CHANNELS_REMOVED', removed); }; - store.listArchivedChannels(handler, w(cb)); + store.listArchivedChannels(handler, w(done)); }).nThen(function (w) { var removed = 0; var channels = 0; var archived = 0; - var handler = function (err, item) { + var handler = function (err, item, cb) { channels++; - if (err) { return void Log.error('EVICT_CHANNEL_ITERATION', err); } + if (err) { + Log.error('EVICT_CHANNEL_ITERATION', err); + return void cb(); + } // check if the database has any ephemeral channels // if it does it's because of a bug, and they should be removed if (item.channel.length === 34) { return void store.removeChannel(item.channel, w(function (err) { if (err) { - return void Log.error('EVICT_EPHEMERAL_CHANNEL_REMOVAL_ERROR', { + Log.error('EVICT_EPHEMERAL_CHANNEL_REMOVAL_ERROR', { error: err, channel: item.channel, }); + return void cb(); } Log.info('EVICT_EPHEMERAL_CHANNEL_REMOVAL', item.channel); })); } // bail out if the channel was modified recently - if (+new Date(item.mtime) > inactiveTime) { return; } + if (+new Date(item.mtime) > inactiveTime) { return void cb(); } // ignore the channel if it's pinned - if (pins[item.channel]) { return; } + if (pins[item.channel]) { return void cb(); } // if the server is configured to retain data, archive the channel if (config.retainData) { - store.archiveChannel(item.channel, w(function (err) { - if (err) { return void Log.error('EVICT_CHANNEL_ARCHIVAL_ERROR', { - error: err, - channel: item.channel, - }); } + return void store.archiveChannel(item.channel, w(function (err) { + if (err) { + Log.error('EVICT_CHANNEL_ARCHIVAL_ERROR', { + error: err, + channel: item.channel, + }); + return void cb(); + } Log.info('EVICT_CHANNEL_ARCHIVAL', item.channel); archived++; + cb(); })); - return; } // otherwise remove it store.removeChannel(item.channel, w(function (err) { - if (err) { return void Log.error('EVICT_CHANNEL_REMOVAL_ERROR', { - error: err, - channel: item.channel, - }); } + if (err) { + Log.error('EVICT_CHANNEL_REMOVAL_ERROR', { + error: err, + channel: item.channel, + }); + return void cb(); + } Log.info('EVICT_CHANNEL_REMOVAL', item.channel); removed++; + cb(); })); }; - var cb = function () { + var done = function () { if (config.retainData) { return void Log.info('EVICT_CHANNELS_ARCHIVED', archived); } return void Log.info('EVICT_CHANNELS_REMOVED', removed); }; - store.listChannels(handler, w(cb)); + store.listChannels(handler, w(done)); }).nThen(function () { // the store will keep this script running if you don't shut it down store.shutdown(); diff --git a/scripts/restored-archived.js b/scripts/restored-archived.js new file mode 100644 index 000000000..4cefc764b --- /dev/null +++ b/scripts/restored-archived.js @@ -0,0 +1,75 @@ +var nThen = require("nthen"); + +var Store = require("../storage/file"); +var Pinned = require("./pinned"); +var config = require("../lib/load-config"); + +var store; +var pins; +var Log; +nThen(function (w) { + // load the store which will be used for iterating over channels + // and performing operations like archival and deletion + Store.create(config, w(function (_) { + store = _; + })); // load the list of pinned files so you know which files + // should not be archived or deleted + Pinned.load(w(function (err, _) { + if (err) { + w.abort(); + return void console.error(err); + } + pins = _; + }), { + pinPath: config.pinPath, + }); + + // load the logging module so that you have a record of which + // files were archived or deleted at what time + var Logger = require("../lib/log"); + Logger.create(config, w(function (_) { + Log = _; + })); +}).nThen(function (w) { + // count the number of files which have been restored in this run + var restored = 0; + + var handler = function (err, item, cb) { + if (err) { + Log.error('RESTORE_ARCHIVED_CHANNEL_ITERATION', err); + return void cb(); + } + + // but if it's been stored for the configured time... + // expire it + store.removeArchivedChannel(item.channel, w(function (err) { + if (err) { + Log.error('RESTORE_ARCHIVED_CHANNEL_RESTORATION_ERROR', { + error: err, + channel: item.channel, + }); + return void cb(); + } + Log.info('RESTORE_ARCHIVED_CHANNEL_RESTORATION', item.channel); + restored++; + cb(); + })); + }; + + // if you hit an error, log it + // otherwise, when there are no more channels to process + // log some stats about how many were removed + var done = function (err) { + if (err) { + return Log.error('RESTORE_ARCHIVED_FINAL_ERROR', err); + } + Log.info('RESTORE_ARCHIVED_CHANNELS_RESTORED', restored); + }; + + store.listArchivedChannels(handler, w(done)); +}).nThen(function () { + // the store will keep this script running if you don't shut it down + store.shutdown(); + Log.shutdown(); +}); + diff --git a/storage/file.js b/storage/file.js index 1f8c00a0c..f92600c46 100644 --- a/storage/file.js +++ b/storage/file.js @@ -253,7 +253,8 @@ var listChannels = function (root, handler, cb) { // otherwise throw it on the pile sema.take(function (give) { - Fs.stat(filepath, w(give(function (err, stats) { + var next = give(); + Fs.stat(filepath, w(function (err, stats) { if (err) { return void handler(err); } @@ -264,8 +265,8 @@ var listChannels = function (root, handler, cb) { mtime: stats.mtime, ctime: stats.ctime, size: stats.size, - }); - }))); + }, next); + })); }); }); }))); From 7e455e6fceeaaefa28b9c02db92dad01917d2c5a Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 12:03:46 +0200 Subject: [PATCH 13/23] drop unnecessary code, restore instead of remove archived files in restoration script, change restore archived name --- ...{restored-archived.js => restore-archived.js} | 16 ++-------------- storage/file.js | 2 +- 2 files changed, 3 insertions(+), 15 deletions(-) rename scripts/{restored-archived.js => restore-archived.js} (81%) diff --git a/scripts/restored-archived.js b/scripts/restore-archived.js similarity index 81% rename from scripts/restored-archived.js rename to scripts/restore-archived.js index 4cefc764b..3b516eb2b 100644 --- a/scripts/restored-archived.js +++ b/scripts/restore-archived.js @@ -1,28 +1,16 @@ var nThen = require("nthen"); var Store = require("../storage/file"); -var Pinned = require("./pinned"); var config = require("../lib/load-config"); var store; -var pins; var Log; nThen(function (w) { // load the store which will be used for iterating over channels // and performing operations like archival and deletion Store.create(config, w(function (_) { store = _; - })); // load the list of pinned files so you know which files - // should not be archived or deleted - Pinned.load(w(function (err, _) { - if (err) { - w.abort(); - return void console.error(err); - } - pins = _; - }), { - pinPath: config.pinPath, - }); + })); // load the logging module so that you have a record of which // files were archived or deleted at what time @@ -42,7 +30,7 @@ nThen(function (w) { // but if it's been stored for the configured time... // expire it - store.removeArchivedChannel(item.channel, w(function (err) { + store.restoreArchivedChannel(item.channel, w(function (err) { if (err) { Log.error('RESTORE_ARCHIVED_CHANNEL_RESTORATION_ERROR', { error: err, diff --git a/storage/file.js b/storage/file.js index f92600c46..01d9e5401 100644 --- a/storage/file.js +++ b/storage/file.js @@ -603,7 +603,7 @@ module.exports.create = function ( if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } archiveChannel(env, channelName, cb); }, - unarchiveChannel: function (channelName, cb) { + restoreArchivedChannel: function (channelName, cb) { if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } unarchiveChannel(env, channelName, cb); }, From ddf4a6a7843f7ade177349de1483f1d1a4e6ede9 Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 12:07:35 +0200 Subject: [PATCH 14/23] pass the correct path to 'listChannels' for 'listArchivedChannels' --- storage/file.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/file.js b/storage/file.js index 01d9e5401..58a10ba46 100644 --- a/storage/file.js +++ b/storage/file.js @@ -253,7 +253,7 @@ var listChannels = function (root, handler, cb) { // otherwise throw it on the pile sema.take(function (give) { - var next = give(); + var next = w(give()); Fs.stat(filepath, w(function (err, stats) { if (err) { return void handler(err); @@ -597,7 +597,7 @@ module.exports.create = function ( listChannels(env.root, handler, cb); }, listArchivedChannels: function (handler, cb) { - listChannels(env.archiveRoot, handler, cb); + listChannels(Path.join(env.archiveRoot, 'datastore'), handler, cb); }, archiveChannel: function (channelName, cb) { if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } From ac3a58cc3873e1d3b2c278951aefd79389226a08 Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 15:03:36 +0200 Subject: [PATCH 15/23] fix a deadlock by returning the lock after clearing ephemeral channels --- scripts/evict-inactive.js | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/evict-inactive.js b/scripts/evict-inactive.js index d1026fd64..ff9a3c343 100644 --- a/scripts/evict-inactive.js +++ b/scripts/evict-inactive.js @@ -112,6 +112,7 @@ nThen(function (w) { return void cb(); } Log.info('EVICT_EPHEMERAL_CHANNEL_REMOVAL', item.channel); + cb(); })); } From 6aa6b706a41bb0fa2454211a3163e44964d819de Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 16:36:22 +0200 Subject: [PATCH 16/23] don't release until you think about this XXX a bit more --- storage/file.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/file.js b/storage/file.js index 58a10ba46..60c90c203 100644 --- a/storage/file.js +++ b/storage/file.js @@ -253,7 +253,7 @@ var listChannels = function (root, handler, cb) { // otherwise throw it on the pile sema.take(function (give) { - var next = w(give()); + var next = w(give()); // XXX validate that it's ok to 'waitFor' here instead of outside the 'take' Fs.stat(filepath, w(function (err, stats) { if (err) { return void handler(err); From a2c8ec963ce3715c9215bbbb1f0fa382ecfc9a6c Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 16:44:27 +0200 Subject: [PATCH 17/23] archive files deleted by 'removeOwnedChannel' if the admin has configured that behaviour --- rpc.js | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/rpc.js b/rpc.js index 19d482368..a69033cc1 100644 --- a/rpc.js +++ b/rpc.js @@ -902,9 +902,19 @@ var removeOwnedChannel = function (Env, channelId, unsafeKey, cb) { return void cb('INSUFFICIENT_PERMISSIONS'); } - // FIXME COLDSTORAGE - // XXX check if 'config.retainData' is set to true - // if so, use msgStore.archiveChannel instead + // if the admin has configured data retention... + // temporarily archive the file instead of removing it + if (Env.retainData) { + return void Env.msgStore.archiveChannel(channelId, function (e) { + Log.info('ARCHIVAL_CHANNEL_BY_OWNER_RPC', { + unsafeKey: unsafeKey, + channelId: channelId, + status: e? String(e): 'SUCCESS', + }); + cb(e); + }); + } + return void Env.msgStore.removeChannel(channelId, function (e) { Log.info('DELETION_CHANNEL_BY_OWNER_RPC', { unsafeKey: unsafeKey, @@ -1651,6 +1661,7 @@ RPC.create = function ( }; var Env = { + retainData: config.retainData || false, defaultStorageLimit: config.defaultStorageLimit, maxUploadSize: config.maxUploadSize || (20 * 1024 * 1024), Sessions: {}, From 58cee9e13dd8a1efa9fd5d461eabce960434156c Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 13 Jun 2019 16:48:12 +0200 Subject: [PATCH 18/23] remove an XXX because the code looks safe --- storage/file.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/file.js b/storage/file.js index 60c90c203..58a10ba46 100644 --- a/storage/file.js +++ b/storage/file.js @@ -253,7 +253,7 @@ var listChannels = function (root, handler, cb) { // otherwise throw it on the pile sema.take(function (give) { - var next = w(give()); // XXX validate that it's ok to 'waitFor' here instead of outside the 'take' + var next = w(give()); Fs.stat(filepath, w(function (err, stats) { if (err) { return void handler(err); From 3e4ee751373f573702684f815b506f73f836edde Mon Sep 17 00:00:00 2001 From: yflory Date: Fri, 14 Jun 2019 10:03:52 +0200 Subject: [PATCH 19/23] Fix race condition with pins --- www/common/outer/async-store.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/www/common/outer/async-store.js b/www/common/outer/async-store.js index 19b4a4fe4..cfa071ec1 100644 --- a/www/common/outer/async-store.js +++ b/www/common/outer/async-store.js @@ -327,15 +327,6 @@ define([ account.note = obj.note; cb(obj); }); - - arePinsSynced(function (err, yes) { - if (!yes) { - resetPins(function (err) { - if (err) { return console.error(err); } - console.log('RESET DONE'); - }); - } - }); }); }); }; @@ -1666,6 +1657,15 @@ define([ loadUniversal(Profile, 'profile', waitFor); cleanFriendRequests(); }).nThen(function () { + arePinsSynced(function (err, yes) { + if (!yes) { + resetPins(function (err) { + if (err) { return console.error(err); } + console.log('RESET DONE'); + }); + } + }); + var requestLogin = function () { broadcast([], "REQUEST_LOGIN"); }; From 13738abc03e3e87845bd89be993d070bd00eaede Mon Sep 17 00:00:00 2001 From: ansuz Date: Fri, 14 Jun 2019 11:37:34 +0200 Subject: [PATCH 20/23] remove an incorrect comment --- scripts/restore-archived.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/restore-archived.js b/scripts/restore-archived.js index 3b516eb2b..a420e35e5 100644 --- a/scripts/restore-archived.js +++ b/scripts/restore-archived.js @@ -28,8 +28,6 @@ nThen(function (w) { return void cb(); } - // but if it's been stored for the configured time... - // expire it store.restoreArchivedChannel(item.channel, w(function (err) { if (err) { Log.error('RESTORE_ARCHIVED_CHANNEL_RESTORATION_ERROR', { From 1e5248ff9dd4bd41b8eea301faeca02251c88ceb Mon Sep 17 00:00:00 2001 From: ansuz Date: Fri, 14 Jun 2019 11:38:16 +0200 Subject: [PATCH 21/23] implement methods for checking if a file exists in the database or the archive --- storage/file.js | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/storage/file.js b/storage/file.js index 58a10ba46..717f4104b 100644 --- a/storage/file.js +++ b/storage/file.js @@ -213,6 +213,21 @@ var removeChannel = function (env, channelName, cb) { Fs.unlink(filename, cb); }; +// pass in the path so we can reuse the same function for archived files +var channelExists = function (filepath, channelName, cb) { + Fs.stat(filepath, function (err, stat) { + if (err) { + if (err.code === 'ENOENT') { + // no, the file doesn't exist + return void cb(void 0, true); + } + return void cb(err); + } + if (!stat.isFile()) { return void cb("E_NOT_FILE"); } + return void cb(void 0, true); + }); +}; + var removeArchivedChannel = function (env, channelName, cb) { var filename = mkArchivePath(env, channelName); Fs.unlink(filename, cb); @@ -596,6 +611,18 @@ module.exports.create = function ( listChannels: function (handler, cb) { listChannels(env.root, handler, cb); }, + isChannelAvailable: function (channelName, cb) { + if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } + // construct the path + var filepath = mkPath(env, channelName); + channelExists(filepath, channelName, cb); + }, + isChannelArchived: function (channelName, cb) { + if (!isValidChannelId(channelName)) { return void cb(new Error('EINVAL')); } + // construct the path + var filepath = mkArchivePath(env, channelName); + channelExists(filepath, channelName, cb); + }, listArchivedChannels: function (handler, cb) { listChannels(Path.join(env.archiveRoot, 'datastore'), handler, cb); }, From f5a2627d86a83966527157d8f86ef2050d68dbc7 Mon Sep 17 00:00:00 2001 From: ansuz Date: Fri, 14 Jun 2019 11:48:15 +0200 Subject: [PATCH 22/23] implement script to diagnose conflicts between the production database and the archive --- scripts/diagnose-archive-conflicts.js | 63 +++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 scripts/diagnose-archive-conflicts.js diff --git a/scripts/diagnose-archive-conflicts.js b/scripts/diagnose-archive-conflicts.js new file mode 100644 index 000000000..8617150fc --- /dev/null +++ b/scripts/diagnose-archive-conflicts.js @@ -0,0 +1,63 @@ +var nThen = require("nthen"); + +var Store = require("../storage/file"); +var config = require("../lib/load-config"); + +var store; +var Log; +nThen(function (w) { + // load the store which will be used for iterating over channels + // and performing operations like archival and deletion + Store.create(config, w(function (_) { + store = _; + })); + + // load the logging module so that you have a record of which + // files were archived or deleted at what time + var Logger = require("../lib/log"); + Logger.create(config, w(function (_) { + Log = _; + })); +}).nThen(function (w) { + // count the number of files which have been restored in this run + var conflicts = 0; + + var handler = function (err, item, cb) { + if (err) { + Log.error('DIAGNOSE_ARCHIVE_CONFLICTS_ITERATION', err); + return void cb(); + } + + // check if such a file exists on the server + store.isChannelAvailable(item.channel, function (err, available) { + // weird edge case? + if (err) { return void cb(); } + + // the channel doesn't exist in the database + if (!available) { return void cb(); } + + // the channel is available + // that means it's a duplicate of something in the archive + conflicts++; + Log.info('DIAGNOSE_ARCHIVE_CONFLICT_DETECTED', item.channel); + cb(); + }); + }; + + // if you hit an error, log it + // otherwise, when there are no more channels to process + // log some stats about how many were removed + var done = function (err) { + if (err) { + return Log.error('DIAGNOSE_ARCHIVE_CONFLICTS_FINAL_ERROR', err); + } + Log.info('DIAGNOSE_ARCHIVE_CONFLICTS_COUNT', conflicts); + }; + + store.listArchivedChannels(handler, w(done)); +}).nThen(function () { + // the store will keep this script running if you don't shut it down + store.shutdown(); + Log.shutdown(); +}); + From d19b704bb8c2023fab12627602669608d319051c Mon Sep 17 00:00:00 2001 From: ansuz Date: Fri, 14 Jun 2019 13:38:09 +0200 Subject: [PATCH 23/23] fix an inverted boolean --- storage/file.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/file.js b/storage/file.js index 717f4104b..99272fc6f 100644 --- a/storage/file.js +++ b/storage/file.js @@ -219,7 +219,7 @@ var channelExists = function (filepath, channelName, cb) { if (err) { if (err.code === 'ENOENT') { // no, the file doesn't exist - return void cb(void 0, true); + return void cb(void 0, false); } return void cb(err); }