From 90f046f8960f216f86a646d9938b42a4639e3308 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 14 Oct 2020 17:18:57 +0530 Subject: [PATCH 1/4] prevent long-running worker tasks from timing out --- lib/workers/index.js | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/workers/index.js b/lib/workers/index.js index ddee0ddde..d0a9a66f5 100644 --- a/lib/workers/index.js +++ b/lib/workers/index.js @@ -81,7 +81,8 @@ Workers.initialize = function (Env, config, _cb) { }; var drained = true; - var sendCommand = function (msg, _cb) { + var sendCommand = function (msg, _cb, opt) { + opt = opt || {}; var index = getAvailableWorkerIndex(); var state = workers[index]; @@ -119,7 +120,9 @@ Workers.initialize = function (Env, config, _cb) { delete state.tasks[txid]; }))); - response.expect(txid, cb, 180000); + // default to timing out affter 180s if no explicit timeout is passed + var timeout = typeof(opt.timeout) !== 'undefined'? opt.timeout: 180000; + response.expect(txid, cb, timeout); state.worker.send(msg); }; @@ -354,13 +357,17 @@ Workers.initialize = function (Env, config, _cb) { Env.evictInactive = function (cb) { sendCommand({ command: 'EVICT_INACTIVE', - }, cb); + }, cb, { + timeout: 1000 * 60 * 300, // time out after 300 minutes (5 hours) + }); }; Env.runTasks = function (cb) { sendCommand({ command: 'RUN_TASKS', - }, cb); + }, cb, { + timeout: 1000 * 60 * 10, // time out after 10 minutes + }); }; Env.writeTask = function (time, command, args, cb) { From a29c5641b8953a7cb184c5e7e63e30d0fabc0920 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 14 Oct 2020 17:22:31 +0530 Subject: [PATCH 2/4] introduce some artificial delays in the eviction script ...and reduce the margin of error in its bloom filters --- lib/eviction.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/eviction.js b/lib/eviction.js index 979848bc6..854071574 100644 --- a/lib/eviction.js +++ b/lib/eviction.js @@ -81,7 +81,9 @@ module.exports = function (Env, cb) { TODO make this configurable ? */ var BLOOM_CAPACITY = (1 << 20) - 1; // over a million items - var BLOOM_ERROR = 1 / 1000; // an error rate of one in a thousand + var BLOOM_ERROR = 1 / 10000; // an error rate of one in a thousand + // the number of ms artificially introduced between CPU-intensive operations + var THROTTLE_FACTOR = 10; // we'll use one filter for the set of active documents var activeDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR); @@ -165,6 +167,7 @@ module.exports = function (Env, cb) { // if they are older than the specified retention time var removed = 0; blobs.list.archived.proofs(function (err, item, next) { + next = Util.mkAsync(next, THROTTLE_FACTOR); if (err) { Log.error("EVICT_BLOB_LIST_ARCHIVED_PROOF_ERROR", err); return void next(); @@ -190,6 +193,7 @@ module.exports = function (Env, cb) { // if they are older than the specified retention time var removed = 0; blobs.list.archived.blobs(function (err, item, next) { + next = Util.mkAsync(next, THROTTLE_FACTOR); if (err) { Log.error("EVICT_BLOB_LIST_ARCHIVED_BLOBS_ERROR", err); return void next(); @@ -246,6 +250,7 @@ module.exports = function (Env, cb) { var active = 0; blobs.list.blobs(function (err, item, next) { + next = Util.mkAsync(next, THROTTLE_FACTOR); n_blobs++; if (err) { Log.error("EVICT_BLOB_CATEGORIZATION", err); @@ -311,6 +316,7 @@ module.exports = function (Env, cb) { // otherwise, we'll only retain data from active accounts // so we need more heuristics var handler = function (content, id, next) { + next = Util.mkAsync(next, THROTTLE_FACTOR); accounts++; var mtime = content.latest; @@ -416,6 +422,7 @@ module.exports = function (Env, cb) { // if they don't correspond to a pinned or active file var removed = 0; blobs.list.proofs(function (err, item, next) { + next = Util.mkAsync(next, THROTTLE_FACTOR); if (err) { next(); return void Log.error("EVICT_BLOB_LIST_PROOFS_ERROR", err); @@ -458,6 +465,7 @@ module.exports = function (Env, cb) { var archived = 0; var handler = function (err, item, cb) { + cb = Util.mkAsync(cb, THROTTLE_FACTOR); channels++; if (err) { Log.error('EVICT_CHANNEL_ITERATION', err); From ac322c8e821d8afd08af6622f2ce477795485c27 Mon Sep 17 00:00:00 2001 From: ansuz Date: Wed, 14 Oct 2020 17:23:53 +0530 Subject: [PATCH 3/4] make the eviction script call back with accumulated statistics --- lib/eviction.js | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/lib/eviction.js b/lib/eviction.js index 854071574..22f5dbee8 100644 --- a/lib/eviction.js +++ b/lib/eviction.js @@ -32,6 +32,25 @@ Env = { module.exports = function (Env, cb) { var complete = Util.once(Util.mkAsync(cb)); + var report = { + // archivedChannelsRemoved, + // archivedAccountsRemoved, + // archivedBlobProofsRemoved, + // archivedBlobsRemoved, + + // totalChannels, + // activeChannels, + + // totalBlobs, + // activeBlobs, + + // totalAccounts, + // activeAccounts, + + // channelsArchived, + + // runningTime, + }; // the administrator should have set an 'inactiveTime' in their config // if they didn't, just exit. @@ -154,6 +173,8 @@ module.exports = function (Env, cb) { if (err) { return Log.error('EVICT_ARCHIVED_FINAL_ERROR', err); } + report.archivedChannelsRemoved = removed; + report.archivedAccountsRemoved = accounts; Log.info('EVICT_ARCHIVED_CHANNELS_REMOVED', removed); Log.info('EVICT_ARCHIVED_ACCOUNTS_REMOVED', accounts); }; @@ -183,6 +204,7 @@ module.exports = function (Env, cb) { next(); })); }, w(function () { + report.archivedBlobProofsRemoved = removed; Log.info('EVICT_ARCHIVED_BLOB_PROOFS_REMOVED', removed); })); }; @@ -209,6 +231,7 @@ module.exports = function (Env, cb) { next(); }); }, w(function () { + report.archivedBlobsRemoved = removed; Log.info('EVICT_ARCHIVED_BLOBS_REMOVED', removed); })); }; @@ -236,6 +259,8 @@ module.exports = function (Env, cb) { }; var done = function () { + report.activeChannels = active; + report.totalChannels = channels; Log.info('EVICT_CHANNELS_CATEGORIZED', { active: active, channels: channels, @@ -267,6 +292,8 @@ module.exports = function (Env, cb) { } next(); }, w(function () { + report.totalBlobs = n_blobs; + report.activeBlobs = active; Log.info('EVICT_BLOBS_CATEGORIZED', { active: active, blobs: n_blobs, @@ -333,6 +360,10 @@ module.exports = function (Env, cb) { // we plan to delete them, because it may be interesting information inactive++; if (PRESERVE_INACTIVE_ACCOUNTS) { + Log.info('EVICT_INACTIVE_ACCOUNT_PRESERVED', { + id: id, + mtime: mtime, + }); pinAll(pinList); return void next(); } @@ -362,9 +393,8 @@ module.exports = function (Env, cb) { "EVICT_COUNT_ACCOUNTS": "EVICT_INACTIVE_ACCOUNTS"; - // update the number of known active accounts in Env for statistics - Env.knownActiveAccounts = accounts - inactive; - + report.totalAccounts = accounts; + report.activeAccounts = accounts - inactive; Log.info(label, { accounts: accounts, inactive: inactive, @@ -381,7 +411,9 @@ module.exports = function (Env, cb) { // iterate over blobs and remove them // if they have not been accessed within the specified retention time var removed = 0; + var total = 0; blobs.list.blobs(function (err, item, next) { + next = Util.mkAsync(next, THROTTLE_FACTOR); if (err) { Log.error("EVICT_BLOB_LIST_BLOBS_ERROR", err); return void next(); @@ -390,6 +422,7 @@ module.exports = function (Env, cb) { next(); return void Log.error('EVICT_BLOB_LIST_BLOBS_NO_ITEM', item); } + total++; if (pinnedDocs.test(item.blobId)) { return void next(); } if (activeDocs.test(item.blobId)) { return void next(); } @@ -398,6 +431,7 @@ module.exports = function (Env, cb) { // unless we address this race condition with this last-minute double-check if (getNewestTime(item) > inactiveTime) { return void next(); } + removed++; blobs.archive.blob(item.blobId, function (err) { if (err) { Log.error("EVICT_ARCHIVE_BLOB_ERROR", { @@ -409,10 +443,11 @@ module.exports = function (Env, cb) { Log.info("EVICT_ARCHIVE_BLOB", { item: item, }); - removed++; next(); }); }, w(function () { + report.totalBlobs = total; + report.activeBlobs = total - removed; Log.info('EVICT_BLOBS_REMOVED', removed); })); }; @@ -522,6 +557,7 @@ module.exports = function (Env, cb) { }; var done = function () { + report.channelsArchived = archived; return void Log.info('EVICT_CHANNELS_ARCHIVED', archived); }; @@ -547,8 +583,9 @@ module.exports = function (Env, cb) { .nThen(archiveInactiveBlobProofs) .nThen(archiveInactiveChannels) .nThen(function () { - Log.info("EVICT_TIME_TO_RUN_SCRIPT", msSinceStart()); + var runningTime = report.runningTime = msSinceStart(); + Log.info("EVICT_TIME_TO_RUN_SCRIPT", runningTime); }).nThen(function () { - complete(); + complete(void 0, report); }); }; From faa7ebf399f368ad6f8df1b69d8971ea65603038 Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 15 Oct 2020 13:11:09 +0530 Subject: [PATCH 4/4] Fix some bugs with integrated eviction 1. implement SET_LAST_EVICTION as an admin command, not a decree 2. expect a return value from Env.evictInactive and expose it via Env.evictionReport --- lib/commands/admin-rpc.js | 20 ++++++++++++++++++-- lib/decrees.js | 3 --- lib/env.js | 2 +- lib/eviction.js | 1 + lib/historyKeeper.js | 6 +++++- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/lib/commands/admin-rpc.js b/lib/commands/admin-rpc.js index 9278c09a6..e67f75f6c 100644 --- a/lib/commands/admin-rpc.js +++ b/lib/commands/admin-rpc.js @@ -211,6 +211,21 @@ the server adds two pieces of information to the supplied decree: Decrees.write(Env, decree, cb); }; +// CryptPad_AsyncStore.rpc.send('ADMIN', ['SET_LAST_EVICTION', 0], console.log) +var setLastEviction = function (Env, Server, cb, data, unsafeKey) { + var time = data && data[1]; + if (typeof(time) !== 'number') { + return void cb('INVALID_ARGS'); + } + + Env.lastEviction = time; + cb(); + Env.Log.info('LAST_EVICTION_TIME_SET', { + author: unsafeKey, + time: time, + }); +}; + // CryptPad_AsyncStore.rpc.send('ADMIN', ['INSTANCE_STATUS], console.log) var instanceStatus = function (Env, Server, cb) { cb(void 0, { @@ -225,8 +240,8 @@ var instanceStatus = function (Env, Server, cb) { defaultStorageLimit: Env.defaultStorageLimit, lastEviction: Env.lastEviction, - // FIXME eviction is run in a worker and this isn't returned - //knownActiveAccounts: Env.knownActiveAccounts, + evictionReport: Env.evictionReport, + disableIntegratedEviction: Env.disableIntegratedEviction, disableIntegratedTasks: Env.disableIntegratedTasks, @@ -257,6 +272,7 @@ var commands = { ADMIN_DECREE: adminDecree, INSTANCE_STATUS: instanceStatus, GET_LIMITS: getLimits, + SET_LAST_EVICTION: setLastEviction, }; Admin.command = function (Env, safeKey, data, _cb, Server) { diff --git a/lib/decrees.js b/lib/decrees.js index d7983c79c..2672efdd3 100644 --- a/lib/decrees.js +++ b/lib/decrees.js @@ -112,9 +112,6 @@ commands.SET_PREMIUM_UPLOAD_SIZE = makeIntegerSetter('premiumUploadSize'); // CryptPad_AsyncStore.rpc.send('ADMIN', [ 'ADMIN_DECREE', ['UPDATE_DEFAULT_STORAGE', [100 * 1024 * 1024]]], console.log) commands.UPDATE_DEFAULT_STORAGE = makeIntegerSetter('defaultStorageLimit'); -// CryptPad_AsyncStore.rpc.send('ADMIN', [ 'ADMIN_DECREE', ['SET_LAST_EVICTION', [0]]], console.log) -commands.SET_LAST_EVICTION = makeIntegerSetter('lastEviction'); - // CryptPad_AsyncStore.rpc.send('ADMIN', [ 'ADMIN_DECREE', ['SET_INACTIVE_TIME', [90]]], console.log) commands.SET_INACTIVE_TIME = makeIntegerSetter('inactiveTime'); diff --git a/lib/env.js b/lib/env.js index 8cc45fcc4..97ebd20ce 100644 --- a/lib/env.js +++ b/lib/env.js @@ -92,7 +92,7 @@ module.exports.create = function (config) { disableIntegratedTasks: config.disableIntegratedTasks || false, disableIntegratedEviction: config.disableIntegratedEviction || false, lastEviction: +new Date(), - knownActiveAccounts: 0, + evictionReport: {}, }; (function () { diff --git a/lib/eviction.js b/lib/eviction.js index 22f5dbee8..84db87d13 100644 --- a/lib/eviction.js +++ b/lib/eviction.js @@ -49,6 +49,7 @@ module.exports = function (Env, cb) { // channelsArchived, + launchTime: +new Date(), // runningTime, }; diff --git a/lib/historyKeeper.js b/lib/historyKeeper.js index fc71c9856..d95c0e99f 100644 --- a/lib/historyKeeper.js +++ b/lib/historyKeeper.js @@ -191,13 +191,17 @@ module.exports.create = function (Env, cb) { // evict inactive data once per day if ((now - ONE_DAY) < Env.lastEviction) { return; } active = true; - Env.evictInactive(function (err) { + Env.evictInactive(function (err, report) { if (err) { // NO_INACTIVE_TIME Log.error('EVICT_INACTIVE_MAIN_ERROR', err); } active = false; Env.lastEviction = now; + if (report) { + Log.info('EVICT_INACTIVE_REPORT', report); + } + Env.evictionReport = report || {}; }); }, 60 * 1000); }).nThen(function () {