Merge branch 'staging' of github.com:xwiki-labs/cryptpad into staging
commit
adc64f0c6e
@ -0,0 +1,235 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
There are many situations where we want to do lots of little jobs
|
||||||
|
in parallel and with few constraints as to their ordering.
|
||||||
|
|
||||||
|
One example is recursing over a bunch of directories and reading files.
|
||||||
|
The naive way to do this is to recurse over all the subdirectories
|
||||||
|
relative to a root while adding files to a list. Then to iterate over
|
||||||
|
the files in that list. Unfortunately, this means holding the complete
|
||||||
|
list of file paths in memory, which can't possibly scale as our database grows.
|
||||||
|
|
||||||
|
A better way to do this is to recurse into one directory and
|
||||||
|
iterate over its contents until there are no more, then to backtrack
|
||||||
|
to the next directory and repeat until no more directories exist.
|
||||||
|
This kind of thing is easy enough when you perform one task at a time
|
||||||
|
and use synchronous code, but with multiple asynchronous tasks it's
|
||||||
|
easy to introduce subtle bugs.
|
||||||
|
|
||||||
|
This module is designed for these situations. It allows you to easily
|
||||||
|
and efficiently schedule a large number of tasks with an associated
|
||||||
|
degree of priority from 0 (highest priority) to Number.MAX_SAFE_INTEGER.
|
||||||
|
|
||||||
|
Initialize your scheduler with a degree of parallelism, and start planning
|
||||||
|
some initial jobs. Set it to run and it will keep going until all jobs are
|
||||||
|
complete, at which point it will optionally execute a 'done' callback.
|
||||||
|
|
||||||
|
Getting back to the original example:
|
||||||
|
|
||||||
|
List the contents of the root directory, then plan subsequent jobs
|
||||||
|
with a priority of 1 to recurse into subdirectories. The callback
|
||||||
|
of each of these recursions can then plan higher priority tasks
|
||||||
|
to actually process the contained files with a priority of 0.
|
||||||
|
|
||||||
|
As long as there are more files scheduled it will continue to process
|
||||||
|
them first. When there are no more files the scheduler will read
|
||||||
|
the next directory and repopulate the list of files to process.
|
||||||
|
This will repeat until everything is done.
|
||||||
|
|
||||||
|
// load the module
|
||||||
|
const Plan = require("./plan");
|
||||||
|
|
||||||
|
// instantiate a scheduler with a parallelism of 5
|
||||||
|
var plan = Plan(5)
|
||||||
|
|
||||||
|
// plan the first job which schedules more jobs...
|
||||||
|
.job(1, function (next) {
|
||||||
|
listRootDirectory(function (files) {
|
||||||
|
files.forEach(function (file) {
|
||||||
|
// highest priority, run as soon as there is a free worker
|
||||||
|
plan.job(0, function (next) {
|
||||||
|
processFile(file, function (result) {
|
||||||
|
console.log(result);
|
||||||
|
// don't forget to call next
|
||||||
|
next();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
next(); // call 'next' to free up one worker
|
||||||
|
});
|
||||||
|
})
|
||||||
|
// chain commands together if you want
|
||||||
|
.done(function () {
|
||||||
|
console.log("DONE");
|
||||||
|
})
|
||||||
|
// it won't run unless you launch it
|
||||||
|
.start();
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
module.exports = function (max) {
|
||||||
|
var plan = {};
|
||||||
|
max = max || 5;
|
||||||
|
|
||||||
|
// finds an id that isn't in use in a particular map
|
||||||
|
// accepts an id in case you have one already chosen
|
||||||
|
// otherwise generates random new ids if one is not passed
|
||||||
|
// or if there is a collision
|
||||||
|
var uid = function (map, id) {
|
||||||
|
if (typeof(id) === 'undefined') {
|
||||||
|
id = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER);
|
||||||
|
}
|
||||||
|
if (id && typeof(map[id]) === 'undefined') {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
return uid(map);
|
||||||
|
};
|
||||||
|
|
||||||
|
// the queue of jobs is an array, which will be populated
|
||||||
|
// with maps for each level of priority
|
||||||
|
var jobs = [];
|
||||||
|
|
||||||
|
// the count of currently running jobs
|
||||||
|
var count = 0;
|
||||||
|
|
||||||
|
// a list of callbacks to be executed once everything is done
|
||||||
|
var completeHandlers = [];
|
||||||
|
|
||||||
|
// the recommended usage is to create a new scheduler for every job
|
||||||
|
// use it for internals in a scope, and let the garbage collector
|
||||||
|
// clean up when everything stops. This means you shouldn't
|
||||||
|
// go passing 'plan' around in a long-lived process!
|
||||||
|
var FINISHED = false;
|
||||||
|
var done = function () {
|
||||||
|
// 'done' gets called when there are no more jobs in the queue
|
||||||
|
// but other jobs might still be running...
|
||||||
|
|
||||||
|
// the count of running processes should never be less than zero
|
||||||
|
// because we guard against multiple callbacks
|
||||||
|
if (count < 0) { throw new Error("should never happen"); }
|
||||||
|
// greater than zero is definitely possible, it just means you aren't done yet
|
||||||
|
if (count !== 0) { return; }
|
||||||
|
// you will finish twice if you call 'start' a second time
|
||||||
|
// this behaviour isn't supported yet.
|
||||||
|
if (FINISHED) { throw new Error('finished twice'); }
|
||||||
|
FINISHED = true;
|
||||||
|
// execute all your 'done' callbacks
|
||||||
|
completeHandlers.forEach(function (f) { f(); });
|
||||||
|
};
|
||||||
|
|
||||||
|
var run;
|
||||||
|
|
||||||
|
// this 'next' is internal only.
|
||||||
|
// it iterates over all known jobs, running them until
|
||||||
|
// the scheduler achieves the desired amount of parallelism.
|
||||||
|
// If there are no more jobs it will call 'done'
|
||||||
|
// which will shortcircuit if there are still pending tasks.
|
||||||
|
// Whenever any tasks finishes it will return its lock and
|
||||||
|
// run as many new jobs as are allowed.
|
||||||
|
var next = function () {
|
||||||
|
// array.some skips over bare indexes in sparse arrays
|
||||||
|
var pending = jobs.some(function (bag /*, priority*/) {
|
||||||
|
if (!bag || typeof(bag) !== 'object') { return; }
|
||||||
|
// a bag is a map of jobs for any particular degree of priority
|
||||||
|
// iterate over jobs in the bag until you're out of 'workers'
|
||||||
|
for (var id in bag) {
|
||||||
|
// bail out if you hit max parallelism
|
||||||
|
if (count >= max) { return true; }
|
||||||
|
run(bag, id, next);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
// check whether you're done if you hit the end of the array
|
||||||
|
if (!pending) { done(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// and here's the part that actually handles jobs...
|
||||||
|
run = function (bag, id) {
|
||||||
|
// this is just a sanity check.
|
||||||
|
// there should only ever be jobs in each bag.
|
||||||
|
if (typeof(bag[id]) !== 'function') {
|
||||||
|
throw new Error("expected function");
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep a local reference to the function
|
||||||
|
var f = bag[id];
|
||||||
|
// remove it from the bag.
|
||||||
|
delete bag[id];
|
||||||
|
// increment the count of running jobs
|
||||||
|
count++;
|
||||||
|
|
||||||
|
// guard against it being called twice.
|
||||||
|
var called = false;
|
||||||
|
f(function () {
|
||||||
|
// watch out! it'll bite you.
|
||||||
|
// maybe this should just return?
|
||||||
|
// support that option for 'production' ?
|
||||||
|
if (called) { throw new Error("called twice"); }
|
||||||
|
// the code below is safe because we can't call back a second time
|
||||||
|
called = true;
|
||||||
|
|
||||||
|
// decrement the count of running jobs...
|
||||||
|
count--;
|
||||||
|
|
||||||
|
// and finally call next to replace this worker with more job(s)
|
||||||
|
next();
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// this is exposed as API
|
||||||
|
plan.job = function (priority, cb) {
|
||||||
|
// you have to pass both the priority (a non-negative number) and an actual job
|
||||||
|
if (typeof(priority) !== 'number' || priority < 0) { throw new Error('expected a non-negative number'); }
|
||||||
|
// a job is an asynchronous function that takes a single parameter:
|
||||||
|
// a 'next' callback which will keep the whole thing going.
|
||||||
|
// forgetting to call 'next' means you'll never complete.
|
||||||
|
if (typeof(cb) !== 'function') { throw new Error('expected function'); }
|
||||||
|
|
||||||
|
// initialize the specified priority level if it doesn't already exist
|
||||||
|
var bag = jobs[priority] = jobs[priority] || {};
|
||||||
|
// choose a random id that isn't already in use for this priority level
|
||||||
|
var id = uid(bag);
|
||||||
|
|
||||||
|
// add the job to this priority level's bag
|
||||||
|
// most (all?) javascript engines will append this job to the bottom
|
||||||
|
// of the map. Meaning when we iterate it will be run later than
|
||||||
|
// other jobs that were scheduled first, effectively making a FIFO queue.
|
||||||
|
// However, this is undefined behaviour and you shouldn't ever rely on it.
|
||||||
|
bag[id] = function (next) {
|
||||||
|
cb(next);
|
||||||
|
};
|
||||||
|
// returning 'plan' lets us chain methods together.
|
||||||
|
return plan;
|
||||||
|
};
|
||||||
|
|
||||||
|
var started = false;
|
||||||
|
plan.start = function () {
|
||||||
|
// don't allow multiple starts
|
||||||
|
// even though it should work, it's simpler not to.
|
||||||
|
if (started) { return plan; }
|
||||||
|
// this seems to imply a 'stop' method
|
||||||
|
// but I don't need it, so I'm not implementing it now --ansuz
|
||||||
|
started = true;
|
||||||
|
|
||||||
|
// start asynchronously, otherwise jobs will start running
|
||||||
|
// before you've had a chance to return 'plan', and weird things
|
||||||
|
// happen.
|
||||||
|
setTimeout(function () {
|
||||||
|
next();
|
||||||
|
});
|
||||||
|
return plan;
|
||||||
|
};
|
||||||
|
|
||||||
|
// you can pass any number of functions to be executed
|
||||||
|
// when all pending jobs are complete.
|
||||||
|
// We don't pass any arguments, so you need to handle return values
|
||||||
|
// yourself if you want them.
|
||||||
|
plan.done = function (f) {
|
||||||
|
if (typeof(f) !== 'function') { throw new Error('expected function'); }
|
||||||
|
completeHandlers.push(f);
|
||||||
|
return plan;
|
||||||
|
};
|
||||||
|
|
||||||
|
// That's all! I hope you had fun reading this!
|
||||||
|
return plan;
|
||||||
|
};
|
||||||
|
|
@ -0,0 +1,45 @@
|
|||||||
|
/*jshint esversion: 6 */
|
||||||
|
const Pins = require("../../lib/pins");
|
||||||
|
|
||||||
|
// Aggregate counters accumulated across every processed pin log.
var stats = {
    users: 0,      // how many users were processed
    lines: 0,      // how many lines did you iterate over
    surplus: 0,    // how many of those lines were not needed?
    pinned: 0,     // how many files are pinned?
    duplicated: 0, // how many pins had already been counted?
};

// Fold one user's pin data ('ref') into the running totals.
// 'pinned' is the map of documents already known to be pinned.
var handler = function (ref, id /* safeKey */, pinned) {
    var surplus = ref.surplus;
    if (surplus) {
        // these lines could be trimmed from the user's pin log
        //console.log("%s has %s trimmable lines", id, surplus);
        stats.surplus += surplus;
    }

    var item;
    for (item in ref.pins) {
        var alreadyPinned = Object.prototype.hasOwnProperty.call(pinned, item);
        if (alreadyPinned) {
            //console.log("> %s was already pinned", item);
            stats.duplicated++;
        } else {
            //console.log("> %s is pinned", item);
            stats.pinned++;
        }
    }

    stats.users++;
    stats.lines += ref.index;
    //console.log(ref, id);
};
|
||||||
|
|
||||||
|
// Walk every pin log on disk; per-user accounting happens in 'handler'.
// Once the full list has been processed, print the accumulated stats.
Pins.list(function (err /*, pinned */) {
    // Previously the error was silently ignored, which printed zeroed
    // stats as though the run had succeeded. Surface it instead.
    if (err) {
        console.error(err);
        return;
    }
    /*
    for (var id in pinned) {
        console.log(id);
        stats.pinned++;
    }
    */
    console.log(stats);
}, {
    pinPath: require("../../lib/load-config").pinPath,
    handler: handler,
});
|
||||||
|
|
@ -0,0 +1,41 @@
|
|||||||
|
/*jshint esversion: 6 */
|
||||||
|
const Plan = require("../../lib/plan");
|
||||||
|
|
||||||
|
// Invoke 'f' after a random delay between 250ms and ~1750ms,
// simulating asynchronous work of unpredictable duration.
var rand_delay = function (f) {
    var jitter = Math.floor(Math.random() * 1500);
    setTimeout(f, jitter + 250);
};
|
||||||
|
|
||||||
|
// Exercise the scheduler: a priority-1 job fans out twelve priority-0
// jobs, then a priority-2 job fans out four priority-3 jobs. With a
// parallelism of 6, the priority-0 batch should drain before the
// priority-3 batch begins.
var plan = Plan(6).job(1, function (next) {
    var firstBatch = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    firstBatch.forEach(function (n) {
        // highest priority: runs as soon as a worker frees up
        plan.job(0, function (next) {
            rand_delay(function () {
                console.log("finishing job %s", n);
                next();
            });
        });
    });
    console.log("finishing job 0");
    next();
}).job(2, function (next) {
    console.log("finishing job 13");

    var secondBatch = [100, 200, 300, 400];
    secondBatch.forEach(function (n) {
        // lowest priority in this demo
        plan.job(3, function (next) {
            rand_delay(function () {
                console.log("finishing job %s", n);
                next();
            });
        });
    });

    next();
}).done(function () { console.log("DONE"); }).start();
|
||||||
|
|
||||||
|
//console.log(plan);
|
||||||
|
|
||||||
|
//plan.start();
|
Loading…
Reference in New Issue