From 0ea300638adaa56f3b274611b459014357cd5320 Mon Sep 17 00:00:00 2001 From: ansuz Date: Thu, 26 Aug 2021 19:50:26 +0530 Subject: [PATCH] generate a report of consistently duplicated translation keys --- .../find-duplicate-translations.js | 116 +++++++++++++----- 1 file changed, 85 insertions(+), 31 deletions(-) diff --git a/scripts/translations/find-duplicate-translations.js b/scripts/translations/find-duplicate-translations.js index f8b25497c..8306fc70c 100644 --- a/scripts/translations/find-duplicate-translations.js +++ b/scripts/translations/find-duplicate-translations.js @@ -1,55 +1,109 @@ -var Util = require("../lib/common-util"); -var EN = Util.clone(require("../www/common/translations/messages.json")); -var FR = Util.clone(require("../www/common/translations/messages.fr.json")); -var DE = Util.clone(require("../www/common/translations/messages.de.json")); -var JP = Util.clone(require("../www/common/translations/messages.ja.json")); - -var keys = Object.keys(EN); - -var duplicates = {}; +var Assert = require("assert"); +var Util = require("../../lib/common-util"); var addIfAbsent = function (A, e) { if (A.includes(e)) { return; } A.push(e); }; -var markDuplicate = function (value, key1, key2) { - //console.log("[%s] === [%s] (%s)", key1, key2, value); - if (!Array.isArray(duplicates[value])) { - duplicates[value] = []; - } - addIfAbsent(duplicates[value], key1); - addIfAbsent(duplicates[value], key2); -}; -keys.forEach(function (key) { - var value = EN[key]; +var findDuplicates = function (map) { + var keys = Object.keys(map); + - //var duplicates = []; - keys.forEach(function (key2) { - if (key === key2) { return; } - var value2 = EN[key2]; - if (value === value2) { - markDuplicate(value, key, key2); + var duplicates = {}; + var markDuplicate = function (value, key1, key2) { + //console.log("[%s] === [%s] (%s)", key1, key2, value); + if (!Array.isArray(duplicates[value])) { + duplicates[value] = []; } + addIfAbsent(duplicates[value], key1); + addIfAbsent(duplicates[value], key2); + }; + + keys.forEach(function (key) { + var value = map[key]; + + //var duplicates = []; + keys.forEach(function (key2) { + if (key === key2) { return; } + var value2 = map[key2]; + if (value === value2) { + markDuplicate(value, key, key2); + } + }); }); -}); -// indicate which strings are duplicated and could potentially be changed to use one key -Object.keys(duplicates).forEach(function (val) { - console.log('\"%s\" => %s', val, JSON.stringify(duplicates[val])); + var temp = {}; + // sort keys and construct a new index using the first key in the sorted array + Object.keys(duplicates).forEach(function (key) { + var val = duplicates[key]; // should be an array + val.sort(); // default js sort + var new_key = val[0]; + temp[new_key] = val; + }); + + var canonical = {}; + Object.keys(temp).sort().forEach(function (key) { + canonical[key] = temp[key]; + }); + return canonical; +}; + +var logDuplicates = function (duplicates) { + // indicate which strings are duplicated and could potentially be changed to use one key + Object.keys(duplicates).forEach(function (val) { + console.log('\"%s\" => %s', val, JSON.stringify(duplicates[val])); + }); +}; + +var FULL_LANGUAGES = { + EN: Util.clone(require("../../www/common/translations/messages.json")), + FR: Util.clone(require("../../www/common/translations/messages.fr.json")), + DE: Util.clone(require("../../www/common/translations/messages.de.json")), + JP: Util.clone(require("../../www/common/translations/messages.ja.json")), +}; + +var DUPLICATES = {}; + +Object.keys(FULL_LANGUAGES).forEach(function (code) { + DUPLICATES[code] = findDuplicates(FULL_LANGUAGES[code]); }); -// TODO iterate over all languages and +var extraneousKeys = 0; // 1) check whether the same mapping exists across languages // ie. English has "Open" (verb) and "Open" (adjective) // while French has "Ouvrir" and "Ouvert(s)" // such keys should not be simplified/deduplicated +Object.keys(DUPLICATES.EN).forEach(function (key) { + var reference = DUPLICATES.EN[key]; + if (!['FR', 'DE', 'JP'].every(function (code) { + try { + Assert.deepEqual(reference, DUPLICATES[code][key]); + } catch (err) { + return false; + } + return true; + })) { + return; + } + console.log("The key [%s] (\"%s\") is duplicated identically across all fully supported languages", key, FULL_LANGUAGES.EN[key]); + console.log("Values:", JSON.stringify(['EN', 'FR', 'DE', 'JP'].map(function (code) { + return FULL_LANGUAGES[code][key]; + }))); + console.log("Keys:", JSON.stringify(reference)); + console.log(); + extraneousKeys += reference.length - 1; + //console.log("\n" + code + "\n==\n"); + //logDuplicates(map); +}); + +console.log("Total extraneous keys: %s", extraneousKeys); +// TODO // find instances where // one of the duplicated keys is not translated // perhaps we could automatically use the translated one everywhere // and improve the completeness of translations -