Speed up and simplify translations build (#19988)

* Speed up and simplify translations build

- Remove use of gulp-flatmap for merges (wasted input) and just loop over translation files.
- Parse and buffer master only once for all merges.
- Remove lokalise key reference transform from non-English files. This is already done by Lokalise when they are downloaded.
- Remove tabs from merged output to minimize buffer sizes.
- Pipe merges to a hashing stream, removing extra tasks and intermediate file I/O.
- Pipe hashed files to a single custom asynchronous transform stream to fragmentize the files. It expands the stream to push a new file for each fragment.
- Incorporate flattening into fragmentization.
- Delete entire ui.panel key for base translation (instead of leaving an empty object).
- Optimize flatten method to stop copying output over and over.
- Convert empty and test filters to JSON.parse() revivers for simplicity and better performance.
- Incorporate supervisor builds into main tasks using a simple toggle (i.e. remove duplicate code).
- Funcify local tasks and simplify exported tasks.
- Incorporate test metadata task into a simplified metadata task.

* Fix Lokalise key reference link

Co-authored-by: Simon Lamon <32477463+silamon@users.noreply.github.com>

---------

Co-authored-by: Simon Lamon <32477463+silamon@users.noreply.github.com>
This commit is contained in:
Steve Repsher
2024-04-12 06:49:18 -04:00
committed by GitHub
parent dc8a50965c
commit e22e3e88a0
6 changed files with 201 additions and 481 deletions

View File

@@ -1,92 +1,76 @@
import { createHash } from "crypto";
import { deleteSync } from "del";
import { mkdirSync, readdirSync, readFileSync, renameSync } from "fs";
import { writeFile } from "node:fs/promises";
import { deleteAsync } from "del";
import { glob } from "glob";
import gulp from "gulp";
import flatmap from "gulp-flatmap";
import transform from "gulp-json-transform";
import merge from "gulp-merge-json";
import rename from "gulp-rename";
import path from "path";
import vinylBuffer from "vinyl-buffer";
import source from "vinyl-source-stream";
import { createHash } from "node:crypto";
import { mkdir, readFile } from "node:fs/promises";
import { basename, join } from "node:path";
import { Transform } from "node:stream";
import { finished } from "node:stream/promises";
import env from "../env.cjs";
import paths from "../paths.cjs";
import { mapFiles } from "../util.cjs";
import "./fetch-nightly-translations.js";
const inFrontendDir = "translations/frontend";
const inBackendDir = "translations/backend";
const workDir = "build/translations";
const fullDir = workDir + "/full";
const coreDir = workDir + "/core";
const outDir = workDir + "/output";
const outDir = join(workDir, "output");
const EN_SRC = join(paths.translations_src, "en.json");
let mergeBackend = false;
gulp.task(
"translations-enable-merge-backend",
gulp.parallel((done) => {
gulp.parallel(async () => {
mergeBackend = true;
done();
}, "allow-setup-fetch-nightly-translations")
);
// Panel translations which should be split from the core translations.
const TRANSLATION_FRAGMENTS = Object.keys(
JSON.parse(
readFileSync(
path.resolve(paths.polymer_dir, "src/translations/en.json"),
"utf-8"
)
).ui.panel
);
// Transform stream to apply a function on Vinyl JSON files (buffer mode only).
// The provided function can either return a new object, or an array of
// [object, subdirectory] pairs for fragmentizing the JSON.
class CustomJSON extends Transform {
constructor(func, reviver = null) {
super({ objectMode: true });
this._func = func;
this._reviver = reviver;
}
function recursiveFlatten(prefix, data) {
let output = {};
Object.keys(data).forEach((key) => {
if (typeof data[key] === "object") {
output = {
...output,
...recursiveFlatten(prefix + key + ".", data[key]),
};
async _transform(file, _, callback) {
try {
let obj = JSON.parse(file.contents.toString(), this._reviver);
if (this._func) obj = this._func(obj, file.path);
for (const [outObj, dir] of Array.isArray(obj) ? obj : [[obj, ""]]) {
const outFile = file.clone({ contents: false });
outFile.contents = Buffer.from(JSON.stringify(outObj));
outFile.dirname += `/${dir}`;
this.push(outFile);
}
callback(null);
} catch (err) {
callback(err);
}
}
}
// Utility to flatten object keys to single level using separator
const flatten = (data, prefix = "", sep = ".") => {
const output = {};
for (const [key, value] of Object.entries(data)) {
if (typeof value === "object") {
Object.assign(output, flatten(value, prefix + key + sep, sep));
} else {
output[prefix + key] = data[key];
output[prefix + key] = value;
}
});
}
return output;
}
};
function flatten(data) {
return recursiveFlatten("", data);
}
function emptyFilter(data) {
const newData = {};
Object.keys(data).forEach((key) => {
if (data[key]) {
if (typeof data[key] === "object") {
newData[key] = emptyFilter(data[key]);
} else {
newData[key] = data[key];
}
}
});
return newData;
}
function recursiveEmpty(data) {
const newData = {};
Object.keys(data).forEach((key) => {
if (data[key]) {
if (typeof data[key] === "object") {
newData[key] = recursiveEmpty(data[key]);
} else {
newData[key] = "TRANSLATED";
}
}
});
return newData;
}
// Filter functions that can be passed directly to JSON.parse()
const emptyReviver = (_key, value) => value || undefined;
const testReviver = (_key, value) =>
value && typeof value === "string" ? "TRANSLATED" : value;
/**
* Replace Lokalise key placeholders with their actual values.
@@ -95,60 +79,44 @@ function recursiveEmpty(data) {
* be included in src/translations/en.json, but still be usable while
* developing locally.
*
* @link https://docs.lokalise.co/article/KO5SZWLLsy-key-referencing
* @link https://docs.lokalise.com/en/articles/1400528-key-referencing
*/
const re_key_reference = /\[%key:([^%]+)%\]/;
function lokaliseTransform(data, original, file) {
const KEY_REFERENCE = /\[%key:([^%]+)%\]/;
const lokaliseTransform = (data, path, original = data) => {
const output = {};
Object.entries(data).forEach(([key, value]) => {
if (value instanceof Object) {
output[key] = lokaliseTransform(value, original, file);
for (const [key, value] of Object.entries(data)) {
if (typeof value === "object") {
output[key] = lokaliseTransform(value, path, original);
} else {
output[key] = value.replace(re_key_reference, (_match, lokalise_key) => {
output[key] = value.replace(KEY_REFERENCE, (_match, lokalise_key) => {
const replace = lokalise_key.split("::").reduce((tr, k) => {
if (!tr) {
throw Error(
`Invalid key placeholder ${lokalise_key} in ${file.path}`
);
throw Error(`Invalid key placeholder ${lokalise_key} in ${path}`);
}
return tr[k];
}, original);
if (typeof replace !== "string") {
throw Error(
`Invalid key placeholder ${lokalise_key} in ${file.path}`
);
throw Error(`Invalid key placeholder ${lokalise_key} in ${path}`);
}
return replace;
});
}
});
}
return output;
}
};
gulp.task("clean-translations", async () => deleteSync([workDir]));
gulp.task("clean-translations", () => deleteAsync([workDir]));
gulp.task("ensure-translations-build-dir", async () => {
mkdirSync(workDir, { recursive: true });
});
const makeWorkDir = () => mkdir(workDir, { recursive: true });
gulp.task("create-test-metadata", () =>
env.isProdBuild()
? Promise.resolve()
: writeFile(
workDir + "/testMetadata.json",
JSON.stringify({ test: { nativeName: "Test" } })
)
);
gulp.task("create-test-translation", () =>
const createTestTranslation = () =>
env.isProdBuild()
? Promise.resolve()
: gulp
.src(path.join(paths.translations_src, "en.json"))
.pipe(transform((data, _file) => recursiveEmpty(data)))
.src(EN_SRC)
.pipe(new CustomJSON(null, testReviver))
.pipe(rename("test.json"))
.pipe(gulp.dest(workDir))
);
.pipe(gulp.dest(workDir));
/**
* This task will build a master translation file, to be used as the base for
@@ -159,279 +127,171 @@ gulp.task("create-test-translation", () =>
* project is buildable immediately after merging new translation keys, since
* the Lokalise update to translations/en.json will not happen immediately.
*/
gulp.task("build-master-translation", () => {
const src = [path.join(paths.translations_src, "en.json")];
if (mergeBackend) {
src.push(path.join(inBackendDir, "en.json"));
}
return gulp
.src(src)
.pipe(transform((data, file) => lokaliseTransform(data, data, file)))
const createMasterTranslation = () =>
gulp
.src([EN_SRC, ...(mergeBackend ? [`${inBackendDir}/en.json`] : [])])
.pipe(new CustomJSON(lokaliseTransform))
.pipe(
merge({
fileName: "en.json",
jsonSpace: undefined,
})
)
.pipe(gulp.dest(fullDir));
});
.pipe(gulp.dest(workDir));
gulp.task("build-merged-translations", () =>
gulp
.src([
inFrontendDir + "/*.json",
"!" + inFrontendDir + "/en.json",
...(env.isProdBuild() ? [] : [workDir + "/test.json"]),
])
.pipe(transform((data, file) => lokaliseTransform(data, data, file)))
.pipe(
flatmap((stream, file) => {
// For each language generate a merged json file. It begins with the master
// translation as a failsafe for untranslated strings, and merges all parent
// tags into one file for each specific subtag
//
// TODO: This is a naive interpretation of BCP47 that should be improved.
// Will be OK for now as long as we don't have anything more complicated
// than a base translation + region.
const tr = path.basename(file.history[0], ".json");
const subtags = tr.split("-");
const src = [fullDir + "/en.json"];
for (let i = 1; i <= subtags.length; i++) {
const lang = subtags.slice(0, i).join("-");
if (lang === "test") {
src.push(workDir + "/test.json");
} else if (lang !== "en") {
src.push(inFrontendDir + "/" + lang + ".json");
if (mergeBackend) {
src.push(inBackendDir + "/" + lang + ".json");
}
}
}
return gulp
.src(src, { allowEmpty: true })
.pipe(transform((data) => emptyFilter(data)))
.pipe(
merge({
fileName: tr + ".json",
})
)
.pipe(gulp.dest(fullDir));
})
)
);
const FRAGMENTS = ["base"];
let taskName;
const toggleSupervisorFragment = async () => {
FRAGMENTS[0] = "supervisor";
};
const splitTasks = [];
TRANSLATION_FRAGMENTS.forEach((fragment) => {
taskName = "build-translation-fragment-" + fragment;
gulp.task(taskName, () =>
// Return only the translations for this fragment.
gulp
.src(fullDir + "/*.json")
.pipe(
transform((data) => ({
ui: {
panel: {
[fragment]: data.ui.panel[fragment],
},
},
}))
)
.pipe(gulp.dest(workDir + "/" + fragment))
);
splitTasks.push(taskName);
});
const panelFragment = (fragment) =>
fragment !== "base" && fragment !== "supervisor";
taskName = "build-translation-core";
gulp.task(taskName, () =>
// Remove the fragment translations from the core translation.
gulp
.src(fullDir + "/*.json")
.pipe(
transform((data, _file) => {
TRANSLATION_FRAGMENTS.forEach((fragment) => {
delete data.ui.panel[fragment];
});
delete data.supervisor;
return data;
})
)
.pipe(gulp.dest(coreDir))
);
const HASHES = new Map();
splitTasks.push(taskName);
gulp.task("build-flattened-translations", () =>
// Flatten the split versions of our translations, and move them into outDir
gulp
.src(
TRANSLATION_FRAGMENTS.map(
(fragment) => workDir + "/" + fragment + "/*.json"
).concat(coreDir + "/*.json"),
{ base: workDir }
)
.pipe(
transform((data) =>
// Polymer.AppLocalizeBehavior requires flattened json
flatten(data)
)
)
.pipe(
rename((filePath) => {
if (filePath.dirname === "core") {
filePath.dirname = "";
}
// In dev we create the file with the fake hash in the filename
if (!env.isProdBuild()) {
filePath.basename += "-dev";
}
})
)
.pipe(gulp.dest(outDir))
);
const fingerprints = {};
gulp.task("build-translation-fingerprints", () => {
// Fingerprint full file of each language
const files = readdirSync(fullDir);
for (let i = 0; i < files.length; i++) {
fingerprints[files[i].split(".")[0]] = {
// In dev we create fake hashes
hash: env.isProdBuild()
? createHash("md5")
.update(readFileSync(path.join(fullDir, files[i]), "utf-8"))
.digest("hex")
: "dev",
};
const createTranslations = async () => {
// Parse and store the master to avoid repeating this for each locale, then
// add the panel fragments when processing the app.
const enMaster = JSON.parse(await readFile(`${workDir}/en.json`, "utf-8"));
if (FRAGMENTS[0] === "base") {
FRAGMENTS.push(...Object.keys(enMaster.ui.panel));
}
// In dev we create the file with the fake hash in the filename
if (env.isProdBuild()) {
mapFiles(outDir, ".json", (filename) => {
const parsed = path.parse(filename);
// The downstream pipeline is setup first. It hashes the merged data for
// each locale, then fragmentizes and flattens the data for final output.
const translationFiles = await glob([
`${inFrontendDir}/!(en).json`,
...(env.isProdBuild() ? [] : [`${workDir}/test.json`]),
]);
const hashStream = new Transform({
objectMode: true,
transform: async (file, _, callback) => {
const hash = env.isProdBuild()
? createHash("md5").update(file.contents).digest("hex")
: "dev";
HASHES.set(file.stem, hash);
file.stem += `-${hash}`;
callback(null, file);
},
}).setMaxListeners(translationFiles.length + 1);
const fragmentsStream = hashStream
.pipe(
new CustomJSON((data) =>
FRAGMENTS.map((fragment) => {
switch (fragment) {
case "base":
// Remove the panels and supervisor to create the base translations
return [
flatten({
...data,
ui: { ...data.ui, panel: undefined },
supervisor: undefined,
}),
"",
];
case "supervisor":
// Supervisor key is at the top level
return [flatten(data.supervisor), ""];
default:
// Create a fragment with only the given panel
return [
flatten(data.ui.panel[fragment], `ui.panel.${fragment}.`),
fragment,
];
}
})
)
)
.pipe(gulp.dest(outDir));
// nl.json -> nl-<hash>.json
if (!(parsed.name in fingerprints)) {
throw new Error(`Unable to find hash for ${filename}`);
// Send the English master downstream first, then for each other locale
// generate merged JSON data to continue piping. It begins with the master
// translation as a failsafe for untranslated strings, and merges all parent
// tags into one file for each specific subtag
//
// TODO: This is a naive interpretation of BCP47 that should be improved.
// Will be OK for now as long as we don't have anything more complicated
// than a base translation + region.
gulp.src(`${workDir}/en.json`).pipe(hashStream, { end: false });
const mergesFinished = [];
for (const translationFile of translationFiles) {
const locale = basename(translationFile, ".json");
const subtags = locale.split("-");
const mergeFiles = [];
for (let i = 1; i <= subtags.length; i++) {
const lang = subtags.slice(0, i).join("-");
if (lang === "test") {
mergeFiles.push(`${workDir}/test.json`);
} else if (lang !== "en") {
mergeFiles.push(`${inFrontendDir}/${lang}.json`);
if (mergeBackend) {
mergeFiles.push(`${inBackendDir}/${lang}.json`);
}
}
renameSync(
filename,
`${parsed.dir}/${parsed.name}-${fingerprints[parsed.name].hash}${
parsed.ext
}`
);
});
}
const mergeStream = gulp.src(mergeFiles, { allowEmpty: true }).pipe(
merge({
fileName: `${locale}.json`,
startObj: enMaster,
jsonReviver: emptyReviver,
jsonSpace: undefined,
})
);
mergesFinished.push(finished(mergeStream));
mergeStream.pipe(hashStream, { end: false });
}
const stream = source("translationFingerprints.json");
stream.write(JSON.stringify(fingerprints));
process.nextTick(() => stream.end());
return stream.pipe(vinylBuffer()).pipe(gulp.dest(workDir));
});
// Wait for all merges to finish, then it's safe to end writing to the
// downstream pipeline and wait for all fragments to finish writing.
await Promise.all(mergesFinished);
hashStream.end();
await finished(fragmentsStream);
};
gulp.task("build-translation-fragment-supervisor", () =>
const writeTranslationMetaData = () =>
gulp
.src(fullDir + "/*.json")
.pipe(transform((data) => data.supervisor))
.src([`${paths.translations_src}/translationMetadata.json`])
.pipe(
rename((filePath) => {
// In dev we create the file with the fake hash in the filename
new CustomJSON((meta) => {
// Add the test translation in development.
if (!env.isProdBuild()) {
filePath.basename += "-dev";
meta.test = { nativeName: "Test" };
}
})
)
.pipe(gulp.dest(workDir + "/supervisor"))
);
gulp.task("build-translation-flatten-supervisor", () =>
gulp
.src(workDir + "/supervisor/*.json")
.pipe(
transform((data) =>
// Polymer.AppLocalizeBehavior requires flattened json
flatten(data)
)
)
.pipe(gulp.dest(outDir))
);
gulp.task("build-translation-write-metadata", () =>
gulp
.src([
path.join(paths.translations_src, "translationMetadata.json"),
...(env.isProdBuild() ? [] : [workDir + "/testMetadata.json"]),
workDir + "/translationFingerprints.json",
])
.pipe(merge({}))
.pipe(
transform((data) => {
const newData = {};
Object.entries(data).forEach(([key, value]) => {
// Filter out translations without native name.
if (value.nativeName) {
newData[key] = value;
} else {
// Filter out locales without a native name, and add the hashes.
for (const locale of Object.keys(meta)) {
if (!meta[locale].nativeName) {
meta[locale] = undefined;
console.warn(
`Skipping language ${key}. Native name was not translated.`
`Skipping locale ${locale} because native name is not translated.`
);
} else {
meta[locale].hash = HASHES.get(locale);
}
});
return newData;
}
return {
fragments: FRAGMENTS.filter(panelFragment),
translations: meta,
};
})
)
.pipe(
transform((data) => ({
fragments: TRANSLATION_FRAGMENTS,
translations: data,
}))
)
.pipe(rename("translationMetadata.json"))
.pipe(gulp.dest(workDir))
);
gulp.task(
"create-translations",
gulp.series(
gulp.parallel("create-test-metadata", "create-test-translation"),
"build-master-translation",
"build-merged-translations",
gulp.parallel(...splitTasks),
"build-flattened-translations"
)
);
.pipe(gulp.dest(workDir));
gulp.task(
"build-translations",
gulp.series(
gulp.parallel(
"fetch-nightly-translations",
gulp.series("clean-translations", "ensure-translations-build-dir")
gulp.series("clean-translations", makeWorkDir)
),
"create-translations",
"build-translation-fingerprints",
"build-translation-write-metadata"
createTestTranslation,
createMasterTranslation,
createTranslations,
writeTranslationMetaData
)
);
gulp.task(
"build-supervisor-translations",
gulp.series(
gulp.parallel(
"fetch-nightly-translations",
gulp.series("clean-translations", "ensure-translations-build-dir")
),
gulp.parallel("create-test-metadata", "create-test-translation"),
"build-master-translation",
"build-merged-translations",
"build-translation-fragment-supervisor",
"build-translation-flatten-supervisor",
"build-translation-fingerprints",
"build-translation-write-metadata"
)
gulp.series(toggleSupervisorFragment, "build-translations")
);

View File

@@ -99,7 +99,7 @@ gulp.task("webpack-watch-app", () => {
).watch({ poll: isWsl }, doneHandler());
gulp.watch(
path.join(paths.translations_src, "en.json"),
gulp.series("create-translations", "copy-translations-app")
gulp.series("build-translations", "copy-translations-app")
);
});