From 205711da7e9802f663d7f856e9ab8455c3092f0d Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Tue, 1 Aug 2017 10:24:19 -0400 Subject: [PATCH] refactor(image-stream): parse xz and gzip metadata using a custom read function (#1590) This commit refactors the xz and gzip image handlers to pass/use a custom read function to be able to determine the uncompressed size, and other needed metadata. By using this function (which currently only uses the `fs` module), we can implement support for getting the uncompressed size of compressed files using HTTP Ranges. Change-Type: patch Signed-off-by: Juan Cruz Viotti --- lib/image-stream/gzip.js | 60 +++++++++++++++++------------------- lib/image-stream/handlers.js | 23 +++++++++++--- lib/image-stream/mime.js | 9 +++--- lib/image-stream/utils.js | 35 +++++++++++++++++++++ 4 files changed, 87 insertions(+), 40 deletions(-) diff --git a/lib/image-stream/gzip.js b/lib/image-stream/gzip.js index 287a3bf9..8a10b99a 100644 --- a/lib/image-stream/gzip.js +++ b/lib/image-stream/gzip.js @@ -16,9 +16,6 @@ 'use strict'; -const Bluebird = require('bluebird'); -const fs = Bluebird.promisifyAll(require('fs')); - /** * @summary The byte length of ISIZE * @type {Number} @@ -29,46 +26,47 @@ const fs = Bluebird.promisifyAll(require('fs')); const ISIZE_LENGTH = 4; /** - * @summary Get a gzip file uncompressed size + * @summary Get the estimated uncompressed size of a gzip file * @function * @public * * @description * This function determines the uncompressed size of the gzip file - * by reading its `ISIZE`. The specification clarifies that this - * value is just an estimation. + * by reading its `ISIZE` field at the end of the file. The specification + * clarifies that this value is just an estimation. 
* - * @param {String} file - path to gzip file + * @param {Object} options - options + * @param {Number} options.size - file size + * @param {Function} options.read - read function (position, count) * @fulfil {Number} - uncompressed size * @returns {Promise} * * @example - * gzip.getUncompressedSize('path/to/file.gz').then((uncompressedSize) => { + * const fd = fs.openSync('path/to/image.gz', 'r'); + * + * gzip.getUncompressedSize({ + * size: fs.statSync('path/to/image.gz').size, + * read: (position, count) => { + * const buffer = Buffer.alloc(count); + * return new Promise((resolve, reject) => { + * fs.read(fd, buffer, 0, count, position, (error) => { + * if (error) { + * return reject(error); + * } + * + * resolve(buffer); + * }); + * }); + * } + * }).then((uncompressedSize) => { * console.log(`The uncompressed size is: ${uncompressedSize}`); + * fs.closeSync(fd); * }); */ -exports.getUncompressedSize = (file) => { - return Bluebird.using(fs.openAsync(file, 'r').disposer((fileDescriptor) => { - return fs.closeAsync(fileDescriptor); - }), (fileDescriptor) => { - return fs.fstatAsync(fileDescriptor).then((stats) => { - const ISIZE_BUFFER_FILL_VALUE = 0; - const ISIZE_BUFFER_START = 0; - const isizeBuffer = Buffer.alloc(ISIZE_LENGTH, ISIZE_BUFFER_FILL_VALUE); - - return fs.readAsync( - fileDescriptor, - isizeBuffer, - ISIZE_BUFFER_START, - ISIZE_LENGTH, - stats.size - ISIZE_LENGTH - ).then((bytesRead) => { - if (bytesRead !== ISIZE_LENGTH) { - throw new Error(`Bytes read mismatch: ${bytesRead} != ${ISIZE_LENGTH}`); - } - - return isizeBuffer.readUInt32LE(ISIZE_BUFFER_START); - }); - }); +exports.getUncompressedSize = (options) => { + const ISIZE_BUFFER_START = 0; + const ISIZE_POSITION = options.size - ISIZE_LENGTH; + return options.read(ISIZE_POSITION, ISIZE_LENGTH).then((buffer) => { + return buffer.readUInt32LE(ISIZE_BUFFER_START); }); }; diff --git a/lib/image-stream/handlers.js b/lib/image-stream/handlers.js index ee5f838c..b4529bd5 100644 ---
a/lib/image-stream/handlers.js +++ b/lib/image-stream/handlers.js @@ -27,6 +27,7 @@ const unbzip2Stream = require('unbzip2-stream'); const gzip = require('./gzip'); const udif = Bluebird.promisifyAll(require('udif')); const archive = require('./archive'); +const utils = require('./utils'); const zipArchiveHooks = require('./archive-hooks/zip'); const fileExtensions = require('../shared/file-extensions'); const path = require('path'); @@ -83,7 +84,16 @@ module.exports = { * @returns {Promise} */ 'application/gzip': (imagePath, options) => { - return gzip.getUncompressedSize(imagePath).then((uncompressedSize) => { + return Bluebird.using(fs.openAsync(imagePath, 'r').disposer((fileDescriptor) => { + return fs.closeAsync(fileDescriptor); + }), (fileDescriptor) => { + return gzip.getUncompressedSize({ + size: options.size, + read: (position, count) => { + return utils.readBufferFromImageFileDescriptor(fileDescriptor, position, count); + } + }); + }).then((uncompressedSize) => { return Bluebird.props({ path: imagePath, archiveExtension: fileExtensions.getLastFileExtension(imagePath), @@ -115,9 +125,14 @@ module.exports = { * @returns {Promise} */ 'application/x-xz': (imagePath, options) => { - return fs.openAsync(imagePath, 'r').then((fileDescriptor) => { - return lzma.parseFileIndexFDAsync(fileDescriptor).tap(() => { - return fs.closeAsync(fileDescriptor); + return Bluebird.using(fs.openAsync(imagePath, 'r').disposer((fileDescriptor) => { + return fs.closeAsync(fileDescriptor); + }), (fileDescriptor) => { + return lzma.parseFileIndexAsync({ + fileSize: options.size, + read: (count, position, callback) => { + utils.readBufferFromImageFileDescriptor(fileDescriptor, position, count).asCallback(callback); + } }); }).then((metadata) => { return { diff --git a/lib/image-stream/mime.js b/lib/image-stream/mime.js index bc7d2e6f..2f2538d3 100644 --- a/lib/image-stream/mime.js +++ b/lib/image-stream/mime.js @@ -18,9 +18,10 @@ const _ = require('lodash'); const Bluebird = 
require('bluebird'); +const fs = Bluebird.promisifyAll(require('fs')); const fileType = require('file-type'); const mime = require('mime-types'); -const fs = require('fs'); +const utils = require('./utils'); /** * @summary The default MIME type @@ -50,15 +51,13 @@ exports.getMimeTypeFromFileName = (filename) => { return Bluebird.resolve(mimeType); } + const FILE_TYPE_ID_START = 0; const FILE_TYPE_ID_BYTES = 262; return Bluebird.using(fs.openAsync(filename, 'r').disposer((fileDescriptor) => { return fs.closeAsync(fileDescriptor); }), (fileDescriptor) => { - const BUFFER_START = 0; - const buffer = Buffer.alloc(FILE_TYPE_ID_BYTES); - - return fs.readAsync(fileDescriptor, buffer, BUFFER_START, FILE_TYPE_ID_BYTES, null).then(() => { + return utils.readBufferFromImageFileDescriptor(fileDescriptor, FILE_TYPE_ID_START, FILE_TYPE_ID_BYTES).then((buffer) => { return _.get(fileType(buffer), [ 'mime' ], exports.DEFAULT_MIME_TYPE); }); }); diff --git a/lib/image-stream/utils.js b/lib/image-stream/utils.js index fa896a11..7b462bdf 100644 --- a/lib/image-stream/utils.js +++ b/lib/image-stream/utils.js @@ -17,6 +17,41 @@ 'use strict'; const Bluebird = require('bluebird'); +const fs = Bluebird.promisifyAll(require('fs')); +const errors = require('../shared/errors'); + +/** + * @summary Read a buffer from an image file descriptor + * @function + * @private + * + * @param {Number} fileDescriptor - file descriptor + * @param {Number} position - image position to start reading from + * @param {Number} count - number of bytes to read + * @fulfil {Buffer} - buffer + * @returns {Promise} + * + * @example + * fs.openAsync('path/to/image.img', 'r').then((fileDescriptor) => { + * return utils.readBufferFromImageFileDescriptor(fileDescriptor, 0, 512); + * }).then((buffer) => { + * console.log(buffer); + * }); + */ +exports.readBufferFromImageFileDescriptor = (fileDescriptor, position, count) => { + const BUFFER_FILL_VALUE = 0; + const BUFFER_START_POSITION = 0; + const buffer = 
Buffer.alloc(count, BUFFER_FILL_VALUE); + + return fs.readAsync(fileDescriptor, buffer, BUFFER_START_POSITION, count, position).tap((bytesRead) => { + if (bytesRead !== count) { + throw errors.createUserError({ + title: 'Looks like the image is truncated', + description: `We tried to read ${count} bytes at ${position}, but got ${bytesRead} bytes instead` + }); + } + }).return(buffer); +}; /** * @summary Extract the data of a readable stream