Skip to content

Commit 23d399c

Browse files
Merge pull request #310 from stephenplusplus/spp--storage-download-integrity
storage: download integrity check
2 parents dc4bf20 + c7c6e60 commit 23d399c

File tree

3 files changed

+253
-39
lines changed

3 files changed

+253
-39
lines changed

lib/storage/file.js

+130 -19
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,19 @@ File.prototype.copy = function(destination, callback) {
211211
* piped to a writable stream or listened to for 'data' events to read a file's
212212
* contents.
213213
*
214+
* In the unlikely event there is a mismatch between what you downloaded and the
215+
* version in your Bucket, your error handler will receive an error with code
216+
* "CONTENT_DOWNLOAD_MISMATCH". If you receive this error, the best recourse is
217+
* to try downloading the file again.
218+
*
219+
* @param {object=} options - Configuration object.
220+
* @param {string|boolean} options.validation - Possible values: `"md5"`,
221+
* `"crc32c"`, or `false`. By default, data integrity is validated with an
222+
* MD5 checksum for maximum reliability, falling back to CRC32c when an MD5
223+
* hash wasn't returned from the API. CRC32c will provide better performance
224+
* with less reliability. You may also choose to skip validation completely,
225+
* however this is **not recommended**.
226+
*
214227
* @example
215228
* //-
216229
* // <h4>Downloading a File</h4>
@@ -226,35 +239,133 @@ File.prototype.copy = function(destination, callback) {
226239
* .pipe(fs.createWriteStream('/Users/stephen/Photos/image.png'))
227240
* .on('error', function(err) {});
228241
*/
229-
File.prototype.createReadStream = function() {
230-
var storage = this.bucket.storage;
231-
var dup = duplexify();
232-
function createAuthorizedReq(uri) {
233-
var reqOpts = { uri: uri };
234-
storage.makeAuthorizedRequest_(reqOpts, {
235-
onAuthorized: function(err, authorizedReqOpts) {
236-
if (err) {
237-
dup.emit('error', err);
238-
dup.end();
239-
return;
240-
}
241-
dup.setReadable(request(authorizedReqOpts));
242-
}
243-
});
242+
File.prototype.createReadStream = function(options) {
243+
options = options || {};
244+
245+
var that = this;
246+
var throughStream = through();
247+
248+
var validations = ['crc32c', 'md5'];
249+
var validation;
250+
251+
if (util.is(options.validation, 'string')) {
252+
options.validation = options.validation.toLowerCase();
253+
254+
if (validations.indexOf(options.validation) > -1) {
255+
validation = options.validation;
256+
} else {
257+
validation = 'all';
258+
}
259+
}
260+
261+
if (util.is(options.validation, 'undefined')) {
262+
validation = 'all';
244263
}
264+
265+
var crc32c = validation === 'crc32c' || validation === 'all';
266+
var md5 = validation === 'md5' || validation === 'all';
267+
245268
if (this.metadata.mediaLink) {
246269
createAuthorizedReq(this.metadata.mediaLink);
247270
} else {
248271
this.getMetadata(function(err, metadata) {
249272
if (err) {
250-
dup.emit('error', err);
251-
dup.end();
273+
throughStream.emit('error', err);
274+
throughStream.end();
252275
return;
253276
}
277+
254278
createAuthorizedReq(metadata.mediaLink);
255279
});
256280
}
257-
return dup;
281+
282+
return throughStream;
283+
284+
// Authenticate the request, then pipe the remote API request to the stream
285+
// returned to the user.
286+
function createAuthorizedReq(uri) {
287+
var reqOpts = {
288+
uri: uri
289+
};
290+
291+
that.bucket.storage.makeAuthorizedRequest_(reqOpts, {
292+
onAuthorized: function(err, authorizedReqOpts) {
293+
if (err) {
294+
throughStream.emit('error', err);
295+
throughStream.end();
296+
return;
297+
}
298+
299+
// For data integrity, hash the contents of the stream as we receive it
300+
// from the server.
301+
var localCrc32cHash;
302+
var localMd5Hash = crypto.createHash('md5');
303+
304+
request(authorizedReqOpts)
305+
.on('error', function(err) {
306+
throughStream.emit('error', err);
307+
throughStream.end();
308+
})
309+
310+
.on('data', function(chunk) {
311+
if (crc32c) {
312+
localCrc32cHash = crc.calculate(chunk, localCrc32cHash);
313+
}
314+
315+
if (md5) {
316+
localMd5Hash.update(chunk);
317+
}
318+
})
319+
320+
.on('complete', function(res) {
321+
var failed = false;
322+
var crcFail = true;
323+
var md5Fail = true;
324+
325+
var hashes = {};
326+
res.headers['x-goog-hash'].split(',').forEach(function(hash) {
327+
var hashType = hash.split('=')[0];
328+
hashes[hashType] = hash.substr(hash.indexOf('=') + 1);
329+
});
330+
331+
var remoteMd5 = hashes.md5;
332+
var remoteCrc = hashes.crc32c && hashes.crc32c.substr(4);
333+
334+
if (crc32c) {
335+
crcFail =
336+
new Buffer([localCrc32cHash]).toString('base64') !== remoteCrc;
337+
failed = crcFail;
338+
}
339+
340+
if (md5) {
341+
md5Fail = localMd5Hash.digest('base64') !== remoteMd5;
342+
failed = md5Fail;
343+
}
344+
345+
if (validation === 'all') {
346+
failed = remoteMd5 ? md5Fail : crcFail;
347+
}
348+
349+
if (failed) {
350+
var error = new Error([
351+
'The downloaded data did not match the data from the server.',
352+
'To be sure the content is the same, you should download the',
353+
'file again.'
354+
].join(' '));
355+
error.code = 'CONTENT_DOWNLOAD_MISMATCH';
356+
357+
throughStream.emit('error', error);
358+
} else {
359+
throughStream.emit('complete');
360+
}
361+
362+
throughStream.end();
363+
})
364+
365+
.pipe(throughStream);
366+
}
367+
});
368+
}
258369
};
259370

260371
/**
@@ -688,7 +799,7 @@ File.prototype.startResumableUpload_ = function(stream, metadata) {
688799
method: 'PUT',
689800
uri: resumableUri
690801
}, {
691-
onAuthorized: function (err, reqOpts) {
802+
onAuthorized: function(err, reqOpts) {
692803
if (err) {
693804
handleError(err);
694805
return;

regression/storage.js

+8 -3
Original file line number | Diff line number | Diff line change
@@ -322,12 +322,17 @@ describe('storage', function() {
322322

323323
writeStream.on('error', done);
324324
writeStream.on('complete', function() {
325+
var data = new Buffer('');
326+
325327
file.createReadStream()
328+
.on('error', done)
326329
.on('data', function(chunk) {
327-
assert.equal(String(chunk), contents);
330+
data = Buffer.concat([data, chunk]);
328331
})
329-
.on('error', done)
330-
.on('end', done);
332+
.on('complete', function() {
333+
assert.equal(data.toString(), contents);
334+
done();
335+
});
331336
});
332337
});
333338

test/storage/file.js

+115 -17
Original file line number | Diff line number | Diff line change
@@ -263,13 +263,15 @@ describe('File', function() {
263263
});
264264

265265
it('should create an authorized request', function(done) {
266-
request_Override = function(opts) {
266+
file.bucket.storage.makeAuthorizedRequest_ = function(opts) {
267267
assert.equal(opts.uri, metadata.mediaLink);
268268
done();
269269
};
270+
270271
file.getMetadata = function(callback) {
271272
callback(null, metadata);
272273
};
274+
273275
file.createReadStream();
274276
});
275277

@@ -292,33 +294,129 @@ describe('File', function() {
292294

293295
it('should get readable stream from request', function(done) {
294296
var fakeRequest = { a: 'b', c: 'd' };
295-
file.getMetadata = function(callback) {
296-
callback(null, metadata);
297-
};
297+
298+
// Faking a stream implementation so we can simulate an actual Request
299+
// request. The only thing we want to know is if the data passed to
300+
// request was correct.
298301
request_Override = function(req) {
302+
if (!(this instanceof request_Override)) {
303+
return new request_Override(req);
304+
}
305+
306+
stream.Readable.call(this);
307+
this._read = util.noop;
308+
299309
assert.deepEqual(req, fakeRequest);
300310
done();
301311
};
302-
file.bucket.storage.makeAuthorizedRequest_ = function(opts, callback) {
303-
(callback.onAuthorized || callback)(null, fakeRequest);
304-
};
305-
file.createReadStream();
306-
});
312+
nodeutil.inherits(request_Override, stream.Readable);
307313

308-
it('should set readable stream', function() {
309-
var dup = duplexify();
310314
file.getMetadata = function(callback) {
311315
callback(null, metadata);
312316
};
313-
request_Override = function() {
314-
return dup;
315-
};
317+
316318
file.bucket.storage.makeAuthorizedRequest_ = function(opts, callback) {
317-
(callback.onAuthorized || callback)();
319+
(callback.onAuthorized || callback)(null, fakeRequest);
318320
};
321+
319322
file.createReadStream();
320-
assert.deepEqual(readableStream, dup);
321-
readableStream = null;
323+
});
324+
325+
describe('validation', function() {
326+
var data = 'test';
327+
328+
var crc32cBase64 = new Buffer([crc.calculate(data)]).toString('base64');
329+
330+
var md5HashBase64 = crypto.createHash('md5');
331+
md5HashBase64.update(data);
332+
md5HashBase64 = md5HashBase64.digest('base64');
333+
334+
var fakeResponse = {
335+
crc32c: {
336+
headers: { 'x-goog-hash': 'crc32c=####' + crc32cBase64 }
337+
},
338+
md5: {
339+
headers: { 'x-goog-hash': 'md5=' + md5HashBase64 }
340+
}
341+
};
342+
343+
function getFakeRequest(data, fakeResponse) {
344+
function FakeRequest(req) {
345+
if (!(this instanceof FakeRequest)) {
346+
return new FakeRequest(req);
347+
}
348+
349+
var that = this;
350+
351+
stream.Readable.call(this);
352+
this._read = function() {
353+
this.push(data);
354+
this.push(null);
355+
};
356+
357+
setImmediate(function() {
358+
that.emit('complete', fakeResponse);
359+
});
360+
}
361+
nodeutil.inherits(FakeRequest, stream.Readable);
362+
return FakeRequest;
363+
}
364+
365+
beforeEach(function() {
366+
file.metadata.mediaLink = 'http://uri';
367+
368+
file.bucket.storage.makeAuthorizedRequest_ = function(opts, callback) {
369+
(callback.onAuthorized || callback)(null, {});
370+
};
371+
});
372+
373+
it('should validate with crc32c', function(done) {
374+
request_Override = getFakeRequest(data, fakeResponse.crc32c);
375+
376+
file.createReadStream({ validation: 'crc32c' })
377+
.on('error', done)
378+
.on('complete', done);
379+
});
380+
381+
it('should emit an error if crc32c validation fails', function(done) {
382+
request_Override = getFakeRequest('bad-data', fakeResponse.crc32c);
383+
384+
file.createReadStream({ validation: 'crc32c' })
385+
.on('error', function(err) {
386+
assert.equal(err.code, 'CONTENT_DOWNLOAD_MISMATCH');
387+
done();
388+
});
389+
});
390+
391+
it('should validate with md5', function(done) {
392+
request_Override = getFakeRequest(data, fakeResponse.md5);
393+
394+
file.createReadStream({ validation: 'md5' })
395+
.on('error', done)
396+
.on('complete', done);
397+
});
398+
399+
it('should emit an error if md5 validation fails', function(done) {
400+
request_Override = getFakeRequest('bad-data', fakeResponse.crc32c);
401+
402+
file.createReadStream({ validation: 'md5' })
403+
.on('error', function(err) {
404+
assert.equal(err.code, 'CONTENT_DOWNLOAD_MISMATCH');
405+
done();
406+
});
407+
});
408+
409+
it('should default to md5 validation', function(done) {
410+
request_Override = getFakeRequest(data, {
411+
headers: { 'x-goog-hash': 'md5=fakefakefake' }
412+
});
413+
414+
file.createReadStream()
415+
.on('error', function(err) {
416+
assert.equal(err.code, 'CONTENT_DOWNLOAD_MISMATCH');
417+
done();
418+
});
419+
});
322420
});
323421
});
324422

0 commit comments

Comments
 (0)