zlib: add support for concatenated members

kthelgason · bnoordhuis · commit f380db237b2a · 2016-03-15T16:47:10.000+01:00
According to the spec gzipped archives can contain more than one compressed member. Previously Node's gzip implementation would only unzip the first member and throw away the rest of the compressed data. Issue #4306 is an example of this occurring in daily use. Fixes: #4306 PR-URL: #5120 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: James M Snell <jasnell@gmail.com>
diff --git a/src/node_zlib.cc b/src/node_zlib.cc
@@ -41,6 +41,9 @@ enum node_zlib_mode {
   UNZIP
 };
 
+#define GZIP_HEADER_ID1 0x1f
+#define GZIP_HEADER_ID2 0x8b
+#define GZIP_MIN_HEADER_SIZE 10
 
 void InitZlib(v8::Local<v8::Object> target);
 
@@ -254,6 +257,19 @@ class ZCtx : public AsyncWrap {
             ctx->err_ = Z_NEED_DICT;
           }
         }
+        while (ctx->strm_.avail_in >= GZIP_MIN_HEADER_SIZE &&
+               ctx->mode_ == GUNZIP) {
+          // Bytes remain in input buffer. Perhaps this is another compressed
+          // member in the same archive, or just trailing garbage.
+          // Check the header to find out.
+          if (ctx->strm_.next_in[0] != GZIP_HEADER_ID1 ||
+              ctx->strm_.next_in[1] != GZIP_HEADER_ID2) {
+            // Not a valid gzip member
+            break;
+          }
+          Reset(ctx);
+          ctx->err_ = inflate(&ctx->strm_, ctx->flush_);
+        }
         break;
       default:
         CHECK(0 && "wtf?");
@@ -524,10 +540,12 @@ class ZCtx : public AsyncWrap {
     switch (ctx->mode_) {
       case DEFLATE:
       case DEFLATERAW:
+      case GZIP:
         ctx->err_ = deflateReset(&ctx->strm_);
         break;
       case INFLATE:
       case INFLATERAW:
+      case GUNZIP:
         ctx->err_ = inflateReset(&ctx->strm_);
         break;
       default:
diff --git a/test/parallel/test-zlib-from-concatenated-gzip.js b/test/parallel/test-zlib-from-concatenated-gzip.js
@@ -0,0 +1,18 @@
+'use strict';
+// Test unzipping a gzip file that contains multiple concatenated "members"
+
+const common = require('../common');
+const assert = require('assert');
+const zlib = require('zlib');
+
+const data = Buffer.concat([
+  zlib.gzipSync('abc'),
+  zlib.gzipSync('def')
+]);
+
+assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
+
+zlib.gunzip(data, common.mustCall((err, result) => {
+  assert.ifError(err);
+  assert.equal(result, 'abcdef', 'result should match original string');
+}));
diff --git a/test/parallel/test-zlib-from-gzip-with-trailing-garbage.js b/test/parallel/test-zlib-from-gzip-with-trailing-garbage.js
@@ -0,0 +1,50 @@
+'use strict';
+// test unzipping a gzip file that has trailing garbage
+
+const common = require('../common');
+const assert = require('assert');
+const zlib = require('zlib');
+
+// should ignore trailing null-bytes
+let data = Buffer.concat([
+  zlib.gzipSync('abc'),
+  zlib.gzipSync('def'),
+  Buffer(10).fill(0)
+]);
+
+assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
+
+zlib.gunzip(data, common.mustCall((err, result) => {
+  assert.ifError(err);
+  assert.equal(result, 'abcdef', 'result should match original string');
+}));
+
+// if the trailing garbage happens to look like a gzip header, it should
+// throw an error.
+data = Buffer.concat([
+  zlib.gzipSync('abc'),
+  zlib.gzipSync('def'),
+  Buffer([0x1f, 0x8b, 0xff, 0xff]),
+  Buffer(10).fill(0)
+]);
+
+assert.throws(() => zlib.gunzipSync(data));
+
+zlib.gunzip(data, common.mustCall((err, result) => {
+  assert(err);
+}));
+
+// In this case the trailing junk is too short to be a gzip segment
+// So we ignore it and decompression succeeds.
+data = Buffer.concat([
+  zlib.gzipSync('abc'),
+  zlib.gzipSync('def'),
+  Buffer([0x1f, 0x8b, 0xff, 0xff])
+]);
+
+assert.equal(zlib.gunzipSync(data).toString(), 'abcdef');
+
+zlib.gunzip(data, common.mustCall((err, result) => {
+  assert.ifError(err);
+  assert.equal(result, 'abcdef', 'result should match original string');
+}));