From dc58a76469f0a76ad62ad281592794028988abe6 Mon Sep 17 00:00:00 2001
From: Yigal <yigal@loadmill.com>
Date: Thu, 18 Jul 2024 11:36:41 +0300
Subject: [PATCH] feat(brotli): use xregexp to protect against attack vectors

---
 package.json           |   8 +--
 src/node/decompress.js |  15 ++++++
 src/node/index.js      |  55 ++++++++++++---------
 src/node/unzip.js      |  17 ++++---
 src/utils.js           |  35 +++++++++++++
 test/node/inflate.js   |  84 ++++++++++++++++++++++++++++++++
 test/node/utils.js     | 108 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 287 insertions(+), 35 deletions(-)
 create mode 100644 src/node/decompress.js

diff --git a/package.json b/package.json
index a371d7bf8..decdd673a 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,8 @@
   "browser": {
     "./src/node/index.js": "./src/client.js",
     "./lib/node/index.js": "./lib/client.js",
-    "./test/support/server.js": "./test/support/blank.js"
+    "./test/support/server.js": "./test/support/blank.js",
+    "re2": false
   },
   "bugs": {
     "url": "https://github.com/ladjs/superagent/issues"
@@ -26,7 +27,8 @@
     "formidable": "^3.5.1",
     "methods": "^1.1.2",
     "mime": "2.6.0",
-    "qs": "^6.11.0"
+    "qs": "^6.11.0",
+    "re2": "^1.21.3"
   },
   "devDependencies": {
     "@babel/cli": "^7.20.7",
@@ -62,7 +64,7 @@
     "rimraf": "3",
     "should": "^13.2.3",
     "should-http": "^0.1.1",
-    "tinyify": "3.0.0",
+    "tinyify": "4.0.0",
     "xo": "^0.53.1",
     "zuul": "^3.12.0"
   },
diff --git a/src/node/decompress.js b/src/node/decompress.js
new file mode 100644
index 000000000..81de84412
--- /dev/null
+++ b/src/node/decompress.js
@@ -0,0 +1,15 @@
+const zlib = require('zlib');
+const utils = require('../utils');
+const { isGzipOrDeflateEncoding, isBrotliEncoding } = utils;
+
+exports.chooseDecompresser = (res) => {
+  let decompresser;
+  if (isGzipOrDeflateEncoding(res)) {
+    decompresser = zlib.createUnzip();
+  } else if (isBrotliEncoding(res)) {
+    decompresser = zlib.createBrotliDecompress();
+  } else {
+    throw new Error('unknown content-encoding');
+  }
+  return decompresser;
+}
diff --git a/src/node/index.js b/src/node/index.js
index 75217c8bd..74b357005 100644
--- a/src/node/index.js
+++ b/src/node/index.js
@@ -21,10 +21,11 @@ const safeStringify = require('fast-safe-stringify');
 const utils = require('../utils');
 const RequestBase = require('../request-base');
 const http2 = require('./http2wrapper');
-const { unzip } = require('./unzip');
+const { decompress } = require('./unzip');
 const Response = require('./response');
 
-const { mixin, hasOwn } = utils;
+const { mixin, hasOwn, isBrotliEncoding, isGzipOrDeflateEncoding } = utils;
+const { chooseDecompresser } = require('./decompress');
 
 function request(method, url) {
   // callback
@@ -446,9 +447,11 @@ Request.prototype._pipeContinue = function (stream, options) {
     this._emitResponse();
     if (this._aborted) return;
 
-    if (this._shouldUnzip(res)) {
-      const unzipObject = zlib.createUnzip();
-      unzipObject.on('error', (error) => {
+    if (this._shouldDecompress(res)) {
+
+      let decompresser = chooseDecompresser(res);
+
+      decompresser.on('error', (error) => {
         if (error && error.code === 'Z_BUF_ERROR') {
           // unexpected end of file is ignored by browsers and curl
           stream.emit('end');
@@ -457,9 +460,9 @@ Request.prototype._pipeContinue = function (stream, options) {
 
         stream.emit('error', error);
       });
-      res.pipe(unzipObject).pipe(stream, options);
-      // don't emit 'end' until unzipObject has completed writing all its data.
-      unzipObject.once('end', () => this.emit('end'));
+      res.pipe(decompresser).pipe(stream, options);
+      // don't emit 'end' until decompresser has completed writing all its data.
+      decompresser.once('end', () => this.emit('end'));
     } else {
       res.pipe(stream, options);
       res.once('end', () => this.emit('end'));
@@ -1045,8 +1048,8 @@ Request.prototype._end = function () {
     }
 
     // zlib support
-    if (this._shouldUnzip(res)) {
-      unzip(req, res);
+    if (this._shouldDecompress(res)) {
+      decompress(req, res);
     }
 
     let buffer = this._buffer;
@@ -1275,22 +1278,11 @@ Request.prototype._end = function () {
 };
 
 // Check whether response has a non-0-sized gzip-encoded body
-Request.prototype._shouldUnzip = (res) => {
-  if (res.statusCode === 204 || res.statusCode === 304) {
-    // These aren't supposed to have any body
-    return false;
-  }
-
-  // header content is a string, and distinction between 0 and no information is crucial
-  if (res.headers['content-length'] === '0') {
-    // We know that the body is empty (unfortunately, this check does not cover chunked encoding)
-    return false;
-  }
-
-  // console.log(res);
-  return /^\s*(?:deflate|gzip)\s*$/.test(res.headers['content-encoding']);
+Request.prototype._shouldDecompress = (res) => {
+  return hasNonEmptyResponseContent(res) && (isGzipOrDeflateEncoding(res) || isBrotliEncoding(res));
 };
 
+
 /**
  * Overrides DNS for selected hostnames. Takes object mapping hostnames to IP addresses.
  *
@@ -1411,3 +1403,18 @@ function isJSON(mime) {
 function isRedirect(code) {
   return [301, 302, 303, 305, 307, 308].includes(code);
 }
+
+function hasNonEmptyResponseContent(res) {
+  if (res.statusCode === 204 || res.statusCode === 304) {
+    // These aren't supposed to have any body
+    return false;
+  }
+
+  // header content is a string, and distinction between 0 and no information is crucial
+  if (res.headers['content-length'] === '0') {
+    // We know that the body is empty (unfortunately, this check does not cover chunked encoding)
+    return false;
+  }
+
+  return true;
+}
diff --git a/src/node/unzip.js b/src/node/unzip.js
index 46182f4da..1eeff9802 100644
--- a/src/node/unzip.js
+++ b/src/node/unzip.js
@@ -4,25 +4,26 @@
 
 const { StringDecoder } = require('string_decoder');
 const Stream = require('stream');
-const zlib = require('zlib');
+const { chooseDecompresser } = require('./decompress');
 
 /**
- * Buffers response data events and re-emits when they're unzipped.
+ * Buffers response data events and re-emits when they're decompressed.
  *
  * @param {Request} req
  * @param {Response} res
  * @api private
  */
 
-exports.unzip = (request, res) => {
-  const unzip = zlib.createUnzip();
+exports.decompress = (request, res) => {
+  let decompresser = chooseDecompresser(res);
+
   const stream = new Stream();
   let decoder;
 
   // make node responseOnEnd() happy
   stream.req = request;
 
-  unzip.on('error', (error) => {
+  decompresser.on('error', (error) => {
     if (error && error.code === 'Z_BUF_ERROR') {
       // unexpected end of file is ignored by browsers and curl
       stream.emit('end');
@@ -33,7 +34,7 @@ exports.unzip = (request, res) => {
   });
 
   // pipe to unzip
-  res.pipe(unzip);
+  res.pipe(decompresser);
 
   // override `setEncoding` to capture encoding
   res.setEncoding = (type) => {
@@ -41,7 +42,7 @@ exports.unzip = (request, res) => {
   };
 
   // decode upon decompressing with captured encoding
-  unzip.on('data', (buf) => {
+  decompresser.on('data', (buf) => {
     if (decoder) {
       const string_ = decoder.write(buf);
       if (string_.length > 0) stream.emit('data', string_);
@@ -50,7 +51,7 @@ exports.unzip = (request, res) => {
     }
   });
 
-  unzip.on('end', () => {
+  decompresser.on('end', () => {
     stream.emit('end');
   });
 
diff --git a/src/utils.js b/src/utils.js
index bd8333f90..0ee42dcca 100644
--- a/src/utils.js
+++ b/src/utils.js
@@ -1,3 +1,18 @@
+let RE2, hasRE2;
+
+const SafeRegExp = hasRE2 !== false
+      ? (() => {
+          if (typeof RE2 === 'function') return RE2;
+          try {
+            RE2 = require('re2');
+            return typeof RE2 === 'function' ? RE2 : RegExp;
+          } catch {
+            hasRE2 = false;
+            return RegExp;
+          }
+        })()
+      : RegExp;
+
 /**
  * Return the mime type for the given `str`.
  *
@@ -105,3 +120,23 @@ exports.mixin = (target, source) => {
     }
   }
 };
+
+/**
+ * Check if the response is compressed using Gzip or Deflate.
+ * @param {Object} res
+ * @return {Boolean}
+ */
+
+exports.isGzipOrDeflateEncoding = (res) => {
+  return new SafeRegExp(/^\s*(?:deflate|gzip)\s*$/).test(res.headers['content-encoding']);
+};
+
+/**
+ * Check if the response is compressed using Brotli.
+ * @param {Object} res
+ * @return {Boolean}
+ */
+
+exports.isBrotliEncoding = (res) => {
+  return new SafeRegExp(/^\s*(?:br)\s*$/).test(res.headers['content-encoding']);
+};
diff --git a/test/node/inflate.js b/test/node/inflate.js
index 1a849e440..7b48a1e01 100644
--- a/test/node/inflate.js
+++ b/test/node/inflate.js
@@ -36,11 +36,24 @@ app.get('/binary', (request_, res) => {
     res.send(buf);
   });
 });
+
+app.get('/binary-brotli', (request_, res) => {
+  zlib.brotliCompress(subject, (error, buf) => {
+    res.set('Content-Encoding', 'br');
+    res.send(buf);
+  });
+});
+
 app.get('/corrupt', (request_, res) => {
   res.set('Content-Encoding', 'gzip');
   res.send('blah');
 });
 
+app.get('/corrupt-brotli', (request_, res) => {
+  res.set('Content-Encoding', 'br');
+  res.send('blah');
+});
+
 app.get('/nocontent', (request_, res, next) => {
   res.statusCode = 204;
   res.set('Content-Type', 'text/plain');
@@ -48,6 +61,13 @@ app.get('/nocontent', (request_, res, next) => {
   res.send('');
 });
 
+app.get('/nocontent-brotli', (request_, res, next) => {
+  res.statusCode = 204;
+  res.set('Content-Type', 'text/plain');
+  res.set('Content-Encoding', 'br');
+  res.send('');
+});
+
 app.get('/', (request_, res, next) => {
   zlib.deflate(subject, (error, buf) => {
     res.set('Content-Type', 'text/plain');
@@ -65,6 +85,15 @@ app.get('/junk', (request_, res) => {
   });
 });
 
+app.get('/junk-brotli', (request_, res) => {
+  zlib.brotliCompress(subject, (error, buf) => {
+    res.set('Content-Type', 'text/plain');
+    res.set('Content-Encoding', 'br');
+    res.write(buf);
+    res.end(' 0 junk');
+  });
+});
+
 app.get('/chopped', (request_, res) => {
   zlib.deflate(`${subject}123456`, (error, buf) => {
     res.set('Content-Type', 'text/plain');
@@ -73,6 +102,14 @@ app.get('/chopped', (request_, res) => {
   });
 });
 
+app.get('/chopped-brotli', (request_, res) => {
+  zlib.brotliCompress(`${subject}123456`, (error, buf) => {
+    res.set('Content-Type', 'text/plain');
+    res.set('Content-Encoding', 'br');
+    res.send(buf.slice(0, -1));
+  });
+});
+
 describe('zlib', () => {
   it('should deflate the content', (done) => {
     request.get(base).end((error, res) => {
@@ -106,6 +143,14 @@ describe('zlib', () => {
     });
   });
 
+  it('should ignore trailing junk-brotli', (done) => {
+    request.get(`${base}/junk-brotli`).end((error, res) => {
+      res.should.have.status(200);
+      res.text.should.equal(subject);
+      done();
+    });
+  });
+
   it('should ignore missing data', (done) => {
     request.get(`${base}/chopped`).end((error, res) => {
       assert.equal(undefined, error);
@@ -115,6 +160,15 @@ describe('zlib', () => {
     });
   });
 
+  it('should ignore missing brotli data', (done) => {
+    request.get(`${base}/chopped-brotli`).end((error, res) => {
+      assert.equal(undefined, error);
+      res.should.have.status(200);
+      res.text.should.startWith(subject);
+      done();
+    });
+  });
+
   it('should handle corrupted responses', (done) => {
     request.get(`${base}/corrupt`).end((error, res) => {
       assert(error, 'missing error');
@@ -123,6 +177,13 @@ describe('zlib', () => {
     });
   });
 
+  it('should handle brotli corrupted responses', (done) => {
+    request.get(`${base}/corrupt-brotli`).end((error, res) => {
+      res.text.should.equal('');
+      done();
+    });
+  });
+
   it('should handle no content with gzip header', (done) => {
     request.get(`${base}/nocontent`).end((error, res) => {
       assert.ifError(error);
@@ -134,6 +195,17 @@ describe('zlib', () => {
     });
   });
 
+  it('should handle no content with gzip header', (done) => {
+    request.get(`${base}/nocontent-brotli`).end((error, res) => {
+      assert.ifError(error);
+      assert(res);
+      res.should.have.status(204);
+      res.text.should.equal('');
+      res.headers.should.not.have.property('content-length');
+      done();
+    });
+  });
+
   describe('without encoding set', () => {
     it('should buffer if asked', () => {
       return request
@@ -147,6 +219,18 @@ describe('zlib', () => {
         });
     });
 
+    it('should buffer Brotli if asked', () => {
+      return request
+        .get(`${base}/binary-brotli`)
+        .buffer(true)
+        .then((res) => {
+          res.should.have.status(200);
+          assert(res.headers['content-length']);
+          assert(res.body.byteLength);
+          assert.equal(subject, res.body.toString());
+        });
+    });
+
     it('should emit buffers', (done) => {
       request.get(`${base}/binary`).end((error, res) => {
         res.should.have.status(200);
diff --git a/test/node/utils.js b/test/node/utils.js
index e0ba5e57d..50980bc18 100644
--- a/test/node/utils.js
+++ b/test/node/utils.js
@@ -39,3 +39,111 @@ describe('utils.parseLinks(str)', () => {
     );
   });
 });
+
+describe('utils.isGzipOrDeflateEncoding(res)', () => {
+  it('should return true when content encoding is gzip', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': 'gzip',
+      },
+    }).should.equal(true);
+  });
+  it('should return true when content encoding is deflate', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': 'deflate',
+      },
+    }).should.equal(true);
+  });
+  it('should return false when content encoding is bla', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': 'bla',
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding has a lot of spaces followed with gzip', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': " " * 10**6 + " gzip",
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding repeates it self', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': "gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate gzip deflate",
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding repeates it self wuth a lot of spaces', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': " gzip   deflate   gzip   deflate   gzip   deflate   gzip   deflate   gzip   deflate   gzip   deflate",
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding - nested patterns', () => {
+    utils.isGzipOrDeflateEncoding({
+      headers: {
+        'content-encoding': " " * 10**5 + ("gzip deflate " * 1000)
+      },
+    }).should.equal(false);
+  });
+
+  
+});
+
+describe('utils.isBrotliEncoding(res)', () => {
+  it('should return true when content encoding is br', () => {
+    utils.isBrotliEncoding({
+      headers: {
+        'content-encoding': 'br',
+      },
+    }).should.equal(true);
+  });
+  it('should return false when content encoding is bla', () => {
+    utils.isBrotliEncoding({
+      headers: {
+        'content-encoding': 'bla',
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding has a lot of spaces followed with br', () => {
+    utils.isBrotliEncoding({
+      headers: {
+        'content-encoding': " " * 10**6 + " br",
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding repeates it self', () => {
+    utils.isBrotliEncoding({
+      headers: {
+        'content-encoding': " br br br br br  br br br br br  br br br br br br br br br br  br br br br br  br br br br br",
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding repeates it self wuth a lot of spaces', () => {
+    utils.isBrotliEncoding({
+      headers: {
+        'content-encoding': "br     br     br     br     br     br     br     br     br     br     br     br     br     br",
+      },
+    }).should.equal(false);
+  });
+  
+  it('should return true when content encoding - nested patterns', () => {
+    utils.isBrotliEncoding({
+      headers: {
+        'content-encoding': " " * 10**5 + ("br " * 1000)
+      },
+    }).should.equal(false);
+  });
+  
+});