Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support brotli compression of HTTP responses #2945

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/2518.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for ``br`` (brotli) Content-Encoding compression (enabled if ``brotlipy`` is installed).
4 changes: 2 additions & 2 deletions aiohttp/client_reqrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from multidict import CIMultiDict, CIMultiDictProxy, MultiDict, MultiDictProxy
from yarl import URL

from . import hdrs, helpers, http, multipart, payload
from . import compression, hdrs, helpers, http, multipart, payload
from .client_exceptions import (ClientConnectionError, ClientOSError,
ClientResponseError, ContentTypeError,
InvalidURL, ServerFingerprintMismatch)
Expand Down Expand Up @@ -152,7 +152,7 @@ class ClientRequest:

DEFAULT_HEADERS = {
hdrs.ACCEPT: '*/*',
hdrs.ACCEPT_ENCODING: 'gzip, deflate',
hdrs.ACCEPT_ENCODING: compression.DEFAULT_ACCEPT_ENCODING,
}

body = b''
Expand Down
198 changes: 198 additions & 0 deletions aiohttp/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import enum
import zlib

from .http_exceptions import ContentEncodingError


try:
    import brotli
except ImportError:  # pragma: nocover
    # Brotli is optional; ``None`` marks it as unavailable for the rest of
    # the module.
    brotli = None


# Default ``Accept-Encoding`` value: ``br`` is only advertised when the
# brotli module could be imported.
DEFAULT_ACCEPT_ENCODING = (
    'gzip, deflate' if brotli is None else 'gzip, deflate, br')


class ContentCoding(enum.Enum):
    """Content codings that this module has support for."""

    # The content codings that we have support for.
    #
    # Additional registered codings are listed at:
    # https://www.iana.org/assignments/http-parameters/http-parameters.xhtml#content-coding
    deflate = 'deflate'
    gzip = 'gzip'
    identity = 'identity'
    br = 'br'

    @staticmethod
    def _is_refused(params):
        """Return True if *params* carries a ``q`` weight equal to zero.

        *params* is the text following the first ``;`` of one
        Accept-Encoding entry (possibly empty).  A weight that cannot be
        parsed as a number is treated as "not refused".
        """
        for param in params.split(';'):
            key, _, value = param.partition('=')
            if key.strip() != 'q':
                continue
            try:
                return float(value.strip()) == 0
            except ValueError:
                return False
        return False

    @classmethod
    def get_from_accept_encoding(cls, accept_encoding):
        """Pick a supported coding for an ``Accept-Encoding`` header value.

        Members are tried in declaration order (server preference) and the
        first one whose value occurs in an acceptable header entry is
        returned.  Entries weighted ``q=0`` are explicitly refused by the
        client and are ignored.  ``br`` is skipped when the brotli module
        is unavailable.  Returns ``None`` when nothing matches.
        """
        # Review note (PR discussion): it would be nice to parse the
        # Accept-Encoding header properly and respect weights instead of
        # selecting the first supported compression based on server
        # preference order.  Multiple Accept-Encoding headers are also
        # allowed by the HTTP spec; we could either accept a list of
        # headers (req.headers.getall('Accept-Encoding')) or the headers
        # param itself.
        offered = []
        for entry in accept_encoding.lower().split(','):
            name, _, params = entry.partition(';')
            name = name.strip()
            if name and not cls._is_refused(params):
                offered.append(name)

        for coding in cls:
            # Substring match per entry keeps aliases such as ``x-gzip``
            # working, as they did with the whole-header substring test.
            if not any(coding.value in name for name in offered):
                continue
            if coding is cls.br and brotli is None:
                continue
            return coding
        return None

    @classmethod
    def values(cls):
        """Return a frozenset of all member values, cached on the class."""
        # Review note (PR discussion): what is the usage of this method?
        # Could we utilize Enum.__members__ or another existing Enum API?
        _values = getattr(cls, '_values', None)
        if _values is None:
            cls._values = _values = frozenset({c.value for c in cls})
        return _values


def get_compressor(encoding):
    """Return a new compressor object for the content coding *encoding*.

    ``'gzip'`` and ``'deflate'`` yield a ``ZlibCompressor``, ``'br'`` a
    ``BrotliCompressor`` and ``'identity'`` yields ``None`` (no
    compression).  Any other value raises ``RuntimeError``.
    """
    # Review note (PR discussion): could we switch here to the
    # ContentCoding enum?  Maybe make it a ContentCoding member function?
    # The same for decompressor.
    if encoding == 'gzip':
        return ZlibCompressor.gzip()
    elif encoding == 'deflate':
        return ZlibCompressor.deflate()
    elif encoding == 'br':
        return BrotliCompressor()
    elif encoding == 'identity':
        return None
    else:
        # Same wording as get_decompressor().
        raise RuntimeError('Encoding %s is not supported' % encoding)


class ZlibCompressor:
    """Streaming compressor wrapping a ``zlib`` compression object."""

    def __init__(self, wbits):
        """Create the underlying ``zlib.compressobj`` with *wbits*."""
        self._finished = False
        self._compress = zlib.compressobj(wbits=wbits)

    @classmethod
    def gzip(cls):
        """Build a compressor configured with ``16 + zlib.MAX_WBITS``."""
        # Review note (PR discussion): I prefer making a GZipCompressor
        # class derived from ZlibCompressor over class methods for
        # building the gzip and deflate compressors.
        gzip_wbits = 16 + zlib.MAX_WBITS
        return cls(wbits=gzip_wbits)

    @classmethod
    def deflate(cls):
        """Build a compressor configured with ``-zlib.MAX_WBITS``."""
        raw_wbits = -zlib.MAX_WBITS
        return cls(wbits=raw_wbits)

    def compress(self, data):
        """Feed *data* to the compressor and return the bytes it emits."""
        compressed = self._compress.compress(data)
        return compressed

    def finish(self):
        """Flush and return the remaining output; allowed only once.

        Raises ``RuntimeError`` when called a second time.
        """
        if not self._finished:
            self._finished = True
            return self._compress.flush()
        raise RuntimeError('Compressor is finished!')


class BrotliCompressor:
    """Streaming compressor wrapping ``brotli.Compressor``."""

    def __init__(self):
        """Create the underlying brotli compressor.

        Raises ``ContentEncodingError`` when the brotli module is not
        available.
        """
        if brotli is None:  # pragma: no cover
            # This is the encoding path, so the message says "encode".
            raise ContentEncodingError(
                'Can not encode content-encoding: brotli (br). '
                'Please install `brotlipy`')
        self._compress = brotli.Compressor()

    def compress(self, data):
        """Feed *data* to the compressor and return the bytes it emits."""
        return self._compress.compress(data)

    def finish(self):
        """Finish the stream and return the remaining compressed bytes."""
        return self._compress.finish()


def decompress(encoding, data):
    """Decompress the complete payload *data* encoded with *encoding*.

    ``'identity'`` returns *data* untouched.  Otherwise the decompressor
    from ``get_decompressor`` is used; ``ContentEncodingError`` is raised
    if it does not report end-of-stream after consuming *data*.
    """
    # Review note (PR discussion): as I see, the function is used only
    # once, in the multipart module.  Maybe direct usage of the
    # decompressor classes there is enough?
    if encoding == 'identity':
        return data

    decoder = get_decompressor(encoding)
    head = decoder.decompress(data)
    tail = decoder.flush()
    result = head + tail
    if decoder.eof:
        return result
    raise ContentEncodingError(
        'Can not decode content-encoding: %s' % encoding)


def get_decompressor(encoding):
    """Return a new decompressor object for the content coding *encoding*.

    Supports ``'gzip'``, ``'deflate'`` and ``'br'``; any other value
    raises ``RuntimeError``.
    """
    if encoding == 'gzip':
        return GzipDecompressor()
    if encoding == 'deflate':
        return DeflateDecompressor()
    if encoding == 'br':
        return BrotliDecompressor()
    raise RuntimeError('Encoding %s is not supported' % encoding)


class DeflateDecompressor:
    """Streaming ``deflate`` decompressor with a one-time zlib fallback.

    Decoding starts with ``wbits=-zlib.MAX_WBITS``.  If the very first
    data fails to decode, the decompressor is replaced once with a
    default ``zlib.decompressobj()`` and the chunk is retried.  See #1918
    for details.
    """

    __slots__ = ('_decompressor', '_started_decoding', '_fallback_used')

    def __init__(self):
        self._decompressor = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
        # True once any output has been produced.
        self._started_decoding = False
        # True once the alternative wbits setting has been tried.
        self._fallback_used = False

    def decompress(self, chunk):
        """Decompress *chunk* and return the produced bytes.

        Raises ``zlib.error`` if the data cannot be decoded with either
        wbits setting, or if decoding fails after output has already been
        produced.
        """
        try:
            decompressed = self._decompressor.decompress(chunk)
        except zlib.error:
            # Try another wbits setting. See #1918 for details.
            # The fallback is allowed only once: retrying on every failure
            # recursed without bound when both settings rejected the data.
            if self._started_decoding or self._fallback_used:
                raise
            self._fallback_used = True
            self._decompressor = zlib.decompressobj()
            decompressed = self._decompressor.decompress(chunk)
        if decompressed:
            self._started_decoding = True
        return decompressed

    def flush(self):
        """Return whatever output the underlying decompressor still holds."""
        return self._decompressor.flush()

    @property
    def eof(self):
        """Whether the underlying decompressor reached end-of-stream."""
        return self._decompressor.eof


class GzipDecompressor:
    """Streaming decompressor using ``zlib`` with ``16 + MAX_WBITS``."""

    __slots__ = ('_decompressor',)

    def __init__(self):
        gzip_wbits = zlib.MAX_WBITS + 16
        self._decompressor = zlib.decompressobj(wbits=gzip_wbits)

    def decompress(self, chunk):
        """Decompress *chunk* and return the produced bytes."""
        output = self._decompressor.decompress(chunk)
        return output

    def flush(self):
        """Return whatever output the underlying decompressor still holds."""
        remainder = self._decompressor.flush()
        return remainder

    @property
    def eof(self):
        """Whether the underlying decompressor reached end-of-stream."""
        return self._decompressor.eof


class BrotliDecompressor:
    """Streaming decompressor wrapping ``brotli.Decompressor``."""

    __slots__ = ('_decompressor', '_eof')

    def __init__(self):
        """Create the underlying brotli decompressor.

        Raises ``ContentEncodingError`` when the brotli module is not
        available.
        """
        if brotli is None:  # pragma: no cover
            raise ContentEncodingError(
                'Can not decode content-encoding: brotli (br). '
                'Please install `brotlipy`')
        # ``None`` means end-of-stream has not been checked yet.
        self._eof = None
        self._decompressor = brotli.Decompressor()

    def decompress(self, chunk):
        """Decompress *chunk*; a ``bytearray`` is first copied to ``bytes``."""
        payload = bytes(chunk) if isinstance(chunk, bytearray) else chunk
        return self._decompressor.decompress(payload)

    def flush(self):
        """Return ``b''``: there is never buffered output to flush."""
        # Brotli decompression is eager.
        return b''

    @property
    def eof(self):
        """Whether the stream is complete; computed once, then cached.

        The first access calls ``finish()`` on the underlying
        decompressor; a ``brotli.Error`` from it is recorded as ``False``.
        """
        if self._eof is None:
            try:
                self._decompressor.finish()
            except brotli.Error:
                self._eof = False
            else:
                self._eof = True
        return self._eof
39 changes: 6 additions & 33 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import collections
import re
import string
import zlib
from enum import IntEnum

from multidict import CIMultiDict
from yarl import URL

from . import hdrs
from .compression import get_decompressor
from .helpers import NO_EXTENSIONS
from .http_exceptions import (BadStatusLine, ContentEncodingError,
ContentLengthError, InvalidHeader, LineTooLong,
Expand All @@ -17,13 +17,6 @@
from .streams import EMPTY_PAYLOAD, StreamReader


try:
import brotli
HAS_BROTLI = True
except ImportError: # pragma: no cover
HAS_BROTLI = False


__all__ = (
'HttpParser', 'HttpRequestParser', 'HttpResponseParser',
'RawRequestMessage', 'RawResponseMessage')
Expand Down Expand Up @@ -620,18 +613,7 @@ def __init__(self, out, encoding):
self.out = out
self.size = 0
self.encoding = encoding
self._started_decoding = False

if encoding == 'br':
if not HAS_BROTLI: # pragma: no cover
raise ContentEncodingError(
'Can not decode content-encoding: brotli (br). '
'Please install `brotlipy`')
self.decompressor = brotli.Decompressor()
else:
zlib_mode = (16 + zlib.MAX_WBITS
if encoding == 'gzip' else -zlib.MAX_WBITS)
self.decompressor = zlib.decompressobj(wbits=zlib_mode)
self.decompressor = get_decompressor(encoding)

def set_exception(self, exc):
self.out.set_exception(exc)
Expand All @@ -641,28 +623,19 @@ def feed_data(self, chunk, size):
try:
chunk = self.decompressor.decompress(chunk)
except Exception:
if not self._started_decoding and self.encoding == 'deflate':
self.decompressor = zlib.decompressobj()
try:
chunk = self.decompressor.decompress(chunk)
except Exception:
raise ContentEncodingError(
'Can not decode content-encoding: %s' % self.encoding)
else:
raise ContentEncodingError(
'Can not decode content-encoding: %s' % self.encoding)
raise ContentEncodingError(
'Can not decode content-encoding: %s' % self.encoding)

if chunk:
self._started_decoding = True
self.out.feed_data(chunk, len(chunk))

def feed_eof(self):
chunk = self.decompressor.flush()

if chunk or self.size > 0:
self.out.feed_data(chunk, len(chunk))
if self.encoding != 'br' and not self.decompressor.eof:
raise ContentEncodingError('deflate')
if not self.decompressor.eof:
raise ContentEncodingError(self.encoding)

self.out.feed_eof()

Expand Down
8 changes: 3 additions & 5 deletions aiohttp/http_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import asyncio
import collections
import zlib

from .abc import AbstractStreamWriter
from .compression import get_compressor


__all__ = ('StreamWriter', 'HttpVersion', 'HttpVersion10', 'HttpVersion11')
Expand Down Expand Up @@ -44,9 +44,7 @@ def enable_chunking(self):
self.chunked = True

def enable_compression(self, encoding='deflate'):
zlib_mode = (16 + zlib.MAX_WBITS
if encoding == 'gzip' else -zlib.MAX_WBITS)
self._compress = zlib.compressobj(wbits=zlib_mode)
self._compress = get_compressor(encoding)

def _write(self, chunk):
size = len(chunk)
Expand Down Expand Up @@ -112,7 +110,7 @@ async def write_eof(self, chunk=b''):
if chunk:
chunk = self._compress.compress(chunk)

chunk = chunk + self._compress.flush()
chunk = chunk + self._compress.finish()
if chunk and self.chunked:
chunk_len = ('%x\r\n' % len(chunk)).encode('ascii')
chunk = chunk_len + chunk + b'\r\n0\r\n\r\n'
Expand Down
20 changes: 5 additions & 15 deletions aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
import re
import uuid
import warnings
import zlib
from collections import Mapping, Sequence, deque
from urllib.parse import parse_qsl, unquote, urlencode

from multidict import CIMultiDict

from .compression import ContentCoding, decompress, get_compressor
from .hdrs import (CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LENGTH,
CONTENT_TRANSFER_ENCODING, CONTENT_TYPE)
from .helpers import CHAR, TOKEN, parse_mimetype, reify
Expand Down Expand Up @@ -386,15 +386,7 @@ def decode(self, data):

def _decode_content(self, data):
encoding = self.headers[CONTENT_ENCODING].lower()

if encoding == 'deflate':
return zlib.decompress(data, -zlib.MAX_WBITS)
elif encoding == 'gzip':
return zlib.decompress(data, 16 + zlib.MAX_WBITS)
elif encoding == 'identity':
return data
else:
raise RuntimeError('unknown content encoding: {}'.format(encoding))
return decompress(encoding, data)

def _decode_content_transfer(self, data):
encoding = self.headers[CONTENT_TRANSFER_ENCODING].lower()
Expand Down Expand Up @@ -727,7 +719,7 @@ def append_payload(self, payload):

# compression
encoding = payload.headers.get(CONTENT_ENCODING, '').lower()
if encoding and encoding not in ('deflate', 'gzip', 'identity'):
if encoding and encoding not in ContentCoding.values():
raise RuntimeError('unknown content encoding: {}'.format(encoding))
if encoding == 'identity':
encoding = None
Expand Down Expand Up @@ -836,13 +828,11 @@ def enable_encoding(self, encoding):
self._encoding = 'quoted-printable'

def enable_compression(self, encoding='deflate'):
zlib_mode = (16 + zlib.MAX_WBITS
if encoding == 'gzip' else -zlib.MAX_WBITS)
self._compress = zlib.compressobj(wbits=zlib_mode)
self._compress = get_compressor(encoding)

async def write_eof(self):
if self._compress is not None:
chunk = self._compress.flush()
chunk = self._compress.finish()
if chunk:
self._compress = None
await self.write(chunk)
Expand Down
Loading