Skip to content

Commit

Permalink
Merge pull request #95 from julik/add-explicit-chunking
Browse files Browse the repository at this point in the history
Apply explicit chunked encoding
  • Loading branch information
fringd committed Feb 7, 2024
2 parents 8d8394b + e20ed63 commit 8cef8c2
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 160 deletions.
11 changes: 0 additions & 11 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,3 @@ source 'https://rubygems.org'

# Specify your gem's dependencies in zipline.gemspec
gemspec

group :development, :test do
gem 'rspec', '~> 3'
gem 'fog-aws'
gem 'activesupport'
gem 'actionpack'
gem 'aws-sdk-s3'
gem 'carrierwave'
gem 'paperclip'
gem 'rake'
end
37 changes: 34 additions & 3 deletions lib/zipline.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
require 'content_disposition'
require "zipline/version"
require 'zipline/version'
require 'zip_tricks'
require "zipline/zip_generator"
require 'zipline/zip_generator'
require 'zipline/chunked_body'
require 'zipline/tempfile_body'

# class MyController < ApplicationController
# include Zipline
Expand All @@ -18,7 +20,36 @@ def zipline(files, zipname = 'zipline.zip', **kwargs_for_new)
headers['Content-Type'] = Mime::Type.lookup_by_extension('zip').to_s
response.sending_file = true
response.cache_control[:public] ||= false
self.response_body = zip_generator

# Disables Rack::ETag if it is enabled (prevent buffering)
# see https://github.com/rack/rack/issues/1619#issuecomment-606315714
self.response.headers['Last-Modified'] = Time.now.httpdate

if request.get_header("HTTP_VERSION") == "HTTP/1.0"
# If HTTP/1.0 is used it is not possible to stream, and if that happens it usually will be
# unclear why buffering is happening. Some info in the log is the least one can do.
logger.warn { "The downstream HTTP proxy/LB insists on HTTP/1.0 protocol, ZIP response will be buffered." } if logger

# If we use Rack::ContentLength it would run through our ZIP block twice - once to calculate the content length
# of the response, and once - to serve. We can trade performance for disk space and buffer the response into a Tempfile
# since we are already buffering.
tempfile_body = TempfileBody.new(request.env, zip_generator)
headers["Content-Length"] = tempfile_body.size.to_s
headers["X-Zipline-Output"] = "buffered"
self.response_body = tempfile_body
else
# Disable buffering for both nginx and Google Load Balancer, see
# https://cloud.google.com/appengine/docs/flexible/how-requests-are-handled?tab=python#x-accel-buffering
response.headers["X-Accel-Buffering"] = "no"

# Make sure Rack::ContentLength does not try to compute a content length,
# and remove the one already set
headers.delete("Content-Length")

# and send out in chunked encoding
headers["Transfer-Encoding"] = "chunked"
headers["X-Zipline-Output"] = "streamed"
self.response_body = Chunked.new(zip_generator)
end
end
end
31 changes: 31 additions & 0 deletions lib/zipline/chunked_body.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
module Zipline
# A body wrapper that emits chunked responses, creating valid
# "Transfer-Encoding: chunked" HTTP response body. This is copied from Rack::Chunked::Body,
# because Rack is not going to include that class after version 3.x
# Rails has a substitute class for this inside ActionController::Streaming,
# but that module is a private constant in the Rails codebase, and is thus
# considered "private" from the Rails standpoint. It is not that much code to
# carry, so we copy it into our code.
class Chunked
TERM = "\r\n"
TAIL = "0#{TERM}"

def initialize(body)
@body = body
end

# For each string yielded by the response body, yield
# the element in chunked encoding - and finish off with a terminator
def each
term = TERM
@body.each do |chunk|
size = chunk.bytesize
next if size == 0

yield [size.to_s(16), term, chunk.b, term].join
end
yield TAIL
yield term
end
end
end
52 changes: 52 additions & 0 deletions lib/zipline/tempfile_body.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
module Zipline
# Contains a file handle which can be closed once the response finishes sending.
# It supports `to_path` so that `Rack::Sendfile` can intercept it
class TempfileBody
TEMPFILE_NAME_PREFIX = "zipline-tf-body-"
attr_reader :tempfile

# @param body[#each] the enumerable that yields bytes, usually a `RackBody`.
# The `body` will be read in full immediately and closed.
def initialize(env, body)
@tempfile = Tempfile.new(TEMPFILE_NAME_PREFIX)
# Rack::TempfileReaper calls close! on tempfiles which get buffered
# We wil assume that it works fine with Rack::Sendfile (i.e. the path
# to the file getting served gets used before we unlink the tempfile)
env['rack.tempfiles'] ||= []
env['rack.tempfiles'] << @tempfile

@tempfile.binmode

body.each { |bytes| @tempfile << bytes }
body.close if body.respond_to?(:close)

@tempfile.flush
end

# Returns the size of the contained `Tempfile` so that a correct
# Content-Length header can be set
#
# @return [Integer]
def size
@tempfile.size
end

# Returns the path to the `Tempfile`, so that Rack::Sendfile can send this response
# using the downstream webserver
#
# @return [String]
def to_path
@tempfile.to_path
end

# Stream the file's contents if `Rack::Sendfile` isn't present.
#
# @return [void]
def each
@tempfile.rewind
while chunk = @tempfile.read(16384)
yield chunk
end
end
end
end
39 changes: 8 additions & 31 deletions lib/zipline/zip_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,18 @@
module Zipline
class ZipGenerator
# takes an array of pairs [[uploader, filename], ... ]
def initialize(files, **kwargs_for_new)
@files = files
@kwargs_for_new = kwargs_for_new
end

#this is supposed to be streamed!
def to_s
throw "stop!"
def initialize(files, **kwargs_for_streamer)
# Use RackBody as it has buffering built-in in zip_tricks 5.x+
@body = ZipTricks::RackBody.new(**kwargs_for_streamer) do |streamer|
files.each do |file, name, options = {}|
handle_file(streamer, file, name.to_s, options)
end
end
end

def each(&block)
return to_enum(:each) unless block_given?

fake_io_writer = ZipTricks::BlockWrite.new(&block)
# ZipTricks outputs lots of strings in rapid succession, and with
# servers it can be beneficial to avoid doing too many tiny writes so that
# the number of syscalls is minimized. See https://github.com/WeTransfer/zip_tricks/issues/78
# There is a built-in facility for this in ZipTricks which can be used to implement
# some cheap buffering here (it exists both in version 4 and version 5). The buffer is really
# tiny and roughly equal to the medium Linux socket buffer size (16 KB). Although output
# will be not so immediate with this buffering the overall performance will be better,
# especially with multiple clients being serviced at the same time.
# Note that the WriteBuffer writes the same, retained String object - but the contents
# of that object changes between calls. This should work fine with servers where the
# contents of the string gets written to a socket immediately before the execution inside
# the WriteBuffer resumes), but if the strings get retained somewhere - like in an Array -
# this might pose a problem. Unlikely that it will be an issue here though.
write_buffer_size = 16 * 1024
write_buffer = ZipTricks::WriteBuffer.new(fake_io_writer, write_buffer_size)
ZipTricks::Streamer.open(write_buffer, **@kwargs_for_new) do |streamer|
@files.each do |file, name, options = {}|
handle_file(streamer, file, name.to_s, options)
end
end
write_buffer.flush! # for any remaining writes
@body.each(&block)
end

def handle_file(streamer, file, name, options)
Expand Down
87 changes: 0 additions & 87 deletions spec/lib/zipline/zip_generator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,91 +185,4 @@ def create_filename
end
end
end

describe '.write_file' do
let(:file) { StringIO.new('passthrough') }

context 'when passing an ActiveStorage::Filename object as filename' do
let(:filename) { ActiveStorage::Filename.new('test') }

let(:generator) do
Zipline::ZipGenerator.new([[file, filename]])
end

it 'passes a string as filename to ZipTricks' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with('test')
generator.each { |_| 'Test' }
end
end
end

describe 'passing an options hash' do
let(:file) { StringIO.new('passthrough') }

context 'with optional arguments' do
let(:mtime) { 1.day.ago }
let(:generator) do
Zipline::ZipGenerator.new([[file, 'test', modification_time: mtime]])
end

it 'passes the options hash through handle_file' do
expect(generator).to receive(:handle_file)
.with(anything, anything, anything, { modification_time: mtime })
generator.each { |_| 'Test' }
end

it 'passes the options hash to ZipTricks as kwargs' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with(anything, modification_time: mtime)
generator.each { |_| 'Test' }
end
end

context 'without optional arguments' do
let(:generator) do
Zipline::ZipGenerator.new([[file, 'test']])
end

it 'passes the options hash through handle_file' do
expect(generator).to receive(:handle_file)
.with(anything, anything, anything, {})
generator.each { |_| 'Test' }
end

it 'passes the options hash to ZipTricks as kwargs' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with(anything)
generator.each { |_| 'Test' }
end
end

context 'with extra invalid options' do
let(:mtime) { 1.day.ago }
let(:generator) do
Zipline::ZipGenerator.new([[file, 'test', modification_time: mtime, extra: 'invalid']])
end

it 'passes the whole options hash through handle_file' do
expect(generator).to receive(:handle_file)
.with(anything, anything, anything, { modification_time: mtime, extra: 'invalid' })
generator.each { |_| 'Test' }
end

it 'only passes the kwargs to ZipTricks that it expects (i.e., :modification_time)' do
allow(file).to receive(:url).and_return('fakeurl')
expect_any_instance_of(ZipTricks::Streamer).to receive(:write_deflated_file)
.with(anything, modification_time: mtime)
generator.each { |_| 'Test' }
end
end
end
it 'passes along constructor options to ZipTricks streamer' do
expect(ZipTricks::Streamer).to receive(:open).with(anything, { :some => 'options' })
generator = Zipline::ZipGenerator.new([file, 'somefile'], :some => 'options')
generator.each { |_| 'Test' }
end
end
43 changes: 24 additions & 19 deletions spec/lib/zipline/zipline_spec.rb
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
require 'spec_helper'
require 'ostruct'
require 'action_controller'

describe Zipline do
before { Fog.mock! }

let (:undertest) {
class TestZipline
class FakeController < ActionController::Base
include Zipline
def download_zip
files = [
[StringIO.new("File content goes here"), "one.txt"],
[StringIO.new("Some other content goes here"), "two.txt"]
]
zipline(files, 'myfiles.zip', auto_rename_duplicate_filenames: false)
end
end

attr_accessor :headers
attr_accessor :response
attr_accessor :response_body
def initialize
@headers = {}
@response = OpenStruct.new(:cache_control => {}, :headers => {} )
end
include Zipline
end
return TestZipline.new()
}
it 'passes keyword parameters to ZipTricks::Streamer' do
fake_rack_env = {
"HTTP_VERSION" => "HTTP/1.0",
"REQUEST_METHOD" => "GET",
"SCRIPT_NAME" => "",
"PATH_INFO" => "/download",
"QUERY_STRING" => "",
"SERVER_NAME" => "host.example",
"rack.input" => StringIO.new,
}
expect(ZipTricks::Streamer).to receive(:new).with(anything, auto_rename_duplicate_filenames: false).and_call_original

status, headers, body = FakeController.action(:download_zip).call(fake_rack_env)

it 'passes arguments along' do
expect(Zipline::ZipGenerator).to receive(:new)
.with(['some', 'fake', 'files'], { some: 'options' })
undertest.zipline(['some', 'fake', 'files'], 'myfiles.zip', some: 'options')
expect(undertest.headers['Content-Disposition']).to eq("attachment; filename=\"myfiles.zip\"; filename*=UTF-8''myfiles.zip")
expect(headers['Content-Disposition']).to eq("attachment; filename=\"myfiles.zip\"; filename*=UTF-8''myfiles.zip")
end
end
13 changes: 6 additions & 7 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@
require 'fog-aws'
require 'carrierwave'

Dir["#{File.expand_path('..', __FILE__)}/support/**/*.rb"].each { |f| require f }
Dir["#{File.expand_path('..', __FILE__)}/support/**/*.rb"].sort.each { |f| require f }

CarrierWave.configure do |config|
config.fog_provider = 'fog/aws'
config.fog_credentials = {
provider: 'AWS',
aws_access_key_id: 'dummy',
aws_secret_access_key: 'data',
region: 'us-west-2',
}

provider: 'AWS',
aws_access_key_id: 'dummy',
aws_secret_access_key: 'data',
region: 'us-west-2',
}
end

RSpec.configure do |config|
Expand Down
11 changes: 9 additions & 2 deletions zipline.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@ Gem::Specification.new do |gem|

gem.add_dependency 'actionpack', ['>= 6.0', '< 8.0']
gem.add_dependency 'content_disposition', '~> 1.0'
gem.add_dependency 'zip_tricks', ['>= 4.2.1', '< 6.0']
gem.add_dependency 'zip_tricks', ['~> 4.8', '< 6'] # Minimum to 4.8.3 which is the last-released MIT version

gem.add_development_dependency 'rspec', '~> 3'
gem.add_development_dependency 'fog-aws'
gem.add_development_dependency 'aws-sdk-s3'
gem.add_development_dependency 'carrierwave'
gem.add_development_dependency 'paperclip'
gem.add_development_dependency 'rake'

# https://github.com/rspec/rspec-mocks/issues/1457
gem.add_development_dependency 'rspec-mocks', '3.10.2'
gem.add_development_dependency 'rspec-mocks', '~> 3.12'
end

0 comments on commit 8cef8c2

Please sign in to comment.