Skip to content
This repository has been archived by the owner on Dec 8, 2020. It is now read-only.

Commit

Permalink
Drop ASCII-8BIT (binary) strings before encoding log data to JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
binarylogic committed Sep 27, 2017
1 parent d7f2e0f commit 5d83a04
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 18 deletions.
19 changes: 7 additions & 12 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ rvm:
- 1.9.3
- 2.2.6
- 2.3.3
- 2.4.1
- jruby
- 2.4.2
- jruby-1.7.26
- jruby-9.1.9.0
gemfile:
- gemfiles/rails-3.0.gemfile
- gemfiles/rails-3.1.gemfile
- gemfiles/rails-3.2.gemfile
- gemfiles/rails-4.0.gemfile
- gemfiles/rails-4.1.gemfile
- gemfiles/rails-4.2.gemfile
- gemfiles/rails-4.2.gemfile
- gemfiles/rails-5.0.gemfile
- gemfiles/rails-5.1.gemfile
- gemfiles/rails-edge.gemfile
Expand All @@ -38,15 +38,10 @@ matrix:
gemfile: "gemfiles/rails-3.2.gemfile"
allow_failures:
- rvm: 1.9.3 # bundler wont install the gems
- rvm: 2.4.1
- rvm: jruby
gemfile: gemfiles/rails-3.0.gemfile
- rvm: jruby
gemfile: gemfiles/rails-5.0.gemfile
- rvm: jruby
gemfile: gemfiles/rails-5.1.gemfile
- rvm: jruby
gemfile: gemfiles/rails-edge.gemfile
- rvm: 2.4.2
gemfile: gemfiles/rails-4.0.gemfile
- rvm: 2.4.2
gemfile: gemfiles/rails-4.1.gemfile
notifications:
email: false
slack:
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

### Fixed

- Drop ASCII-8BIT (binary) data before encoding to JSON. This resolves encoding errors during
this process.

## [2.3.1] - 2017-09-26

### Fixed
Expand Down
38 changes: 37 additions & 1 deletion lib/timber/log_entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module Timber
# Represents a new log entry into the log. This is an intermediary class between
# `Logger` and the log device that you set it up with.
class LogEntry #:nodoc:
BINARY_LIMIT_THRESHOLD = 1_000.freeze
DT_PRECISION = 6.freeze
MESSAGE_MAX_BYTES = 8192.freeze
SCHEMA = "https://raw.githubusercontent.com/timberio/log-event-json-schema/v3.1.1/schema.json".freeze
Expand Down Expand Up @@ -73,7 +74,30 @@ def as_json(options = {})
hash
end

Util::Hash.deep_compact(hash)
# Preparing a log event for JSON should remove any blank values. Timber strictly
# validates incoming data, including message size. Blank values will fail validation.
# Moreover, binary data (ASCII-8BIT) generally cannot be encoded into JSON because it
# contains characters outside of the valid UTF-8 space.
Util::Hash.deep_reduce(hash) do |k, v, h|
# Discard blank values
if !v.nil? && (!v.respond_to?(:length) || v.length > 0)
# If the value is a binary string, give it special treatment
if v.respond_to?(:encoding) && v.encoding == ::Encoding::ASCII_8BIT
# Only keep binary values less than a certain size. Sizes larger than this
# are almost always file uploads and data we do not want to log.
if v.length < BINARY_LIMIT_THRESHOLD
# Attempt to safely encode the data to UTF-8
encoded_value = encode_string(v)
if !encoded_value.nil?
h[k] = encoded_value
end
end
else
# Keep all other values
h[k] = v
end
end
end
end

def inspect
Expand Down Expand Up @@ -112,5 +136,17 @@ def to_s
def formatted_dt
@formatted_dt ||= time.iso8601(DT_PRECISION)
end

# Attempts to encode a non UTF-8 string into UTF-8, discarding invalid characters.
# If it fails, a nil is returned.
def encode_string(string)
string.encode('UTF-8', {
:invalid => :replace,
:undef => :replace,
:replace => '?'
})
rescue Exception
nil
end
end
end
11 changes: 6 additions & 5 deletions lib/timber/util/hash.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@ module Hash

extend self

def deep_compact(hash)
# Deeply reduces a hash into a new hash, passing the current key, value,
# and accumulated map up to that point. This allows the caller to
# conditionally rebuild the hash.
def deep_reduce(hash, &block)
new_hash = {}

hash.each do |k, v|
v = if v.is_a?(::Hash)
deep_compact(v)
deep_reduce(v, &block)
else
v
end

if v != nil && (!v.respond_to?(:length) || v.length > 0)
new_hash[k] = v
end
block.call(k, v, new_hash)
end

new_hash
Expand Down
2 changes: 2 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
require File.join(File.dirname(__FILE__), 'support', 'active_record')
end

RSpec::Support::ObjectFormatter.default_instance.max_formatted_output_length = 5_000

RSpec.configure do |config|
config.color = true
config.order = :random
Expand Down
39 changes: 39 additions & 0 deletions spec/timber/log_entry_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,45 @@
describe Timber::LogEntry, :rails_23 => true do
let(:time) { Time.utc(2016, 9, 1, 12, 0, 0) }

describe "#as_json" do
it "should drop nil value keys" do
event = Timber::Events::Custom.new(type: :event_type, message: "event_message", data: {a: nil})
log_entry = described_class.new("INFO", time, nil, "log message", {}, event)
hash = log_entry.as_json
expect(hash.key?(:event)).to be false
end

it "should drop blank string value keys" do
event = Timber::Events::Custom.new(type: :event_type, message: "event_message", data: {a: ""})
log_entry = described_class.new("INFO", time, nil, "log message", {}, event)
hash = log_entry.as_json
expect(hash.key?(:event)).to be false
end

it "should drop empty array value keys" do
event = Timber::Events::Custom.new(type: :event_type, message: "event_message", data: {a: []})
log_entry = described_class.new("INFO", time, nil, "log message", {}, event)
hash = log_entry.as_json
expect(hash.key?(:event)).to be false
end

it "should drop ascii-8bit (binary) value keys" do
binary = ("a" * 1001).force_encoding("ASCII-8BIT")
event = Timber::Events::Custom.new(type: :event_type, message: "event_message", data: {a: binary})
log_entry = described_class.new("INFO", time, nil, "log message", {}, event)
hash = log_entry.as_json
expect(hash.key?(:event)).to be false
end

it "should keep ascii-8bit (binary) values below the threshold" do
binary = "test".force_encoding("ASCII-8BIT")
event = Timber::Events::Custom.new(type: :event_type, message: "event_message", data: {a: binary})
log_entry = described_class.new("INFO", time, nil, "log message", {}, event)
hash = log_entry.as_json
expect(hash[:event][:custom][:event_type][:a].encoding).to eq(::Encoding::UTF_8)
end
end

describe "#to_msgpack" do
it "should encode properly with an event and context" do
event = Timber::Events::Custom.new(type: :event_type, message: "event_message", data: {a: 1})
Expand Down

0 comments on commit 5d83a04

Please sign in to comment.