Skip to content

Commit

Permalink
Introduce PositionalGenerator (#2710)
Browse files Browse the repository at this point in the history
* Introduce PositionalGenerator

One of the more common complaints among programmers is memorizing and
understanding the regular expression syntax. It is concise but dense.

The `regexify` method uses a _subset_ of regex language to generate
values. Since it is not exactly the Ruby regex language, this means that
it is an additional mini-language to learn, with its own quirks.

The PositionalGenerator attempts to solve the same problem in a more
explicit, readable way. It is used for generating fixed-length strings
where each position has a specific set of allowed values, such as postal
codes, VINs, business IDs, and so on.

Three examples to show the tradeoffs:

`gb_licence_checksum`
---------------------

With `regexify`:

```ruby
regexify(/[0-9][A-Z][A-Z]/)
```

With `PositionalGenerator`:

```ruby
generate(:string) do |g|
  g.int
  g.letter(ranges: ['A'..'Z'], length: 2)
end
```

`ssn_valid`
-----------

With `regexify`:

```ruby
ssn = regexify(/[0-8]\d{2}-\d{2}-\d{4}/)
INVALID_SSN.any? { |regex| regex =~ ssn } ? ssn_valid : ssn
```

With `PositionalGenerator`:

```ruby
generate(:string) do |g|
  g.int(ranges: [100..665, 667..899])
  g.lit('-')
  g.int(ranges: [10..99])
  g.lit('-')
  g.int(ranges: [1000..9999])
end
```

`vin`
-----

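Without `PositionalGenerator` (the previous implementation, which samples `VIN_KEYSPACE` directly rather than calling `regexify`):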

```ruby
front = 8.times.map { VIN_KEYSPACE.sample(random: Faker::Config.random) }.join
back = 8.times.map { VIN_KEYSPACE.sample(random: Faker::Config.random) }.join
checksum = "#{front}A#{back}".chars.each_with_index.map do |char, i|
  value = (char =~ /\A\d\z/ ? char.to_i : VIN_TRANSLITERATION[char.to_sym])
  value * VIN_WEIGHT[i]
end.inject(:+) % 11
checksum = 'X' if checksum == 10
"#{front}#{checksum}#{back}"
```

With `PositionalGenerator`:

```ruby
generate(:string) do |g|
  g.letter(name: :wmi, ranges: ['100'..'199', '400'..'499', '500'..'599', '700'..'799', '7A0'..'7F9'])
  g.letter(name: :vds, length: 5, ranges: [VIN_KEYSPACE])
  g.computed(name: :checksum, deps: %i[wmi vds model_year plant_code vis]) do |wmi, vds, model_year, plant_code, vis|
    checksum = "#{wmi}#{vds}0#{model_year}#{plant_code}#{vis}".chars.each_with_index.map do |char, i|
      value = (char =~ /\A\d\z/ ? char.to_i : VIN_TRANSLITERATION[char.to_sym])
      value * VIN_WEIGHT[i]
    end.inject(:+) % 11

    if checksum == 10
      'X'
    else
      checksum
    end
  end
  g.letter(name: :model_year, length: 1, ranges: [VIN_KEYSPACE - %w[U Z 0]])
  g.letter(name: :plant_code, length: 1, ranges: [VIN_KEYSPACE])
  g.int(name: :vis, length: 6)
end
```

Summary
-------

As you can see, the `PositionalGenerator` is much more verbose than
`regexify`. In exchange, the format being generated is easier to read and
understand.

* Update lib/helpers/positional_generator.rb

Co-authored-by: Mike Burns <mburns@thoughtbot.com>

* Apply suggestions from code review

Co-authored-by: Thiago Araujo <thd.araujo@gmail.com>

* Update lib/helpers/positional_generator.rb

Co-authored-by: Mike Burns <mburns@thoughtbot.com>

* remove extra space

* Update lib/helpers/positional_generator.rb

add missing space

* Update lib/helpers/positional_generator.rb

---------

Co-authored-by: Thiago Araujo <thd.araujo@gmail.com>
  • Loading branch information
mike-burns and thdaraujo committed Jul 11, 2023
1 parent e689abb commit 1bc565f
Show file tree
Hide file tree
Showing 7 changed files with 628 additions and 40 deletions.
4 changes: 4 additions & 0 deletions lib/faker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ def bothify(string)
letterify(numerify(string))
end

# Builds a value of +as_type+ from the positional components declared in
# the given block. The block is handed to PositionalGenerator, and the
# result of that generator's #generate call is returned.
def generate(as_type, &block)
  positional = PositionalGenerator.new(as_type, &block)
  positional.generate
end

# Given a regular expression, attempt to generate a string
# that would match it. This is a rather simple implementation,
# so don't be shocked if it blows up on you in a spectacular fashion.
Expand Down
76 changes: 59 additions & 17 deletions lib/faker/default/code.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,23 @@ def rut
# @faker.version 2.2.0
def nric(min_age: 18, max_age: 65)
birthyear = Date.birthday(min_age: min_age, max_age: max_age).year
prefix = birthyear < 2000 ? 'S' : 'T'
values = birthyear.to_s[-2..]
values << regexify(/\d{5}/)
check_alpha = generate_nric_check_alphabet(values, prefix)
"#{prefix}#{values}#{check_alpha}"

generate(:string) do |g|
g.computed(name: :prefix) do
if birthyear < 2000
'S'
else
'T'
end
end
g.computed(name: :yy) do
birthyear.to_s[-2..]
end
g.int(name: :values, length: 5)
g.computed(name: :check, deps: %i[prefix yy values]) do |prefix, yy, values|
generate_nric_check_alphabet("#{yy}#{values}", prefix)
end
end
end

##
Expand Down Expand Up @@ -197,15 +209,29 @@ def generate_imei
end

def generate_base10_isbn
values = regexify(/\d{9}/)
remainder = sum(values) { |value, index| (index + 1) * value.to_i } % 11
values << "-#{remainder == 10 ? 'X' : remainder}"
generate(:string) do |g|
g.int(name: :values, length: 9)
g.lit('-')
g.computed(name: :checksum, deps: [:values]) do |values|
remainder = sum(values.to_s) { |value, offset| (offset + 1) * value.to_i } % 11
if remainder == 10
'X'
else
remainder.to_s
end
end
end
end

def generate_base13_isbn
values = regexify(/\d{12}/)
remainder = sum(values) { |value, index| index.even? ? value.to_i : value.to_i * 3 } % 10
values << "-#{(10 - remainder) % 10}"
generate(:string) do |g|
g.int(name: :values, length: 12)
g.lit('-')
g.computed(name: :checksum, deps: [:values]) do |values|
remainder = sum(values.to_s) { |value, offset| offset.even? ? value.to_i : value.to_i * 3 } % 10
(10 - remainder) % 10
end
end
end

def sum(values)
Expand All @@ -215,15 +241,31 @@ def sum(values)
end

def generate_base8_ean
values = regexify(/\d{7}/)
check_digit = 10 - values.chars.each_with_index.inject(0) { |s, (v, i)| s + v.to_i * EAN_CHECK_DIGIT8[i] } % 10
values << (check_digit == 10 ? 0 : check_digit).to_s
generate(:string) do |g|
g.int(name: :values, length: 7)
g.computed(name: :checksum, deps: [:values]) do |values|
check_digit = 10 - values.to_s.chars.each_with_index.inject(0) { |s, (v, i)| s + v.to_i * EAN_CHECK_DIGIT8[i] } % 10
if check_digit == 10
0
else
check_digit
end
end
end
end

def generate_base13_ean
values = regexify(/\d{12}/)
check_digit = 10 - values.chars.each_with_index.inject(0) { |s, (v, i)| s + v.to_i * EAN_CHECK_DIGIT13[i] } % 10
values << (check_digit == 10 ? 0 : check_digit).to_s
generate(:string) do |g|
g.int(name: :values, length: 12)
g.computed(name: :checksum, deps: [:values]) do |values|
check_digit = 10 - values.to_s.chars.each_with_index.inject(0) { |s, (v, i)| s + v.to_i * EAN_CHECK_DIGIT13[i] } % 10
if check_digit == 10
0
else
check_digit
end
end
end
end

EAN_CHECK_DIGIT8 = [3, 1, 3, 1, 3, 1, 3].freeze
Expand Down
34 changes: 30 additions & 4 deletions lib/faker/default/company.rb
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,12 @@ def polish_register_of_national_economy(length: 9)
#
# @faker.version 1.9.2
def south_african_pty_ltd_registration_number
regexify(%r{\d{4}/\d{4,10}/07})
generate(:string) do |g|
g.int(length: 4)
g.lit('/')
g.int(ranges: [1000..9_999_999_999])
g.lit('/07')
end
end

##
Expand All @@ -356,7 +361,18 @@ def south_african_pty_ltd_registration_number
#
# @faker.version 1.9.2
def south_african_close_corporation_registration_number
regexify(%r{(CK\d{2}|\d{4})/\d{4,10}/23})
generate(:string) do |g|
g.oneof do |one|
one.group do |g_|
g_.lit('CK')
g_.int(length: 2)
end
one.int(length: 4)
end
g.lit('/')
g.int(ranges: [1000..9_999_999_999])
g.lit('/23')
end
end

##
Expand All @@ -369,7 +385,12 @@ def south_african_close_corporation_registration_number
#
# @faker.version 1.9.2
def south_african_listed_company_registration_number
regexify(%r{\d{4}/\d{4,10}/06})
generate(:string) do |g|
g.int(length: 4)
g.lit('/')
g.int(ranges: [1000..9_999_999_999])
g.lit('/06')
end
end

##
Expand All @@ -382,7 +403,12 @@ def south_african_listed_company_registration_number
#
# @faker.version 1.9.2
def south_african_trust_registration_number
regexify(%r{IT\d{2,4}/\d{2,10}})
generate(:string) do |g|
g.lit('IT')
g.int(ranges: [10..9999])
g.lit('/')
g.int(ranges: [10..9_999_999_999])
end
end

##
Expand Down
27 changes: 19 additions & 8 deletions lib/faker/default/driving_licence.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,28 @@ def gb_licence_padding(str, num_chars)
end

def gb_licence_year(dob, gender)
decade = (dob.year / 10) % 10
year = dob.year % 10
month = gender == :female ? dob.month + 50 : dob.month
# Rubocop's preferred formatting is pretty gory
# rubocop:disable Style/FormatString
"#{decade}#{'%02d' % month}#{'%02d' % dob.day}#{year}"
# rubocop:enable Style/FormatString
generate(:string) do |g|
g.computed do
(dob.year / 10) % 10
end
g.computed do
gender_marker = gender == :female ? 50 : 0
format('%02d', (dob.month + gender_marker))
end
g.computed do
format('%02d', dob.day)
end
g.computed do
dob.year % 10
end
end
end

def gb_licence_checksum
regexify(/[0-9][A-Z][A-Z]/)
generate(:string) do |g|
g.int
g.letter(ranges: ['A'..'Z'], length: 2)
end
end
end
end
Expand Down
20 changes: 17 additions & 3 deletions lib/faker/default/id_number.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,23 @@ def invalid
end

def ssn_valid
ssn = regexify(/[0-8]\d{2}-\d{2}-\d{4}/)
# We could still have all 0s in one segment or another
INVALID_SSN.any? { |regex| regex =~ ssn } ? ssn_valid : ssn
generate(:string) do |g|
g.computed(name: :first) do
range = [1..665, 667..899].sample(random: Faker::Config.random)
n = Faker::Base.rand(range)
format('%03d', n)
end
g.lit('-')
g.computed(name: :second) do
n = Faker::Base.rand(1..99)
format('%02d', n)
end
g.lit('-')
g.computed(name: :third) do
n = Faker::Base.rand(1..9999)
format('%04d', n)
end
end
end

##
Expand Down
27 changes: 19 additions & 8 deletions lib/faker/default/vehicle.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,25 @@ class << self
#
# @faker.version 1.6.4
def vin
front = 8.times.map { VIN_KEYSPACE.sample(random: Faker::Config.random) }.join
back = 8.times.map { VIN_KEYSPACE.sample(random: Faker::Config.random) }.join
checksum = "#{front}A#{back}".chars.each_with_index.map do |char, i|
value = (char =~ /\A\d\z/ ? char.to_i : VIN_TRANSLITERATION[char.to_sym])
value * VIN_WEIGHT[i]
end.inject(:+) % 11
checksum = 'X' if checksum == 10
"#{front}#{checksum}#{back}"
generate(:string) do |g|
g.letter(name: :wmi, ranges: ['100'..'199', '400'..'499', '500'..'599', '700'..'799', '7A0'..'7F9'])
g.letter(name: :vds, length: 5, ranges: [VIN_KEYSPACE])
g.computed(name: :checksum, deps: %i[wmi vds model_year plant_code vis]) do |wmi, vds, model_year, plant_code, vis|
checksum = "#{wmi}#{vds}0#{model_year}#{plant_code}#{vis}".chars.each_with_index.map do |char, i|
value = (char =~ /\A\d\z/ ? char.to_i : VIN_TRANSLITERATION[char.to_sym])
value * VIN_WEIGHT[i]
end.inject(:+) % 11

if checksum == 10
'X'
else
checksum
end
end
g.letter(name: :model_year, length: 1, ranges: [VIN_KEYSPACE - %w[U Z 0]])
g.letter(name: :plant_code, length: 1, ranges: [VIN_KEYSPACE])
g.int(name: :vis, length: 6)
end
end

# Produces a random vehicle manufacturer.
Expand Down
Loading

0 comments on commit 1bc565f

Please sign in to comment.