Skip to content

Commit

Permalink
Merge pull request #6 from itsmontoya/adjust-naming-to-best-practices
Browse files Browse the repository at this point in the history
Adjust naming to best practices
  • Loading branch information
itsmontoya committed Jul 15, 2024
2 parents 292df86 + 19df75f commit 0ad5e87
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 51 deletions.
33 changes: 20 additions & 13 deletions bag.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,6 @@ func (b *Bag) GetResults(in string) (r Results) {

return
}
// toNGrams converts the inbound string into n-grams of size b.c.NGramSize.
// It delegates to the package-level toNGrams helper when b.c.NGramType is
// "word", and to tocharacterNGrams for any other value.
func (b *Bag) toNGrams(in string) (ns []string) {
if b.c.NGramType == "word" {
return toNGrams(in, b.c.NGramSize)
}

return tocharacterNGrams(in, b.c.NGramSize)
}

func (b *Bag) Train(in, label string) {
// Convert inbound data to a slice of NGrams
Expand All @@ -73,16 +66,23 @@ func (b *Bag) Train(in, label string) {
v[n]++
}

// Increment count of trained documents for the provided label
b.countByLabel[label]++
// Increment total count of trained documents
b.totalCount++
// Increment model counters
b.incrementCounts(label)
}

// toNGrams splits the inbound string into n-grams of the configured size.
// When the configured NGramType is "word" the package-level toNGrams helper is
// used; every other value falls through to tocharacterNGrams.
func (b *Bag) toNGrams(in string) (ns []string) {
	size := b.c.NGramSize

	switch b.c.NGramType {
	case "word":
		ns = toNGrams(in, size)
	default:
		ns = tocharacterNGrams(in, size)
	}

	return ns
}

// getProbability uses a Naive Bayes classifier to determine probability for a given label
func (b *Bag) getProbability(ns []string, label string, vocab Vocabulary) (probability float64) {
// Set initial probability value as the prior probability value
probability = b.getPriorProbability(label)
probability = b.getLogPriorProbability(label)
// Get the current counts by label (to be used by Laplace smoothing during for-loop)
countsByLabel := float64(b.countByLabel[label]) + b.c.SmoothingParameter*float64(len(vocab))

Expand All @@ -98,7 +98,7 @@ func (b *Bag) getProbability(ns []string, label string, vocab Vocabulary) (proba
return
}

func (b *Bag) getPriorProbability(label string) (probability float64) {
func (b *Bag) getLogPriorProbability(label string) (probability float64) {
count := float64(b.countByLabel[label])
total := float64(b.totalCount)
// Get the logarithmic value of count divided by total count
Expand All @@ -118,3 +118,10 @@ func (b *Bag) getOrCreateVocabulary(label string) (v Vocabulary) {

return
}

// incrementCounts records one additional trained document, first in the
// per-label tally and then in the overall total.
func (b *Bag) incrementCounts(label string) {
	// Bump the number of trained documents seen for this label
	labelCount := b.countByLabel[label]
	b.countByLabel[label] = labelCount + 1

	// Bump the overall number of trained documents
	b.totalCount++
}
56 changes: 28 additions & 28 deletions ngram.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,57 @@ package bag

import "bytes"

// toNGrams will convert inbound data to an NGram of provided size
// toNGrams will convert inbound data to an nGram of provided size
func toNGrams(in string, size int) (ns []string) {
// Initialize NGram with a provided size
n := make(NGram, size)
// Initialize nGram with a provided size
n := make(nGram, size)
// Iterate inbound data as words
toWords(in, func(word string) {
// Append word to NGram
// Append word to nGram
n = n.Append(word)
if !n.IsFull() {
// NGram is not full - we do not want to append yet, return
return
}

// Append current NGram to NGrams slice
// Append current nGram to nGrams slice
ns = append(ns, n.String())
})

if !n.IsFull() && !n.IsZero() {
// The NGram is not full, so we haven't appended yet
// The NGram is not empty, so we have something to append
// Append current NGram to NGrams slice
// The nGram is not full, so we haven't appended yet
// The nGram is not empty, so we have something to append
// Append current nGram to nGrams slice
ns = append(ns, n.String())
}

return
}

// NGram represents an NGram (variable sized)
type NGram []string
// nGram represents an N-Gram (variable sized)
type nGram []string

// Append will append a given string to an NGram and output the new value
// Note: The original NGram is NOT modified
func (n NGram) Append(str string) (out NGram) {
// Initialize new NGram with the same size as the original NGram
out = make(NGram, len(n))
// Iterate through original NGram, starting at index 1
// Append will append a given string to an nGram and output the new value
// Note: The original nGram is NOT modified
func (n nGram) Append(str string) (out nGram) {
// Initialize new nGram with the same size as the original nGram
out = make(nGram, len(n))
// Iterate through original nGram, starting at index 1
for i := 1; i < len(n); i++ {
// Set the value of the current original NGram index as the value for the previous index for the output NGram
// Set the value of the current original nGram index as the value for the previous index for the output nGram
out[i-1] = n[i]
}

// Set the last value of the output NGram as the input string
// Set the last value of the output nGram as the input string
out[len(n)-1] = str
return
}

// String will convert the NGram contents to a string
func (n NGram) String() (out string) {
// String will convert the nGram contents to a string
func (n nGram) String() (out string) {
// Initialize buffer
buf := bytes.NewBuffer(nil)
// Iterate through NGram values
// Iterate through nGram values
n.iterate(func(value string) {
if buf.Len() > 0 {
// Buffer is not empty, prefix the iterating value with a space
Expand All @@ -67,21 +67,21 @@ func (n NGram) String() (out string) {
return buf.String()
}

// IsZero returns whether or not the NGram is empty
func (n NGram) IsZero() bool {
// IsZero returns whether or not the nGram is empty
func (n nGram) IsZero() bool {
// Return whether the value in the last position is empty
return len(n[len(n)-1]) == 0
}

// IsFull returns whether or not the NGram is full
func (n NGram) IsFull() bool {
// IsFull returns whether or not the nGram is full
func (n nGram) IsFull() bool {
// Return whether the value in the first position is populated
return len(n[0]) > 0
}

// iterate will iterate through the NGram values
func (n NGram) iterate(fn func(word string)) {
// Iterate through NGram values
// iterate will iterate through the nGram values
func (n nGram) iterate(fn func(word string)) {
// Iterate through nGram values
for _, str := range n {
// Check if value is empty
if len(str) == 0 {
Expand Down
6 changes: 0 additions & 6 deletions sample.go

This file was deleted.

3 changes: 3 additions & 0 deletions samples.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package bag

// Samples is a list of strings.
// NOTE(review): presumably each entry is one training document - confirm against callers.
type Samples []string
3 changes: 3 additions & 0 deletions samplesbylabel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package bag

// SamplesByLabel maps a string key to a Samples list.
// NOTE(review): presumably the key is the label name - confirm against callers.
type SamplesByLabel map[string]Samples
4 changes: 0 additions & 4 deletions trainingset.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,3 @@ type TrainingSet struct {

Samples SamplesByLabel `yaml:"samples"`
}

// SamplesByLabel maps a string key to a Samples list.
// NOTE(review): presumably the key is the label name - confirm against callers.
type SamplesByLabel map[string]Samples

// Samples is a list of strings.
// NOTE(review): presumably each entry is one training document - confirm against callers.
type Samples []string

0 comments on commit 0ad5e87

Please sign in to comment.