Skip to content

Commit

Permalink
Move beidermorse/common pkg to internal/bmpm
Browse files Browse the repository at this point in the history
BREAKING: end users will have to rename accuracy constants
  • Loading branch information
cyradin committed Feb 22, 2023
1 parent 0fde288 commit 54fc71b
Show file tree
Hide file tree
Showing 17 changed files with 7,479 additions and 7,439 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ import (
"github.com/f1monkey/phonetic/beidermorse"
"github.com/f1monkey/phonetic/beidermorse/beidermorseash"
"github.com/f1monkey/phonetic/beidermorse/beidermorsesep"
"github.com/f1monkey/phonetic/beidermorse/common"
)

func main() {
Expand All @@ -119,7 +118,7 @@ func main() {
// prints: [orangi oragi orongi orogi orYngi Yrangi Yrongi YrYngi oranxi oronxi orani oroni oranii oronii oranzi oronzi urangi urongi]

// USE ENCODER WITH "GENERIC" RULESET WITH "EXACT" ACCURACY
genEncoder, err = beidermorse.NewEncoder(beidermorse.WithAccuracy(common.Exact))
genEncoder, err = beidermorse.NewEncoder(beidermorse.WithAccuracy(beidermorse.Exact))
if err != nil {
panic(err)
}
Expand All @@ -130,7 +129,7 @@ func main() {
// USE ENCODER WITH "GENERIC" RULESET WITH "EXACT" ACCURACY
// AND "ENGLISH" LANGUAGE
genEncoder, err = beidermorse.NewEncoder(
beidermorse.WithAccuracy(common.Exact),
beidermorse.WithAccuracy(beidermorse.Exact),
beidermorse.WithLang(beidermorse.English),
)
if err != nil {
Expand All @@ -140,7 +139,6 @@ func main() {
fmt.Println(result)
// prints: [orenk orenge orendS orendZe oronk oronge orondS orondZe orank orange orandS orandZe arenk arenge arendS arendZe aronk aronge arondS arondZe arank arange arandS arandZe]


// USE ENCODER WITH "ASHKENAZI" RULESET
ashEncoder, err := beidermorseash.NewEncoder()
if err != nil {
Expand Down
50 changes: 31 additions & 19 deletions beidermorse/beidermorseash/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,34 @@ import (
"fmt"
"math/bits"

"github.com/f1monkey/phonetic/beidermorse/common"
"github.com/f1monkey/phonetic/internal/bmpm"
"github.com/f1monkey/phonetic/internal/exrunes"
)

var ErrInvalidMode = fmt.Errorf("invalid name mode")
var ErrInvalidAccuracy = fmt.Errorf("invalid accuracy value")

// Accuracy selects exact or approximate matching.
type Accuracy bmpm.Accuracy

const (
Exact Accuracy = "exact" // exact matching rules
Approx Accuracy = "approx" // approx matching (results in more tokens)
)

// Valid reports whether a is one of the declared accuracy values,
// Exact or Approx.
func (a Accuracy) Valid() bool {
	switch a {
	case Exact, Approx:
		return true
	default:
		return false
	}
}

type Encoder struct {
accuracy common.Accuracy
accuracy Accuracy
lang Lang
}

// NewEncoder creates a new encoder instance.
func NewEncoder(opts ...EncoderOption) (*Encoder, error) {
result := &Encoder{
accuracy: common.Approx,
accuracy: Approx,
}

for _, opt := range opts {
Expand All @@ -45,7 +57,7 @@ func MustNewEncoder(opts ...EncoderOption) *Encoder {
// Encode transforms the passed string into a slice of phonetic tokens.
func (e *Encoder) Encode(input string) []string {
langDetector := detectLangFunc()
lang := common.Lang(e.lang)
lang := bmpm.Lang(e.lang)
if lang == 0 {
lang = langDetector(input)
}
Expand All @@ -54,10 +66,10 @@ func (e *Encoder) Encode(input string) []string {

buf := exrunes.NewBuffer(200)

tokens := common.MakeTokens(
input, common.Ashkenazi,
e.accuracy,
common.Ruleset{Main: main, Final1: final1, Final2: final2, Discards: Discards, DetectLang: langDetector},
tokens := bmpm.MakeTokens(
input, bmpm.Ashkenazi,
bmpm.Accuracy(e.accuracy),
bmpm.Ruleset{Main: main, Final1: final1, Final2: final2, Discards: Discards, DetectLang: langDetector},
lang,
false,
buf,
Expand Down Expand Up @@ -85,7 +97,7 @@ func (e *Encoder) SetOption(opt EncoderOption) error {
type EncoderOption func(e *Encoder) error

// WithAccuracy sets the encoder accuracy.
func WithAccuracy(a common.Accuracy) EncoderOption {
func WithAccuracy(a Accuracy) EncoderOption {
return func(e *Encoder) error {
if !a.Valid() {
return fmt.Errorf("%w: %q", ErrInvalidAccuracy, a)
Expand All @@ -104,35 +116,35 @@ func WithLang(l Lang) EncoderOption {
}

func getRules(
accuracy common.Accuracy,
lang common.Lang,
) (common.Rules, common.Rules, common.Rules) {
var main, final1, final2 common.Rules
accuracy Accuracy,
lang bmpm.Lang,
) (bmpm.Rules, bmpm.Rules, bmpm.Rules) {
var main, final1, final2 bmpm.Rules

langCount := bits.OnesCount64(uint64(lang))
if langCount > 1 {
lang = common.Lang(Any)
lang = bmpm.Lang(Any)
}
main = Rules[lang]

if accuracy == common.Approx {
if accuracy == Approx {
final1 = FinalRules.Approx.First
final2 = FinalRules.Approx.Second[lang]
} else if accuracy == common.Exact {
} else if accuracy == Exact {
final1 = FinalRules.Exact.First
final2 = FinalRules.Exact.Second[lang]
}

return main, final1, final2
}

func detectLangFunc() common.DetectLangFunc {
return func(input string) common.Lang {
func detectLangFunc() bmpm.DetectLangFunc {
return func(input string) bmpm.Lang {
all := All
rules := LangRules

runes := []rune(input)
remaining := common.Lang(all)
remaining := bmpm.Lang(all)
for _, rule := range rules {
if rule.Matcher == nil {
continue
Expand Down
25 changes: 12 additions & 13 deletions beidermorse/beidermorseash/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,45 @@ import (
"fmt"
"testing"

"github.com/f1monkey/phonetic/beidermorse/common"
"github.com/stretchr/testify/require"
)

func Benchmark_Encoder_Encode_Ru_Approx(b *testing.B) {
e := MustNewEncoder(WithAccuracy(common.Approx))
e := MustNewEncoder(WithAccuracy(Approx))
for i := 0; i < b.N; i++ {
e.Encode("апельсин")
}
}

func Benchmark_Encoder_Encode_Ru_Exact(b *testing.B) {
e := MustNewEncoder(WithAccuracy(common.Exact))
e := MustNewEncoder(WithAccuracy(Exact))
for i := 0; i < b.N; i++ {
e.Encode("апельсин")
}
}

func Benchmark_Encoder_Encode_En_Approx(b *testing.B) {
e := MustNewEncoder(WithAccuracy(common.Approx))
e := MustNewEncoder(WithAccuracy(Approx))
for i := 0; i < b.N; i++ {
e.Encode("orange")
}
}

func Benchmark_Encoder_Encode_En_Exact(b *testing.B) {
e := MustNewEncoder(WithAccuracy(common.Exact))
e := MustNewEncoder(WithAccuracy(Exact))
for i := 0; i < b.N; i++ {
e.Encode("orange")
}
}

func Test_Encoder_Encode(t *testing.T) {
cases := []struct {
accuracy common.Accuracy
accuracy Accuracy
input string
expected []string
}{
{
accuracy: common.Approx,
accuracy: Approx,
input: "orange",
expected: []string{
"orangi",
Expand All @@ -63,7 +62,7 @@ func Test_Encoder_Encode(t *testing.T) {
},
},
{
accuracy: common.Exact,
accuracy: Exact,
input: "orange",
expected: []string{
"orange",
Expand All @@ -73,7 +72,7 @@ func Test_Encoder_Encode(t *testing.T) {
},
},
{
accuracy: common.Exact,
accuracy: Exact,
input: "van der orange",
expected: []string{
"vander",
Expand All @@ -90,7 +89,7 @@ func Test_Encoder_Encode(t *testing.T) {
},
},
{
accuracy: common.Approx,
accuracy: Approx,
input: "test",
expected: []string{
"tist",
Expand All @@ -100,20 +99,20 @@ func Test_Encoder_Encode(t *testing.T) {
},
},
{
accuracy: common.Exact,
accuracy: Exact,
input: "test",
expected: []string{
"teSt",
"test",
},
},
{
accuracy: common.Exact,
accuracy: Exact,
input: "апельсин",
expected: []string{"apelsin"},
},
{
accuracy: common.Approx,
accuracy: Approx,
input: "апельсин",
expected: []string{
"apYlzn",
Expand Down
Loading

0 comments on commit 54fc71b

Please sign in to comment.