diff --git a/cmd/cannot.go b/cmd/cannot.go deleted file mode 100644 index fb075ab..0000000 --- a/cmd/cannot.go +++ /dev/null @@ -1,7 +0,0 @@ -package cmd - -func cannot(err error) { - if err != nil { - panic(err) - } -} diff --git a/cmd/comm.go b/cmd/comm.go index c968808..f67debd 100644 --- a/cmd/comm.go +++ b/cmd/comm.go @@ -2,11 +2,10 @@ package cmd import ( "encoding/binary" - "encoding/json" "fmt" "os" - "github.com/calebcase/ibf/lib" + ibf "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) @@ -17,63 +16,59 @@ func indexed(data []byte) (int64, []byte) { var commCmd = &cobra.Command{ Use: "comm IBF1 IBF2", Short: "Compare IBF1 and IBF2.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) (err error) { // Load IBF1 and IBF2. paths := args - ibfs := [2]ibf.IBFer{} + sets := [2]*ibf.IBF{} for i, path := range paths { if i > 1 { break } - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - ibfs[i] = ibf - - file.Close() + sets[i], err = open(path) + if err != nil { + return err + } } // Subtract IBF2 from IBF1. - ibfs[0].Subtract(ibfs[1]) - ibf := ibfs[0] + set := sets[0].Clone() + set.Subtract(sets[1]) // Produce the two-column output. 
leftEmpty := true - for val, err := ibf.Pop(); err == nil; val, err = ibf.Pop() { + for val, err := set.Pop(); err == nil; val, err = set.Pop() { if !cfg.suppressLeft { if cfg.blockIndex >= 0 { - idx, bytes := indexed(val.Bytes()) + idx, bytes := indexed(val) fmt.Printf("%d:%s\n", idx, string(bytes)) } else { - fmt.Printf("%s\n", string(val.Bytes())) + fmt.Printf("%s\n", string(val)) } } } if !cfg.suppressLeft { - leftEmpty = ibf.IsEmpty() + leftEmpty = set.IsEmpty() } + set = sets[1].Clone() + set.Subtract(sets[0]) + rightEmpty := true - ibf.Invert() - for val, err := ibf.Pop(); err == nil; val, err = ibf.Pop() { + for val, err := set.Pop(); err == nil; val, err = set.Pop() { if !cfg.suppressRight { if cfg.blockIndex >= 0 { - idx, bytes := indexed(val.Bytes()) + idx, bytes := indexed(val) fmt.Printf("%s%d:%s\n", cfg.columnDelimiter, idx, string(bytes)) } else { - fmt.Printf("%s%s\n", cfg.columnDelimiter, string(val.Bytes())) + fmt.Printf("%s%s\n", cfg.columnDelimiter, string(val)) } } } if !cfg.suppressRight { - rightEmpty = ibf.IsEmpty() + rightEmpty = set.IsEmpty() } // Incomplete listing? 
@@ -90,8 +85,11 @@ var commCmd = &cobra.Command{ } fmt.Fprintf(os.Stderr, "Unable to list all elements (%s).\n", side) + os.Exit(1) } + + return nil }, } diff --git a/cmd/completion.go b/cmd/completion.go index f4ff123..35b2440 100644 --- a/cmd/completion.go +++ b/cmd/completion.go @@ -1,7 +1,7 @@ package cmd import ( - "fmt" + "errors" "os" "github.com/spf13/cobra" @@ -10,16 +10,20 @@ import ( var completionCmd = &cobra.Command{ Use: "completion SHELL", Short: "Output shell completion code for the given shell.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) (err error) { shell := args[0] switch shell { case "bash": - err := RootCmd.GenBashCompletion(os.Stdout) - cannot(err) - default: - fmt.Fprintf(os.Stderr, "Unknown shell.\n") + return RootCmd.GenBashCompletion(os.Stdout) + case "zsh": + return RootCmd.GenZshCompletion(os.Stdout) + case "powershell": + return RootCmd.GenPowerShellCompletion(os.Stdout) } + + return errors.New("unknown shell") }, } diff --git a/cmd/create.go b/cmd/create.go index 75fa527..0cd16e6 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -1,48 +1,35 @@ package cmd import ( - "encoding/json" - "math/rand" - "os" "strconv" - "github.com/calebcase/ibf/lib" + ibf "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var createCmd = &cobra.Command{ Use: "create PATH SIZE [SEED]", Short: "Create a new set. 
Optionally specify a seed for the hash parameters.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.RangeArgs(2, 3), + RunE: func(cmd *cobra.Command, args []string) (err error) { var path = args[0] var seed int64 = 0 size, err := strconv.ParseUint(args[1], 10, 64) - cannot(err) + if err != nil { + return err + } if len(args) > 2 { seed, err = strconv.ParseInt(args[2], 10, 64) - cannot(err) - } - - file, err := os.Create(path) - cannot(err) - - r := rand.New(rand.NewSource(seed)) - - positioners := []*ibf.Hash{ - ibf.NewHash(uint64(r.Int63()), uint64(r.Int63())), - ibf.NewHash(uint64(r.Int63()), uint64(r.Int63())), - ibf.NewHash(uint64(r.Int63()), uint64(r.Int63())), + if err != nil { + return err + } } - hasher := ibf.NewHash(uint64(r.Int63()), uint64(r.Int63())) - - set := ibf.NewIBF(size, positioners, hasher) - enc := json.NewEncoder(file) + set := ibf.NewIBF(size, seed) - err = enc.Encode(&set) - cannot(err) + return create(path, set) }, } diff --git a/cmd/insert.go b/cmd/insert.go index 16abbd1..dd00ac7 100644 --- a/cmd/insert.go +++ b/cmd/insert.go @@ -3,13 +3,10 @@ package cmd import ( "bufio" "encoding/binary" - "encoding/json" "fmt" - "math/big" "os" "strings" - "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" "golang.org/x/crypto/ssh/terminal" ) @@ -17,7 +14,8 @@ import ( var insertCmd = &cobra.Command{ Use: "insert IBF [KEY]", Short: "Insert the key into the set. If key isn't provided, they will be read from stdin one per line.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.RangeArgs(1, 2), + RunE: func(cmd *cobra.Command, args []string) (err error) { var path = args[0] // Should we echo our input? 
@@ -32,22 +30,13 @@ var insertCmd = &cobra.Command{ } } - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - file.Close() + set, err := open(path) + if err != nil { + return err + } if len(args) == 2 { - var key = args[1] - - val := new(big.Int) - val.SetBytes([]byte(key)) - ibf.Insert(val) + set.Insert([]byte(args[1])) } else { scanner := bufio.NewScanner(os.Stdin) @@ -80,34 +69,29 @@ var insertCmd = &cobra.Command{ count := -1 for scanner.Scan() { + count++ + bytes := scanner.Bytes() - count += 1 - val := new(big.Int) if cfg.blockSize >= 0 && cfg.blockIndex >= 0 { idx := make([]byte, 8) - binary.LittleEndian.PutUint64(idx, uint64(count)) - bytes = append(bytes, 1) + binary.BigEndian.PutUint64(idx, uint64(count)) bytes = append(bytes, idx...) } - val.SetBytes(bytes) - ibf.Insert(val) + + set.Insert(bytes) if echoed { fmt.Printf("%s\n", string(bytes)) } } - - cannot(scanner.Err()) + err = scanner.Err() + if err != nil { + return err + } } - file, err = os.Create(path) - cannot(err) - - encoder := json.NewEncoder(file) - - err = encoder.Encode(&ibf) - cannot(err) + return create(path, set) }, } diff --git a/cmd/invert.go b/cmd/invert.go index 833ef26..499354b 100644 --- a/cmd/invert.go +++ b/cmd/invert.go @@ -1,38 +1,24 @@ package cmd import ( - "encoding/json" - "os" - - "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var invertCmd = &cobra.Command{ Use: "invert IBF", Short: "Invert the counts of the IBF (multiply by -1).", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) (err error) { var path = args[0] - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - file.Close() - - ibf.Invert() - - file, err = os.Create(path) - cannot(err) + set, err := open(path) + if err != nil { + return 
err + } - encoder := json.NewEncoder(file) + set.Invert() - err = encoder.Encode(&ibf) - cannot(err) + return create(path, set) }, } diff --git a/cmd/list.go b/cmd/list.go index cbf7105..61523a9 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -1,49 +1,44 @@ package cmd import ( - "encoding/json" "fmt" "os" - "github.com/calebcase/ibf/lib" + ibf "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var listCmd = &cobra.Command{ Use: "list IBF", Short: "List available keys from the set.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) (err error) { var path = args[0] - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - file.Close() + set, err := open(path) + if err != nil { + return err + } leftEmpty := true - for val, err := ibf.Pop(); err == nil; val, err = ibf.Pop() { + for val, err := set.Pop(); err == nil; val, err = set.Pop() { if !cfg.suppressLeft { - fmt.Printf("%s\n", string(val.Bytes())) + fmt.Printf("%s\n", string(val)) } } if !cfg.suppressLeft { - leftEmpty = ibf.IsEmpty() + leftEmpty = set.IsEmpty() } rightEmpty := true - ibf.Invert() - for val, err := ibf.Pop(); err == nil; val, err = ibf.Pop() { + set.Invert() + for val, err := set.Pop(); err == nil; val, err = set.Pop() { if !cfg.suppressRight { - fmt.Printf("%s\n", string(val.Bytes())) + fmt.Printf("%s\n", string(val)) } } if !cfg.suppressRight { - rightEmpty = ibf.IsEmpty() + rightEmpty = set.IsEmpty() } // Incomplete listing? 
@@ -60,8 +55,11 @@ var listCmd = &cobra.Command{ } fmt.Fprintf(os.Stderr, "Unable to list all elements (%s).\n", side) - os.Exit(1) + + return ibf.ErrNoPureCell } + + return nil }, } diff --git a/cmd/merge.go b/cmd/merge.go index e935450..e8f183f 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -1,48 +1,43 @@ package cmd import ( - "encoding/json" - "errors" + "fmt" "os" - "github.com/calebcase/ibf/lib" + ibf "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var mergeCmd = &cobra.Command{ Use: "merge IBF IBF [IBF]", Short: "Merge the second IBF into the first. If third is provided, write the result there. Otherwise overwrite the first. The difference between the first and second must be small enough to be completely listed otherwise merging is not possible.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.RangeArgs(2, 3), + RunE: func(cmd *cobra.Command, args []string) (err error) { paths := args - ibfs := [2]ibf.IBFer{} + sets := [2]*ibf.IBF{} for i, path := range paths { if i > 1 { break } - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - ibfs[i] = ibf - - file.Close() + sets[i], err = open(path) + if err != nil { + return err + } } // Attempt to remove the elements in first from second and then // list the remainder. For each of the remainder, insert into // first. 
- ibfs[1].Subtract(ibfs[0]) - for val, err := ibfs[1].Pop(); err == nil; val, err = ibfs[1].Pop() { - ibfs[0].Insert(val) + sets[1].Subtract(sets[0]) + for val, err := sets[1].Pop(); err == nil; val, err = sets[1].Pop() { + sets[0].Insert(val) } - if !ibfs[1].IsEmpty() { - cannot(errors.New("More elements in the set, but unable to retrieve.")) + if !sets[1].IsEmpty() { + fmt.Fprintf(os.Stderr, "More elements in the set, but unable to retrieve.\n") + + return ibf.ErrNoPureCell } var output string @@ -53,13 +48,7 @@ var mergeCmd = &cobra.Command{ output = paths[2] } - file, err := os.Create(output) - cannot(err) - - encoder := json.NewEncoder(file) - - err = encoder.Encode(ibfs[0]) - cannot(err) + return create(output, sets[0]) }, } diff --git a/cmd/pop.go b/cmd/pop.go index c37f0f2..a416d43 100644 --- a/cmd/pop.go +++ b/cmd/pop.go @@ -1,42 +1,31 @@ package cmd import ( - "encoding/json" "fmt" - "os" - "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var popCmd = &cobra.Command{ Use: "pop IBF", Short: "Remove and print the first available key from the set.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) (err error) { var path = args[0] - file, err := os.Open(path) - cannot(err) + set, err := open(path) + if err != nil { + return err + } - decoder := json.NewDecoder(file) + val, err := set.Pop() + if err != nil { + return err + } - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - file.Close() + fmt.Printf("%s\n", string(val)) - val, err := ibf.Pop() - cannot(err) - - fmt.Printf("%s\n", string(val.Bytes())) - - file, err = os.Create(path) - cannot(err) - - encoder := json.NewEncoder(file) - - err = encoder.Encode(&ibf) - cannot(err) + return create(path, set) }, } diff --git a/cmd/remove.go b/cmd/remove.go index 9016a77..f451d0c 100644 --- a/cmd/remove.go +++ b/cmd/remove.go @@ -2,13 +2,10 @@ package cmd import ( "bufio" - "encoding/json" "fmt" - "math/big" "os" 
"strings" - "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" "golang.org/x/crypto/ssh/terminal" ) @@ -16,7 +13,8 @@ import ( var removeCmd = &cobra.Command{ Use: "remove IBF KEY", Short: "Remove the key into the set. If key isn't provided, they will be read from stdin one per line.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) (err error) { var path = args[0] // Should we echo our input? @@ -31,43 +29,31 @@ var removeCmd = &cobra.Command{ } } - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - file.Close() + set, err := open(path) + if err != nil { + return err + } if len(args) == 2 { - var key = args[1] - - val := new(big.Int) - val.SetBytes([]byte(key)) - ibf.Remove(val) + set.Remove([]byte(args[1])) } else { scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { bytes := scanner.Bytes() - val := new(big.Int) - val.SetBytes(bytes) - ibf.Remove(val) + + set.Remove(bytes) if echoed { fmt.Printf("%s\n", string(bytes)) } } + err = scanner.Err() + if err != nil { + return err + } } - file, err = os.Create(path) - cannot(err) - - encoder := json.NewEncoder(file) - - err = encoder.Encode(&ibf) - cannot(err) + return create(path, set) }, } diff --git a/cmd/root.go b/cmd/root.go index 0931ff3..1430a73 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -26,8 +26,7 @@ var RootCmd = &cobra.Command{ func Execute() { if err := RootCmd.Execute(); err != nil { - fmt.Println(err) - os.Exit(-1) + os.Exit(1) } } diff --git a/cmd/subtract.go b/cmd/subtract.go index cf76c48..3200a72 100644 --- a/cmd/subtract.go +++ b/cmd/subtract.go @@ -1,39 +1,30 @@ package cmd import ( - "encoding/json" - "os" - - "github.com/calebcase/ibf/lib" + ibf "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var subtractCmd = &cobra.Command{ Use: "subtract IBF IBF [IBF]", Short: "Subtract the second IBF from the 
first. If third is provided, write the result there. Otherwise overwrite the first.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.RangeArgs(2, 3), + RunE: func(cmd *cobra.Command, args []string) (err error) { paths := args - ibfs := [2]ibf.IBFer{} + sets := [2]*ibf.IBF{} for i, path := range paths { if i > 1 { break } - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - ibfs[i] = ibf - - file.Close() + sets[i], err = open(path) + if err != nil { + return err + } } - ibfs[0].Subtract(ibfs[1]) + sets[0].Subtract(sets[1]) var output string @@ -43,13 +34,7 @@ var subtractCmd = &cobra.Command{ output = paths[2] } - file, err := os.Create(output) - cannot(err) - - encoder := json.NewEncoder(file) - - err = encoder.Encode(ibfs[0]) - cannot(err) + return create(output, sets[0]) }, } diff --git a/cmd/union.go b/cmd/union.go index a255d44..5f415fd 100644 --- a/cmd/union.go +++ b/cmd/union.go @@ -1,39 +1,30 @@ package cmd import ( - "encoding/json" - "os" - - "github.com/calebcase/ibf/lib" + ibf "github.com/calebcase/ibf/lib" "github.com/spf13/cobra" ) var unionCmd = &cobra.Command{ Use: "union IBF IBF [IBF]", Short: "Union the second IBF with the first. If third is provided, write the result there. 
Otherwise overwrite the first.", - Run: func(cmd *cobra.Command, args []string) { + Args: cobra.RangeArgs(2, 3), + RunE: func(cmd *cobra.Command, args []string) (err error) { paths := args - ibfs := [2]ibf.IBFer{} + sets := [2]*ibf.IBF{} for i, path := range paths { if i > 1 { break } - file, err := os.Open(path) - cannot(err) - - decoder := json.NewDecoder(file) - - ibf := ibf.NewEmptyIBF() - err = decoder.Decode(ibf) - cannot(err) - ibfs[i] = ibf - - file.Close() + sets[i], err = open(path) + if err != nil { + return err + } } - ibfs[0].Union(ibfs[1]) + sets[0].Union(sets[1]) var output string @@ -43,13 +34,7 @@ var unionCmd = &cobra.Command{ output = paths[2] } - file, err := os.Create(output) - cannot(err) - - encoder := json.NewEncoder(file) - - err = encoder.Encode(ibfs[0]) - cannot(err) + return create(output, sets[0]) }, } diff --git a/cmd/util.go b/cmd/util.go new file mode 100644 index 0000000..985bba0 --- /dev/null +++ b/cmd/util.go @@ -0,0 +1,35 @@ +package cmd + +import ( + "encoding/json" + "os" + + ibf "github.com/calebcase/ibf/lib" + "github.com/zeebo/errs" +) + +func create(path string, set *ibf.IBF) (err error) { + file, err := os.Create(path) + if err != nil { + return err + } + defer func() { + err = errs.Combine(err, file.Close()) + }() + + return json.NewEncoder(file).Encode(set) +} + +func open(path string) (set *ibf.IBF, err error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + defer func() { + err = errs.Combine(err, file.Close()) + }() + + set = &ibf.IBF{} + + return set, json.NewDecoder(file).Decode(set) +} diff --git a/cmd/version.go b/cmd/version.go deleted file mode 100644 index 0335c08..0000000 --- a/cmd/version.go +++ /dev/null @@ -1,18 +0,0 @@ -package cmd - -import ( - "fmt" - "github.com/spf13/cobra" -) - -func init() { - RootCmd.AddCommand(versionCmd) -} - -var versionCmd = &cobra.Command{ - Use: "version", - Short: "Print the version number.", - Run: func(cmd *cobra.Command, args []string) { - 
fmt.Println("0.0.1") - }, -} diff --git a/go.mod b/go.mod index e3050f3..41909e9 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,13 @@ module github.com/calebcase/ibf go 1.13 require ( + github.com/davecgh/go-spew v1.1.1 github.com/dchest/siphash v1.2.1 + github.com/go-faster/xor v0.3.0 + github.com/google/gofuzz v1.2.0 github.com/spf13/cobra v0.0.5 github.com/spf13/viper v1.5.0 + github.com/stretchr/testify v1.2.2 + github.com/zeebo/errs v1.2.2 golang.org/x/crypto v0.0.0-20191108234033-bd318be0434a ) diff --git a/go.sum b/go.sum index 6b55e46..7f2212b 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,5 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -15,6 +16,7 @@ github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dchest/siphash v1.2.1 h1:4cLinnzVJDKxTCl9B01807Yiy+W7ZzVHj/KIroQRvT4= github.com/dchest/siphash v1.2.1/go.mod h1:q+IRvb2gOSrUnYoPqHiyHXS0FOBBOdl6tONBlVnOnt4= @@ -23,6 +25,8 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8 github.com/fsnotify/fsnotify v1.4.7 
h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-faster/xor v0.3.0 h1:tc0bdVe31Wj999e5rEj7K3DhHyQNp2VydYyLFj3YSN8= +github.com/go-faster/xor v0.3.0/go.mod h1:x5CaDY9UKErKzqfRfFZdfu+OSTfoZny3w5Ak7UxcipQ= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= @@ -36,12 +40,15 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0/go.mod 
h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= @@ -49,8 +56,10 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= @@ -64,6 +73,7 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= @@ -93,6 +103,7 @@ github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DM github.com/spf13/viper v1.5.0 
h1:GpsTwfsQ27oS/Aha/6d1oD7tpKIqWnOA6tgOX9HHkt4= github.com/spf13/viper v1.5.0/go.mod h1:AkYRkVJF8TkSG/xet6PzXX+l39KhhXa2pdqVSxnTcn4= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= @@ -101,6 +112,8 @@ github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGr github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/zeebo/errs v1.2.2 h1:5NFypMTuSdoySVTqlNs1dEoU21QVamMQJxW/Fii5O7g= +github.com/zeebo/errs v1.2.2/go.mod h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= @@ -142,6 +155,7 @@ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZi google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= diff --git a/lib/block.go b/lib/block.go new file mode 100644 index 0000000..64eef57 --- /dev/null +++ b/lib/block.go @@ -0,0 +1,56 @@ +package ibf + +import ( + "encoding/binary" + + xor "github.com/go-faster/xor" +) + +// block is a byte array where the first 8 bytes are the big endian uint64 +// length of the value. When blocks are combined via xor they are extended to +// the right with zero bytes. When a block is "pure" then it can be losslessly +// recovered by truncating to the stored length. Blocks rely on external logic +// to know when it is "pure" (e.g. see Cell) and attempts to get the value of +// an "unpure" block will likely panic (and if it doesn't will return +// nonsense). +type block struct { + Data []byte `json:"data"` +} + +func newBlock(value []byte) (b *block) { + data := make([]byte, 8+len(value)) + binary.BigEndian.PutUint64(data, uint64(len(value))) + copy(data[8:], value) + + return &block{ + Data: data, + } +} + +func (b *block) Xor(other *block) { + if len(other.Data) > len(b.Data) { + b.Data = append(b.Data, make([]byte, len(other.Data)-len(b.Data))...) + } + + // In theory we could extend others.Data harmlessly here, but for now + // we will avoid modifying other unnecessarily. 
+ data := make([]byte, len(b.Data)) + copy(data, other.Data) + + xor.Bytes(b.Data, b.Data, data) +} + +func (b *block) Value() []byte { + size := binary.BigEndian.Uint64(b.Data[:8]) + + return b.Data[8 : 8+size] +} + +func (b *block) Clone() *block { + data := make([]byte, len(b.Data)) + copy(data, b.Data) + + return &block{ + Data: data, + } +} diff --git a/lib/block_test.go b/lib/block_test.go new file mode 100644 index 0000000..00ddb0a --- /dev/null +++ b/lib/block_test.go @@ -0,0 +1,64 @@ +package ibf + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBlock(t *testing.T) { + type TC struct { + name string + + b *block // accumulated block + i []byte // input value + d []byte // block data + v []byte // value (optional) + } + + tcs := []TC{ + { + name: "same length 0", + b: newBlock([]byte{}), + i: []byte{}, + d: []byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + v: []byte{}, + }, + { + name: "same length 1", + b: newBlock([]byte{0x0F}), + i: []byte{0xF0}, + d: []byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 1 ^ 1, 0xFF}, + v: nil, + }, + { + name: "diff lengths 1,2", + b: newBlock([]byte{0x0F}), + i: []byte{0x00, 0xF0}, + d: []byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 1 ^ 2, 0x0F, 0xF0}, + v: nil, + }, + { + name: "remove", + b: &block{ + Data: []byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 1 ^ 2, 0x0F, 0xF0}, + }, + i: []byte{0x00, 0xF0}, + d: []byte{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 1, 0x0F, 0x00}, + v: []byte{0x0F}, + }, + } + + for i, tc := range tcs { + t.Run(fmt.Sprintf("[%d] %s", i, tc.name), func(t *testing.T) { + tc.b.Xor(newBlock(tc.i)) + + require.Equal(t, tc.d, tc.b.Data) + + if tc.v != nil { + require.Equal(t, tc.v, tc.b.Value()) + } + }) + } +} diff --git a/lib/cell.go b/lib/cell.go index 6a5ec1b..77e17a5 100644 --- a/lib/cell.go +++ b/lib/cell.go @@ -1,122 +1,107 @@ package ibf -import ( - "encoding/json" - "math/big" -) - -// Interface - -type Celler interface { - Insert(key *big.Int, hash uint64) - Remove(key *big.Int, 
hash uint64) - Subtract(cell Celler) - Invert() - - Clone() Celler - - GetId() *big.Int - GetHash() uint64 - GetCount() int64 - - IsEmpty() bool - IsPure(hasher Hasher) bool - - json.Marshaler - json.Unmarshaler -} - -// Cell - -type cell struct { - Id *big.Int `json:"id"` - Hash uint64 `json:"hash"` - Count int64 `json:"count"` -} - +// Cell contains the state of an individual position in an IBF. type Cell struct { - p cell + Key *block `json:"key"` + Digest uint64 `json:"digest"` + Count int64 `json:"count"` } -var _ Celler = (*Cell)(nil) - +// NewCell returns a new empty cell. func NewCell() *Cell { - return &Cell{cell{ - Id: big.NewInt(0), - Hash: 0, - Count: 0, - }} -} - -func (self *Cell) Insert(key *big.Int, hash uint64) { - self.p.Id.Xor(self.p.Id, key) - self.p.Hash = self.p.Hash ^ hash - self.p.Count += 1 + return &Cell{ + Key: newBlock([]byte{}), + Digest: 0, + Count: 0, + } } -func (self *Cell) Remove(key *big.Int, hash uint64) { - self.p.Id.Xor(self.p.Id, key) - self.p.Hash = self.p.Hash ^ hash - self.p.Count -= 1 +// Insert adds the key with the given digest to this cell. +// +// NOTE: This assumes the key does not already exist in the cell. If it does +// this effectively removes it and the count will be incorrect. +func (c *Cell) Insert(key []byte, digest uint64) { + c.Key.Xor(newBlock(key)) + c.Digest = c.Digest ^ digest + c.Count++ +} + +// Remove deletes the key with the given digest from this cell. +// +// NOTE: This assumes the key already exists in the cell. If it does not this +// effectively adds it and the count will be incorrect. +func (c *Cell) Remove(key []byte, digest uint64) { + c.Key.Xor(newBlock(key)) + c.Digest = c.Digest ^ digest + c.Count-- +} + +// Union adds all keys from the given cell to this one. +// +// NOTE: This assumes cells were disjoint sets. If they weren't, this effectively +// performs a symmetric difference and the count will be incorrect. 
+func (c *Cell) Union(cell *Cell) { + c.Key.Xor(cell.Key) + c.Digest = c.Digest ^ cell.GetDigest() + c.Count += cell.GetCount() +} + +// Subtract removes all keys in the given cell from this one. +// +// NOTE: This assumes the given cell is a subset of this one. If it wasn't, this +// effectively performs a symmetric difference and the count will be incorrect. +func (c *Cell) Subtract(cell *Cell) { + c.Key.Xor(cell.Key) + c.Digest = c.Digest ^ cell.GetDigest() + c.Count -= cell.GetCount() +} + +// Invert negates the count. +func (c *Cell) Invert() { + c.Count *= -1 +} + +// Clone returns a deep copy of this cell. +func (c *Cell) Clone() *Cell { + return &Cell{ + Key: c.Key.Clone(), + Digest: c.Digest, + Count: c.Count, + } } -func (self *Cell) Union(cell Celler) { - self.p.Id.Xor(self.p.Id, cell.GetId()) - self.p.Hash = self.p.Hash ^ cell.GetHash() - self.p.Count += cell.GetCount() -} +// GetKey returns a copy of the key. +func (c *Cell) GetKey() []byte { + raw := c.Key.Value() -func (self *Cell) Subtract(cell Celler) { - self.p.Id.Xor(self.p.Id, cell.GetId()) - self.p.Hash = self.p.Hash ^ cell.GetHash() - self.p.Count -= cell.GetCount() -} + key := make([]byte, len(raw)) + copy(key, raw) -func (self *Cell) Invert() { - self.p.Count *= -1 + return key } -func (self *Cell) Clone() Celler { - return &Cell{cell{ - Id: big.NewInt(0).Set(self.p.Id), - Hash: self.p.Hash, - Count: self.p.Count, - }} +// GetDigest returns the cell's digest. +func (c *Cell) GetDigest() uint64 { + return c.Digest } -func (self *Cell) GetId() *big.Int { - return big.NewInt(0).Set(self.p.Id) +// GetCount returns the cell's count. +func (c *Cell) GetCount() int64 { + return c.Count } -func (self *Cell) GetHash() uint64 { - return self.p.Hash +// IsEmpty returns true if the cell's count is zero, key is empty, and digest +// is zero. 
+func (c *Cell) IsEmpty() bool { + return c.Count == 0 && len(c.Key.Value()) == 0 && c.Digest == 0 } -func (self *Cell) GetCount() int64 { - return self.p.Count -} - -func (self *Cell) IsEmpty() bool { - if self.p.Count == 0 && self.p.Id.Cmp(ZERO) == 0 && self.p.Hash == 0 { - return true +// IsPure returns true if the cell contains exactly one value and the hash is +// valid. +func (c *Cell) IsPure(h *Hash) bool { + if c.Count == 1 { + return c.Digest == h.Hash(c.Key.Value()) } - return false -} -func (self *Cell) IsPure(hasher Hasher) bool { - if self.p.Count == 1 { - hash := hasher.Hash(self.p.Id) - if self.p.Hash == hash { - return true - } - } return false } - -func (self *Cell) MarshalJSON() ([]byte, error) { - return json.Marshal(&self.p) -} - -func (self *Cell) UnmarshalJSON(data []byte) error { - return json.Unmarshal(data, &self.p) -} diff --git a/lib/cell_test.go b/lib/cell_test.go new file mode 100644 index 0000000..55678ca --- /dev/null +++ b/lib/cell_test.go @@ -0,0 +1,37 @@ +package ibf + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCell(t *testing.T) { + h := NewHash(0, 0) + + a := []byte{0x00, 0x00, 0x01} + b := []byte{0x01} + + cell := NewCell() + require.Equal(t, int64(0), cell.Count) + require.True(t, cell.IsEmpty()) + + cell.Insert(a, h.Hash(a)) + cell.Insert(b, h.Hash(b)) + require.Equal(t, int64(2), cell.Count) + require.False(t, cell.IsEmpty()) + require.False(t, cell.IsPure(h)) + + cell.Remove(a, h.Hash(a)) + require.Equal(t, int64(1), cell.Count) + require.False(t, cell.IsEmpty()) + require.True(t, cell.IsPure(h)) + require.Equal(t, b, cell.GetKey()) + + cell.Insert(a, h.Hash(a)) + cell.Remove(b, h.Hash(b)) + require.Equal(t, int64(1), cell.Count) + require.False(t, cell.IsEmpty()) + require.True(t, cell.IsPure(h)) + require.Equal(t, a, cell.GetKey()) +} diff --git a/lib/constants.go b/lib/constants.go deleted file mode 100644 index 71f0da9..0000000 --- a/lib/constants.go +++ /dev/null @@ -1,9 +0,0 @@ 
-package ibf - -import "math/big" - -// Contants - -var ZERO = big.NewInt(0) -var ONE = big.NewInt(1) -var NEG = big.NewInt(-1) diff --git a/lib/error.go b/lib/error.go new file mode 100644 index 0000000..2bcd2fa --- /dev/null +++ b/lib/error.go @@ -0,0 +1,11 @@ +package ibf + +import "github.com/zeebo/errs" + +// Errors for this package. +var ( + Error = errs.Class("ibf") + + ErrNoPureCell = Error.New("no pure cell") + ErrEmptySet = Error.New("empty set") +) diff --git a/lib/hash.go b/lib/hash.go index 2b632e9..59f104c 100644 --- a/lib/hash.go +++ b/lib/hash.go @@ -1,45 +1,22 @@ package ibf import ( - "encoding/json" - "math/big" - "github.com/dchest/siphash" ) -// Interface - -type Hasher interface { - Hash(key *big.Int) uint64 - - json.Marshaler - json.Unmarshaler -} - -// Hash - -type hash struct { - Key [2]uint64 `json:"key"` -} - +// Hash maintains the state for a siphash hasher. type Hash struct { - p hash + Key [2]uint64 `json:"key"` } -var _ Hasher = (*Hash)(nil) - +// NewHash returns a new siphash hasher. func NewHash(key0, key1 uint64) *Hash { - return &Hash{hash{[2]uint64{key0, key1}}} -} - -func (self *Hash) Hash(key *big.Int) uint64 { - return siphash.Hash(self.p.Key[0], self.p.Key[1], key.Bytes()) -} - -func (self *Hash) MarshalJSON() ([]byte, error) { - return json.Marshal(&self.p) + return &Hash{ + Key: [2]uint64{key0, key1}, + } } -func (self *Hash) UnmarshalJSON(data []byte) error { - return json.Unmarshal(data, &self.p) +// Hash returns the digest of the value.
+func (h *Hash) Hash(value []byte) (digest uint64) { + return siphash.Hash(h.Key[0], h.Key[1], value) } diff --git a/lib/ibf.go b/lib/ibf.go index 177ed12..459ed40 100644 --- a/lib/ibf.go +++ b/lib/ibf.go @@ -1,224 +1,222 @@ package ibf -import ( - "encoding/json" - "errors" - "math/big" -) +import "math/rand" -// Interface - -type IBFer interface { - Insert(key *big.Int) - Remove(key *big.Int) - - Pop() (*big.Int, error) - - Union(IBFer) - Subtract(IBFer) - - Invert() - - Clone() IBFer - - GetSize() uint64 - GetCells() []Celler - GetCardinality() *big.Int - - IsEmpty() bool - - json.Marshaler - json.Unmarshaler -} - -// IBF - -type ibf struct { +// IBF holds the state of an invertible bloom filter. +type IBF struct { Positioners []*Hash `json:"positioners"` Hasher *Hash `json:"hasher"` Size uint64 `json:"size"` Cells []*Cell `json:"cells"` - Cardinality *big.Int `json:"cardinality"` + Cardinality int64 `json:"cardinality"` } -type IBF struct { - p ibf -} +// NewIBF creates a new IBF of the given size. An IBF can accurately handle +// differences of approximately 2/3rds the configured size (e.g. a size of 100 +// would allow for ~66 differences to be accurately retrieved). 3 positioners +// and a hasher are created using the output from a random number generator +// initialized with the seed. +func NewIBF(size uint64, seed int64) *IBF { + rng := rand.New(rand.NewSource(seed)) -var _ IBFer = (*IBF)(nil) + positioners := []*Hash{ + NewHash(uint64(rng.Int63()), uint64(rng.Int63())), + NewHash(uint64(rng.Int63()), uint64(rng.Int63())), + NewHash(uint64(rng.Int63()), uint64(rng.Int63())), + } + hasher := NewHash(uint64(rng.Int63()), uint64(rng.Int63())) + + return NewIBFWithHash(size, positioners, hasher) +} -func NewIBF(size uint64, positioners []*Hash, hasher *Hash) *IBF { +// NewIBFWithHash creates a new IBF with the provided positioners and hasher. +// It will use the given hashers for positioning and computing the key hashes.
+// The positioners must all be initialized with different seeds to ensure they +// do not produce the same positions for the same key. +func NewIBFWithHash(size uint64, positioners []*Hash, hasher *Hash) *IBF { cells := make([]*Cell, size) - for i, _ := range cells { + for i := range cells { cells[i] = NewCell() } - return &IBF{ibf{ + return &IBF{ Positioners: positioners, Hasher: hasher, Size: size, Cells: cells, - Cardinality: big.NewInt(0), - }} -} - -func NewEmptyIBF() *IBF { - return &IBF{} + Cardinality: 0, + } } -func (self *IBF) Insert(key *big.Int) { - hash := self.p.Hasher.Hash(key) +// getPositions returns the cells that the key would occupy. It always returns +// len(positioners) many cells ensuring that no key is under represented. +func (i *IBF) getPositions(key []byte, digest uint64) (cells []*Cell) { + cells = make([]*Cell, len(i.Positioners)) used := map[uint64]bool{} - for _, positioner := range self.p.Positioners { - index := positioner.Hash(key) % self.p.Size + for j, positioner := range i.Positioners { + index := positioner.Hash(key) % i.Size + + // NOTE: We need to keep looking if we have found a collision + // with an already used position. for used[index] { - index = (index + 1) % self.p.Size + index = (index + 1) % i.Size } - used[index] = true - self.p.Cells[index].Insert(key, hash) + used[index] = true + cells[j] = i.Cells[index] } - self.p.Cardinality.Add(self.p.Cardinality, ONE) + return cells } -func (self *IBF) Remove(key *big.Int) { - total := len(self.p.Positioners) - cells := make([]*Cell, total) - hash := self.p.Hasher.Hash(key) - used := map[uint64]bool{} +// Insert adds the key to the set. +// +// NOTE: This does not know if the key already exists and will add it +// unconditionally. If the key did already exist in the set, then that +// effectively would remove it! +func (i *IBF) Insert(key []byte) { + digest := i.Hasher.Hash(key) + cells := i.getPositions(key, digest) - // Find all the positions. 
- for i, positioner := range self.p.Positioners { - index := positioner.Hash(key) % self.p.Size - for used[index] { - index = (index + 1) % self.p.Size - } - used[index] = true - - cells[i] = self.p.Cells[index] + for _, c := range cells { + c.Insert(key, digest) } - // Determine if all cells are filled. - all_filled := true - for _, cell := range cells { - if cell.IsEmpty() { - all_filled = false - break - } - } - if !all_filled { - // It can't be in the set if all cells aren't filled. - return - } + i.Cardinality++ +} - for _, cell := range cells { - cell.Remove(key, hash) +// Remove deletes the key from the set. +// +// NOTE: This does not know if the key already exists and will remove it +// unconditionally. If the key did not already exist in the set, then that +// effectively would add it! +func (i *IBF) Remove(key []byte) { + digest := i.Hasher.Hash(key) + cells := i.getPositions(key, digest) + + for _, c := range cells { + c.Remove(key, digest) } - self.p.Cardinality.Sub(self.p.Cardinality, ONE) + i.Cardinality-- } -func (self *IBF) Invert() { - for _, cell := range self.p.Cells { +// Invert flips the cardinality of the set and the cells. As if all elements +// had been removed from the set instead of added. +func (i *IBF) Invert() { + for _, cell := range i.Cells { cell.Invert() } - self.p.Cardinality.Mul(self.p.Cardinality, NEG) + i.Cardinality *= -1 } -func (self *IBF) Pop() (*big.Int, error) { - all_empty := true +// Pop finds a key in a pure cell, removes it from the set, and returns it. If +// no pure cell can be found it returns ErrNoPureCell indicating that there are +// more elements in the set, but they cannot be popped. If the set is empty it +// returns ErrEmptySet. +func (i *IBF) Pop() ([]byte, error) { + allEmpty := true // Look for a pure cell.
- for _, cell := range self.p.Cells { - if cell.IsPure(self.p.Hasher) { - result := big.NewInt(0).Set(cell.GetId()) - self.Remove(result) - return result, nil + for _, cell := range i.Cells { + if cell.IsPure(i.Hasher) { + key := cell.GetKey() + i.Remove(key) + + return key, nil } - if all_empty && !cell.IsEmpty() { - all_empty = false + if allEmpty && !cell.IsEmpty() { + allEmpty = false } } // Are there non-empty cells? - if !all_empty { - return nil, errors.New("More elements in the set, but unable to retrieve.") + if !allEmpty { + return nil, ErrNoPureCell } // Empty set, nothing to pop. - return nil, errors.New("Empty set.") + return nil, ErrEmptySet } -func (self *IBF) Union(ibf IBFer) { - cells := ibf.GetCells() +// Union inserts all the elements from the provided set to this set. +// +// NOTE: This assumes the two sets are disjoint and configured the same. If the +// two sets are not disjoint this will actually perform a symmetric difference +// and the cardinality will be incorrect! If the two sets are not configured +// the same then the behavior is undefined and could potentially panic. +func (i *IBF) Union(other *IBF) { + cells := other.GetCells() - for i := 0; i < len(self.p.Cells); i++ { - self.p.Cells[i].Union(cells[i]) + for j := 0; j < len(i.Cells); j++ { + i.Cells[j].Union(cells[j]) } - self.p.Cardinality.Add(self.p.Cardinality, ibf.GetCardinality()) + i.Cardinality += other.GetCardinality() } -func (self *IBF) Subtract(ibf IBFer) { - cells := ibf.GetCells() +// Subtract removes all the elements from the provided set from this set. +// +// NOTE: This assumes the other set is a subset of this one. If that isn't true +// then this will actually perform a symmetric difference and the cardinality +// will be incorrect! If the two sets are not configured the same then the +// behavior is undefined and could potentially panic. 
+func (i *IBF) Subtract(other *IBF) { + cells := other.GetCells() - for i := 0; i < len(self.p.Cells); i++ { - self.p.Cells[i].Subtract(cells[i]) + for j := 0; j < len(i.Cells); j++ { + i.Cells[j].Subtract(cells[j]) } - self.p.Cardinality.Sub(self.p.Cardinality, ibf.GetCardinality()) + i.Cardinality -= other.GetCardinality() } -func (self *IBF) Clone() IBFer { - clone := NewIBF(self.p.Size, self.p.Positioners, self.p.Hasher) - for i, c := range self.p.Cells { - clone.p.Cells[i] = c.Clone().(*Cell) +// Clone returns a copy of this set. +func (i *IBF) Clone() (clone *IBF) { + clone = NewIBFWithHash(i.Size, i.Positioners, i.Hasher) + + for j, c := range i.Cells { + clone.Cells[j] = c.Clone() } + clone.Cardinality = i.Cardinality + return clone } -func (self *IBF) GetSize() uint64 { - return self.p.Size +// GetSize returns the IBF's size. +func (i *IBF) GetSize() uint64 { + return i.Size } -func (self *IBF) GetCells() []Celler { - cells := make([]Celler, len(self.p.Cells)) - for i, c := range self.p.Cells { - cells[i] = c - } - return cells +// GetCells returns the IBF's cells. +func (i *IBF) GetCells() []*Cell { + return i.Cells } -func (self *IBF) GetCardinality() *big.Int { - return big.NewInt(0).Set(self.p.Cardinality) +// GetCardinality returns the IBF's cardinality. +func (i *IBF) GetCardinality() int64 { + return i.Cardinality } -func (self *IBF) IsEmpty() bool { - all_empty := true +// IsEmpty returns true if all the cells are empty and the cardinality is zero. 
+func (i *IBF) IsEmpty() bool { + if i.Cardinality != 0 { + return false + } - for _, cell := range self.p.Cells { - if !cell.IsEmpty() && cell.GetCount() > 0 { - all_empty = false - break + for _, cell := range i.Cells { + if !cell.IsEmpty() { + return false } } - return all_empty -} - -func (self *IBF) MarshalJSON() ([]byte, error) { - return json.Marshal(&self.p) -} - -func (self *IBF) UnmarshalJSON(data []byte) error { - return json.Unmarshal(data, &self.p) + return true } diff --git a/lib/ibf_test.go b/lib/ibf_test.go new file mode 100644 index 0000000..c20b3c6 --- /dev/null +++ b/lib/ibf_test.go @@ -0,0 +1,101 @@ +package ibf + +import ( + "testing" + + "github.com/davecgh/go-spew/spew" + fuzz "github.com/google/gofuzz" + "github.com/stretchr/testify/require" +) + +func TestIBF(t *testing.T) { + t.Run("simple", func(t *testing.T) { + vs := []string{ + "a", + "b", + "c", + "d", + } + + i0 := NewIBF(3, 1) + + for _, v := range vs { + i0.Insert([]byte(v)) + } + + require.Equal(t, int64(len(vs)), i0.Cardinality) + + i1 := i0.Clone() + i1.Remove([]byte(vs[0])) + require.Equal(t, int64(len(vs)), i0.Cardinality) + require.Equal(t, int64(len(vs)-1), i1.Cardinality) + + i2 := i0.Clone() + i2.Subtract(i1) + require.Equal(t, int64(1), i2.Cardinality) + + value, err := i2.Pop() + require.NoError(t, err) + require.Equal(t, vs[0], string(value)) + }) + + t.Run("leading zeros", func(t *testing.T) { + vs := [][]byte{ + []byte{0x00, 0x00, 0x00, 0x01}, + []byte{0x00, 0x00, 0x01}, + []byte{0x00, 0x01}, + []byte{0x01}, + } + + i0 := NewIBF(3, 2) + + for _, v := range vs { + i0.Insert(v) + } + + i1 := i0.Clone() + i1.Remove(vs[0]) + + i2 := i0.Clone() + i2.Subtract(i1) + + value, err := i2.Pop() + require.NoError(t, err) + require.Equal(t, vs[0], value) + }) + + t.Run("fuzz", func(t *testing.T) { + f := fuzz.New().NilChance(0).NumElements(0, 1024) + + vs := make([][]byte, 10) + + for i := 0; i < len(vs); i++ { + var data []byte + f.Fuzz(&data) + + vs[i] = data + } + + 
t.Log("vs:", spew.Sdump(vs)) + + var seed int64 + f.Fuzz(&seed) + + i0 := NewIBF(3, seed) + + for _, v := range vs { + i0.Insert(v) + } + + i1 := i0.Clone() + i1.Remove(vs[0]) + + i2 := i0.Clone() + i2.Subtract(i1) + + value, err := i2.Pop() + t.Log("value:", spew.Sdump(value)) + require.NoError(t, err) + require.Equal(t, vs[0], value) + }) +} diff --git a/walkthrough.md b/walkthrough.md index 3bf4ec5..ad92bf4 100644 --- a/walkthrough.md +++ b/walkthrough.md @@ -149,53 +149,73 @@ The actual IBF cells, composed as discussed of an `id`, `hash`, and `count`. "size": 10, "cells": [ { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 } ], @@ -218,53 +238,73 @@ $ jq '.cells' demo.ibf ```json [ { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18331428859966820, - "hash": 11333042293537241000, + "key": { + "data": "AAAAAAAAAAdBIFZhbHVl" + }, + "digest": 11333042293537241000, "count": 1 }, { - "id": 0, - "hash": 0, + "key": { + "data": 
"AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18331428859966820, - "hash": 11333042293537241000, + "key": { + "data": "AAAAAAAAAAdBIFZhbHVl" + }, + "digest": 11333042293537241000, "count": 1 }, { - "id": 18331428859966820, - "hash": 11333042293537241000, + "key": { + "data": "AAAAAAAAAAdBIFZhbHVl" + }, + "digest": 11333042293537241000, "count": 1 } ] @@ -295,53 +335,73 @@ $ jq '.cells' demo.ibf ```json [ { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18331428859966820, - "hash": 11333042293537241000, + "key": { + "data": "AAAAAAAAAAdBIFZhbHVl" + }, + "digest": 11333042293537241000, "count": 1 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18612903836677476, - "hash": 10437079027557620000, + "key": { + "data": "AAAAAAAAAAdCIFZhbHVl" + }, + "digest": 10437079027557620000, "count": 1 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18894378813388132, - "hash": 9477556251531188000, + "key": { + "data": "AAAAAAAAAAdDIFZhbHVl" + }, + "digest": 9477556251531188000, "count": 1 }, { - "id": 281474976710656, - "hash": 1391892804557128400, + "key": { + "data": "AAAAAAAAAAABAAAAAAAA" + }, + "digest": 1391892804557128400, "count": 2 }, { - "id": 844424930131968, - "hash": 977555963097886000, + "key": { + "data": "AAAAAAAAAAADAAAAAAAA" + }, + "digest": 977555963097886000, "count": 2 }, { - "id": 562949953421312, - 
"hash": 2215778548118941700, + "key": { + "data": "AAAAAAAAAAACAAAAAAAA" + }, + "digest": 2215778548118941700, "count": 2 } ] @@ -359,53 +419,73 @@ $ jq '.cells' demo.ibf ```json [ { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18331428859966820, - "hash": 11333042293537241000, + "key": { + "data": "AAAAAAAAAAdBIFZhbHVl" + }, + "digest": 11333042293537241000, "count": 1 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAAAAAAAAAAA" + }, + "digest": 0, "count": 0 }, { - "id": 0, - "hash": 0, + "key": { + "data": "AAAAAAAAAAA=" + }, + "digest": 0, "count": 0 }, { - "id": 18894378813388132, - "hash": 9477556251531188000, + "key": { + "data": "AAAAAAAAAAdDIFZhbHVl" + }, + "digest": 9477556251531188000, "count": 1 }, { - "id": 18894378813388132, - "hash": 9477556251531188000, + "key": { + "data": "AAAAAAAAAAdDIFZhbHVl" + }, + "digest": 9477556251531188000, "count": 1 }, { - "id": 18331428859966820, - "hash": 11333042293537241000, + "key": { + "data": "AAAAAAAAAAdBIFZhbHVl" + }, + "digest": 11333042293537241000, "count": 1 }, { - "id": 562949953421312, - "hash": 2215778548118941700, + "key": { + "data": "AAAAAAAAAAACAAAAAAAA" + }, + "digest": 2215778548118941700, "count": 2 } ]