From 16c7c2ef22d8f9c435d79e637138e40092d53c93 Mon Sep 17 00:00:00 2001 From: Morgan Date: Thu, 22 Feb 2024 19:24:13 +0100 Subject: [PATCH] feat(examples): add package cford32, add method `seqid.ID.String` (#1572) This PR adds a new package to examples, `cford32`, meant primarily to be used in package `seqid` as an AVL- and human-friendly ID, which implements an encoding scheme with the base32 encoding scheme [specified by Douglas Crockford](https://www.crockford.com/base32.html). It additionally implements a `uint64` encoding scheme I created, which encodes "tiny" (< 17 billion) values as 7-byte strings, and can encode the full `uint64` range with 13 bytes. The package is largely a fork of Go's `encoding/base32`, intentionally forked to have a very familiar API, while needing to be forked to implement some distinctive features of the encoding (like case insensitivity, and mapping in decoding all of the symbols `l L i I 1` to the same value). The necessity of this package comes from a solution that I implemented in GnoChess: https://github.com/gnolang/gnochess/blob/9aa813fbb86fec377a85fc4528411d652fc780ff/realm/chess.gno#L286-L295 Essentially, GnoChess used simple sequential IDs for its saved entities (like games). To work well with AVL's sorted keys, it padded the generated strings to the left with up to 9 zeroes. This, of course, breaks for values `>= 1e10` (10 billion), as `("2" + "000000000") > ("10" + "000000000")`. 
--- examples/gno.land/p/demo/cford32/LICENSE | 27 + examples/gno.land/p/demo/cford32/README.md | 76 ++ examples/gno.land/p/demo/cford32/cford32.gno | 700 ++++++++++++++++++ .../gno.land/p/demo/cford32/cford32_test.gno | 631 ++++++++++++++++ examples/gno.land/p/demo/cford32/gno.mod | 1 + examples/gno.land/p/demo/seqid/gno.mod | 2 + examples/gno.land/p/demo/seqid/seqid.gno | 40 +- examples/gno.land/p/demo/seqid/seqid_test.gno | 27 +- tm2/pkg/std/memfile.go | 2 +- 9 files changed, 1501 insertions(+), 5 deletions(-) create mode 100644 examples/gno.land/p/demo/cford32/LICENSE create mode 100644 examples/gno.land/p/demo/cford32/README.md create mode 100644 examples/gno.land/p/demo/cford32/cford32.gno create mode 100644 examples/gno.land/p/demo/cford32/cford32_test.gno create mode 100644 examples/gno.land/p/demo/cford32/gno.mod diff --git a/examples/gno.land/p/demo/cford32/LICENSE b/examples/gno.land/p/demo/cford32/LICENSE new file mode 100644 index 00000000000..6a66aea5eaf --- /dev/null +++ b/examples/gno.land/p/demo/cford32/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/examples/gno.land/p/demo/cford32/README.md b/examples/gno.land/p/demo/cford32/README.md new file mode 100644 index 00000000000..30cc9372e55 --- /dev/null +++ b/examples/gno.land/p/demo/cford32/README.md @@ -0,0 +1,76 @@ +# cford32 + +``` +package cford32 // import "gno.land/p/demo/cford32" + +Package cford32 implements a base32-like encoding/decoding package, with the +encoding scheme specified by Douglas Crockford. + +From the website, the requirements of said encoding scheme are to: + + - Be human readable and machine readable. + - Be compact. Humans have difficulty in manipulating long strings of arbitrary + symbols. + - Be error resistant. Entering the symbols must not require keyboarding + gymnastics. + - Be pronounceable. Humans should be able to accurately transmit the symbols + to other humans using a telephone. + +This is slightly different from a simple difference in encoding table from +the Go's stdlib `encoding/base32`, as when decoding the characters i I l L are +parsed as 1, and o O is parsed as 0. + +This package additionally provides ways to encode uint64's efficiently, as well +as efficient encoding to a lowercase variation of the encoding. The encodings +never use paddings. 
+ +# Uint64 Encoding + +Aside from lower/uppercase encoding, there is a compact encoding, allowing to +encode all values in [0,2^34), and the full encoding, allowing all values in +[0,2^64). The compact encoding uses 7 characters, and the full encoding uses 13 +characters. Both are parsed unambiguously by the Uint64 decoder. + +The compact encodings have the first character between ['0','f'], while the +full encoding's first character ranges between ['g','z']. Practically, in your +usage of the package, you should consider which one to use and stick with it, +while considering that the compact encoding, once it reaches 2^34, automatically +switches to the full encoding. The properties of the generated strings are still +maintained: for instance, any two encoded uint64s x,y consistently generated +with the compact encoding, if the numeric value is x < y, will also be x < y in +lexical ordering. However, values [0,2^34) have a "double encoding", which if +mixed together lose the lexical ordering property. + +The Uint64 encoding is most useful for generating string versions of Uint64 IDs. +Practically, it allows you to retain sleek and compact IDs for your application +for the first 2^34 (>17 billion) entities, while seamlessly rolling over to the +full encoding should you exceed that. You are encouraged to use it unless you +have a requirement or preferences for IDs consistently being always the same +size. + +To use the cford32 encoding for IDs, you may want to consider using package +gno.land/p/demo/seqid. 
+ +[specified by Douglas Crockford]: https://www.crockford.com/base32.html + +func AppendCompact(id uint64, b []byte) []byte +func AppendDecode(dst, src []byte) ([]byte, error) +func AppendEncode(dst, src []byte) []byte +func AppendEncodeLower(dst, src []byte) []byte +func Decode(dst, src []byte) (n int, err error) +func DecodeString(s string) ([]byte, error) +func DecodedLen(n int) int +func Encode(dst, src []byte) +func EncodeLower(dst, src []byte) +func EncodeToString(src []byte) string +func EncodeToStringLower(src []byte) string +func EncodedLen(n int) int +func NewDecoder(r io.Reader) io.Reader +func NewEncoder(w io.Writer) io.WriteCloser +func NewEncoderLower(w io.Writer) io.WriteCloser +func PutCompact(id uint64) []byte +func PutUint64(id uint64) [13]byte +func PutUint64Lower(id uint64) [13]byte +func Uint64(b []byte) (uint64, error) +type CorruptInputError int64 +``` diff --git a/examples/gno.land/p/demo/cford32/cford32.gno b/examples/gno.land/p/demo/cford32/cford32.gno new file mode 100644 index 00000000000..effa32bef88 --- /dev/null +++ b/examples/gno.land/p/demo/cford32/cford32.gno @@ -0,0 +1,700 @@ +// Modified from the Go Source code for encoding/base32. +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cford32 implements a base32-like encoding/decoding package, with the +// encoding scheme [specified by Douglas Crockford]. +// +// From the website, the requirements of said encoding scheme are to: +// +// - Be human readable and machine readable. +// - Be compact. Humans have difficulty in manipulating long strings of arbitrary symbols. +// - Be error resistant. Entering the symbols must not require keyboarding gymnastics. +// - Be pronounceable. Humans should be able to accurately transmit the symbols to other humans using a telephone. 
+// +// This is slightly different from a simple difference in encoding table from +// the Go's stdlib `encoding/base32`, as when decoding the characters i I l L are +// parsed as 1, and o O is parsed as 0. +// +// This package additionally provides ways to encode uint64's efficiently, +// as well as efficient encoding to a lowercase variation of the encoding. +// The encodings never use paddings. +// +// # Uint64 Encoding +// +// Aside from lower/uppercase encoding, there is a compact encoding, allowing +// to encode all values in [0,2^34), and the full encoding, allowing all +// values in [0,2^64). The compact encoding uses 7 characters, and the full +// encoding uses 13 characters. Both are parsed unambiguously by the Uint64 +// decoder. +// +// The compact encodings have the first character between ['0','f'], while the +// full encoding's first character ranges between ['g','z']. Practically, in +// your usage of the package, you should consider which one to use and stick +// with it, while considering that the compact encoding, once it reaches 2^34, +// automatically switches to the full encoding. The properties of the generated +// strings are still maintained: for instance, any two encoded uint64s x,y +// consistently generated with the compact encoding, if the numeric value is +// x < y, will also be x < y in lexical ordering. However, values [0,2^34) have a +// "double encoding", which if mixed together lose the lexical ordering property. +// +// The Uint64 encoding is most useful for generating string versions of Uint64 +// IDs. Practically, it allows you to retain sleek and compact IDs for your +// application for the first 2^34 (>17 billion) entities, while seamlessly +// rolling over to the full encoding should you exceed that. You are encouraged +// to use it unless you have a requirement or preferences for IDs consistently +// being always the same size. 
+// +// To use the cford32 encoding for IDs, you may want to consider using package +// [gno.land/p/demo/seqid]. +// +// [specified by Douglas Crockford]: https://www.crockford.com/base32.html +package cford32 + +import ( + "io" + "strconv" +) + +const ( + encTable = "0123456789ABCDEFGHJKMNPQRSTVWXYZ" + encTableLower = "0123456789abcdefghjkmnpqrstvwxyz" + + // each line is 16 bytes + decTable = "" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 00-0f + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 10-1f + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 20-2f + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + // 30-3f + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x01\x12\x13\x01\x14\x15\x00" + // 40-4f + "\x16\x17\x18\x19\x1a\xff\x1b\x1c\x1d\x1e\x1f\xff\xff\xff\xff\xff" + // 50-5f + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x01\x12\x13\x01\x14\x15\x00" + // 60-6f + "\x16\x17\x18\x19\x1a\xff\x1b\x1c\x1d\x1e\x1f\xff\xff\xff\xff\xff" + // 70-7f + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 80-ff (not ASCII) + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +) + +// CorruptInputError is returned by parsing functions when an invalid character +// in the input is found. The integer value represents the byte index where +// the error occurred. +// +// This is typically because the given character does not exist in the encoding. 
+type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "illegal cford32 data at input byte " + strconv.FormatInt(int64(e), 10) +} + +// Uint64 parses a cford32-encoded byte slice into a uint64. +// +// - The parser requires all provided character to be valid cford32 characters. +// - The parser disregards case. +// - If the first character is '0' <= c <= 'f', then the passed value is assumed +// encoded in the compact encoding, and must be 7 characters long. +// - If the first character is 'g' <= c <= 'z', then the passed value is +// assumed encoded in the full encoding, and must be 13 characters long. +// +// If any of these requirements fail, a CorruptInputError will be returned. +func Uint64(b []byte) (uint64, error) { + switch { + default: + return 0, CorruptInputError(0) + case len(b) == 7 && b[0] >= '0' && b[0] <= 'f': + decVals := [7]byte{ + decTable[b[0]], + decTable[b[1]], + decTable[b[2]], + decTable[b[3]], + decTable[b[4]], + decTable[b[5]], + decTable[b[6]], + } + for idx, v := range decVals { + if v >= 32 { + return 0, CorruptInputError(idx) + } + } + + return 0 + + uint64(decVals[0])<<30 | + uint64(decVals[1])<<25 | + uint64(decVals[2])<<20 | + uint64(decVals[3])<<15 | + uint64(decVals[4])<<10 | + uint64(decVals[5])<<5 | + uint64(decVals[6]), nil + case len(b) == 13 && b[0] >= 'g' && b[0] <= 'z': + decVals := [13]byte{ + decTable[b[0]] & 0x0F, // disregard high bit + decTable[b[1]], + decTable[b[2]], + decTable[b[3]], + decTable[b[4]], + decTable[b[5]], + decTable[b[6]], + decTable[b[7]], + decTable[b[8]], + decTable[b[9]], + decTable[b[10]], + decTable[b[11]], + decTable[b[12]], + } + for idx, v := range decVals { + if v >= 32 { + return 0, CorruptInputError(idx) + } + } + + return 0 + + uint64(decVals[0])<<60 | + uint64(decVals[1])<<55 | + uint64(decVals[2])<<50 | + uint64(decVals[3])<<45 | + uint64(decVals[4])<<40 | + uint64(decVals[5])<<35 | + uint64(decVals[6])<<30 | + uint64(decVals[7])<<25 | + 
uint64(decVals[8])<<20 | + uint64(decVals[9])<<15 | + uint64(decVals[10])<<10 | + uint64(decVals[11])<<5 | + uint64(decVals[12]), nil + } +} + +const mask = 31 + +// PutUint64 returns a cford32-encoded byte array. +func PutUint64(id uint64) [13]byte { + return [13]byte{ + encTable[id>>60&mask|0x10], // specify full encoding + encTable[id>>55&mask], + encTable[id>>50&mask], + encTable[id>>45&mask], + encTable[id>>40&mask], + encTable[id>>35&mask], + encTable[id>>30&mask], + encTable[id>>25&mask], + encTable[id>>20&mask], + encTable[id>>15&mask], + encTable[id>>10&mask], + encTable[id>>5&mask], + encTable[id&mask], + } +} + +// PutUint64Lower returns a cford32-encoded byte array, swapping uppercase +// letters with lowercase. +// +// For more information on how the value is encoded, see [Uint64]. +func PutUint64Lower(id uint64) [13]byte { + return [13]byte{ + encTableLower[id>>60&mask|0x10], + encTableLower[id>>55&mask], + encTableLower[id>>50&mask], + encTableLower[id>>45&mask], + encTableLower[id>>40&mask], + encTableLower[id>>35&mask], + encTableLower[id>>30&mask], + encTableLower[id>>25&mask], + encTableLower[id>>20&mask], + encTableLower[id>>15&mask], + encTableLower[id>>10&mask], + encTableLower[id>>5&mask], + encTableLower[id&mask], + } +} + +// PutCompact returns a cford32-encoded byte slice, using the compact +// representation of cford32 described in the package documentation where +// possible (all values of id < 1<<34). The lowercase encoding is used. +// +// The resulting byte slice will be 7 bytes long for all compact values, +// and 13 bytes long for values of id >= 1<<34 (the full encoding). +func PutCompact(id uint64) []byte { + return AppendCompact(id, nil) +} + +// AppendCompact works like [PutCompact] but appends to the given byte slice +// instead of allocating one anew. 
+func AppendCompact(id uint64, b []byte) []byte { + const maxCompact = 1 << 34 + if id < maxCompact { + return append(b, + encTableLower[id>>30&mask], + encTableLower[id>>25&mask], + encTableLower[id>>20&mask], + encTableLower[id>>15&mask], + encTableLower[id>>10&mask], + encTableLower[id>>5&mask], + encTableLower[id&mask], + ) + } + return append(b, + encTableLower[id>>60&mask|0x10], + encTableLower[id>>55&mask], + encTableLower[id>>50&mask], + encTableLower[id>>45&mask], + encTableLower[id>>40&mask], + encTableLower[id>>35&mask], + encTableLower[id>>30&mask], + encTableLower[id>>25&mask], + encTableLower[id>>20&mask], + encTableLower[id>>15&mask], + encTableLower[id>>10&mask], + encTableLower[id>>5&mask], + encTableLower[id&mask], + ) +} + +func DecodedLen(n int) int { + return n/8*5 + n%8*5/8 +} + +func EncodedLen(n int) int { + return n/5*8 + (n%5*8+4)/5 +} + +// Encode encodes src using the encoding enc, +// writing [EncodedLen](len(src)) bytes to dst. +// +// The encoding does not contain any padding, unlike Go's base32. +func Encode(dst, src []byte) { + // Copied from encoding/base32/base32.go (go1.22) + if len(src) == 0 { + return + } + + di, si := 0, 0 + n := (len(src) / 5) * 5 + for si < n { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]) + lo := hi<<8 | uint32(src[si+4]) + + dst[di+0] = encTable[(hi>>27)&0x1F] + dst[di+1] = encTable[(hi>>22)&0x1F] + dst[di+2] = encTable[(hi>>17)&0x1F] + dst[di+3] = encTable[(hi>>12)&0x1F] + dst[di+4] = encTable[(hi>>7)&0x1F] + dst[di+5] = encTable[(hi>>2)&0x1F] + dst[di+6] = encTable[(lo>>5)&0x1F] + dst[di+7] = encTable[(lo)&0x1F] + + si += 5 + di += 8 + } + + // Add the remaining small block + remain := len(src) - si + if remain == 0 { + return + } + + // Encode the remaining bytes in reverse order. 
+ val := uint32(0) + switch remain { + case 4: + val |= uint32(src[si+3]) + dst[di+6] = encTable[val<<3&0x1F] + dst[di+5] = encTable[val>>2&0x1F] + fallthrough + case 3: + val |= uint32(src[si+2]) << 8 + dst[di+4] = encTable[val>>7&0x1F] + fallthrough + case 2: + val |= uint32(src[si+1]) << 16 + dst[di+3] = encTable[val>>12&0x1F] + dst[di+2] = encTable[val>>17&0x1F] + fallthrough + case 1: + val |= uint32(src[si+0]) << 24 + dst[di+1] = encTable[val>>22&0x1F] + dst[di+0] = encTable[val>>27&0x1F] + } +} + +// EncodeLower is like [Encode], but uses the lowercase +func EncodeLower(dst, src []byte) { + // Copied from encoding/base32/base32.go (go1.22) + if len(src) == 0 { + return + } + + di, si := 0, 0 + n := (len(src) / 5) * 5 + for si < n { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]) + lo := hi<<8 | uint32(src[si+4]) + + dst[di+0] = encTableLower[(hi>>27)&0x1F] + dst[di+1] = encTableLower[(hi>>22)&0x1F] + dst[di+2] = encTableLower[(hi>>17)&0x1F] + dst[di+3] = encTableLower[(hi>>12)&0x1F] + dst[di+4] = encTableLower[(hi>>7)&0x1F] + dst[di+5] = encTableLower[(hi>>2)&0x1F] + dst[di+6] = encTableLower[(lo>>5)&0x1F] + dst[di+7] = encTableLower[(lo)&0x1F] + + si += 5 + di += 8 + } + + // Add the remaining small block + remain := len(src) - si + if remain == 0 { + return + } + + // Encode the remaining bytes in reverse order. 
+ val := uint32(0) + switch remain { + case 4: + val |= uint32(src[si+3]) + dst[di+6] = encTableLower[val<<3&0x1F] + dst[di+5] = encTableLower[val>>2&0x1F] + fallthrough + case 3: + val |= uint32(src[si+2]) << 8 + dst[di+4] = encTableLower[val>>7&0x1F] + fallthrough + case 2: + val |= uint32(src[si+1]) << 16 + dst[di+3] = encTableLower[val>>12&0x1F] + dst[di+2] = encTableLower[val>>17&0x1F] + fallthrough + case 1: + val |= uint32(src[si+0]) << 24 + dst[di+1] = encTableLower[val>>22&0x1F] + dst[di+0] = encTableLower[val>>27&0x1F] + } +} + +// AppendEncode appends the cford32 encoded src to dst +// and returns the extended buffer. +func AppendEncode(dst, src []byte) []byte { + n := EncodedLen(len(src)) + dst = grow(dst, n) + Encode(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +// AppendEncodeLower appends the lowercase cford32 encoded src to dst +// and returns the extended buffer. +func AppendEncodeLower(dst, src []byte) []byte { + n := EncodedLen(len(src)) + dst = grow(dst, n) + EncodeLower(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +func grow(s []byte, n int) []byte { + // slices.Grow + if n -= cap(s) - len(s); n > 0 { + news := make([]byte, cap(s)+n) + copy(news[:cap(s)], s[:cap(s)]) + return news[:len(s)] + } + return s +} + +// EncodeToString returns the cford32 encoding of src. +func EncodeToString(src []byte) string { + buf := make([]byte, EncodedLen(len(src))) + Encode(buf, src) + return string(buf) +} + +// EncodeToStringLower returns the cford32 lowercase encoding of src. 
+func EncodeToStringLower(src []byte) string { + buf := make([]byte, EncodedLen(len(src))) + EncodeLower(buf, src) + return string(buf) +} + +func decode(dst, src []byte) (n int, err error) { + dsti := 0 + olen := len(src) + + for len(src) > 0 { + // Decode quantum using the base32 alphabet + var dbuf [8]byte + dlen := 8 + + for j := 0; j < 8; { + if len(src) == 0 { + // We have reached the end and are not expecting any padding + dlen = j + break + } + in := src[0] + src = src[1:] + dbuf[j] = decTable[in] + if dbuf[j] == 0xFF { + return n, CorruptInputError(olen - len(src) - 1) + } + j++ + } + + // Pack 8x 5-bit source blocks into 5 byte destination + // quantum + switch dlen { + case 8: + dst[dsti+4] = dbuf[6]<<5 | dbuf[7] + n++ + fallthrough + case 7: + dst[dsti+3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3 + n++ + fallthrough + case 5: + dst[dsti+2] = dbuf[3]<<4 | dbuf[4]>>1 + n++ + fallthrough + case 4: + dst[dsti+1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4 + n++ + fallthrough + case 2: + dst[dsti+0] = dbuf[0]<<3 | dbuf[1]>>2 + n++ + } + dsti += 5 + } + return n, nil +} + +type encoder struct { + err error + w io.Writer + enc func(dst, src []byte) + buf [5]byte // buffered data waiting to be encoded + nbuf int // number of bytes in buf + out [1024]byte // output buffer +} + +func NewEncoder(w io.Writer) io.WriteCloser { + return &encoder{w: w, enc: Encode} +} + +func NewEncoderLower(w io.Writer) io.WriteCloser { + return &encoder{w: w, enc: EncodeLower} +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // Leading fringe. + if e.nbuf > 0 { + var i int + for i = 0; i < len(p) && e.nbuf < 5; i++ { + e.buf[e.nbuf] = p[i] + e.nbuf++ + } + n += i + p = p[i:] + if e.nbuf < 5 { + return + } + e.enc(e.out[0:], e.buf[0:]) + if _, e.err = e.w.Write(e.out[0:8]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + + // Large interior chunks. 
+ for len(p) >= 5 { + nn := len(e.out) / 8 * 5 + if nn > len(p) { + nn = len(p) + nn -= nn % 5 + } + e.enc(e.out[0:], p[0:nn]) + if _, e.err = e.w.Write(e.out[0 : nn/5*8]); e.err != nil { + return n, e.err + } + n += nn + p = p[nn:] + } + + // Trailing fringe. + copy(e.buf[:], p) + e.nbuf = len(p) + n += len(p) + return +} + +// Close flushes any pending output from the encoder. +// It is an error to call Write after calling Close. +func (e *encoder) Close() error { + // If there's anything left in the buffer, flush it out + if e.err == nil && e.nbuf > 0 { + e.enc(e.out[0:], e.buf[0:e.nbuf]) + encodedLen := EncodedLen(e.nbuf) + e.nbuf = 0 + _, e.err = e.w.Write(e.out[0:encodedLen]) + } + return e.err +} + +// Decode decodes src using cford32. It writes at most +// [DecodedLen](len(src)) bytes to dst and returns the number of bytes +// written. If src contains invalid cford32 data, it will return the +// number of bytes successfully written and [CorruptInputError]. +// Newline characters (\r and \n) are ignored. +func Decode(dst, src []byte) (n int, err error) { + buf := make([]byte, len(src)) + l := stripNewlines(buf, src) + return decode(dst, buf[:l]) +} + +// AppendDecode appends the cford32 decoded src to dst +// and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func AppendDecode(dst, src []byte) ([]byte, error) { + n := DecodedLen(len(src)) + + dst = grow(dst, n) + dstsl := dst[len(dst) : len(dst)+n] + n, err := Decode(dstsl, src) + return dst[:len(dst)+n], err +} + +// DecodeString returns the bytes represented by the cford32 string s. +func DecodeString(s string) ([]byte, error) { + buf := []byte(s) + l := stripNewlines(buf, buf) + n, err := decode(buf, buf[:l]) + return buf[:n], err +} + +// stripNewlines removes newline characters and returns the number +// of non-newline characters copied to dst. 
+func stripNewlines(dst, src []byte) int { + offset := 0 + for _, b := range src { + if b == '\r' || b == '\n' { + continue + } + dst[offset] = b + offset++ + } + return offset +} + +type decoder struct { + err error + r io.Reader + buf [1024]byte // leftover input + nbuf int + out []byte // leftover decoded output + outbuf [1024 / 8 * 5]byte +} + +// NewDecoder constructs a new base32 stream decoder. +func NewDecoder(r io.Reader) io.Reader { + return &decoder{r: &newlineFilteringReader{r}} +} + +func readEncodedData(r io.Reader, buf []byte) (n int, err error) { + for n < 1 && err == nil { + var nn int + nn, err = r.Read(buf[n:]) + n += nn + } + return +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Use leftover decoded output from last read. + if len(d.out) > 0 { + n = copy(p, d.out) + d.out = d.out[n:] + if len(d.out) == 0 { + return n, d.err + } + return n, nil + } + + if d.err != nil { + return 0, d.err + } + + // Read nn bytes from input, bounded [8,len(d.buf)] + nn := (len(p)/5 + 1) * 8 + if nn > len(d.buf) { + nn = len(d.buf) + } + + nn, d.err = readEncodedData(d.r, d.buf[d.nbuf:nn]) + d.nbuf += nn + if d.nbuf < 1 { + return 0, d.err + } + + // Decode chunk into p, or d.out and then p if p is too small. + nr := d.nbuf + if d.err != io.EOF && nr%8 != 0 { + nr -= nr % 8 + } + nw := DecodedLen(d.nbuf) + + if nw > len(p) { + nw, err = decode(d.outbuf[0:], d.buf[0:nr]) + d.out = d.outbuf[0:nw] + n = copy(p, d.out) + d.out = d.out[n:] + } else { + n, err = decode(p, d.buf[0:nr]) + } + d.nbuf -= nr + for i := 0; i < d.nbuf; i++ { + d.buf[i] = d.buf[i+nr] + } + + if err != nil && (d.err == nil || d.err == io.EOF) { + d.err = err + } + + if len(d.out) > 0 { + // We cannot return all the decoded bytes to the caller in this + // invocation of Read, so we return a nil error to ensure that Read + // will be called again. The error stored in d.err, if any, will be + // returned with the last set of decoded bytes. 
+ return n, nil + } + + return n, d.err +} + +type newlineFilteringReader struct { + wrapped io.Reader +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + s := p[0:n] + offset := stripNewlines(s, s) + if err != nil || offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} diff --git a/examples/gno.land/p/demo/cford32/cford32_test.gno b/examples/gno.land/p/demo/cford32/cford32_test.gno new file mode 100644 index 00000000000..1a17d64c856 --- /dev/null +++ b/examples/gno.land/p/demo/cford32/cford32_test.gno @@ -0,0 +1,631 @@ +package cford32 + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "strconv" + "strings" + "testing" +) + +func TestCompactRoundtrip(t *testing.T) { + buf := make([]byte, 13) + prev := make([]byte, 13) + for i := uint64(0); i < (1 << 12); i++ { + res := AppendCompact(i, buf[:0]) + back, err := Uint64(res) + testEqual(t, "Uint64(%q) = (%d, %v), want %v", string(res), back, err, nil) + testEqual(t, "Uint64(%q) = %d, want %v", string(res), back, i) + + testEqual(t, "bytes.Compare(prev, res) = %d, want %d", bytes.Compare(prev, res), -1) + prev, buf = res, prev + } + for i := uint64(1<<34 - 1024); i < (1<<34 + 1024); i++ { + res := AppendCompact(i, buf[:0]) + back, err := Uint64(res) + // println(string(res)) + testEqual(t, "Uint64(%q) = (%d, %v), want %v", string(res), back, err, nil) + testEqual(t, "Uint64(%q) = %d, want %v", string(res), back, i) + + testEqual(t, "bytes.Compare(prev, res) = %d, want %d", bytes.Compare(prev, res), -1) + prev, buf = res, prev + } + for i := uint64(1<<64 - 5000); i != 0; i++ { + res := AppendCompact(i, buf[:0]) + back, err := Uint64(res) + testEqual(t, "Uint64(%q) = (%d, %v), want %v", string(res), back, err, nil) + testEqual(t, "Uint64(%q) = %d, want %v", string(res), back, i) + + testEqual(t, "bytes.Compare(prev, res) = %d, want %d", bytes.Compare(prev, res), 
-1) + prev, buf = res, prev + } +} + +func BenchmarkCompact(b *testing.B) { + buf := make([]byte, 13) + for i := 0; i < b.N; i++ { + _ = AppendCompact(uint64(i), buf[:0]) + } +} + +type testpair struct { + decoded, encoded string +} + +var pairs = []testpair{ + {"", ""}, + {"f", "CR"}, + {"fo", "CSQG"}, + {"foo", "CSQPY"}, + {"foob", "CSQPYRG"}, + {"fooba", "CSQPYRK1"}, + {"foobar", "CSQPYRK1E8"}, + + {"sure.", "EDTQ4S9E"}, + {"sure", "EDTQ4S8"}, + {"sur", "EDTQ4"}, + {"su", "EDTG"}, + {"leasure.", "DHJP2WVNE9JJW"}, + {"easure.", "CNGQ6XBJCMQ0"}, + {"asure.", "C5SQAWK55R"}, +} + +var bigtest = testpair{ + "Twas brillig, and the slithy toves", + "AHVP2WS0C9S6JV3CD5KJR831DSJ20X38CMG76V39EHM7J83MDXV6AWR", +} + +func testEqual(t *testing.T, msg string, args ...interface{}) bool { + t.Helper() + if args[len(args)-2] != args[len(args)-1] { + t.Errorf(msg, args...) + return false + } + return true +} + +func TestEncode(t *testing.T) { + for _, p := range pairs { + got := EncodeToString([]byte(p.decoded)) + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, got, p.encoded) + dst := AppendEncode([]byte("lead"), []byte(p.decoded)) + testEqual(t, `AppendEncode("lead", %q) = %q, want %q`, p.decoded, string(dst), "lead"+p.encoded) + } +} + +func TestEncoder(t *testing.T) { + for _, p := range pairs { + bb := &strings.Builder{} + encoder := NewEncoder(bb) + encoder.Write([]byte(p.decoded)) + encoder.Close() + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, bb.String(), p.encoded) + } +} + +func TestEncoderBuffering(t *testing.T) { + input := []byte(bigtest.decoded) + for bs := 1; bs <= 12; bs++ { + bb := &strings.Builder{} + encoder := NewEncoder(bb) + for pos := 0; pos < len(input); pos += bs { + end := pos + bs + if end > len(input) { + end = len(input) + } + n, err := encoder.Write(input[pos:end]) + testEqual(t, "Write(%q) gave error %v, want %v", input[pos:end], err, error(nil)) + testEqual(t, "Write(%q) gave length %v, want %v", input[pos:end], n, end-pos) + } + err := 
encoder.Close() + testEqual(t, "Close gave error %v, want %v", err, error(nil)) + testEqual(t, "Encoding/%d of %q = %q, want %q", bs, bigtest.decoded, bb.String(), bigtest.encoded) + } +} + +func TestDecode(t *testing.T) { + for _, p := range pairs { + dbuf := make([]byte, DecodedLen(len(p.encoded))) + count, err := decode(dbuf, []byte(p.encoded)) + testEqual(t, "Decode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "Decode(%q) = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decode(%q) = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + + dbuf, err = DecodeString(p.encoded) + testEqual(t, "DecodeString(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "DecodeString(%q) = %q, want %q", p.encoded, string(dbuf), p.decoded) + + // XXX: https://github.com/gnolang/gno/issues/1570 + dst, err := AppendDecode(append([]byte(nil), []byte("lead")...), []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("lead", %q) = %q, want %q`, p.encoded, string(dst), "lead"+p.decoded) + + dst2, err := AppendDecode(dst[:0:len(p.decoded)], []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("", %q) = %q, want %q`, p.encoded, string(dst2), p.decoded) + // XXX: https://github.com/gnolang/gno/issues/1569 + // old used &dst2[0] != &dst[0] as a check. + if len(dst) > 0 && len(dst2) > 0 && cap(dst2) != len(p.decoded) { + t.Errorf("unexpected capacity growth: got %d, want %d", cap(dst2), len(p.decoded)) + } + } +} + +// A minimal variation on strings.Reader. +// Here, we return a io.EOF immediately on Read if the read has reached the end +// of the reader. It's used to simplify TestDecoder. 
+type stringReader struct { + s string + i int64 +} + +func (r *stringReader) Read(b []byte) (n int, err error) { + if r.i >= int64(len(r.s)) { + return 0, io.EOF + } + n = copy(b, r.s[r.i:]) + r.i += int64(n) + if r.i >= int64(len(r.s)) { + return n, io.EOF + } + return +} + +func TestDecoder(t *testing.T) { + for _, p := range pairs { + decoder := NewDecoder(&stringReader{p.encoded, 0}) + dbuf := make([]byte, DecodedLen(len(p.encoded))) + count, err := decoder.Read(dbuf) + if err != nil && err != io.EOF { + t.Fatal("Read failed", err) + } + testEqual(t, "Read from %q = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decoding of %q = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + if err != io.EOF { + _, err = decoder.Read(dbuf) + } + testEqual(t, "Read from %q = %v, want %v", p.encoded, err, io.EOF) + } +} + +type badReader struct { + data []byte + errs []error + called int + limit int +} + +// Populates p with data, returns a count of the bytes written and an +// error. The error returned is taken from badReader.errs, with each +// invocation of Read returning the next error in this slice, or io.EOF, +// if all errors from the slice have already been returned. The +// number of bytes returned is determined by the size of the input buffer +// the test passes to decoder.Read and will be a multiple of 8, unless +// badReader.limit is non zero. +func (b *badReader) Read(p []byte) (int, error) { + lim := len(p) + if b.limit != 0 && b.limit < lim { + lim = b.limit + } + if len(b.data) < lim { + lim = len(b.data) + } + for i := range p[:lim] { + p[i] = b.data[i] + } + b.data = b.data[lim:] + err := io.EOF + if b.called < len(b.errs) { + err = b.errs[b.called] + } + b.called++ + return lim, err +} + +// TestIssue20044 tests that decoder.Read behaves correctly when the caller +// supplied reader returns an error. 
+func TestIssue20044(t *testing.T) { + badErr := errors.New("bad reader error") + testCases := []struct { + r badReader + res string + err error + dbuflen int + }{ + // Check valid input data accompanied by an error is processed and the error is propagated. + { + r: badReader{data: []byte("d1jprv3fexqq4v34"), errs: []error{badErr}}, + res: "helloworld", err: badErr, + }, + // Check a read error accompanied by input data consisting of newlines only is propagated. + { + r: badReader{data: []byte("\n\n\n\n\n\n\n\n"), errs: []error{badErr, nil}}, + res: "", err: badErr, + }, + // Reader will be called twice. The first time it will return 8 newline characters. The + // second time valid base32 encoded data and an error. The data should be decoded + // correctly and the error should be propagated. + { + r: badReader{data: []byte("\n\n\n\n\n\n\n\nd1jprv3fexqq4v34"), errs: []error{nil, badErr}}, + res: "helloworld", err: badErr, dbuflen: 8, + }, + // Reader returns invalid input data (too short) and an error. Verify the reader + // error is returned. + { + r: badReader{data: []byte("c"), errs: []error{badErr}}, + res: "", err: badErr, + }, + // Reader returns invalid input data (too short) but no error. Verify io.ErrUnexpectedEOF + // is returned. + // NOTE(thehowl): I don't think this should apply to us? + /* { + r: badReader{data: []byte("c"), errs: []error{nil}}, + res: "", err: io.ErrUnexpectedEOF, + },*/ + // Reader returns invalid input data and an error. Verify the reader and not the + // decoder error is returned. + { + r: badReader{data: []byte("cu"), errs: []error{badErr}}, + res: "", err: badErr, + }, + // Reader returns valid data and io.EOF. Check data is decoded and io.EOF is propagated. + { + r: badReader{data: []byte("csqpyrk1"), errs: []error{io.EOF}}, + res: "fooba", err: io.EOF, + }, + // Check errors are properly reported when decoder.Read is called multiple times.
+ // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but an error on the second call. + { + r: badReader{data: []byte("dhjp2wvne9jjwc9g"), errs: []error{nil, badErr}}, + res: "leasure.10", err: badErr, dbuflen: 1, + }, + // Check io.EOF is properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but io.EOF on the second call. + { + r: badReader{data: []byte("dhjp2wvne9jjw"), errs: []error{nil, io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 1, + }, + // The following two test cases check that errors are propagated correctly when more than + // 8 bytes are read at a time. + { + r: badReader{data: []byte("dhjp2wvne9jjw"), errs: []error{io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 11, + }, + { + r: badReader{data: []byte("dhjp2wvne9jjwc9g"), errs: []error{badErr}}, + res: "leasure.10", err: badErr, dbuflen: 11, + }, + // Check that errors are correctly propagated when the reader returns valid bytes in + // groups that are not divisible by 8. The first read will return 11 bytes and no + // error. The second will return 7 and an error. The data should be decoded correctly + // and the error should be propagated. + // NOTE(thehowl): again, this is on the assumption that this is padded, and it's not. + /* { + r: badReader{data: []byte("dhjp2wvne9jjw"), errs: []error{nil, badErr}, limit: 11}, + res: "leasure.", err: badErr, + }, */ + } + + for idx, tc := range testCases { + t.Run(fmt.Sprintf("%d-%s", idx, string(tc.res)), func(t *testing.T) { + input := tc.r.data + decoder := NewDecoder(&tc.r) + var dbuflen int + if tc.dbuflen > 0 { + dbuflen = tc.dbuflen + } else { + dbuflen = DecodedLen(len(input)) + } + dbuf := make([]byte, dbuflen) + var err error + var res []byte + for err == nil { + var n int + n, err = decoder.Read(dbuf) + if n > 0 { + res = append(res, dbuf[:n]...) 
+ } + } + + testEqual(t, "Decoding of %q = %q, want %q", string(input), string(res), tc.res) + testEqual(t, "Decoding of %q err = %v, expected %v", string(input), err, tc.err) + }) + } +} + +// TestDecoderError verifies decode errors are propagated when there are no read +// errors. +func TestDecoderError(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "ucsqpyrk1u" + dbuf := make([]byte, DecodedLen(len(input))) + br := badReader{data: []byte(input), errs: []error{readErr}} + decoder := NewDecoder(&br) + n, err := decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + if _, ok := err.(CorruptInputError); !ok { + t.Errorf("Corrupt input error expected. Found %T", err) + } + } +} + +// TestReaderEOF ensures decoder.Read behaves correctly when input data is +// exhausted. +func TestReaderEOF(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "MZXW6YTB" + br := badReader{data: []byte(input), errs: []error{nil, readErr}} + decoder := NewDecoder(&br) + dbuf := make([]byte, DecodedLen(len(input))) + n, err := decoder.Read(dbuf) + testEqual(t, "Decoding of %q err = %v, expected %v", input, err, error(nil)) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + } +} + +func TestDecoderBuffering(t *testing.T) { + for bs := 1; bs <= 12; bs++ { + decoder := NewDecoder(strings.NewReader(bigtest.encoded)) + buf := make([]byte, len(bigtest.decoded)+12) + var total int + var n int + var err error + for total = 0; total < len(bigtest.decoded) && err == nil; { + n, err = decoder.Read(buf[total : total+bs]) + total += n + } + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", bigtest.encoded, total, n, 
err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q", bs, bigtest.encoded, string(buf[0:total]), bigtest.decoded) + } +} + +func TestDecodeCorrupt(t *testing.T) { + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, + {"iIoOlL", -1}, + {"!!!!", 0}, + {"uxp10", 0}, + {"x===", 1}, + {"AA=A====", 2}, + {"AAA=AAAA", 3}, + // Much fewer cases compared to Go as there are much fewer cases where input + // can be "corrupted". + } + for _, tc := range testCases { + dbuf := make([]byte, DecodedLen(len(tc.input))) + _, err := Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected corruption in", tc.input) + } + continue + } + switch err := err.(type) { + case CorruptInputError: + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) + default: + t.Error("Decoder failed to detect corruption in", tc) + } + } +} + +func TestBig(t *testing.T) { + n := 3*1000 + 1 + raw := make([]byte, n) + const alpha = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + for i := 0; i < n; i++ { + raw[i] = alpha[i%len(alpha)] + } + encoded := new(bytes.Buffer) + w := NewEncoder(encoded) + nn, err := w.Write(raw) + if nn != n || err != nil { + t.Fatalf("Encoder.Write(raw) = %d, %v want %d, nil", nn, err, n) + } + err = w.Close() + if err != nil { + t.Fatalf("Encoder.Close() = %v want nil", err) + } + decoded, err := io.ReadAll(NewDecoder(encoded)) + if err != nil { + t.Fatalf("io.ReadAll(NewDecoder(...)): %v", err) + } + + if !bytes.Equal(raw, decoded) { + var i int + for i = 0; i < len(decoded) && i < len(raw); i++ { + if decoded[i] != raw[i] { + break + } + } + t.Errorf("Decode(Encode(%d-byte string)) failed at offset %d", n, i) + } +} + +func testStringEncoding(t *testing.T, expected string, examples []string) { + for _, e := range examples { + buf, err := DecodeString(e) + if err != nil { + t.Errorf("Decode(%q) failed: %v", e, err) + continue + } 
+ if s := string(buf); s != expected { + t.Errorf("Decode(%q) = %q, want %q", e, s, expected) + } + } +} + +func TestNewLineCharacters(t *testing.T) { + // Each of these should decode to the string "sure", without errors. + examples := []string{ + "EDTQ4S8", + "EDTQ4S8\r", + "EDTQ4S8\n", + "EDTQ4S8\r\n", + "EDTQ4S\r\n8", + "EDT\rQ4S\n8", + "edt\nq4s\r8", + "edt\nq4s8", + "EDTQ4S\n8", + } + testStringEncoding(t, "sure", examples) +} + +func BenchmarkEncode(b *testing.B) { + data := make([]byte, 8192) + buf := make([]byte, EncodedLen(len(data))) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + Encode(buf, data) + } +} + +func BenchmarkEncodeToString(b *testing.B) { + data := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + EncodeToString(data) + } +} + +func BenchmarkDecode(b *testing.B) { + data := make([]byte, EncodedLen(8192)) + Encode(data, make([]byte, 8192)) + buf := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + Decode(buf, data) + } +} + +func BenchmarkDecodeString(b *testing.B) { + data := EncodeToString(make([]byte, 8192)) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + DecodeString(data) + } +} + +/* TODO: rewrite without using goroutines +func TestBufferedDecodingSameError(t *testing.T) { + testcases := []struct { + prefix string + chunkCombinations [][]string + expected error + }{ + // Normal case, this is valid input + {"helloworld", [][]string{ + {"D1JP", "RV3F", "EXQQ", "4V34"}, + {"D1JPRV3FEXQQ4V34"}, + {"D1J", "PRV", "3FE", "XQQ", "4V3", "4"}, + {"D1JPRV3FEXQQ4V", "34"}, + }, nil}, + + // Normal case, this is valid input + {"fooba", [][]string{ + {"CSQPYRK1"}, + {"CSQPYRK", "1"}, + {"CSQPYR", "K1"}, + {"CSQPY", "RK1"}, + {"CSQPY", "RK", "1"}, + {"CSQPY", "RK1"}, + {"CSQP", "YR", "K1"}, + }, nil}, + + // NOTE: many test cases have been removed as we don't return ErrUnexpectedEOF. 
+ } + + for _, testcase := range testcases { + for _, chunks := range testcase.chunkCombinations { + pr, pw := io.Pipe() + + // Write the encoded chunks into the pipe + go func() { + for _, chunk := range chunks { + pw.Write([]byte(chunk)) + } + pw.Close() + }() + + decoder := NewDecoder(pr) + back, err := io.ReadAll(decoder) + + if err != testcase.expected { + t.Errorf("Expected %v, got %v; case %s %+v", testcase.expected, err, testcase.prefix, chunks) + } + if testcase.expected == nil { + testEqual(t, "Decode from NewDecoder(chunkReader(%v)) = %q, want %q", chunks, string(back), testcase.prefix) + } + } + } +} +*/ + +func TestEncodedLen(t *testing.T) { + type test struct { + n int + want int64 + } + tests := []test{ + {0, 0}, + {1, 2}, + {2, 4}, + {3, 5}, + {4, 7}, + {5, 8}, + {6, 10}, + {7, 12}, + {10, 16}, + {11, 18}, + } + // check overflow + tests = append(tests, test{(math.MaxInt-4)/8 + 1, 1844674407370955162}) + tests = append(tests, test{math.MaxInt/8*5 + 4, math.MaxInt}) + for _, tt := range tests { + if got := EncodedLen(tt.n); int64(got) != tt.want { + t.Errorf("EncodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestDecodedLen(t *testing.T) { + type test struct { + n int + want int64 + } + tests := []test{ + {0, 0}, + {2, 1}, + {4, 2}, + {5, 3}, + {7, 4}, + {8, 5}, + {10, 6}, + {12, 7}, + {16, 10}, + {18, 11}, + } + // check overflow + tests = append(tests, test{math.MaxInt/5 + 1, 1152921504606846976}) + tests = append(tests, test{math.MaxInt, 5764607523034234879}) + for _, tt := range tests { + if got := DecodedLen(tt.n); int64(got) != tt.want { + t.Errorf("DecodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} diff --git a/examples/gno.land/p/demo/cford32/gno.mod b/examples/gno.land/p/demo/cford32/gno.mod new file mode 100644 index 00000000000..20b99c65e4c --- /dev/null +++ b/examples/gno.land/p/demo/cford32/gno.mod @@ -0,0 +1 @@ +module gno.land/p/demo/cford32 diff --git a/examples/gno.land/p/demo/seqid/gno.mod 
b/examples/gno.land/p/demo/seqid/gno.mod index 63e6a1fb551..d1390012c3c 100644 --- a/examples/gno.land/p/demo/seqid/gno.mod +++ b/examples/gno.land/p/demo/seqid/gno.mod @@ -1 +1,3 @@ module gno.land/p/demo/seqid + +require gno.land/p/demo/cford32 v0.0.0-latest diff --git a/examples/gno.land/p/demo/seqid/seqid.gno b/examples/gno.land/p/demo/seqid/seqid.gno index 8cb5366ef44..b3ff815a421 100644 --- a/examples/gno.land/p/demo/seqid/seqid.gno +++ b/examples/gno.land/p/demo/seqid/seqid.gno @@ -7,11 +7,15 @@ // var users avl.Tree // // func NewUser() { -// users.Set(id.Next().Binary(), &User{ ... }) +// users.Set(id.Next().String(), &User{ ... }) // } package seqid -import "encoding/binary" +import ( + "encoding/binary" + + "gno.land/p/demo/cford32" +) // An ID is a simple sequential ID generator. type ID uint64 @@ -48,10 +52,40 @@ func (i ID) Binary() string { return string(buf) } -// FromBinary creates a new ID from the given string. +// String encodes i using cford32's compact encoding. For more information, +// see the documentation for package [gno.land/p/demo/cford32]. +// +// The result of String will be a 7-byte string for IDs [0,2^34), and a +// 13-byte string for all values following that. All generated string IDs +// follow the same lexicographic order as their number values; that is, for any +// two IDs (x, y) such that x < y, x.String() < y.String(). +// As such, this string representation is suitable to be used as an AVL key. +func (i ID) String() string { + return string(cford32.PutCompact(uint64(i))) +} + +// FromBinary creates a new ID from the given string, expected to be a binary +// big-endian encoding of an ID (such as that of [ID.Binary]). +// The second return value is true if the conversion was successful. 
func FromBinary(b string) (ID, bool) { if len(b) != 8 { return 0, false } return ID(binary.BigEndian.Uint64([]byte(b))), true } + +// FromString creates a new ID from the given string, expected to be a string +// representation using cford32, such as that returned by [ID.String]. +// +// The encoding scheme used by cford32 allows the same ID to have many +// different representations (though the one returned by [ID.String] is only +// one, deterministic and safe to be used in AVL). The encoding scheme is +// "human-centric" and is thus case insensitive, and maps some ambiguous +// characters to be the same, ie. L = I = 1, O = 0. For this reason, when +// parsing user input to retrieve a key (encoded as a string), always sanitize +// it first using FromString, then run String(), instead of using the user's +// input directly. +func FromString(b string) (ID, error) { + n, err := cford32.Uint64([]byte(b)) + return ID(n), err +} diff --git a/examples/gno.land/p/demo/seqid/seqid_test.gno b/examples/gno.land/p/demo/seqid/seqid_test.gno index c6f57960177..0a1e777f1f7 100644 --- a/examples/gno.land/p/demo/seqid/seqid_test.gno +++ b/examples/gno.land/p/demo/seqid/seqid_test.gno @@ -37,7 +37,32 @@ func TestID_Binary(t *testing.T) { for j := 0; j < 1000; j++ { cur := i.Next().Binary() if cur <= prev { - t.Fatalf("cur %x <= prev %x", cur, prev) + t.Fatalf("cur %x not > prev %x", cur, prev) } + prev = cur + } +} + +func TestID_String(t *testing.T) { + var i ID + prev := i.String() + + for j := 0; j < 1000; j++ { + cur := i.Next().String() + if cur <= prev { + t.Fatalf("cur %s not > prev %s", cur, prev) + } + prev = cur + } + + // Test for when cford32 switches over to the long encoding.
+	i = 1<<34 - 512 + for j := 0; j < 1024; j++ { + cur := i.Next().String() + // println(cur) + if cur <= prev { + t.Fatalf("cur %s not > prev %s", cur, prev) + } + prev = cur } } diff --git a/tm2/pkg/std/memfile.go b/tm2/pkg/std/memfile.go index 782537c8063..599e9a59cc5 100644 --- a/tm2/pkg/std/memfile.go +++ b/tm2/pkg/std/memfile.go @@ -43,7 +43,7 @@ const rePathPart = `[a-z][a-z0-9_]*` var ( rePkgName = regexp.MustCompile(`^[a-z][a-z0-9_]*$`) rePkgOrRlmPath = regexp.MustCompile(`gno\.land/(?:p|r)(?:/` + rePathPart + `)+`) - reFileName = regexp.MustCompile(`^[a-zA-Z0-9_]*\.[a-z0-9_\.]*$`) + reFileName = regexp.MustCompile(`^([a-zA-Z0-9_]*\.[a-z0-9_\.]*|LICENSE|README)$`) ) // path must not contain any dots after the first domain component.