diff --git a/examples/gno.land/p/demo/cford32/LICENSE b/examples/gno.land/p/demo/cford32/LICENSE
new file mode 100644
index 00000000000..6a66aea5eaf
--- /dev/null
+++ b/examples/gno.land/p/demo/cford32/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/examples/gno.land/p/demo/cford32/README.md b/examples/gno.land/p/demo/cford32/README.md
new file mode 100644
index 00000000000..30cc9372e55
--- /dev/null
+++ b/examples/gno.land/p/demo/cford32/README.md
@@ -0,0 +1,76 @@
+# cford32
+
+```
+package cford32 // import "gno.land/p/demo/cford32"
+
+Package cford32 implements a base32-like encoding/decoding package, with the
+encoding scheme specified by Douglas Crockford.
+
+From the website, the requirements of said encoding scheme are to:
+
+  - Be human readable and machine readable.
+  - Be compact. Humans have difficulty in manipulating long strings of
+    arbitrary symbols.
+  - Be error resistant. Entering the symbols must not require keyboarding
+    gymnastics.
+  - Be pronounceable. Humans should be able to accurately transmit the symbols
+    to other humans using a telephone.
+
+This is more than a simple change of encoding table relative to Go's stdlib
+`encoding/base32`: when decoding, the characters i I l L are parsed as 1, and
+o O are parsed as 0.
+
+This package additionally provides ways to encode uint64's efficiently, as well
+as efficient encoding to a lowercase variation of the encoding. The encodings
+never use padding.
+
+# Uint64 Encoding
+
+Aside from the lower/uppercase encodings, there is a compact encoding, able to
+encode all values in [0,2^34), and the full encoding, able to encode all values
+in [0,2^64). The compact encoding uses 7 characters, and the full encoding uses
+13 characters. Both are parsed unambiguously by the Uint64 decoder.
+
+The compact encoding's first character is in the range ['0','f'], while the
+full encoding's first character is in the range ['g','z']. In practice, you
+should pick one of the two and stick with it, keeping in mind that the compact
+encoding automatically switches to the full encoding once it reaches 2^34. The
+properties of the generated strings are still maintained: for instance, for any
+two uint64s x,y consistently generated with the compact encoding, if x < y
+numerically, then the encoding of x sorts before the encoding of y lexically.
+However, values in [0,2^34) have a "double encoding"; if the two encodings are
+mixed together, the lexical ordering property is lost.
+
+The Uint64 encoding is most useful for generating string versions of uint64
+IDs. Practically, it allows you to retain sleek and compact IDs for your
+application for the first 2^34 (>17 billion) entities, while seamlessly rolling
+over to the full encoding should you exceed that. You are encouraged to use it
+unless you require IDs to always be the same size.
+
+To use the cford32 encoding for IDs, you may want to consider using package
+gno.land/p/demo/seqid.
+
+[specified by Douglas Crockford]: https://www.crockford.com/base32.html
+
+func AppendCompact(id uint64, b []byte) []byte
+func AppendDecode(dst, src []byte) ([]byte, error)
+func AppendEncode(dst, src []byte) []byte
+func AppendEncodeLower(dst, src []byte) []byte
+func Decode(dst, src []byte) (n int, err error)
+func DecodeString(s string) ([]byte, error)
+func DecodedLen(n int) int
+func Encode(dst, src []byte)
+func EncodeLower(dst, src []byte)
+func EncodeToString(src []byte) string
+func EncodeToStringLower(src []byte) string
+func EncodedLen(n int) int
+func NewDecoder(r io.Reader) io.Reader
+func NewEncoder(w io.Writer) io.WriteCloser
+func NewEncoderLower(w io.Writer) io.WriteCloser
+func PutCompact(id uint64) []byte
+func PutUint64(id uint64) [13]byte
+func PutUint64Lower(id uint64) [13]byte
+func Uint64(b []byte) (uint64, error)
+type CorruptInputError int64
+```
diff --git a/examples/gno.land/p/demo/cford32/cford32.gno b/examples/gno.land/p/demo/cford32/cford32.gno
new file mode 100644
index 00000000000..effa32bef88
--- /dev/null
+++ b/examples/gno.land/p/demo/cford32/cford32.gno
@@ -0,0 +1,700 @@
+// Modified from the Go Source code for encoding/base32.
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cford32 implements a base32-like encoding/decoding package, with the
+// encoding scheme [specified by Douglas Crockford].
+//
+// From the website, the requirements of said encoding scheme are to:
+//
+//   - Be human readable and machine readable.
+//   - Be compact. Humans have difficulty in manipulating long strings of arbitrary symbols.
+//   - Be error resistant. Entering the symbols must not require keyboarding gymnastics.
+//   - Be pronounceable. Humans should be able to accurately transmit the symbols to other humans using a telephone.
+//
+// This is more than a simple change of encoding table relative to Go's stdlib
+// `encoding/base32`: when decoding, the characters i I l L are parsed as 1,
+// and o O are parsed as 0.
+//
+// This package additionally provides ways to encode uint64's efficiently,
+// as well as efficient encoding to a lowercase variation of the encoding.
+// The encodings never use padding.
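+//
+// For example, using values taken from this package's test vectors:
+//
+//	cford32.EncodeToString([]byte("foobar"))      // "CSQPYRK1E8"
+//	cford32.EncodeToStringLower([]byte("foobar")) // "csqpyrk1e8"
+//	cford32.DecodeString("csqpyrk1e8")            // []byte("foobar"), nil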
+//
+// # Uint64 Encoding
+//
+// Aside from the lower/uppercase encodings, there is a compact encoding, able
+// to encode all values in [0,2^34), and the full encoding, able to encode all
+// values in [0,2^64). The compact encoding uses 7 characters, and the full
+// encoding uses 13 characters. Both are parsed unambiguously by the Uint64
+// decoder.
+//
+// The compact encoding's first character is in the range ['0','f'], while the
+// full encoding's first character is in the range ['g','z']. In practice, you
+// should pick one of the two and stick with it, keeping in mind that the
+// compact encoding automatically switches to the full encoding once it
+// reaches 2^34. The properties of the generated strings are still maintained:
+// for instance, for any two uint64s x,y consistently generated with the
+// compact encoding, if x < y numerically, then the encoding of x sorts before
+// the encoding of y lexically. However, values in [0,2^34) have a "double
+// encoding"; if the two encodings are mixed together, the lexical ordering
+// property is lost.
+//
+// The Uint64 encoding is most useful for generating string versions of uint64
+// IDs. Practically, it allows you to retain sleek and compact IDs for your
+// application for the first 2^34 (>17 billion) entities, while seamlessly
+// rolling over to the full encoding should you exceed that. You are encouraged
+// to use it unless you require IDs to always be the same size.
+//
+// To use the cford32 encoding for IDs, you may want to consider using package
+// [gno.land/p/demo/seqid].
+//
+// [specified by Douglas Crockford]: https://www.crockford.com/base32.html
+package cford32
+
+import (
+	"io"
+	"strconv"
+)
+
+const (
+	encTable      = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+	encTableLower = "0123456789abcdefghjkmnpqrstvwxyz"
+
+	// each line is 16 bytes
+	decTable = "" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 00-0f
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 10-1f
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 20-2f
+		"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + // 30-3f
+		"\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x01\x12\x13\x01\x14\x15\x00" + // 40-4f
+		"\x16\x17\x18\x19\x1a\xff\x1b\x1c\x1d\x1e\x1f\xff\xff\xff\xff\xff" + // 50-5f
+		"\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x01\x12\x13\x01\x14\x15\x00" + // 60-6f
+		"\x16\x17\x18\x19\x1a\xff\x1b\x1c\x1d\x1e\x1f\xff\xff\xff\xff\xff" + // 70-7f
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + // 80-ff (not ASCII)
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+)
+
+// CorruptInputError is returned by parsing functions when an invalid character
+// in the input is found. The integer value represents the byte index where
+// the error occurred.
+//
+// This is typically because the given character does not exist in the encoding.
+type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "illegal cford32 data at input byte " + strconv.FormatInt(int64(e), 10) +} + +// Uint64 parses a cford32-encoded byte slice into a uint64. +// +// - The parser requires all provided character to be valid cford32 characters. +// - The parser disregards case. +// - If the first character is '0' <= c <= 'f', then the passed value is assumed +// encoded in the compact encoding, and must be 7 characters long. +// - If the first character is 'g' <= c <= 'z', then the passed value is +// assumed encoded in the full encoding, and must be 13 characters long. +// +// If any of these requirements fail, a CorruptInputError will be returned. +func Uint64(b []byte) (uint64, error) { + switch { + default: + return 0, CorruptInputError(0) + case len(b) == 7 && b[0] >= '0' && b[0] <= 'f': + decVals := [7]byte{ + decTable[b[0]], + decTable[b[1]], + decTable[b[2]], + decTable[b[3]], + decTable[b[4]], + decTable[b[5]], + decTable[b[6]], + } + for idx, v := range decVals { + if v >= 32 { + return 0, CorruptInputError(idx) + } + } + + return 0 + + uint64(decVals[0])<<30 | + uint64(decVals[1])<<25 | + uint64(decVals[2])<<20 | + uint64(decVals[3])<<15 | + uint64(decVals[4])<<10 | + uint64(decVals[5])<<5 | + uint64(decVals[6]), nil + case len(b) == 13 && b[0] >= 'g' && b[0] <= 'z': + decVals := [13]byte{ + decTable[b[0]] & 0x0F, // disregard high bit + decTable[b[1]], + decTable[b[2]], + decTable[b[3]], + decTable[b[4]], + decTable[b[5]], + decTable[b[6]], + decTable[b[7]], + decTable[b[8]], + decTable[b[9]], + decTable[b[10]], + decTable[b[11]], + decTable[b[12]], + } + for idx, v := range decVals { + if v >= 32 { + return 0, CorruptInputError(idx) + } + } + + return 0 + + uint64(decVals[0])<<60 | + uint64(decVals[1])<<55 | + uint64(decVals[2])<<50 | + uint64(decVals[3])<<45 | + uint64(decVals[4])<<40 | + uint64(decVals[5])<<35 | + uint64(decVals[6])<<30 | + uint64(decVals[7])<<25 | + uint64(decVals[8])<<20 | + uint64(decVals[9])<<15 | + uint64(decVals[10])<<10 | + uint64(decVals[11])<<5 | + uint64(decVals[12]), nil + } +} + +const mask = 31 + +// PutUint64 returns a cford32-encoded byte slice. +func PutUint64(id uint64) [13]byte { + return [13]byte{ + encTable[id>>60&mask|0x10], // specify full encoding + encTable[id>>55&mask], + encTable[id>>50&mask], + encTable[id>>45&mask], + encTable[id>>40&mask], + encTable[id>>35&mask], + encTable[id>>30&mask], + encTable[id>>25&mask], + encTable[id>>20&mask], + encTable[id>>15&mask], + encTable[id>>10&mask], + encTable[id>>5&mask], + encTable[id&mask], + } +} + +// PutUint64Lower returns a cford32-encoded byte array, swapping uppercase +// letters with lowercase. +// +// For more information on how the value is encoded, see [Uint64]. +func PutUint64Lower(id uint64) [13]byte { + return [13]byte{ + encTableLower[id>>60&mask|0x10], + encTableLower[id>>55&mask], + encTableLower[id>>50&mask], + encTableLower[id>>45&mask], + encTableLower[id>>40&mask], + encTableLower[id>>35&mask], + encTableLower[id>>30&mask], + encTableLower[id>>25&mask], + encTableLower[id>>20&mask], + encTableLower[id>>15&mask], + encTableLower[id>>10&mask], + encTableLower[id>>5&mask], + encTableLower[id&mask], + } +} + +// PutCompact returns a cford32-encoded byte slice, using the compact +// representation of cford32 described in the package documentation where +// possible (all values of id < 1<<34). The lowercase encoding is used. 
+//
+// The resulting byte slice will be 7 bytes long for all compact values,
+// and 13 bytes long for values which require the full encoding.
+func PutCompact(id uint64) []byte {
+	return AppendCompact(id, nil)
+}
+
+// AppendCompact works like [PutCompact] but appends to the given byte slice
+// instead of allocating one anew.
+func AppendCompact(id uint64, b []byte) []byte {
+	const maxCompact = 1 << 34
+	if id < maxCompact {
+		return append(b,
+			encTableLower[id>>30&mask],
+			encTableLower[id>>25&mask],
+			encTableLower[id>>20&mask],
+			encTableLower[id>>15&mask],
+			encTableLower[id>>10&mask],
+			encTableLower[id>>5&mask],
+			encTableLower[id&mask],
+		)
+	}
+	return append(b,
+		encTableLower[id>>60&mask|0x10],
+		encTableLower[id>>55&mask],
+		encTableLower[id>>50&mask],
+		encTableLower[id>>45&mask],
+		encTableLower[id>>40&mask],
+		encTableLower[id>>35&mask],
+		encTableLower[id>>30&mask],
+		encTableLower[id>>25&mask],
+		encTableLower[id>>20&mask],
+		encTableLower[id>>15&mask],
+		encTableLower[id>>10&mask],
+		encTableLower[id>>5&mask],
+		encTableLower[id&mask],
+	)
+}
+
+func DecodedLen(n int) int {
+	return n/8*5 + n%8*5/8
+}
+
+func EncodedLen(n int) int {
+	return n/5*8 + (n%5*8+4)/5
+}
+
+// Encode encodes src using the cford32 uppercase alphabet,
+// writing [EncodedLen](len(src)) bytes to dst.
+//
+// The encoding does not contain any padding, unlike Go's base32.
+func Encode(dst, src []byte) {
+	// Copied from encoding/base32/base32.go (go1.22)
+	if len(src) == 0 {
+		return
+	}
+
+	di, si := 0, 0
+	n := (len(src) / 5) * 5
+	for si < n {
+		// Combining two 32 bit loads allows the same code to be used
+		// for 32 and 64 bit platforms.
+		hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])
+		lo := hi<<8 | uint32(src[si+4])
+
+		dst[di+0] = encTable[(hi>>27)&0x1F]
+		dst[di+1] = encTable[(hi>>22)&0x1F]
+		dst[di+2] = encTable[(hi>>17)&0x1F]
+		dst[di+3] = encTable[(hi>>12)&0x1F]
+		dst[di+4] = encTable[(hi>>7)&0x1F]
+		dst[di+5] = encTable[(hi>>2)&0x1F]
+		dst[di+6] = encTable[(lo>>5)&0x1F]
+		dst[di+7] = encTable[(lo)&0x1F]
+
+		si += 5
+		di += 8
+	}
+
+	// Add the remaining small block
+	remain := len(src) - si
+	if remain == 0 {
+		return
+	}
+
+	// Encode the remaining bytes in reverse order.
+	val := uint32(0)
+	switch remain {
+	case 4:
+		val |= uint32(src[si+3])
+		dst[di+6] = encTable[val<<3&0x1F]
+		dst[di+5] = encTable[val>>2&0x1F]
+		fallthrough
+	case 3:
+		val |= uint32(src[si+2]) << 8
+		dst[di+4] = encTable[val>>7&0x1F]
+		fallthrough
+	case 2:
+		val |= uint32(src[si+1]) << 16
+		dst[di+3] = encTable[val>>12&0x1F]
+		dst[di+2] = encTable[val>>17&0x1F]
+		fallthrough
+	case 1:
+		val |= uint32(src[si+0]) << 24
+		dst[di+1] = encTable[val>>22&0x1F]
+		dst[di+0] = encTable[val>>27&0x1F]
+	}
+}
+
+// EncodeLower is like [Encode], but uses the lowercase alphabet.
+func EncodeLower(dst, src []byte) {
+	// Copied from encoding/base32/base32.go (go1.22)
+	if len(src) == 0 {
+		return
+	}
+
+	di, si := 0, 0
+	n := (len(src) / 5) * 5
+	for si < n {
+		// Combining two 32 bit loads allows the same code to be used
+		// for 32 and 64 bit platforms.
+ hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]) + lo := hi<<8 | uint32(src[si+4]) + + dst[di+0] = encTableLower[(hi>>27)&0x1F] + dst[di+1] = encTableLower[(hi>>22)&0x1F] + dst[di+2] = encTableLower[(hi>>17)&0x1F] + dst[di+3] = encTableLower[(hi>>12)&0x1F] + dst[di+4] = encTableLower[(hi>>7)&0x1F] + dst[di+5] = encTableLower[(hi>>2)&0x1F] + dst[di+6] = encTableLower[(lo>>5)&0x1F] + dst[di+7] = encTableLower[(lo)&0x1F] + + si += 5 + di += 8 + } + + // Add the remaining small block + remain := len(src) - si + if remain == 0 { + return + } + + // Encode the remaining bytes in reverse order. + val := uint32(0) + switch remain { + case 4: + val |= uint32(src[si+3]) + dst[di+6] = encTableLower[val<<3&0x1F] + dst[di+5] = encTableLower[val>>2&0x1F] + fallthrough + case 3: + val |= uint32(src[si+2]) << 8 + dst[di+4] = encTableLower[val>>7&0x1F] + fallthrough + case 2: + val |= uint32(src[si+1]) << 16 + dst[di+3] = encTableLower[val>>12&0x1F] + dst[di+2] = encTableLower[val>>17&0x1F] + fallthrough + case 1: + val |= uint32(src[si+0]) << 24 + dst[di+1] = encTableLower[val>>22&0x1F] + dst[di+0] = encTableLower[val>>27&0x1F] + } +} + +// AppendEncode appends the cford32 encoded src to dst +// and returns the extended buffer. +func AppendEncode(dst, src []byte) []byte { + n := EncodedLen(len(src)) + dst = grow(dst, n) + Encode(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +// AppendEncodeLower appends the lowercase cford32 encoded src to dst +// and returns the extended buffer. +func AppendEncodeLower(dst, src []byte) []byte { + n := EncodedLen(len(src)) + dst = grow(dst, n) + EncodeLower(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +func grow(s []byte, n int) []byte { + // slices.Grow + if n -= cap(s) - len(s); n > 0 { + news := make([]byte, cap(s)+n) + copy(news[:cap(s)], s[:cap(s)]) + return news[:len(s)] + } + return s +} + +// EncodeToString returns the cford32 encoding of src. +func EncodeToString(src []byte) string { + buf := make([]byte, EncodedLen(len(src))) + Encode(buf, src) + return string(buf) +} + +// EncodeToStringLower returns the cford32 lowercase encoding of src. 
+func EncodeToStringLower(src []byte) string { + buf := make([]byte, EncodedLen(len(src))) + EncodeLower(buf, src) + return string(buf) +} + +func decode(dst, src []byte) (n int, err error) { + dsti := 0 + olen := len(src) + + for len(src) > 0 { + // Decode quantum using the base32 alphabet + var dbuf [8]byte + dlen := 8 + + for j := 0; j < 8; { + if len(src) == 0 { + // We have reached the end and are not expecting any padding + dlen = j + break + } + in := src[0] + src = src[1:] + dbuf[j] = decTable[in] + if dbuf[j] == 0xFF { + return n, CorruptInputError(olen - len(src) - 1) + } + j++ + } + + // Pack 8x 5-bit source blocks into 5 byte destination + // quantum + switch dlen { + case 8: + dst[dsti+4] = dbuf[6]<<5 | dbuf[7] + n++ + fallthrough + case 7: + dst[dsti+3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3 + n++ + fallthrough + case 5: + dst[dsti+2] = dbuf[3]<<4 | dbuf[4]>>1 + n++ + fallthrough + case 4: + dst[dsti+1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4 + n++ + fallthrough + case 2: + dst[dsti+0] = dbuf[0]<<3 | dbuf[1]>>2 + n++ + } + dsti += 5 + } + return n, nil +} + +type encoder struct { + err error + w io.Writer + enc func(dst, src []byte) + buf [5]byte // buffered data waiting to be encoded + nbuf int // number of bytes in buf + out [1024]byte // output buffer +} + +func NewEncoder(w io.Writer) io.WriteCloser { + return &encoder{w: w, enc: Encode} +} + +func NewEncoderLower(w io.Writer) io.WriteCloser { + return &encoder{w: w, enc: EncodeLower} +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // Leading fringe. + if e.nbuf > 0 { + var i int + for i = 0; i < len(p) && e.nbuf < 5; i++ { + e.buf[e.nbuf] = p[i] + e.nbuf++ + } + n += i + p = p[i:] + if e.nbuf < 5 { + return + } + e.enc(e.out[0:], e.buf[0:]) + if _, e.err = e.w.Write(e.out[0:8]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + + // Large interior chunks. + for len(p) >= 5 { + nn := len(e.out) / 8 * 5 + if nn > len(p) { + nn = len(p) + nn -= nn % 5 + } + e.enc(e.out[0:], p[0:nn]) + if _, e.err = e.w.Write(e.out[0 : nn/5*8]); e.err != nil { + return n, e.err + } + n += nn + p = p[nn:] + } + + // Trailing fringe. + copy(e.buf[:], p) + e.nbuf = len(p) + n += len(p) + return +} + +// Close flushes any pending output from the encoder. +// It is an error to call Write after calling Close. +func (e *encoder) Close() error { + // If there's anything left in the buffer, flush it out + if e.err == nil && e.nbuf > 0 { + e.enc(e.out[0:], e.buf[0:e.nbuf]) + encodedLen := EncodedLen(e.nbuf) + e.nbuf = 0 + _, e.err = e.w.Write(e.out[0:encodedLen]) + } + return e.err +} + +// Decode decodes src using cford32. It writes at most +// [DecodedLen](len(src)) bytes to dst and returns the number of bytes +// written. If src contains invalid cford32 data, it will return the +// number of bytes successfully written and [CorruptInputError]. +// Newline characters (\r and \n) are ignored. +func Decode(dst, src []byte) (n int, err error) { + buf := make([]byte, len(src)) + l := stripNewlines(buf, src) + return decode(dst, buf[:l]) +} + +// AppendDecode appends the cford32 decoded src to dst +// and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func AppendDecode(dst, src []byte) ([]byte, error) { + n := DecodedLen(len(src)) + + dst = grow(dst, n) + dstsl := dst[len(dst) : len(dst)+n] + n, err := Decode(dstsl, src) + return dst[:len(dst)+n], err +} + +// DecodeString returns the bytes represented by the cford32 string s. 
+func DecodeString(s string) ([]byte, error) { + buf := []byte(s) + l := stripNewlines(buf, buf) + n, err := decode(buf, buf[:l]) + return buf[:n], err +} + +// stripNewlines removes newline characters and returns the number +// of non-newline characters copied to dst. +func stripNewlines(dst, src []byte) int { + offset := 0 + for _, b := range src { + if b == '\r' || b == '\n' { + continue + } + dst[offset] = b + offset++ + } + return offset +} + +type decoder struct { + err error + r io.Reader + buf [1024]byte // leftover input + nbuf int + out []byte // leftover decoded output + outbuf [1024 / 8 * 5]byte +} + +// NewDecoder constructs a new base32 stream decoder. +func NewDecoder(r io.Reader) io.Reader { + return &decoder{r: &newlineFilteringReader{r}} +} + +func readEncodedData(r io.Reader, buf []byte) (n int, err error) { + for n < 1 && err == nil { + var nn int + nn, err = r.Read(buf[n:]) + n += nn + } + return +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Use leftover decoded output from last read. + if len(d.out) > 0 { + n = copy(p, d.out) + d.out = d.out[n:] + if len(d.out) == 0 { + return n, d.err + } + return n, nil + } + + if d.err != nil { + return 0, d.err + } + + // Read nn bytes from input, bounded [8,len(d.buf)] + nn := (len(p)/5 + 1) * 8 + if nn > len(d.buf) { + nn = len(d.buf) + } + + nn, d.err = readEncodedData(d.r, d.buf[d.nbuf:nn]) + d.nbuf += nn + if d.nbuf < 1 { + return 0, d.err + } + + // Decode chunk into p, or d.out and then p if p is too small. + nr := d.nbuf + if d.err != io.EOF && nr%8 != 0 { + nr -= nr % 8 + } + nw := DecodedLen(d.nbuf) + + if nw > len(p) { + nw, err = decode(d.outbuf[0:], d.buf[0:nr]) + d.out = d.outbuf[0:nw] + n = copy(p, d.out) + d.out = d.out[n:] + } else { + n, err = decode(p, d.buf[0:nr]) + } + d.nbuf -= nr + for i := 0; i < d.nbuf; i++ { + d.buf[i] = d.buf[i+nr] + } + + if err != nil && (d.err == nil || d.err == io.EOF) { + d.err = err + } + + if len(d.out) > 0 { + // We cannot return all the decoded bytes to the caller in this + // invocation of Read, so we return a nil error to ensure that Read + // will be called again. The error stored in d.err, if any, will be + // returned with the last set of decoded bytes. 
+ return n, nil + } + + return n, d.err +} + +type newlineFilteringReader struct { + wrapped io.Reader +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + s := p[0:n] + offset := stripNewlines(s, s) + if err != nil || offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} diff --git a/examples/gno.land/p/demo/cford32/cford32_test.gno b/examples/gno.land/p/demo/cford32/cford32_test.gno new file mode 100644 index 00000000000..1a17d64c856 --- /dev/null +++ b/examples/gno.land/p/demo/cford32/cford32_test.gno @@ -0,0 +1,631 @@ +package cford32 + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "strconv" + "strings" + "testing" +) + +func TestCompactRoundtrip(t *testing.T) { + buf := make([]byte, 13) + prev := make([]byte, 13) + for i := uint64(0); i < (1 << 12); i++ { + res := AppendCompact(i, buf[:0]) + back, err := Uint64(res) + testEqual(t, "Uint64(%q) = (%d, %v), want %v", string(res), back, err, nil) + testEqual(t, "Uint64(%q) = %d, want %v", string(res), back, i) + + testEqual(t, "bytes.Compare(prev, res) = %d, want %d", bytes.Compare(prev, res), -1) + prev, buf = res, prev + } + for i := uint64(1<<34 - 1024); i < (1<<34 + 1024); i++ { + res := AppendCompact(i, buf[:0]) + back, err := Uint64(res) + // println(string(res)) + testEqual(t, "Uint64(%q) = (%d, %v), want %v", string(res), back, err, nil) + testEqual(t, "Uint64(%q) = %d, want %v", string(res), back, i) + + testEqual(t, "bytes.Compare(prev, res) = %d, want %d", bytes.Compare(prev, res), -1) + prev, buf = res, prev + } + for i := uint64(1<<64 - 5000); i != 0; i++ { + res := AppendCompact(i, buf[:0]) + back, err := Uint64(res) + testEqual(t, "Uint64(%q) = (%d, %v), want %v", string(res), back, err, nil) + testEqual(t, "Uint64(%q) = %d, want %v", string(res), back, i) + + testEqual(t, "bytes.Compare(prev, res) = %d, want %d", bytes.Compare(prev, res), -1) + prev, buf = res, prev + } +} + +func BenchmarkCompact(b *testing.B) { + buf := make([]byte, 13) + for i := 0; i < b.N; i++ { + _ = AppendCompact(uint64(i), buf[:0]) + } +} + +type testpair struct { + decoded, encoded string +} + +var pairs = []testpair{ + {"", ""}, + {"f", "CR"}, + {"fo", "CSQG"}, + {"foo", "CSQPY"}, + {"foob", "CSQPYRG"}, + {"fooba", "CSQPYRK1"}, + {"foobar", "CSQPYRK1E8"}, + + {"sure.", "EDTQ4S9E"}, + {"sure", "EDTQ4S8"}, + {"sur", "EDTQ4"}, + {"su", "EDTG"}, + {"leasure.", "DHJP2WVNE9JJW"}, + {"easure.", "CNGQ6XBJCMQ0"}, + {"asure.", "C5SQAWK55R"}, +} + +var bigtest = testpair{ + "Twas brillig, and the slithy toves", + "AHVP2WS0C9S6JV3CD5KJR831DSJ20X38CMG76V39EHM7J83MDXV6AWR", +} + +func testEqual(t *testing.T, msg string, args ...interface{}) bool { + t.Helper() + if args[len(args)-2] != args[len(args)-1] { + t.Errorf(msg, args...) 
+ return false + } + return true +} + +func TestEncode(t *testing.T) { + for _, p := range pairs { + got := EncodeToString([]byte(p.decoded)) + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, got, p.encoded) + dst := AppendEncode([]byte("lead"), []byte(p.decoded)) + testEqual(t, `AppendEncode("lead", %q) = %q, want %q`, p.decoded, string(dst), "lead"+p.encoded) + } +} + +func TestEncoder(t *testing.T) { + for _, p := range pairs { + bb := &strings.Builder{} + encoder := NewEncoder(bb) + encoder.Write([]byte(p.decoded)) + encoder.Close() + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, bb.String(), p.encoded) + } +} + +func TestEncoderBuffering(t *testing.T) { + input := []byte(bigtest.decoded) + for bs := 1; bs <= 12; bs++ { + bb := &strings.Builder{} + encoder := NewEncoder(bb) + for pos := 0; pos < len(input); pos += bs { + end := pos + bs + if end > len(input) { + end = len(input) + } + n, err := encoder.Write(input[pos:end]) + testEqual(t, "Write(%q) gave error %v, want %v", input[pos:end], err, error(nil)) + testEqual(t, "Write(%q) gave length %v, want %v", input[pos:end], n, end-pos) + } + err := encoder.Close() + testEqual(t, "Close gave error %v, want %v", err, error(nil)) + testEqual(t, "Encoding/%d of %q = %q, want %q", bs, bigtest.decoded, bb.String(), bigtest.encoded) + } +} + +func TestDecode(t *testing.T) { + for _, p := range pairs { + dbuf := make([]byte, DecodedLen(len(p.encoded))) + count, err := decode(dbuf, []byte(p.encoded)) + testEqual(t, "Decode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "Decode(%q) = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decode(%q) = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + + dbuf, err = DecodeString(p.encoded) + testEqual(t, "DecodeString(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "DecodeString(%q) = %q, want %q", p.encoded, string(dbuf), p.decoded) + + // XXX: https://github.com/gnolang/gno/issues/1570 + dst, err := AppendDecode(append([]byte(nil), []byte("lead")...), []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("lead", %q) = %q, want %q`, p.encoded, string(dst), "lead"+p.decoded) + + dst2, err := AppendDecode(dst[:0:len(p.decoded)], []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("", %q) = %q, want %q`, p.encoded, string(dst2), p.decoded) + // XXX: https://github.com/gnolang/gno/issues/1569 + // old used &dst2[0] != &dst[0] as a check. + if len(dst) > 0 && len(dst2) > 0 && cap(dst2) != len(p.decoded) { + t.Errorf("unexpected capacity growth: got %d, want %d", cap(dst2), len(p.decoded)) + } + } +} + +// A minimal variation on strings.Reader. +// Here, we return a io.EOF immediately on Read if the read has reached the end +// of the reader. It's used to simplify TestDecoder. 
+type stringReader struct { + s string + i int64 +} + +func (r *stringReader) Read(b []byte) (n int, err error) { + if r.i >= int64(len(r.s)) { + return 0, io.EOF + } + n = copy(b, r.s[r.i:]) + r.i += int64(n) + if r.i >= int64(len(r.s)) { + return n, io.EOF + } + return +} + +func TestDecoder(t *testing.T) { + for _, p := range pairs { + decoder := NewDecoder(&stringReader{p.encoded, 0}) + dbuf := make([]byte, DecodedLen(len(p.encoded))) + count, err := decoder.Read(dbuf) + if err != nil && err != io.EOF { + t.Fatal("Read failed", err) + } + testEqual(t, "Read from %q = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decoding of %q = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + if err != io.EOF { + _, err = decoder.Read(dbuf) + } + testEqual(t, "Read from %q = %v, want %v", p.encoded, err, io.EOF) + } +} + +type badReader struct { + data []byte + errs []error + called int + limit int +} + +// Populates p with data, returns a count of the bytes written and an +// error. The error returned is taken from badReader.errs, with each +// invocation of Read returning the next error in this slice, or io.EOF, +// if all errors from the slice have already been returned. The +// number of bytes returned is determined by the size of the input buffer +// the test passes to decoder.Read and will be a multiple of 8, unless +// badReader.limit is non zero. +func (b *badReader) Read(p []byte) (int, error) { + lim := len(p) + if b.limit != 0 && b.limit < lim { + lim = b.limit + } + if len(b.data) < lim { + lim = len(b.data) + } + for i := range p[:lim] { + p[i] = b.data[i] + } + b.data = b.data[lim:] + err := io.EOF + if b.called < len(b.errs) { + err = b.errs[b.called] + } + b.called++ + return lim, err +} + +// TestIssue20044 tests that decoder.Read behaves correctly when the caller +// supplied reader returns an error. +func TestIssue20044(t *testing.T) { + badErr := errors.New("bad reader error") + testCases := []struct { + r badReader + res string + err error + dbuflen int + }{ + // Check valid input data accompanied by an error is processed and the error is propagated. + { + r: badReader{data: []byte("d1jprv3fexqq4v34"), errs: []error{badErr}}, + res: "helloworld", err: badErr, + }, + // Check a read error accompanied by input data consisting of newlines only is propagated. + { + r: badReader{data: []byte("\n\n\n\n\n\n\n\n"), errs: []error{badErr, nil}}, + res: "", err: badErr, + }, + // Reader will be called twice. The first time it will return 8 newline characters. The + // second time valid base32 encoded data and an error. The data should be decoded + // correctly and the error should be propagated. + { + r: badReader{data: []byte("\n\n\n\n\n\n\n\nd1jprv3fexqq4v34"), errs: []error{nil, badErr}}, + res: "helloworld", err: badErr, dbuflen: 8, + }, + // Reader returns invalid input data (too short) and an error. Verify the reader + // error is returned. + { + r: badReader{data: []byte("c"), errs: []error{badErr}}, + res: "", err: badErr, + }, + // Reader returns invalid input data (too short) but no error. Verify io.ErrUnexpectedEOF + // is returned. + // NOTE(thehowl): I don't think this should applyto us? + /* { + r: badReader{data: []byte("c"), errs: []error{nil}}, + res: "", err: io.ErrUnexpectedEOF, + },*/ + // Reader returns invalid input data and an error. Verify the reader and not the + // decoder error is returned. + { + r: badReader{data: []byte("cu"), errs: []error{badErr}}, + res: "", err: badErr, + }, + // Reader returns valid data and io.EOF. 
Check data is decoded and io.EOF is propagated. + { + r: badReader{data: []byte("csqpyrk1"), errs: []error{io.EOF}}, + res: "fooba", err: io.EOF, + }, + // Check errors are properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but an error on the second call. + { + r: badReader{data: []byte("dhjp2wvne9jjwc9g"), errs: []error{nil, badErr}}, + res: "leasure.10", err: badErr, dbuflen: 1, + }, + // Check io.EOF is properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but io.EOF on the second call. + { + r: badReader{data: []byte("dhjp2wvne9jjw"), errs: []error{nil, io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 1, + }, + // The following two test cases check that errors are propagated correctly when more than + // 8 bytes are read at a time. + { + r: badReader{data: []byte("dhjp2wvne9jjw"), errs: []error{io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 11, + }, + { + r: badReader{data: []byte("dhjp2wvne9jjwc9g"), errs: []error{badErr}}, + res: "leasure.10", err: badErr, dbuflen: 11, + }, + // Check that errors are correctly propagated when the reader returns valid bytes in + // groups that are not divisible by 8. The first read will return 11 bytes and no + // error. The second will return 7 and an error. The data should be decoded correctly + // and the error should be propagated. + // NOTE(thehowl): again, this is on the assumption that this is padded, and it's not. + /* { + r: badReader{data: []byte("dhjp2wvne9jjw"), errs: []error{nil, badErr}, limit: 11}, + res: "leasure.", err: badErr, + }, */ + } + + for idx, tc := range testCases { + t.Run(fmt.Sprintf("%d-%s", idx, string(tc.res)), func(t *testing.T) { + input := tc.r.data + decoder := NewDecoder(&tc.r) + var dbuflen int + if tc.dbuflen > 0 { + dbuflen = tc.dbuflen + } else { + dbuflen = DecodedLen(len(input)) + } + dbuf := make([]byte, dbuflen) + var err error + var res []byte + for err == nil { + var n int + n, err = decoder.Read(dbuf) + if n > 0 { + res = append(res, dbuf[:n]...) + } + } + + testEqual(t, "Decoding of %q = %q, want %q", string(input), string(res), tc.res) + testEqual(t, "Decoding of %q err = %v, expected %v", string(input), err, tc.err) + }) + } +} + +// TestDecoderError verifies decode errors are propagated when there are no read +// errors. +func TestDecoderError(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "ucsqpyrk1u" + dbuf := make([]byte, DecodedLen(len(input))) + br := badReader{data: []byte(input), errs: []error{readErr}} + decoder := NewDecoder(&br) + n, err := decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + if _, ok := err.(CorruptInputError); !ok { + t.Errorf("Corrupt input error expected. Found %T", err) + } + } +} + +// TestReaderEOF ensures decoder.Read behaves correctly when input data is +// exhausted. 
+func TestReaderEOF(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "MZXW6YTB" + br := badReader{data: []byte(input), errs: []error{nil, readErr}} + decoder := NewDecoder(&br) + dbuf := make([]byte, DecodedLen(len(input))) + n, err := decoder.Read(dbuf) + testEqual(t, "Decoding of %q err = %v, expected %v", input, err, error(nil)) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + } +} + +func TestDecoderBuffering(t *testing.T) { + for bs := 1; bs <= 12; bs++ { + decoder := NewDecoder(strings.NewReader(bigtest.encoded)) + buf := make([]byte, len(bigtest.decoded)+12) + var total int + var n int + var err error + for total = 0; total < len(bigtest.decoded) && err == nil; { + n, err = decoder.Read(buf[total : total+bs]) + total += n + } + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", bigtest.encoded, total, n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q", bs, bigtest.encoded, string(buf[0:total]), bigtest.decoded) + } +} + +func TestDecodeCorrupt(t *testing.T) { + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, + {"iIoOlL", -1}, + {"!!!!", 0}, + {"uxp10", 0}, + {"x===", 1}, + {"AA=A====", 2}, + {"AAA=AAAA", 3}, + // Much fewer cases compared to Go as there are much fewer cases where input + // can be "corrupted". + } + for _, tc := range testCases { + dbuf := make([]byte, DecodedLen(len(tc.input))) + _, err := Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected corruption in", tc.input) + } + continue + } + switch err := err.(type) { + case CorruptInputError: + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) + default: + t.Error("Decoder failed to detect corruption in", tc) + } + } +} + +func TestBig(t *testing.T) { + n := 3*1000 + 1 + raw := make([]byte, n) + const alpha = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + for i := 0; i < n; i++ { + raw[i] = alpha[i%len(alpha)] + } + encoded := new(bytes.Buffer) + w := NewEncoder(encoded) + nn, err := w.Write(raw) + if nn != n || err != nil { + t.Fatalf("Encoder.Write(raw) = %d, %v want %d, nil", nn, err, n) + } + err = w.Close() + if err != nil { + t.Fatalf("Encoder.Close() = %v want nil", err) + } + decoded, err := io.ReadAll(NewDecoder(encoded)) + if err != nil { + t.Fatalf("io.ReadAll(NewDecoder(...)): %v", err) + } + + if !bytes.Equal(raw, decoded) { + var i int + for i = 0; i < len(decoded) && i < len(raw); i++ { + if decoded[i] != raw[i] { + break + } + } + t.Errorf("Decode(Encode(%d-byte string)) failed at offset %d", n, i) + } +} + +func testStringEncoding(t *testing.T, expected string, examples []string) { + for _, e := range examples { + buf, err := DecodeString(e) + if err != nil { + t.Errorf("Decode(%q) failed: %v", e, err) + continue + } + if s := string(buf); s != expected { + t.Errorf("Decode(%q) = %q, want %q", e, s, expected) + } + } +} + +func TestNewLineCharacters(t *testing.T) { + // Each of these should decode to the string "sure", without errors. 
+ examples := []string{ + "EDTQ4S8", + "EDTQ4S8\r", + "EDTQ4S8\n", + "EDTQ4S8\r\n", + "EDTQ4S\r\n8", + "EDT\rQ4S\n8", + "edt\nq4s\r8", + "edt\nq4s8", + "EDTQ4S\n8", + } + testStringEncoding(t, "sure", examples) +} + +func BenchmarkEncode(b *testing.B) { + data := make([]byte, 8192) + buf := make([]byte, EncodedLen(len(data))) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + Encode(buf, data) + } +} + +func BenchmarkEncodeToString(b *testing.B) { + data := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + EncodeToString(data) + } +} + +func BenchmarkDecode(b *testing.B) { + data := make([]byte, EncodedLen(8192)) + Encode(data, make([]byte, 8192)) + buf := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + Decode(buf, data) + } +} + +func BenchmarkDecodeString(b *testing.B) { + data := EncodeToString(make([]byte, 8192)) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + DecodeString(data) + } +} + +/* TODO: rewrite without using goroutines +func TestBufferedDecodingSameError(t *testing.T) { + testcases := []struct { + prefix string + chunkCombinations [][]string + expected error + }{ + // Normal case, this is valid input + {"helloworld", [][]string{ + {"D1JP", "RV3F", "EXQQ", "4V34"}, + {"D1JPRV3FEXQQ4V34"}, + {"D1J", "PRV", "3FE", "XQQ", "4V3", "4"}, + {"D1JPRV3FEXQQ4V", "34"}, + }, nil}, + + // Normal case, this is valid input + {"fooba", [][]string{ + {"CSQPYRK1"}, + {"CSQPYRK", "1"}, + {"CSQPYR", "K1"}, + {"CSQPY", "RK1"}, + {"CSQPY", "RK", "1"}, + {"CSQPY", "RK1"}, + {"CSQP", "YR", "K1"}, + }, nil}, + + // NOTE: many test cases have been removed as we don't return ErrUnexpectedEOF. + } + + for _, testcase := range testcases { + for _, chunks := range testcase.chunkCombinations { + pr, pw := io.Pipe() + + // Write the encoded chunks into the pipe + go func() { + for _, chunk := range chunks { + pw.Write([]byte(chunk)) + } + pw.Close() + }() + + decoder := NewDecoder(pr) + back, err := io.ReadAll(decoder) + + if err != testcase.expected { + t.Errorf("Expected %v, got %v; case %s %+v", testcase.expected, err, testcase.prefix, chunks) + } + if testcase.expected == nil { + testEqual(t, "Decode from NewDecoder(chunkReader(%v)) = %q, want %q", chunks, string(back), testcase.prefix) + } + } + } +} +*/ + +func TestEncodedLen(t *testing.T) { + type test struct { + n int + want int64 + } + tests := []test{ + {0, 0}, + {1, 2}, + {2, 4}, + {3, 5}, + {4, 7}, + {5, 8}, + {6, 10}, + {7, 12}, + {10, 16}, + {11, 18}, + } + // check overflow + tests = append(tests, test{(math.MaxInt-4)/8 + 1, 1844674407370955162}) + tests = append(tests, test{math.MaxInt/8*5 + 4, math.MaxInt}) + for _, tt := range tests { + if got := EncodedLen(tt.n); int64(got) != tt.want { + t.Errorf("EncodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestDecodedLen(t *testing.T) { + type test struct { + n int + want int64 + } + tests := []test{ + {0, 0}, + {2, 1}, + {4, 2}, + {5, 3}, + {7, 4}, + {8, 5}, + {10, 6}, + {12, 7}, + {16, 10}, + {18, 11}, + } + // check overflow + tests = append(tests, test{math.MaxInt/5 + 1, 1152921504606846976}) + tests = append(tests, test{math.MaxInt, 5764607523034234879}) + for _, tt := range tests { + if got := DecodedLen(tt.n); int64(got) != tt.want { + t.Errorf("DecodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} diff --git a/examples/gno.land/p/demo/cford32/gno.mod b/examples/gno.land/p/demo/cford32/gno.mod new file mode 100644 index 00000000000..20b99c65e4c --- /dev/null +++ 
b/examples/gno.land/p/demo/cford32/gno.mod @@ -0,0 +1 @@ +module gno.land/p/demo/cford32 diff --git a/examples/gno.land/p/demo/seqid/gno.mod b/examples/gno.land/p/demo/seqid/gno.mod index 63e6a1fb551..d1390012c3c 100644 --- a/examples/gno.land/p/demo/seqid/gno.mod +++ b/examples/gno.land/p/demo/seqid/gno.mod @@ -1 +1,3 @@ module gno.land/p/demo/seqid + +require gno.land/p/demo/cford32 v0.0.0-latest diff --git a/examples/gno.land/p/demo/seqid/seqid.gno b/examples/gno.land/p/demo/seqid/seqid.gno index 8cb5366ef44..b3ff815a421 100644 --- a/examples/gno.land/p/demo/seqid/seqid.gno +++ b/examples/gno.land/p/demo/seqid/seqid.gno @@ -7,11 +7,15 @@ // var users avl.Tree // // func NewUser() { -// users.Set(id.Next().Binary(), &User{ ... }) +// users.Set(id.Next().String(), &User{ ... }) // } package seqid -import "encoding/binary" +import ( + "encoding/binary" + + "gno.land/p/demo/cford32" +) // An ID is a simple sequential ID generator. type ID uint64 @@ -48,10 +52,40 @@ func (i ID) Binary() string { return string(buf) } -// FromBinary creates a new ID from the given string. +// String encodes i using cford32's compact encoding. For more information, +// see the documentation for package [gno.land/p/demo/cford32]. +// +// The result of String will be a 7-byte string for IDs [0,2^34), and a +// 13-byte string for all values following that. All generated string IDs +// follow the same lexicographic order as their number values; that is, for any +// two IDs (x, y) such that x < y, x.String() < y.String(). +// As such, this string representation is suitable to be used as an AVL key. +func (i ID) String() string { + return string(cford32.PutCompact(uint64(i))) +} + +// FromBinary creates a new ID from the given string, expected to be a binary +// big-endian encoding of an ID (such as that of [ID.Binary]). +// The second return value is true if the conversion was successful. func FromBinary(b string) (ID, bool) { if len(b) != 8 { return 0, false } return ID(binary.BigEndian.Uint64([]byte(b))), true } + +// FromString creates a new ID from the given string, expected to be a string +// representation using cford32, such as that returned by [ID.String]. +// +// The encoding scheme used by cford32 allows the same ID to have many +// different representations (though the one returned by [ID.String] is only +// one, deterministic and safe to be used in AVL). The encoding scheme is +// "human-centric" and is thus case insensitive, and maps some ambiguous +// characters to be the same, ie. L = I = 1, O = 0. For this reason, when +// parsing user input to retrieve a key (encoded as a string), always sanitize +// it first using FromString, then run String(), instead of using the user's +// input directly. 
+func FromString(b string) (ID, error) { + n, err := cford32.Uint64([]byte(b)) + return ID(n), err +} diff --git a/examples/gno.land/p/demo/seqid/seqid_test.gno b/examples/gno.land/p/demo/seqid/seqid_test.gno index c6f57960177..0a1e777f1f7 100644 --- a/examples/gno.land/p/demo/seqid/seqid_test.gno +++ b/examples/gno.land/p/demo/seqid/seqid_test.gno @@ -37,7 +37,32 @@ func TestID_Binary(t *testing.T) { for j := 0; j < 1000; j++ { cur := i.Next().Binary() if cur <= prev { - t.Fatalf("cur %x <= prev %x", cur, prev) + t.Fatalf("cur %x > prev %x", cur, prev) } + prev = cur + } +} + +func TestID_String(t *testing.T) { + var i ID + prev := i.String() + + for j := 0; j < 1000; j++ { + cur := i.Next().String() + if cur <= prev { + t.Fatalf("cur %s > prev %s", cur, prev) + } + prev = cur + } + + // Test for when cford32 switches over to the long encoding. + i = 1<<34 - 512 + for j := 0; j < 1024; j++ { + cur := i.Next().String() + // println(cur) + if cur <= prev { + t.Fatalf("cur %s > prev %s", cur, prev) + } + prev = cur } } diff --git a/tm2/pkg/std/memfile.go b/tm2/pkg/std/memfile.go index 782537c8063..599e9a59cc5 100644 --- a/tm2/pkg/std/memfile.go +++ b/tm2/pkg/std/memfile.go @@ -43,7 +43,7 @@ const rePathPart = `[a-z][a-z0-9_]*` var ( rePkgName = regexp.MustCompile(`^[a-z][a-z0-9_]*$`) rePkgOrRlmPath = regexp.MustCompile(`gno\.land/(?:p|r)(?:/` + rePathPart + `)+`) - reFileName = regexp.MustCompile(`^[a-zA-Z0-9_]*\.[a-z0-9_\.]*$`) + reFileName = regexp.MustCompile(`^([a-zA-Z0-9_]*\.[a-z0-9_\.]*|LICENSE|README)$`) ) // path must not contain any dots after the first domain component.
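
Usage sketch for the new seqid string API, following the doc comments above. The realm name, `User` type, and function names below are purely illustrative; only the `avl` and `seqid` calls mirror the documented API.

```go
package myrealm

import (
	"gno.land/p/demo/avl"
	"gno.land/p/demo/seqid"
)

type User struct {
	Name string
}

var (
	id    seqid.ID
	users avl.Tree // cford32 compact ID -> *User
)

// NewUser stores a new user and returns its string ID.
func NewUser(name string) string {
	// String() uses cford32's compact encoding: 7 bytes for the first 2^34
	// IDs, 13 bytes afterwards, always preserving lexical order, so it can
	// be used directly as an AVL key.
	key := id.Next().String()
	users.Set(key, &User{Name: name})
	return key
}

// GetUser looks up a user by a (possibly user-provided) string ID.
func GetUser(raw string) (*User, bool) {
	// FromString canonicalizes the input before the lookup.
	parsed, err := seqid.FromString(raw)
	if err != nil {
		return nil, false
	}
	v, ok := users.Get(parsed.String())
	if !ok {
		return nil, false
	}
	return v.(*User), true
}
```

Because `FromString` accepts several spellings of the same ID (it is case insensitive and reads I/L as 1 and O as 0), the lookup round-trips through `parsed.String()` to recover the canonical key, as recommended in the `FromString` documentation.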