diff --git a/README.md b/README.md index 7e8226f..050a49c 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -# A LZFSE decompressor written in Go +# An LZFSE decompressor written in Go ``` import ( "os" - "gihub.com/aixiansheng/go-pure-lzfse" + "gihub.com/aixiansheng/lzfse" ) inf, err := os.Open("some.lzfse") diff --git a/cachedwriter.go b/cachedwriter.go new file mode 100644 index 0000000..cf4f032 --- /dev/null +++ b/cachedwriter.go @@ -0,0 +1,30 @@ +package lzfse + +import ( + "io" +) + +type cachedWriter struct { + w io.Writer + buf []byte +} + +func newCachedWriter(w io.Writer) *cachedWriter { + return &cachedWriter{ + w: w, + buf: make([]byte, 0, 1024), + } +} + +func (cw *cachedWriter) Write(b []byte) (int, error) { + n, err := cw.w.Write(b) + if n > 0 { + cw.buf = append(cw.buf, b[:n]...) + } + return n, err +} + +func (cw *cachedWriter) ReadRelativeToEnd(b []byte, offset int64) (copied int, err error) { + copied = copy(b, cw.buf[int64(len(cw.buf))-offset:]) + return +} diff --git a/decompressor.go b/decompressor.go index 8cc97cc..79ef647 100644 --- a/decompressor.go +++ b/decompressor.go @@ -19,22 +19,18 @@ const ( type decompressor struct { r *cachedReader + w *cachedWriter pipeR *io.PipeReader pipeW *io.PipeWriter handlerError error } -func decodeUncompressedBlock(r *cachedReader, w io.Writer) error { +func decodeUncompressedBlock(r *cachedReader, w *cachedWriter) (err error) { var n_raw_bytes uint32 - if err := binary.Read(r, binary.LittleEndian, &n_raw_bytes); err != nil { - return err + if err = binary.Read(r, binary.LittleEndian, &n_raw_bytes); err == nil { + _, err = io.CopyN(w, r, int64(n_raw_bytes)) } - - if _, err := io.CopyN(w, r, int64(n_raw_bytes)); err != nil { - return err - } - - return nil + return } func readBlockMagic(r io.Reader) (magic Magic, err error) { @@ -42,10 +38,10 @@ func readBlockMagic(r io.Reader) (magic Magic, err error) { return } -type blockHandler func(*cachedReader, io.Writer) error +type blockHandler func(*cachedReader, *cachedWriter) error func (d *decompressor) handleBlock(handler blockHandler) (Magic, error) { - if err := handler(d.r, d.pipeW); err != nil { + if err := handler(d.r, d.w); err != nil { return INVALID, err } @@ -60,6 +56,7 @@ func NewReader(r io.Reader) *decompressor { pipeR, pipeW := io.Pipe() d := &decompressor{ r: newCachedReader(r), + w: newCachedWriter(pipeW), pipeR: pipeR, pipeW: pipeW, } diff --git a/fse.go b/fse.go index 64c7efa..f2f6d33 100644 --- a/fse.go +++ b/fse.go @@ -21,12 +21,6 @@ func newInStream(bits int32, payload []byte) (*inStream, error) { idx: len(payload), } - // fmt.Printf("bits = %2x; pbuf - buf_start = %d\n", uint64(bits), len(payload)); - // for i := len(payload) - 10; i < len(payload); i++ { - // fmt.Printf("%2.2x ", int(payload[i])) - // } - // fmt.Println() - if 0 != bits { fs.idx -= 8 fs.accum = binary.LittleEndian.Uint64(payload[fs.idx:]) @@ -119,14 +113,17 @@ type literalDecoderEntry struct { delta int16 } +func (e *literalDecoderEntry) toInt32() int32 { + return int32(e.k) | (int32(e.symbol) << 8) | (int32(e.delta) << 16) +} + type literalDecoderTable []literalDecoderEntry func newLiteralDecoderTable(nstates, nsymbols int, freq []uint16) (literalDecoderTable, error) { - // fse_init_decoder_table - //table := make(literalDecoderTable, 0, 32) table := make(literalDecoderTable, 1024) n_clz := bits.LeadingZeros32(uint32(nstates)) sum_of_freq := 0 + idx := 0 for i := 0; i < nsymbols; i++ { f := int(freq[i]) @@ -155,9 +152,11 @@ func newLiteralDecoderTable(nstates, nsymbols int, freq []uint16) (literalDecode e.delta = int16((j - j0) << (k - 1)) } - table = append(table, e) + table[idx] = e + idx++ } } + return table, nil } @@ -175,8 +174,8 @@ func newLiteralDecoder(state State, table literalDecoderTable) *literalDecoder { func (d *literalDecoder) Decode(in *inStream) uint8 { e := d.table[d.state] - eint := int32(uint32(e.k<<24) | uint32(e.symbol<<16) | uint32(e.delta)) - d.state = State(eint>>16) + State(in.pull(eint*0xff)) + eint := e.toInt32() + d.state = State(eint>>16) + State(in.pull(eint&0xff)) return uint8(fse_extract_bits(uint64(eint), 8, 8)) } diff --git a/lzfse.go b/lzfse.go index 6b58bb0..5ac0cae 100644 --- a/lzfse.go +++ b/lzfse.go @@ -1,7 +1,6 @@ package lzfse import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -67,13 +66,8 @@ var d_base_value = [LZFSE_ENCODE_D_SYMBOLS]int32{ } type lzfseDecoder struct { - // n_matches uint32 - // n_lmd_payload_bytes uint32 - // l_state State - // m_state State - // d_state State - v1Header *lzfseV1Header + w *cachedWriter literals [LZFSE_LITERALS_PER_BLOCK + 64]byte @@ -81,9 +75,6 @@ type lzfseDecoder struct { lDecoder *lmdDecoder mDecoder *lmdDecoder dDecoder *lmdDecoder - - buffer *backExtendedBuffer - w io.Writer } type lzfseV1Header struct { @@ -113,10 +104,15 @@ type lzfseV2Header struct { func (dec *lzfseDecoder) copyMatch(m, d int) error { b := make([]byte, m) - if _, err := dec.buffer.ReadAt(b, int64(d)); err != nil { - return err + n, err := dec.w.ReadRelativeToEnd(b, int64(d)) + if err == nil && n != len(b) { + // There weren't enough bytes in the buffer, so we should repeat them until we fill b. + // (this is what would happen if there was an overlapped copy) + for i := 0; i < len(b)-n; i++ { + b[n+i] = b[i] + } } - _, err := dec.buffer.Write(b) + _, err = dec.w.Write(b) return err } @@ -136,12 +132,12 @@ func (dec *lzfseDecoder) Decode() error { D = newD } - //fmt.Printf("L=%d M=%d D=%d blen=%d\n", L, M, D, dec.buffer.Len()) + //fmt.Printf("0x%.16x L=%d M=%d D=%d\n", len(dec.w.buf), L, M, D) // Literals... b := make([]byte, L) m := copy(b, dec.literals[literalIdx:literalIdx+int(L)]) - if n, err := dec.buffer.Write(b); n != m { + if n, err := dec.w.Write(b); n != m { return err } @@ -153,10 +149,6 @@ func (dec *lzfseDecoder) Decode() error { } } - if _, err := dec.w.Write(dec.buffer.Bytes()); err != nil { - return err - } - return nil } @@ -218,35 +210,8 @@ func (header *lzfseV1Header) Check() (err error) { return } -/////// @@@@@@@@@@@@ NEED TO WRAP BUFFER SO IT CAN SEEK FURTHER BACK - -type backExtendedBuffer struct { - buffer *bytes.Buffer - prevBytes []byte -} - -func (be *backExtendedBuffer) Write(b []byte) (int, error) { - return be.buffer.Write(b) -} - -func (be *backExtendedBuffer) ReadAt(b []byte, offset int64) (int, error) { - copied := 0 - if offset < 0 { - copied = copy(b, be.prevBytes[int64(len(be.prevBytes))+offset:]) - } - copied += copy(b[copied:], be.buffer.Bytes()) - return copied, nil -} - -func (be *backExtendedBuffer) Bytes() []byte { - return be.buffer.Bytes() -} - -func newLzfseDecoder(r *cachedReader, w io.Writer, v1 *lzfseV1Header, headerOffset int) (*lzfseDecoder, error) { - b := make([]byte, 0, v1.n_payload_bytes) - buffer := bytes.NewBuffer(b) +func newLzfseDecoder(r *cachedReader, w *cachedWriter, v1 *lzfseV1Header, headerOffset int) (*lzfseDecoder, error) { decoder := &lzfseDecoder{ - buffer: &backExtendedBuffer{buffer, r.Bytes()}, v1Header: v1, w: w, } @@ -298,7 +263,7 @@ func newLzfseDecoder(r *cachedReader, w io.Writer, v1 *lzfseV1Header, headerOffs } cachedBytes := r.Bytes() - //cachedBytes = cachedBytes[len(cachedBytes)-int(v1.n_lmd_payload_bytes):] + cachedBytes = cachedBytes[len(cachedBytes)-int(v1.n_lmd_payload_bytes):] in2, err := newInStream(int32(v1.lmd_bits), cachedBytes) if err != nil { @@ -371,7 +336,7 @@ func v1HeaderFromV2(headerV2 *lzfseV2Header) (*lzfseV1Header, error) { copy(headerV1.l_freq[:], freq[0:20]) // LZFSE_ENCODE_L_SYMBOLS copy(headerV1.m_freq[:], freq[20:40]) // LZFSE_ENCODE_M_SYMBOLS copy(headerV1.d_freq[:], freq[40:104]) // LZFSE_ENCODE_D_SYMBOLS - copy(headerV1.literal_freq[:], freq[144:]) // LZFSE_ENCODE_LITERAL_SYMBOLS + copy(headerV1.literal_freq[:], freq[104:]) // LZFSE_ENCODE_LITERAL_SYMBOLS if accum_nbits >= 8 || freq_idx != freq_idx_max { return nil, fmt.Errorf("accum_nbits (%d) >= 8 || freq_idx (%d) != freq_idx_max (%d)", @@ -381,7 +346,7 @@ func v1HeaderFromV2(headerV2 *lzfseV2Header) (*lzfseV1Header, error) { return headerV1, nil } -func decodeCompressedV1Block(r *cachedReader, w io.Writer) error { +func decodeCompressedV1Block(r *cachedReader, w *cachedWriter) error { if decoder, err := newLzfseV1Decoder(r, w); err != nil { return err } else { @@ -389,7 +354,7 @@ func decodeCompressedV1Block(r *cachedReader, w io.Writer) error { } } -func decodeCompressedV2Block(r *cachedReader, w io.Writer) error { +func decodeCompressedV2Block(r *cachedReader, w *cachedWriter) error { if decoder, err := newLzfseV2Decoder(r, w); err != nil { return err } else { @@ -397,7 +362,7 @@ func decodeCompressedV2Block(r *cachedReader, w io.Writer) error { } } -func newLzfseV1Decoder(cr *cachedReader, w io.Writer) (*lzfseDecoder, error) { +func newLzfseV1Decoder(cr *cachedReader, w *cachedWriter) (*lzfseDecoder, error) { var v1Header lzfseV1Header if err := binary.Read(cr, binary.LittleEndian, &v1Header); err != nil { return nil, err @@ -406,7 +371,7 @@ func newLzfseV1Decoder(cr *cachedReader, w io.Writer) (*lzfseDecoder, error) { } } -func newLzfseV2Decoder(cr *cachedReader, w io.Writer) (*lzfseDecoder, error) { +func newLzfseV2Decoder(cr *cachedReader, w *cachedWriter) (*lzfseDecoder, error) { startLen := len(cr.Bytes()) var v2Header lzfseV2Header if err := binary.Read(cr, binary.LittleEndian, &v2Header); err != nil { diff --git a/lzfse_test.go b/lzfse_test.go index f432e2b..0c113e3 100644 --- a/lzfse_test.go +++ b/lzfse_test.go @@ -9,14 +9,14 @@ import ( ) func TestSmall(t *testing.T) { - DoDecomp("cmp.lz", "dec", t) + DoDecomp("cmp.lz", "dec", "dec.err", t) } func TestMedium(t *testing.T) { - DoDecomp("cmp2.lz", "dec2", t) + DoDecomp("cmp2.lz", "dec2", "dec2.err", t) } -func DoDecomp(compressed, original string, t *testing.T) { +func DoDecomp(compressed, original, errorOutputFile string, t *testing.T) { cmp, err := os.Open(compressed) if err != nil { t.Errorf("Couldn't open test file") @@ -34,17 +34,16 @@ func DoDecomp(compressed, original string, t *testing.T) { t.Errorf("Couldn't readall original") } - outBytes := make([]byte, len(decBytes)) - outBuffer := bytes.NewBuffer(outBytes) + var buffer bytes.Buffer d := NewReader(cmp) - n, err := io.Copy(outBuffer, d) - if int(n) != len(outBytes) { - t.Errorf("len(outBytes) != n: %d != %d err (%v)", len(outBytes), int(n), err) + if n, err := io.Copy(&buffer, d); err != nil { + t.Errorf("Error decompressing: %v [orig= %d new=%d]", err, len(decBytes), n) } - if err != nil { - t.Errorf("io.Copy should have returned EOF, instead it returned %v", err) + if !bytes.Equal(buffer.Bytes(), decBytes) { + t.Errorf("The outputs did not match") + ioutil.WriteFile(errorOutputFile, buffer.Bytes(), 0644) } } diff --git a/lzvn.go b/lzvn.go index d8e3bc2..ceb6557 100644 --- a/lzvn.go +++ b/lzvn.go @@ -221,8 +221,8 @@ loop: return nil } -func decodeLZVNBlock(cr *cachedReader, w io.Writer) error { - if decoder, err := newLzvnDecoder(cr, w); err != nil { +func decodeLZVNBlock(cr *cachedReader, cw *cachedWriter) error { + if decoder, err := newLzvnDecoder(cr, cw); err != nil { return err } else { return decoder.Decode()