- package lz4
- import (
- "encoding/binary"
- "errors"
- )
// block represents a frame data block.
// Used when compressing or decompressing frame blocks concurrently.
type block struct {
	compressed bool   // whether zdata currently holds valid compressed bytes
	zdata      []byte // compressed data
	data       []byte // decompressed data
	offset     int    // offset within the data as with block dependency the 64Kb window is prepended to it
	checksum   uint32 // compressed data checksum
	err        error  // error while [de]compressing
}
var (
	// ErrInvalidSource is returned by UncompressBlock when a compressed block is corrupted.
	ErrInvalidSource = errors.New("lz4: invalid source")
	// ErrShortBuffer is returned by UncompressBlock, CompressBlock or CompressBlockHC when
	// the supplied buffer for [de]compression is too small.
	ErrShortBuffer = errors.New("lz4: short buffer")
)
// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
//
// The bound is the input size plus the LZ4 length-encoding overhead
// (one extra byte per 255 input bytes) plus a fixed 16-byte margin.
func CompressBlockBound(n int) int {
	overhead := n/255 + 16
	return n + overhead
}
- // UncompressBlock decompresses the source buffer into the destination one,
- // starting at the di index and returning the decompressed size.
- //
- // The destination buffer must be sized appropriately.
- //
- // An error is returned if the source data is invalid or the destination buffer is too small.
- func UncompressBlock(src, dst []byte, di int) (int, error) {
- si, sn, di0 := 0, len(src), di
- if sn == 0 {
- return 0, nil
- }
- for {
- // literals and match lengths (token)
- lLen := int(src[si] >> 4)
- mLen := int(src[si] & 0xF)
- if si++; si == sn {
- return di, ErrInvalidSource
- }
- // literals
- if lLen > 0 {
- if lLen == 0xF {
- for src[si] == 0xFF {
- lLen += 0xFF
- if si++; si == sn {
- return di - di0, ErrInvalidSource
- }
- }
- lLen += int(src[si])
- if si++; si == sn {
- return di - di0, ErrInvalidSource
- }
- }
- if len(dst)-di < lLen || si+lLen > sn {
- return di - di0, ErrShortBuffer
- }
- di += copy(dst[di:], src[si:si+lLen])
- if si += lLen; si >= sn {
- return di - di0, nil
- }
- }
- if si += 2; si >= sn {
- return di, ErrInvalidSource
- }
- offset := int(src[si-2]) | int(src[si-1])<<8
- if di-offset < 0 || offset == 0 {
- return di - di0, ErrInvalidSource
- }
- // match
- if mLen == 0xF {
- for src[si] == 0xFF {
- mLen += 0xFF
- if si++; si == sn {
- return di - di0, ErrInvalidSource
- }
- }
- mLen += int(src[si])
- if si++; si == sn {
- return di - di0, ErrInvalidSource
- }
- }
- // minimum match length is 4
- mLen += 4
- if len(dst)-di <= mLen {
- return di - di0, ErrShortBuffer
- }
- // copy the match (NB. match is at least 4 bytes long)
- // NB. past di, copy() would write old bytes instead of
- // the ones we just copied, so split the work into the largest chunk.
- for ; mLen >= offset; mLen -= offset {
- di += copy(dst[di:], dst[di-offset:di])
- }
- di += copy(dst[di:], dst[di-offset:di-offset+mLen])
- }
- }
// CompressBlock compresses the source buffer starting at soffset into the destination one.
// This is the fast version of LZ4 compression and also the default one.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlock(src, dst []byte, soffset int) (int, error) {
	// sn: last input position worth scanning for a match (mfLimit is the
	// frame-format tail margin, defined elsewhere in this package).
	sn, dn := len(src)-mfLimit, len(dst)
	if sn <= 0 || dn == 0 || soffset >= sn {
		// nothing to do: input too small, empty dst, or offset past the scan range
		return 0, nil
	}
	var si, di int

	// fast scan strategy:
	// we only need a hash table to store the last sequences (4 bytes)
	var hashTable [1 << hashLog]int
	var hashShift = uint((minMatch * 8) - hashLog)

	// Initialise the hash table with the first 64Kb of the input buffer
	// (used when compressing dependent blocks)
	for si < soffset {
		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
		si++
		// positions are stored +1 so a zero entry means "empty slot"
		hashTable[h] = si
	}

	anchor := si             // start of the pending (not yet emitted) literal run
	fma := 1 << skipStrength // skip accelerator: grows while no match is found
	for si < sn-minMatch {
		// hash the next 4 bytes (sequence)...
		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
		// -1 to separate existing entries from new ones
		ref := hashTable[h] - 1
		// ...and store the position of the hash in the hash table (+1 to compensate the -1 upon saving)
		hashTable[h] = si + 1
		// no need to check the last 3 bytes in the first literal 4 bytes as
		// this guarantees that the next match, if any, is compressed with
		// a lower size, since to have some compression we must have:
		// ll+ml-overlap > 1 + (ll-15)/255 + (ml-4-15)/255 + 2 (uncompressed size>compressed size)
		// => ll+ml>3+2*overlap => ll+ml>= 4+2*overlap
		// and by definition we do have:
		// ll >= 1, ml >= 4
		// => ll+ml >= 5
		// => so overlap must be 0
		// the sequence is new, out of bound (64kb) or not valid: try next sequence
		if ref < 0 || fma&(1<<skipStrength-1) < 4 ||
			(si-ref)>>winSizeLog > 0 ||
			src[ref] != src[si] ||
			src[ref+1] != src[si+1] ||
			src[ref+2] != src[si+2] ||
			src[ref+3] != src[si+3] {
			// variable step: improves performance on non-compressible data
			si += fma >> skipStrength
			fma++
			continue
		}
		// match found
		fma = 1 << skipStrength
		lLen := si - anchor
		offset := si - ref

		// encode match length part 1
		si += minMatch
		mLen := si // match length has minMatch already
		// extend the match while the bytes keep agreeing
		for si <= sn && src[si] == src[si-offset] {
			si++
		}
		mLen = si - mLen
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// encode literals length
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			if di++; di == dn {
				return di, ErrShortBuffer
			}
			// lengths >= 15 continue as a run of 0xFF bytes plus a remainder byte
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(l)
		}
		if di++; di == dn {
			return di, ErrShortBuffer
		}

		// literals
		if di+lLen >= dn {
			return di, ErrShortBuffer
		}
		di += copy(dst[di:], src[anchor:anchor+lLen])
		anchor = si

		// encode offset (2 bytes, little-endian)
		if di += 2; di >= dn {
			return di, ErrShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// encode match length part 2
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(mLen)
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
	}

	if anchor == 0 {
		// incompressible
		return 0, nil
	}

	// last literals
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		if di++; di == dn {
			return di, ErrShortBuffer
		}
		lLen -= 0xF
		for ; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
		dst[di] = byte(lLen)
	}
	if di++; di == dn {
		return di, ErrShortBuffer
	}

	// write literals
	src = src[anchor:]
	switch n := di + len(src); {
	case n > dn:
		return di, ErrShortBuffer
	case n >= sn:
		// incompressible
		return 0, nil
	}
	di += copy(dst[di:], src)
	return di, nil
}
// CompressBlockHC compresses the source buffer starting at soffset into the destination one.
// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlockHC(src, dst []byte, soffset int) (int, error) {
	// sn: last input position worth scanning for a match (mfLimit is the
	// frame-format tail margin, defined elsewhere in this package).
	sn, dn := len(src)-mfLimit, len(dst)
	if sn <= 0 || dn == 0 || soffset >= sn {
		return 0, nil
	}
	var si, di int

	// Hash Chain strategy:
	// we need a hash table and a chain table
	// the chain table cannot contain more entries than the window size (64Kb entries)
	var hashTable [1 << hashLog]int
	var chainTable [winSize]int
	var hashShift = uint((minMatch * 8) - hashLog)

	// Initialise the hash table with the first 64Kb of the input buffer
	// (used when compressing dependent blocks)
	for si < soffset {
		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
		chainTable[si&winMask] = hashTable[h]
		si++
		// positions are stored +1 so a zero entry means "empty slot"
		hashTable[h] = si
	}

	anchor := si // start of the pending (not yet emitted) literal run
	for si < sn-minMatch {
		// hash the next 4 bytes (sequence)...
		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift

		// follow the chain until out of window and give the longest match
		mLen := 0
		offset := 0
		for next := hashTable[h] - 1; next > 0 && next > si-winSize; next = chainTable[next&winMask] - 1 {
			// the first (mLen==0) or next byte (mLen>=minMatch) at current match length must match to improve on the match length
			if src[next+mLen] == src[si+mLen] {
				for ml := 0; ; ml++ {
					if src[next+ml] != src[si+ml] || si+ml > sn {
						// found a longer match, keep its position and length
						if mLen < ml && ml >= minMatch {
							mLen = ml
							offset = si - next
						}
						break
					}
				}
			}
		}
		chainTable[si&winMask] = hashTable[h]
		hashTable[h] = si + 1

		// no match found
		if mLen == 0 {
			si++
			continue
		}

		// match found
		// update hash/chain tables with overlapping bytes:
		// si already hashed, add everything from si+1 up to the match length
		// (the loop variables shadow the outer si on purpose)
		for si, ml := si+1, si+mLen; si < ml; {
			h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
			chainTable[si&winMask] = hashTable[h]
			si++
			hashTable[h] = si
		}

		lLen := si - anchor
		si += mLen
		mLen -= minMatch // match length does not include minMatch

		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// encode literals length
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			if di++; di == dn {
				return di, ErrShortBuffer
			}
			// lengths >= 15 continue as a run of 0xFF bytes plus a remainder byte
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(l)
		}
		if di++; di == dn {
			return di, ErrShortBuffer
		}

		// literals
		if di+lLen >= dn {
			return di, ErrShortBuffer
		}
		di += copy(dst[di:], src[anchor:anchor+lLen])
		anchor = si

		// encode offset (2 bytes, little-endian)
		if di += 2; di >= dn {
			return di, ErrShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// encode match length part 2
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(mLen)
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
	}

	if anchor == 0 {
		// incompressible
		return 0, nil
	}

	// last literals
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		if di++; di == dn {
			return di, ErrShortBuffer
		}
		lLen -= 0xF
		for ; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
		dst[di] = byte(lLen)
	}
	if di++; di == dn {
		return di, ErrShortBuffer
	}

	// write literals
	src = src[anchor:]
	switch n := di + len(src); {
	case n > dn:
		return di, ErrShortBuffer
	case n >= sn:
		// incompressible
		return 0, nil
	}
	di += copy(dst[di:], src)
	return di, nil
}