mirror of
https://github.com/PSI-Rockin/DobieStation.git
synced 2024-05-20 13:08:11 -04:00
a491dce51a
This changes up the build system quite a bit.
422 lines
12 KiB
C
422 lines
12 KiB
C
/*
|
|
* decompress_template.h
|
|
*
|
|
* Copyright 2016 Eric Biggers
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use,
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following
|
|
* conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
/*
|
|
* This is the actual DEFLATE decompression routine, lifted out of
|
|
* deflate_decompress.c so that it can be compiled multiple times with different
|
|
* target instruction sets.
|
|
*/
|
|
|
|
static enum libdeflate_result ATTRIBUTES
|
|
FUNCNAME(struct libdeflate_decompressor * restrict d,
|
|
const void * restrict in, size_t in_nbytes,
|
|
void * restrict out, size_t out_nbytes_avail,
|
|
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
|
|
{
|
|
u8 *out_next = out;
|
|
u8 * const out_end = out_next + out_nbytes_avail;
|
|
const u8 *in_next = in;
|
|
const u8 * const in_end = in_next + in_nbytes;
|
|
bitbuf_t bitbuf = 0;
|
|
unsigned bitsleft = 0;
|
|
size_t overrun_count = 0;
|
|
unsigned i;
|
|
unsigned is_final_block;
|
|
unsigned block_type;
|
|
u16 len;
|
|
u16 nlen;
|
|
unsigned num_litlen_syms;
|
|
unsigned num_offset_syms;
|
|
u16 tmp16;
|
|
u32 tmp32;
|
|
|
|
next_block:
|
|
/* Starting to read the next block. */
|
|
;
|
|
|
|
STATIC_ASSERT(CAN_ENSURE(1 + 2 + 5 + 5 + 4));
|
|
ENSURE_BITS(1 + 2 + 5 + 5 + 4);
|
|
|
|
/* BFINAL: 1 bit */
|
|
is_final_block = POP_BITS(1);
|
|
|
|
/* BTYPE: 2 bits */
|
|
block_type = POP_BITS(2);
|
|
|
|
if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
|
|
|
|
/* Dynamic Huffman block. */
|
|
|
|
/* The order in which precode lengths are stored. */
|
|
static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
|
|
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
|
|
};
|
|
|
|
unsigned num_explicit_precode_lens;
|
|
|
|
/* Read the codeword length counts. */
|
|
|
|
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == ((1 << 5) - 1) + 257);
|
|
num_litlen_syms = POP_BITS(5) + 257;
|
|
|
|
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == ((1 << 5) - 1) + 1);
|
|
num_offset_syms = POP_BITS(5) + 1;
|
|
|
|
STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == ((1 << 4) - 1) + 4);
|
|
num_explicit_precode_lens = POP_BITS(4) + 4;
|
|
|
|
d->static_codes_loaded = false;
|
|
|
|
/* Read the precode codeword lengths. */
|
|
STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
|
|
for (i = 0; i < num_explicit_precode_lens; i++) {
|
|
ENSURE_BITS(3);
|
|
d->u.precode_lens[deflate_precode_lens_permutation[i]] = POP_BITS(3);
|
|
}
|
|
|
|
for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
|
|
d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
|
|
|
|
/* Build the decode table for the precode. */
|
|
SAFETY_CHECK(build_precode_decode_table(d));
|
|
|
|
/* Expand the literal/length and offset codeword lengths. */
|
|
for (i = 0; i < num_litlen_syms + num_offset_syms; ) {
|
|
u32 entry;
|
|
unsigned presym;
|
|
u8 rep_val;
|
|
unsigned rep_count;
|
|
|
|
ENSURE_BITS(DEFLATE_MAX_PRE_CODEWORD_LEN + 7);
|
|
|
|
/* (The code below assumes that the precode decode table
|
|
* does not have any subtables.) */
|
|
STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
|
|
|
|
/* Read the next precode symbol. */
|
|
entry = d->u.l.precode_decode_table[BITS(DEFLATE_MAX_PRE_CODEWORD_LEN)];
|
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
|
presym = entry >> HUFFDEC_RESULT_SHIFT;
|
|
|
|
if (presym < 16) {
|
|
/* Explicit codeword length */
|
|
d->u.l.lens[i++] = presym;
|
|
continue;
|
|
}
|
|
|
|
/* Run-length encoded codeword lengths */
|
|
|
|
/* Note: we don't need verify that the repeat count
|
|
* doesn't overflow the number of elements, since we
|
|
* have enough extra spaces to allow for the worst-case
|
|
* overflow (138 zeroes when only 1 length was
|
|
* remaining).
|
|
*
|
|
* In the case of the small repeat counts (presyms 16
|
|
* and 17), it is fastest to always write the maximum
|
|
* number of entries. That gets rid of branches that
|
|
* would otherwise be required.
|
|
*
|
|
* It is not just because of the numerical order that
|
|
* our checks go in the order 'presym < 16', 'presym ==
|
|
* 16', and 'presym == 17'. For typical data this is
|
|
* ordered from most frequent to least frequent case.
|
|
*/
|
|
STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
|
|
|
|
if (presym == 16) {
|
|
/* Repeat the previous length 3 - 6 times */
|
|
SAFETY_CHECK(i != 0);
|
|
rep_val = d->u.l.lens[i - 1];
|
|
STATIC_ASSERT(3 + ((1 << 2) - 1) == 6);
|
|
rep_count = 3 + POP_BITS(2);
|
|
d->u.l.lens[i + 0] = rep_val;
|
|
d->u.l.lens[i + 1] = rep_val;
|
|
d->u.l.lens[i + 2] = rep_val;
|
|
d->u.l.lens[i + 3] = rep_val;
|
|
d->u.l.lens[i + 4] = rep_val;
|
|
d->u.l.lens[i + 5] = rep_val;
|
|
i += rep_count;
|
|
} else if (presym == 17) {
|
|
/* Repeat zero 3 - 10 times */
|
|
STATIC_ASSERT(3 + ((1 << 3) - 1) == 10);
|
|
rep_count = 3 + POP_BITS(3);
|
|
d->u.l.lens[i + 0] = 0;
|
|
d->u.l.lens[i + 1] = 0;
|
|
d->u.l.lens[i + 2] = 0;
|
|
d->u.l.lens[i + 3] = 0;
|
|
d->u.l.lens[i + 4] = 0;
|
|
d->u.l.lens[i + 5] = 0;
|
|
d->u.l.lens[i + 6] = 0;
|
|
d->u.l.lens[i + 7] = 0;
|
|
d->u.l.lens[i + 8] = 0;
|
|
d->u.l.lens[i + 9] = 0;
|
|
i += rep_count;
|
|
} else {
|
|
/* Repeat zero 11 - 138 times */
|
|
STATIC_ASSERT(11 + ((1 << 7) - 1) == 138);
|
|
rep_count = 11 + POP_BITS(7);
|
|
memset(&d->u.l.lens[i], 0,
|
|
rep_count * sizeof(d->u.l.lens[i]));
|
|
i += rep_count;
|
|
}
|
|
}
|
|
} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
|
|
|
|
/* Uncompressed block: copy 'len' bytes literally from the input
|
|
* buffer to the output buffer. */
|
|
|
|
ALIGN_INPUT();
|
|
|
|
SAFETY_CHECK(in_end - in_next >= 4);
|
|
|
|
len = READ_U16();
|
|
nlen = READ_U16();
|
|
|
|
SAFETY_CHECK(len == (u16)~nlen);
|
|
if (unlikely(len > out_end - out_next))
|
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
|
SAFETY_CHECK(len <= in_end - in_next);
|
|
|
|
memcpy(out_next, in_next, len);
|
|
in_next += len;
|
|
out_next += len;
|
|
|
|
goto block_done;
|
|
|
|
} else {
|
|
SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
|
|
|
|
/*
|
|
* Static Huffman block: build the decode tables for the static
|
|
* codes. Skip doing so if the tables are already set up from
|
|
* an earlier static block; this speeds up decompression of
|
|
* degenerate input of many empty or very short static blocks.
|
|
*
|
|
* Afterwards, the remainder is the same as decompressing a
|
|
* dynamic Huffman block.
|
|
*/
|
|
|
|
if (d->static_codes_loaded)
|
|
goto have_decode_tables;
|
|
|
|
d->static_codes_loaded = true;
|
|
|
|
STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
|
|
STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
|
|
|
|
for (i = 0; i < 144; i++)
|
|
d->u.l.lens[i] = 8;
|
|
for (; i < 256; i++)
|
|
d->u.l.lens[i] = 9;
|
|
for (; i < 280; i++)
|
|
d->u.l.lens[i] = 7;
|
|
for (; i < 288; i++)
|
|
d->u.l.lens[i] = 8;
|
|
|
|
for (; i < 288 + 32; i++)
|
|
d->u.l.lens[i] = 5;
|
|
|
|
num_litlen_syms = 288;
|
|
num_offset_syms = 32;
|
|
}
|
|
|
|
/* Decompressing a Huffman block (either dynamic or static) */
|
|
|
|
SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
|
|
SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
|
|
have_decode_tables:
|
|
|
|
/* The main DEFLATE decode loop */
|
|
for (;;) {
|
|
u32 entry;
|
|
u32 length;
|
|
u32 offset;
|
|
const u8 *src;
|
|
u8 *dst;
|
|
|
|
/* Decode a litlen symbol. */
|
|
ENSURE_BITS(DEFLATE_MAX_LITLEN_CODEWORD_LEN);
|
|
entry = d->u.litlen_decode_table[BITS(LITLEN_TABLEBITS)];
|
|
if (entry & HUFFDEC_SUBTABLE_POINTER) {
|
|
/* Litlen subtable required (uncommon case) */
|
|
REMOVE_BITS(LITLEN_TABLEBITS);
|
|
entry = d->u.litlen_decode_table[
|
|
((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
|
|
BITS(entry & HUFFDEC_LENGTH_MASK)];
|
|
}
|
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
|
if (entry & HUFFDEC_LITERAL) {
|
|
/* Literal */
|
|
if (unlikely(out_next == out_end))
|
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
|
*out_next++ = (u8)(entry >> HUFFDEC_RESULT_SHIFT);
|
|
continue;
|
|
}
|
|
|
|
/* Match or end-of-block */
|
|
|
|
entry >>= HUFFDEC_RESULT_SHIFT;
|
|
ENSURE_BITS(MAX_ENSURE);
|
|
|
|
/* Pop the extra length bits and add them to the length base to
|
|
* produce the full length. */
|
|
length = (entry >> HUFFDEC_LENGTH_BASE_SHIFT) +
|
|
POP_BITS(entry & HUFFDEC_EXTRA_LENGTH_BITS_MASK);
|
|
|
|
/* The match destination must not end after the end of the
|
|
* output buffer. For efficiency, combine this check with the
|
|
* end-of-block check. We're using 0 for the special
|
|
* end-of-block length, so subtract 1 and it turn it into
|
|
* SIZE_MAX. */
|
|
STATIC_ASSERT(HUFFDEC_END_OF_BLOCK_LENGTH == 0);
|
|
if (unlikely((size_t)length - 1 >= out_end - out_next)) {
|
|
if (unlikely(length != HUFFDEC_END_OF_BLOCK_LENGTH))
|
|
return LIBDEFLATE_INSUFFICIENT_SPACE;
|
|
goto block_done;
|
|
}
|
|
|
|
/* Decode the match offset. */
|
|
|
|
entry = d->offset_decode_table[BITS(OFFSET_TABLEBITS)];
|
|
if (entry & HUFFDEC_SUBTABLE_POINTER) {
|
|
/* Offset subtable required (uncommon case) */
|
|
REMOVE_BITS(OFFSET_TABLEBITS);
|
|
entry = d->offset_decode_table[
|
|
((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
|
|
BITS(entry & HUFFDEC_LENGTH_MASK)];
|
|
}
|
|
REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
|
|
entry >>= HUFFDEC_RESULT_SHIFT;
|
|
|
|
STATIC_ASSERT(CAN_ENSURE(DEFLATE_MAX_EXTRA_LENGTH_BITS +
|
|
DEFLATE_MAX_OFFSET_CODEWORD_LEN) &&
|
|
CAN_ENSURE(DEFLATE_MAX_EXTRA_OFFSET_BITS));
|
|
if (!CAN_ENSURE(DEFLATE_MAX_EXTRA_LENGTH_BITS +
|
|
DEFLATE_MAX_OFFSET_CODEWORD_LEN +
|
|
DEFLATE_MAX_EXTRA_OFFSET_BITS))
|
|
ENSURE_BITS(DEFLATE_MAX_EXTRA_OFFSET_BITS);
|
|
|
|
/* Pop the extra offset bits and add them to the offset base to
|
|
* produce the full offset. */
|
|
offset = (entry & HUFFDEC_OFFSET_BASE_MASK) +
|
|
POP_BITS(entry >> HUFFDEC_EXTRA_OFFSET_BITS_SHIFT);
|
|
|
|
/* The match source must not begin before the beginning of the
|
|
* output buffer. */
|
|
SAFETY_CHECK(offset <= out_next - (const u8 *)out);
|
|
|
|
/*
|
|
* Copy the match: 'length' bytes at 'out_next - offset' to
|
|
* 'out_next', possibly overlapping. If the match doesn't end
|
|
* too close to the end of the buffer and offset >= WORDBYTES ||
|
|
* offset == 1, take a fast path which copies a word at a time
|
|
* -- potentially more than the length of the match, but that's
|
|
* fine as long as we check for enough extra space.
|
|
*
|
|
* The remaining cases are not performance-critical so are
|
|
* handled by a simple byte-by-byte copy.
|
|
*/
|
|
|
|
src = out_next - offset;
|
|
dst = out_next;
|
|
out_next += length;
|
|
|
|
if (UNALIGNED_ACCESS_IS_FAST &&
|
|
/* max overrun is writing 3 words for a min length match */
|
|
likely(out_end - out_next >=
|
|
3 * WORDBYTES - DEFLATE_MIN_MATCH_LEN)) {
|
|
if (offset >= WORDBYTES) { /* words don't overlap? */
|
|
copy_word_unaligned(src, dst);
|
|
src += WORDBYTES;
|
|
dst += WORDBYTES;
|
|
copy_word_unaligned(src, dst);
|
|
src += WORDBYTES;
|
|
dst += WORDBYTES;
|
|
do {
|
|
copy_word_unaligned(src, dst);
|
|
src += WORDBYTES;
|
|
dst += WORDBYTES;
|
|
} while (dst < out_next);
|
|
} else if (offset == 1) {
|
|
/* RLE encoding of previous byte, common if the
|
|
* data contains many repeated bytes */
|
|
machine_word_t v = repeat_byte(*src);
|
|
|
|
store_word_unaligned(v, dst);
|
|
dst += WORDBYTES;
|
|
store_word_unaligned(v, dst);
|
|
dst += WORDBYTES;
|
|
do {
|
|
store_word_unaligned(v, dst);
|
|
dst += WORDBYTES;
|
|
} while (dst < out_next);
|
|
} else {
|
|
*dst++ = *src++;
|
|
*dst++ = *src++;
|
|
do {
|
|
*dst++ = *src++;
|
|
} while (dst < out_next);
|
|
}
|
|
} else {
|
|
STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
|
|
*dst++ = *src++;
|
|
*dst++ = *src++;
|
|
do {
|
|
*dst++ = *src++;
|
|
} while (dst < out_next);
|
|
}
|
|
}
|
|
|
|
block_done:
|
|
/* Finished decoding a block. */
|
|
|
|
if (!is_final_block)
|
|
goto next_block;
|
|
|
|
/* That was the last block. */
|
|
|
|
/* Discard any readahead bits and check for excessive overread */
|
|
ALIGN_INPUT();
|
|
|
|
/* Optionally return the actual number of bytes read */
|
|
if (actual_in_nbytes_ret)
|
|
*actual_in_nbytes_ret = in_next - (u8 *)in;
|
|
|
|
/* Optionally return the actual number of bytes written */
|
|
if (actual_out_nbytes_ret) {
|
|
*actual_out_nbytes_ret = out_next - (u8 *)out;
|
|
} else {
|
|
if (out_next != out_end)
|
|
return LIBDEFLATE_SHORT_OUTPUT;
|
|
}
|
|
return LIBDEFLATE_SUCCESS;
|
|
}
|
|
|
|
#undef FUNCNAME
|
|
#undef ATTRIBUTES
|