From cbf49a0b77b636f35e145a1bd7e0efbbb4ee06d3 Mon Sep 17 00:00:00 2001 From: Jesse Talavera-Greenberg Date: Thu, 23 Nov 2023 23:19:07 -0500 Subject: [PATCH] XDelta patch support (Take 2) (#15915) * Add xdelta in deps * Include <assert.h> in xdelta3.h - Otherwise the static_assert calls can fail * Build xdelta3 in Makefile.common * Add xdelta support to the softpatching infrastructure - The patching itself isn't fully implemented yet * Adjust how xdelta3.h checks the sizes of some types - Now checks max values instead of relying on autotools * Add some enums that were excluded by the cherry-pick * Remove stray whitespace * Adjust SIZE macros in xdelta3.h - Move them outside the XD3_USE_LARGEFILE64 block - Add more SIZE declarations - Make SIZEOF_UNSIGNED_LONG_LONG contingent on the presence of ULLONG_MAX * Reintegrate xdelta support * Enable support for xdelta's secondary compressors - Necessary for some patches * Fix some format specifiers * Remove unnecessary files from xdelta * Include xdelta3.h with a relative path * Add xdelta3 headers to HEADERS variable * Gate Xdelta support behind HAVE_XDELTA - HAVE_XDELTA is on by default - HAVE_PATCH is still required for HAVE_XDELTA to be meaningful - Support is mostly contingent on the availability of LZMA - Anything modern should be okay - Legacy platforms (e.g. 
DOS) may need to have Xdelta support disabled - At least until some other solution can be found * Disable HAVE_XDELTA on platforms where the build recently failed - These come from looking at the failed builds on GitHub - These are guesses, and may turn out to be wrong * Fix a potential memory leak - Whoops, looks like I need to call two cleanup functions - xd3_close_stream exists separately from xd3_free_stream * Split the --help printout for --xdelta into its own strlcat call - GCC was complaining about #ifdefs within macro arguments being non-portable * Fix some incorrect printf format specifiers * Modify Xdelta to adhere to C89 - It's mostly using RetroArch's INLINE macro instead of the inline keyword * Slight cleanups * Remove a stray comma that was hindering C89 builds * Add XDelta support to CHANGES.md * Change how the xdelta patch's name is computed - To be in line with other recent refactoring * Fix an incorrect merge - Whoops, this part was from before I figured out how to get the size of a patched file * Explain the song-and-dance behind computing a patched file's size * Define some XDelta3-related constants to 0 on 32-bit platforms * Adjust some Xdelta-related macro definitions - Exclude the encoder, since we're not making patches - Move some #defines to after inclusion of , to fix undefined behavior - Remove _WIN32_WINNT overrides, since they were for code that we're not using * Fix Xdelta support * Wrap an encoder-only function in `#if XD3_ENCODER` --- CHANGES.md | 1 + Makefile.common | 19 +- Makefile.dos | 1 + Makefile.miyoo | 1 + Makefile.ngc | 1 + Makefile.ps2 | 1 + Makefile.psp1 | 2 +- Makefile.retrofw | 1 + Makefile.rs90 | 1 + Makefile.vita | 1 + Makefile.wii | 1 + Makefile.wiiu | 1 + config.def.h | 2 +- configuration.c | 1 + deps/xdelta3/LICENSE | 176 ++ deps/xdelta3/README.md | 37 + deps/xdelta3/xdelta3-cfgs.h | 171 ++ deps/xdelta3/xdelta3-decode.h | 1222 ++++++++ deps/xdelta3/xdelta3-djw.h | 1838 ++++++++++++ deps/xdelta3/xdelta3-fgk.h | 862 
++++++ deps/xdelta3/xdelta3-hash.h | 163 ++ deps/xdelta3/xdelta3-internal.h | 387 +++ deps/xdelta3/xdelta3-list.h | 130 + deps/xdelta3/xdelta3-lzma.h | 195 ++ deps/xdelta3/xdelta3-second.h | 321 +++ deps/xdelta3/xdelta3.c | 4813 +++++++++++++++++++++++++++++++ deps/xdelta3/xdelta3.h | 1502 ++++++++++ docs/retroarch.6 | 6 + file_path_special.h | 1 + qb/config.params.sh | 1 + retroarch.c | 55 +- retroarch.h | 8 +- retroarch_types.h | 2 + runloop.c | 10 + runloop.h | 1 + tasks/task_content.c | 88 +- tasks/task_patch.c | 180 +- tasks/tasks_internal.h | 2 + 38 files changed, 12159 insertions(+), 46 deletions(-) create mode 100644 deps/xdelta3/LICENSE create mode 100644 deps/xdelta3/README.md create mode 100644 deps/xdelta3/xdelta3-cfgs.h create mode 100644 deps/xdelta3/xdelta3-decode.h create mode 100644 deps/xdelta3/xdelta3-djw.h create mode 100644 deps/xdelta3/xdelta3-fgk.h create mode 100644 deps/xdelta3/xdelta3-hash.h create mode 100644 deps/xdelta3/xdelta3-internal.h create mode 100644 deps/xdelta3/xdelta3-list.h create mode 100644 deps/xdelta3/xdelta3-lzma.h create mode 100644 deps/xdelta3/xdelta3-second.h create mode 100644 deps/xdelta3/xdelta3.c create mode 100644 deps/xdelta3/xdelta3.h diff --git a/CHANGES.md b/CHANGES.md index b0a958244d..87bcff12a7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,5 @@ # Future +- PATCHES: Add support for XDelta-formatted patches. 
# 1.16.0 - 3DS: Update __system_initArgv diff --git a/Makefile.common b/Makefile.common index 2165cd4ed9..ef418f679b 100644 --- a/Makefile.common +++ b/Makefile.common @@ -250,7 +250,24 @@ OBJ += frontend/frontend_driver.o \ ifeq ($(HAVE_PATCH), 1) DEFINES += -DHAVE_PATCH - OBJ += tasks/task_patch.o + OBJ += tasks/task_patch.o + ifeq ($(HAVE_XDELTA), 1) + DEFINES += -DHAVE_XDELTA -DSECONDARY_DJW -DSECONDARY_LZMA -DSECONDARY_FGK + INCLUDE_DIRS += -I$(DEPS_DIR)/xdelta3 -I$(LIBRETRO_COMM_DIR) + LIBS += -llzma + OBJ += $(DEPS_DIR)/xdelta3/xdelta3.o + HEADERS += xdelta3.h \ + xdelta3-cfgs.h \ + xdelta3-fgk.h \ + xdelta3-hash.h \ + xdelta3-internal.h \ + xdelta3-list.h \ + xdelta3-lzma.h \ + xdelta3-second.h + # These headers are added to the makefile because xdelta3 does weird things + # with its #includes, which affects dependency tracking and project analysis + # (e.g. for IDEs). + endif endif OBJ += \ diff --git a/Makefile.dos b/Makefile.dos index a09f0bbade..536b67a75f 100644 --- a/Makefile.dos +++ b/Makefile.dos @@ -66,6 +66,7 @@ HAVE_CHD = 0 # disabled due to static libretro-common and libchdr conflicts betw HAVE_STB_VORBIS = 1 HAVE_IBXM = 1 HAVE_CORE_INFO_CACHE = 1 +HAVE_XDELTA = 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_RGUI = 1 HAVE_MATERIALUI = 0 diff --git a/Makefile.miyoo b/Makefile.miyoo index 751cdf2639..2880af10ad 100644 --- a/Makefile.miyoo +++ b/Makefile.miyoo @@ -105,6 +105,7 @@ HAVE_OZONE = 0 HAVE_ZLIB = 1 HAVE_CONFIGFILE = 1 HAVE_PATCH = 1 +HAVE_XDELTA = 0 # Disabled until we figure out how to include <lzma.h> HAVE_CHEATS = 1 HAVE_CHEEVOS = 0 HAVE_LIBSHAKE = 0 diff --git a/Makefile.ngc b/Makefile.ngc index 483b3380ff..432ed36e8f 100644 --- a/Makefile.ngc +++ b/Makefile.ngc @@ -129,6 +129,7 @@ HAVE_ZLIB := 1 HAVE_7ZIP := 1 HAVE_CONFIGFILE := 1 HAVE_PATCH := 1 +HAVE_XDELTA := 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_CHEATS := 1 HAVE_SCREENSHOTS := 1 HAVE_REWIND := 1 
diff --git a/Makefile.ps2 b/Makefile.ps2 index cea4358c53..cda6aee7aa 100644 --- a/Makefile.ps2 +++ b/Makefile.ps2 @@ -54,6 +54,7 @@ else HAVE_MENU = 1 HAVE_CONFIGFILE = 1 HAVE_PATCH = 1 + HAVE_XDELTA = 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_CHEATS = 1 HAVE_RGUI = 1 HAVE_MATERIALUI = 0 diff --git a/Makefile.psp1 b/Makefile.psp1 index b7e5884335..029411a854 100644 --- a/Makefile.psp1 +++ b/Makefile.psp1 @@ -6,7 +6,7 @@ HAVE_THREADS ?= 1 BIG_STACK ?= 0 LOAD_WITHOUT_CORE_INFO ?= 0 HAVE_STATIC_DUMMY ?= 0 - +HAVE_XDELTA ?= 1 TARGET = retroarchpsp ifeq ($(DEBUG), 1) diff --git a/Makefile.retrofw b/Makefile.retrofw index 188ace6578..f03ba467a1 100644 --- a/Makefile.retrofw +++ b/Makefile.retrofw @@ -107,6 +107,7 @@ HAVE_OZONE = 0 HAVE_ZLIB = 1 HAVE_CONFIGFILE = 1 HAVE_PATCH = 1 +HAVE_XDELTA = 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_CHEATS = 1 HAVE_CHEEVOS = 0 HAVE_LIBSHAKE = 0 diff --git a/Makefile.rs90 b/Makefile.rs90 index ce52ecb947..3f58131d44 100644 --- a/Makefile.rs90 +++ b/Makefile.rs90 @@ -107,6 +107,7 @@ HAVE_OZONE = 0 HAVE_ZLIB = 1 HAVE_CONFIGFILE = 1 HAVE_PATCH = 1 +HAVE_XDELTA = 0 # Disabled until we figure out how to include <lzma.h> HAVE_CHEATS = 1 HAVE_CHEEVOS = 0 HAVE_LIBSHAKE = 0 diff --git a/Makefile.vita b/Makefile.vita index 652d3e79ec..9244f0d515 100644 --- a/Makefile.vita +++ b/Makefile.vita @@ -84,6 +84,7 @@ else HAVE_GFX_WIDGETS := 1 HAVE_CONFIGFILE := 1 HAVE_PATCH := 1 + HAVE_XDELTA := 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_CHEATS := 1 HAVE_OVERLAY := 1 HAVE_MATERIALUI := 1 diff --git a/Makefile.wii b/Makefile.wii index 0f3e94147f..92b5117059 100644 --- a/Makefile.wii +++ b/Makefile.wii @@ -137,6 +137,7 @@ HAVE_ZLIB := 1 HAVE_7ZIP := 1 HAVE_CONFIGFILE := 1 HAVE_PATCH := 1 +HAVE_XDELTA := 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_CHEATS := 1 HAVE_SCREENSHOTS := 1 
HAVE_REWIND := 1 diff --git a/Makefile.wiiu b/Makefile.wiiu index eefc342fef..e1f96a04eb 100644 --- a/Makefile.wiiu +++ b/Makefile.wiiu @@ -143,6 +143,7 @@ endif HAVE_RBMP = 1 HAVE_CONFIGFILE = 1 HAVE_PATCH = 1 + HAVE_XDELTA = 0 # disabled because <lzma.h> isn't available (or we haven't figured out how to install it) HAVE_REWIND = 1 HAVE_CHEATS = 1 HAVE_MENU = 1 diff --git a/config.def.h b/config.def.h index 0b630cdaab..f4f2eb2cb3 100644 --- a/config.def.h +++ b/config.def.h @@ -1040,7 +1040,7 @@ #define DEFAULT_NOTIFICATION_SHOW_CHEATS_APPLIED true /* Display a notification when applying an - * IPS/BPS/UPS patch file */ + * IPS/BPS/UPS/Xdelta patch file */ #define DEFAULT_NOTIFICATION_SHOW_PATCH_APPLIED true /* Display a notification when loading an diff --git a/configuration.c b/configuration.c index ef8fc04fef..a3e3eccfa1 100644 --- a/configuration.c +++ b/configuration.c @@ -2917,6 +2917,7 @@ void config_set_defaults(void *data) retroarch_ctl(RARCH_CTL_UNSET_UPS_PREF, NULL); retroarch_ctl(RARCH_CTL_UNSET_BPS_PREF, NULL); retroarch_ctl(RARCH_CTL_UNSET_IPS_PREF, NULL); + retroarch_ctl(RARCH_CTL_UNSET_XDELTA_PREF, NULL); *recording_st->output_dir = '\0'; *recording_st->config_dir = '\0'; diff --git a/deps/xdelta3/LICENSE b/deps/xdelta3/LICENSE new file mode 100644 index 0000000000..7a774156a6 --- /dev/null +++ b/deps/xdelta3/LICENSE @@ -0,0 +1,176 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. 
For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. + +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work +(an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. 
For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. 
If You +institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or +Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices +stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works +that You distribute, all copyright, patent, trademark, and +attribution notices from the Source form of the Work, +excluding those notices that do not pertain to any part of +the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its +distribution, then any Derivative Works that You distribute must +include a readable copy of the attribution notices contained +within such NOTICE file, excluding those notices that do not +pertain to any part of the Derivative Works, in at least one +of the following places: within a NOTICE text file distributed +as part of the Derivative Works; within the Source form or +documentation, if provided along with the Derivative Works; or, +within a display generated by the Derivative Works, if and +wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and +do not modify the License. 
You may add Your own attribution +notices within Derivative Works that You distribute, alongside +or as an addendum to the NOTICE text from the Work, provided +that such additional attribution notices cannot be construed +as modifying the License. + +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or +agreed to in writing, Licensor provides the Work (and each +Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/deps/xdelta3/README.md b/deps/xdelta3/README.md new file mode 100644 index 0000000000..ba6f030b58 --- /dev/null +++ b/deps/xdelta3/README.md @@ -0,0 +1,37 @@ +Xdelta 3.x readme.txt +Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, +2009, 2010, 2011, 2012, 2013, 2014, 2015 + + + +Thanks for downloading Xdelta! + +This directory contains the Xdelta3 command-line interface (CLI) and source +distribution for VCDIFF differential compression, a.k.a. delta +compression. 
The latest information and downloads are available here: + + http://xdelta.org/ + http://github.com/jmacd/xdelta/ + +Xdelta can be configured to use XZ Utils for secondary compression: + + http://tukaani.org/xz/ + +The command-line syntax is detailed here: + + https://github.com/jmacd/xdelta/blob/wiki/CommandLineSyntax.md + +Run 'xdelta3 -h' for brief help. Run 'xdelta3 test' for built-in tests. + +Sample commands (like gzip, -e means encode, -d means decode) + + xdelta3 -9 -S lzma -e -f -s OLD_FILE NEW_FILE DELTA_FILE + xdelta3 -d -s OLD_FILE DELTA_FILE DECODED_FILE + +File bug reports and browse open support issues here: + + https://github.com/jmacd/xdelta/issues + +The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++ +and Cygwin builds. Xdelta3 is covered under the terms of the APL, see +LICENSE. diff --git a/deps/xdelta3/xdelta3-cfgs.h b/deps/xdelta3/xdelta3-cfgs.h new file mode 100644 index 0000000000..84a2221439 --- /dev/null +++ b/deps/xdelta3/xdelta3-cfgs.h @@ -0,0 +1,171 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +/****************************************************************** + SOFT string matcher + ******************************************************************/ + +#if XD3_BUILD_SOFT + +#define TEMPLATE soft +#define LLOOK stream->smatcher.large_look +#define LSTEP stream->smatcher.large_step +#define SLOOK stream->smatcher.small_look +#define SCHAIN stream->smatcher.small_chain +#define SLCHAIN stream->smatcher.small_lchain +#define MAXLAZY stream->smatcher.max_lazy +#define LONGENOUGH stream->smatcher.long_enough + +#define SOFTCFG 1 +#include "xdelta3.c" +#undef SOFTCFG + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +#define SOFTCFG 0 + +/************************************************************ + FASTEST string matcher + **********************************************************/ +#if XD3_BUILD_FASTEST +#define TEMPLATE fastest +#define LLOOK 9 +#define LSTEP 26 +#define SLOOK 4U +#define SCHAIN 1 +#define SLCHAIN 1 +#define MAXLAZY 6 +#define LONGENOUGH 6 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/************************************************************ + FASTER string matcher + **********************************************************/ +#if XD3_BUILD_FASTER +#define TEMPLATE faster +#define LLOOK 9 +#define LSTEP 15 +#define SLOOK 4U +#define SCHAIN 1 +#define SLCHAIN 1 +#define MAXLAZY 18 +#define LONGENOUGH 18 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/****************************************************** + FAST string matcher + ********************************************************/ +#if XD3_BUILD_FAST +#define TEMPLATE fast +#define LLOOK 9 +#define LSTEP 8 +#define SLOOK 4U +#define SCHAIN 4 +#define SLCHAIN 1 
+#define MAXLAZY 18 +#define LONGENOUGH 35 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/************************************************** + SLOW string matcher + **************************************************************/ +#if XD3_BUILD_SLOW +#define TEMPLATE slow +#define LLOOK 9 +#define LSTEP 2 +#define SLOOK 4U +#define SCHAIN 44 +#define SLCHAIN 13 +#define MAXLAZY 90 +#define LONGENOUGH 70 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/******************************************************** + DEFAULT string matcher + ************************************************************/ +#if XD3_BUILD_DEFAULT +#define TEMPLATE default +#define LLOOK 9 +#define LSTEP 3 +#define SLOOK 4U +#define SCHAIN 8 +#define SLCHAIN 2 +#define MAXLAZY 36 +#define LONGENOUGH 70 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif diff --git a/deps/xdelta3/xdelta3-decode.h b/deps/xdelta3/xdelta3-decode.h new file mode 100644 index 0000000000..55822252c4 --- /dev/null +++ b/deps/xdelta3/xdelta3-decode.h @@ -0,0 +1,1222 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _XDELTA3_DECODE_H_ +#define _XDELTA3_DECODE_H_ + +/* To include RetroArch's INLINE macro */ +#include "retro_inline.h" + +#include "xdelta3-internal.h" + +#define SRCORTGT(x) ((((x) & VCD_SRCORTGT) == VCD_SOURCE) ? \ + VCD_SOURCE : ((((x) & VCD_SRCORTGT) == \ + VCD_TARGET) ? VCD_TARGET : 0)) + +static INLINE int +xd3_decode_byte (xd3_stream *stream, usize_t *val) +{ + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + (*val) = stream->next_in[0]; + + DECODE_INPUT (1); + return 0; +} + +static INLINE int +xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size) +{ + usize_t want; + usize_t take; + + /* Note: The case where (*pos == size) happens when a zero-length + * appheader or code table is transmitted, but there is nothing in + * the standard against that. */ + while (*pos < size) + { + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + want = size - *pos; + take = xd3_min (want, stream->avail_in); + + memcpy (buf + *pos, stream->next_in, (size_t) take); + + DECODE_INPUT (take); + (*pos) += take; + } + + return 0; +} + +/* Initialize the decoder for a new window. The dec_tgtlen value is + * preserved across successive window decodings, and the update to + * dec_winstart is delayed until a new window actually starts. This + * is to avoid throwing an error due to overflow until the last + * possible moment. This makes it possible to encode exactly 4GB + * through a 32-bit encoder. */ +static int +xd3_decode_init_window (xd3_stream *stream) +{ + stream->dec_cpylen = 0; + stream->dec_cpyoff = 0; + stream->dec_cksumbytes = 0; + + xd3_init_cache (& stream->acache); + + return 0; +} + +/* Allocates buffer space for the target window and possibly the + * VCD_TARGET copy-window. Also sets the base of the two copy + * segments. 
*/ +static int +xd3_decode_setup_buffers (xd3_stream *stream) +{ + /* If VCD_TARGET is set then the previous buffer may be reused. */ + if (stream->dec_win_ind & VCD_TARGET) + { + /* Note: this implementation is untested, since Xdelta3 itself + * does not implement an encoder for VCD_TARGET mode. Thus, mark + * unimplemented until needed. */ + if (1) + { + stream->msg = "VCD_TARGET not implemented"; + return XD3_UNIMPLEMENTED; + } + + /* But this implementation only supports copying from the last + * target window. If the offset is outside that range, it can't + * be done. */ + if (stream->dec_cpyoff < stream->dec_laststart) + { + stream->msg = "unsupported VCD_TARGET offset"; + return XD3_INVALID_INPUT; + } + + /* See if the two windows are the same. This indicates the + * first time VCD_TARGET is used. This causes a second buffer + * to be allocated, after that the two are swapped in the + * DEC_FINISH case. */ + if (stream->dec_lastwin == stream->next_out) + { + stream->next_out = NULL; + stream->space_out = 0; + } + + /* TODO: (See note above, this looks incorrect) */ + stream->dec_cpyaddrbase = stream->dec_lastwin + + (usize_t) (stream->dec_cpyoff - stream->dec_laststart); + } + + /* See if the current output window is large enough. */ + if (stream->space_out < stream->dec_tgtlen) + { + xd3_free (stream, stream->dec_buffer); + + stream->space_out = + xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE); + + if ((stream->dec_buffer = + (uint8_t*) xd3_alloc (stream, stream->space_out, 1)) == NULL) + { + return ENOMEM; + } + + stream->next_out = stream->dec_buffer; + } + + /* dec_tgtaddrbase refers to an invalid base address, but it is + * always used with a sufficiently large instruction offset (i.e., + * beyond the copy window). This condition is enforced by + * xd3_decode_output_halfinst. 
*/ + stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen; + + return 0; +} + +static int +xd3_decode_allocate (xd3_stream *stream, + usize_t size, + uint8_t **buf_ptr, + usize_t *buf_alloc) +{ + IF_DEBUG2 (DP(RINT "[xd3_decode_allocate] size %"W"u alloc %"W"u\n", + size, *buf_alloc)); + + if (*buf_ptr != NULL && *buf_alloc < size) + { + xd3_free (stream, *buf_ptr); + *buf_ptr = NULL; + } + + if (*buf_ptr == NULL) + { + *buf_alloc = xd3_round_blksize (size, XD3_ALLOCSIZE); + + if ((*buf_ptr = (uint8_t*) xd3_alloc (stream, *buf_alloc, 1)) == NULL) + { + return ENOMEM; + } + } + + return 0; +} + +static int +xd3_decode_section (xd3_stream *stream, + xd3_desect *section, + xd3_decode_state nstate, + int copy) +{ + XD3_ASSERT (section->pos <= section->size); + XD3_ASSERT (stream->dec_state != nstate); + + if (section->pos < section->size) + { + usize_t sect_take; + + if (stream->avail_in == 0) + { + return XD3_INPUT; + } + + if ((copy == 0) && (section->pos == 0)) + { + /* No allocation/copy needed */ + section->buf = stream->next_in; + sect_take = section->size; + IF_DEBUG1 (DP(RINT "[xd3_decode_section] zerocopy %"W"u @ %"W"u avail %"W"u\n", + sect_take, section->pos, stream->avail_in)); + } + else + { + usize_t sect_need = section->size - section->pos; + + /* Allocate and copy */ + sect_take = xd3_min (sect_need, stream->avail_in); + + if (section->pos == 0) + { + int ret; + + if ((ret = xd3_decode_allocate (stream, + section->size, + & section->copied1, + & section->alloc1))) + { + return ret; + } + + section->buf = section->copied1; + } + + IF_DEBUG2 (DP(RINT "[xd3_decode_section] take %"W"u @ %"W"u [need %"W"u] avail %"W"u\n", + sect_take, section->pos, sect_need, stream->avail_in)); + XD3_ASSERT (section->pos + sect_take <= section->alloc1); + + memcpy (section->copied1 + section->pos, + stream->next_in, + sect_take); + } + + section->pos += sect_take; + + stream->dec_winbytes += sect_take; + + DECODE_INPUT (sect_take); + } + + if (section->pos < 
section->size) + { + IF_DEBUG1 (DP(RINT "[xd3_decode_section] further input required %"W"u\n", + section->size - section->pos)); + stream->msg = "further input required"; + return XD3_INPUT; + } + + XD3_ASSERT (section->pos == section->size); + + stream->dec_state = nstate; + section->buf_max = section->buf + section->size; + section->pos = 0; + return 0; +} + +/* Decode the size and address for half of an instruction (i.e., a + * single opcode). This updates the stream->dec_position, which are + * bytes already output prior to processing this instruction. Perform + * bounds checking for sizes and copy addresses, which uses the + * dec_position (which is why these checks are done here). */ +static int +xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst) +{ + int ret; + + /* If the size from the instruction table is zero then read a size value. */ + if ((inst->size == 0) && + (ret = xd3_read_size (stream, + & stream->inst_sect.buf, + stream->inst_sect.buf_max, + & inst->size))) + { + return XD3_INVALID_INPUT; + } + + /* For copy instructions, read address. */ + if (inst->type >= XD3_CPY) + { + IF_DEBUG2 ({ + static int cnt = 0; + XPR(NT "DECODE:%u: COPY at %"Q"u (winoffset %"W"u) " + "size %"W"u winaddr %"W"u\n", + cnt++, + stream->total_out + (stream->dec_position - + stream->dec_cpylen), + (stream->dec_position - stream->dec_cpylen), + inst->size, + inst->addr); + }); + + if ((ret = xd3_decode_address (stream, + stream->dec_position, + inst->type - XD3_CPY, + & stream->addr_sect.buf, + stream->addr_sect.buf_max, + & inst->addr))) + { + return ret; + } + + /* Cannot copy an address before it is filled-in. */ + if (inst->addr >= stream->dec_position) + { + stream->msg = "address too large"; + return XD3_INVALID_INPUT; + } + + /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining + * buffer space in its own segment. 
*/ + if (inst->addr < stream->dec_cpylen && + inst->addr + inst->size > stream->dec_cpylen) + { + stream->msg = "size too large"; + return XD3_INVALID_INPUT; + } + } + else + { + IF_DEBUG2 ({ + if (inst->type == XD3_ADD) + { + static int cnt; + XPR(NT "DECODE:%d: ADD at %"Q"u (winoffset %"W"u) size %"W"u\n", + cnt++, + (stream->total_out + stream->dec_position - stream->dec_cpylen), + stream->dec_position - stream->dec_cpylen, + inst->size); + } + else + { + static int cnt; + XD3_ASSERT (inst->type == XD3_RUN); + XPR(NT "DECODE:%d: RUN at %"Q"u (winoffset %"W"u) size %"W"u\n", + cnt++, + stream->total_out + stream->dec_position - stream->dec_cpylen, + stream->dec_position - stream->dec_cpylen, + inst->size); + } + }); + } + + /* Check: The instruction will not overflow the output buffer. */ + if (stream->dec_position + inst->size > stream->dec_maxpos) + { + stream->msg = "size too large"; + return XD3_INVALID_INPUT; + } + + stream->dec_position += inst->size; + return 0; +} + +/* Decode a single opcode and then decode the two half-instructions. */ +static int +xd3_decode_instruction (xd3_stream *stream) +{ + int ret; + const xd3_dinst *inst; + + if (stream->inst_sect.buf == stream->inst_sect.buf_max) + { + stream->msg = "instruction underflow"; + return XD3_INVALID_INPUT; + } + + inst = &stream->code_table[*stream->inst_sect.buf++]; + + stream->dec_current1.type = inst->type1; + stream->dec_current2.type = inst->type2; + stream->dec_current1.size = inst->size1; + stream->dec_current2.size = inst->size2; + + /* For each instruction with a real operation, decode the + * corresponding size and addresses if necessary. Assume a + * code-table may have NOOP in either position, although this is + * unlikely. 
*/ + if (inst->type1 != XD3_NOOP && + (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1))) + { + return ret; + } + if (inst->type2 != XD3_NOOP && + (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2))) + { + return ret; + } + return 0; +} + +/* Output the result of a single half-instruction. OPT: This is the + decoder hotspot. Modifies "inst", see below. */ +static int +xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst) +{ + /* This method is reentrant for copy instructions which may return + * XD3_GETSRCBLK to the caller. Each time through a copy takes the + * minimum of inst->size and the available space on whichever block + * supplies the data */ + usize_t take = inst->size; + + if (USIZE_T_OVERFLOW (stream->avail_out, take) || + stream->avail_out + take > stream->space_out) + { + stream->msg = "overflow while decoding"; + return XD3_INVALID_INPUT; + } + + XD3_ASSERT (inst->type != XD3_NOOP); + + switch (inst->type) + { + case XD3_RUN: + { + /* Only require a single data byte. */ + if (stream->data_sect.buf == stream->data_sect.buf_max) + { + stream->msg = "data underflow"; + return XD3_INVALID_INPUT; + } + + memset (stream->next_out + stream->avail_out, + stream->data_sect.buf[0], + take); + + stream->data_sect.buf += 1; + stream->avail_out += take; + inst->type = XD3_NOOP; + break; + } + case XD3_ADD: + { + /* Require at least TAKE data bytes. Compare TAKE with the bytes left in the section: forming buf + take could point past buf_max. */ + if (take > (usize_t) (stream->data_sect.buf_max - stream->data_sect.buf)) + { + stream->msg = "data underflow"; + return XD3_INVALID_INPUT; + } + + memcpy (stream->next_out + stream->avail_out, + stream->data_sect.buf, + take); + + stream->data_sect.buf += take; + stream->avail_out += take; + inst->type = XD3_NOOP; + break; + } + default: + { + usize_t i; + const uint8_t *src; + uint8_t *dst; + int overlap; + + /* See if it copies from the VCD_TARGET/VCD_SOURCE window or + * the target window. 
Out-of-bounds checks for the addresses + * and sizes are performed in xd3_decode_parse_halfinst. This + * if/else must set "overlap", "src", and "dst". */ + if (inst->addr < stream->dec_cpylen) + { + /* In both branches we are copying from outside the + * current decoder window, the first (VCD_TARGET) is + * unimplemented. */ + overlap = 0; + + /* This branch sets "src". As a side-effect, we modify + * "inst" so that if we reenter this method after a + * XD3_GETSRCBLK response the state is correct. So if the + * instruction can be fulfilled by a contiguous block of + * memory then we will set: + * + * inst->type = XD3_NOOP; + * inst->size = 0; + */ + if (stream->dec_win_ind & VCD_TARGET) + { + /* TODO: Users have requested long-distance copies of + * similar material within a target (e.g., for dup + * suppression in backups). This code path is probably + * dead due to XD3_UNIMPLEMENTED in xd3_decode_setup_buffers. */ + inst->size = 0; + inst->type = XD3_NOOP; + stream->msg = "VCD_TARGET not implemented"; + return XD3_UNIMPLEMENTED; + } + else + { + /* In this case we have to read a source block, which + * could return control to the caller. We need to + * know the first block number needed for this + * copy. */ + xd3_source *source = stream->src; + xoff_t block = source->cpyoff_blocks; + usize_t blkoff = source->cpyoff_blkoff; + const usize_t blksize = source->blksize; + int ret; + + xd3_blksize_add (&block, &blkoff, source, inst->addr); + XD3_ASSERT (blkoff < blksize); + + if ((ret = xd3_getblk (stream, block))) + { + /* could be a XD3_GETSRCBLK failure. */ + if (ret == XD3_TOOFARBACK) + { + stream->msg = "non-seekable source in decode"; + ret = XD3_INTERNAL; + } + return ret; + } + + src = source->curblk + blkoff; + + /* This block is either full, or a partial block that + * must contain enough bytes. 
*/ + if ((source->onblk != blksize) && + (blkoff + take > source->onblk)) + { + IF_DEBUG1 (XPR(NT "[srcfile] short at blkno %"Q"u onblk " + "%"W"u blksize %"W"u blkoff %"W"u take %"W"u\n", + block, + source->onblk, + blksize, + blkoff, + take)); + stream->msg = "source file too short"; + return XD3_INVALID_INPUT; + } + + XD3_ASSERT (blkoff != blksize); + + /* Check if we have enough data on this block to + * finish the instruction. */ + if (blkoff + take <= blksize) + { + inst->type = XD3_NOOP; + inst->size = 0; + } + else + { + take = blksize - blkoff; + inst->size -= take; + inst->addr += take; + + /* because (blkoff + take > blksize), above */ + XD3_ASSERT (inst->size != 0); + } + } + } + else + { + /* TODO: the memcpy/overlap optimization, etc. Overlap + * here could be more specific, it's whether (inst->addr - + * srclen) + inst->size > input_pos ? And is the system + * memcpy really any good? */ + overlap = 1; + + /* For a target-window copy, we know the entire range is + * in-memory. The dec_tgtaddrbase is negatively offset by + * dec_cpylen because the addresses start beyond that + * point. */ + src = stream->dec_tgtaddrbase + inst->addr; + inst->type = XD3_NOOP; + inst->size = 0; + } + + dst = stream->next_out + stream->avail_out; + + stream->avail_out += take; + + if (overlap) + { + /* Can't just memcpy here due to possible overlap. 
*/ + for (i = take; i != 0; i -= 1) + { + *dst++ = *src++; + } + } + else + { + memcpy (dst, src, take); + } + } + } + + return 0; +} + +static int +xd3_decode_finish_window (xd3_stream *stream) +{ + stream->dec_winbytes = 0; + stream->dec_state = DEC_FINISH; + + stream->data_sect.pos = 0; + stream->inst_sect.pos = 0; + stream->addr_sect.pos = 0; + + return XD3_OUTPUT; +} + +static int +xd3_decode_secondary_sections (xd3_stream *secondary_stream) +{ +#if SECONDARY_ANY + int ret; +#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((secondary_stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \ + (ret = xd3_decode_secondary (secondary_stream, \ + & secondary_stream-> LOWER ## _sect, \ + & xd3_sec_ ## LOWER (secondary_stream)))) + + if (DECODE_SECONDARY_SECTION (DATA, data) || + DECODE_SECONDARY_SECTION (INST, inst) || + DECODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } +#undef DECODE_SECONDARY_SECTION +#endif + return 0; +} + +static int +xd3_decode_sections (xd3_stream *stream) +{ + usize_t need, more, take; + int copy, ret; + + if ((stream->flags & XD3_JUST_HDR) != 0) + { + /* Nothing left to do. */ + return xd3_decode_finish_window (stream); + } + + /* To avoid extra copying, allocate three sections at once (but + * check for overflow). */ + need = stream->inst_sect.size; + + if (USIZE_T_OVERFLOW (need, stream->addr_sect.size)) + { + stream->msg = "decoder section size overflow"; + return XD3_INTERNAL; + } + need += stream->addr_sect.size; + + if (USIZE_T_OVERFLOW (need, stream->data_sect.size)) + { + stream->msg = "decoder section size overflow"; + return XD3_INTERNAL; + } + need += stream->data_sect.size; + + /* The window may be entirely processed. */ + XD3_ASSERT (stream->dec_winbytes <= need); + + /* Compute how much more input is needed. */ + more = (need - stream->dec_winbytes); + + /* How much to consume. */ + take = xd3_min (more, stream->avail_in); + + /* See if the input is completely available, to avoid copy. 
*/ + copy = (take != more); + + /* If the window is skipped... */ + if ((stream->flags & XD3_SKIP_WINDOW) != 0) + { + /* Skip the available input. */ + DECODE_INPUT (take); + + stream->dec_winbytes += take; + + if (copy) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + return xd3_decode_finish_window (stream); + } + + /* Process all but the DATA section. */ + switch (stream->dec_state) + { + default: + stream->msg = "internal error"; + return XD3_INVALID_INPUT; + + case DEC_DATA: + if ((ret = xd3_decode_section (stream, & stream->data_sect, + DEC_INST, copy))) { return ret; } + case DEC_INST: + if ((ret = xd3_decode_section (stream, & stream->inst_sect, + DEC_ADDR, copy))) { return ret; } + case DEC_ADDR: + if ((ret = xd3_decode_section (stream, & stream->addr_sect, + DEC_EMIT, copy))) { return ret; } + } + + XD3_ASSERT (stream->dec_winbytes == need); + + if ((ret = xd3_decode_secondary_sections (stream))) { return ret; } + + if (stream->flags & XD3_SKIP_EMIT) + { + return xd3_decode_finish_window (stream); + } + + /* OPT: A possible optimization is to avoid allocating memory in + * decode_setup_buffers and to avoid a large memcpy when the window + * consists of a single VCD_SOURCE copy instruction. */ + if ((ret = xd3_decode_setup_buffers (stream))) { return ret; } + + return 0; +} + +static int +xd3_decode_emit (xd3_stream *stream) +{ + int ret; + + /* Produce output: originally structured to allow reentrant code + * that fills as much of the output buffer as possible, but VCDIFF + * semantics allows to copy from anywhere from the target window, so + * instead allocate a sufficiently sized buffer after the target + * window length is decoded. + * + * This code still needs to be reentrant to allow XD3_GETSRCBLK to + * return control. This is handled by setting the + * stream->dec_currentN instruction types to XD3_NOOP after they + * have been processed. */ + XD3_ASSERT (! 
(stream->flags & XD3_SKIP_EMIT)); + XD3_ASSERT (stream->dec_tgtlen <= stream->space_out); + + while (stream->inst_sect.buf != stream->inst_sect.buf_max || + stream->dec_current1.type != XD3_NOOP || + stream->dec_current2.type != XD3_NOOP) + { + /* Decode next instruction pair. */ + if ((stream->dec_current1.type == XD3_NOOP) && + (stream->dec_current2.type == XD3_NOOP) && + (ret = xd3_decode_instruction (stream))) { return ret; } + + /* Output dec_current1 */ + while ((stream->dec_current1.type != XD3_NOOP)) + { + if ((ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) + { + return ret; + } + } + /* Output dec_current2 */ + while (stream->dec_current2.type != XD3_NOOP) + { + if ((ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) + { + return ret; + } + } + } + + if (stream->avail_out != stream->dec_tgtlen) + { + IF_DEBUG2 (DP(RINT "AVAIL_OUT(%"W"u) != DEC_TGTLEN(%"W"u)\n", + stream->avail_out, stream->dec_tgtlen)); + stream->msg = "wrong window length"; + return XD3_INVALID_INPUT; + } + + if (stream->data_sect.buf != stream->data_sect.buf_max) + { + stream->msg = "extra data section"; + return XD3_INVALID_INPUT; + } + + if (stream->addr_sect.buf != stream->addr_sect.buf_max) + { + stream->msg = "extra address section"; + return XD3_INVALID_INPUT; + } + + /* OPT: Should cksum computation be combined with the above loop? */ + if ((stream->dec_win_ind & VCD_ADLER32) != 0 && + (stream->flags & XD3_ADLER32_NOVER) == 0) + { + uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out); + + if (a32 != stream->dec_adler32) + { + stream->msg = "target window checksum mismatch"; + return XD3_INVALID_INPUT; + } + } + + /* Finished with a window. 
*/ + return xd3_decode_finish_window (stream); +} +/* Resumable decoder state machine: switches on stream->dec_state and falls through successive header fields; a helper's non-zero result is returned at once, with dec_state left at the field to resume from. */ +int +xd3_decode_input (xd3_stream *stream) +{ + int ret; + + if (stream->enc_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INVALID_INPUT; + } + +#define BYTE_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + +#define OFFSET_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + +#define SIZE_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + + switch (stream->dec_state) + { + case DEC_VCHEAD: + { + if ((ret = xd3_decode_bytes (stream, stream->dec_magic, + & stream->dec_magicbytes, 4))) + { + return ret; + } + + if (stream->dec_magic[0] != VCDIFF_MAGIC1 || + stream->dec_magic[1] != VCDIFF_MAGIC2 || + stream->dec_magic[2] != VCDIFF_MAGIC3) + { + stream->msg = "not a VCDIFF input"; + return XD3_INVALID_INPUT; + } + + if (stream->dec_magic[3] != 0) + { + stream->msg = "VCDIFF input version > 0 is not supported"; + return XD3_INVALID_INPUT; + } + + stream->dec_state = DEC_HDRIND; + } + case DEC_HDRIND: + { + if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) + { + return ret; + } + + if ((stream->dec_hdr_ind & VCD_INVHDR) != 0) + { + stream->msg = "unrecognized header indicator bits set"; + return XD3_INVALID_INPUT; + } + + stream->dec_state = DEC_SECONDID; + } + + case DEC_SECONDID: + /* Secondary compressor ID: only if VCD_SECONDARY is set */ + if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) + { + BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN); + + switch (stream->dec_secondid) + { + case VCD_FGK_ID: + FGK_CASE (stream); + case VCD_DJW_ID: + DJW_CASE (stream); + case VCD_LZMA_ID: + LZMA_CASE (stream); + default: + stream->msg = 
"unknown secondary compressor ID"; + return XD3_INVALID_INPUT; + } + } + + case DEC_TABLEN: + /* Length of code table data: only if VCD_CODETABLE is set */ + SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->dec_codetblsz, DEC_NEAR); + + /* The codetblsz counts the two NEAR/SAME bytes, so a size of 2 or less is malformed input rather than an allocation failure. */ + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) { + if (stream->dec_codetblsz <= 2) { + stream->msg = "invalid code table size"; + return XD3_INVALID_INPUT; + } + stream->dec_codetblsz -= 2; + } + case DEC_NEAR: + /* Near modes: only if VCD_CODETABLE is set */ + BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->acache.s_near, DEC_SAME); + case DEC_SAME: + /* Same modes: only if VCD_CODETABLE is set */ + BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->acache.s_same, DEC_TABDAT); + case DEC_TABDAT: + /* Compressed code table data */ + + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) + { + stream->msg = "VCD_CODETABLE support was removed"; + return XD3_UNIMPLEMENTED; + } + else + { + /* Use the default table. */ + stream->acache.s_near = __rfc3284_code_table_desc.near_modes; + stream->acache.s_same = __rfc3284_code_table_desc.same_modes; + stream->code_table = xd3_rfc3284_code_table (); + } + + if ((ret = xd3_alloc_cache (stream))) { return ret; } + + stream->dec_state = DEC_APPLEN; + + case DEC_APPLEN: + /* Length of application data */ + SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, + stream->dec_appheadsz, DEC_APPDAT); + + case DEC_APPDAT: + /* Application data */ + if (stream->dec_hdr_ind & VCD_APPHEADER) + { + /* Note: we add an additional byte for padding, to allow + 0-termination. 
Check for overflow: */ + if (USIZE_T_OVERFLOW(stream->dec_appheadsz, 1)) + { + stream->msg = "exceptional appheader size"; + return XD3_INVALID_INPUT; + } + + if ((stream->dec_appheader == NULL) && + (stream->dec_appheader = + (uint8_t*) xd3_alloc (stream, + stream->dec_appheadsz+1, 1)) == NULL) + { + return ENOMEM; + } + + stream->dec_appheader[stream->dec_appheadsz] = 0; + + if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, + & stream->dec_appheadbytes, + stream->dec_appheadsz))) + { + return ret; + } + } + + /* xoff_t -> usize_t is safe because this is the first block. */ + stream->dec_hdrsize = (usize_t) stream->total_in; + stream->dec_state = DEC_WININD; + + case DEC_WININD: + { + /* Start of a window: the window indicator */ + if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) + { + return ret; + } + + stream->current_window = stream->dec_window_count; + + if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen)) + { + stream->msg = "decoder file offset overflow"; + return XD3_INVALID_INPUT; + } + + stream->dec_winstart += stream->dec_tgtlen; + + if ((stream->dec_win_ind & VCD_INVWIN) != 0) + { + stream->msg = "unrecognized window indicator bits set"; + return XD3_INVALID_INPUT; + } + + if ((ret = xd3_decode_init_window (stream))) { return ret; } + + stream->dec_state = DEC_CPYLEN; + + IF_DEBUG2 (DP(RINT "--------- TARGET WINDOW %"Q"u -----------\n", + stream->current_window)); + } + + case DEC_CPYLEN: + /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */ + SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, + DEC_CPYOFF); + + /* Set the initial, logical decoder position (HERE address) in + * dec_position. This is set to just after the source/copy + * window, as we are just about to output the first byte of + * target window. 
*/ + stream->dec_position = stream->dec_cpylen; + + case DEC_CPYOFF: + /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */ + OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, + DEC_ENCLEN); + + /* Copy offset and copy length may not overflow. */ + if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen)) + { + stream->msg = "decoder copy window overflows a file offset"; + return XD3_INVALID_INPUT; + } + + /* Check copy window bounds: VCD_TARGET window may not exceed + current position. */ + if ((stream->dec_win_ind & VCD_TARGET) && + (stream->dec_cpyoff + stream->dec_cpylen > + stream->dec_winstart)) + { + stream->msg = "VCD_TARGET window out of bounds"; + return XD3_INVALID_INPUT; + } + + case DEC_ENCLEN: + /* Length of the delta encoding */ + SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN); + case DEC_TGTLEN: + /* Length of target window */ + SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND); + + /* Set the maximum decoder position, beyond which we should not + * decode any data. This is the maximum value for dec_position. + * This may not exceed the size of a usize_t. */ + if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen)) + { + stream->msg = "decoder target window overflows a usize_t"; + return XD3_INVALID_INPUT; + } + + /* Check for malicious files. */ + if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE) + { + stream->msg = "hard window size exceeded"; + return XD3_INVALID_INPUT; + } + + stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen; + + case DEC_DELIND: + /* Delta indicator */ + BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN); + + if ((stream->dec_del_ind & VCD_INVDEL) != 0) + { + stream->msg = "unrecognized delta indicator bits set"; + return XD3_INVALID_INPUT; + } + + /* Delta indicator is only used with secondary compression. 
*/ + if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL)) + { + stream->msg = "invalid delta indicator bits set"; + return XD3_INVALID_INPUT; + } + + /* Section lengths */ + case DEC_DATALEN: + SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN); + case DEC_INSTLEN: + SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN); + case DEC_ADDRLEN: + SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM); + + case DEC_CKSUM: + /* Window checksum. */ + if ((stream->dec_win_ind & VCD_ADLER32) != 0) + { + int i; + + if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, + & stream->dec_cksumbytes, 4))) + { + return ret; + } + + for (i = 0; i < 4; i += 1) + { + stream->dec_adler32 = + (stream->dec_adler32 << 8) | stream->dec_cksum[i]; + } + } + + stream->dec_state = DEC_DATA; + + /* Check dec_enclen for redundancy, otherwise it is not really used. */ + { + usize_t enclen_check = + (1 + (xd3_sizeof_size (stream->dec_tgtlen) + + xd3_sizeof_size (stream->data_sect.size) + + xd3_sizeof_size (stream->inst_sect.size) + + xd3_sizeof_size (stream->addr_sect.size)) + + stream->data_sect.size + + stream->inst_sect.size + + stream->addr_sect.size + + ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0)); + + if (stream->dec_enclen != enclen_check) + { + stream->msg = "incorrect encoding length (redundent)"; + return XD3_INVALID_INPUT; + } + } + + /* Returning here gives the application a chance to inspect the + * header, skip the window, etc. */ + if (stream->current_window == 0) { return XD3_GOTHEADER; } + else { return XD3_WINSTART; } + + case DEC_DATA: + case DEC_INST: + case DEC_ADDR: + /* Next read the three sections. */ + if ((ret = xd3_decode_sections (stream))) { return ret; } + + case DEC_EMIT: + + /* To speed VCD_SOURCE block-address calculations, the source + * cpyoff_blocks and cpyoff_blkoff are pre-computed. 
*/ + if (stream->dec_win_ind & VCD_SOURCE) + { + xd3_source *src = stream->src; + + if (src == NULL) + { + stream->msg = "source input required"; + return XD3_INVALID_INPUT; + } + + xd3_blksize_div(stream->dec_cpyoff, src, + &src->cpyoff_blocks, + &src->cpyoff_blkoff); + + IF_DEBUG2(DP(RINT + "[decode_cpyoff] %"Q"u " + "cpyblkno %"Q"u " + "cpyblkoff %"W"u " + "blksize %"W"u\n", + stream->dec_cpyoff, + src->cpyoff_blocks, + src->cpyoff_blkoff, + src->blksize)); + } + + /* xd3_decode_emit returns XD3_OUTPUT on every success. */ + if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT) + { + stream->total_out += stream->avail_out; + } + + return ret; + + case DEC_FINISH: + { + if (stream->dec_win_ind & VCD_TARGET) + { + if (stream->dec_lastwin == NULL) + { + stream->dec_lastwin = stream->next_out; + stream->dec_lastspace = stream->space_out; + } + else + { + xd3_swap_uint8p (& stream->dec_lastwin, + & stream->next_out); + xd3_swap_usize_t (& stream->dec_lastspace, + & stream->space_out); + } + } + + stream->dec_lastlen = stream->dec_tgtlen; + stream->dec_laststart = stream->dec_winstart; + stream->dec_window_count += 1; + + /* Note: the updates to dec_winstart & current_window are + * deferred until after the next DEC_WININD byte is read. */ + stream->dec_state = DEC_WININD; + return XD3_WINFINISH; + } + + default: + stream->msg = "invalid state"; + return XD3_INVALID_INPUT; + } +} + +#endif /* _XDELTA3_DECODE_H_*/ \ No newline at end of file diff --git a/deps/xdelta3/xdelta3-djw.h b/deps/xdelta3/xdelta3-djw.h new file mode 100644 index 0000000000..a5750d15fc --- /dev/null +++ b/deps/xdelta3/xdelta3-djw.h @@ -0,0 +1,1838 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _XDELTA3_DJW_H_ +#define _XDELTA3_DJW_H_ + +/* To include RetroArch's INLINE macro */ +#include "retro_inline.h" + +/* The following people deserve much credit for the algorithms and + * techniques contained in this file: + + Julian Seward + Bzip2 sources, implementation of the multi-table Huffman technique. + + Jean-loup Gailly and Mark Adler and L. Peter Deutsch + Zlib source code, RFC 1951 + + Daniel S. Hirschberg and Debra A. LeLewer + "Efficient Decoding of Prefix Codes" + Communications of the ACM, April 1990 33(4). + + David J. Wheeler + Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps. + This contains the idea behind the multi-table Huffman and 1-2 coding + techniques. + ftp://ftp.cl.cam.ac.uk/users/djw3/ + +*/ + +/* OPT: during the multi-table iteration, pick the worst-overall + * performing table and replace it with exactly the frequencies of the + * worst-overall performing sector or N-worst performing sectors. */ + +/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with + * the Bzip prefix coding strategy. xdfs-0.256 contains the last of + * the other-format tests, including RFC1950 and the RFC1950+MTF + * tests. */ + +#define DJW_MAX_CODELEN 20U /* Maximum length of an alphabet code. */ + +/* Code lengths are themselves code-length encoded, so the total number of + * codes is: [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */ +#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) + +#define RUN_0 0U /* Symbols used in MTF+1/2 coding. 
*/ +#define RUN_1 1U + +/* Number of code lengths always encoded (djw_encode_basic array) */ +#define DJW_BASIC_CODES 5U +#define DJW_RUN_CODES 2U /* Number of run codes */ + +/* Offset of extra codes */ +#define DJW_EXTRA_12OFFSET (DJW_BASIC_CODES + DJW_RUN_CODES) + +/* Number of optionally encoded code lengths (djw_encode_extra array) */ +#define DJW_EXTRA_CODES 15U + +/* Number of bits to code [0-DJW_EXTRA_CODES] */ +#define DJW_EXTRA_CODE_BITS 4U + +#define DJW_MAX_GROUPS 8U /* Max number of group coding tables */ +#define DJW_GROUP_BITS 3U /* Number of bits to code [1-DJW_MAX_GROUPS] */ + +#define DJW_SECTORSZ_MULT 5U /* Multiplier for encoded sectorsz */ +#define DJW_SECTORSZ_BITS 5U /* Number of bits to code group size */ +#define DJW_SECTORSZ_MAX ((1U << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT) + +/* Maximum number of iterations to find group tables. */ +#define DJW_MAX_ITER 6U +/* Minimum number of bits an iteration must reduce coding by. */ +#define DJW_MIN_IMPROVEMENT 20U + +/* Maximum code length of a prefix code length */ +#define DJW_MAX_CLCLEN 15U + +/* Number of bits to code [0-DJW_MAX_CLCLEN] */ +#define DJW_CLCLEN_BITS 4U + +#define DJW_MAX_GBCLEN 7U /* Maximum code length of a group selector */ + +/* Number of bits to code [0-DJW_MAX_GBCLEN] + * TODO: Actually, should never have zero code lengths here, or else a group + * went unused. Write a test for this: if a group goes unused, eliminate + * it? */ +#define DJW_GBCLEN_BITS 3U + +/* It has to save at least this many bits... 
*/ +#define EFFICIENCY_BITS 16U + +typedef struct _djw_stream djw_stream; +typedef struct _djw_heapen djw_heapen; +typedef struct _djw_prefix djw_prefix; +typedef uint32_t djw_weight; + +struct _djw_heapen +{ + uint32_t depth; + uint32_t freq; + uint32_t parent; +}; + +struct _djw_prefix +{ + usize_t scount; + uint8_t *symbol; + usize_t mcount; + uint8_t *mtfsym; + uint8_t *repcnt; +}; + +struct _djw_stream +{ + int unused; +}; + +/* Each Huffman table consists of 256 "code length" (CLEN) codes, + * which are themselves Huffman coded after eliminating repeats and + * move-to-front coding. The prefix consists of all the CLEN codes in + * djw_encode_basic plus a 4-bit value stating how many of the + * djw_encode_extra codes are actually coded (the rest are presumed + * zero, or unused CLEN codes). + * + * These values of these two arrays were arrived at by studying the + * distribution of min and max clen over a collection of DATA, INST, + * and ADDR inputs. The goal is to specify the order of + * djw_extra_codes that is most likely to minimize the number of extra + * codes that must be encoded. + * + * Results: 158896 sections were counted by compressing files (window + * size 512K) listed with: `find / -type f ( -user jmacd -o -perm +444 + * )` + * + * The distribution of CLEN codes for each efficient invocation of the + * secondary compressor (taking the best number of groups/sector size) + * was recorded. Then we look at the distribution of min and max clen + * values, counting the number of times the value C_low is less than + * the min and C_high is greater than the max. Values >= C_high and + * <= C_low will not have their lengths coded. The results are sorted + * and the least likely 15 are placed into the djw_encode_extra[] + * array in order. These values are used as the initial MTF ordering. 
+ + clow[1] = 155119 + clow[2] = 140325 + clow[3] = 84072 + --- + clow[4] = 7225 + clow[5] = 1093 + clow[6] = 215 + --- + chigh[4] = 1 + chigh[5] = 30 + chigh[6] = 218 + chigh[7] = 2060 + chigh[8] = 13271 + --- + chigh[9] = 39463 + chigh[10] = 77360 + chigh[11] = 118298 + chigh[12] = 141360 + chigh[13] = 154086 + chigh[14] = 157967 + chigh[15] = 158603 + chigh[16] = 158864 + chigh[17] = 158893 + chigh[18] = 158895 + chigh[19] = 158896 + chigh[20] = 158896 + +*/ + +static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] = + { + 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20, + }; + +static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] = + { + 4, 5, 6, 7, 8, + }; + +/*********************************************************************/ +/* DECLS */ +/*********************************************************************/ + +static djw_stream* djw_alloc (xd3_stream *stream); +static int djw_init (xd3_stream *stream, + djw_stream *h, + int is_encode); +static void djw_destroy (xd3_stream *stream, + djw_stream *h); + +#if XD3_ENCODER +static int xd3_encode_huff (xd3_stream *stream, + djw_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif + +static int xd3_decode_huff (xd3_stream *stream, + djw_stream *sec_stream, + const uint8_t **input, + const uint8_t *const input_end, + uint8_t **output, + const uint8_t *const output_end); + +/*********************************************************************/ +/* HUFFMAN */ +/*********************************************************************/ + +static djw_stream* +djw_alloc (xd3_stream *stream) +{ + return (djw_stream*) xd3_alloc (stream, sizeof (djw_stream), 1); +} + +static int +djw_init (xd3_stream *stream, djw_stream *h, int is_encode) +{ + /* Fields are initialized prior to use. 
*/ + return 0; +} + +static void +djw_destroy (xd3_stream *stream, + djw_stream *h) +{ + xd3_free (stream, h); +} + + +/*********************************************************************/ +/* HEAP */ +/*********************************************************************/ + +static INLINE int +heap_less (const djw_heapen *a, const djw_heapen *b) +{ + return a->freq < b->freq || + (a->freq == b->freq && + a->depth < b->depth); +} + +static INLINE void +heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e) +{ + /* Insert ents[e] into next slot heap[p] */ + usize_t pp = p/2; /* P's parent */ + + while (heap_less (& ents[e], & ents[heap[pp]])) + { + heap[p] = heap[pp]; + p = pp; + pp = p/2; + } + + heap[p] = e; +} + +static INLINE djw_heapen* +heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last) +{ + usize_t smallest = heap[1]; + usize_t p, pc, t; + + /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */ + heap[1] = heap[heap_last+1]; + + /* Re-heapify */ + for (p = 1; ; p = pc) + { + pc = p*2; + + /* Reached bottom of heap */ + if (pc > heap_last) { break; } + + /* See if second child is smaller. */ + if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) + { + pc += 1; + } + + /* If pc is not smaller than p, heap property re-established. */ + if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; } + + t = heap[pc]; + heap[pc] = heap[p]; + heap[p] = t; + } + + return (djw_heapen*) & ents[smallest]; +} + +#if XD3_DEBUG +static void +heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last) +{ + usize_t i; + for (i = 1; i <= heap_last; i += 1) + { + /* Heap property: child not less than parent */ + XD3_ASSERT (! 
heap_less (& ents[heap[i]], & ents[heap[i/2]])); + + IF_DEBUG2 (DP(RINT "heap[%"W"u] = %u\n", i, ents[heap[i]].freq)); + } +} +#endif + +/*********************************************************************/ +/* MTF, 1/2 */ +/*********************************************************************/ + +static INLINE usize_t +djw_update_mtf (uint8_t *mtf, usize_t mtf_i) +{ + int k; + usize_t sym = mtf[mtf_i]; + + for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; } + + mtf[0] = sym; + return sym; +} + +static INLINE void +djw_update_1_2 (int *mtf_run, usize_t *mtf_i, + uint8_t *mtfsym, djw_weight *freq) +{ + uint8_t code; + + do + { + /* Offset by 1, since any number of RUN_ symbols implies run>0... */ + *mtf_run -= 1; + + code = (*mtf_run & 1) ? RUN_1 : RUN_0; + + mtfsym[(*mtf_i)++] = code; + freq[code] += 1; + *mtf_run >>= 1; + } + while (*mtf_run >= 1); + + *mtf_run = 0; +} + +static void +djw_init_clen_mtf_1_2 (uint8_t *clmtf) +{ + usize_t i, cl_i = 0; + + clmtf[cl_i++] = 0; + for (i = 0; i < DJW_BASIC_CODES; i += 1) + { + clmtf[cl_i++] = djw_encode_12basic[i]; + } + for (i = 0; i < DJW_EXTRA_CODES; i += 1) + { + clmtf[cl_i++] = djw_encode_12extra[i]; + } +} + +/*********************************************************************/ +/* PREFIX CODES */ +/*********************************************************************/ +#if XD3_ENCODER +static usize_t +djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t maxlen) +{ + /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 + * internal nodes, never more than ALPHABET_SIZE entries actually in the + * heap (minimum weight subtrees during prefix construction). First + * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are + * internal nodes. */ + djw_heapen ents[ALPHABET_SIZE * 2]; + usize_t heap[ALPHABET_SIZE + 1]; + + usize_t heap_last; /* Index of the last _valid_ heap entry. 
*/ + usize_t ents_size; /* Number of entries, including 0th fake entry */ + usize_t overflow; /* Number of code lengths that overflow */ + usize_t total_bits; + usize_t i; + + IF_DEBUG (usize_t first_bits = 0); + + /* Insert real symbol frequences. */ + for (i = 0; i < asize; i += 1) + { + ents[i+1].freq = freq[i]; + IF_DEBUG2 (DP(RINT "ents[%"W"i] = freq[%"W"u] = %d\n", + i+1, i, freq[i])); + } + + again: + + /* The loop is re-entered each time an overflow occurs. Re-initialize... */ + heap_last = 0; + ents_size = 1; + overflow = 0; + total_bits = 0; + + /* 0th entry terminates the while loop in heap_insert (it's the parent of + * the smallest element, always less-than) */ + heap[0] = 0; + ents[0].depth = 0; + ents[0].freq = 0; + + /* Initial heap. */ + for (i = 0; i < asize; i += 1, ents_size += 1) + { + ents[ents_size].depth = 0; + ents[ents_size].parent = 0; + + if (ents[ents_size].freq != 0) + { + heap_insert (heap, ents, ++heap_last, ents_size); + } + } + + IF_DEBUG (heap_check (heap, ents, heap_last)); + + /* Must be at least one symbol, or else we can't get here. */ + XD3_ASSERT (heap_last != 0); + + /* If there is only one symbol, fake a second to prevent zero-length + * codes. */ + if (heap_last == 1) + { + /* Pick either the first or last symbol. */ + usize_t s = freq[0] ? asize-1 : 0; + ents[s+1].freq = 1; + goto again; + } + + /* Build prefix tree. */ + while (heap_last > 1) + { + djw_heapen *h1 = heap_extract (heap, ents, --heap_last); + djw_heapen *h2 = heap_extract (heap, ents, --heap_last); + + ents[ents_size].freq = h1->freq + h2->freq; + ents[ents_size].depth = 1 + xd3_max (h1->depth, h2->depth); + ents[ents_size].parent = 0; + + h1->parent = h2->parent = ents_size; + + heap_insert (heap, ents, ++heap_last, ents_size++); + } + + IF_DEBUG (heap_check (heap, ents, heap_last)); + + /* Now compute prefix code lengths, counting parents. 
*/ + for (i = 1; i < asize+1; i += 1) + { + usize_t b = 0; + + if (ents[i].freq != 0) + { + usize_t p = i; + + while ((p = ents[p].parent) != 0) { b += 1; } + + if (b > maxlen) { overflow = 1; } + + total_bits += b * freq[i-1]; + } + + /* clen is 0-origin, unlike ents. */ + IF_DEBUG2 (DP(RINT "clen[%"W"u] = %"W"u\n", i-1, b)); + clen[i-1] = b; + } + + IF_DEBUG (if (first_bits == 0) first_bits = total_bits); + + if (! overflow) + { + IF_DEBUG2 (if (first_bits != total_bits) + { + DP(RINT "code length overflow changed %"W"u bits\n", + total_bits - first_bits); + }); + return total_bits; + } + + /* OPT: There is a non-looping way to fix overflow shown in zlib, but this + * is easier (for now), as done in bzip2. */ + for (i = 1; i < asize+1; i += 1) + { + ents[i].freq = ents[i].freq / 2 + 1; + } + + goto again; +} + +static void +djw_build_codes (usize_t *codes, const uint8_t *clen, usize_t asize, usize_t abs_max) +{ + usize_t i, l; + usize_t min_clen = DJW_MAX_CODELEN; + usize_t max_clen = 0; + usize_t code = 0; + + /* Find the min and max code length */ + for (i = 0; i < asize; i += 1) + { + if (clen[i] > 0 && clen[i] < min_clen) + { + min_clen = clen[i]; + } + + max_clen = xd3_max (max_clen, (usize_t) clen[i]); + } + + XD3_ASSERT (max_clen <= abs_max); + + /* Generate a code for each symbol with the appropriate length. 
*/ + for (l = min_clen; l <= max_clen; l += 1) + { + for (i = 0; i < asize; i += 1) + { + if (clen[i] == l) + { + codes[i] = code++; + } + } + + code <<= 1; + } + + IF_DEBUG2 ({ + for (i = 0; i < asize; i += 1) + { + DP(RINT "code[%"W"u] = %"W"u\n", i, codes[i]); + } + }); +} + +/*********************************************************************/ +/* MOVE-TO-FRONT */ +/*********************************************************************/ +static void +djw_compute_mtf_1_2 (djw_prefix *prefix, + uint8_t *mtf, + djw_weight *freq_out, + usize_t nsym) +{ + size_t i, j, k; + usize_t sym; + usize_t size = prefix->scount; + usize_t mtf_i = 0; + int mtf_run = 0; + + /* This +2 is for the RUN_0, RUN_1 codes */ + memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2)); + + for (i = 0; i < size; ) + { + /* OPT: Bzip optimizes this algorithm a little by effectively checking + * j==0 before the MTF update. */ + sym = prefix->symbol[i++]; + + for (j = 0; mtf[j] != sym; j += 1) { } + + XD3_ASSERT (j <= nsym); + + for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } + + mtf[0] = sym; + + if (j == 0) + { + mtf_run += 1; + continue; + } + + if (mtf_run > 0) + { + djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); + } + + /* Non-zero symbols are offset by RUN_1 */ + prefix->mtfsym[mtf_i++] = (uint8_t)(j+RUN_1); + freq_out[j+RUN_1] += 1; + } + + if (mtf_run > 0) + { + djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); + } + + prefix->mcount = mtf_i; +} + +/* Counts character frequencies of the input buffer, returns the size. 
*/ +static usize_t +djw_count_freqs (djw_weight *freq, xd3_output *input) +{ + xd3_output *in; + usize_t size = 0; + + memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE); + + for (in = input; in; in = in->next_page) + { + const uint8_t *p = in->base; + const uint8_t *p_max = p + in->next; + + size += in->next; + + do + { + ++freq[*p]; + } + while (++p < p_max); + } + + IF_DEBUG2 ({int i; + DP(RINT "freqs: "); + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + DP(RINT "%u ", freq[i]); + } + DP(RINT "\n");}); + + return size; +} + +static void +djw_compute_multi_prefix (usize_t groups, + uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE], + djw_prefix *prefix) +{ + usize_t gp, i; + + prefix->scount = ALPHABET_SIZE; + memcpy (prefix->symbol, clen[0], ALPHABET_SIZE); + + for (gp = 1; gp < groups; gp += 1) + { + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (clen[gp][i] == 0) + { + continue; + } + + prefix->symbol[prefix->scount++] = clen[gp][i]; + } + } +} + +static void +djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq) +{ + /* This +1 is for the 0 code-length. */ + uint8_t clmtf[DJW_MAX_CODELEN+1]; + + djw_init_clen_mtf_1_2 (clmtf); + + djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN); +} + +static int +djw_encode_prefix (xd3_stream *stream, + xd3_output **output, + bit_state *bstate, + djw_prefix *prefix) +{ + int ret; + size_t i; + usize_t num_to_encode; + djw_weight clfreq[DJW_TOTAL_CODES]; + uint8_t clclen[DJW_TOTAL_CODES]; + usize_t clcode[DJW_TOTAL_CODES]; + + /* Move-to-front encode prefix symbols, count frequencies */ + djw_compute_prefix_1_2 (prefix, clfreq); + + /* Compute codes */ + djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); + djw_build_codes (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); + + /* Compute number of extra codes beyond basic ones for this template. 
*/ + num_to_encode = DJW_TOTAL_CODES; + while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) + { + num_to_encode -= 1; + } + XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); + + /* Encode: # of extra codes */ + if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, + num_to_encode - DJW_EXTRA_12OFFSET))) + { + return ret; + } + + /* Encode: MTF code lengths */ + for (i = 0; i < num_to_encode; i += 1) + { + if ((ret = xd3_encode_bits (stream, output, bstate, + DJW_CLCLEN_BITS, clclen[i]))) + { + return ret; + } + } + + /* Encode: CLEN code lengths */ + for (i = 0; i < prefix->mcount; i += 1) + { + usize_t mtf_sym = prefix->mtfsym[i]; + usize_t bits = clclen[mtf_sym]; + usize_t code = clcode[mtf_sym]; + + if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) + { + return ret; + } + } + + return 0; +} + +static void +djw_compute_selector_1_2 (djw_prefix *prefix, + usize_t groups, + djw_weight *gbest_freq) +{ + uint8_t grmtf[DJW_MAX_GROUPS]; + usize_t i; + + for (i = 0; i < groups; i += 1) { grmtf[i] = i; } + + djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups); +} + +static int +xd3_encode_howmany_groups (xd3_stream *stream, + xd3_sec_cfg *cfg, + usize_t input_size, + usize_t *ret_groups, + usize_t *ret_sector_size) +{ + usize_t cfg_groups = 0; + usize_t cfg_sector_size = 0; + usize_t sugg_groups = 0; + usize_t sugg_sector_size = 0; + + if (cfg->ngroups != 0) + { + if (cfg->ngroups > DJW_MAX_GROUPS) + { + stream->msg = "invalid secondary encoder group number"; + return XD3_INTERNAL; + } + + cfg_groups = cfg->ngroups; + } + + if (cfg->sector_size != 0) + { + if (cfg->sector_size < DJW_SECTORSZ_MULT || + cfg->sector_size > DJW_SECTORSZ_MAX || + (cfg->sector_size % DJW_SECTORSZ_MULT) != 0) + { + stream->msg = "invalid secondary encoder sector size"; + return XD3_INTERNAL; + } + + cfg_sector_size = cfg->sector_size; + } + + if (cfg_groups == 0 || cfg_sector_size == 0) + { + /* These 
values were found empirically using xdelta3-tune around version + * xdfs-0.256. */ + switch (cfg->data_type) + { + case DATA_SECTION: + if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; } + else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; } + else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; } + else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; } + else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; } + else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; } + else { sugg_groups = 8; sugg_sector_size = 70; } + break; + case INST_SECTION: + if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; } + else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; } + else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; } + else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; } + else { sugg_groups = 8; sugg_sector_size = 40; } + break; + case ADDR_SECTION: + if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; } + else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; } + else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; } + else { sugg_groups = 7; sugg_sector_size = 130; } + break; + } + + if (cfg_groups == 0) + { + cfg_groups = sugg_groups; + } + + if (cfg_sector_size == 0) + { + cfg_sector_size = sugg_sector_size; + } + } + + if (cfg_groups != 1 && cfg_sector_size == 0) + { + switch (cfg->data_type) + { + case DATA_SECTION: + cfg_sector_size = 20; + break; + case INST_SECTION: + cfg_sector_size = 50; + break; + case ADDR_SECTION: + cfg_sector_size = 130; + break; + } + } + + (*ret_groups) = cfg_groups; + (*ret_sector_size) = 
cfg_sector_size; + + XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS); + XD3_ASSERT (cfg_groups == 1 || + (cfg_sector_size >= DJW_SECTORSZ_MULT && + cfg_sector_size <= DJW_SECTORSZ_MAX)); + + return 0; +} + +static int +xd3_encode_huff (xd3_stream *stream, + djw_stream *h, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg) +{ + int ret; + usize_t groups, sector_size; + bit_state bstate = BIT_STATE_ENCODE_INIT; + xd3_output *in; + usize_t output_bits; + usize_t input_bits; + usize_t input_bytes; + usize_t initial_offset = output->next; + djw_weight real_freq[ALPHABET_SIZE]; + uint8_t *gbest = NULL; + uint8_t *gbest_mtf = NULL; + + input_bytes = djw_count_freqs (real_freq, input); + input_bits = input_bytes * 8; + + XD3_ASSERT (input_bytes > 0); + + if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, + & groups, & sector_size))) + { + return ret; + } + + if (0) + { + regroup: + /* Sometimes we dynamically decide there are too many groups. Arrive + * here. */ + output->next = initial_offset; + xd3_bit_state_encode_init (& bstate); + } + + /* Encode: # of groups (3 bits) */ + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_GROUP_BITS, groups-1))) { goto failure; } + + if (groups == 1) + { + /* Single Huffman group. */ + usize_t code[ALPHABET_SIZE]; /* Codes */ + uint8_t clen[ALPHABET_SIZE]; + uint8_t prefix_mtfsym[ALPHABET_SIZE]; + djw_prefix prefix; + + output_bits = + djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); + djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); + + if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + /* Encode: prefix */ + prefix.mtfsym = prefix_mtfsym; + prefix.symbol = clen; + prefix.scount = ALPHABET_SIZE; + + if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) + { + goto failure; + } + + if (output_bits + (8 * output->next) + EFFICIENCY_BITS >= + input_bits && ! 
cfg->inefficient) + { + goto nosecond; + } + + /* Encode: data */ + for (in = input; in; in = in->next_page) + { + const uint8_t *p = in->base; + const uint8_t *p_max = p + in->next; + + do + { + usize_t sym = *p++; + usize_t bits = clen[sym]; + + IF_DEBUG (output_bits -= bits); + + if ((ret = xd3_encode_bits (stream, & output, + & bstate, bits, code[sym]))) + { + goto failure; + } + } + while (p < p_max); + } + + XD3_ASSERT (output_bits == 0); + } + else + { + /* DJW Huffman */ + djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE]; + uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE]; + djw_weight left = input_bytes; + usize_t gp; + usize_t niter = 0; + usize_t select_bits; + usize_t sym1 = 0, sym2 = 0, s; + usize_t gcost[DJW_MAX_GROUPS]; + usize_t gbest_code[DJW_MAX_GROUPS+2]; + uint8_t gbest_clen[DJW_MAX_GROUPS+2]; + usize_t gbest_max = 1 + (input_bytes - 1) / sector_size; + usize_t best_bits = 0; + usize_t gbest_no; + usize_t gpcnt; + const uint8_t *p; + IF_DEBUG2 (usize_t gcount[DJW_MAX_GROUPS]); + + /* Encode: sector size (5 bits) */ + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_SECTORSZ_BITS, + (sector_size/DJW_SECTORSZ_MULT)-1))) + { + goto failure; + } + + /* Dynamic allocation. */ + if (gbest == NULL) + { + if ((gbest = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL) + { + ret = ENOMEM; + goto failure; + } + } + + if (gbest_mtf == NULL) + { + if ((gbest_mtf = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL) + { + ret = ENOMEM; + goto failure; + } + } + + /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */ + + /* Generate initial code length tables. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_weight sum = 0; + djw_weight goal = left / (groups - gp); + + IF_DEBUG2 (usize_t nz = 0); + + /* Due to the single-code granularity of this distribution, it may + * be that we can't generate a distribution for each group. In that + * case subtract one group and try again. 
If (inefficient), we're + * testing group behavior, so don't mess things up. */ + if (goal == 0 && !cfg->inefficient) + { + IF_DEBUG2 (DP(RINT "too many groups (%"W"u), dropping one\n", + groups)); + groups -= 1; + goto regroup; + } + + /* Sum == goal is possible when (cfg->inefficient)... */ + while (sum < goal) + { + XD3_ASSERT (sym2 < ALPHABET_SIZE); + IF_DEBUG2 (nz += real_freq[sym2] != 0); + sum += real_freq[sym2++]; + } + + IF_DEBUG2(DP(RINT "group %"W"u has symbols %"W"u..%"W"u (%"W"u non-zero) " + "(%u/%"W"u = %.3f)\n", + gp, sym1, sym2, nz, sum, + input_bytes, sum / (double)input_bytes);); + + for (s = 0; s < ALPHABET_SIZE; s += 1) + { + evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16; + } + + left -= sum; + sym1 = sym2+1; + } + + repeat: + + niter += 1; + gbest_no = 0; + memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups); + IF_DEBUG2 (memset (gcount, 0, sizeof (gcount[0]) * groups)); + + /* For each input page (loop is irregular to allow non-pow2-size group + * size. */ + in = input; + p = in->base; + + /* For each group-size sector. */ + do + { + const uint8_t *p0 = p; + xd3_output *in0 = in; + usize_t best = 0; + usize_t winner = 0; + + /* Select best group for each sector, update evolve_freq. */ + memset (gcost, 0, sizeof (gcost[0]) * groups); + + /* For each byte in sector. */ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + /* For each group. */ + for (gp = 0; gp < groups; gp += 1) + { + gcost[gp] += evolve_clen[gp][*p]; + } + + /* Check end-of-input-page. 
*/ +# define GP_PAGE() \ + if ((usize_t)(++p - in->base) == in->next) \ + { \ + in = in->next_page; \ + if (in == NULL) { break; } \ + p = in->base; \ + } + + GP_PAGE (); + } + + /* Find min cost group for this sector */ + best = USIZE_T_MAX; + for (gp = 0; gp < groups; gp += 1) + { + if (gcost[gp] < best) + { + best = gcost[gp]; + winner = gp; + } + } + + XD3_ASSERT(gbest_no < gbest_max); + gbest[gbest_no++] = winner; + IF_DEBUG2 (gcount[winner] += 1); + + p = p0; + in = in0; + + /* Update group frequencies. */ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + evolve_freq[winner][*p] += 1; + + GP_PAGE (); + } + } + while (in != NULL); + + XD3_ASSERT (gbest_no == gbest_max); + + /* Recompute code lengths. */ + output_bits = 0; + for (gp = 0; gp < groups; gp += 1) + { + int i; + uint8_t evolve_zero[ALPHABET_SIZE]; + int any_zeros = 0; + + memset (evolve_zero, 0, sizeof (evolve_zero)); + + /* Cannot allow a zero clen when the real frequency is non-zero. + * Note: this means we are going to encode a fairly long code for + * these unused entries. An improvement would be to implement a + * NOTUSED code for when these are actually zero, but this requires + * another data structure (evolve_zero) since we don't know when + * evolve_freq[i] == 0... Briefly tested, looked worse. */ + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (evolve_freq[gp][i] == 0 && real_freq[i] != 0) + { + evolve_freq[gp][i] = 1; + evolve_zero[i] = 1; + any_zeros = 1; + } + } + + output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], + ALPHABET_SIZE, DJW_MAX_CODELEN); + + /* The above faking of frequencies does not matter for the last + * iteration, but we don't know when that is yet. However, it also + * breaks the output_bits computation. Necessary for accuracy, and + * for the (output_bits==0) assert after all bits are output. 
*/ + if (any_zeros) + { + IF_DEBUG2 (usize_t save_total = output_bits); + + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; } + } + + IF_DEBUG2 (DP(RINT "evolve_zero reduced %"W"u bits in group %"W"u\n", + save_total - output_bits, gp)); + } + } + + IF_DEBUG2( + DP(RINT "pass %"W"u total bits: %"W"u group uses: ", niter, output_bits); + for (gp = 0; gp < groups; gp += 1) { DP(RINT "%"W"u ", gcount[gp]); } + DP(RINT "\n"); + ); + + /* End iteration. */ + + IF_DEBUG2 (if (niter > 1 && best_bits < output_bits) { + DP(RINT "iteration lost %"W"u bits\n", output_bits - best_bits); }); + + if (niter == 1 || (niter < DJW_MAX_ITER && + (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT)) + { + best_bits = output_bits; + goto repeat; + } + + /* Efficiency check. */ + if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + IF_DEBUG2 (DP(RINT "djw compression: %"W"u -> %0.3f\n", + input_bytes, output_bits / 8.0)); + + /* Encode: prefix */ + { + uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE]; + uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE]; + uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE]; + djw_prefix prefix; + + prefix.symbol = prefix_symbol; + prefix.mtfsym = prefix_mtfsym; + prefix.repcnt = prefix_repcnt; + + djw_compute_multi_prefix (groups, evolve_clen, & prefix); + if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) + { + goto failure; + } + } + + /* Encode: selector frequencies */ + { + /* DJW_MAX_GROUPS +2 is for RUN_0, RUN_1 symbols. 
*/ + djw_weight gbest_freq[DJW_MAX_GROUPS+2]; + djw_prefix gbest_prefix; + usize_t i; + + gbest_prefix.scount = gbest_no; + gbest_prefix.symbol = gbest; + gbest_prefix.mtfsym = gbest_mtf; + + djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq); + + select_bits = + djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN); + djw_build_codes (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN); + + for (i = 0; i < groups+1; i += 1) + { + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_GBCLEN_BITS, gbest_clen[i]))) + { + goto failure; + } + } + + for (i = 0; i < gbest_prefix.mcount; i += 1) + { + usize_t gp_mtf = gbest_mtf[i]; + usize_t gp_sel_bits = gbest_clen[gp_mtf]; + usize_t gp_sel_code = gbest_code[gp_mtf]; + + XD3_ASSERT (gp_mtf < groups+1); + + if ((ret = xd3_encode_bits (stream, & output, & bstate, + gp_sel_bits, gp_sel_code))) + { + goto failure; + } + + IF_DEBUG (select_bits -= gp_sel_bits); + } + + XD3_ASSERT (select_bits == 0); + } + + /* Efficiency check. */ + if (output_bits + select_bits + (8 * output->next) + + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + /* Encode: data */ + { + usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE]; + usize_t sector = 0; + + /* Build code tables for each group. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_build_codes (evolve_code[gp], evolve_clen[gp], + ALPHABET_SIZE, DJW_MAX_CODELEN); + } + + /* Now loop over the input. */ + in = input; + p = in->base; + + do + { + /* For each sector. */ + usize_t gp_best = gbest[sector]; + usize_t *gp_codes = evolve_code[gp_best]; + uint8_t *gp_clens = evolve_clen[gp_best]; + + XD3_ASSERT (sector < gbest_no); + + sector += 1; + + /* Encode the sector data. 
*/ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + usize_t sym = *p; + usize_t bits = gp_clens[sym]; + usize_t code = gp_codes[sym]; + + IF_DEBUG (output_bits -= bits); + + if ((ret = xd3_encode_bits (stream, & output, & bstate, + bits, code))) + { + goto failure; + } + + GP_PAGE (); + } + } + while (in != NULL); + + XD3_ASSERT (select_bits == 0); + XD3_ASSERT (output_bits == 0); + } + } + + ret = xd3_flush_bits (stream, & output, & bstate); + + if (0) + { + nosecond: + stream->msg = "secondary compression was inefficient"; + ret = XD3_NOSECOND; + } + + failure: + + xd3_free (stream, gbest); + xd3_free (stream, gbest_mtf); + return ret; +} +#endif /* XD3_ENCODER */ + +/*********************************************************************/ +/* DECODE */ +/*********************************************************************/ + +static void +djw_build_decoder (xd3_stream *stream, + usize_t asize, + usize_t abs_max, + const uint8_t *clen, + uint8_t *inorder, + usize_t *base, + usize_t *limit, + usize_t *min_clenp, + usize_t *max_clenp) +{ + usize_t i, l; + const uint8_t *ci; + usize_t nr_clen [DJW_TOTAL_CODES]; + usize_t tmp_base[DJW_TOTAL_CODES]; + usize_t min_clen; + usize_t max_clen; + + /* Assumption: the two temporary arrays are large enough to hold abs_max. */ + XD3_ASSERT (abs_max <= DJW_MAX_CODELEN); + + /* This looks something like the start of zlib's inftrees.c */ + memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1)); + + /* Count number of each code length */ + i = asize; + ci = clen; + do + { + /* Caller _must_ check that values are in-range. Most of the time the + * caller decodes a specific number of bits, which imply the max value, + * and the other time the caller decodes a huffman value, which must be + * in-range. Therefore, its an assertion and this function cannot + * otherwise fail. */ + XD3_ASSERT (*ci <= abs_max); + + nr_clen[*ci++]++; + } + while (--i != 0); + + /* Compute min, max. 
*/ + for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } } + min_clen = i; + for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } } + max_clen = i; + + /* Fill the BASE, LIMIT table. */ + tmp_base[min_clen] = 0; + base[min_clen] = 0; + limit[min_clen] = nr_clen[min_clen] - 1; + for (i = min_clen + 1; i <= max_clen; i += 1) + { + usize_t last_limit = ((limit[i-1] + 1) << 1); + tmp_base[i] = tmp_base[i-1] + nr_clen[i-1]; + limit[i] = last_limit + nr_clen[i] - 1; + base[i] = last_limit - tmp_base[i]; + } + + /* Fill the inorder array, canonically ordered codes. */ + ci = clen; + for (i = 0; i < asize; i += 1) + { + if ((l = *ci++) != 0) + { + inorder[tmp_base[l]++] = i; + } + } + + *min_clenp = min_clen; + *max_clenp = max_clen; +} + +static INLINE int +djw_decode_symbol (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *inorder, + const usize_t *base, + const usize_t *limit, + usize_t min_clen, + usize_t max_clen, + usize_t *sym, + usize_t max_sym) +{ + usize_t code = 0; + usize_t bits = 0; + + /* OPT: Supposedly a small lookup table improves speed here... */ + + /* Code outline is similar to xd3_decode_bits... 
*/ + if (bstate->cur_mask == 0x100) { goto next_byte; } + + for (;;) + { + do + { + if (bits == max_clen) { goto corrupt; } + + bits += 1; + code = (code << 1); + + if (bstate->cur_byte & bstate->cur_mask) { code |= 1; } + + bstate->cur_mask <<= 1; + + if (bits >= min_clen && code <= limit[bits]) { goto done; } + } + while (bstate->cur_mask != 0x100); + + next_byte: + + if (*input == input_end) + { + stream->msg = "secondary decoder end of input"; + return XD3_INVALID_INPUT; + } + + bstate->cur_byte = *(*input)++; + bstate->cur_mask = 1; + } + + done: + + if (base[bits] <= code) + { + usize_t offset = code - base[bits]; + + if (offset <= max_sym) + { + IF_DEBUG2 (DP(RINT "(j) %"W"u ", code)); + *sym = inorder[offset]; + return 0; + } + } + + corrupt: + stream->msg = "secondary decoder invalid code"; + return XD3_INVALID_INPUT; +} + +static int +djw_decode_clclen (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + uint8_t *cl_inorder, + usize_t *cl_base, + usize_t *cl_limit, + usize_t *cl_minlen, + usize_t *cl_maxlen, + uint8_t *cl_mtf) +{ + int ret; + uint8_t cl_clen[DJW_TOTAL_CODES]; + usize_t num_codes, value; + usize_t i; + + /* How many extra code lengths to encode. */ + if ((ret = xd3_decode_bits (stream, bstate, input, + input_end, DJW_EXTRA_CODE_BITS, & num_codes))) + { + return ret; + } + + num_codes += DJW_EXTRA_12OFFSET; + + /* Read num_codes. */ + for (i = 0; i < num_codes; i += 1) + { + if ((ret = xd3_decode_bits (stream, bstate, input, + input_end, DJW_CLCLEN_BITS, & value))) + { + return ret; + } + + cl_clen[i] = value; + } + + /* Set the rest to zero. */ + for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; } + + /* No need to check for in-range clen values, because: */ + XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1); + + /* Build the code-length decoder. 
*/ + djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN, + cl_clen, cl_inorder, cl_base, + cl_limit, cl_minlen, cl_maxlen); + + /* Initialize the MTF state. */ + djw_init_clen_mtf_1_2 (cl_mtf); + + return 0; +} + +static INLINE int +djw_decode_1_2 (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *inorder, + const usize_t *base, + const usize_t *limit, + const usize_t *minlen, + const usize_t *maxlen, + uint8_t *mtfvals, + usize_t elts, + usize_t skip_offset, + uint8_t *values) +{ + usize_t n = 0, rep = 0, mtf = 0, s = 0; + int ret; + + while (n < elts) + { + /* Special case inside generic code: CLEN only: If not the first group, + * we already know the zero frequencies. */ + if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0) + { + values[n++] = 0; + continue; + } + + /* Repeat last symbol. */ + if (rep != 0) + { + values[n++] = mtfvals[0]; + rep -= 1; + continue; + } + + /* Symbol following last repeat code. */ + if (mtf != 0) + { + usize_t sym = djw_update_mtf (mtfvals, mtf); + values[n++] = sym; + mtf = 0; + continue; + } + + /* Decode next symbol/repeat code. */ + if ((ret = djw_decode_symbol (stream, bstate, input, input_end, + inorder, base, limit, *minlen, *maxlen, + & mtf, DJW_TOTAL_CODES))) { return ret; } + + if (mtf <= RUN_1) + { + /* Repetition. */ + rep = ((mtf + 1) << s); + mtf = 0; + s += 1; + } + else + { + /* Remove the RUN_1 MTF offset. */ + mtf -= 1; + s = 0; + } + } + + /* If (rep != 0) there were too many codes received. 
*/
+  if (rep != 0)
+    {
+      stream->msg = "secondary decoder invalid repeat code";
+      return XD3_INVALID_INPUT;
+    }
+
+  return 0;
+}
+
+/* Decodes the code lengths for all `groups` prefix tables with a single
+ * call to djw_decode_1_2, passing ALPHABET_SIZE * groups and
+ * ALPHABET_SIZE as its two size arguments and `clen` as the destination.
+ * Returns whatever djw_decode_1_2 returns. */
+static INLINE int
+djw_decode_prefix (xd3_stream     *stream,
+                   bit_state      *bstate,
+                   const uint8_t **input,
+                   const uint8_t  *input_end,
+                   const uint8_t  *cl_inorder,
+                   const usize_t  *cl_base,
+                   const usize_t  *cl_limit,
+                   const usize_t  *cl_minlen,
+                   const usize_t  *cl_maxlen,
+                   uint8_t        *cl_mtf,
+                   usize_t         groups,
+                   uint8_t        *clen)
+{
+  return djw_decode_1_2 (stream, bstate, input, input_end,
+                         cl_inorder, cl_base, cl_limit,
+                         cl_minlen, cl_maxlen, cl_mtf,
+                         ALPHABET_SIZE * groups, ALPHABET_SIZE, clen);
+}
+
+/* Secondary (DJW) decoder entry point.
+ *
+ * Reads, in order, the group count, the sector size (only when there is
+ * more than one group), the code-length tables, and the per-sector group
+ * selectors from *input_pos, then decodes one symbol per output byte
+ * until output_end is reached.
+ *
+ * Every exit after the first bit has been read goes through `fail`
+ * (success included): sel_group is released there and *input_pos /
+ * *output_pos are set to the point where decoding stopped.
+ *
+ * Returns 0 on success, XD3_INVALID_INPUT for an empty output window,
+ * ENOMEM when the selector buffer cannot be allocated, or the non-zero
+ * code reported by a helper. */
+static int
+xd3_decode_huff (xd3_stream     *stream,
+                 djw_stream     *h,
+                 const uint8_t **input_pos,
+                 const uint8_t  *const input_end,
+                 uint8_t       **output_pos,
+                 const uint8_t  *const output_end)
+{
+  const uint8_t *input = *input_pos;
+  uint8_t  *output = *output_pos;
+  bit_state bstate = BIT_STATE_DECODE_INIT;
+  uint8_t  *sel_group = NULL;
+  usize_t   groups, gp;
+  usize_t   output_bytes = (usize_t)(output_end - output);
+  usize_t   sector_size;
+  usize_t   sectors;
+  int       ret;
+
+  /* Invalid input. */
+  if (output_bytes == 0)
+    {
+      stream->msg = "secondary decoder invalid input";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* Decode: number of groups */
+  if ((ret = xd3_decode_bits (stream, & bstate, & input,
+                              input_end, DJW_GROUP_BITS, & groups)))
+    {
+      goto fail;
+    }
+
+  groups += 1;
+
+  if (groups > 1)
+    {
+      /* Decode: group size */
+      if ((ret = xd3_decode_bits (stream, & bstate, & input,
+                                  input_end, DJW_SECTORSZ_BITS,
+                                  & sector_size))) { goto fail; }
+
+      sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
+    }
+  else
+    {
+      /* Default for groups == 1 */
+      sector_size = output_bytes;
+    }
+
+  sectors = 1 + (output_bytes - 1) / sector_size;
+
+  /* TODO: In the case of groups==1, lots of extra stack space gets used here.
+   * Could dynamically allocate this memory, which would help with excess
+   * parameter passing, too.  Passing too many parameters in this file,
+   * simplify it! */
+
+  /* Outer scope: per-group symbol decoder tables. */
+  {
+    uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
+    usize_t base   [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
+    usize_t limit  [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
+    usize_t minlen [DJW_MAX_GROUPS];
+    usize_t maxlen [DJW_MAX_GROUPS];
+
+    /* Nested scope: code length decoder tables. */
+    {
+      uint8_t clen      [DJW_MAX_GROUPS][ALPHABET_SIZE];
+      uint8_t cl_inorder[DJW_TOTAL_CODES];
+      usize_t cl_base   [DJW_MAX_CLCLEN+2];
+      usize_t cl_limit  [DJW_MAX_CLCLEN+2];
+      uint8_t cl_mtf    [DJW_TOTAL_CODES];
+      usize_t cl_minlen;
+      usize_t cl_maxlen;
+
+      /* Compute the code length decoder. */
+      if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
+                                    cl_inorder, cl_base, cl_limit, & cl_minlen,
+                                    & cl_maxlen, cl_mtf))) { goto fail; }
+
+      /* Now decode each group decoder. */
+      if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
+                                    cl_inorder, cl_base, cl_limit,
+                                    & cl_minlen, & cl_maxlen, cl_mtf,
+                                    groups, clen[0]))) { goto fail; }
+
+      /* Prepare the actual decoding tables. */
+      for (gp = 0; gp < groups; gp += 1)
+        {
+          djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
+                             clen[gp], inorder[gp], base[gp], limit[gp],
+                             & minlen[gp], & maxlen[gp]);
+        }
+    }
+
+    /* Decode: selector clens. */
+    {
+      uint8_t sel_inorder[DJW_MAX_GROUPS+2];
+      usize_t sel_base   [DJW_MAX_GBCLEN+2];
+      usize_t sel_limit  [DJW_MAX_GBCLEN+2];
+      uint8_t sel_mtf    [DJW_MAX_GROUPS+2];
+      usize_t sel_minlen;
+      usize_t sel_maxlen;
+
+      /* Setup group selection. */
+      if (groups > 1)
+        {
+          uint8_t sel_clen[DJW_MAX_GROUPS+1];
+
+          for (gp = 0; gp < groups+1; gp += 1)
+            {
+              usize_t value;
+
+              if ((ret = xd3_decode_bits (stream, & bstate, & input,
+                                          input_end, DJW_GBCLEN_BITS,
+                                          & value))) { goto fail; }
+
+              sel_clen[gp] = value;
+              sel_mtf[gp]  = gp;
+            }
+
+          /* One selector byte per sector; released at `fail`. */
+          if ((sel_group = (uint8_t*) xd3_alloc (stream, sectors, 1)) == NULL)
+            {
+              ret = ENOMEM;
+              goto fail;
+            }
+
+          djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
+                             sel_inorder, sel_base, sel_limit,
+                             & sel_minlen, & sel_maxlen);
+
+          if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
+                                     sel_inorder, sel_base,
+                                     sel_limit, & sel_minlen,
+                                     & sel_maxlen, sel_mtf,
+                                     sectors, 0, sel_group))) { goto fail; }
+        }
+
+      /* Now decode each sector. */
+      {
+        /* Initialize for (groups==1) case. */
+        uint8_t *gp_inorder = inorder[0];
+        usize_t *gp_base    = base[0];
+        usize_t *gp_limit   = limit[0];
+        usize_t  gp_minlen  = minlen[0];
+        usize_t  gp_maxlen  = maxlen[0];
+        usize_t c;
+
+        for (c = 0; c < sectors; c += 1)
+          {
+            usize_t n;
+
+            if (groups >= 2)
+              {
+                gp = sel_group[c];
+
+                XD3_ASSERT (gp < groups);
+
+                gp_inorder = inorder[gp];
+                gp_base    = base[gp];
+                gp_limit   = limit[gp];
+                gp_minlen  = minlen[gp];
+                gp_maxlen  = maxlen[gp];
+              }
+
+            if (output_end < output)
+              {
+                /* Leave through `fail` like every other error so that
+                 * sel_group is freed and the positions are reported. */
+                stream->msg = "secondary decoder invalid input";
+                ret = XD3_INVALID_INPUT;
+                goto fail;
+              }
+
+            /* Decode next sector. */
+            n = xd3_min (sector_size, (usize_t) (output_end - output));
+
+            do
+              {
+                usize_t sym;
+
+                if ((ret = djw_decode_symbol (stream, & bstate,
+                                              & input, input_end,
+                                              gp_inorder, gp_base,
+                                              gp_limit, gp_minlen, gp_maxlen,
+                                              & sym, ALPHABET_SIZE)))
+                  {
+                    goto fail;
+                  }
+
+                *output++ = sym;
+              }
+            while (--n);
+          }
+      }
+    }
+  }
+
+  IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate)))
+                 { goto fail; });
+  XD3_ASSERT (ret == 0);
+
+ fail:
+  xd3_free (stream, sel_group);
+
+  (*input_pos) = input;
+  (*output_pos) = output;
+  return ret;
+}
+
+#endif
diff --git a/deps/xdelta3/xdelta3-fgk.h b/deps/xdelta3/xdelta3-fgk.h
new file mode 100644
index 0000000000..88636c1998
--- /dev/null
+++ b/deps/xdelta3/xdelta3-fgk.h
@@ -0,0 +1,862 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   For demonstration purposes only.
+ */
+
+#ifndef _XDELTA3_FGK_h_
+#define _XDELTA3_FGK_h_
+
+/* To include RetroArch's INLINE macro */
+#include "retro_inline.h"
+
+/* An implementation of the FGK algorithm described by D.E. Knuth in
+ * "Dynamic Huffman Coding" in Journal of Algorithms 6. */
+
+/* A 32bit counter (fgk_weight) is used as the frequency counter for
+ * nodes in the huffman tree.  TODO: Need to test for overflow and/or
+ * reset stats.
*/ + +typedef struct _fgk_stream fgk_stream; +typedef struct _fgk_node fgk_node; +typedef struct _fgk_block fgk_block; +typedef unsigned int fgk_bit; +typedef uint32_t fgk_weight; + +struct _fgk_block { + union { + fgk_node *un_leader; + fgk_block *un_freeptr; + } un; +}; + +#define block_leader un.un_leader +#define block_freeptr un.un_freeptr + +/* The code can also support fixed huffman encoding/decoding. */ +#define IS_ADAPTIVE 1 + +/* weight is a count of the number of times this element has been seen + * in the current encoding/decoding. parent, right_child, and + * left_child are pointers defining the tree structure. right and + * left point to neighbors in an ordered sequence of weights. The + * left child of a node is always guaranteed to have weight not + * greater than its sibling. fgk_blockLeader points to the element + * with the same weight as itself which is closest to the next + * increasing weight block. */ +struct _fgk_node +{ + fgk_weight weight; + fgk_node *parent; + fgk_node *left_child; + fgk_node *right_child; + fgk_node *left; + fgk_node *right; + fgk_block *my_block; +}; + +/* alphabet_size is the a count of the number of possible leaves in + * the huffman tree. The number of total nodes counting internal + * nodes is ((2 * alphabet_size) - 1). zero_freq_count is the number + * of elements remaining which have zero frequency. zero_freq_exp and + * zero_freq_rem satisfy the equation zero_freq_count = + * 2^zero_freq_exp + zero_freq_rem. root_node is the root of the + * tree, which is initialized to a node with zero frequency and + * contains the 0th such element. free_node contains a pointer to the + * next available fgk_node space. alphabet contains all the elements + * and is indexed by N. remaining_zeros points to the head of the + * list of zeros. 
*/ +struct _fgk_stream +{ + usize_t alphabet_size; + usize_t zero_freq_count; + usize_t zero_freq_exp; + usize_t zero_freq_rem; + usize_t coded_depth; + + usize_t total_nodes; + usize_t total_blocks; + + fgk_bit *coded_bits; + + fgk_block *block_array; + fgk_block *free_block; + + fgk_node *decode_ptr; + fgk_node *remaining_zeros; + fgk_node *alphabet; + fgk_node *root_node; + fgk_node *free_node; +}; + +/*********************************************************************/ +/* Encoder */ +/*********************************************************************/ + +static fgk_stream* fgk_alloc (xd3_stream *stream /*, usize_t alphabet_size */); +static int fgk_init (xd3_stream *stream, + fgk_stream *h, + int is_encode); +static usize_t fgk_encode_data (fgk_stream *h, + usize_t n); +static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h); + +static int xd3_encode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); + +/*********************************************************************/ +/* Decoder */ +/*********************************************************************/ + +static INLINE int fgk_decode_bit (fgk_stream *h, + fgk_bit b); +static usize_t fgk_decode_data (fgk_stream *h); +static void fgk_destroy (xd3_stream *stream, + fgk_stream *h); + +static int xd3_decode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + const uint8_t **input, + const uint8_t *const input_end, + uint8_t **output, + const uint8_t *const output_end); + +/*********************************************************************/ +/* Private */ +/*********************************************************************/ + +static unsigned int fgk_find_nth_zero (fgk_stream *h, usize_t n); +static usize_t fgk_nth_zero (fgk_stream *h, usize_t n); +static void fgk_update_tree (fgk_stream *h, usize_t n); +static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n); +static void fgk_eliminate_zero (fgk_stream* h, fgk_node 
*node); +static void fgk_move_right (fgk_stream *h, fgk_node *node); +static void fgk_promote (fgk_stream *h, fgk_node *node); +static void fgk_init_node (fgk_node *node, usize_t i, usize_t size); +static fgk_block* fgk_make_block (fgk_stream *h, fgk_node *l); +static void fgk_free_block (fgk_stream *h, fgk_block *b); +static void fgk_factor_remaining (fgk_stream *h); +static INLINE void fgk_swap_ptrs (fgk_node **one, fgk_node **two); + +/*********************************************************************/ +/* Basic Routines */ +/*********************************************************************/ + +/* returns an initialized huffman encoder for an alphabet with the + * given size. returns NULL if enough memory cannot be allocated */ +static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */) +{ + usize_t alphabet_size0 = ALPHABET_SIZE; + fgk_stream *h; + + if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL) + { + return NULL; + } + + h->total_nodes = (2 * alphabet_size0) - 1; + h->total_blocks = (2 * h->total_nodes); + h->alphabet = (fgk_node*) xd3_alloc (stream, h->total_nodes, sizeof (fgk_node)); + h->block_array = (fgk_block*) xd3_alloc (stream, h->total_blocks, sizeof (fgk_block)); + h->coded_bits = (fgk_bit*) xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit)); + + if (h->coded_bits == NULL || + h->alphabet == NULL || + h->block_array == NULL) + { + fgk_destroy (stream, h); + return NULL; + } + + h->alphabet_size = alphabet_size0; + + return h; +} + +static int fgk_init (xd3_stream *stream, fgk_stream *h, int is_encode) +{ + usize_t ui; + ssize_t si; + + h->root_node = h->alphabet; + h->decode_ptr = h->root_node; + h->free_node = h->alphabet + h->alphabet_size; + h->remaining_zeros = h->alphabet; + h->coded_depth = 0; + h->zero_freq_count = h->alphabet_size + 2; + + /* after two calls to factor_remaining, zero_freq_count == alphabet_size */ + fgk_factor_remaining(h); /* set ZFE and ZFR */ + 
fgk_factor_remaining(h); /* set ZFDB according to prev state */ + + IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes)); + + for (ui = 0; ui < h->total_blocks-1; ui += 1) + { + h->block_array[ui].block_freeptr = &h->block_array[ui + 1]; + } + + h->block_array[h->total_blocks - 1].block_freeptr = NULL; + h->free_block = h->block_array; + + /* Zero frequency nodes are inserted in the first alphabet_size + * positions, with Value, weight, and a pointer to the next zero + * frequency node. */ + for (si = h->alphabet_size - 1; si >= 0; si -= 1) + { + fgk_init_node (h->alphabet + si, (usize_t) si, h->alphabet_size); + } + + return 0; +} + +static void fgk_swap_ptrs(fgk_node **one, fgk_node **two) +{ + fgk_node *tmp = *one; + *one = *two; + *two = tmp; +} + +/* Takes huffman transmitter h and n, the nth elt in the alphabet, and + * returns the number of required to encode n. */ +static usize_t fgk_encode_data (fgk_stream* h, usize_t n) +{ + fgk_node *target_ptr = h->alphabet + n; + + XD3_ASSERT (n < h->alphabet_size); + + h->coded_depth = 0; + + /* First encode the binary representation of the nth remaining + * zero frequency element in reverse such that bit, which will be + * encoded from h->coded_depth down to 0 will arrive in increasing + * order following the tree path. If there is only one left, it + * is not neccesary to encode these bits. 
*/ + if (IS_ADAPTIVE && target_ptr->weight == 0) + { + usize_t where, shift; + usize_t bits; + + where = fgk_find_nth_zero(h, n); + shift = 1; + + if (h->zero_freq_rem == 0) + { + bits = h->zero_freq_exp; + } + else + { + bits = h->zero_freq_exp + 1; + } + + while (bits > 0) + { + h->coded_bits[h->coded_depth++] = (shift & where) && 1; + + bits -= 1; + shift <<= 1; + }; + + target_ptr = h->remaining_zeros; + } + + /* The path from root to node is filled into coded_bits in reverse so + * that it is encoded in the right order */ + while (target_ptr != h->root_node) + { + h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr); + + target_ptr = target_ptr->parent; + } + + if (IS_ADAPTIVE) + { + fgk_update_tree(h, n); + } + + return h->coded_depth; +} + +/* Should be called as many times as fgk_encode_data returns. + */ +static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h) +{ + XD3_ASSERT (h->coded_depth > 0); + + return h->coded_bits[--h->coded_depth]; +} + +/* This procedure updates the tree after alphabet[n] has been encoded + * or decoded. 
+ */ +static void fgk_update_tree (fgk_stream *h, usize_t n) +{ + fgk_node *incr_node; + + if (h->alphabet[n].weight == 0) + { + incr_node = fgk_increase_zero_weight (h, n); + } + else + { + incr_node = h->alphabet + n; + } + + while (incr_node != h->root_node) + { + fgk_move_right (h, incr_node); + fgk_promote (h, incr_node); + incr_node->weight += 1; /* incr the parent */ + incr_node = incr_node->parent; /* repeat */ + } + + h->root_node->weight += 1; +} + +static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd) +{ + fgk_node **fwd_par_ptr, **back_par_ptr; + fgk_node *move_back, *tmp; + + move_back = move_fwd->my_block->block_leader; + + if (move_fwd == move_back || + move_fwd->parent == move_back || + move_fwd->weight == 0) + { + return; + } + + move_back->right->left = move_fwd; + + if (move_fwd->left) + { + move_fwd->left->right = move_back; + } + + tmp = move_fwd->right; + move_fwd->right = move_back->right; + + if (tmp == move_back) + { + move_back->right = move_fwd; + } + else + { + tmp->left = move_back; + move_back->right = tmp; + } + + tmp = move_back->left; + move_back->left = move_fwd->left; + + if (tmp == move_fwd) + { + move_fwd->left = move_back; + } + else + { + tmp->right = move_fwd; + move_fwd->left = tmp; + } + + if (move_fwd->parent->right_child == move_fwd) + { + fwd_par_ptr = &move_fwd->parent->right_child; + } + else + { + fwd_par_ptr = &move_fwd->parent->left_child; + } + + if (move_back->parent->right_child == move_back) + { + back_par_ptr = &move_back->parent->right_child; + } + else + { + back_par_ptr = &move_back->parent->left_child; + } + + fgk_swap_ptrs (&move_fwd->parent, &move_back->parent); + fgk_swap_ptrs (fwd_par_ptr, back_par_ptr); + + move_fwd->my_block->block_leader = move_fwd; +} + +/* Shifts node, the leader of its block, into the next block. 
*/ +static void fgk_promote (fgk_stream *h, fgk_node *node) +{ + fgk_node *my_left, *my_right; + fgk_block *cur_block; + + my_right = node->right; + my_left = node->left; + cur_block = node->my_block; + + if (node->weight == 0) + { + return; + } + + /* if left is right child, parent of remaining zeros case (?), means parent + * has same weight as right child. */ + if (my_left == node->right_child && + node->left_child && + node->left_child->weight == 0) + { + XD3_ASSERT (node->left_child == h->remaining_zeros); + XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */ + + if (node->weight == (my_right->weight - 1) && my_right != h->root_node) + { + fgk_free_block (h, cur_block); + node->my_block = my_right->my_block; + my_left->my_block = my_right->my_block; + } + + return; + } + + if (my_left == h->remaining_zeros) + { + return; + } + + /* true if not the leftmost node */ + if (my_left->my_block == cur_block) + { + my_left->my_block->block_leader = my_left; + } + else + { + fgk_free_block (h, cur_block); + } + + /* node->parent != my_right */ + if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node)) + { + node->my_block = my_right->my_block; + } + else + { + node->my_block = fgk_make_block (h, node); + } +} + +/* When an element is seen the first time this is called to remove it from the list of + * zero weight elements and introduce a new internal node to the tree. 
*/ +static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n) +{ + fgk_node *this_zero, *new_internal, *zero_ptr; + + this_zero = h->alphabet + n; + + if (h->zero_freq_count == 1) + { + /* this is the last one */ + this_zero->right_child = NULL; + + if (this_zero->right->weight == 1) + { + this_zero->my_block = this_zero->right->my_block; + } + else + { + this_zero->my_block = fgk_make_block (h, this_zero); + } + + h->remaining_zeros = NULL; + + return this_zero; + } + + zero_ptr = h->remaining_zeros; + + new_internal = h->free_node++; + + new_internal->parent = zero_ptr->parent; + new_internal->right = zero_ptr->right; + new_internal->weight = 0; + new_internal->right_child = this_zero; + new_internal->left = this_zero; + + if (h->remaining_zeros == h->root_node) + { + /* This is the first element to be coded */ + h->root_node = new_internal; + this_zero->my_block = fgk_make_block (h, this_zero); + new_internal->my_block = fgk_make_block (h, new_internal); + } + else + { + new_internal->right->left = new_internal; + + if (zero_ptr->parent->right_child == zero_ptr) + { + zero_ptr->parent->right_child = new_internal; + } + else + { + zero_ptr->parent->left_child = new_internal; + } + + if (new_internal->right->weight == 1) + { + new_internal->my_block = new_internal->right->my_block; + } + else + { + new_internal->my_block = fgk_make_block (h, new_internal); + } + + this_zero->my_block = new_internal->my_block; + } + + fgk_eliminate_zero (h, this_zero); + + new_internal->left_child = h->remaining_zeros; + + this_zero->right = new_internal; + this_zero->left = h->remaining_zeros; + this_zero->parent = new_internal; + this_zero->left_child = NULL; + this_zero->right_child = NULL; + + h->remaining_zeros->parent = new_internal; + h->remaining_zeros->right = this_zero; + + return this_zero; +} + +/* When a zero frequency element is encoded, it is followed by the + * binary representation of the index into the remaining elements. 
+ * Sets a cache to the element before it so that it can be removed + * without calling this procedure again. */ +static unsigned int fgk_find_nth_zero (fgk_stream* h, usize_t n) +{ + fgk_node *target_ptr = h->alphabet + n; + fgk_node *head_ptr = h->remaining_zeros; + unsigned int idx = 0; + + while (target_ptr != head_ptr) + { + head_ptr = head_ptr->right_child; + idx += 1; + } + + return idx; +} + +/* Splices node out of the list of zeros. */ +static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node) +{ + if (h->zero_freq_count == 1) + { + return; + } + + fgk_factor_remaining(h); + + if (node->left_child == NULL) + { + h->remaining_zeros = h->remaining_zeros->right_child; + h->remaining_zeros->left_child = NULL; + } + else if (node->right_child == NULL) + { + node->left_child->right_child = NULL; + } + else + { + node->right_child->left_child = node->left_child; + node->left_child->right_child = node->right_child; + } +} + +static void fgk_init_node (fgk_node *node, usize_t i, usize_t size) +{ + if (i < size - 1) + { + node->right_child = node + 1; + } + else + { + node->right_child = NULL; + } + + if (i >= 1) + { + node->left_child = node - 1; + } + else + { + node->left_child = NULL; + } + + node->weight = 0; + node->parent = NULL; + node->right = NULL; + node->left = NULL; + node->my_block = NULL; +} + +/* The data structure used is an array of blocks, which are unions of + * free pointers and huffnode pointers. free blocks are a linked list + * of free blocks, the front of which is h->free_block. The used + * blocks are pointers to the head of each block. */ +static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead) +{ + fgk_block *ret = h->free_block; + + XD3_ASSERT (h->free_block != NULL); + + h->free_block = h->free_block->block_freeptr; + + ret->block_leader = lead; + + return ret; +} + +/* Restores the block to the front of the free list. 
*/ +static void fgk_free_block (fgk_stream *h, fgk_block *b) +{ + b->block_freeptr = h->free_block; + h->free_block = b; +} + +/* sets zero_freq_count, zero_freq_rem, and zero_freq_exp to satsity + * the equation given above. */ +static void fgk_factor_remaining (fgk_stream *h) +{ + unsigned int i; + + i = (--h->zero_freq_count); + h->zero_freq_exp = 0; + + while (i > 1) + { + h->zero_freq_exp += 1; + i >>= 1; + } + + i = 1 << h->zero_freq_exp; + + h->zero_freq_rem = h->zero_freq_count - i; +} + +/* receives a bit at a time and returns true when a complete code has + * been received. + */ +static INLINE int fgk_decode_bit (fgk_stream* h, fgk_bit b) +{ + XD3_ASSERT (b == 1 || b == 0); + + if (IS_ADAPTIVE && h->decode_ptr->weight == 0) + { + usize_t bitsreq; + + if (h->zero_freq_rem == 0) + { + bitsreq = h->zero_freq_exp; + } + else + { + bitsreq = h->zero_freq_exp + 1; + } + + h->coded_bits[h->coded_depth] = b; + h->coded_depth += 1; + + return h->coded_depth >= bitsreq; + } + else + { + if (b) + { + h->decode_ptr = h->decode_ptr->right_child; + } + else + { + h->decode_ptr = h->decode_ptr->left_child; + } + + if (h->decode_ptr->left_child == NULL) + { + /* If the weight is non-zero, finished. */ + if (h->decode_ptr->weight != 0) + { + return 1; + } + + /* zero_freq_count is dropping to 0, finished. */ + return h->zero_freq_count == 1; + } + else + { + return 0; + } + } +} + +static usize_t fgk_nth_zero (fgk_stream* h, usize_t n) +{ + fgk_node *ret = h->remaining_zeros; + + /* ERROR: if during this loop (ret->right_child == NULL) then the + * encoder's zero count is too high. Could return an error code + * now, but is probably unnecessary overhead, since the caller + * should check integrity anyway. 
*/
+  for (; n != 0 && ret->right_child != NULL; n -= 1)
+    {
+      ret = ret->right_child;
+    }
+
+  return (usize_t)(ret - h->alphabet);
+}
+
+/* Call after fgk_decode_bit has returned 1.  Resolves the element that
+ * was just decoded and returns its index into h->alphabet.
+ *
+ * When the decode pointer rests on a zero-weight node, the bits saved
+ * in h->coded_bits are folded, first bit most significant, into an
+ * index n and the element is looked up with fgk_nth_zero.  Afterwards
+ * coded_depth is cleared, the tree is updated for the element (adaptive
+ * mode) and the decode pointer is rewound to the root.
+ */
+static usize_t fgk_decode_data (fgk_stream* h)
+{
+  usize_t elt = (usize_t)(h->decode_ptr - h->alphabet);
+
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0) {
+    usize_t i = 0;
+    usize_t n = 0;
+
+    if (h->coded_depth > 0)
+      {
+        for (; i < h->coded_depth - 1; i += 1)
+          {
+            n |= h->coded_bits[i];
+            n <<= 1;
+          }
+      }
+
+    n |= h->coded_bits[i];
+    elt = fgk_nth_zero(h, n);
+  }
+
+  h->coded_depth = 0;
+
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree(h, elt);
+    }
+
+  h->decode_ptr = h->root_node;
+
+  return elt;
+}
+
+/* Releases the three arrays owned by `h` and then `h` itself through
+ * xd3_free.  A NULL `h` is ignored. */
+static void fgk_destroy (xd3_stream *stream,
+                         fgk_stream *h)
+{
+  if (h != NULL)
+    {
+      xd3_free (stream, h->alphabet);
+      xd3_free (stream, h->coded_bits);
+      xd3_free (stream, h->block_array);
+      xd3_free (stream, h);
+    }
+}
+
+/*********************************************************************/
+/*                             Xdelta                                */
+/*********************************************************************/
+
+#if XD3_ENCODER
+/* Encodes every byte of every page in the `input` chain: each byte is
+ * turned into bits by fgk_encode_data and the bits are pushed out one
+ * at a time with xd3_encode_bit.  Returns the first non-zero code from
+ * xd3_encode_bit, otherwise the result of xd3_flush_bits. */
+static int
+xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg)
+{
+  bit_state   bstate = BIT_STATE_ENCODE_INIT;
+  xd3_output *cur_page;
+  int         ret;
+
+  /* OPT: quit compression early if it looks bad */
+  for (cur_page = input; cur_page; cur_page = cur_page->next_page)
+    {
+      const uint8_t *inp     = cur_page->base;
+      const uint8_t *inp_max = inp + cur_page->next;
+
+      while (inp < inp_max)
+        {
+          usize_t bits = fgk_encode_data (sec_stream, *inp++);
+
+          while (bits--)
+            {
+              if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; }
+            }
+        }
+    }
+
+  return xd3_flush_bits (stream, & output, & bstate);
+}
+#endif
+
+/* Decodes FGK-coded input, least significant bit of each byte first,
+ * until the output window [*output_pos, output_max) is full.
+ *
+ * On success returns 0 and stores the advanced positions in *input_pos
+ * and *output_pos.  Returns XD3_INVALID_INPUT when the output window is
+ * empty and XD3_INTERNAL when the input is exhausted first; the
+ * positions are left untouched on both error returns. */
+static int
+xd3_decode_fgk (xd3_stream     *stream,
+                fgk_stream     *sec_stream,
+                const uint8_t **input_pos,
+                const uint8_t  *const input_max,
+                uint8_t       **output_pos,
+                const uint8_t  *const output_max)
+{
+  bit_state bstate;
+  uint8_t       *output = *output_pos;
+  const uint8_t *input  = *input_pos;
+
+  /* The loop below stores a byte before it compares output with
+   * output_max, so an empty window would be written past and the
+   * equality test would never fire.  Reject it up front, with the same
+   * code and message xd3_decode_huff uses for this case. */
+  if (output >= output_max)
+    {
+      stream->msg = "secondary decoder invalid input";
+      return XD3_INVALID_INPUT;
+    }
+
+  for (;;)
+    {
+      if (input == input_max)
+        {
+          stream->msg = "secondary decoder end of input";
+          return XD3_INTERNAL;
+        }
+
+      bstate.cur_byte = *input++;
+
+      for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1)
+        {
+          int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) ? 1U : 0U);
+
+          if (! done) { continue; }
+
+          *output++ = fgk_decode_data (sec_stream);
+
+          if (output == output_max)
+            {
+              /* During regression testing: */
+              IF_REGRESSION ({
+                int ret;
+                bstate.cur_mask <<= 1;
+                if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; }
+              });
+
+              (*output_pos) = output;
+              (*input_pos) = input;
+              return 0;
+            }
+        }
+    }
+}
+
+#endif /* _XDELTA3_FGK_ */
diff --git a/deps/xdelta3/xdelta3-hash.h b/deps/xdelta3/xdelta3-hash.h
new file mode 100644
index 0000000000..822aa0245f
--- /dev/null
+++ b/deps/xdelta3/xdelta3-hash.h
@@ -0,0 +1,163 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef _XDELTA3_HASH_H_
+#define _XDELTA3_HASH_H_
+
+/* To include RetroArch's INLINE macro */
+#include "retro_inline.h"
+#include "xdelta3-internal.h"
+
+#if XD3_DEBUG
+#define SMALL_HASH_DEBUG1(s,inp)                                  \
+  uint32_t debug_state;                                           \
+  uint32_t debug_hval = xd3_checksum_hash (& (s)->small_hash,       \
+              xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look))
+#define SMALL_HASH_DEBUG2(s,inp)                                  \
+  XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
+              xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look)))
+#else
+#define SMALL_HASH_DEBUG1(s,inp)
+#define SMALL_HASH_DEBUG2(s,inp)
+#endif /* XD3_DEBUG */
+
+#if UNALIGNED_OK
+#define UNALIGNED_READ32(dest,src) (*(dest)) = (*(uint32_t*)(src))
+#else
+#define UNALIGNED_READ32(dest,src) memcpy((dest), (src), 4);
+#endif
+
+/* These are good hash multipliers for 32-bit and 64-bit LCGs: see
+ * "linear congruential generators of different sizes and good lattice
+ * structure" */
+#define xd3_hash_multiplier32 1597334677U
+#define xd3_hash_multiplier64 1181783497276652981ULL
+
+/* Loads the four bytes at `base` into *state and returns that word
+ * multiplied by xd3_hash_multiplier32.
+ * TODO: small cksum is hard-coded for 4 bytes (i.e., "look" is unused) */
+static INLINE uint32_t
+xd3_scksum (uint32_t *state,
+            const uint8_t *base,
+            const usize_t look)
+{
+  UNALIGNED_READ32(state, base);
+  return (*state) * xd3_hash_multiplier32;
+}
+/* Same as xd3_scksum, but for the window that starts one byte further
+ * on: loads the four bytes at base+1.  `look` is unused here as well. */
+static INLINE uint32_t
+xd3_small_cksum_update (uint32_t *state,
+                        const uint8_t *base,
+                        usize_t look)
+{
+  UNALIGNED_READ32(state, base+1);
+  return (*state) * xd3_hash_multiplier32;
+}
+
+#if XD3_ENCODER
+/* Folds a checksum into a table index: the bits above cfg->shift are
+ * XORed with the bits selected by cfg->mask. */
+INLINE usize_t
+xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum)
+{
+  return (cksum >> cfg->shift) ^ (cksum & cfg->mask);
+}
+
+#if SIZEOF_USIZE_T == 4
+/* Sum of base[i] * cfg->powers[i] over the first `look` bytes. */
+INLINE uint32_t
+xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look)
+{
+  uint32_t h = 0;
+  usize_t i; /* declared up front: C89 has no for-loop declarations */
+  for (i = 0; i < look; i++) {
+    h += base[i] * cfg->powers[i];
+  }
+  return h;
+}
+
+/* Rolls the checksum one byte forward: scales the previous value, drops
+ * the contribution of base[0] and adds base[look]. */
+INLINE uint32_t
+xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum,
+                          const uint8_t *base, const usize_t look)
+{
+  return xd3_hash_multiplier32 * cksum - cfg->multiplier * base[0] + base[look];
+}
+#endif
+
+#if SIZEOF_USIZE_T == 8
+/* Sum of base[i] * cfg->powers[i] over the first `look` bytes. */
+INLINE uint64_t
+xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look)
+{
+  uint64_t h = 0;
+  usize_t i;
+  for (i = 0; i < look; i++) {
+    h += base[i] * cfg->powers[i];
+  }
+  return h;
+}
+
+/* Rolls the checksum one byte forward: scales the previous value, drops
+ * the contribution of base[0] and adds base[look]. */
+INLINE uint64_t
+xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum,
+                          const uint8_t *base, const usize_t look)
+{
+  return xd3_hash_multiplier64 * cksum - cfg->multiplier * base[0] + base[look];
+}
+#endif
+
+/* Returns i - 1 for the smallest i in [3, width-1] with slots < 2^i,
+ * or width-1 when no such i exists (width = SIZEOF_USIZE_T * 8). */
+static usize_t
+xd3_size_hashtable_bits (usize_t slots)
+{
+  usize_t bits = (SIZEOF_USIZE_T * 8) - 1;
+  usize_t i;
+
+  for (i = 3; i <= bits; i += 1)
+    {
+      /* Shift a usize_t, not an unsigned int: i can reach 63 when
+       * usize_t is 64 bits wide, and shifting a 32-bit 1U that far is
+       * undefined. */
+      if (slots < ((usize_t) 1 << i))
+        {
+          /* Note: this is the compaction=1 setting measured in
+           * checksum_test */
+          bits = i - 1;
+          break;
+        }
+    }
+
+  return bits;
+}
+
+/* Fills `cfg` for a table sized from `slots`: size is 2^bits, mask is
+ * size-1, shift is the usize_t width minus bits.  Also allocates
+ * cfg->powers with `look` entries, where powers[look-1] is 1 and each
+ * earlier entry is the next one times xd3_hash_multiplier, and sets
+ * cfg->multiplier to powers[0] times xd3_hash_multiplier.
+ *
+ * `look` must be at least 1 because powers[look-1] is written
+ * unconditionally.  Returns ENOMEM when the allocation fails, else 0. */
+int
+xd3_size_hashtable (xd3_stream   *stream,
+                    usize_t       slots,
+                    usize_t       look,
+                    xd3_hash_cfg *cfg)
+{
+  usize_t bits = xd3_size_hashtable_bits (slots);
+  int i;
+
+  /* bits can exceed 31 on 64-bit builds, so shift in usize_t. */
+  cfg->size = ((usize_t) 1 << bits);
+  cfg->mask = (cfg->size - 1);
+  cfg->shift = (SIZEOF_USIZE_T * 8) - bits;
+  cfg->look = look;
+
+  if ((cfg->powers =
+       (usize_t*) xd3_alloc0 (stream, look, sizeof (usize_t))) == NULL)
+    {
+      return ENOMEM;
+    }
+
+  cfg->powers[look-1] = 1;
+  for (i = look-2; i >= 0; i--)
+    {
+      cfg->powers[i] = cfg->powers[i+1] * xd3_hash_multiplier;
+    }
+  cfg->multiplier = cfg->powers[0] * xd3_hash_multiplier;
+
+  return 0;
+}
+
+#endif /* XD3_ENCODER */
+#endif /* _XDELTA3_HASH_H_ */
diff --git a/deps/xdelta3/xdelta3-internal.h b/deps/xdelta3/xdelta3-internal.h
new file mode 100644
index 0000000000..de7f09a54a
--- /dev/null
+++ b/deps/xdelta3/xdelta3-internal.h
@@ -0,0 +1,387 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef XDELTA3_INTERNAL_H__ +#define XDELTA3_INTERNAL_H__ + +/* To include RetroArch's INLINE macro */ +#include "retro_inline.h" +#include "xdelta3.h" + +typedef struct _main_file main_file; +typedef struct _main_extcomp main_extcomp; + +void main_buffree (void *ptr); +void* main_bufalloc (size_t size); +void main_file_init (main_file *xfile); +int main_file_close (main_file *xfile); +void main_file_cleanup (main_file *xfile); +int main_file_isopen (main_file *xfile); +int main_file_open (main_file *xfile, const char* name, int mode); +int main_file_exists (main_file *xfile); +int main_file_stat (main_file *xfile, xoff_t *size); +int xd3_whole_append_window (xd3_stream *stream); +int xd3_main_cmdline (int argc, char **argv); +int main_file_read (main_file *ifile, + uint8_t *buf, + size_t size, + size_t *nread, + const char *msg); +int main_file_write (main_file *ofile, uint8_t *buf, + usize_t size, const char *msg); +void* main_malloc (size_t size); +void main_free (void *ptr); + +int test_compare_files (const char* f0, const char* f1); +usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno); +xoff_t xd3_source_eof(const xd3_source *src); + +uint32_t xd3_large_cksum_update (uint32_t cksum, + const uint8_t *base, + usize_t look); +int xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code); + +int xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size); +xd3_output* xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output); + +int xd3_encode_init_full (xd3_stream *stream); +usize_t 
xd3_pow2_roundup (usize_t x); +long get_millisecs_now (void); +int xd3_process_stream (int is_encode, + xd3_stream *stream, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max); + +#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +int xd3_main_cmdline (int argc, char **argv); +#endif + +#if REGRESSION_TEST +int xd3_selftest (void); +#endif + +/* main_file->mode values */ +typedef enum +{ + XO_READ = 0, + XO_WRITE = 1 +} main_file_modes; + +#ifndef XD3_POSIX +#define XD3_POSIX 0 +#endif +#ifndef XD3_STDIO +#define XD3_STDIO 0 +#endif +#ifndef XD3_WIN32 +#define XD3_WIN32 0 +#endif +#ifndef NOT_MAIN +#define NOT_MAIN 0 +#endif + +/* If none are set, default to posix. */ +#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0 +#undef XD3_POSIX +#define XD3_POSIX 1 +#endif + +struct _main_file +{ +#if XD3_WIN32 + HANDLE file; +#elif XD3_STDIO + FILE *file; +#elif XD3_POSIX + int file; +#endif + + int mode; /* XO_READ and XO_WRITE */ + const char *filename; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + char *filename_copy; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + const char *realname; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + const main_extcomp *compressor; /* External compression struct. */ + int flags; /* RD_FIRST, RD_NONEXTERNAL, ... 
*/ + xoff_t nread; /* for input position */ + xoff_t nwrite; /* for output position */ + uint8_t *snprintf_buf; /* internal snprintf() use */ + int size_known; /* Set by main_set_souze */ + xoff_t source_position; /* for avoiding seek in getblk_func */ + int seek_failed; /* after seek fails once, try FIFO */ +}; + +#ifndef UINT32_MAX +#define UINT32_MAX 4294967295U +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX 18446744073709551615ULL +#endif + +#define UINT32_OFLOW_MASK 0xfe000000U +#define UINT64_OFLOW_MASK 0xfe00000000000000ULL + +/********************************************************************* + Integer encoder/decoder functions + **********************************************************************/ + +/* Consume N bytes of input, only used by the decoder. */ +#define DECODE_INPUT(n) \ + do { \ + stream->total_in += (xoff_t) (n); \ + stream->avail_in -= (n); \ + stream->next_in += (n); \ + } while (0) + +#define DECODE_INTEGER_TYPE(PART,OFLOW) \ + while (stream->avail_in != 0) \ + { \ + usize_t next = stream->next_in[0]; \ + \ + DECODE_INPUT(1); \ + \ + if (PART & OFLOW) \ + { \ + stream->msg = "overflow in decode_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + PART = (PART << 7) | (next & 127); \ + \ + if ((next & 128) == 0) \ + { \ + (*val) = PART; \ + PART = 0; \ + return 0; \ + } \ + } \ + \ + stream->msg = "further input required"; \ + return XD3_INPUT + +#define READ_INTEGER_TYPE(TYPE, OFLOW) \ + TYPE val = 0; \ + const uint8_t *inp = (*inpp); \ + usize_t next; \ + \ + do \ + { \ + if (inp == maxp) \ + { \ + stream->msg = "end-of-input in read_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + if (val & OFLOW) \ + { \ + stream->msg = "overflow in read_intger"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + next = (*inp++); \ + val = (val << 7) | (next & 127); \ + } \ + while (next & 128); \ + \ + (*valp) = val; \ + (*inpp) = inp; \ + \ + return 0 + +#define EMIT_INTEGER_TYPE() \ + /* max 64-bit value in base-7 encoding is 9.1 bytes */ \ + 
uint8_t buf[10]; \ + usize_t bufi = 10; \ + \ + /* This loop performs division and turns on all MSBs. */ \ + do \ + { \ + buf[--bufi] = (num & 127) | 128; \ + num >>= 7U; \ + } \ + while (num != 0); \ + \ + /* Turn off MSB of the last byte. */ \ + buf[9] &= 127; \ + \ + return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi) + +#define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x); +#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x); + +#if USE_UINT32 +static INLINE uint32_t +xd3_sizeof_uint32_t (uint32_t num) +{ + IF_SIZEOF32(1); + IF_SIZEOF32(2); + IF_SIZEOF32(3); + IF_SIZEOF32(4); + return 5; +} + +static INLINE int +xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); } + +static INLINE int +xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, uint32_t *valp) +{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); } + +#if XD3_ENCODER +static INLINE int +xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) +{ EMIT_INTEGER_TYPE (); } +#endif /* XD3_ENCODER */ +#endif /* USE_UINT32 */ + +#if USE_UINT64 +static INLINE uint32_t +xd3_sizeof_uint64_t (uint64_t num) +{ + IF_SIZEOF64(1); + IF_SIZEOF64(2); + IF_SIZEOF64(3); + IF_SIZEOF64(4); + IF_SIZEOF64(5); + IF_SIZEOF64(6); + IF_SIZEOF64(7); + IF_SIZEOF64(8); + IF_SIZEOF64(9); + + return 10; +} + +static INLINE int +xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); } + +static INLINE int +xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, uint64_t *valp) +{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); } + +#if XD3_ENCODER +static INLINE int +xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num) +{ EMIT_INTEGER_TYPE (); } +#endif /* XD3_ENCODER */ +#endif /* USE_UINT64 */ + +#if SIZEOF_USIZE_T == 4 +#define USIZE_T_MAX UINT32_MAX +#define 
USIZE_T_MAXBLKSZ 0x80000000U +#define XD3_MAXSRCWINSZ (1ULL << 31) +#define xd3_large_cksum xd3_large32_cksum +#define xd3_large_cksum_update xd3_large32_cksum_update +#define xd3_hash_multiplier xd3_hash_multiplier32 + +static INLINE uint32_t xd3_sizeof_size (usize_t num) +{ return xd3_sizeof_uint32_t (num); } +static INLINE int xd3_decode_size (xd3_stream *stream, usize_t *valp) +{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); } +static INLINE int xd3_read_size (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, usize_t *valp) +{ return xd3_read_uint32_t (stream, inpp, maxp, (uint32_t*) valp); } +#if XD3_ENCODER +static INLINE int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num) +{ return xd3_emit_uint32_t (stream, output, num); } +#endif + +#elif SIZEOF_USIZE_T == 8 +#define USIZE_T_MAX UINT64_MAX +#define USIZE_T_MAXBLKSZ 0x8000000000000000ULL +#define XD3_MAXSRCWINSZ (1ULL << 61) +#define xd3_large_cksum xd3_large64_cksum +#define xd3_large_cksum_update xd3_large64_cksum_update +#define xd3_hash_multiplier xd3_hash_multiplier64 + +static INLINE uint32_t xd3_sizeof_size (usize_t num) +{ return xd3_sizeof_uint64_t (num); } +static INLINE int xd3_decode_size (xd3_stream *stream, usize_t *valp) +{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); } +static INLINE int xd3_read_size (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *maxp, usize_t *valp) +{ return xd3_read_uint64_t (stream, inpp, maxp, (uint64_t*) valp); } +#if XD3_ENCODER +static INLINE int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num) +{ return xd3_emit_uint64_t (stream, output, num); } +#endif + +#endif /* SIZEOF_USIZE_T */ + +#if SIZEOF_XOFF_T == 4 +#define XOFF_T_MAX UINT32_MAX + +static INLINE int xd3_decode_offset (xd3_stream *stream, xoff_t *valp) +{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); } +#if XD3_ENCODER +static INLINE int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num) +{ 
return xd3_emit_uint32_t (stream, output, num); } +#endif + +#elif SIZEOF_XOFF_T == 8 +#define XOFF_T_MAX UINT64_MAX + +static INLINE int xd3_decode_offset (xd3_stream *stream, xoff_t *valp) +{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); } +#if XD3_ENCODER +static INLINE int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num) +{ return xd3_emit_uint64_t (stream, output, num); } +#endif + +#endif + +#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b)) +#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b)) + +int xd3_size_hashtable (xd3_stream *stream, + usize_t slots, + usize_t look, + xd3_hash_cfg *cfg); + +usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum); + +#if USE_UINT32 +uint32_t xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint32_t xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum, + const uint8_t *base, const usize_t look); +#endif /* USE_UINT32 */ + +#if USE_UINT64 +uint64_t xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint64_t xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum, + const uint8_t *base, const usize_t look); +#endif /* USE_UINT64 */ + +#define MAX_LRU_SIZE 32U +#define XD3_MINSRCWINSZ (XD3_ALLOCSIZE * MAX_LRU_SIZE) + +#endif /* XDELTA3_INTERNAL_H__ */ diff --git a/deps/xdelta3/xdelta3-list.h b/deps/xdelta3/xdelta3-list.h new file mode 100644 index 0000000000..be08220440 --- /dev/null +++ b/deps/xdelta3/xdelta3-list.h @@ -0,0 +1,130 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef __XDELTA3_LIST__ +#define __XDELTA3_LIST__ + +/* To include RetroArch's INLINE macro */ +#include "retro_inline.h" + +#define XD3_MAKELIST(LTYPE,ETYPE,LNAME) \ + \ +static INLINE ETYPE* \ +LTYPE ## _entry (LTYPE* l) \ +{ \ + return (ETYPE*) ((char*) l - (ptrdiff_t) &((ETYPE*) 0)->LNAME); \ +} \ + \ +static INLINE void \ +LTYPE ## _init (LTYPE *l) \ +{ \ + l->next = l; \ + l->prev = l; \ +} \ + \ +static INLINE void \ +LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins) \ +{ \ + next->prev = ins; \ + prev->next = ins; \ + ins->next = next; \ + ins->prev = prev; \ +} \ + \ +static INLINE void \ +LTYPE ## _push_back (LTYPE *l, ETYPE *i) \ +{ \ + LTYPE ## _add (l->prev, l, & i->LNAME); \ +} \ + \ +static INLINE void \ +LTYPE ## _del (LTYPE *next, \ + LTYPE *prev) \ +{ \ + next->prev = prev; \ + prev->next = next; \ +} \ + \ +static INLINE ETYPE* \ +LTYPE ## _remove (ETYPE *f) \ +{ \ + LTYPE *i = f->LNAME.next; \ + LTYPE ## _del (f->LNAME.next, f->LNAME.prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static INLINE ETYPE* \ +LTYPE ## _pop_back (LTYPE *l) \ +{ \ + LTYPE *i = l->prev; \ + LTYPE ## _del (i->next, i->prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static INLINE ETYPE* \ +LTYPE ## _pop_front (LTYPE *l) \ +{ \ + LTYPE *i = l->next; \ + LTYPE ## _del (i->next, i->prev); \ + return LTYPE ## _entry (i); \ +} \ + \ +static INLINE int \ +LTYPE ## _empty (LTYPE *l) \ +{ \ + return l == l->next; \ +} \ + \ +static INLINE ETYPE* \ +LTYPE ## _front (LTYPE *f) \ +{ \ + return LTYPE ## _entry (f->next); \ +} \ + \ +static INLINE ETYPE* \ +LTYPE ## 
_back (LTYPE *f) \ +{ \ + return LTYPE ## _entry (f->prev); \ +} \ + \ +static INLINE int \ +LTYPE ## _end (LTYPE *f, ETYPE *i) \ +{ \ + return f == & i->LNAME; \ +} \ + \ +static INLINE ETYPE* \ +LTYPE ## _next (ETYPE *f) \ +{ \ + return LTYPE ## _entry (f->LNAME.next); \ +} \ + \ +static INLINE usize_t \ +LTYPE ## _length (LTYPE *l) \ +{ \ + LTYPE *p; \ + usize_t c = 0; \ + \ + for (p = l->next; p != l; p = p->next) \ + { \ + c += 1; \ + } \ + \ + return c; \ +} \ + \ +typedef int unused_ ## LTYPE + +#endif diff --git a/deps/xdelta3/xdelta3-lzma.h b/deps/xdelta3/xdelta3-lzma.h new file mode 100644 index 0000000000..a707da8cac --- /dev/null +++ b/deps/xdelta3/xdelta3-lzma.h @@ -0,0 +1,195 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Note: The use of the _easy_ decoder means we're not calling the + * xd3_stream malloc hooks. TODO(jmacd) Fix if anyone cares. 
*/ + +#ifndef _XDELTA3_LZMA_H_ +#define _XDELTA3_LZMA_H_ + +#include <lzma.h> + +typedef struct _xd3_lzma_stream xd3_lzma_stream; + +struct _xd3_lzma_stream { + lzma_stream lzma; + lzma_options_lzma options; + lzma_filter filters[2]; +}; + +static xd3_sec_stream* +xd3_lzma_alloc (xd3_stream *stream) +{ + return (xd3_sec_stream*) xd3_alloc (stream, sizeof (xd3_lzma_stream), 1); +} + +static void +xd3_lzma_destroy (xd3_stream *stream, xd3_sec_stream *sec_stream) +{ + xd3_lzma_stream *ls = (xd3_lzma_stream*) sec_stream; + lzma_end (&ls->lzma); + xd3_free (stream, ls); +} + +static int +xd3_lzma_init (xd3_stream *stream, xd3_lzma_stream *sec, int is_encode) +{ + int ret; + + memset (&sec->lzma, 0, sizeof(sec->lzma)); + + if (is_encode) + { + uint32_t preset = + (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT; + + if (lzma_lzma_preset(&sec->options, preset)) + { + stream->msg = "invalid lzma preset"; + return XD3_INVALID; + } + + sec->filters[0].id = LZMA_FILTER_LZMA2; + sec->filters[0].options = &sec->options; + sec->filters[1].id = LZMA_VLI_UNKNOWN; + + ret = lzma_stream_encoder (&sec->lzma, &sec->filters[0], LZMA_CHECK_NONE); + } + else + { + ret = lzma_stream_decoder (&sec->lzma, UINT64_MAX, LZMA_TELL_NO_CHECK); + } + + if (ret != LZMA_OK) + { + stream->msg = "lzma stream init failed"; + return XD3_INTERNAL; + } + + return 0; +} + +static int xd3_decode_lzma (xd3_stream *stream, xd3_lzma_stream *sec, + const uint8_t **input_pos, + const uint8_t *const input_end, + uint8_t **output_pos, + const uint8_t *const output_end) +{ + uint8_t *output = *output_pos; + const uint8_t *input = *input_pos; + size_t avail_in = input_end - input; + size_t avail_out = output_end - output; + + sec->lzma.avail_in = avail_in; + sec->lzma.next_in = input; + sec->lzma.avail_out = avail_out; + sec->lzma.next_out = output; + + while (1) + { + int lret = lzma_code (&sec->lzma, LZMA_RUN); + + switch (lret) + { + case LZMA_NO_CHECK: + case LZMA_OK: + if (sec->lzma.avail_out == 0) + { +
(*output_pos) = sec->lzma.next_out; + (*input_pos) = sec->lzma.next_in; + return 0; + } + break; + + default: + stream->msg = "lzma decoding error"; + return XD3_INTERNAL; + } + } +} + +#if XD3_ENCODER + +static int xd3_encode_lzma (xd3_stream *stream, + xd3_lzma_stream *sec, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg) + +{ + lzma_action action = LZMA_RUN; + + cfg->inefficient = 1; /* Can't skip windows */ + sec->lzma.next_in = NULL; + sec->lzma.avail_in = 0; + sec->lzma.next_out = (output->base + output->next); + sec->lzma.avail_out = (output->avail - output->next); + + while (1) + { + int lret; + size_t nwrite; + if (sec->lzma.avail_in == 0 && input != NULL) + { + sec->lzma.avail_in = input->next; + sec->lzma.next_in = input->base; + + if ((input = input->next_page) == NULL) + { + action = LZMA_SYNC_FLUSH; + } + } + + lret = lzma_code (&sec->lzma, action); + + nwrite = (output->avail - output->next) - sec->lzma.avail_out; + + if (nwrite != 0) + { + output->next += nwrite; + + if (output->next == output->avail) + { + if ((output = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + sec->lzma.next_out = output->base; + sec->lzma.avail_out = output->avail; + } + } + + switch (lret) + { + case LZMA_OK: + break; + + case LZMA_STREAM_END: + return 0; + + default: + stream->msg = "lzma encoding error"; + return XD3_INTERNAL; + } + } + + return 0; +} + +#endif /* XD3_ENCODER */ + +#endif /* _XDELTA3_LZMA_H_ */ diff --git a/deps/xdelta3/xdelta3-second.h b/deps/xdelta3/xdelta3-second.h new file mode 100644 index 0000000000..9d715a9fe8 --- /dev/null +++ b/deps/xdelta3/xdelta3-second.h @@ -0,0 +1,321 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef _XDELTA3_SECOND_H_ +#define _XDELTA3_SECOND_H_ + +static INLINE void xd3_bit_state_encode_init (bit_state *bits) +{ + bits->cur_byte = 0; + bits->cur_mask = 1; +} + +static INLINE int xd3_decode_bits (xd3_stream *stream, + bit_state *bits, + const uint8_t **input, + const uint8_t *input_max, + usize_t nbits, + usize_t *valuep) +{ + usize_t value = 0; + usize_t vmask = 1 << nbits; + + if (bits->cur_mask == 0x100) { goto next_byte; } + + for (;;) + { + do + { + vmask >>= 1; + + if (bits->cur_byte & bits->cur_mask) + { + value |= vmask; + } + + bits->cur_mask <<= 1; + + if (vmask == 1) { goto done; } + } + while (bits->cur_mask != 0x100); + + next_byte: + + if (*input == input_max) + { + stream->msg = "secondary decoder end of input"; + return XD3_INTERNAL; + } + + bits->cur_byte = *(*input)++; + bits->cur_mask = 1; + } + + done: + + IF_DEBUG2 (DP(RINT "(d) %"W"u ", value)); + + (*valuep) = value; + return 0; +} + +#if REGRESSION_TEST +/* There may be extra bits at the end of secondary decompression, this macro + * checks for non-zero bits. This is overly strict, but helps pass the + * single-bit-error regression test. 
*/ +static int +xd3_test_clean_bits (xd3_stream *stream, bit_state *bits) +{ + for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1) + { + if (bits->cur_byte & bits->cur_mask) + { + stream->msg = "secondary decoder garbage"; + return XD3_INTERNAL; + } + } + + return 0; +} +#endif + +static int +xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp, + int is_encode) +{ + if (*sec_streamp == NULL) + { + int ret; + + if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL) + { + stream->msg = "error initializing secondary stream"; + return XD3_INVALID; + } + + if ((ret = stream->sec_type->init (stream, *sec_streamp, is_encode)) != 0) + { + return ret; + } + } + + return 0; +} + +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp) +{ + usize_t dec_size; + uint8_t *out_used; + int ret; + + if ((ret = xd3_get_secondary (stream, sec_streamp, 0)) != 0) + { + return ret; + } + + /* Decode the size, allocate the buffer. */ + if ((ret = xd3_read_size (stream, & sect->buf, + sect->buf_max, & dec_size)) || + (ret = xd3_decode_allocate (stream, dec_size, + & sect->copied2, & sect->alloc2))) + { + return ret; + } + + if (dec_size == 0) + { + stream->msg = "secondary decoder invalid output size"; + return XD3_INVALID_INPUT; + } + + out_used = sect->copied2; + + if ((ret = stream->sec_type->decode (stream, *sec_streamp, + & sect->buf, sect->buf_max, + & out_used, out_used + dec_size))) + { + return ret; + } + + if (sect->buf != sect->buf_max) + { + stream->msg = "secondary decoder finished with unused input"; + return XD3_INTERNAL; + } + + if (out_used != sect->copied2 + dec_size) + { + stream->msg = "secondary decoder short output"; + return XD3_INTERNAL; + } + + sect->buf = sect->copied2; + sect->buf_max = sect->copied2 + dec_size; + sect->size = dec_size; + + return 0; +} + +#if XD3_ENCODER +static INLINE int xd3_encode_bit (xd3_stream *stream, + xd3_output **output, + bit_state *bits, + usize_t bit) +{ + 
int ret; + + if (bit) + { + bits->cur_byte |= bits->cur_mask; + } + + /* OPT: Might help to buffer more than 8 bits at once. */ + if (bits->cur_mask == 0x80) + { + if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0) + { + return ret; + } + + bits->cur_mask = 1; + bits->cur_byte = 0; + } + else + { + bits->cur_mask <<= 1; + } + + return 0; +} + +static INLINE int xd3_flush_bits (xd3_stream *stream, + xd3_output **output, + bit_state *bits) +{ + return (bits->cur_mask == 1) ? 0 : + xd3_emit_byte (stream, output, bits->cur_byte); +} + +static INLINE int xd3_encode_bits (xd3_stream *stream, + xd3_output **output, + bit_state *bits, + usize_t nbits, + usize_t value) +{ + int ret; + usize_t mask = 1 << nbits; + + XD3_ASSERT (nbits > 0); + XD3_ASSERT (nbits < sizeof (usize_t) * 8); + XD3_ASSERT (value < mask); + + do + { + mask >>= 1; + + if ((ret = xd3_encode_bit (stream, output, bits, value & mask))) + { + return ret; + } + } + while (mask != 1); + + IF_DEBUG2 (DP(RINT "(e) %"W"u ", value)); + + return 0; +} + +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it) +{ + xd3_output *tmp_head; + xd3_output *tmp_tail; + + usize_t comp_size; + usize_t orig_size; + + int ret; + + orig_size = xd3_sizeof_output (*head); + + if (orig_size < SECONDARY_MIN_INPUT) { return 0; } + + if ((ret = xd3_get_secondary (stream, sec_streamp, 1)) != 0) + { + return ret; + } + + tmp_head = xd3_alloc_output (stream, NULL); + + /* Encode the size, encode the data. Encoding the size makes it + * simpler, but is a little gross. Should not need the entire + * section in contiguous memory, but it is much easier this way. */ + if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) || + (ret = stream->sec_type->encode (stream, *sec_streamp, *head, + tmp_head, cfg))) + { + goto getout; + } + + /* If the secondary compressor determines it's no good, it returns + * XD3_NOSECOND. 
*/ + + /* Setup tmp_tail, comp_size */ + tmp_tail = tmp_head; + comp_size = tmp_head->next; + + while (tmp_tail->next_page != NULL) + { + tmp_tail = tmp_tail->next_page; + comp_size += tmp_tail->next; + } + + XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head)); + XD3_ASSERT (tmp_tail != NULL); + + if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS) || cfg->inefficient) + { + if (comp_size < orig_size) + { + IF_DEBUG1(DP(RINT "[encode_secondary] saved %"W"u bytes: %"W"u -> %"W"u (%0.2f%%)\n", + orig_size - comp_size, orig_size, comp_size, + 100.0 * (double) comp_size / (double) orig_size)); + } + + xd3_free_output (stream, *head); + + *head = tmp_head; + *tail = tmp_tail; + *did_it = 1; + } + else + { + getout: + if (ret == XD3_NOSECOND) { ret = 0; } + xd3_free_output (stream, tmp_head); + } + + return ret; +} +#endif /* XD3_ENCODER */ +#endif /* _XDELTA3_SECOND_H_ */ diff --git a/deps/xdelta3/xdelta3.c b/deps/xdelta3/xdelta3.c new file mode 100644 index 0000000000..a6653a1c3b --- /dev/null +++ b/deps/xdelta3/xdelta3.c @@ -0,0 +1,4813 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + ------------------------------------------------------------------- + + Xdelta 3 + + The goal of this library is to implement both the (stand-alone) + data-compression and delta-compression aspects of VCDIFF encoding, and + to support a programming interface that works like Zlib + (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic + Differencing and Compression Data Format. + + VCDIFF is a unified encoding that combines data-compression and + delta-encoding ("differencing"). + + VCDIFF has a detailed byte-code instruction set with many features. + The instruction format supports an immediate size operand for small + COPYs and ADDs (e.g., under 18 bytes). There are also instruction + "modes", which are used to compress COPY addresses by using two + address caches. An instruction mode refers to slots in the NEAR + and SAME caches for recent addresses. NEAR remembers the + previous 4 (by default) COPY addresses, and SAME catches + frequent re-uses of the same address using a 3-way (by default) + 256-entry associative cache of [ADDR mod 256], the encoded byte. + A hit in the NEAR/SAME cache requires 0/1 ADDR bytes. + + VCDIFF has a default instruction table, but alternate + instruction tables may themselves be delta-compressed and + included in the encoding header. This allows even more freedom. + There are 9 instruction modes in the default code table, 4 near, 3 + same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the + current position). + + ---------------------------------------------------------------------- + + Algorithms + + Aside from the details of encoding and decoding, there are a bunch + of algorithms needed. + + 1. STRING-MATCH. A two-level fingerprinting approach is used. A + single loop computes the two checksums -- small and large -- at + successive offsets in the TARGET file. The large checksum is more + accurate and is used to discover SOURCE matches, which are + potentially very long.
The small checksum is used to discover + copies within the TARGET. Small matching, which is more expensive, + usually dominates the large STRING-MATCH costs in this code - the + more exhaustive the search, the better the results. Either of the + two string-matching mechanisms may be disabled. + + 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue + used to store overlapping copy instructions. There are two possible + optimizations that go beyond a greedy search. Both of these fall + into the category of "non-greedy matching" optimizations. + + The first optimization stems from backward SOURCE-COPY matching. + When a new SOURCE-COPY instruction covers a previous instruction in + the target completely, it is erased from the queue. Randal Burns + originally analyzed these algorithms and did a lot of related work + (\cite the 1.5-pass algorithm). + + The second optimization comes by the encoding of common very-small + COPY and ADD instructions, for which there are special DOUBLE-code + instructions, which code two instructions in a single byte. + + The cost of bad instruction-selection overhead is relatively high + for data-compression, relative to delta-compression, so this second + optimization is fairly important. With "lazy" matching (the name + used in Zlib for a similar optimization), the string-match + algorithm searches after a match for potential overlapping copy + instructions. In Xdelta and by default, VCDIFF, the minimum match + size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This + feature, combined with double instructions, provides a nice + challenge. Search in this file for "black magic", a heuristic. + + 3. STREAM ALIGNMENT. Stream alignment is needed to compress large + inputs in constant space. See xd3_srcwin_move_point(). + + 4. WINDOW SELECTION. 
When the IOPT buffer flushes, in the first call + to xd3_iopt_finish_encoding containing any kind of copy instruction, + the parameters of the source window must be decided: the offset into + the source and the length of the window. Since the IOPT buffer is + finite, the program may be forced to fix these values before knowing + the best offset/length. + + 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to + be applied to the individual sections of the data format, which are + ADDRess, INSTruction, and DATA. Several secondary compressor + variations are implemented here, although none is standardized yet. + + One is an adaptive huffman algorithm -- the FGK algorithm (Faller, + Gallager, and Knuth, 1985). This compressor is extremely slow. + + The other is a simple static Huffman routine, which is the base + case of a semi-adaptive scheme published by D.J. Wheeler and first + widely used in bzip2 (by Julian Seward). This is a very + interesting algorithm, originally published in nearly cryptic form + by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, + secondary compression remains off by default. + ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps} + -------------------------------------------------------------------- + + Other Features + + 1. USER CONVENIENCE + + For user convenience, it is essential to recognize Gzip-compressed + files and automatically Gzip-decompress them prior to + delta-compression (or else no delta-compression will be achieved + unless the user manually decompresses the inputs). The compressed + representation competes with Xdelta, and this must be hidden from the + command-line user interface. The Xdelta-1.x encoding was simple, not + compressed itself, so Xdelta-1.x uses Zlib internally to compress the + representation. + + This implementation supports external compression, which implements + the necessary fork() and pipe() mechanics.
There is a tricky step + involved to support automatic detection of a compressed input in a + non-seekable input. First you read a bit of the input to detect + magic headers. When a compressed format is recognized, exec() the + external compression program and create a second child process to + copy the original input stream. [Footnote: There is a difficulty + related to using Gzip externally. It is not possible to decompress + and recompress a Gzip file transparently. If FILE.GZ had a + cryptographic signature, then, after: (1) Gzip-decompression, (2) + Xdelta-encoding, (3) Gzip-compression the signature could be + broken. The only way to solve this problem is to guess at Gzip's + compression level or control it by other means. I recommend that + specific implementations of any compression scheme store + information needed to exactly re-compress the input, that way + external compression is transparent - however, this won't happen + here until it has stabilized.] + + 2. APPLICATION-HEADER + + This feature was introduced in RFC3284. It allows any application + to include a header within the VCDIFF file format. This allows + general inter-application data exchange with support for + application-specific extensions to communicate metadata. + + 3. VCDIFF CHECKSUM + + An optional checksum value is included with each window, which can + be used to validate the final result. This verifies the correct source + file was used for decompression as well as the obvious advantage: + checking the implementation (and underlying) correctness. + + 4. LIGHT WEIGHT + + The code makes efforts to avoid copying data more than necessary. + The code delays many initialization tasks until the first use, it + optimizes for identical (perfectly matching) inputs. It does not + compute any checksums until the first lookup misses. Memory usage + is reduced. String-matching is templatized (by slightly gross use + of CPP) to hard-code alternative compile-time defaults. 
The code + has few outside dependencies. + ---------------------------------------------------------------------- + + The default rfc3284 instruction table: + (see RFC for the explanation) + + TYPE SIZE MODE TYPE SIZE MODE INDEX + -------------------------------------------------------------------- + 1. Run 0 0 Noop 0 0 0 + 2. Add 0, [1,17] 0 Noop 0 0 [1,18] + 3. Copy 0, [4,18] 0 Noop 0 0 [19,34] + 4. Copy 0, [4,18] 1 Noop 0 0 [35,50] + 5. Copy 0, [4,18] 2 Noop 0 0 [51,66] + 6. Copy 0, [4,18] 3 Noop 0 0 [67,82] + 7. Copy 0, [4,18] 4 Noop 0 0 [83,98] + 8. Copy 0, [4,18] 5 Noop 0 0 [99,114] + 9. Copy 0, [4,18] 6 Noop 0 0 [115,130] + 10. Copy 0, [4,18] 7 Noop 0 0 [131,146] + 11. Copy 0, [4,18] 8 Noop 0 0 [147,162] + 12. Add [1,4] 0 Copy [4,6] 0 [163,174] + 13. Add [1,4] 0 Copy [4,6] 1 [175,186] + 14. Add [1,4] 0 Copy [4,6] 2 [187,198] + 15. Add [1,4] 0 Copy [4,6] 3 [199,210] + 16. Add [1,4] 0 Copy [4,6] 4 [211,222] + 17. Add [1,4] 0 Copy [4,6] 5 [223,234] + 18. Add [1,4] 0 Copy 4 6 [235,238] + 19. Add [1,4] 0 Copy 4 7 [239,242] + 20. Add [1,4] 0 Copy 4 8 [243,246] + 21. Copy 4 [0,8] Add 1 0 [247,255] + -------------------------------------------------------------------- + + Reading the source: Overview + + This file includes itself in several passes to macro-expand certain + sections with variable forms. Just read ahead, there's only a + little confusion. I know this sounds ugly, but hard-coding some of + the string-matching parameters results in a 10-15% increase in + string-match performance. The only time this hurts is when you have + unbalanced #if/endifs. + + A single compilation unit tames the Makefile. In short, this is to + allow the above-described hack without an exploding Makefile. The + single compilation unit includes the core library features, + configurable string-match templates, optional main() command-line + tool, misc optional features, and a regression test. Features are + controlled with CPP #defines, see Makefile.am.
+ + The initial __XDELTA3_C_HEADER_PASS__ starts first, the _INLINE_ and + _TEMPLATE_ sections follow. Easy stuff first, hard stuff last. + + Optional features include: + + xdelta3-main.h The command-line interface, external compression + support, POSIX-specific, info & VCDIFF-debug tools. + (Excluded from RetroArch fork.) + xdelta3-second.h The common secondary compression routines. + xdelta3-decode.h All decoding routines. + xdelta3-djw.h The semi-adaptive huffman secondary encoder. + xdelta3-fgk.h The adaptive huffman secondary encoder. + xdelta3-test.h The unit test covers major algorithms, + encoding and decoding. There are single-bit + error decoding tests. There are 32/64-bit file size + boundary tests. There are command-line tests. + There are compression tests. There are external + compression tests. There are string-matching tests. + (Excluded from RetroArch fork.) + There should be more tests... + + Additional headers include: + + xdelta3.h The public header file. + xdelta3-cfgs.h The default settings for default, built-in + encoders. These are hard-coded at + compile-time. There is also a single + soft-coded string matcher for experimenting + with arbitrary values. + xdelta3-list.h A cyclic list template + + Misc little debug utilities: + + badcopy.c Randomly modifies an input file based on two + parameters: (1) the probability that a byte in + the file is replaced with a pseudo-random value, + and (2) the mean change size. Changes are + generated using an exponential distribution + which approximates the expected error_prob + distribution. + -------------------------------------------------------------------- + + This file itself is unusually large. I hope to defend this layout + with lots of comments. Everything in this file is related to + encoding and decoding. I like it all together - the template stuff + is just a hack.
*/ + +#ifndef __XDELTA3_C_HEADER_PASS__ +#define __XDELTA3_C_HEADER_PASS__ + +#include "xdelta3.h" +#include "xdelta3-internal.h" + +/*********************************************************************** + STATIC CONFIGURATION + ***********************************************************************/ + +#ifndef XD3_MAIN /* the main application */ +#define XD3_MAIN 0 +#endif + +#ifndef VCDIFF_TOOLS +#define VCDIFF_TOOLS XD3_MAIN +#endif + +#ifndef SECONDARY_FGK /* one from the algorithm preservation department: */ +#define SECONDARY_FGK 0 /* adaptive Huffman routines */ +#endif + +#ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */ +#define SECONDARY_DJW 0 /* standardization, off by default until such time. */ +#endif + +#ifndef SECONDARY_LZMA +#ifdef HAVE_LZMA_H +#define SECONDARY_LZMA 1 +#else +#define SECONDARY_LZMA 0 +#endif +#endif + +#if XD3_ENCODER +#define IF_ENCODER(x) x +#else +#define IF_ENCODER(x) +#endif + +/***********************************************************************/ + + /* header indicator bits */ +#define VCD_SECONDARY (1U << 0) /* uses secondary compressor */ +#define VCD_CODETABLE (1U << 1) /* supplies code table data */ +#define VCD_APPHEADER (1U << 2) /* supplies application data */ +#define VCD_INVHDR (~0x7U) + + /* window indicator bits */ +#define VCD_SOURCE (1U << 0) /* copy window in source file */ +#define VCD_TARGET (1U << 1) /* copy window in target file */ +#define VCD_ADLER32 (1U << 2) /* has adler32 checksum */ +#define VCD_INVWIN (~0x7U) + +#define VCD_SRCORTGT (VCD_SOURCE | VCD_TARGET) + + /* delta indicator bits */ +#define VCD_DATACOMP (1U << 0) +#define VCD_INSTCOMP (1U << 1) +#define VCD_ADDRCOMP (1U << 2) +#define VCD_INVDEL (~0x7U) + +typedef enum { + VCD_DJW_ID = 1, + VCD_LZMA_ID = 2, + VCD_FGK_ID = 16 /* Note: these are not standard IANA-allocated IDs! 
*/ +} xd3_secondary_ids; + +typedef enum { + SEC_NOFLAGS = 0, + + /* Note: SEC_COUNT_FREQS Not implemented (to eliminate 1st Huffman pass) */ + SEC_COUNT_FREQS = (1 << 0) +} xd3_secondary_flags; + +typedef enum { + DATA_SECTION, /* These indicate which section to the secondary + * compressor. */ + INST_SECTION, /* The header section is not compressed, therefore not + * listed here. */ + ADDR_SECTION +} xd3_section_type; + +typedef unsigned int xd3_rtype; + +/***********************************************************************/ + +#include "xdelta3-list.h" + +#if XD3_ENCODER +XD3_MAKELIST(xd3_rlist, xd3_rinst, link); +#endif + +/***********************************************************************/ + +#define SECONDARY_MIN_SAVINGS 2 /* Secondary compression has to save + at least this many bytes. */ +#define SECONDARY_MIN_INPUT 10 /* Secondary compression needs at + least this many bytes. */ + +#define VCDIFF_MAGIC1 0xd6 /* 1st file byte */ +#define VCDIFF_MAGIC2 0xc3 /* 2nd file byte */ +#define VCDIFF_MAGIC3 0xc4 /* 3rd file byte */ +#define VCDIFF_VERSION 0x00 /* 4th file byte */ + +#define VCD_SELF 0 /* 1st address mode */ +#define VCD_HERE 1 /* 2nd address mode */ + +#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK || SECONDARY_LZMA) + +#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary + * compressor alphabet. */ + +#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from + * offset 0 using this offset. */ + +#define MAX_MATCH_SPLIT 18U /* VCDIFF code table: 18 is the default limit + * for direct-coded ADD sizes */ + +#define LEAST_MATCH_INCR 0 /* The least number of bytes an overlapping + * match must beat the preceding match by. This + * is a bias for the lazy match optimization. A + * non-zero value means that an adjacent match + * has to be better by more than the step + * between them. 0. 
*/ + +#define MIN_MATCH 4U /* VCDIFF code table: MIN_MATCH=4 */ +#define MIN_RUN 8U /* The shortest run, if it is shorter than this + * an immediate add/copy will be just as good. + * ADD1/COPY6 = 1I+1D+1A bytes, RUN18 = + * 1I+1D+1A. */ + +#define MAX_MODES 9 /* Maximum number of nodes used for + * compression--does not limit decompression. */ + +#define ENC_SECTS 4 /* Number of separate output sections. */ + +#define HDR_TAIL(s) ((s)->enc_tails[0]) +#define DATA_TAIL(s) ((s)->enc_tails[1]) +#define INST_TAIL(s) ((s)->enc_tails[2]) +#define ADDR_TAIL(s) ((s)->enc_tails[3]) + +#define HDR_HEAD(s) ((s)->enc_heads[0]) +#define DATA_HEAD(s) ((s)->enc_heads[1]) +#define INST_HEAD(s) ((s)->enc_heads[2]) +#define ADDR_HEAD(s) ((s)->enc_heads[3]) + +/* Template instances. */ +#if XD3_BUILD_SLOW +#define IF_BUILD_SLOW(x) x +#else +#define IF_BUILD_SLOW(x) +#endif +#if XD3_BUILD_FAST +#define IF_BUILD_FAST(x) x +#else +#define IF_BUILD_FAST(x) +#endif +#if XD3_BUILD_FASTER +#define IF_BUILD_FASTER(x) x +#else +#define IF_BUILD_FASTER(x) +#endif +#if XD3_BUILD_FASTEST +#define IF_BUILD_FASTEST(x) x +#else +#define IF_BUILD_FASTEST(x) +#endif +#if XD3_BUILD_SOFT +#define IF_BUILD_SOFT(x) x +#else +#define IF_BUILD_SOFT(x) +#endif +#if XD3_BUILD_DEFAULT +#define IF_BUILD_DEFAULT(x) x +#else +#define IF_BUILD_DEFAULT(x) +#endif + +/* Update the run-length state */ +#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } \ + else { run_c = (c); run_l = 1; } } while (0) + +/* This CPP-conditional stuff can be cleaned up... 
*/ +#if REGRESSION_TEST +#define IF_REGRESSION(x) x +#else +#define IF_REGRESSION(x) +#endif + +/***********************************************************************/ + +#if XD3_ENCODER +static void* xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size); + + +static int xd3_alloc_iopt (xd3_stream *stream, usize_t elts); + +static void xd3_free_output (xd3_stream *stream, + xd3_output *output); + +static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, uint8_t code); +static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, + uint8_t code); + +static usize_t xd3_sizeof_output (xd3_output *output); +static void xd3_encode_reset (xd3_stream *stream); + +static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos); +static int xd3_source_extend_match (xd3_stream *stream); +static int xd3_srcwin_setup (xd3_stream *stream); +static usize_t xd3_iopt_last_matched (xd3_stream *stream); +static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, + uint32_t num); + +static usize_t xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset); +static int xd3_string_match_init (xd3_stream *stream); +static uint32_t xd3_scksum (uint32_t *state, const uint8_t *seg, + const usize_t ln); +static usize_t xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp); +static int xd3_srcwin_move_point (xd3_stream *stream, + usize_t *next_move_point); + +static int xd3_emit_run (xd3_stream *stream, usize_t pos, + usize_t size, uint8_t *run_c); +static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t low); +static void xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos); + + +#if XD3_DEBUG +static void xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_run_l, + uint8_t *x_run_c); +static void xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_cksum); +static void xd3_verify_small_state 
(xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum); + +#endif /* XD3_DEBUG */ +#endif /* XD3_ENCODER */ + +static int xd3_decode_allocate (xd3_stream *stream, usize_t size, + uint8_t **copied1, usize_t *alloc1); + +static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); +static void xd3_free (xd3_stream *stream, void *ptr); + +const char* xd3_strerror (int ret) +{ + switch (ret) + { + case XD3_INPUT: return "XD3_INPUT"; + case XD3_OUTPUT: return "XD3_OUTPUT"; + case XD3_GETSRCBLK: return "XD3_GETSRCBLK"; + case XD3_GOTHEADER: return "XD3_GOTHEADER"; + case XD3_WINSTART: return "XD3_WINSTART"; + case XD3_WINFINISH: return "XD3_WINFINISH"; + case XD3_TOOFARBACK: return "XD3_TOOFARBACK"; + case XD3_INTERNAL: return "XD3_INTERNAL"; + case XD3_INVALID: return "XD3_INVALID"; + case XD3_INVALID_INPUT: return "XD3_INVALID_INPUT"; + case XD3_NOSECOND: return "XD3_NOSECOND"; + case XD3_UNIMPLEMENTED: return "XD3_UNIMPLEMENTED"; + } + return NULL; +} + +/***********************************************************************/ + +#define xd3_sec_data(s) ((s)->sec_stream_d) +#define xd3_sec_inst(s) ((s)->sec_stream_i) +#define xd3_sec_addr(s) ((s)->sec_stream_a) + +struct _xd3_sec_type +{ + uint8_t id; + const char *name; + xd3_secondary_flags flags; + + /* xd3_sec_stream is opaque to the generic code */ + xd3_sec_stream* (*alloc) (xd3_stream *stream); + void (*destroy) (xd3_stream *stream, + xd3_sec_stream *sec); + int (*init) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + int is_encode); + int (*decode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + const uint8_t **input, + const uint8_t *input_end, + uint8_t **output, + const uint8_t *output_end); +#if XD3_ENCODER + int (*encode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif +}; + +#define BIT_STATE_ENCODE_INIT { 0, 1 } +#define BIT_STATE_DECODE_INIT { 0, 0x100 } + +typedef struct _bit_state bit_state; 
+struct _bit_state +{ + uint8_t cur_byte; + usize_t cur_mask; +}; + +#if SECONDARY_ANY == 0 +#define IF_SEC(x) +#define IF_NSEC(x) x +#else /* yuck */ +#define IF_SEC(x) x +#define IF_NSEC(x) +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp); +#if XD3_ENCODER +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it); +#endif +#endif /* SECONDARY_ANY */ + +#if SECONDARY_FGK +extern const xd3_sec_type fgk_sec_type; +#define IF_FGK(x) x +#define FGK_CASE(s) \ + s->sec_type = & fgk_sec_type; \ + break; +#else +#define IF_FGK(x) +#define FGK_CASE(s) \ + s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \ + return XD3_INTERNAL; +#endif + +#if SECONDARY_DJW +extern const xd3_sec_type djw_sec_type; +#define IF_DJW(x) x +#define DJW_CASE(s) \ + s->sec_type = & djw_sec_type; \ + break; +#else +#define IF_DJW(x) +#define DJW_CASE(s) \ + s->msg = "unavailable secondary compressor: DJW Static Huffman"; \ + return XD3_INTERNAL; +#endif + +#if SECONDARY_LZMA +extern const xd3_sec_type lzma_sec_type; +#define IF_LZMA(x) x +#define LZMA_CASE(s) \ + s->sec_type = & lzma_sec_type; \ + break; +#else +#define IF_LZMA(x) +#define LZMA_CASE(s) \ + s->msg = "unavailable secondary compressor: LZMA"; \ + return XD3_INTERNAL; +#endif + +/***********************************************************************/ + +#include "xdelta3-hash.h" + +/* Process template passes - this includes xdelta3.c several times. */ +#define __XDELTA3_C_TEMPLATE_PASS__ +#include "xdelta3-cfgs.h" +#undef __XDELTA3_C_TEMPLATE_PASS__ + +/* Process the inline pass. 
*/ +#define __XDELTA3_C_INLINE_PASS__ +#include "xdelta3.c" +#undef __XDELTA3_C_INLINE_PASS__ + +/* Secondary compression */ +#if SECONDARY_ANY +#include "xdelta3-second.h" +#endif + +#if SECONDARY_FGK +#include "xdelta3-fgk.h" +const xd3_sec_type fgk_sec_type = +{ + VCD_FGK_ID, + "FGK Adaptive Huffman", + SEC_NOFLAGS, + (xd3_sec_stream* (*)(xd3_stream*)) fgk_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) fgk_destroy, + (int (*)(xd3_stream*, xd3_sec_stream*, int)) fgk_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_fgk, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_fgk) +}; +#endif + +#if SECONDARY_DJW +#include "xdelta3-djw.h" +const xd3_sec_type djw_sec_type = +{ + VCD_DJW_ID, + "Static Huffman", + SEC_COUNT_FREQS, + (xd3_sec_stream* (*)(xd3_stream*)) djw_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) djw_destroy, + (int (*)(xd3_stream*, xd3_sec_stream*, int)) djw_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_huff, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_huff) +}; +#endif + +#if SECONDARY_LZMA +#include "xdelta3-lzma.h" +const xd3_sec_type lzma_sec_type = +{ + VCD_LZMA_ID, + "lzma", + SEC_NOFLAGS, + (xd3_sec_stream* (*)(xd3_stream*)) xd3_lzma_alloc, + (void (*)(xd3_stream*, xd3_sec_stream*)) xd3_lzma_destroy, + (int (*)(xd3_stream*, xd3_sec_stream*, int)) xd3_lzma_init, + (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*, + uint8_t**, const uint8_t*)) xd3_decode_lzma, + IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*, + xd3_output*, xd3_sec_cfg*)) xd3_encode_lzma) +}; +#endif + +#endif /* __XDELTA3_C_HEADER_PASS__ */ +#ifdef __XDELTA3_C_INLINE_PASS__ + +/**************************************************************** + Instruction tables + 
*****************************************************************/ + +/* The following code implements a parametrized description of the + * code table given above for a few reasons. It is not necessary for + * implementing the standard, to support compression with variable + * tables, so an implementation is only required to know the default + * code table to begin decompression. (If the encoder uses an + * alternate table, the table is included in compressed form inside + * the VCDIFF file.) + * + * Before adding variable-table support there were two functions which + * were hard-coded to the default table above. + * xd3_compute_default_table() would create the default table by + * filling a 256-elt array of xd3_dinst values. The corresponding + * function, xd3_choose_instruction(), would choose an instruction + * based on the hard-coded parameters of the default code table. + * + * Notes: The parametrized code table description here only generates + * tables of a certain regularity similar to the default table by + * allowing to vary the distribution of single- and + * double-instructions and change the number of near and same copy + * modes. More exotic tables are only possible by extending this + * code. + * + * For performance reasons, both the parametrized and non-parametrized + * versions of xd3_choose_instruction remain. The parametrized + * version is only needed for testing multi-table decoding support. + * If ever multi-table encoding is required, this can be optimized by + * compiling static functions for each table. + */ + +/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the + * table description when GENERIC_ENCODE_TABLES are in use. The + * IF_GENCODETBL macro enables generic-code-table specific code + * (removed 10/2014). 
*/ +#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) \ + xd3_choose_instruction (prev, inst) + +/* This structure maintains information needed by + * xd3_choose_instruction to compute the code for a double instruction + * by first indexing an array of code_table_sizes by copy mode, then + * using (offset + (muliplier * X)) */ +struct _xd3_code_table_sizes { + uint8_t cpy_max; + uint8_t offset; + uint8_t mult; +}; + +/* This contains a complete description of a code table. */ +struct _xd3_code_table_desc +{ + /* Assumes a single RUN instruction */ + /* Assumes that MIN_MATCH is 4 */ + + uint8_t add_sizes; /* Number of immediate-size single + adds (default 17) */ + uint8_t near_modes; /* Number of near copy modes (default 4) */ + uint8_t same_modes; /* Number of same copy modes (default 3) */ + uint8_t cpy_sizes; /* Number of immediate-size single + copies (default 15) */ + + uint8_t addcopy_add_max; /* Maximum add size for an add-copy + double instruction, all modes + (default 4) */ + uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy + double instruction, up through + VCD_NEAR modes (default 6) */ + uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy + double instruction, VCD_SAME modes + (default 4) */ + + uint8_t copyadd_add_max; /* Maximum add size for a copy-add + double instruction, all modes + (default 1) */ + uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add + double instruction, up through + VCD_NEAR modes (default 4) */ + uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add + double instruction, VCD_SAME modes + (default 4) */ + + xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; + xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; +}; + +/* The rfc3284 code table is represented: */ +static const xd3_code_table_desc __rfc3284_code_table_desc = { + 17, /* add sizes */ + 4, /* near modes */ + 3, /* same modes */ + 15, /* copy sizes */ + + 4, /* add-copy max add */ + 6, /* add-copy max cpy, near */ + 4, 
/* add-copy max cpy, same */ + + 1, /* copy-add max add */ + 4, /* copy-add max cpy, near */ + 4, /* copy-add max cpy, same */ + + /* addcopy */ + { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3}, + {4,235,1},{4,239,1},{4,243,1} }, + /* copyadd */ + { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1}, + {4,253,1},{4,254,1},{4,255,1} }, +}; + +/* Computes code table entries of TBL using the specified description. */ +static void +xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) +{ + uint8_t size1, size2; + uint8_t mode; + usize_t cpy_modes = 2U + desc->near_modes + desc->same_modes; + xd3_dinst *d = tbl; + + (d++)->type1 = XD3_RUN; + (d++)->type1 = XD3_ADD; + + for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + (d++)->type1 = XD3_CPY + mode; + + for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; + size1 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? + desc->addcopy_near_cpy_max : + desc->addcopy_same_cpy_max; + + for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + d->type2 = XD3_CPY + mode; + d->size2 = size2; + } + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? + desc->copyadd_near_cpy_max : + desc->copyadd_same_cpy_max; + + for (size1 = MIN_MATCH; size1 <= max; size1 += 1) + { + for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + d->type2 = XD3_ADD; + d->size2 = size2; + } + } + } + + XD3_ASSERT (d - tbl == 256); +} + +/* This function generates the static default code table. 
*/ +static const xd3_dinst* +xd3_rfc3284_code_table (void) +{ + static xd3_dinst __rfc3284_code_table[256]; + + if (__rfc3284_code_table[0].type1 != XD3_RUN) + { + xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table); + } + + return __rfc3284_code_table; +} + +#if XD3_ENCODER +/* This version of xd3_choose_instruction is hard-coded for the default + table. */ +static void +xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst) +{ + switch (inst->type) + { + case XD3_RUN: + inst->code1 = 0; + break; + + case XD3_ADD: + inst->code1 = 1; + + if (inst->size <= 17) + { + inst->code1 += inst->size; + + if ( (inst->size == 1) && + (prev != NULL) && + (prev->size == 4) && + (prev->type >= XD3_CPY) ) + { + prev->code2 = 247 + (prev->type - XD3_CPY); + } + } + + break; + + default: + { + uint8_t mode = inst->type - XD3_CPY; + + XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); + + inst->code1 = 19 + 16 * mode; + + if (inst->size <= 18 && inst->size >= 4) + { + inst->code1 += inst->size - 3; + + if ( (prev != NULL) && + (prev->type == XD3_ADD) && + (prev->size <= 4) ) + { + if ( (inst->size <= 6) && + (mode <= 5) ) + { + prev->code2 = (uint8_t)(163 + (mode * 12) + + (3 * (prev->size - 1)) + + (inst->size - 4)); + XD3_ASSERT (prev->code2 <= 234); + } + else if ( (inst->size == 4) && + (mode >= 6) ) + { + prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1); + + XD3_ASSERT (prev->code2 <= 246); + } + } + } + + XD3_ASSERT (inst->code1 <= 162); + } + break; + } +} +#endif /* XD3_ENCODER */ + +/***********************************************************************/ + +static INLINE void +xd3_swap_uint8p (uint8_t** p1, uint8_t** p2) +{ + uint8_t *t = (*p1); + (*p1) = (*p2); + (*p2) = t; +} + +static INLINE void +xd3_swap_usize_t (usize_t* p1, usize_t* p2) +{ + usize_t t = (*p1); + (*p1) = (*p2); + (*p2) = t; +} + +/* It's not constant time, but it computes the log. 
*/ +static int +xd3_check_pow2 (xoff_t value, usize_t *logof) +{ + xoff_t x = 1; + usize_t nolog; + if (logof == NULL) { + logof = &nolog; + } + + *logof = 0; + + for (; x != 0; x <<= 1, *logof += 1) + { + if (x == value) + { + return 0; + } + } + + return XD3_INTERNAL; +} + +usize_t +xd3_pow2_roundup (usize_t x) +{ + usize_t i = 1; + while (x > i) { + i <<= 1U; + } + return i; +} + +static xoff_t +xd3_xoff_roundup (xoff_t x) +{ + xoff_t i = 1; + while (x > i) { + i <<= 1U; + } + return i; +} + +static usize_t +xd3_round_blksize (usize_t sz, usize_t blksz) +{ + usize_t mod = sz & (blksz-1); + + XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0); + + if (mod == 0) + { + return sz; + } + + if (sz > USIZE_T_MAXBLKSZ) + { + return USIZE_T_MAXBLKSZ; + } + + return sz + (blksz - mod); +} + +/*********************************************************************** + Adler32 stream function: code copied from Zlib, defined in RFC1950 + ***********************************************************************/ + +#define A32_BASE 65521L /* Largest prime smaller than 2^16 */ +#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + + (n+1)(BASE-1) <= 2^32-1 */ + +#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1); +#define A32_DO4(buf,i) A32_DO2(buf,i); A32_DO2(buf,i+2); +#define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4); +#define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8); + +static uint32_t adler32 (uint32_t adler, const uint8_t *buf, usize_t len) +{ + uint32_t s1 = adler & 0xffffU; + uint32_t s2 = (adler >> 16) & 0xffffU; + int k; + + while (len > 0) + { + k = (len < A32_NMAX) ? 
len : A32_NMAX; + len -= k; + + while (k >= 16) + { + A32_DO16(buf); + buf += 16; + k -= 16; + } + + if (k != 0) + { + do + { + s1 += *buf++; + s2 += s1; + } + while (--k); + } + + s1 %= A32_BASE; + s2 %= A32_BASE; + } + + return (s2 << 16) | s1; +} + +/*********************************************************************** + Run-length function + ***********************************************************************/ + +#if XD3_ENCODER +static usize_t +xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp) +{ + usize_t i; + usize_t run_l = 0; + uint8_t run_c = 0; + + for (i = 0; i < slook; i += 1) + { + NEXTRUN(seg[i]); + } + + (*run_cp) = run_c; + + return run_l; +} +#endif + +/*********************************************************************** + Basic encoder/decoder functions + ***********************************************************************/ + +#if XD3_ENCODER +INLINE int +xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code) +{ + xd3_output *output = (*outputp); + + if (output->next == output->avail) + { + xd3_output *aoutput; + + if ((aoutput = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + output = (*outputp) = aoutput; + } + + output->base[output->next++] = code; + + return 0; +} + +INLINE int +xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size) +{ + xd3_output *output = (*outputp); + + do + { + usize_t take; + + if (output->next == output->avail) + { + xd3_output *aoutput; + + if ((aoutput = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + output = (*outputp) = aoutput; + } + + take = xd3_min (output->avail - output->next, size); + + memcpy (output->base + output->next, base, (size_t) take); + + output->next += take; + size -= take; + base += take; + } + while (size > 0); + + return 0; +} +#endif /* XD3_ENCODER */ + +/*********************************************************************** + Address cache stuff + 
***********************************************************************/ + +static int +xd3_alloc_cache (xd3_stream *stream) +{ + if (stream->acache.near_array != NULL) + { + xd3_free (stream, stream->acache.near_array); + } + + if (stream->acache.same_array != NULL) + { + xd3_free (stream, stream->acache.same_array); + } + + if (((stream->acache.s_near > 0) && + (stream->acache.near_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_near, + (usize_t) sizeof (usize_t))) + == NULL) || + ((stream->acache.s_same > 0) && + (stream->acache.same_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_same * 256, + (usize_t) sizeof (usize_t))) + == NULL)) + { + return ENOMEM; + } + + return 0; +} + +void +xd3_init_cache (xd3_addr_cache* acache) +{ + if (acache->s_near > 0) + { + memset (acache->near_array, 0, acache->s_near * sizeof (usize_t)); + acache->next_slot = 0; + } + + if (acache->s_same > 0) + { + memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t)); + } +} + +static void +xd3_update_cache (xd3_addr_cache* acache, usize_t addr) +{ + if (acache->s_near > 0) + { + acache->near_array[acache->next_slot] = addr; + acache->next_slot = (acache->next_slot + 1) % acache->s_near; + } + + if (acache->s_same > 0) + { + acache->same_array[addr % (acache->s_same*256)] = addr; + } +} + +#if XD3_ENCODER +/* OPT: this gets called a lot, can it be optimized? */ +static int +xd3_encode_address (xd3_stream *stream, + usize_t addr, + usize_t here, + uint8_t* mode) +{ + usize_t d, bestd; + usize_t i, bestm; + int ret; + xd3_addr_cache* acache = & stream->acache; + +#define SMALLEST_INT(x) do { if (((x) & ~127U) == 0) { goto good; } } while (0) + + /* Attempt to find the address mode that yields the smallest integer value + * for "d", the encoded address value, thereby minimizing the encoded size + * of the address. 
*/ + bestd = addr; + bestm = VCD_SELF; + + XD3_ASSERT (addr < here); + + SMALLEST_INT (bestd); + + if ((d = here-addr) < bestd) + { + bestd = d; + bestm = VCD_HERE; + + SMALLEST_INT (bestd); + } + + for (i = 0; i < acache->s_near; i += 1) + { + /* Note: If we used signed computation here, we'd could compte d + * and then check (d >= 0 && d < bestd). */ + if (addr >= acache->near_array[i]) + { + d = addr - acache->near_array[i]; + + if (d < bestd) + { + bestd = d; + bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */ + + SMALLEST_INT (bestd); + } + } + } + + if (acache->s_same > 0 && + acache->same_array[d = addr%(acache->s_same*256)] == addr) + { + bestd = d%256; + /* 2 + s_near offsets past the VCD_NEAR modes */ + bestm = acache->s_near + 2 + d/256; + + if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) + { + return ret; + } + } + else + { + good: + + if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) + { + return ret; + } + } + + xd3_update_cache (acache, addr); + + (*mode) += bestm; + + return 0; +} +#endif + +static int +xd3_decode_address (xd3_stream *stream, usize_t here, + usize_t mode, const uint8_t **inpp, + const uint8_t *max, usize_t *valp) +{ + int ret; + usize_t same_start = 2 + stream->acache.s_near; + + if (mode < same_start) + { + if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; } + + switch (mode) + { + case VCD_SELF: + break; + case VCD_HERE: + (*valp) = here - (*valp); + break; + default: + (*valp) += stream->acache.near_array[mode - 2]; + break; + } + } + else + { + if (*inpp == max) + { + stream->msg = "address underflow"; + return XD3_INVALID_INPUT; + } + + mode -= same_start; + + (*valp) = stream->acache.same_array[mode*256 + (**inpp)]; + + (*inpp) += 1; + } + + xd3_update_cache (& stream->acache, *valp); + + return 0; +} + +/*********************************************************************** + Alloc/free +***********************************************************************/ + +static 
void* +__xd3_alloc_func (void* opaque, size_t items, usize_t size) +{ + return malloc (items * (size_t) size); +} + +static void +__xd3_free_func (void* opaque, void* address) +{ + free (address); +} + +static void* +xd3_alloc (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = stream->alloc (stream->opaque, elts, size); + + if (a != NULL) + { + IF_DEBUG (stream->alloc_cnt += 1); + IF_DEBUG2 (DP(RINT "[stream %p malloc] size %"W"u ptr %p\n", + (void*)stream, elts * size, a)); + } + else + { + stream->msg = "out of memory"; + } + + return a; +} + +static void +xd3_free (xd3_stream *stream, + void *ptr) +{ + if (ptr != NULL) + { + IF_DEBUG (stream->free_cnt += 1); + XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt); + IF_DEBUG2 (DP(RINT "[stream %p free] %p\n", + (void*)stream, ptr)); + stream->free (stream->opaque, ptr); + } +} + +#if XD3_ENCODER +static void* +xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = xd3_alloc (stream, elts, size); + + if (a != NULL) + { + memset (a, 0, (size_t) (elts * size)); + } + + return a; +} + +xd3_output* +xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output) +{ + xd3_output *output; + uint8_t *base; + + if (stream->enc_free != NULL) + { + output = stream->enc_free; + stream->enc_free = output->next_page; + } + else + { + if ((output = (xd3_output*) xd3_alloc (stream, 1, + (usize_t) sizeof (xd3_output))) + == NULL) + { + return NULL; + } + + if ((base = (uint8_t*) xd3_alloc (stream, XD3_ALLOCSIZE, + sizeof (uint8_t))) == NULL) + { + xd3_free (stream, output); + return NULL; + } + + output->base = base; + output->avail = XD3_ALLOCSIZE; + } + + output->next = 0; + + if (old_output) + { + old_output->next_page = output; + } + + output->next_page = NULL; + + return output; +} + +static usize_t +xd3_sizeof_output (xd3_output *output) +{ + usize_t s = 0; + + for (; output; output = output->next_page) + { + s += output->next; + } + + return s; +} + +static void +xd3_freelist_output 
(xd3_stream *stream, + xd3_output *output) +{ + xd3_output *tmp; + + while (output) + { + tmp = output; + output = output->next_page; + + tmp->next = 0; + tmp->next_page = stream->enc_free; + stream->enc_free = tmp; + } +} + +static void +xd3_free_output (xd3_stream *stream, + xd3_output *output) +{ + xd3_output *next; + + again: + if (output == NULL) + { + return; + } + + next = output->next_page; + + xd3_free (stream, output->base); + xd3_free (stream, output); + + output = next; + goto again; +} +#endif /* XD3_ENCODER */ + +void +xd3_free_stream (xd3_stream *stream) +{ + xd3_iopt_buflist *blist = stream->iopt_alloc; + + while (blist != NULL) + { + xd3_iopt_buflist *tmp = blist; + blist = blist->next; + xd3_free (stream, tmp->buffer); + xd3_free (stream, tmp); + } + +#if XD3_ENCODER + xd3_free (stream, stream->large_table); + xd3_free (stream, stream->small_table); + xd3_free (stream, stream->large_hash.powers); + xd3_free (stream, stream->small_hash.powers); + xd3_free (stream, stream->small_prev); + + { + int i; + for (i = 0; i < ENC_SECTS; i += 1) + { + xd3_free_output (stream, stream->enc_heads[i]); + } + xd3_free_output (stream, stream->enc_free); + } +#endif + + xd3_free (stream, stream->acache.near_array); + xd3_free (stream, stream->acache.same_array); + + xd3_free (stream, stream->inst_sect.copied1); + xd3_free (stream, stream->addr_sect.copied1); + xd3_free (stream, stream->data_sect.copied1); + + if (stream->dec_lastwin != stream->dec_buffer) + { + xd3_free (stream, (uint8_t*) stream->dec_lastwin); + } + xd3_free (stream, stream->dec_buffer); + + xd3_free (stream, stream->buf_in); + xd3_free (stream, stream->dec_appheader); + xd3_free (stream, stream->dec_codetbl); + xd3_free (stream, stream->code_table_alloc); + +#if SECONDARY_ANY + xd3_free (stream, stream->inst_sect.copied2); + xd3_free (stream, stream->addr_sect.copied2); + xd3_free (stream, stream->data_sect.copied2); + + if (stream->sec_type != NULL) + { + stream->sec_type->destroy (stream, 
stream->sec_stream_d); + stream->sec_type->destroy (stream, stream->sec_stream_i); + stream->sec_type->destroy (stream, stream->sec_stream_a); + } +#endif + + xd3_free (stream, stream->whole_target.adds); + xd3_free (stream, stream->whole_target.inst); + xd3_free (stream, stream->whole_target.wininfo); + + XD3_ASSERT (stream->alloc_cnt == stream->free_cnt); + + memset (stream, 0, sizeof (xd3_stream)); +} + +#if (XD3_DEBUG > 1 || VCDIFF_TOOLS) +static const char* +xd3_rtype_to_string (xd3_rtype type, int print_mode) +{ + switch (type) + { + case XD3_NOOP: + return "NOOP "; + case XD3_RUN: + return "RUN "; + case XD3_ADD: + return "ADD "; + default: break; + } + if (! print_mode) + { + return "CPY "; + } + switch (type) + { + case XD3_CPY + 0: return "CPY_0"; + case XD3_CPY + 1: return "CPY_1"; + case XD3_CPY + 2: return "CPY_2"; + case XD3_CPY + 3: return "CPY_3"; + case XD3_CPY + 4: return "CPY_4"; + case XD3_CPY + 5: return "CPY_5"; + case XD3_CPY + 6: return "CPY_6"; + case XD3_CPY + 7: return "CPY_7"; + case XD3_CPY + 8: return "CPY_8"; + case XD3_CPY + 9: return "CPY_9"; + default: return "CPY>9"; + } +} +#endif + +/**************************************************************** + Stream configuration + ******************************************************************/ + +int +xd3_config_stream(xd3_stream *stream, + xd3_config *config) +{ + int ret; + xd3_config defcfg; + xd3_smatcher *smatcher = &stream->smatcher; + + if (config == NULL) + { + config = & defcfg; + memset (config, 0, sizeof (*config)); + } + + /* Initial setup: no error checks yet */ + memset (stream, 0, sizeof (*stream)); + + stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE; + stream->sprevsz = config->sprevsz ? 
config->sprevsz : XD3_DEFAULT_SPREVSZ; + + if (config->iopt_size == 0) + { + stream->iopt_size = XD3_ALLOCSIZE / sizeof(xd3_rinst); + stream->iopt_unlimited = 1; + } + else + { + stream->iopt_size = config->iopt_size; + } + + stream->getblk = config->getblk; + stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func; + stream->free = config->freef ? config->freef : __xd3_free_func; + stream->opaque = config->opaque; + stream->flags = config->flags; + + /* Secondary setup. */ + stream->sec_data = config->sec_data; + stream->sec_inst = config->sec_inst; + stream->sec_addr = config->sec_addr; + + stream->sec_data.data_type = DATA_SECTION; + stream->sec_inst.data_type = INST_SECTION; + stream->sec_addr.data_type = ADDR_SECTION; + + /* Check static sizes. */ + if (sizeof (usize_t) != SIZEOF_USIZE_T || + sizeof (xoff_t) != SIZEOF_XOFF_T || + (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL))) + { + stream->msg = "incorrect compilation: wrong integer sizes"; + return XD3_INTERNAL; + } + + /* Check/set secondary compressor. */ + switch (stream->flags & XD3_SEC_TYPE) + { + case 0: + if (stream->flags & XD3_SEC_NOALL) + { + stream->msg = "XD3_SEC flags require a secondary compressor type"; + return XD3_INTERNAL; + } + break; + case XD3_SEC_FGK: + FGK_CASE (stream); + case XD3_SEC_DJW: + DJW_CASE (stream); + case XD3_SEC_LZMA: + LZMA_CASE (stream); + default: + stream->msg = "too many secondary compressor types set"; + return XD3_INTERNAL; + } + + stream->code_table_desc = & __rfc3284_code_table_desc; + stream->code_table_func = xd3_rfc3284_code_table; + + /* Check sprevsz */ + if (smatcher->small_chain == 1 && + smatcher->small_lchain == 1) + { + stream->sprevsz = 0; + } + else + { + if ((ret = xd3_check_pow2 (stream->sprevsz, NULL))) + { + stream->msg = "sprevsz is required to be a power of two"; + return XD3_INTERNAL; + } + + stream->sprevmask = stream->sprevsz - 1; + } + + /* Default scanner settings. 
*/ +#if XD3_ENCODER + switch (config->smatch_cfg) + { + IF_BUILD_SOFT(case XD3_SMATCH_SOFT: + { + *smatcher = config->smatcher_soft; + smatcher->string_match = __smatcher_soft.string_match; + smatcher->name = __smatcher_soft.name; + if (smatcher->large_look < MIN_MATCH || + smatcher->large_step < 1 || + smatcher->small_look < MIN_MATCH) + { + stream->msg = "invalid soft string-match config"; + return XD3_INVALID; + } + break; + }) + + IF_BUILD_DEFAULT(case XD3_SMATCH_DEFAULT: + *smatcher = __smatcher_default; + break;) + IF_BUILD_SLOW(case XD3_SMATCH_SLOW: + *smatcher = __smatcher_slow; + break;) + IF_BUILD_FASTEST(case XD3_SMATCH_FASTEST: + *smatcher = __smatcher_fastest; + break;) + IF_BUILD_FASTER(case XD3_SMATCH_FASTER: + *smatcher = __smatcher_faster; + break;) + IF_BUILD_FAST(case XD3_SMATCH_FAST: + *smatcher = __smatcher_fast; + break;) + default: + stream->msg = "invalid string match config type"; + return XD3_INTERNAL; + } + + if (config->smatch_cfg == XD3_SMATCH_DEFAULT && + (stream->flags & XD3_COMPLEVEL_MASK) != 0) + { + int level = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT; + + switch (level) + { + case 1: + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + case 2: + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + case 3: case 4: case 5: + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + case 6: + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + default: + IF_BUILD_SLOW(*smatcher = __smatcher_slow; + break;) + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + } + } +#endif + + return 0; +} + +/*********************************************************** + Getblk interface + ***********************************************************/ + +INLINE +xoff_t xd3_source_eof(const xd3_source *src) +{ + xoff_t r = 
(src->max_blkno << src->shiftby) + (xoff_t)src->onlastblk; + return r; +} + +INLINE +usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno) +{ + usize_t r = (blkno == src->max_blkno ? + src->onlastblk : + src->blksize); + return r; +} + +/* This function interfaces with the client getblk function, checks + * its results, updates max_blkno, onlastblk, eof_known. */ +static int +xd3_getblk (xd3_stream *stream, xoff_t blkno) +{ + int ret; + xd3_source *source = stream->src; + + if (source->curblk == NULL || blkno != source->curblkno) + { + source->getblkno = blkno; + + if (stream->getblk == NULL) + { + IF_DEBUG2 (DP(RINT "[getblk] XD3_GETSRCBLK %"Q"u\n", blkno)); + stream->msg = "getblk source input"; + return XD3_GETSRCBLK; + } + + ret = stream->getblk (stream, source, blkno); + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "[getblk] app error blkno %"Q"u: %s\n", + blkno, xd3_strerror (ret))); + return ret; + } + + IF_DEBUG2 (DP(RINT "[getblk] read source block %"Q"u onblk " + "%"W"u blksize %"W"u max_blkno %"Q"u\n", blkno, source->onblk, + source->blksize, source->max_blkno)); + } + + if (blkno > source->max_blkno) + { + source->max_blkno = blkno; + + if (source->onblk == source->blksize) + { + IF_DEBUG1 (DP(RINT "[getblk] full source blkno %"Q"u: " + "source length unknown %"Q"u\n", + blkno, + xd3_source_eof (source))); + } + else if (!source->eof_known) + { + IF_DEBUG1 (DP(RINT "[getblk] eof block has %"W"u bytes; " + "source length known %"Q"u\n", + xd3_bytes_on_srcblk (source, blkno), + xd3_source_eof (source))); + source->eof_known = 1; + } + } + + XD3_ASSERT (source->curblk != NULL); + + if (blkno == source->max_blkno) + { + /* In case the application sets the source as 1 block w/ a + * preset buffer. 
*/ + source->onlastblk = source->onblk; + } + return 0; +} + +/*********************************************************** + Stream open/close + ***************************************************************/ + +int +xd3_set_source (xd3_stream *stream, + xd3_source *src) +{ + usize_t shiftby; + + stream->src = src; + src->srclen = 0; + src->srcbase = 0; + + /* Enforce power-of-two blocksize so that source-block number + * calculations are cheap. */ + if (xd3_check_pow2 (src->blksize, &shiftby) != 0) + { + src->blksize = xd3_pow2_roundup(src->blksize); + xd3_check_pow2 (src->blksize, &shiftby); + IF_DEBUG1 (DP(RINT "raising src_blksz to %"W"u\n", src->blksize)); + } + + src->shiftby = shiftby; + src->maskby = (1ULL << shiftby) - 1ULL; + + if (xd3_check_pow2 (src->max_winsize, NULL) != 0) + { + src->max_winsize = xd3_xoff_roundup(src->max_winsize); + IF_DEBUG1 (DP(RINT "raising src_maxsize to %"W"u\n", src->blksize)); + } + src->max_winsize = xd3_max (src->max_winsize, XD3_ALLOCSIZE); + return 0; +} + +int +xd3_set_source_and_size (xd3_stream *stream, + xd3_source *user_source, + xoff_t source_size) { + int ret = xd3_set_source (stream, user_source); + if (ret == 0) + { + stream->src->eof_known = 1; + IF_DEBUG2 (DP(RINT "[set source] size known %"Q"u\n", + source_size)); + xd3_blksize_div(source_size, + stream->src, + &stream->src->max_blkno, + &stream->src->onlastblk); + + IF_DEBUG1 (DP(RINT "[set source] size known %"Q"u max_blkno %"Q"u\n", + source_size, stream->src->max_blkno)); + } + return ret; +} + +void +xd3_abort_stream (xd3_stream *stream) +{ + stream->dec_state = DEC_ABORTED; + stream->enc_state = ENC_ABORTED; +} + +int +xd3_close_stream (xd3_stream *stream) +{ + if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED) + { + if (stream->buf_leftover != NULL) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + + if (stream->enc_state == ENC_POSTWIN) + { +#if XD3_ENCODER + xd3_encode_reset (stream); +#endif + 
stream->current_window += 1; + stream->enc_state = ENC_INPUT; + } + + /* If encoding, should be ready for more input but not actually + have any. */ + if (stream->enc_state != ENC_INPUT || stream->avail_in != 0) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + } + else + { + switch (stream->dec_state) + { + case DEC_VCHEAD: + case DEC_WININD: + /* TODO: Address the zero-byte ambiguity. Does the encoder + * emit a window or not? If so, then catch an error here. + * If not, need another routine to say + * decode_at_least_one_if_empty. */ + case DEC_ABORTED: + break; + default: + /* If decoding, should be ready for the next window. */ + stream->msg = "eof in decode"; + return XD3_INVALID_INPUT; + } + } + + return 0; +} + +/************************************************************** + Application header + ****************************************************************/ + +int +xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size) +{ + if (stream->dec_state < DEC_WININD) + { + stream->msg = "application header not available"; + return XD3_INTERNAL; + } + + (*data) = stream->dec_appheader; + (*size) = stream->dec_appheadsz; + return 0; +} + +/********************************************************** + Decoder stuff + *************************************************/ + +#include "xdelta3-decode.h" + +/**************************************************************** + Encoder stuff + *****************************************************************/ + +#if XD3_ENCODER +void +xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size) +{ + stream->enc_appheader = data; + stream->enc_appheadsz = size; +} + +#if XD3_DEBUG +static int +xd3_iopt_check (xd3_stream *stream) +{ + usize_t ul = xd3_rlist_length (& stream->iopt_used); + usize_t fl = xd3_rlist_length (& stream->iopt_free); + + return (ul + fl + (stream->iout ? 
1 : 0)) == stream->iopt_size; +} +#endif + +static xd3_rinst* +xd3_iopt_free (xd3_stream *stream, xd3_rinst *i) +{ + xd3_rinst *n = xd3_rlist_remove (i); + xd3_rlist_push_back (& stream->iopt_free, i); + return n; +} + +static void +xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i) +{ + if (i->type != XD3_ADD) + { + xd3_rlist_push_back (& stream->iopt_free, i); + } +} + +/* When an instruction is ready to flush from the iopt buffer, this + * function is called to produce an encoding. It writes the + * instruction plus size, address, and data to the various encoding + * sections. */ +static int +xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + + /* Check for input overflow. */ + XD3_ASSERT (inst->pos + inst->size <= stream->avail_in); + + switch (inst->type) + { + case XD3_CPY: + { + /* the address may have an offset if there is a source window. */ + usize_t addr; + xd3_source *src = stream->src; + + if (src != NULL) + { + /* If there is a source copy, the source must have its + * source window decided before we can encode. This can + * be bad -- we have to make this decision even if no + * source matches have been found. */ + if (stream->srcwin_decided == 0) + { + if ((ret = xd3_srcwin_setup (stream))) { return ret; } + } + else + { + stream->srcwin_decided_early = (!stream->src->eof_known || + (stream->srcwin_cksum_pos < + xd3_source_eof (stream->src))); + } + + /* xtra field indicates the copy is from the source */ + if (inst->xtra) + { + XD3_ASSERT (inst->addr >= src->srcbase); + XD3_ASSERT (inst->addr + inst->size <= + src->srcbase + src->srclen); + addr = inst->addr - src->srcbase; + stream->n_scpy += 1; + stream->l_scpy += inst->size; + } + else + { + /* with source window: target copy address is offset + * by taroff. 
*/ + addr = stream->taroff + inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + } + else + { + addr = inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + + /* Note: used to assert inst->size >= MIN_MATCH, but not true + * for merge operations & identical match heuristics. */ + /* the "here" position is always offset by taroff */ + if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, + & inst->type))) + { + return ret; + } + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %"W"u\n", + cnt++, + stream->total_in + inst->pos, + stream->total_in + inst->pos + inst->size, + inst->addr, inst->addr + inst->size, inst->size); + }); + break; + } + case XD3_RUN: + { + if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; } + + stream->n_run += 1; + stream->l_run += inst->size; + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt run:%d] pos %"Q"u size %"W"u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + break; + } + case XD3_ADD: + { + if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream), + stream->next_in + inst->pos, inst->size))) { return ret; } + + stream->n_add += 1; + stream->l_add += inst->size; + + IF_DEBUG2 ({ + static int cnt; + DP(RINT "[iopt add:%d] pos %"Q"u size %"W"u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + + break; + } + } + + /* This is the only place stream->unencoded_offset is incremented. 
*/ + XD3_ASSERT (stream->unencoded_offset == inst->pos); + stream->unencoded_offset += inst->size; + + inst->code2 = 0; + + XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst); + + if (stream->iout != NULL) + { + if (stream->iout->code2 != 0) + { + if ((ret = xd3_emit_double (stream, stream->iout, inst, + stream->iout->code2))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + xd3_iopt_free_nonadd (stream, inst); + stream->iout = NULL; + return 0; + } + else + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + } + } + + stream->iout = inst; + + return 0; +} + +/* This possibly encodes an add instruction, iadd, which must remain + * on the stack until the following call to + * xd3_iopt_finish_encoding. */ +static int +xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) +{ + int ret; + usize_t off = stream->unencoded_offset; + + if (pos > off) + { + iadd->type = XD3_ADD; + iadd->pos = off; + iadd->size = pos - off; + + if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; } + } + + return 0; +} + +/* This function calls xd3_iopt_finish_encoding to finish encoding an + * instruction, and it may also produce an add instruction for an + * unmatched region. */ +static int +xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; } + + if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; } + + return 0; +} + +/* Generates a final add instruction to encode the remaining input. 
*/ +static int +xd3_iopt_add_finalize (xd3_stream *stream) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; } + + if (stream->iout) + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + stream->iout = NULL; + } + + return 0; +} + +/* Compact the instruction buffer by choosing the best non-overlapping + * instructions when lazy string-matching. There are no ADDs in the + * iopt buffer because those are synthesized in xd3_iopt_add_encoding + * and during xd3_iopt_add_finalize. */ +static int +xd3_iopt_flush_instructions (xd3_stream *stream, int force) +{ + xd3_rinst *r1 = xd3_rlist_front (& stream->iopt_used); + xd3_rinst *r2; + xd3_rinst *r3; + usize_t r1end; + usize_t r2end; + usize_t r2off; + usize_t r2moff; + usize_t gap; + usize_t flushed; + int ret; + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* Note: once tried to skip this step if it's possible to assert + * there are no overlapping instructions. Doesn't work because + * xd3_opt_erase leaves overlapping instructions. */ + while (! xd3_rlist_end (& stream->iopt_used, r1) && + ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1))) + { + r1end = r1->pos + r1->size; + + /* If the instructions do not overlap, continue. */ + if (r1end <= r2->pos) + { + r1 = r2; + continue; + } + + r2end = r2->pos + r2->size; + + /* The min_match adjustments prevent this. */ + XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR)); + + /* If r3 is available... */ + if (! xd3_rlist_end (& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + /* If r3 starts before r1 finishes or just about, r2 is irrelevant */ + if (r3->pos <= r1end + 1) + { + xd3_iopt_free (stream, r2); + continue; + } + } + else if (! force) + { + /* Unless force, end the loop when r3 is not available. 
*/ + break; + } + + r2off = r2->pos - r1->pos; + r2moff = r2end - r1end; + gap = r2end - r1->pos; + + /* If the two matches overlap almost entirely, choose the better match + * and discard the other. The else branch can still create inefficient + * copies, e.g., a 4-byte copy that takes 4 bytes to encode, which + * xd3_smatch() wouldn't allow by its crude efficiency check. However, + * in this case there are adjacent copies which mean the add would cost + * one extra byte. Allow the inefficiency here. */ + if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) + { + /* Only one match should be used, choose the longer one. */ + if (r1->size < r2->size) + { + xd3_iopt_free (stream, r1); + r1 = r2; + } + else + { + /* We are guaranteed that r1 does not overlap now, so advance past r2 */ + r1 = xd3_iopt_free (stream, r2); + } + continue; + } + else + { + /* Shorten one of the instructions -- could be optimized + * based on the address cache. */ + usize_t average; + usize_t newsize; + usize_t adjust1; + + XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); + + /* Try to balance the length of both instructions, but avoid + * making both longer than MAX_MATCH_SPLIT . */ + average = gap / 2; + newsize = xd3_min (MAX_MATCH_SPLIT, gap - average); + + /* Should be possible to simplify this code. 
*/ + if (newsize > r1->size) + { + /* shorten r2 */ + adjust1 = r1end - r2->pos; + } + else if (newsize > r2->size) + { + /* shorten r1 */ + adjust1 = r1end - r2->pos; + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* don't shorten r2 */ + adjust1 = 0; + } + else + { + /* shorten r1 */ + adjust1 = r1->size - newsize; + + if (r2->pos > r1end - adjust1) + { + adjust1 -= r2->pos - (r1end - adjust1); + } + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* shorten r2 */ + XD3_ASSERT (r1->pos + r1->size >= r2->pos); + + adjust1 = r1->pos + r1->size - r2->pos; + } + + /* Fallthrough above if-else, shorten r2 */ + XD3_ASSERT (r2->size > adjust1); + + r2->size -= adjust1; + r2->pos += adjust1; + r2->addr += adjust1; + + XD3_ASSERT (r1->size >= MIN_MATCH); + XD3_ASSERT (r2->size >= MIN_MATCH); + + r1 = r2; + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* If forcing, pick instructions until the list is empty, otherwise + * this empties 50% of the queue. */ + for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); ) + { + xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used); + if ((ret = xd3_iopt_add_encoding (stream, renc))) + { + return ret; + } + + if (! force) + { + if (++flushed > stream->iopt_size / 2) + { + break; + } + + /* If there are only two instructions remaining, break, + * because they were not optimized. This means there were + * more than 50% eliminated by the loop above. 
*/ + r1 = xd3_rlist_front (& stream->iopt_used); + if (xd3_rlist_end(& stream->iopt_used, r1) || + xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) || + xd3_rlist_end(& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + break; + } + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt_used) == 0); + + return 0; +} + +static int +xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr) +{ + xd3_rinst *i; + int ret; + + if (xd3_rlist_empty (& stream->iopt_free)) + { + if (stream->iopt_unlimited) + { + usize_t elts = XD3_ALLOCSIZE / sizeof(xd3_rinst); + + if ((ret = xd3_alloc_iopt (stream, elts))) + { + return ret; + } + + stream->iopt_size += elts; + } + else + { + if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; } + + XD3_ASSERT (! xd3_rlist_empty (& stream->iopt_free)); + } + } + + i = xd3_rlist_pop_back (& stream->iopt_free); + + xd3_rlist_push_back (& stream->iopt_used, i); + + (*iptr) = i; + + ++stream->i_slots_used; + + return 0; +} + +/* A copy is about to be emitted that extends backwards to POS, + * therefore it may completely cover some existing instructions in the + * buffer. If an instruction is completely covered by this new match, + * erase it. If the new instruction is covered by the previous one, + * return 1 to skip it. */ +static void +xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) +{ + while (! xd3_rlist_empty (& stream->iopt_used)) + { + xd3_rinst *r = xd3_rlist_back (& stream->iopt_used); + + /* Verify that greedy is working. The previous instruction + * should end before the new one begins. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); + /* Verify that min_match is working. The previous instruction + * should end before the new one ends. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); + + /* See if the last instruction starts before the new + * instruction. 
If so, there is nothing to erase. */ + if (r->pos < pos) + { + return; + } + + /* Otherwise, the new instruction covers the old one, delete it + and repeat. */ + xd3_rlist_remove (r); + xd3_rlist_push_back (& stream->iopt_free, r); + --stream->i_slots_used; + } +} + +/* This function tells the last matched input position. */ +static usize_t +xd3_iopt_last_matched (xd3_stream *stream) +{ + xd3_rinst *r; + + if (xd3_rlist_empty (& stream->iopt_used)) + { + return 0; + } + + r = xd3_rlist_back (& stream->iopt_used); + + return r->pos + r->size; +} + +/********************************************************* + Emit routines + ***********************************************************/ + +static int +xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint8_t code) +{ + int has_size = stream->code_table[code].size1 == 0; + int ret; + + IF_DEBUG2 (DP(RINT "[emit1] %"W"u %s (%"W"u) code %u\n", + single->pos, + xd3_rtype_to_string ((xd3_rtype) single->type, 0), + single->size, + code)); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + if (has_size) + { + if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) + { + return ret; + } + } + + return 0; +} + +static int +xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, uint8_t code) +{ + int ret; + + /* All double instructions use fixed sizes, so all we need to do is + * output the instruction code, no sizes. */ + XD3_ASSERT (stream->code_table[code].size1 != 0 && + stream->code_table[code].size2 != 0); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + IF_DEBUG2 (DP(RINT "[emit2]: %"W"u %s (%"W"u) %s (%"W"u) code %u\n", + first->pos, + xd3_rtype_to_string ((xd3_rtype) first->type, 0), + first->size, + xd3_rtype_to_string ((xd3_rtype) second->type, 0), + second->size, + code)); + + return 0; +} + +/* This enters a potential run instruction into the iopt buffer. 
The + * position argument is relative to the target window. */ +static int +xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t *run_c) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_RUN; + ri->xtra = *run_c; + ri->pos = pos; + ri->size = size; + + return 0; +} + +/* This enters a potential copy instruction into the iopt buffer. The + * position argument is relative to the target window.. */ +int +xd3_found_match (xd3_stream *stream, usize_t pos, + usize_t size, xoff_t addr, int is_source) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_CPY; + ri->xtra = is_source; + ri->pos = pos; + ri->size = size; + ri->addr = addr; + + return 0; +} + +static int +xd3_emit_hdr (xd3_stream *stream) +{ + int ret; + int use_secondary = stream->sec_type != NULL; + int use_adler32 = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE); + int vcd_source = xd3_encoder_used_source (stream); + uint8_t win_ind = 0; + uint8_t del_ind = 0; + usize_t enc_len; + usize_t tgt_len; + usize_t data_len; + usize_t inst_len; + usize_t addr_len; + + if (stream->current_window == 0) + { + uint8_t hdr_ind = 0; + int use_appheader = stream->enc_appheader != NULL; + + if (use_secondary) { hdr_ind |= VCD_SECONDARY; } + if (use_appheader) { hdr_ind |= VCD_APPHEADER; } + + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC1)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC2)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC3)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_VERSION)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0) + { + return ret; + } + + /* Secondary compressor ID */ +#if SECONDARY_ANY + if (use_secondary && + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->sec_type->id))) + { + return ret; + } +#endif + 
+ /* Application header */ + if (use_appheader) + { + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->enc_appheadsz)) || + (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), + stream->enc_appheader, + stream->enc_appheadsz))) + { + return ret; + } + } + } + + /* try to compress this window */ +#if SECONDARY_ANY + if (use_secondary) + { + int data_sec = 0; + int inst_sec = 0; + int addr_sec = 0; + +# define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \ + (ret = xd3_encode_secondary (stream, \ + & UPPER ## _HEAD (stream), \ + & UPPER ## _TAIL (stream), \ + & xd3_sec_ ## LOWER (stream), \ + & stream->sec_ ## LOWER, \ + & LOWER ## _sec))) + + if (ENCODE_SECONDARY_SECTION (DATA, data) || + ENCODE_SECONDARY_SECTION (INST, inst) || + ENCODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } + + del_ind |= (data_sec ? VCD_DATACOMP : 0); + del_ind |= (inst_sec ? VCD_INSTCOMP : 0); + del_ind |= (addr_sec ? VCD_ADDRCOMP : 0); + } +#endif + + /* if (vcd_target) { win_ind |= VCD_TARGET; } */ + if (vcd_source) { win_ind |= VCD_SOURCE; } + if (use_adler32) { win_ind |= VCD_ADLER32; } + + /* window indicator */ + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) + { + return ret; + } + + /* source window */ + if (vcd_source) + { + /* or (vcd_target) { ... } */ + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->src->srclen)) || + (ret = xd3_emit_offset (stream, & HDR_TAIL (stream), + stream->src->srcbase))) { return ret; } + } + + tgt_len = stream->avail_in; + data_len = xd3_sizeof_output (DATA_HEAD (stream)); + inst_len = xd3_sizeof_output (INST_HEAD (stream)); + addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); + + /* The enc_len field is a redundency for future extensions. */ + enc_len = (1 + (xd3_sizeof_size (tgt_len) + + xd3_sizeof_size (data_len) + + xd3_sizeof_size (inst_len) + + xd3_sizeof_size (addr_len)) + + data_len + + inst_len + + addr_len + + (use_adler32 ? 
4 : 0)); + + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len))) + { + return ret; + } + + if (use_adler32) + { + uint8_t send[4]; + uint32_t a32; + + if (stream->flags & XD3_ADLER32) + { + a32 = adler32 (1L, stream->next_in, stream->avail_in); + } + else + { + a32 = stream->recode_adler32; + } + + /* Four bytes. */ + send[0] = (uint8_t) (a32 >> 24); + send[1] = (uint8_t) (a32 >> 16); + send[2] = (uint8_t) (a32 >> 8); + send[3] = (uint8_t) (a32 & 0x000000FFU); + + if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) + { + return ret; + } + } + + return 0; +} + +/**************************************************************** + Encode routines + ****************************************************************/ + +static int +xd3_encode_buffer_leftover (xd3_stream *stream) +{ + usize_t take; + usize_t room; + + /* Allocate the buffer. */ + if (stream->buf_in == NULL && + (stream->buf_in = (uint8_t*) xd3_alloc (stream, stream->winsize, 1)) == NULL) + { + return ENOMEM; + } + + IF_DEBUG2 (DP(RINT "[leftover] flush?=%s\n", (stream->flags & XD3_FLUSH) ? "yes" : "no")); + + /* Take leftover input first. */ + if (stream->buf_leftover != NULL) + { + XD3_ASSERT (stream->buf_avail == 0); + XD3_ASSERT (stream->buf_leftavail < stream->winsize); + + IF_DEBUG2 (DP(RINT "[leftover] previous %"W"u avail %"W"u\n", + stream->buf_leftavail, stream->avail_in)); + + memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail); + + stream->buf_leftover = NULL; + stream->buf_avail = stream->buf_leftavail; + } + + /* Copy into the buffer. 
*/ + room = stream->winsize - stream->buf_avail; + take = xd3_min (room, stream->avail_in); + + memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take); + + stream->buf_avail += take; + + if (take < stream->avail_in) + { + /* Buffer is full */ + stream->buf_leftover = stream->next_in + take; + stream->buf_leftavail = stream->avail_in - take; + } + else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH)) + { + /* Buffer has space */ + IF_DEBUG2 (DP(RINT "[leftover] emptied %"W"u\n", take)); + return XD3_INPUT; + } + + /* Use the buffer: */ + IF_DEBUG2 (DP(RINT "[leftover] take %"W"u remaining %"W"u\n", take, stream->buf_leftavail)); + stream->next_in = stream->buf_in; + stream->avail_in = stream->buf_avail; + stream->buf_avail = 0; + + return 0; +} + +/* Allocates one block of xd3_rlist elements */ +static int +xd3_alloc_iopt (xd3_stream *stream, usize_t elts) +{ + usize_t i; + xd3_iopt_buflist* last = + (xd3_iopt_buflist*) xd3_alloc (stream, sizeof (xd3_iopt_buflist), 1); + + if (last == NULL || + (last->buffer = (xd3_rinst*) xd3_alloc (stream, sizeof (xd3_rinst), elts)) == NULL) + { + return ENOMEM; + } + + last->next = stream->iopt_alloc; + stream->iopt_alloc = last; + + for (i = 0; i < elts; i += 1) + { + xd3_rlist_push_back (& stream->iopt_free, & last->buffer[i]); + } + + return 0; +} + +/* This function allocates all memory initially used by the encoder. */ +static int +xd3_encode_init (xd3_stream *stream, int full_init) +{ + int ret; + int i; + + if (full_init) + { + int large_comp = (stream->src != NULL); + int small_comp = ! (stream->flags & XD3_NOCOMPRESS); + + /* Memory allocations for checksum tables are delayed until + * xd3_string_match_init in the first call to string_match--that way + * identical or short inputs require no table allocation. */ + if (large_comp) + { + /* TODO Need to check for overflow here. 
*/ + usize_t hash_values = stream->src->max_winsize / + stream->smatcher.large_step; + + if ((ret = xd3_size_hashtable (stream, + hash_values, + stream->smatcher.large_look, + & stream->large_hash))) + { + return ret; + } + } + + if (small_comp) + { + /* TODO: This is under devel: used to have min (sprevsz) here, which sort + * of makes sense, but observed fast performance w/ larger tables, which + * also sort of makes sense. @@@ */ + usize_t hash_values = stream->winsize; + + if ((ret = xd3_size_hashtable (stream, + hash_values, + stream->smatcher.small_look, + & stream->small_hash))) + { + return ret; + } + } + } + + /* data buffers */ + for (i = 0; i < ENC_SECTS; i += 1) + { + if ((stream->enc_heads[i] = + stream->enc_tails[i] = + xd3_alloc_output (stream, NULL)) == NULL) + { + return ENOMEM; + } + } + + /* iopt buffer */ + xd3_rlist_init (& stream->iopt_used); + xd3_rlist_init (& stream->iopt_free); + + if (xd3_alloc_iopt (stream, stream->iopt_size) != 0) { goto fail; } + + XD3_ASSERT (xd3_rlist_length (& stream->iopt_free) == stream->iopt_size); + XD3_ASSERT (xd3_rlist_length (& stream->iopt_used) == 0); + + /* address cache, code table */ + stream->acache.s_near = stream->code_table_desc->near_modes; + stream->acache.s_same = stream->code_table_desc->same_modes; + stream->code_table = stream->code_table_func (); + + return xd3_alloc_cache (stream); + + fail: + + return ENOMEM; +} + +int +xd3_encode_init_full (xd3_stream *stream) +{ + return xd3_encode_init (stream, 1); +} + +int +xd3_encode_init_partial (xd3_stream *stream) +{ + return xd3_encode_init (stream, 0); +} + +/* Called after the ENC_POSTOUT state, this puts the output buffers + * back into separate lists and re-initializes some variables. (The + * output lists were spliced together during the ENC_FLUSH state.) 
*/ +static void +xd3_encode_reset (xd3_stream *stream) +{ + int i; + xd3_output *olist; + + stream->avail_in = 0; + stream->small_reset = 1; + stream->i_slots_used = 0; + + if (stream->src != NULL) + { + stream->src->srcbase = 0; + stream->src->srclen = 0; + stream->srcwin_decided = 0; + stream->srcwin_decided_early = 0; + stream->match_minaddr = 0; + stream->match_maxaddr = 0; + stream->taroff = 0; + } + + /* Reset output chains: walk the spliced page list, giving each + * section its first ENC_SECTS pages back as a single empty page; + * whatever pages remain are returned to the free list. */ + olist = stream->enc_heads[0]; + + for (i = 0; i < ENC_SECTS; i += 1) + { + XD3_ASSERT (olist != NULL); + + stream->enc_heads[i] = olist; + stream->enc_tails[i] = olist; + olist = olist->next_page; + + stream->enc_heads[i]->next = 0; + stream->enc_heads[i]->next_page = NULL; + + stream->enc_tails[i]->next_page = NULL; + stream->enc_tails[i] = stream->enc_heads[i]; + } + + xd3_freelist_output (stream, olist); +} + +/* The main encoding routine. It is a state machine keyed on + * stream->enc_state; the cases deliberately fall through from one + * state to the next, and the function returns whenever the caller + * has to act (XD3_INPUT, XD3_WINSTART, XD3_OUTPUT, XD3_WINFINISH) or + * an error occurs. */ +int +xd3_encode_input (xd3_stream *stream) +{ + int ret, i; + + if (stream->dec_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INTERNAL; + } + + switch (stream->enc_state) + { + case ENC_INIT: + /* Only reached on first time through: memory setup. */ + if ((ret = xd3_encode_init_full (stream))) { return ret; } + + stream->enc_state = ENC_INPUT; + /* fallthrough */ + + case ENC_INPUT: + + /* If there is no input yet, just return. This checks for + * next_in == NULL, not avail_in == 0 since zero bytes is a + * valid input. There is an assertion in xd3_avail_input() that + * next_in != NULL for this reason. By returning right away we + * avoid creating an input buffer before the caller has supplied + * its first data. It is possible for xd3_avail_input to be + * called both before and after the first call to + * xd3_encode_input(). */ + if (stream->next_in == NULL) + { + return XD3_INPUT; + } + + enc_flush: + /* See if we should buffer the input: either if there is already + * a leftover buffer, or if the input is short of winsize + * without flush. 
The label at this point is reached by a goto + * below, when there is leftover input after postout. */ + if ((stream->buf_leftover != NULL) || + (stream->buf_avail != 0) || + (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) + { + if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; } + } + + /* Initialize the address cache before each window. */ + xd3_init_cache (& stream->acache); + + stream->input_position = 0; + stream->min_match = MIN_MATCH; + stream->unencoded_offset = 0; + + stream->enc_state = ENC_SEARCH; + + IF_DEBUG2 (DP(RINT "[WINSTART:%"Q"u] input bytes %"W"u offset %"Q"u\n", + stream->current_window, stream->avail_in, + stream->total_in)); + return XD3_WINSTART; + + case ENC_SEARCH: + IF_DEBUG2 (DP(RINT "[SEARCH] match_state %d avail_in %"W"u %s\n", + stream->match_state, stream->avail_in, + stream->src ? "source" : "no source")); + + /* Reentrant matching. */ + if (stream->src != NULL) + { + switch (stream->match_state) + { + case MATCH_TARGET: + /* Try matching forward at the start of the target. + * This is entered the first time through, to check for + * a perfect match, and whenever there is a source match + * that extends to the end of the previous window. The + * match_srcpos field is initially zero and later set + * during xd3_source_extend_match. */ + + if (stream->avail_in > 0) + { + /* This call can't fail because the source window is + * unrestricted. 
*/ + ret = xd3_source_match_setup (stream, stream->match_srcpos); + XD3_ASSERT (ret == 0); + stream->match_state = MATCH_FORWARD; + } + else + { + stream->match_state = MATCH_SEARCHING; + stream->match_fwd = 0; + } + XD3_ASSERT (stream->match_fwd == 0); + /* fallthrough */ + + case MATCH_FORWARD: + case MATCH_BACKWARD: + if (stream->avail_in != 0) + { + if ((ret = xd3_source_extend_match (stream)) != 0) + { + return ret; + } + + /* The search has to make forward progress here + * or else it can get stuck in a match-backward + * (getsrcblk) then match-forward (getsrcblk), + * find insufficient match length, then repeat + + * exactly the same search. + */ + stream->input_position += stream->match_fwd; + } + /* fallthrough */ + + case MATCH_SEARCHING: + /* Continue string matching. (It's possible that the + * initial match continued through the entire input, in + * which case we're still in MATCH_FORWARD and should + * remain so for the next input window.) */ + break; + } + } + + /* String matching... */ + if (stream->avail_in != 0 && + (ret = stream->smatcher.string_match (stream))) + { + return ret; + } + + stream->enc_state = ENC_INSTR; + /* fallthrough */ + + case ENC_INSTR: + /* Note: Jump here to encode VCDIFF deltas w/o using this + * string-matching code. Merging code enters here. */ + + /* Flush the instruction buffer, then possibly add one more + * instruction, then emit the header. */ + if ((ret = xd3_iopt_flush_instructions (stream, 1)) || + (ret = xd3_iopt_add_finalize (stream))) + { + return ret; + } + + stream->enc_state = ENC_FLUSH; + /* fallthrough */ + + case ENC_FLUSH: + /* Note: main_recode_func() bypasses string-matching by setting + * ENC_FLUSH. */ + if ((ret = xd3_emit_hdr (stream))) + { + return ret; + } + + /* Begin output. */ + stream->enc_current = HDR_HEAD (stream); + + /* Chain all the outputs together. After doing this, it looks + * as if there is only one section. The other enc_heads are set + * to NULL to avoid freeing them more than once. 
*/ + for (i = 1; i < ENC_SECTS; i += 1) + { + stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; + stream->enc_heads[i] = NULL; + } + + enc_output: + + stream->enc_state = ENC_POSTOUT; + stream->next_out = stream->enc_current->base; + stream->avail_out = stream->enc_current->next; + stream->total_out += stream->avail_out; + + /* If there is any output in this buffer, return it, otherwise + * fall through to handle the next buffer or finish the window + * after all buffers have been output. */ + if (stream->avail_out > 0) + { + /* This is the only place xd3_encode_input returns XD3_OUTPUT */ + return XD3_OUTPUT; + } + /* fallthrough */ + + case ENC_POSTOUT: + + if (stream->avail_out != 0) + { + stream->msg = "missed call to consume output"; + return XD3_INTERNAL; + } + + /* Continue outputting one buffer at a time, until the next is NULL. */ + if ((stream->enc_current = stream->enc_current->next_page) != NULL) + { + goto enc_output; + } + + stream->total_in += stream->avail_in; + stream->enc_state = ENC_POSTWIN; + + IF_DEBUG2 (DP(RINT "[WINFINISH:%"Q"u] in=%"Q"u\n", + stream->current_window, + stream->total_in)); + return XD3_WINFINISH; + + case ENC_POSTWIN: + + xd3_encode_reset (stream); + + stream->current_window += 1; + stream->enc_state = ENC_INPUT; + + /* If there is leftover input to flush, repeat. */ + if (stream->buf_leftover != NULL) + { + goto enc_flush; + } + + /* Ready for more input. 
*/ + return XD3_INPUT; + + default: + stream->msg = "invalid state"; + return XD3_INTERNAL; + } +} +#endif /* XD3_ENCODER */ + +/***************************************************************** + Client convenience functions + ******************************************************************/ + +int +xd3_process_stream (int is_encode, + xd3_stream *stream, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + usize_t ipos = 0; + usize_t n = xd3_min (stream->winsize, input_size); + + (*output_size) = 0; + + stream->flags |= XD3_FLUSH; + + xd3_avail_input (stream, input + ipos, n); + ipos += n; + + for (;;) + { + int ret; + switch ((ret = func (stream))) + { + case XD3_OUTPUT: { /* memcpy below */ break; } + case XD3_INPUT: { + n = xd3_min(stream->winsize, input_size - ipos); + if (n == 0) + { + goto done; + } + xd3_avail_input (stream, input + ipos, n); + ipos += n; + continue; + } + case XD3_GOTHEADER: { /* ignore */ continue; } + case XD3_WINSTART: { /* ignore */ continue; } + case XD3_WINFINISH: { /* ignore */ continue; } + case XD3_GETSRCBLK: + { + /* When the getblk function is NULL, it is necessary to + * provide the complete source as a single block using + * xd3_set_source_and_size, otherwise this error. The + * library should never ask for another source block. */ + stream->msg = "library requested source block"; + return XD3_INTERNAL; + } + case 0: + { + /* xd3_encode_input/xd3_decode_input never return 0 */ + stream->msg = "invalid return: 0"; + return XD3_INTERNAL; + } + default: + return ret; + } + + if (*output_size + stream->avail_out > output_size_max) + { + stream->msg = "insufficient output space"; + return ENOSPC; + } + + memcpy (output + *output_size, stream->next_out, stream->avail_out); + + *output_size += stream->avail_out; + + xd3_consume_output (stream); + } + done: + return (close_stream == 0) ? 
0 : xd3_close_stream (stream); +} + +static int +xd3_process_memory (int is_encode, + int (*func) (xd3_stream *), + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + xd3_stream stream; + xd3_config config; + xd3_source src; + int ret; + + memset (& stream, 0, sizeof (stream)); + memset (& config, 0, sizeof (config)); + + if (input == NULL || output == NULL) { + stream.msg = "invalid input/output buffer"; + ret = XD3_INTERNAL; + goto exit; + } + + config.flags = flags; + + if (is_encode) + { + config.winsize = xd3_min(input_size, (usize_t) XD3_DEFAULT_WINSIZE); + config.sprevsz = xd3_pow2_roundup (config.winsize); + } + + if ((ret = xd3_config_stream (&stream, &config)) != 0) + { + goto exit; + } + + if (source != NULL) + { + memset (& src, 0, sizeof (src)); + + src.blksize = source_size; + src.onblk = source_size; + src.curblk = source; + src.curblkno = 0; + src.max_winsize = source_size; + + if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0) + { + goto exit; + } + } + + if ((ret = xd3_process_stream (is_encode, + & stream, + func, 1, + input, input_size, + output, + output_size, + output_size_max)) != 0) + { + goto exit; + } + + exit: + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "process_memory: %d: %s\n", ret, stream.msg)); + } + xd3_free_stream(&stream); + return ret; +} + +int +xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (0, stream, & xd3_decode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +int +xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (0, & xd3_decode_input, + input, 
input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} + + +#if XD3_ENCODER +/* Encodes input into output using the caller's stream; the stream is + * closed on success (close_stream = 1). */ +int +xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (1, stream, & xd3_encode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +/* Encodes input against an optional in-memory source using a + * temporary stream (see xd3_process_memory). */ +int +xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (1, & xd3_encode_input, + input, input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} +#endif + + +/************************************************************* + String matching helpers + *************************************************************/ + +#if XD3_ENCODER +/* Do the initial xd3_string_match() checksum table setup. + * Allocations are delayed until first use to avoid allocation + * sometimes (e.g., perfect matches, zero-length inputs). + * Returns 0 on success or ENOMEM if a table cannot be allocated. */ +static int +xd3_string_match_init (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + + if (DO_LARGE && stream->large_table == NULL) + { + if ((stream->large_table = + (usize_t*) xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + } + + if (DO_SMALL) + { + /* Subsequent calls can return immediately after checking reset. */ + if (stream->small_table != NULL) + { + /* The target hash table is reinitialized once per window. */ + /* TODO: This would not have to be reinitialized if absolute + * offsets were being stored. 
*/ + if (stream->small_reset) + { + stream->small_reset = 0; + memset (stream->small_table, 0, + sizeof (usize_t) * stream->small_hash.size); + } + + return 0; + } + + if ((stream->small_table = + (usize_t*) xd3_alloc0 (stream, + stream->small_hash.size, + sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + + /* The previous-position table (small_prev) is only allocated + * when either chain length is greater than one. */ + if (stream->smatcher.small_lchain > 1 || + stream->smatcher.small_chain > 1) + { + if ((stream->small_prev = + (xd3_slist*) xd3_alloc (stream, + stream->sprevsz, + sizeof (xd3_slist))) == NULL) + { + return ENOMEM; + } + } + } + + return 0; +} + +#if XD3_USE_LARGEFILE64 && !XD3_USE_LARGESIZET +/* This function handles the 32/64bit ambiguity -- file positions are 64bit + * but the hash table for source-offsets is 32bit. The upper 32 bits + * are taken from srcwin_cksum_pos, stepping back by one when low is + * greater than the low 32 bits of srcwin_cksum_pos. */ +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + xoff_t scp = stream->srcwin_cksum_pos; + xoff_t s0 = scp >> 32; + + usize_t sr = (usize_t) scp; + + if (s0 == 0) { + return low; + } + + /* This should not be >= because srcwin_cksum_pos is the next + * position to index. */ + if (low > sr) { + return (--s0 << 32) | low; + } + + return (s0 << 32) | low; +} +#else +/* Without the 32/64bit split the stored offset is returned as is. */ +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + return low; +} +#endif + +/* This function sets up the stream->src fields srcbase, srclen. The + * call is delayed until these values are needed to encode a copy + * address. At this point the decision has to be made. Returns 0, + * or XD3_INTERNAL if the window length does not fit in usize_t. */ +static int +xd3_srcwin_setup (xd3_stream *stream) +{ + xd3_source *src = stream->src; + xoff_t length, x; + + /* Check the undecided state. */ + XD3_ASSERT (src->srclen == 0 && src->srcbase == 0); + + /* Avoid repeating this call. */ + stream->srcwin_decided = 1; + + /* If the stream is flushing, then the iopt buffer was able to + * contain the complete encoding. If no copies were issued no + * source window is actually needed. This prevents the VCDIFF + * header from including source base/len. 
xd3_emit_hdr checks for + * srclen == 0. */ + if (stream->enc_state == ENC_INSTR && stream->match_maxaddr == 0) + { + goto done; + } + + /* Check for overflow, srclen is usize_t - this can't happen unless + * XD3_DEFAULT_SRCBACK and related parameters are extreme - should + * use smaller windows. */ + length = stream->match_maxaddr - stream->match_minaddr; + + x = USIZE_T_MAX; + if (length > x) + { + stream->msg = "source window length overflow (not 64bit)"; + return XD3_INTERNAL; + } + + /* If ENC_INSTR, then we know the exact source window to use because + * no more copies can be issued. */ + if (stream->enc_state == ENC_INSTR) + { + src->srcbase = stream->match_minaddr; + src->srclen = (usize_t) length; + XD3_ASSERT (src->srclen); + goto done; + } + + /* Otherwise, we have to make a guess. More copies may still be + * issued, but we have to decide the source window base and length + * now. The guess is the larger of the range matched so far and + * 1.25 times the available input. + * TODO: This may not work well in practice, more testing needed. */ + src->srcbase = stream->match_minaddr; + src->srclen = xd3_max ((usize_t) length, + stream->avail_in + (stream->avail_in >> 2)); + + if (src->eof_known) + { + /* Note: if the source size is known, we must reduce srclen or + * code that expects to pass a single block w/ getblk == NULL + * will not function, as the code will return GETSRCBLK asking + * for the second block. */ + src->srclen = xd3_min (src->srclen, xd3_source_eof(src) - src->srcbase); + } + IF_DEBUG1 (DP(RINT "[srcwin_setup_constrained] base %"Q"u len %"W"u\n", + src->srcbase, src->srclen)); + + XD3_ASSERT (src->srclen); + done: + /* Set the taroff. This convenience variable is used even when + stream->src == NULL. */ + stream->taroff = src->srclen; + return 0; +} + +/* Sets the bounding region for a newly discovered source match, prior + * to calling xd3_source_extend_match(). This sets the match_maxfwd, + * match_maxback variables. Note: srcpos is an absolute position + * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t. 
+ * Returns 0 if the setup succeeds, or 1 if the source position lies + * outside an already-decided srcbase/srclen window. */ +static int +xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) +{ + xd3_source *const src = stream->src; + usize_t greedy_or_not; + + stream->match_maxback = 0; + stream->match_maxfwd = 0; + stream->match_back = 0; + stream->match_fwd = 0; + + /* This avoids a non-blocking endless loop caused by scanning + * backwards across a block boundary, only to find not enough + * matching bytes to beat the current min_match due to a better lazy + * target match: the re-entry to xd3_string_match() repeats the same + * long match because the input position hasn't changed. TODO: if + * ever duplicates are added to the source hash table, this logic + * won't suffice to avoid loops. See testing/regtest.cc's + * TestNonBlockingProgress test! + * Note: this path also returns 1 (via the bad label). */ + if (srcpos != 0 && srcpos == stream->match_last_srcpos) + { + IF_DEBUG2(DP(RINT "[match_setup] looping failure\n")); + goto bad; + } + + /* Implement src->max_winsize, which prevents the encoder from seeking + * back further than the LRU cache maintaining FIFO discipline, (to + * avoid seeking). This path also returns 1 (via the bad label). */ + if (srcpos < stream->srcwin_cksum_pos && + stream->srcwin_cksum_pos - srcpos > src->max_winsize) + { + IF_DEBUG2(DP(RINT "[match_setup] rejected due to src->max_winsize " + "distance eof=%"Q"u srcpos=%"Q"u max_winsz=%"Q"u\n", + xd3_source_eof (src), + srcpos, src->max_winsize)); + goto bad; + } + + /* There are cases where the above test does not reject a match that + * will experience XD3_TOOFARBACK at the first xd3_getblk call + * because the input may have advanced up to one block beyond the + * actual EOF. */ + IF_DEBUG2(DP(RINT "[match_setup] %"Q"u srcpos %"Q"u, " + "src->max_winsize %"Q"u\n", + stream->total_in + stream->input_position, + srcpos, src->max_winsize)); + + /* Going backwards, the 1.5-pass algorithm allows some + * already-matched input may be covered by a longer source match. 
+ * The greedy algorithm does not allow this. + * TODO: Measure this. */ + if (stream->flags & XD3_BEGREEDY) + { + /* The greedy algorithm allows backward matching to the last + * matched position. */ + greedy_or_not = xd3_iopt_last_matched (stream); + } + else + { + /* The 1.5-pass algorithm allows backward matching to go back as + * far as the unencoded offset, which is updated as instructions + * pass out of the iopt buffer. If this (default) is chosen, it + * means xd3_iopt_erase may be called to eliminate instructions + * when a covering source match is found. */ + greedy_or_not = stream->unencoded_offset; + } + + /* Backward target match limit. */ + XD3_ASSERT (stream->input_position >= greedy_or_not); + stream->match_maxback = stream->input_position - greedy_or_not; + + /* Forward target match limit. */ + XD3_ASSERT (stream->avail_in > stream->input_position); + stream->match_maxfwd = stream->avail_in - stream->input_position; + + /* Now we take the source position into account. It depends whether + * the srclen/srcbase have been decided yet. */ + if (stream->srcwin_decided == 0) + { + /* Unrestricted case: the match can cover the entire source, + * 0--src->size. We compare the usize_t + * match_maxfwd/match_maxback against the xoff_t + * src->size/srcpos values and take the min. */ + /* TODO #if XD3_USE_LARGESIZET ? */ + if (srcpos < stream->match_maxback) + { + stream->match_maxback = (usize_t) srcpos; + } + + if (src->eof_known) + { + xoff_t srcavail = xd3_source_eof (src) - srcpos; + + if (srcavail < stream->match_maxfwd) + { + stream->match_maxfwd = (usize_t) srcavail; + } + } + + IF_DEBUG2(DP(RINT + "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " + "unrestricted maxback %"W"u maxfwd %"W"u\n", + srcpos, + stream->total_in + stream->input_position, + stream->match_maxback, + stream->match_maxfwd)); + goto good; + } + + /* Decided some source window. 
*/ + XD3_ASSERT (src->srclen > 0); + + /* Restricted case: fail if the srcpos lies outside the source window */ + if ((srcpos < src->srcbase) || + (srcpos > (src->srcbase + src->srclen))) + { + IF_DEBUG1(DP(RINT "[match_setup] restricted source window failure\n")); + goto bad; + } + else + { + usize_t srcavail; + + /* Clamp the backward limit to the distance from the window base. */ + srcavail = (usize_t) (srcpos - src->srcbase); + if (srcavail < stream->match_maxback) + { + stream->match_maxback = srcavail; + } + + /* Clamp the forward limit to the distance to the window end. + * NOTE(review): the xoff_t difference is narrowed to usize_t + * here without an explicit cast, unlike the line above. */ + srcavail = src->srcbase + src->srclen - srcpos; + if (srcavail < stream->match_maxfwd) + { + stream->match_maxfwd = srcavail; + } + + IF_DEBUG2(DP(RINT + "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " + "restricted maxback %"W"u maxfwd %"W"u\n", + srcpos, + stream->total_in + stream->input_position, + stream->match_maxback, + stream->match_maxfwd)); + goto good; + } + + good: + /* Accepted: the caller continues with backward extension. */ + stream->match_state = MATCH_BACKWARD; + stream->match_srcpos = srcpos; + stream->match_last_srcpos = srcpos; + return 0; + + bad: + /* Rejected (repeated srcpos, beyond max_winsize, or outside the + * decided window): remember srcpos and resume plain searching. */ + stream->match_state = MATCH_SEARCHING; + stream->match_last_srcpos = srcpos; + return 1; +} + +/* Returns the number of leading bytes, at most n, that are equal in + * s1c and s2c. When UNALIGNED_OK is set and n covers at least eight + * ints, whole ints are compared first, eight per loop iteration; the + * byte loop then resumes from the last int that was examined. */ +static INLINE usize_t +xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, usize_t n) +{ + usize_t i = 0; +#if UNALIGNED_OK + usize_t nint = n / sizeof(int); + + if (nint >> 3) + { + usize_t j = 0; + const int *s1 = (const int*)s1c; + const int *s2 = (const int*)s2c; + usize_t nint_8 = nint - 8; + + while (i <= nint_8 && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++]) { } + + /* i is one past the last int compared: step back to it and + * convert the int index into a byte index. */ + i = (i - 1) * sizeof(int); + } +#endif + + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +/* This function expands the source match backward and forward. It is + * reentrant, since xd3_getblk may return XD3_GETSRCBLK, so most + * variables are kept in xd3_stream. 
There are two callers of this + * function, the string_matching routine when a checksum match is + * discovered, and xd3_encode_input whenever a continuing (or initial) + * match is suspected. The two callers do different things with the + * input_position, thus this function leaves that variable untouched. + * If a match is taken the resulting stream->match_fwd is left + * non-zero. */ +static int +xd3_source_extend_match (xd3_stream *stream) +{ + int ret; + xd3_source *const src = stream->src; + xoff_t matchoff; /* matchoff is the current right/left-boundary of + the source match being tested. */ + usize_t streamoff; /* streamoff is the current right/left-boundary + of the input match being tested. */ + xoff_t tryblk; /* tryblk, tryoff are the block, offset position + of matchoff */ + usize_t tryoff; + usize_t tryrem; /* tryrem is the number of matchable bytes */ + usize_t matched; + + IF_DEBUG2(DP(RINT "[extend match] srcpos %"Q"u\n", + stream->match_srcpos)); + + XD3_ASSERT (src != NULL); + + /* Does it make sense to compute backward match AFTER forward match? */ + if (stream->match_state == MATCH_BACKWARD) + { + /* Note: this code is practically duplicated below, substituting + * match_fwd/match_back and direction. */ + matchoff = stream->match_srcpos - stream->match_back; + streamoff = stream->input_position - stream->match_back; + xd3_blksize_div (matchoff, src, &tryblk, &tryoff); + + /* this loops backward over source blocks */ + while (stream->match_back < stream->match_maxback) + { + /* see if we're backing across a source block boundary */ + if (tryoff == 0) + { + tryoff = src->blksize; + tryblk -= 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + if (ret == XD3_TOOFARBACK) + { + IF_DEBUG2(DP(RINT "[maxback] %"Q"u TOOFARBACK: %"W"u INP %"Q"u CKSUM %"Q"u\n", + tryblk, stream->match_back, + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos)); + + /* the starting position is too far back. 
*/ + if (stream->match_back == 0) + { + XD3_ASSERT(stream->match_fwd == 0); + goto donefwd; + } + + /* search went too far back, continue forward. */ + goto doneback; + } + + /* could be a XD3_GETSRCBLK failure. */ + return ret; + } + + tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back); + + IF_DEBUG2(DP(RINT "[maxback] maxback %"W"u trysrc %"Q"u/%"W"u tgt %"W"u tryrem %"W"u\n", + stream->match_maxback, tryblk, tryoff, streamoff, tryrem)); + + /* TODO: This code can be optimized similar to xd3_forward_match() */ + for (; tryrem != 0; tryrem -= 1, stream->match_back += 1) + { + if (src->curblk[tryoff-1] != stream->next_in[streamoff-1]) + { + goto doneback; + } + + tryoff -= 1; + streamoff -= 1; + } + } + + doneback: + stream->match_state = MATCH_FORWARD; + } + + XD3_ASSERT (stream->match_state == MATCH_FORWARD); + + matchoff = stream->match_srcpos + stream->match_fwd; + streamoff = stream->input_position + stream->match_fwd; + xd3_blksize_div (matchoff, src, & tryblk, & tryoff); + + /* Note: practically the same code as backwards case above: same comments */ + while (stream->match_fwd < stream->match_maxfwd) + { + if (tryoff == src->blksize) + { + tryoff = 0; + tryblk += 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + if (ret == XD3_TOOFARBACK) + { + IF_DEBUG2(DP(RINT "[maxfwd] %"Q"u TOOFARBACK: %"W"u INP %"Q"u CKSUM %"Q"u\n", + tryblk, stream->match_fwd, + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos)); + goto donefwd; + } + + /* could be a XD3_GETSRCBLK failure. */ + return ret; + } + + tryrem = xd3_min(stream->match_maxfwd - stream->match_fwd, + src->onblk - tryoff); + + if (tryrem == 0) + { + /* Generally, this means we have a power-of-two size source + * and we just found the end-of-file, in this case it's an + * empty block. 
*/ + XD3_ASSERT (src->onblk < src->blksize); + break; + } + + matched = xd3_forward_match(src->curblk + tryoff, + stream->next_in + streamoff, + tryrem); + tryoff += matched; + streamoff += matched; + stream->match_fwd += matched; + + if (tryrem != matched) + { + break; + } + } + + donefwd: + stream->match_state = MATCH_SEARCHING; + + IF_DEBUG2(DP(RINT "[extend match] input %"Q"u srcpos %"Q"u len %"W"u\n", + stream->input_position + stream->total_in, + stream->match_srcpos, + stream->match_fwd)); + + /* If the match ends short of the last instruction end, we probably + * don't want it. There is the possibility that a copy ends short + * of the last copy but also goes further back, in which case we + * might want it. This code does not implement such: if so we would + * need more complicated xd3_iopt_erase logic. */ + if (stream->match_fwd < stream->min_match) + { + stream->match_fwd = 0; + } + else + { + usize_t total = stream->match_fwd + stream->match_back; + + /* Correct the variables to remove match_back from the equation. */ + usize_t target_position = stream->input_position - stream->match_back; + usize_t match_length = stream->match_back + stream->match_fwd; + xoff_t match_position = stream->match_srcpos - stream->match_back; + xoff_t match_end = stream->match_srcpos + stream->match_fwd; + + /* At this point we may have to erase any iopt-buffer + * instructions that are fully covered by a backward-extending + * copy. */ + if (stream->match_back > 0) + { + xd3_iopt_erase (stream, target_position, total); + } + + stream->match_back = 0; + + /* Update ranges. The first source match occurs with both + values set to 0. 
*/ + if (stream->match_maxaddr == 0 || + match_position < stream->match_minaddr) + { + stream->match_minaddr = match_position; + } + + if (match_end > stream->match_maxaddr) + { + /* Note: per-window */ + stream->match_maxaddr = match_end; + } + + if (match_end > stream->maxsrcaddr) + { + /* Note: across windows */ + stream->maxsrcaddr = match_end; + } + + /* Debug trace. The format needs one conversion per argument: + * the match counter, the length, the target start/end offsets, + * the source start/end offsets and the same/diff marker. */ + IF_DEBUG2 ({ + static int x = 0; + DP(RINT "[source match:%d] length %"W"u inp %"Q"u-%"Q"u src %"Q"u-%"Q"u (%s)\n", + x++, + match_length, + stream->total_in + target_position, + stream->total_in + target_position + match_length, + match_position, + match_position + match_length, + (stream->total_in + target_position == match_position) ? "same" : "diff"); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ target_position, + /* length */ match_length, + /* address */ match_position, + /* is_source */ 1))) + { + return ret; + } + + /* If the match ends with the available input: */ + if (target_position + match_length == stream->avail_in) + { + /* Setup continuing match for the next window. */ + stream->match_state = MATCH_TARGET; + stream->match_srcpos = match_end; + } + } + + return 0; +} + +/* Update the small hash. Values in the small_table are offset by + * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. */ +static void +xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos) +{ + /* If we are maintaining previous duplicates. */ + if (stream->small_prev) + { + usize_t last_pos = stream->small_table[inx]; + xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; + + /* Note last_pos is offset by HASH_CKOFFSET. */ + pos_list->last_pos = last_pos; + } + + /* Enter the new position into the hash bucket. 
*/ + stream->small_table[inx] = pos + HASH_CKOFFSET; +} + +#if XD3_DEBUG +/* Debug-only check used inside XD3_ASSERT: asserts that the first + * cmp_len bytes of ref0 and inp0 are equal and, when the match stops + * short of inp_max, that the following byte differs. Always + * returns 1. */ +static int +xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, + const uint8_t *inp_max, usize_t cmp_len) +{ + usize_t i; + + for (i = 0; i < cmp_len; i += 1) + { + XD3_ASSERT (ref0[i] == inp0[i]); + } + + if (inp0 + cmp_len < inp_max) + { + XD3_ASSERT (inp0[i] != ref0[i]); + } + + return 1; +} +#endif /* XD3_DEBUG */ + +/* When the hash table indicates a possible small string match, it + * calls this routine to find the best match. The first matching + * position is taken from the small_table, HASH_CKOFFSET is subtracted + * to get the actual position. After checking that match, if previous + * linked lists are in use (because stream->smatcher.small_chain > 1), + * previous matches are tested searching for the longest match. If + * (stream->min_match > MIN_MATCH) then a lazy match is in effect. + * Returns the length of the best match (its position is stored in + * *match_offset), or 0 when a 4- or 5-byte match is judged too far + * back to be worth encoding. + */ +static usize_t +xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset) +{ + usize_t cmp_len; + usize_t match_length = 0; + usize_t chain = (stream->min_match == MIN_MATCH ? + stream->smatcher.small_chain : + stream->smatcher.small_lchain); + const uint8_t *inp_max = stream->next_in + stream->avail_in; + const uint8_t *inp; + const uint8_t *ref; + + SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); + + XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); + + base -= HASH_CKOFFSET; + + again: + + IF_DEBUG2 (DP(RINT "smatch at base=%"W"u inp=%"W"u cksum=%"W"u\n", base, + stream->input_position, scksum)); + + /* For small matches, we can always go to the end-of-input because + * the matching position must be less than the input position. */ + XD3_ASSERT (base < stream->input_position); + + ref = stream->next_in + base; + inp = stream->next_in + stream->input_position; + + SMALL_HASH_DEBUG2 (stream, ref); + + /* Expand potential match forward. 
*/ + while (inp < inp_max && *inp == *ref) + { + ++inp; + ++ref; + } + + cmp_len = (usize_t)(inp - (stream->next_in + stream->input_position)); + + /* Verify correctness */ + XD3_ASSERT (xd3_check_smatch (stream->next_in + base, + stream->next_in + stream->input_position, + inp_max, cmp_len)); + + /* Update longest match */ + if (cmp_len > match_length) + { + ( match_length) = cmp_len; + (*match_offset) = base; + + /* Stop if we match the entire input or have a long_enough match. */ + if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) + { + goto done; + } + } + + /* If we have not reached the chain limit, see if there is another + previous position. + NOTE(review): this walk reads stream->small_prev; presumably + chain is 1 whenever small_prev was not allocated, so the loop + body is skipped -- confirm against the matcher configs. */ + while (--chain != 0) + { + /* Calculate the previous offset. */ + usize_t prev_pos = stream->small_prev[base & stream->sprevmask].last_pos; + usize_t diff_pos; + + /* A stored value of 0 marks an empty entry. */ + if (prev_pos == 0) + { + break; + } + + prev_pos -= HASH_CKOFFSET; + + /* Only follow links that move to an earlier position. */ + if (prev_pos > base) + { + break; + } + + base = prev_pos; + + XD3_ASSERT (stream->input_position > base); + diff_pos = stream->input_position - base; + + /* Stop searching if we go beyond sprevsz, since those entries + * are for unrelated checksum entries. */ + if (diff_pos & ~stream->sprevmask) + { + break; + } + + goto again; + } + + done: + /* Crude efficiency test: if the match is very short and very far back, it's + * unlikely to help, but the exact calculation requires knowing the state of + * the address cache and adjacent instructions, which we can't do here. 
+ * Rather than encode a probably inefficient copy here and check it later + * (which complicates the code a lot), do this: + */ + if (match_length == 4 && stream->input_position - (*match_offset) >= 1<<14) + { + /* It probably takes >2 bytes to encode an address >= 2^14 from here */ + return 0; + } + if (match_length == 5 && stream->input_position - (*match_offset) >= 1<<21) + { + /* It probably takes >3 bytes to encode an address >= 2^21 from here */ + return 0; + } + + /* It's unlikely that a window is large enough for the (match_length == 6 && + * address >= 2^28) check */ + return match_length; +} + +#if XD3_DEBUG +/* Debug-only: recomputes the small checksum over small_look bytes at + * inp and asserts that it equals x_cksum. */ +static void +xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum) +{ + uint32_t state; + uint32_t cksum = xd3_scksum (&state, inp, stream->smatcher.small_look); + + XD3_ASSERT (cksum == x_cksum); +} + +/* Debug-only: recomputes the large checksum over large_look bytes at + * inp and asserts that it equals x_cksum. */ +static void +xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_cksum) +{ + usize_t cksum = xd3_large_cksum (&stream->large_hash, inp, stream->smatcher.large_look); + XD3_ASSERT (cksum == x_cksum); +} +/* Debug-only: recomputes the run state with xd3_comprun over + * small_look bytes at inp and asserts that it is consistent with the + * expected run length x_run_l and run byte *x_run_c. */ +static void +xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + usize_t x_run_l, + uint8_t *x_run_c) +{ + usize_t slook = stream->smatcher.small_look; + uint8_t run_c; + usize_t run_l = xd3_comprun (inp, slook, &run_c); + + XD3_ASSERT (run_l == 0 || run_c == *x_run_c); + XD3_ASSERT (x_run_l > slook || run_l == x_run_l); +} +#endif /* XD3_DEBUG */ + +/* This function computes more source checksums to advance the window. + * Called at every entrance to the string-match loop and each time + * stream->input_position reaches the value returned as + * *next_move_point. NB: this is one of the most expensive functions + * in this code and also the most critical for good compression. 
+ */ +static int +xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) +{ + /* the source file is indexed until this point */ + xoff_t target_cksum_pos; + /* the absolute target file input position */ + xoff_t absolute_input_pos; + + if (stream->src->eof_known) + { + xoff_t source_size = xd3_source_eof (stream->src); + XD3_ASSERT(stream->srcwin_cksum_pos <= source_size); + + if (stream->srcwin_cksum_pos == source_size) + { + *next_move_point = USIZE_T_MAX; + return 0; + } + } + + absolute_input_pos = stream->total_in + stream->input_position; + + /* Immediately read the entire window. + * + * Note: this reverses a long held policy, at this point in the + * code, of advancing relatively slowly as the input is read, which + * results in better compression for very-similar inputs, but worse + * compression where data is deleted near the beginning of the file. + * + * The new policy is simpler, somewhat slower and can benefit, or + * slightly worsen, compression performance. */ + if (absolute_input_pos < stream->src->max_winsize / 2) + { + target_cksum_pos = stream->src->max_winsize; + } + else + { + /* TODO: The addition of 2 blocks here is arbitrary. Do a + * better job of stream alignment based on observed source copy + * addresses, and when both input sizes are known, the + * difference in size. */ + target_cksum_pos = absolute_input_pos + + stream->src->max_winsize / 2 + + stream->src->blksize * 2; + target_cksum_pos &= ~stream->src->maskby; + } + + /* A long match may have extended past srcwin_cksum_pos. Don't + * start checksumming already-matched source data. 
*/ + if (stream->maxsrcaddr > stream->srcwin_cksum_pos) + { + stream->srcwin_cksum_pos = stream->maxsrcaddr; + } + + if (target_cksum_pos < stream->srcwin_cksum_pos) + { + target_cksum_pos = stream->srcwin_cksum_pos; + } + + while (stream->srcwin_cksum_pos < target_cksum_pos && + (!stream->src->eof_known || + stream->srcwin_cksum_pos < xd3_source_eof (stream->src))) + { + xoff_t blkno; + xoff_t blkbaseoffset; + usize_t blkrem; + ssize_t oldpos; /* Using ssize_t because of a */ + ssize_t blkpos; /* do { blkpos-- } + while (blkpos >= oldpos); */ + int ret; + xd3_blksize_div (stream->srcwin_cksum_pos, + stream->src, &blkno, &blkrem); + oldpos = blkrem; + + if ((ret = xd3_getblk (stream, blkno))) + { + /* TOOFARBACK should never occur here, since we read forward. */ + if (ret == XD3_TOOFARBACK) + { + ret = XD3_INTERNAL; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] async getblk return for %"Q"u: %s\n", + blkno, xd3_strerror (ret))); + return ret; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] block %"Q"u T=%"Q"u S=%"Q"u L=%"Q"u EOF=%"Q"u %s\n", + blkno, + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + target_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? "known" : "unknown")); + + blkpos = xd3_bytes_on_srcblk (stream->src, blkno); + + if (blkpos < (ssize_t) stream->smatcher.large_look) + { + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + IF_DEBUG2 (DP(RINT "[srcwin_move_point] continue (end-of-block): %"Z"d\n", blkpos)); + continue; + } + + /* This inserts checksums for the entire block, in reverse, + * starting from the end of the block. This logic does not test + * stream->srcwin_cksum_pos because it always advances it to the + * start of the next block. + * + * oldpos is the srcwin_cksum_pos within this block. blkpos is + * the number of bytes available. Each iteration inspects + * large_look bytes then steps back large_step bytes. The + * if-stmt above ensures at least one large_look of data. 
*/ + blkpos -= stream->smatcher.large_look; + blkbaseoffset = stream->src->blksize * blkno; + + do + { + /* TODO: This would be significantly faster if the compiler + * knew stream->smatcher.large_look (which the template for + * xd3_string_match_* allows). */ + usize_t cksum = xd3_large_cksum (&stream->large_hash, + stream->src->curblk + blkpos, + stream->smatcher.large_look); + usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); + + stream->large_table[hval] = + (usize_t) (blkbaseoffset + + (xoff_t)(blkpos + HASH_CKOFFSET)); + + IF_DEBUG (stream->large_ckcnt += 1); + + blkpos -= stream->smatcher.large_step; + } + while (blkpos >= oldpos); + + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + } + + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] exited loop T=%"Q"u " + "S=%"Q"u EOF=%"Q"u %s\n", + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? "known" : "unknown")); + + if (stream->src->eof_known) + { + xoff_t source_size = xd3_source_eof (stream->src); + if (stream->srcwin_cksum_pos >= source_size) + { + /* This invariant is needed for xd3_source_cksum_offset() */ + stream->srcwin_cksum_pos = source_size; + *next_move_point = USIZE_T_MAX; + IF_DEBUG1 (DP(RINT + "[srcwin_move_point] finished with source input\n")); + return 0; + } + } + + /* How long until this function should be called again. */ + XD3_ASSERT(stream->srcwin_cksum_pos >= target_cksum_pos); + + *next_move_point = stream->input_position + + stream->src->blksize - + ((stream->srcwin_cksum_pos - target_cksum_pos) & stream->src->maskby); + + IF_DEBUG2 (DP(RINT + "[srcwin_move_point] finished T=%"Q"u " + "S=%"Q"u L=%"Q"u EOF=%"Q"u %s again in %"W"u\n", + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + target_cksum_pos, + xd3_source_eof (stream->src), + stream->src->eof_known ? 
"known" : "unknown", + *next_move_point - stream->input_position)); + + return 0; +} + +#endif /* XD3_ENCODER */ + +/******************************************************************** + TEMPLATE pass + *********************************************************************/ + +#endif /* __XDELTA3_C_INLINE_PASS__ */ +#ifdef __XDELTA3_C_TEMPLATE_PASS__ + +#if XD3_ENCODER + +/******************************************************************** + Templates + *******************************************************************/ + +/* Template macros */ +#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) +#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) +#define XD3_TEMPLATE3(x,n) x ## n +#define XD3_STRINGIFY(x) XD3_STRINGIFY2(x) +#define XD3_STRINGIFY2(x) #x + +static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream); + +static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = +{ + XD3_STRINGIFY(TEMPLATE), + XD3_TEMPLATE(xd3_string_match_), +#if SOFTCFG == 1 + 0, 0, 0, 0, 0, 0, 0 +#else + LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH +#endif +}; + +static int +XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + const int DO_RUN = (1); + + const uint8_t *inp; + uint32_t scksum = 0; + uint32_t scksum_state = 0; + usize_t lcksum = 0; + usize_t sinx; + usize_t linx; + uint8_t run_c; + usize_t run_l; + int ret; + usize_t match_length; + usize_t match_offset = 0; + usize_t next_move_point = 0; + + IF_DEBUG2(DP(RINT "[string_match] initial entry %"W"u\n", stream->input_position)); + + /* If there will be no compression due to settings or short input, + * skip it entirely. */ + if (! (DO_SMALL || DO_LARGE || DO_RUN) || + stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + if ((ret = xd3_string_match_init (stream))) { return ret; } + + /* The restartloop label is reached when the incremental loop state + * needs to be reset. 
*/ + restartloop: + + IF_DEBUG2(DP(RINT "[string_match] restartloop %"W"u\n", stream->input_position)); + + /* If there is not enough input remaining for any kind of match, + skip it. */ + if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + /* Now reset the incremental loop state: */ + + /* The min_match variable is updated to avoid matching the same lazy + * match over and over again. For example, if you find a (small) + * match of length 9 at one position, you will likely find a match + * of length 8 at the next position. */ + if (xd3_iopt_last_matched (stream) > stream->input_position) + { + stream->min_match = xd3_max (MIN_MATCH, + 1 + xd3_iopt_last_matched(stream) - + stream->input_position); + } + else + { + stream->min_match = MIN_MATCH; + } + + /* The current input byte. */ + inp = stream->next_in + stream->input_position; + + /* Small match state. */ + if (DO_SMALL) + { + scksum = xd3_scksum (&scksum_state, inp, SLOOK); + } + + /* Run state. */ + if (DO_RUN) + { + run_l = xd3_comprun (inp, SLOOK, & run_c); + } + + /* Large match state. We continue the loop even after not enough + * bytes for LLOOK remain, so always check stream->input_position in + * DO_LARGE code. */ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + /* Source window: next_move_point is the point that + * stream->input_position must reach before computing more + * source checksum. Note: this is called unconditionally + * the first time after reentry, subsequent calls will be + * avoided if next_move_point is > input_position */ + if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + lcksum = xd3_large_cksum (&stream->large_hash, inp, LLOOK); + } + + /* TRYLAZYLEN: True if a certain length match should be followed by + * lazy search. This checks that LEN is shorter than MAXLAZY and + * that there is enough leftover data to consider lazy matching. 
+ * "Enough" is set to 2 since the next match will start at the next + * offset, it must match two extra characters. */ +#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) \ + && (POS) + (LEN) <= (MAX) - 2) + + /* HANDLELAZY: This statement is called each time an instruction is + * emitted (three cases). If the instruction is large enough, the + * loop is restarted, otherwise lazy matching may ensue. */ +#define HANDLELAZY(mlen) \ + if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \ + { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ + else \ + { stream->input_position += (mlen); goto restartloop; } + + /* Now loop over one input byte at a time until a match is found... */ + for (;; inp += 1, stream->input_position += 1) + { + /* Now we try three kinds of string match in order of expense: + * run, large match, small match. */ + + /* Expand the start of a RUN. The test for (run_l == SLOOK) + * avoids repeating this check when we pass through a run area + * performing lazy matching. The run is only expanded once when + * the min_match is first reached. If lazy matching is + * performed, the run_l variable will remain inconsistent until + * the first non-running input character is reached, at which + * time the run_l may then again grow to SLOOK. */ + if (DO_RUN && run_l == SLOOK) + { + usize_t max_len = stream->avail_in - stream->input_position; + + IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, &run_c)); + + while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; } + + /* Output a RUN instruction. */ + if (run_l >= stream->min_match && run_l >= MIN_RUN) + { + if ((ret = xd3_emit_run (stream, stream->input_position, + run_l, &run_c))) { return ret; } + + HANDLELAZY (run_l); + } + } + + /* If there is enough input remaining. 
*/ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + if ((stream->input_position >= next_move_point) && + (ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + linx = xd3_checksum_hash (& stream->large_hash, lcksum); + + IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum)); + + if (stream->large_table[linx] != 0) + { + /* the match_setup will fail if the source window has + * been decided and the match lies outside it. + * OPT: Consider forcing a window at this point to + * permit a new source window. */ + xoff_t adj_offset = + xd3_source_cksum_offset(stream, + stream->large_table[linx] - + HASH_CKOFFSET); + if (xd3_source_match_setup (stream, adj_offset) == 0) + { + if ((ret = xd3_source_extend_match (stream))) + { + return ret; + } + + /* Update stream position. match_fwd is zero if no + * match. */ + if (stream->match_fwd > 0) + { + HANDLELAZY (stream->match_fwd); + } + } + } + } + + /* Small matches. */ + if (DO_SMALL) + { + sinx = xd3_checksum_hash (& stream->small_hash, scksum); + + /* Verify incremental state in debugging mode. */ + IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); + + /* Search for the longest match */ + if (stream->small_table[sinx] != 0) + { + match_length = xd3_smatch (stream, + stream->small_table[sinx], + scksum, + & match_offset); + } + else + { + match_length = 0; + } + + /* Insert a hash for this string. 
*/ + xd3_scksum_insert (stream, sinx, scksum, stream->input_position); + + /* Maybe output a COPY instruction */ + if (match_length >= stream->min_match) + { + IF_DEBUG2 ({ + static int x = 0; + DP(RINT "[target match:%d] " + "(-%"W"d) [ %"W"u bytes ]\n", + x++, + stream->input_position, + stream->input_position + match_length, + match_offset, + match_offset + match_length, + stream->input_position - match_offset, + match_length); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ + stream->input_position, + /* length */ match_length, + /* address */ (xoff_t) match_offset, + /* is_source */ 0))) + { + return ret; + } + + /* Copy instruction. */ + HANDLELAZY (match_length); + } + } + + /* The logic above prevents excess work during lazy matching by + * increasing min_match to avoid smaller matches. Each time we + * advance stream->input_position by one, the minimum match + * shortens as well. */ + if (stream->min_match > MIN_MATCH) + { + stream->min_match -= 1; + } + + updateone: + + /* See if there are no more incremental cksums to compute. */ + if (stream->input_position + SLOOK == stream->avail_in) + { + goto loopnomore; + } + + /* Compute next RUN, CKSUM */ + if (DO_RUN) + { + NEXTRUN (inp[SLOOK]); + } + + if (DO_SMALL) + { + scksum = xd3_small_cksum_update (&scksum_state, inp, SLOOK); + } + + if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) + { + lcksum = xd3_large_cksum_update (&stream->large_hash, lcksum, inp, LLOOK); + } + } + + loopnomore: + return 0; +} + +#endif /* XD3_ENCODER */ +#endif /* __XDELTA3_C_TEMPLATE_PASS__ */ diff --git a/deps/xdelta3/xdelta3.h b/deps/xdelta3/xdelta3.h new file mode 100644 index 0000000000..92e1df8fb6 --- /dev/null +++ b/deps/xdelta3/xdelta3.h @@ -0,0 +1,1502 @@ +/* xdelta3 - delta compression tools and library + Copyright 2016 Joshua MacDonald + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* To learn more about Xdelta, start by reading xdelta3.c. If you are + * ready to use the API, continue reading here. There are two + * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen + * or so related calls. This interface is styled after Zlib. */ + +#ifndef _XDELTA3_H_ +#define _XDELTA3_H_ + +#define _POSIX_SOURCE 200112L +#define _ISOC99_SOURCE +#define _C99_SOURCE +/* To include RetroArch's INLINE macro */ +#include "retro_inline.h" + +#if HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +/****************************************************************/ + +/* Default configured value of stream->winsize. If the program + * supplies xd3_encode_input() with data smaller than winsize the + * stream will automatically buffer the input, otherwise the input + * buffer is used directly. + */ +#ifndef XD3_DEFAULT_WINSIZE +#define XD3_DEFAULT_WINSIZE (1U << 23) +#endif + +/* Default total size of the source window used in xdelta3-main.h */ +#ifndef XD3_DEFAULT_SRCWINSZ +#define XD3_DEFAULT_SRCWINSZ (1U << 26) +#endif + +/* When Xdelta requests a memory allocation for certain buffers, it + * rounds up to units of at least this size. The code assumes (and + * asserts) that this is a power-of-two. */ +#ifndef XD3_ALLOCSIZE +#define XD3_ALLOCSIZE (1U<<14) +#endif + +/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect + * decoders against malicious files. The decoder will never decode a + * window larger than this. 
If the file specifies VCD_TARGET the + * decoder may require two buffers of this size. + * + * 8-16MB is reasonable, probably don't need to go larger. */ +#ifndef XD3_HARDMAXWINSIZE +#define XD3_HARDMAXWINSIZE (1U<<26) +#endif +/* The IOPT_SIZE value sets the size of a buffer used to batch + * overlapping copy instructions before they are optimized by picking + * the best non-overlapping ranges. The larger this buffer, the + * longer a forced xd3_srcwin_setup() decision is held off. Setting + * this value to 0 causes an unlimited buffer to be used. */ +#ifndef XD3_DEFAULT_IOPT_SIZE +#define XD3_DEFAULT_IOPT_SIZE (1U<<15) +#endif + +/* The maximum distance backward to search for small matches */ +#ifndef XD3_DEFAULT_SPREVSZ +#define XD3_DEFAULT_SPREVSZ (1U<<18) +#endif + +/* The default compression level */ +#ifndef XD3_DEFAULT_LEVEL +#define XD3_DEFAULT_LEVEL 3 +#endif + +#ifndef XD3_DEFAULT_SECONDARY_LEVEL +#define XD3_DEFAULT_SECONDARY_LEVEL 6 +#endif + +/* Sizes and addresses within VCDIFF windows are represented as usize_t + * + * For source-file offsets and total file sizes, total input and + * output counts, the xoff_t type is used. The decoder and encoder + * generally check for overflow of the xoff_t size (this is tested at + * the 32bit boundary [xdelta3-test.h]). 
+ */ +#ifndef _WIN32 +#define __STDC_FORMAT_MACROS +#include +#include +#else /* WIN32 case */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#include + +/* _MSC_VER is defined by Microsoft tools, not by Mingw32 */ +#ifdef _MSC_VER +typedef signed int ssize_t; +typedef int pid_t; +#if _MSC_VER < 1600 +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +typedef ULONGLONG uint64_t; +#else /* _MSC_VER >= 1600 */ +/* For MSVC10 and above */ +#include +#define inline __inline +#endif /* _MSC_VER < 1600 */ +#else /* _MSC_VER not defined */ +/* Mingw32 */ +#include +#endif /* _MSC_VER defined */ + +#endif /* _WIN32 defined */ + +#ifndef XD3_USE_LARGEFILE64 +#if SIZE_MAX == UINT64_MAX +#define XD3_USE_LARGEFILE64 1 +#else +#define XD3_USE_LARGEFILE64 0 +#endif +#endif + +/* The source window size is limited to 2GB unless + * XD3_USE_LARGESIZET is defined to 1. */ +#ifndef XD3_USE_LARGESIZET +#if SIZE_MAX == UINT64_MAX +#define XD3_USE_LARGESIZET 1 +#else +#define XD3_USE_LARGESIZET 0 +#endif +#endif + +#if SIZE_MAX == UINT64_MAX +#define SIZEOF_SIZE_T 8 +#elif SIZE_MAX == UINT32_MAX +#define SIZEOF_SIZE_T 4 +#endif + +#if ULONG_MAX == UINT64_MAX +#define SIZEOF_UNSIGNED_LONG 8 +#elif ULONG_MAX == UINT32_MAX +#define SIZEOF_UNSIGNED_LONG 4 +#endif + +#if UINT_MAX == UINT64_MAX +#define SIZEOF_UNSIGNED_INT 8 +#elif UINT_MAX == UINT32_MAX +#define SIZEOF_UNSIGNED_INT 4 +#endif + +#if defined(ULLONG_MAX) || defined(ULONG_LONG_MAX) +#if ULLONG_MAX == UINT64_MAX || ULONG_LONG_MAX == UINT64_MAX +#define SIZEOF_UNSIGNED_LONG_LONG 8 +#elif ULLONG_MAX == UINT32_MAX || ULONG_LONG_MAX == UINT32_MAX +#define SIZEOF_UNSIGNED_LONG_LONG 4 +#endif +#endif /* ULLONG_MAX defined or ULONG_LONG_MAX defined */ + +/* Settings based on the size of xoff_t (32 vs 64 file offsets) */ +#if XD3_USE_LARGEFILE64 +/* xoff_t is a 64-bit type */ +#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops. 
*/ + +#ifndef _LARGEFILE_SOURCE +#define _LARGEFILE_SOURCE +#endif + +#ifndef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif + +_Static_assert(SIZEOF_SIZE_T == sizeof(size_t), "SIZEOF_SIZE_T not correctly set"); + +#ifdef SIZEOF_UNSIGNED_LONG_LONG +_Static_assert(SIZEOF_UNSIGNED_LONG_LONG == sizeof(unsigned long long), "SIZEOF_UNSIGNED_LONG_LONG not correctly set"); +#endif + +/* Set a xoff_t typedef and the "Q" printf insert. */ +#if defined(_WIN32) +typedef uint64_t xoff_t; +#define Q "I64" +#elif SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long xoff_t; +#define Q "l" +#elif SIZEOF_SIZE_T == 8 +typedef size_t xoff_t; +#define Q "z" +#elif SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long xoff_t; +#define Q "ll" +#endif /* typedef and #define Q */ + +#define SIZEOF_XOFF_T 8 + +#else /* XD3_USE_LARGEFILE64 == 0 */ + +#if SIZEOF_UNSIGNED_INT == 4 +typedef unsigned int xoff_t; +#elif SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long xoff_t; +#else +typedef uint32_t xoff_t; +#endif /* xoff_t is 32 bits */ + +#define SIZEOF_XOFF_T 4 +#define Q +#endif /* 64 vs 32 bit xoff_t */ + +/* Settings based on the size of usize_t (32 and 64 bit window size) */ +#if XD3_USE_LARGESIZET + +/* Set a usize_t typedef and the "W" printf insert. 
*/ +#if defined(_WIN32) +typedef uint64_t usize_t; +#define W "I64" +#elif SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long usize_t; +#define W "l" +#elif SIZEOF_SIZE_T == 8 +typedef size_t usize_t; +#define W "z" +#elif SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long usize_t; +#define W "ll" +#endif /* typedef and #define W */ + +#define SIZEOF_USIZE_T 8 + +#else /* XD3_USE_LARGESIZET == 0 */ + +#if SIZEOF_UNSIGNED_INT == 4 +typedef unsigned int usize_t; +#elif SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long usize_t; +#else +typedef uint32_t usize_t; +#endif /* usize_t is 32 bits */ + +#define SIZEOF_USIZE_T 4 +#define W + +#endif /* 64 vs 32 bit usize_t */ + +/* Settings based on the size of size_t (the system-provided, + * usually-but-maybe-not an unsigned type) */ +#if SIZEOF_SIZE_T == 4 +#define Z "z" +#elif SIZEOF_SIZE_T == 8 +#ifdef _WIN32 +#define Z "I64" +#else /* !_WIN32 */ +#define Z "z" +#endif /* Windows or not */ +#else +#error Bad configure script +#endif /* size_t printf flags */ + +#define USE_UINT32 (SIZEOF_USIZE_T == 4 || \ + SIZEOF_XOFF_T == 4 || REGRESSION_TEST) +#define USE_UINT64 (SIZEOF_USIZE_T == 8 || \ + SIZEOF_XOFF_T == 8 || REGRESSION_TEST) + +#ifndef UNALIGNED_OK +#ifdef HAVE_ALIGNED_ACCESS_REQUIRED +#define UNALIGNED_OK 0 +#else +/* This generally includes all Windows builds. */ +#define UNALIGNED_OK 1 +#endif +#endif + +/**********************************************************************/ + +/* Whether to build the encoder, otherwise only build the decoder. */ +#ifndef XD3_ENCODER +#define XD3_ENCODER 0 +#endif + +/* The code returned when main() fails, also defined in system + includes. */ +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +/* REGRESSION TEST enables the "xdelta3 test" command, which runs a + series of self-tests. */ +#ifndef REGRESSION_TEST +#define REGRESSION_TEST 0 +#endif + +/* XD3_DEBUG=1 enables assertions and various statistics. 
Levels > 1 + * enable some additional output only useful during development and + * debugging. */ +#ifndef XD3_DEBUG +#define XD3_DEBUG 0 +#endif + +#ifndef PYTHON_MODULE +#define PYTHON_MODULE 0 +#endif + +#ifndef SWIG_MODULE +#define SWIG_MODULE 0 +#endif + +#ifndef NOT_MAIN +#define NOT_MAIN 0 +#endif + +/* There are three string matching functions supplied: one fast, one + * slow (default), and one soft-configurable. To disable any of + * these, use the following definitions. */ +#ifndef XD3_BUILD_SLOW +#define XD3_BUILD_SLOW 1 +#endif +#ifndef XD3_BUILD_FAST +#define XD3_BUILD_FAST 1 +#endif +#ifndef XD3_BUILD_FASTER +#define XD3_BUILD_FASTER 1 +#endif +#ifndef XD3_BUILD_FASTEST +#define XD3_BUILD_FASTEST 1 +#endif +#ifndef XD3_BUILD_SOFT +#define XD3_BUILD_SOFT 1 +#endif +#ifndef XD3_BUILD_DEFAULT +#define XD3_BUILD_DEFAULT 1 +#endif + +#if XD3_DEBUG +#include +#endif + +typedef struct _xd3_stream xd3_stream; +typedef struct _xd3_source xd3_source; +typedef struct _xd3_hash_cfg xd3_hash_cfg; +typedef struct _xd3_smatcher xd3_smatcher; +typedef struct _xd3_rinst xd3_rinst; +typedef struct _xd3_dinst xd3_dinst; +typedef struct _xd3_hinst xd3_hinst; +typedef struct _xd3_winst xd3_winst; +typedef struct _xd3_rpage xd3_rpage; +typedef struct _xd3_addr_cache xd3_addr_cache; +typedef struct _xd3_output xd3_output; +typedef struct _xd3_desect xd3_desect; +typedef struct _xd3_iopt_buflist xd3_iopt_buflist; +typedef struct _xd3_rlist xd3_rlist; +typedef struct _xd3_sec_type xd3_sec_type; +typedef struct _xd3_sec_cfg xd3_sec_cfg; +typedef struct _xd3_sec_stream xd3_sec_stream; +typedef struct _xd3_config xd3_config; +typedef struct _xd3_code_table_desc xd3_code_table_desc; +typedef struct _xd3_code_table_sizes xd3_code_table_sizes; +typedef struct _xd3_slist xd3_slist; +typedef struct _xd3_whole_state xd3_whole_state; +typedef struct _xd3_wininfo xd3_wininfo; + +/* The stream configuration has three callback functions, all of + * which may be supplied with NULL values. 
If config->getblk is + * provided as NULL, the stream returns XD3_GETSRCBLK. */ + +typedef void* (xd3_alloc_func) (void *opaque, + size_t items, + usize_t size); +typedef void (xd3_free_func) (void *opaque, + void *address); + +typedef int (xd3_getblk_func) (xd3_stream *stream, + xd3_source *source, + xoff_t blkno); + +typedef const xd3_dinst* (xd3_code_table_func) (void); + + +#ifdef _WIN32 +#define vsnprintf_func _vsnprintf +#define snprintf_func _snprintf +#else +#define vsnprintf_func vsnprintf +#define snprintf_func snprintf +#endif + +/* Type used for short snprintf calls. */ +typedef struct { + char buf[48]; +} shortbuf; + +#ifndef PRINTF_ATTRIBUTE +#ifdef __GNUC__ +#define PRINTF_ATTRIBUTE(x,y) __attribute__ ((__format__ (__printf__, x, y))) +#else +#define PRINTF_ATTRIBUTE(x,y) +#endif +#endif + +/* Underlying xprintf() */ +int xsnprintf_func (char *str, size_t n, const char *fmt, ...) + PRINTF_ATTRIBUTE(3,4); + +/* XPR(NT "", ...) (used by main) prefixes an "xdelta3: " to the output. */ +void xprintf(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2); +#define XPR xprintf +#define NT "xdelta3: " +#define NTR "" +/* DP(RINT ...) */ +#define DP xprintf +#define RINT "" + +#if XD3_DEBUG +#define XD3_ASSERT(x) \ + do { \ + if (! (x)) { \ + DP(RINT "%s:%d: XD3 assertion failed: %s\n", \ + __FILE__, __LINE__, #x); \ + abort (); } } while (0) +#else +#define XD3_ASSERT(x) (void)0 +#endif /* XD3_DEBUG */ + +#define xd3_max(x,y) ((x) < (y) ? (y) : (x)) +#define xd3_min(x,y) ((x) < (y) ? (x) : (y)) + +/**************************************************************** + PUBLIC ENUMS + ******************************************************************/ + +/* These are the five ordinary status codes returned by the + * xd3_encode_input() and xd3_decode_input() state machines. 
*/ +typedef enum { + + /* An application must be prepared to handle these five return + * values from either xd3_encode_input or xd3_decode_input, except + * in the case of no-source compression, in which case XD3_GETSRCBLK + * is never returned. More detailed comments for these are given in + * xd3_encode_input and xd3_decode_input comments, below. */ + XD3_INPUT = -17703, /* need input */ + XD3_OUTPUT = -17704, /* have output */ + XD3_GETSRCBLK = -17705, /* need a block of source input (with no + * xd3_getblk function), a chance to do + * non-blocking read. */ + XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & + first window header */ + XD3_WINSTART = -17707, /* notification: returned before a window is + * processed, giving a chance to + * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that + * window. */ + XD3_WINFINISH = -17708, /* notification: returned after + encode/decode & output for a window */ + XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by + getblk() if the block is too old */ + XD3_INTERNAL = -17710, /* internal error */ + XD3_INVALID = -17711, /* invalid config */ + XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */ + XD3_NOSECOND = -17713, /* when secondary compression finds no + improvement. */ + XD3_UNIMPLEMENTED = -17714 /* currently VCD_TARGET, VCD_CODETABLE */ +} xd3_rvalues; + +/* special values in config->flags */ +typedef enum +{ + XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_FLUSH = (1 << 4), /* flush the stream buffer to + prepare for + xd3_stream_close(). 
*/ + + XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */ + XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */ + XD3_SEC_LZMA = (1 << 24), /* use LZMA secondary */ + + XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK | XD3_SEC_LZMA), + + XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of + the data section. */ + XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of + the inst section. */ + XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of + the addr section. */ + + XD3_SEC_NOALL = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR), + + XD3_ADLER32 = (1 << 10), /* enable checksum computation in + the encoder. */ + XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in + the decoder. */ + + XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data + * compression feature, only search + * the source, not the target. */ + XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass + * algorithm", instead use greedy + * matching. Greedy is off by + * default. */ + XD3_ADLER32_RECODE = (1 << 15), /* used by "recode". */ + + /* 4 bits to set the compression level the same as the command-line + * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag, + * and is independent of compression level). This is for + * convenience, especially with xd3_encode_memory(). */ + + XD3_COMPLEVEL_SHIFT = 20, /* 20 - 23 */ + XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT) + +} xd3_flags; + +/* The values of this enumeration are set in xd3_config using the + * smatch_cfg variable. It can be set to default, slow, fast, etc., + * and soft. */ +typedef enum +{ + XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, + else default. 
*/ + XD3_SMATCH_SLOW = 1, + XD3_SMATCH_FAST = 2, + XD3_SMATCH_FASTER = 3, + XD3_SMATCH_FASTEST = 4, + XD3_SMATCH_SOFT = 5 +} xd3_smatch_cfg; + +/********************************************************************* + PRIVATE ENUMS +**********************************************************************/ + +/* stream->match_state is part of the xd3_encode_input state machine + * for source matching: + * + * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching + * 2. this state spans encoder windows: a match and end-of-window + * will continue in the next window + * 3. the initial target byte and source byte are a presumed match, + * to avoid some computation in case the inputs are identical. + */ +typedef enum { + + MATCH_TARGET = 0, /* in this state, attempt to match the start of + * the target with the previously set source + * address (initially 0). */ + MATCH_BACKWARD = 1, /* currently expanding a match backward in the + source/target. */ + MATCH_FORWARD = 2, /* currently expanding a match forward in the + source/target. */ + MATCH_SEARCHING = 3 /* currently searching for a match. */ + +} xd3_match_state; + +/* The xd3_encode_input state machine steps through these states in + * the following order. The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_close_stream(). + */ +typedef enum { + + ENC_INIT = 0, /* xd3_encode_input has never been called. */ + ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */ + ENC_SEARCH = 2, /* currently searching for matches. */ + ENC_INSTR = 3, /* currently formatting output. */ + ENC_FLUSH = 4, /* currently emitting output. */ + ENC_POSTOUT = 5, /* after an output section. */ + ENC_POSTWIN = 6, /* after all output sections. */ + ENC_ABORTED = 7 /* abort. */ +} xd3_encode_state; + +/* The xd3_decode_input state machine steps through these states in + * the following order.
The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_close_stream(). + * + * 0-8: the VCDIFF header + * 9-18: the VCDIFF window header + * 19-21: the three primary sections: data, inst, addr + * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK, + * 23: return XD3_WINFINISH, set state=9 to decode more input + */ +typedef enum { + + DEC_VCHEAD = 0, /* VCDIFF header */ + DEC_HDRIND = 1, /* header indicator */ + + DEC_SECONDID = 2, /* secondary compressor ID */ + + DEC_TABLEN = 3, /* code table length */ + DEC_NEAR = 4, /* code table near */ + DEC_SAME = 5, /* code table same */ + DEC_TABDAT = 6, /* code table data */ + + DEC_APPLEN = 7, /* application data length */ + DEC_APPDAT = 8, /* application data */ + + DEC_WININD = 9, /* window indicator */ + + DEC_CPYLEN = 10, /* copy window length */ + DEC_CPYOFF = 11, /* copy window offset */ + + DEC_ENCLEN = 12, /* length of delta encoding */ + DEC_TGTLEN = 13, /* length of target window */ + DEC_DELIND = 14, /* delta indicator */ + + DEC_DATALEN = 15, /* length of ADD+RUN data */ + DEC_INSTLEN = 16, /* length of instruction data */ + DEC_ADDRLEN = 17, /* length of address data */ + + DEC_CKSUM = 18, /* window checksum */ + + DEC_DATA = 19, /* data section */ + DEC_INST = 20, /* instruction section */ + DEC_ADDR = 21, /* address section */ + + DEC_EMIT = 22, /* producing data */ + + DEC_FINISH = 23, /* window finished */ + + DEC_ABORTED = 24 /* xd3_abort_stream */ +} xd3_decode_state; + +/************************************************************ + internal types + ************************************************************/ + +/* instruction lists used in the IOPT buffer */ +struct _xd3_rlist +{ + xd3_rlist *next; + xd3_rlist *prev; +}; + +/* the raw encoding of an instruction used in the IOPT buffer */ +struct _xd3_rinst +{ + uint8_t type; + uint8_t xtra; + uint8_t code1; + uint8_t code2; +
usize_t pos; + usize_t size; + xoff_t addr; + xd3_rlist link; +}; + +/* the code-table form of an single- or double-instruction */ +struct _xd3_dinst +{ + uint8_t type1; + uint8_t size1; + uint8_t type2; + uint8_t size2; +}; + +/* the decoded form of a single (half) instruction. */ +struct _xd3_hinst +{ + uint8_t type; + usize_t size; + usize_t addr; +}; + +/* the form of a whole-file instruction */ +struct _xd3_winst +{ + uint8_t type; /* RUN, ADD, COPY */ + uint8_t mode; /* 0, VCD_SOURCE, VCD_TARGET */ + usize_t size; + xoff_t addr; + xoff_t position; /* absolute position of this inst */ +}; + +/* used by the encoder to buffer output in sections. list of blocks. */ +struct _xd3_output +{ + uint8_t *base; + usize_t next; + usize_t avail; + xd3_output *next_page; +}; + +/* used by the decoder to buffer input in sections. */ +struct _xd3_desect +{ + const uint8_t *buf; + const uint8_t *buf_max; + usize_t size; + usize_t pos; + + /* used in xdelta3-decode.h */ + uint8_t *copied1; + usize_t alloc1; + + /* used in xdelta3-second.h */ + uint8_t *copied2; + usize_t alloc2; +}; + +/* the VCDIFF address cache, see the RFC */ +struct _xd3_addr_cache +{ + usize_t s_near; + usize_t s_same; + usize_t next_slot; /* the circular index for near */ + usize_t *near_array; /* array of size s_near */ + usize_t *same_array; /* array of size s_same*256 */ +}; + +/* the IOPT buffer list is just a list of buffers, which may be allocated + * during encode when using an unlimited buffer. */ +struct _xd3_iopt_buflist +{ + xd3_rinst *buffer; + xd3_iopt_buflist *next; +}; + +/* This is the record of a pre-compiled configuration, a subset of + xd3_config. */ +struct _xd3_smatcher +{ + const char *name; + int (*string_match) (xd3_stream *stream); + usize_t large_look; + usize_t large_step; + usize_t small_look; + usize_t small_chain; + usize_t small_lchain; + usize_t max_lazy; + usize_t long_enough; +}; + +/* hash table size & power-of-two hash function. 
*/ +struct _xd3_hash_cfg +{ + usize_t size; /* Number of buckets */ + usize_t shift; + usize_t mask; + usize_t look; /* How wide is this checksum */ + usize_t multiplier; /* K * powers[0] */ + usize_t *powers; /* Array of [0,look) where powers[look-1] == 1 + and powers[N] = powers[N+1]*K (Rabin-Karp) */ +}; + +/* the sprev list */ +struct _xd3_slist +{ + usize_t last_pos; +}; + +/* window info (for whole state) */ +struct _xd3_wininfo { + xoff_t offset; + usize_t length; + uint32_t adler32; +}; + +/* whole state for, e.g., merge */ +struct _xd3_whole_state { + usize_t addslen; + uint8_t *adds; + usize_t adds_alloc; + + usize_t instlen; + xd3_winst *inst; + usize_t inst_alloc; + + usize_t wininfolen; + xd3_wininfo *wininfo; + usize_t wininfo_alloc; + + xoff_t length; +}; + +/******************************************************************** + public types + *******************************************************************/ + +/* Settings for the secondary compressor. */ +struct _xd3_sec_cfg +{ + int data_type; /* Which section. (set automatically) */ + usize_t ngroups; /* Number of DJW Huffman groups. */ + usize_t sector_size; /* Sector size. */ + int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */ +}; + +/* This is the user-visible stream configuration. */ +struct _xd3_config +{ + usize_t winsize; /* The encoder window size. */ + usize_t sprevsz; /* How far back small string + matching goes */ + usize_t iopt_size; /* entries in the + instruction-optimizing + buffer */ + + xd3_getblk_func *getblk; /* The three callbacks. */ + xd3_alloc_func *alloc; + xd3_free_func *freef; + void *opaque; /* Not used. */ + uint32_t flags; /* stream->flags are initialized + * from xd3_config & never + * modified by the library. Use + * xd3_set_flags to modify flags + * settings mid-stream. 
*/ + + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for + soft config */ + xd3_smatcher smatcher_soft; +}; + +/* The primary source file object. You create one of these objects and + * initialize the first four fields. This library maintains the next + * 5 fields. The configured getblk implementation is responsible for + * setting the final 3 fields when called (and/or when XD3_GETSRCBLK + * is returned). + */ +struct _xd3_source +{ + /* you set */ + usize_t blksize; /* block size */ + const char *name; /* its name, for debug/print + purposes */ + void *ioh; /* opaque handle */ + xoff_t max_winsize; /* maximum visible buffer */ + + /* getblk sets */ + xoff_t curblkno; /* current block number: client + sets after getblk request */ + usize_t onblk; /* number of bytes on current + block: client sets, must be >= 0 + and <= blksize */ + const uint8_t *curblk; /* current block array: client + sets after getblk request */ + + /* xd3 sets */ + usize_t srclen; /* length of this source window */ + xoff_t srcbase; /* offset of this source window + in the source itself */ + usize_t shiftby; /* for power-of-two blocksizes */ + usize_t maskby; /* for power-of-two blocksizes */ + xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */ + usize_t cpyoff_blkoff; /* offset of copy window in + blocks, remainder */ + xoff_t getblkno; /* request block number: xd3 sets + current getblk request */ + + /* See xd3_getblk() */ + xoff_t max_blkno; /* Maximum block, if eof is known, + * otherwise, equals frontier_blkno + * (initially 0). */ + usize_t onlastblk; /* Number of bytes on max_blkno */ + int eof_known; /* Set to true when the first + * partial block is read. */ +}; + +/* The primary xd3_stream object, used for encoding and decoding. 
You + * may access only two fields: avail_out, next_out. Use the methods + * above to operate on xd3_stream. */ +struct _xd3_stream +{ + /* input state */ + const uint8_t *next_in; /* next input byte */ + usize_t avail_in; /* number of bytes available at + next_in */ + xoff_t total_in; /* how many bytes in */ + + /* output state */ + uint8_t *next_out; /* next output byte */ + usize_t avail_out; /* number of bytes available at + next_out */ + usize_t space_out; /* total out space */ + xoff_t current_window; /* number of windows encoded/decoded */ + xoff_t total_out; /* how many bytes out */ + + /* to indicate an error, xd3 sets */ + const char *msg; /* last error message, NULL if + no error */ + + /* source configuration */ + xd3_source *src; /* source array */ + + /* encoder memory configuration */ + usize_t winsize; /* suggested window size */ + usize_t sprevsz; /* small string, previous window + size (power of 2) */ + usize_t sprevmask; /* small string, previous window + size mask */ + usize_t iopt_size; + usize_t iopt_unlimited; + + /* general configuration */ + xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */ + xd3_alloc_func *alloc; /* malloc function */ + xd3_free_func *free; /* free function */ + void* opaque; /* private data object passed to + alloc, free, and getblk */ + uint32_t flags; /* various options */ + + /* secondary compressor configuration */ + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatcher smatcher; + + usize_t *large_table; /* table of large checksums */ + xd3_hash_cfg large_hash; /* large hash config */ + + usize_t *small_table; /* table of small checksums */ + xd3_slist *small_prev; /* table of previous offsets, + circular linked list */ + int small_reset; /* true if small table should + be reset */ + + xd3_hash_cfg small_hash; /* small hash config */ + xd3_addr_cache 
acache; /* the vcdiff address cache */ + xd3_encode_state enc_state; /* state of the encoder */ + + usize_t taroff; /* base offset of the target input */ + usize_t input_position; /* current input position */ + usize_t min_match; /* current minimum match + length, avoids redundant + matches */ + usize_t unencoded_offset; /* current input, first + * unencoded offset. this value + * is <= the first instruction's + * position in the iopt buffer, + * if there is at least one + * match in the buffer. */ + + /* SRCWIN */ + int srcwin_decided; /* boolean: true if srclen and + srcbase have been + decided. */ + int srcwin_decided_early; /* boolean: true if srclen + and srcbase were + decided early. */ + xoff_t srcwin_cksum_pos; /* Source checksum position */ + + /* MATCH */ + xd3_match_state match_state; /* encoder match state */ + xoff_t match_srcpos; /* current match source + position relative to + srcbase */ + xoff_t match_last_srcpos; /* previously attempted + * srcpos, to avoid loops. */ + xoff_t match_minaddr; /* smallest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + xoff_t match_maxaddr; /* largest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + usize_t match_back; /* match extends back so far */ + usize_t match_maxback; /* match extends back maximum */ + usize_t match_fwd; /* match extends forward so far */ + usize_t match_maxfwd; /* match extends forward maximum */ + + xoff_t maxsrcaddr; /* address of the last source + match (across windows) */ + + uint8_t *buf_in; /* for saving buffered input */ + usize_t buf_avail; /* amount of saved input */ + const uint8_t *buf_leftover; /* leftover content of next_in + (i.e., user's buffer) */ + usize_t buf_leftavail; /* amount of leftover content */ + + xd3_output *enc_current; /* current output buffer */ + xd3_output *enc_free; /* free output buffers */ + xd3_output *enc_heads[4]; /* array of encoded outputs: + head of chain */ + xd3_output
*enc_tails[4]; /* array of encoded outputs: + tail of chain */ + uint32_t recode_adler32; /* set the adler32 checksum + * during "recode". */ + + xd3_rlist iopt_used; /* instruction optimizing buffer */ + xd3_rlist iopt_free; + xd3_rinst *iout; /* next single instruction */ + xd3_iopt_buflist *iopt_alloc; + + const uint8_t *enc_appheader; /* application header to encode */ + usize_t enc_appheadsz; /* application header size */ + + /* decoder stuff */ + xd3_decode_state dec_state; /* current DEC_XXX value */ + usize_t dec_hdr_ind; /* VCDIFF header indicator */ + usize_t dec_win_ind; /* VCDIFF window indicator */ + usize_t dec_del_ind; /* VCDIFF delta indicator */ + + uint8_t dec_magic[4]; /* First four bytes */ + usize_t dec_magicbytes; /* Magic position. */ + + usize_t dec_secondid; /* Optional secondary compressor ID. */ + + usize_t dec_codetblsz; /* Optional code table: length. */ + uint8_t *dec_codetbl; /* Optional code table: storage. */ + usize_t dec_codetblbytes; /* Optional code table: position. */ + + usize_t dec_appheadsz; /* Optional application header: + size. */ + uint8_t *dec_appheader; /* Optional application header: + storage */ + usize_t dec_appheadbytes; /* Optional application header: + position. */ + + usize_t dec_cksumbytes; /* Optional checksum: position. */ + uint8_t dec_cksum[4]; /* Optional checksum: storage. */ + uint32_t dec_adler32; /* Optional checksum: value. 
*/ + + usize_t dec_cpylen; /* length of copy window + (VCD_SOURCE or VCD_TARGET) */ + xoff_t dec_cpyoff; /* offset of copy window + (VCD_SOURCE or VCD_TARGET) */ + usize_t dec_enclen; /* length of delta encoding */ + usize_t dec_tgtlen; /* length of target window */ + +#if USE_UINT64 + uint64_t dec_64part; /* part of a decoded uint64_t */ +#endif +#if USE_UINT32 + uint32_t dec_32part; /* part of a decoded uint32_t */ +#endif + + xoff_t dec_winstart; /* offset of the start of + current target window */ + xoff_t dec_window_count; /* == current_window + 1 in + DEC_FINISH */ + usize_t dec_winbytes; /* bytes of the three sections + so far consumed */ + usize_t dec_hdrsize; /* VCDIFF + app header size */ + + const uint8_t *dec_tgtaddrbase; /* Base of decoded target + addresses (addr >= + dec_cpylen). */ + const uint8_t *dec_cpyaddrbase; /* Base of decoded copy + addresses (addr < + dec_cpylen). */ + + usize_t dec_position; /* current decoder position + counting the cpylen + offset */ + usize_t dec_maxpos; /* maximum decoder position + counting the cpylen + offset */ + xd3_hinst dec_current1; /* current instruction */ + xd3_hinst dec_current2; /* current instruction */ + + uint8_t *dec_buffer; /* Decode buffer */ + uint8_t *dec_lastwin; /* In case of VCD_TARGET, the + last target window. 
*/ + usize_t dec_lastlen; /* length of the last target + window */ + xoff_t dec_laststart; /* offset of the start of last + target window */ + usize_t dec_lastspace; /* allocated space of last + target window, for reuse */ + + xd3_desect inst_sect; /* staging area for decoding + window sections */ + xd3_desect addr_sect; + xd3_desect data_sect; + + xd3_code_table_func *code_table_func; + const xd3_dinst *code_table; + const xd3_code_table_desc *code_table_desc; + xd3_dinst *code_table_alloc; + + /* secondary compression */ + const xd3_sec_type *sec_type; + xd3_sec_stream *sec_stream_d; + xd3_sec_stream *sec_stream_i; + xd3_sec_stream *sec_stream_a; + + /* state for reconstructing whole files (e.g., for merge), this only + * supports loading USIZE_T_MAX instructions, adds, etc. */ + xd3_whole_state whole_target; + + /* statistics */ + xoff_t n_scpy; + xoff_t n_tcpy; + xoff_t n_add; + xoff_t n_run; + + xoff_t l_scpy; + xoff_t l_tcpy; + xoff_t l_add; + xoff_t l_run; + + usize_t i_slots_used; + +#if XD3_DEBUG + usize_t large_ckcnt; + + /* memory usage */ + usize_t alloc_cnt; + usize_t free_cnt; +#endif +}; + +/************************************************************************** + PUBLIC FUNCTIONS + **************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus*/ + + +/* This function configures an xd3_stream using the provided in-memory + * input buffer, source buffer, output buffer, and flags. The output + * array must be large enough or else ENOSPC will be returned. This + * is the simplest in-memory encoding interface. */ +int xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buffer, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* The reverse of xd3_encode_memory. 
*/ +int xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buf, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* This function encodes an in-memory input using a pre-configured + * xd3_stream. This allows the caller to set a variety of options + * which are not available in the xd3_encode/decode_memory() + * functions. + * + * The output array must be large enough to hold the output or else + * ENOSPC is returned. The source (if any) should be set using + * xd3_set_source_and_size() with a single-block xd3_source. This + * calls the underlying non-blocking interfaces, + * xd3_encode/decode_input(), handling the necessary input/output + * states. This method may be considered a reference for any + * application using xd3_encode_input() directly. + * + * xd3_stream stream; + * xd3_config config; + * xd3_source src; + * + * memset (& src, 0, sizeof (src)); + * memset (& stream, 0, sizeof (stream)); + * memset (& config, 0, sizeof (config)); + * + * if (source != NULL) + * { + * src.size = source_size; + * src.blksize = source_size; + * src.curblkno = 0; + * src.onblk = source_size; + * src.curblk = source; + * src.max_winsize = source_size; + * xd3_set_source(&stream, &src); + * } + * + * config.flags = flags; + * config.winsize = input_size; + * + * ... set smatcher, appheader, encoding-table, compression-level, etc. + * + * xd3_config_stream(&stream, &config); + * xd3_encode_stream(&stream, ...); + * xd3_free_stream(&stream); + */ +int xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_output); + +/* The reverse of xd3_encode_stream. */ +int xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_size); + +/* This is the non-blocking interface. 
+ * + * Handling input and output states is the same for encoding or + * decoding using the xd3_avail_input() and xd3_consume_output() + * routines, inlined below. + * + * Return values: + * + * XD3_INPUT: the process requires more input: call + * xd3_avail_input() then repeat + * + * XD3_OUTPUT: the process has more output: read stream->next_out, + * stream->avail_out, then call xd3_consume_output(), + * then repeat + * + * XD3_GOTHEADER: (decoder-only) notification returned following the + * VCDIFF header and first window header. the decoder + * may use the header to configure itself. + * + * XD3_WINSTART: a general notification returned once for each + * window except the 0-th window, which is implied by + * XD3_GOTHEADER. It is recommended to use a + * switch-stmt such as: + * + * ... + * again: + * switch ((ret = xd3_decode_input (stream))) { + * case XD3_GOTHEADER: { + * assert(stream->current_window == 0); + * stuff; + * } + * // fallthrough + * case XD3_WINSTART: { + * something(stream->current_window); + * goto again; + * } + * ... + * + * XD3_WINFINISH: a general notification, following the complete + * input & output of a window. at this point, + * stream->total_in and stream->total_out are consistent + * for either encoding or decoding. + * + * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value + * is returned to initiate a non-blocking source read. + */ +int xd3_decode_input (xd3_stream *stream); +int xd3_encode_input (xd3_stream *stream); + +/* The xd3_config structure is used to initialize a stream - all data + * is copied into stream so config may be a temporary variable. See + * the [documentation] or comments on the xd3_config structure. */ +int xd3_config_stream (xd3_stream *stream, + xd3_config *config); + +/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an + * error check that the stream is in a proper state to be closed: this + * means the encoder is flushed and the decoder is at a window + * boundary. 
The application is responsible for freeing any of the + * resources it supplied. */ +int xd3_close_stream (xd3_stream *stream); + +/* This arranges for closing the stream to succeed. Does not free the + * stream. */ +void xd3_abort_stream (xd3_stream *stream); + +/* xd3_free_stream frees all memory allocated for the stream. The + * application is responsible for freeing any of the resources it + * supplied. */ +void xd3_free_stream (xd3_stream *stream); + +/* This function informs the encoder or decoder that source matching + * (i.e., delta-compression) is possible. For encoding, this should + * be called before the first xd3_encode_input. A NULL source is + * ignored. For decoding, this should be called before the first + * window is decoded, but the appheader may be read first + * (XD3_GOTHEADER). After decoding the header, call xd3_set_source() + * if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) + * is true, it means the first window expects there to be a source file. + */ +int xd3_set_source (xd3_stream *stream, + xd3_source *source); + +/* If the source size is known, call this instead of xd3_set_source() + * to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK). + * + * Follow these steps: + xd3_source source; + memset(&source, 0, sizeof(source)); + source.blksize = size; + source.onblk = size; + source.curblk = buf; + source.curblkno = 0; + int ret = xd3_set_source_and_size(&stream, &source, size); + ... + */ +int xd3_set_source_and_size (xd3_stream *stream, + xd3_source *source, + xoff_t source_size); + +/* This should be called before the first call to xd3_encode_input() + * to include application-specific data in the VCDIFF header. */ +void xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size); + +/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.
+ * For convenience, the decoder always adds a single byte padding to + * the end of the application header, which is set to zero in case the + * application header is a string. */ +int xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size); + +/* To generate a VCDIFF encoded delta with xd3_encode_init() from + * another format, use: + * + * xd3_encode_init_partial() -- initialize encoder state (w/o hash tables) + * xd3_init_cache() -- reset VCDIFF address cache + * xd3_found_match() -- to report a copy instruction + * + * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual. + */ +int xd3_encode_init_partial (xd3_stream *stream); +void xd3_init_cache (xd3_addr_cache* acache); +int xd3_found_match (xd3_stream *stream, + usize_t pos, usize_t size, + xoff_t addr, int is_source); + +/* Gives an error string for xdelta3-specific errors, returns NULL for + system errors */ +const char* xd3_strerror (int ret); + +/* For convenience, zero & initialize the xd3_config structure with + specified flags. */ +static INLINE +void xd3_init_config (xd3_config *config, + uint32_t flags) +{ + memset (config, 0, sizeof (*config)); + config->flags = flags; +} + +/* This supplies some input to the stream. + * + * For encoding, if the input is larger than the configured window + * size (xd3_config.winsize), the entire input will be consumed and + * encoded anyway. If you wish to strictly limit the window size, + * limit the buffer passed to xd3_avail_input to the window size. + * + * For encoding, if the input is smaller than the configured window + * size (xd3_config.winsize), the library will create a window-sized + * buffer and accumulate input until a full-sized window can be + * encoded. XD3_INPUT will be returned. The input must remain valid + * until the next time xd3_encode_input() returns XD3_INPUT. + * + * For decoding, the input will be consumed entirely before XD3_INPUT + * is returned again.
+ */ +static INLINE +void xd3_avail_input (xd3_stream *stream, + const uint8_t *idata, + usize_t isize) +{ + /* Even if isize is zero, the code expects a non-NULL idata. Why? + * It uses this value to determine whether xd3_avail_input has ever + * been called. If xd3_encode_input is called before + * xd3_avail_input it will return XD3_INPUT right away without + * allocating a stream->winsize buffer. This is to avoid an + * unwanted allocation. */ + XD3_ASSERT (idata != NULL || isize == 0); + + stream->next_in = idata; + stream->avail_in = isize; +} + +/* This acknowledges receipt of output data, must be called after any + * XD3_OUTPUT return. */ +static INLINE +void xd3_consume_output (xd3_stream *stream) +{ + stream->avail_out = 0; +} + +/* These are set for each XD3_WINFINISH return. */ +static INLINE +int xd3_encoder_used_source (xd3_stream *stream) { + return stream->src != NULL && stream->src->srclen > 0; +} +static INLINE +xoff_t xd3_encoder_srcbase (xd3_stream *stream) { + return stream->src->srcbase; +} +static INLINE +usize_t xd3_encoder_srclen (xd3_stream *stream) { + return stream->src->srclen; +} + +/* Checks for legal flag changes. */ +static INLINE +void xd3_set_flags (xd3_stream *stream, uint32_t flags) +{ + /* The bitwise difference should contain only XD3_FLUSH or + XD3_SKIP_WINDOW */ + XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0); + stream->flags = flags; +} + +/* Gives some extra information about the latest library error, if any + * is known. */ +static INLINE +const char* xd3_errstring (xd3_stream *stream) +{ + return stream->msg ? stream->msg : ""; +} + + +/* 64-bit divisions are expensive, which is why we require a + * power-of-two source->blksize. To relax this restriction is + * relatively easy, see the history for xd3_blksize_div(). 
*/ +static INLINE +void xd3_blksize_div (const xoff_t offset, + const xd3_source *source, + xoff_t *blkno, + usize_t *blkoff) { + *blkno = offset >> source->shiftby; + *blkoff = offset & source->maskby; + XD3_ASSERT (*blkoff < source->blksize); +} + +static INLINE +void xd3_blksize_add (xoff_t *blkno, + usize_t *blkoff, + const xd3_source *source, + const usize_t add) +{ + usize_t blkdiff; + + /* Does not check for overflow, checked in xdelta3-decode.h. */ + *blkoff += add; + blkdiff = *blkoff >> source->shiftby; + + if (blkdiff) + { + *blkno += blkdiff; + *blkoff &= source->maskby; + } + + XD3_ASSERT (*blkoff < source->blksize); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#define XD3_NOOP 0U +#define XD3_ADD 1U +#define XD3_RUN 2U +#define XD3_CPY 3U /* XD3_CPY rtypes are represented as (XD3_CPY + + * copy-mode value) */ + +#if XD3_DEBUG +#define IF_DEBUG(x) x +#else +#define IF_DEBUG(x) +#endif +#if XD3_DEBUG > 1 +#define IF_DEBUG1(x) x +#else +#define IF_DEBUG1(x) +#endif +#if XD3_DEBUG > 2 +#define IF_DEBUG2(x) x +#else +#define IF_DEBUG2(x) +#endif + +#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0])) + +#endif /* _XDELTA3_H_ */ diff --git a/docs/retroarch.6 b/docs/retroarch.6 index 4eaa5644bc..7478040e17 100644 --- a/docs/retroarch.6 +++ b/docs/retroarch.6 @@ -226,6 +226,12 @@ Attempts to apply a IPS patch to the current content image. No files are altered If this flag is not specified, RetroArch will look for a .ips file with same basename as content specified. Note that RetroArch cannot perform any error checking if patching was successful due to how IPS works. +.TP +\fB--xdelta PATCH\fR +Attempts to apply an Xdelta patch to the current content image. No files are altered. +If this flag is not specified, RetroArch will look for a .xdelta file with same basename as content specified. +Only available if RetroArch was built with Xdelta support. + .TP \fB--no-patch\fR Disables all kinds of content patching. 
diff --git a/file_path_special.h b/file_path_special.h index 7a35d9692b..a80c0b657e 100644 --- a/file_path_special.h +++ b/file_path_special.h @@ -63,6 +63,7 @@ RETRO_BEGIN_DECLS #define FILE_PATH_UPS_EXTENSION ".ups" #define FILE_PATH_IPS_EXTENSION ".ips" #define FILE_PATH_BPS_EXTENSION ".bps" +#define FILE_PATH_XDELTA_EXTENSION ".xdelta" #define FILE_PATH_RDB_EXTENSION ".rdb" #define FILE_PATH_RDB_EXTENSION_NO_DOT "rdb" #define FILE_PATH_ZIP_EXTENSION ".zip" diff --git a/qb/config.params.sh b/qb/config.params.sh index eea2495c18..82434196fe 100644 --- a/qb/config.params.sh +++ b/qb/config.params.sh @@ -6,6 +6,7 @@ HAVE_CORE_INFO_CACHE=yes # Core info cache support HAVE_BLUETOOTH=no # Bluetooth support HAVE_NVDA=yes # NVDA support HAVE_PATCH=yes # Softpatching support (BPS/IPS/UPS) +HAVE_XDELTA=yes # Xdelta softpatching support (requires softpatching) HAVE_SAPI=no # SAPI support HAVE_VIDEO_FILTER=yes # Video filter support HAVE_WINRAWINPUT=yes # Windows Raw Input support (XP and higher) diff --git a/retroarch.c b/retroarch.c index 52a66bf4ce..7e95f87095 100644 --- a/retroarch.c +++ b/retroarch.c @@ -270,6 +270,7 @@ enum RA_OPT_APPENDCONFIG, RA_OPT_BPS, RA_OPT_IPS, + RA_OPT_XDELTA, RA_OPT_NO_PATCH, RA_OPT_RECORDCONFIG, RA_OPT_SUBSYSTEM, @@ -4607,6 +4608,11 @@ void retroarch_override_setting_set( p_rarch->flags |= RARCH_FLAGS_HAS_SET_IPS_PREF; #endif break; + case RARCH_OVERRIDE_SETTING_XDELTA_PREF: +#if defined(HAVE_PATCH) && defined(HAVE_XDELTA) + p_rarch->flags |= RARCH_FLAGS_HAS_SET_XDELTA_PREF; +#endif + break; case RARCH_OVERRIDE_SETTING_LOG_TO_FILE: p_rarch->flags |= RARCH_FLAGS_HAS_SET_LOG_TO_FILE; break; @@ -4682,6 +4688,11 @@ void retroarch_override_setting_unset( case RARCH_OVERRIDE_SETTING_IPS_PREF: #ifdef HAVE_PATCH p_rarch->flags &= ~RARCH_FLAGS_HAS_SET_IPS_PREF; +#endif + break; + case RARCH_OVERRIDE_SETTING_XDELTA_PREF: +#if defined(HAVE_PATCH) && defined(HAVE_XDELTA) + p_rarch->flags &= ~RARCH_FLAGS_HAS_SET_XDELTA_PREF; #endif break; case 
RARCH_OVERRIDE_SETTING_LOG_TO_FILE: @@ -4734,7 +4745,8 @@ static void global_free(struct rarch_state *p_rarch) p_rarch->flags &= ~( RARCH_FLAGS_BPS_PREF | RARCH_FLAGS_IPS_PREF - | RARCH_FLAGS_UPS_PREF); + | RARCH_FLAGS_UPS_PREF + | RARCH_FLAGS_XDELTA_PREF); runloop_st->flags &= ~RUNLOOP_FLAG_PATCH_BLOCKED; #endif @@ -4759,6 +4771,7 @@ static void global_free(struct rarch_state *p_rarch) *runloop_st->name.ups = '\0'; *runloop_st->name.bps = '\0'; *runloop_st->name.ips = '\0'; + *runloop_st->name.xdelta = '\0'; *runloop_st->name.savefile = '\0'; *runloop_st->name.savestate = '\0'; *runloop_st->name.replay = '\0'; @@ -5445,10 +5458,18 @@ static void retroarch_print_help(const char *arg0) "Specifies path for BPS patch that will be applied to content.\n" " --ips=FILE " "Specifies path for IPS patch that will be applied to content.\n" + , sizeof(buf)); +#ifdef HAVE_XDELTA + strlcat(buf, + " --xdelta=FILE " + "Specifies path for Xdelta patch that will be applied to content.\n" + , sizeof(buf)); +#endif /* HAVE_XDELTA */ + strlcat(buf, " --no-patch " "Disables all forms of content patching.\n" , sizeof(buf)); -#endif +#endif /* HAVE_PATCH */ #ifdef HAVE_SCREENSHOTS strlcat(buf, @@ -5713,8 +5734,11 @@ static bool retroarch_parse_input_and_config( { "ups", 1, NULL, 'U' }, { "bps", 1, NULL, RA_OPT_BPS }, { "ips", 1, NULL, RA_OPT_IPS }, +#ifdef HAVE_XDELTA + { "xdelta", 1, NULL, RA_OPT_XDELTA }, +#endif /* HAVE_XDELTA */ { "no-patch", 0, NULL, RA_OPT_NO_PATCH }, -#endif +#endif /* HAVE_PATCH */ { "detach", 0, NULL, 'D' }, { "features", 0, NULL, RA_OPT_FEATURES }, { "subsystem", 1, NULL, RA_OPT_SUBSYSTEM }, @@ -5788,10 +5812,11 @@ static bool retroarch_parse_input_and_config( p_rarch->flags &= ~RARCH_FLAGS_HAS_SET_USERNAME; #ifdef HAVE_PATCH p_rarch->flags &= ~( RARCH_FLAGS_UPS_PREF | RARCH_FLAGS_IPS_PREF - | RARCH_FLAGS_BPS_PREF); + | RARCH_FLAGS_BPS_PREF | RARCH_FLAGS_XDELTA_PREF); *runloop_st->name.ups = '\0'; *runloop_st->name.bps = '\0'; *runloop_st->name.ips = '\0'; + 
*runloop_st->name.xdelta = '\0'; #endif #ifdef HAVE_CONFIGFILE runloop_st->flags &= ~RUNLOOP_FLAG_OVERRIDES_ACTIVE; @@ -6195,7 +6220,14 @@ static bool retroarch_parse_input_and_config( retroarch_override_setting_set(RARCH_OVERRIDE_SETTING_IPS_PREF, NULL); #endif break; - + case RA_OPT_XDELTA: +#if defined(HAVE_PATCH) && defined(HAVE_XDELTA) + strlcpy(runloop_st->name.xdelta, optarg, + sizeof(runloop_st->name.xdelta)); + p_rarch->flags |= RARCH_FLAGS_XDELTA_PREF; + retroarch_override_setting_set(RARCH_OVERRIDE_SETTING_XDELTA_PREF, NULL); +#endif + break; case RA_OPT_NO_PATCH: #ifdef HAVE_PATCH runloop_st->flags |= RUNLOOP_FLAG_PATCH_BLOCKED; @@ -6842,7 +6874,12 @@ bool retroarch_ctl(enum rarch_ctl_state state, void *data) case RARCH_CTL_UNSET_IPS_PREF: p_rarch->flags &= ~RARCH_FLAGS_IPS_PREF; break; -#endif +#ifdef HAVE_XDELTA + case RARCH_CTL_UNSET_XDELTA_PREF: + p_rarch->flags &= ~RARCH_FLAGS_XDELTA_PREF; + break; +#endif /* HAVE_XDELTA */ +#endif /* HAVE_PATCH */ case RARCH_CTL_IS_DUMMY_CORE: return runloop_st->current_core_type == CORE_TYPE_DUMMY; case RARCH_CTL_IS_CORE_LOADED: @@ -7100,7 +7137,11 @@ bool retroarch_override_setting_is_set( return ((p_rarch->flags & RARCH_FLAGS_HAS_SET_BPS_PREF) > 0); case RARCH_OVERRIDE_SETTING_IPS_PREF: return ((p_rarch->flags & RARCH_FLAGS_HAS_SET_IPS_PREF) > 0); -#endif +#ifdef HAVE_XDELTA + case RARCH_OVERRIDE_SETTING_XDELTA_PREF: + return ((p_rarch->flags & RARCH_FLAGS_HAS_SET_XDELTA_PREF) > 0); +#endif /* HAVE_XDELTA */ +#endif /* HAVE_PATCH */ case RARCH_OVERRIDE_SETTING_LOG_TO_FILE: return ((p_rarch->flags & RARCH_FLAGS_HAS_SET_LOG_TO_FILE) > 0); case RARCH_OVERRIDE_SETTING_DATABASE_SCAN: diff --git a/retroarch.h b/retroarch.h index 28689523bf..f806a85831 100644 --- a/retroarch.h +++ b/retroarch.h @@ -59,7 +59,7 @@ #define RETRO_ENVIRONMENT_POLL_TYPE_OVERRIDE (4 | RETRO_ENVIRONMENT_RETROARCH_START_BLOCK) /* unsigned * -- - * Tells the frontend to override the poll type behavior. 
+ * Tells the frontend to override the poll type behavior. * Allows the frontend to influence the polling behavior of the * frontend. * @@ -104,7 +104,9 @@ enum rarch_state_flags RARCH_FLAGS_BPS_PREF = (1 << 11), RARCH_FLAGS_IPS_PREF = (1 << 12), RARCH_FLAGS_BLOCK_CONFIG_READ = (1 << 13), - RARCH_FLAGS_CLI_DATABASE_SCAN = (1 << 14) + RARCH_FLAGS_CLI_DATABASE_SCAN = (1 << 14), + RARCH_FLAGS_HAS_SET_XDELTA_PREF = (1 << 15), + RARCH_FLAGS_XDELTA_PREF = (1 << 16) }; bool retroarch_ctl(enum rarch_ctl_state state, void *data); @@ -182,7 +184,7 @@ const char* config_get_microphone_driver_options(void); unsigned int retroarch_get_core_requested_rotation(void); /* - Returns final rotation including both user chosen video rotation + Returns final rotation including both user chosen video rotation and core requested rotation if allowed by video_allow_rotate */ unsigned int retroarch_get_rotation(void); diff --git a/retroarch_types.h b/retroarch_types.h index 669c62812d..37e6492d15 100644 --- a/retroarch_types.h +++ b/retroarch_types.h @@ -62,6 +62,7 @@ enum rarch_ctl_state RARCH_CTL_UNSET_BPS_PREF, RARCH_CTL_UNSET_UPS_PREF, RARCH_CTL_UNSET_IPS_PREF, + RARCH_CTL_UNSET_XDELTA_PREF, #ifdef HAVE_CONFIGFILE /* Block config read */ @@ -122,6 +123,7 @@ enum rarch_override_setting RARCH_OVERRIDE_SETTING_UPS_PREF, RARCH_OVERRIDE_SETTING_BPS_PREF, RARCH_OVERRIDE_SETTING_IPS_PREF, + RARCH_OVERRIDE_SETTING_XDELTA_PREF, RARCH_OVERRIDE_SETTING_LIBRETRO_DEVICE, RARCH_OVERRIDE_SETTING_LOG_TO_FILE, RARCH_OVERRIDE_SETTING_DATABASE_SCAN, diff --git a/runloop.c b/runloop.c index 2a506d1763..36e58f0f50 100644 --- a/runloop.c +++ b/runloop.c @@ -4874,6 +4874,16 @@ void runloop_path_fill_names(void) ".ips", sizeof(runloop_st->name.ips) - len); } + + if (string_is_empty(runloop_st->name.xdelta)) + { + size_t len = strlcpy(runloop_st->name.xdelta, + runloop_st->runtime_content_path_basename, + sizeof(runloop_st->name.xdelta)); + strlcpy(runloop_st->name.xdelta + len, + ".xdelta", + 
sizeof(runloop_st->name.xdelta) - len); + } } diff --git a/runloop.h b/runloop.h index 0376644c5a..802a7f115b 100644 --- a/runloop.h +++ b/runloop.h @@ -294,6 +294,7 @@ struct runloop char ups[8192]; char bps[8192]; char ips[8192]; + char xdelta[8192]; char label[8192]; } name; diff --git a/tasks/task_content.c b/tasks/task_content.c index 14de793d57..99f92bda3e 100644 --- a/tasks/task_content.c +++ b/tasks/task_content.c @@ -123,7 +123,8 @@ enum content_information_flags CONTENT_INFO_FLAG_IS_UPS_PREF = (1 << 5), CONTENT_INFO_FLAG_PATCH_IS_BLOCKED = (1 << 6), CONTENT_INFO_FLAG_BIOS_IS_MISSING = (1 << 7), - CONTENT_INFO_FLAG_CHECK_FW_BEFORE_LOADING = (1 << 8) + CONTENT_INFO_FLAG_CHECK_FW_BEFORE_LOADING = (1 << 8), + CONTENT_INFO_FLAG_IS_XDELTA_PREF = (1 << 9) }; struct content_information_ctx @@ -131,6 +132,7 @@ struct content_information_ctx char *name_ips; char *name_bps; char *name_ups; + char *name_xdelta; char *valid_extensions; char *directory_cache; @@ -437,7 +439,7 @@ static content_file_list_t *content_file_list_init(size_t size) calloc(size, sizeof(struct retro_game_info)))) { /* Create retro_game_info_ext object */ - if ((file_list->game_info_ext = + if ((file_list->game_info_ext = (struct retro_game_info_ext *) calloc(size, sizeof(struct retro_game_info_ext)))) return file_list; @@ -726,9 +728,11 @@ static bool content_file_load_into_memory( content_ctx->flags & CONTENT_INFO_FLAG_IS_IPS_PREF, content_ctx->flags & CONTENT_INFO_FLAG_IS_BPS_PREF, content_ctx->flags & CONTENT_INFO_FLAG_IS_UPS_PREF, + content_ctx->flags & CONTENT_INFO_FLAG_IS_XDELTA_PREF, content_ctx->name_ips, content_ctx->name_bps, content_ctx->name_ups, + content_ctx->name_xdelta, (uint8_t**)&content_data, (void*)&content_size); #endif @@ -1026,7 +1030,7 @@ static bool content_file_load( * It would be better to set the ACL to allow full access for all application packages. 
However, * this is substantially easier than writing out new functions to do this * Copy ACL from localstate - * I am genuinely really proud of these work arounds + * I am genuinely really proud of these work arounds */ wchar_t wcontent_path[MAX_PATH]; mbstowcs(wcontent_path, content_path, MAX_PATH); @@ -1061,7 +1065,7 @@ static bool content_file_load( "VFSCACHE\\", sizeof(new_basedir) - _len); basedir_attribs = GetFileAttributes(new_basedir); - if ( (basedir_attribs == INVALID_FILE_ATTRIBUTES) + if ( (basedir_attribs == INVALID_FILE_ATTRIBUTES) || (!(basedir_attribs & FILE_ATTRIBUTE_DIRECTORY))) { if (!CreateDirectoryA(new_basedir, NULL)) @@ -1279,7 +1283,7 @@ CONTENT_INFO_FLAG_NEED_FULLPATH); if (string_is_empty(content_path)) { if ( (flags & CONTENT_ST_FLAG_CORE_DOES_NOT_NEED_CONTENT) - && content_ctx->flags + && content_ctx->flags & CONTENT_INFO_FLAG_SET_SUPPORTS_NO_GAME_ENABLE) string_list_append(content, "", attr); } @@ -1537,7 +1541,7 @@ void menu_content_environment_get(int *argc, char *argv[], wrap_args->content_path = path_get(RARCH_PATH_CONTENT); if (!retroarch_override_setting_is_set( RARCH_OVERRIDE_SETTING_LIBRETRO, NULL)) - wrap_args->libretro_path = string_is_empty(path_get(RARCH_PATH_CORE)) + wrap_args->libretro_path = string_is_empty(path_get(RARCH_PATH_CORE)) ? 
NULL : path_get(RARCH_PATH_CORE); } @@ -1557,7 +1561,7 @@ static void task_push_to_history_list( uint8_t flags = content_get_flags(); /* Push entry to top of history playlist */ - if ( (flags & CONTENT_ST_FLAG_IS_INITED) + if ( (flags & CONTENT_ST_FLAG_IS_INITED) || (flags & CONTENT_ST_FLAG_CORE_DOES_NOT_NEED_CONTENT)) { char tmp[PATH_MAX_LENGTH]; @@ -1664,7 +1668,7 @@ static void task_push_to_history_list( label = runloop_st->name.label; if ( - settings && settings->bools.history_list_enable + settings && settings->bools.history_list_enable && playlist_hist) { char subsystem_name[PATH_MAX_LENGTH]; @@ -1673,7 +1677,7 @@ static void task_push_to_history_list( subsystem_name[0] = '\0'; content_get_subsystem_friendly_name(path_get(RARCH_PATH_SUBSYSTEM), subsystem_name, sizeof(subsystem_name)); - /* The push function reads our entry as const, + /* The push function reads our entry as const, * so these casts are safe */ entry.path = (char*)tmp; entry.label = (char*)label; @@ -1866,7 +1870,7 @@ static bool firmware_update_status( bool set_missing_firmware = false; core_info_t *core_info = NULL; runloop_state_t *runloop_st = runloop_state_get_ptr(); - + core_info_get_current_core(&core_info); if (!core_info) @@ -1941,6 +1945,10 @@ bool task_push_start_dummy_core(content_ctx_info_t *content_info) content_ctx.flags |= CONTENT_INFO_FLAG_IS_BPS_PREF; if (rarch_flags & RARCH_FLAGS_UPS_PREF) content_ctx.flags |= CONTENT_INFO_FLAG_IS_UPS_PREF; +#ifdef HAVE_XDELTA + if (rarch_flags & RARCH_FLAGS_XDELTA_PREF) + content_ctx.flags |= CONTENT_INFO_FLAG_IS_XDELTA_PREF; +#endif /* HAVE_XDELTA */ if (runloop_st->flags & RUNLOOP_FLAG_PATCH_BLOCKED) content_ctx.flags |= CONTENT_INFO_FLAG_PATCH_IS_BLOCKED; #endif @@ -1951,6 +1959,7 @@ bool task_push_start_dummy_core(content_ctx_info_t *content_info) content_ctx.name_ips = NULL; content_ctx.name_bps = NULL; content_ctx.name_ups = NULL; + content_ctx.name_xdelta = NULL; content_ctx.valid_extensions = NULL; content_ctx.subsystem.data = NULL; 
@@ -1962,6 +1971,8 @@ bool task_push_start_dummy_core(content_ctx_info_t *content_info) content_ctx.name_bps = strdup(runloop_st->name.bps); if (!string_is_empty(runloop_st->name.ups)) content_ctx.name_ups = strdup(runloop_st->name.ups); + if (!string_is_empty(runloop_st->name.xdelta)) + content_ctx.name_xdelta = strdup(runloop_st->name.xdelta); if (!string_is_empty(path_dir_system)) content_ctx.directory_system = strdup(path_dir_system); @@ -1989,6 +2000,8 @@ bool task_push_start_dummy_core(content_ctx_info_t *content_info) free(content_ctx.name_bps); if (content_ctx.name_ups) free(content_ctx.name_ups); + if (content_ctx.name_xdelta) + free(content_ctx.name_xdelta); if (content_ctx.directory_system) free(content_ctx.directory_system); @@ -2031,6 +2044,10 @@ bool task_push_load_content_from_playlist_from_menu( content_ctx.flags |= CONTENT_INFO_FLAG_IS_BPS_PREF; if (rarch_flags & RARCH_FLAGS_UPS_PREF) content_ctx.flags |= CONTENT_INFO_FLAG_IS_UPS_PREF; +#ifdef HAVE_XDELTA + if (rarch_flags & RARCH_FLAGS_XDELTA_PREF) + content_ctx.flags |= CONTENT_INFO_FLAG_IS_XDELTA_PREF; +#endif /* HAVE_XDELTA */ if (runloop_st->flags & RUNLOOP_FLAG_PATCH_BLOCKED) content_ctx.flags |= CONTENT_INFO_FLAG_PATCH_IS_BLOCKED; #endif @@ -2041,6 +2058,7 @@ bool task_push_load_content_from_playlist_from_menu( content_ctx.name_ips = NULL; content_ctx.name_bps = NULL; content_ctx.name_ups = NULL; + content_ctx.name_xdelta = NULL; content_ctx.valid_extensions = NULL; content_ctx.subsystem.data = NULL; @@ -2052,6 +2070,8 @@ bool task_push_load_content_from_playlist_from_menu( content_ctx.name_bps = strdup(runloop_st->name.bps); if (!string_is_empty(runloop_st->name.ups)) content_ctx.name_ups = strdup(runloop_st->name.ups); + if (!string_is_empty(runloop_st->name.xdelta)) + content_ctx.name_xdelta = strdup(runloop_st->name.xdelta); if (label) strlcpy(runloop_st->name.label, label, sizeof(runloop_st->name.label)); else @@ -2138,6 +2158,8 @@ end: free(content_ctx.name_bps); if 
(content_ctx.name_ups) free(content_ctx.name_ups); + if (content_ctx.name_xdelta) + free(content_ctx.name_xdelta); if (content_ctx.directory_system) free(content_ctx.directory_system); @@ -2171,6 +2193,10 @@ bool task_push_start_current_core(content_ctx_info_t *content_info) content_ctx.flags |= CONTENT_INFO_FLAG_IS_BPS_PREF; if (rarch_flags & RARCH_FLAGS_UPS_PREF) content_ctx.flags |= CONTENT_INFO_FLAG_IS_UPS_PREF; +#ifdef HAVE_XDELTA + if (rarch_flags & RARCH_FLAGS_XDELTA_PREF) + content_ctx.flags |= CONTENT_INFO_FLAG_IS_XDELTA_PREF; +#endif if (runloop_st->flags & RUNLOOP_FLAG_PATCH_BLOCKED) content_ctx.flags |= CONTENT_INFO_FLAG_PATCH_IS_BLOCKED; } @@ -2182,6 +2208,7 @@ bool task_push_start_current_core(content_ctx_info_t *content_info) content_ctx.name_ips = NULL; content_ctx.name_bps = NULL; content_ctx.name_ups = NULL; + content_ctx.name_xdelta = NULL; content_ctx.valid_extensions = NULL; content_ctx.subsystem.data = NULL; @@ -2193,6 +2220,8 @@ bool task_push_start_current_core(content_ctx_info_t *content_info) content_ctx.name_bps = strdup(runloop_st->name.bps); if (!string_is_empty(runloop_st->name.ups)) content_ctx.name_ups = strdup(runloop_st->name.ups); + if (!string_is_empty(runloop_st->name.xdelta)) + content_ctx.name_xdelta = strdup(runloop_st->name.xdelta); if (!string_is_empty(path_dir_system)) content_ctx.directory_system = strdup(path_dir_system); @@ -2240,6 +2269,8 @@ end: free(content_ctx.name_bps); if (content_ctx.name_ups) free(content_ctx.name_ups); + if (content_ctx.name_xdelta) + free(content_ctx.name_xdelta); if (content_ctx.directory_system) free(content_ctx.directory_system); @@ -2405,6 +2436,10 @@ bool task_push_load_content_with_new_core_from_menu( content_ctx.flags |= CONTENT_INFO_FLAG_IS_BPS_PREF; if (rarch_flags & RARCH_FLAGS_UPS_PREF) content_ctx.flags |= CONTENT_INFO_FLAG_IS_UPS_PREF; +#ifdef HAVE_XDELTA + if (rarch_flags & RARCH_FLAGS_XDELTA_PREF) + content_ctx.flags |= CONTENT_INFO_FLAG_IS_XDELTA_PREF; +#endif if 
(runloop_st->flags & RUNLOOP_FLAG_PATCH_BLOCKED) content_ctx.flags |= CONTENT_INFO_FLAG_PATCH_IS_BLOCKED; } @@ -2416,6 +2451,7 @@ bool task_push_load_content_with_new_core_from_menu( content_ctx.name_ips = NULL; content_ctx.name_bps = NULL; content_ctx.name_ups = NULL; + content_ctx.name_xdelta = NULL; content_ctx.valid_extensions = NULL; content_ctx.subsystem.data = NULL; @@ -2427,6 +2463,8 @@ bool task_push_load_content_with_new_core_from_menu( content_ctx.name_bps = strdup(runloop_st->name.bps); if (!string_is_empty(runloop_st->name.ups)) content_ctx.name_ups = strdup(runloop_st->name.ups); + if (!string_is_empty(runloop_st->name.xdelta)) + content_ctx.name_xdelta = strdup(runloop_st->name.xdelta); runloop_st->name.label[0] = '\0'; @@ -2475,6 +2513,8 @@ end: free(content_ctx.name_bps); if (content_ctx.name_ups) free(content_ctx.name_ups); + if (content_ctx.name_xdelta) + free(content_ctx.name_xdelta); if (content_ctx.directory_system) free(content_ctx.directory_system); @@ -2513,6 +2553,10 @@ static bool task_load_content_internal( content_ctx.flags |= CONTENT_INFO_FLAG_IS_BPS_PREF; if (rarch_flags & RARCH_FLAGS_UPS_PREF) content_ctx.flags |= CONTENT_INFO_FLAG_IS_UPS_PREF; +#ifdef HAVE_XDELTA + if (rarch_flags & RARCH_FLAGS_XDELTA_PREF) + content_ctx.flags |= CONTENT_INFO_FLAG_IS_XDELTA_PREF; +#endif /* HAVE_XDELTA */ if (runloop_st->flags & RUNLOOP_FLAG_PATCH_BLOCKED) content_ctx.flags |= CONTENT_INFO_FLAG_PATCH_IS_BLOCKED; #endif @@ -2523,6 +2567,7 @@ static bool task_load_content_internal( content_ctx.name_ips = NULL; content_ctx.name_bps = NULL; content_ctx.name_ups = NULL; + content_ctx.name_xdelta = NULL; content_ctx.valid_extensions = NULL; content_ctx.subsystem.data = NULL; @@ -2555,6 +2600,8 @@ static bool task_load_content_internal( content_ctx.name_bps = strdup(runloop_st->name.bps); if (!string_is_empty(runloop_st->name.ups)) content_ctx.name_ups = strdup(runloop_st->name.ups); + if (!string_is_empty(runloop_st->name.xdelta)) + 
content_ctx.name_xdelta = strdup(runloop_st->name.xdelta); if (!string_is_empty(path_dir_system)) content_ctx.directory_system = strdup(path_dir_system); @@ -2587,6 +2634,8 @@ end: free(content_ctx.name_bps); if (content_ctx.name_ups) free(content_ctx.name_ups); + if (content_ctx.name_xdelta) + free(content_ctx.name_xdelta); if (content_ctx.directory_system) free(content_ctx.directory_system); if (content_ctx.directory_cache) @@ -2779,7 +2828,7 @@ void content_set_subsystem(unsigned idx) p_content->pending_subsystem_id = idx; - if ( subsystem + if ( subsystem && (runloop_st->subsystem_current_count > 0)) { strlcpy(p_content->pending_subsystem_ident, @@ -2801,7 +2850,7 @@ bool content_set_subsystem_by_name(const char* subsystem_name) rarch_system_info_t *sys_info = &runloop_st->system; unsigned i = 0; /* Core not loaded completely, use the data we peeked on load core */ - const struct retro_subsystem_info + const struct retro_subsystem_info *subsystem = runloop_st->subsystem_data; /* Core fully loaded, use the subsystem data */ @@ -2911,7 +2960,7 @@ static uint32_t file_crc32(uint32_t crc, const char *path) for (i = 0; i < CRC32_MAX_MB; i++) { int64_t nread = filestream_read(file, buf, CRC32_BUFFER_SIZE); - if (nread < 0) + if (nread < 0) { free(buf); filestream_close(file); @@ -3018,14 +3067,19 @@ bool content_init(void) content_ctx.flags |= CONTENT_INFO_FLAG_IS_BPS_PREF; if (rarch_flags & RARCH_FLAGS_UPS_PREF) content_ctx.flags |= CONTENT_INFO_FLAG_IS_UPS_PREF; +#ifdef HAVE_XDELTA + if (rarch_flags & RARCH_FLAGS_XDELTA_PREF) + content_ctx.flags |= CONTENT_INFO_FLAG_IS_XDELTA_PREF; +#endif /* HAVE_XDELTA */ if (runloop_st->flags & RUNLOOP_FLAG_PATCH_BLOCKED) content_ctx.flags |= CONTENT_INFO_FLAG_PATCH_IS_BLOCKED; -#endif +#endif /* HAVE_PATCH */ content_ctx.directory_system = NULL; content_ctx.directory_cache = NULL; content_ctx.name_ips = NULL; content_ctx.name_bps = NULL; content_ctx.name_ups = NULL; + content_ctx.name_xdelta = NULL; content_ctx.valid_extensions 
= NULL; content_ctx.subsystem.data = NULL; @@ -3037,6 +3091,8 @@ bool content_init(void) content_ctx.name_bps = strdup(runloop_st->name.bps); if (!string_is_empty(runloop_st->name.ups)) content_ctx.name_ups = strdup(runloop_st->name.ups); + if (!string_is_empty(runloop_st->name.xdelta)) + content_ctx.name_xdelta = strdup(runloop_st->name.xdelta); if (sys_info) { @@ -3080,13 +3136,15 @@ bool content_init(void) free(content_ctx.name_bps); if (content_ctx.name_ups) free(content_ctx.name_ups); + if (content_ctx.name_xdelta) + free(content_ctx.name_xdelta); if (content_ctx.directory_system) free(content_ctx.directory_system); if (content_ctx.directory_cache) free(content_ctx.directory_cache); if (content_ctx.valid_extensions) free(content_ctx.valid_extensions); - + if (error_enum != MSG_UNKNOWN) { switch (error_enum) diff --git a/tasks/task_patch.c b/tasks/task_patch.c index b5a30ae335..64579bf29c 100644 --- a/tasks/task_patch.c +++ b/tasks/task_patch.c @@ -36,6 +36,10 @@ #include "../verbosity.h" #include "../configuration.h" +#if HAVE_XDELTA +#include "../deps/xdelta3/xdelta3.h" +#endif + enum bps_mode { SOURCE_READ = 0, @@ -57,7 +61,8 @@ enum patch_error PATCH_TARGET_INVALID, PATCH_SOURCE_CHECKSUM_INVALID, PATCH_TARGET_CHECKSUM_INVALID, - PATCH_PATCH_CHECKSUM_INVALID + PATCH_PATCH_CHECKSUM_INVALID, + PATCH_PATCH_UNSUPPORTED }; struct bps_data @@ -381,7 +386,7 @@ static enum patch_error ups_apply_patch( } data.target_length = (unsigned)*targetlength; - + while (data.patch_offset < data.patch_length - 12) { unsigned length = (unsigned)ups_decode(&data); @@ -540,7 +545,7 @@ static enum patch_error ips_apply_patch( patchdata[3] != 'C' || patchdata[4] != 'H') return PATCH_PATCH_INVALID; - + if ((error_patch = ips_alloc_targetdata( patchdata, patchlen, sourcelength, targetdata, targetlength)) != PATCH_SUCCESS) @@ -611,6 +616,105 @@ static enum patch_error ips_apply_patch( return PATCH_PATCH_INVALID; } +static enum patch_error xdelta_apply_patch( + const uint8_t *patchdata, 
uint64_t patchlen, + const uint8_t *sourcedata, uint64_t sourcelength, + uint8_t **targetdata, uint64_t *targetlength) +{ +#if defined(HAVE_PATCH) && defined(HAVE_XDELTA) + int ret; + enum patch_error error_patch = PATCH_SUCCESS; + xd3_stream stream; + xd3_config config; + xd3_source source; + + /* Validate the magic number, as given by RFC 3284 section 4.1 */ + if (patchlen < 8 || + patchdata[0] != 0xD6 || + patchdata[1] != 0xC3 || + patchdata[2] != 0xC4 || + patchdata[3] != 0x00) + return PATCH_PATCH_INVALID_HEADER; + + xd3_init_config(&config, XD3_SKIP_EMIT); + /* The first pass is just to compute the buffer size, + * no need to emit patched data yet */ + + if (xd3_config_stream(&stream, &config) != 0) + return PATCH_UNKNOWN; + + memset(&source, 0, sizeof(source)); + source.blksize = sourcelength; + source.onblk = sourcelength; + source.curblk = sourcedata; + source.curblkno = 0; + xd3_set_source_and_size(&stream, &source, sourcelength); + + do + { /* Make a first pass over the patch, to compute the target size. + * XDelta3 doesn't store the target size in the patch file, + * so we have to either compute it ourselves + * or keep reallocating a buffer as we go. + * I went with the former because it's simpler and fails sooner. 
+ */ + switch (ret = xd3_decode_input(&stream)) + { /* xd3 works like a zlib-styled state machine (stream is the machine) */ + case XD3_INPUT: /* When starting the first pass, provide the input */ + xd3_avail_input(&stream, patchdata, patchlen); + RARCH_DBG("[xdelta] Provided %lu bytes of input to xd3_stream\n", patchlen); + break; + case XD3_GOTHEADER: + case XD3_WINSTART: + *targetlength += stream.winsize; + RARCH_DBG("[xdelta] Discovered a window of %lu bytes (target filesize is %lu bytes)\n", stream.winsize, *targetlength); + /* xdelta updates the active stream window in the GOTHEADER and WINSTART states */ + break; + case XD3_OUTPUT: + xd3_consume_output(&stream); /* Need to call this after every output */ + break; + case XD3_INVALID_INPUT: + error_patch = PATCH_PATCH_INVALID; + RARCH_ERR("[xdelta] Invalid input in xd3_stream (%s)\n", xd3_errstring(&stream)); + goto cleanup_stream; + case XD3_INTERNAL: + error_patch = PATCH_UNKNOWN; + RARCH_ERR("[xdelta] Internal error in xd3_stream (%s)\n", xd3_errstring(&stream)); + goto cleanup_stream; + case XD3_WINFINISH: + RARCH_DBG("[xdelta] Finished processing window #%d\n", stream.current_window); + break; + default: + RARCH_DBG("[xdelta] xd3_decode_input returned %ld (%s; %s)\n", ret, xd3_strerror(ret), stream.msg); + } + } while (stream.avail_in); + + *targetdata = malloc(*targetlength); + switch (ret = xd3_decode_memory( + patchdata, patchlen, + sourcedata, sourcelength, + *targetdata, targetlength, *targetlength, 0)) + { + case 0: /* Success */ + break; + case ENOSPC: + error_patch = PATCH_TARGET_ALLOC_FAILED; + free(*targetdata); + goto cleanup_stream; + default: + error_patch = PATCH_UNKNOWN; + free(*targetdata); + goto cleanup_stream; + } + +cleanup_stream: + xd3_close_stream(&stream); + xd3_free_stream(&stream); + return error_patch; +#else /* HAVE_PATCH is defined and HAVE_XDELTA is defined */ + return PATCH_PATCH_UNSUPPORTED; +#endif +} + static bool apply_patch_content(uint8_t **buf, ssize_t *size, const 
char *patch_desc, const char *patch_path, patch_func_t func, void *patch_data, int64_t patch_size) @@ -662,7 +766,7 @@ static bool apply_patch_content(uint8_t **buf, static bool try_bps_patch(bool allow_bps, const char *name_bps, uint8_t **buf, ssize_t *size) { - if ( allow_bps + if ( allow_bps && !string_is_empty(name_bps) && path_is_valid(name_bps) ) @@ -715,7 +819,7 @@ static bool try_ups_patch(bool allow_ups, const char *name_ups, static bool try_ips_patch(bool allow_ips, const char *name_ips, uint8_t **buf, ssize_t *size) { - if ( allow_ips + if ( allow_ips && !string_is_empty(name_ips) && path_is_valid(name_ips) ) @@ -738,6 +842,34 @@ static bool try_ips_patch(bool allow_ips, return false; } +static bool try_xdelta_patch(bool allow_xdelta, + const char *name_xdelta, uint8_t **buf, ssize_t *size) +{ +#if defined(HAVE_PATCH) && defined(HAVE_XDELTA) + if ( allow_xdelta + && !string_is_empty(name_xdelta) + && path_is_valid(name_xdelta) + ) + { + int64_t patch_size; + bool ret = false; + void *patch_data = NULL; + + if (!filestream_read_file(name_xdelta, &patch_data, &patch_size)) + return false; + + if (patch_size >= 0) + ret = apply_patch_content(buf, size, "Xdelta", name_xdelta, + xdelta_apply_patch, patch_data, patch_size); + + if (patch_data) + free(patch_data); + return ret; + } +#endif + return false; +} + /** * patch_content: * @buf : buffer of the content file. 
@@ -750,20 +882,24 @@ bool patch_content( bool is_ips_pref, bool is_bps_pref, bool is_ups_pref, + bool is_xdelta_pref, const char *name_ips, const char *name_bps, const char *name_ups, + const char *name_xdelta, uint8_t **buf, void *data) { - ssize_t *size = (ssize_t*)data; - bool allow_ups = !is_bps_pref && !is_ips_pref; - bool allow_ips = !is_ups_pref && !is_bps_pref; - bool allow_bps = !is_ups_pref && !is_ips_pref; + ssize_t *size = (ssize_t*)data; + bool allow_ups = !is_bps_pref && !is_ips_pref && !is_xdelta_pref; + bool allow_ips = !is_ups_pref && !is_bps_pref && !is_xdelta_pref; + bool allow_bps = !is_ups_pref && !is_ips_pref && !is_xdelta_pref; + bool allow_xdelta = !is_bps_pref && !is_ups_pref && !is_ips_pref; if ( (unsigned)is_ips_pref + (unsigned)is_bps_pref - + (unsigned)is_ups_pref > 1) + + (unsigned)is_ups_pref + + (unsigned)is_xdelta_pref > 1) { RARCH_WARN("%s\n", msg_hash_to_str(MSG_SEVERAL_PATCHES_ARE_EXPLICITLY_DEFINED)); @@ -773,16 +909,19 @@ bool patch_content( /* Attempt to apply first (non-indexed) patch */ if ( try_ips_patch(allow_ips, name_ips, buf, size) || try_bps_patch(allow_bps, name_bps, buf, size) - || try_ups_patch(allow_ups, name_ups, buf, size)) + || try_ups_patch(allow_ups, name_ups, buf, size) + || try_xdelta_patch(allow_xdelta, name_xdelta, buf, size)) { /* A patch has been found. 
Now attempt to apply * any additional 'indexed' patch files */ - size_t name_ips_len = strlen(name_ips); - size_t name_bps_len = strlen(name_bps); - size_t name_ups_len = strlen(name_ups); - char *name_ips_indexed = (char*)malloc((name_ips_len + 2) * sizeof(char)); - char *name_bps_indexed = (char*)malloc((name_bps_len + 2) * sizeof(char)); - char *name_ups_indexed = (char*)malloc((name_ups_len + 2) * sizeof(char)); + size_t name_ips_len = strlen(name_ips); + size_t name_bps_len = strlen(name_bps); + size_t name_ups_len = strlen(name_ups); + size_t name_xdelta_len = strlen(name_xdelta); + char *name_ips_indexed = (char*)malloc((name_ips_len + 2) * sizeof(char)); + char *name_bps_indexed = (char*)malloc((name_bps_len + 2) * sizeof(char)); + char *name_ups_indexed = (char*)malloc((name_ups_len + 2) * sizeof(char)); + char *name_xdelta_indexed = (char*)malloc((name_xdelta_len + 2) * sizeof(char)); /* First patch already applied -> index * for subsequent patches starts at 1 */ size_t patch_index = 1; @@ -790,12 +929,14 @@ bool patch_content( strlcpy(name_ips_indexed, name_ips, (name_ips_len + 1) * sizeof(char)); strlcpy(name_bps_indexed, name_bps, (name_bps_len + 1) * sizeof(char)); strlcpy(name_ups_indexed, name_ups, (name_ups_len + 1) * sizeof(char)); + strlcpy(name_xdelta_indexed, name_xdelta, (name_xdelta_len + 1) * sizeof(char)); /* Ensure that we NUL terminate *after* the * index character */ name_ips_indexed[name_ips_len + 1] = '\0'; name_bps_indexed[name_bps_len + 1] = '\0'; name_ups_indexed[name_ups_len + 1] = '\0'; + name_xdelta_indexed[name_xdelta_len + 1] = '\0'; /* try to patch "*.ipsX" */ while (patch_index < 10) @@ -815,10 +956,12 @@ bool patch_content( name_ips_indexed[name_ips_len] = index_char; name_bps_indexed[name_bps_len] = index_char; name_ups_indexed[name_ups_len] = index_char; + name_xdelta_indexed[name_xdelta_len] = index_char; if ( !try_ips_patch(allow_ips, name_ips_indexed, buf, size) && !try_bps_patch(allow_bps, name_bps_indexed, buf, size) 
- && !try_ups_patch(allow_ups, name_ups_indexed, buf, size)) + && !try_ups_patch(allow_ups, name_ups_indexed, buf, size) + && !try_xdelta_patch(allow_xdelta, name_xdelta_indexed, buf, size)) break; patch_index++; @@ -827,6 +970,7 @@ bool patch_content( free(name_ips_indexed); free(name_bps_indexed); free(name_ups_indexed); + free(name_xdelta_indexed); return true; } diff --git a/tasks/tasks_internal.h b/tasks/tasks_internal.h index f31129b1a8..c3d3c1cb43 100644 --- a/tasks/tasks_internal.h +++ b/tasks/tasks_internal.h @@ -240,9 +240,11 @@ bool patch_content( bool is_ips_pref, bool is_bps_pref, bool is_ups_pref, + bool is_xdelta_pref, const char *name_ips, const char *name_bps, const char *name_ups, + const char *name_xdelta, uint8_t **buf, void *data);