diff options
Diffstat (limited to '')
-rw-r--r-- | applypatch/Android.bp | 206 | ||||
-rw-r--r-- | applypatch/Makefile | 33 | ||||
-rw-r--r-- | applypatch/applypatch.cpp | 52 | ||||
-rw-r--r-- | applypatch/bspatch.cpp | 23 | ||||
-rw-r--r-- | applypatch/freecache.cpp | 14 | ||||
-rw-r--r-- | applypatch/imgdiff.cpp | 1782 | ||||
-rw-r--r-- | applypatch/imgpatch.cpp | 94 | ||||
-rw-r--r-- | applypatch/include/applypatch/applypatch.h | 27 | ||||
-rw-r--r-- | applypatch/include/applypatch/imgdiff_image.h | 306 | ||||
-rw-r--r-- | applypatch/libimgpatch.pc | 6 |
10 files changed, 1769 insertions, 774 deletions
diff --git a/applypatch/Android.bp b/applypatch/Android.bp new file mode 100644 index 000000000..cb0b36746 --- /dev/null +++ b/applypatch/Android.bp @@ -0,0 +1,206 @@ +// Copyright (C) 2017 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +cc_defaults { + name: "applypatch_defaults", + + cflags: [ + "-D_FILE_OFFSET_BITS=64", + "-DZLIB_CONST", + "-Wall", + "-Werror", + ], + + local_include_dirs: [ + "include", + ], +} + +cc_library_static { + name: "libapplypatch", + + host_supported: true, + + defaults: [ + "applypatch_defaults", + ], + + srcs: [ + "applypatch.cpp", + "bspatch.cpp", + "freecache.cpp", + "imgpatch.cpp", + ], + + export_include_dirs: [ + "include", + ], + + static_libs: [ + "libbase", + "libbspatch", + "libbz", + "libcrypto", + "libedify", + "libotafault", + "libotautil", + "libz", + ], + + target: { + darwin: { + enabled: false, + }, + }, +} + +cc_library_static { + name: "libapplypatch_modes", + + defaults: [ + "applypatch_defaults", + ], + + srcs: [ + "applypatch_modes.cpp", + ], + + static_libs: [ + "libapplypatch", + "libbase", + "libcrypto", + "libedify", + "libotautil", + ], +} + +cc_binary { + name: "applypatch", + + defaults: [ + "applypatch_defaults", + ], + + srcs: [ + "applypatch_main.cpp", + ], + + static_libs: [ + "libapplypatch_modes", + "libapplypatch", + "libedify", + "libotafault", + "libotautil", + "libbspatch", + ], + + shared_libs: [ + "libbase", + "libbrotli", + "libbz", + "libcrypto", + "liblog", + "libz", + "libziparchive", + ], +} + +cc_library_static { + name: "libimgdiff", + + host_supported: true, + + defaults: [ + "applypatch_defaults", + ], + + srcs: [ + "imgdiff.cpp", + ], + + export_include_dirs: [ + "include", + ], + + static_libs: [ + "libbase", + "libbsdiff", + "libdivsufsort", + "libdivsufsort64", + "liblog", + "libotautil", + "libutils", + "libz", + "libziparchive", + ], +} + +cc_binary_host { + name: "imgdiff", + + srcs: [ + "imgdiff_main.cpp", + ], + + defaults: [ + "applypatch_defaults", + ], + + static_libs: [ + "libimgdiff", + "libotautil", + "libbsdiff", + "libdivsufsort", + "libdivsufsort64", + "libziparchive", + "libbase", + "libutils", + "liblog", + "libbrotli", + "libbz", + "libz", + ], +} + +cc_library_static { + name: "libimgpatch", + + // The host module is for recovery_host_test (Linux only). + host_supported: true, + + defaults: [ + "applypatch_defaults", + ], + + srcs: [ + "bspatch.cpp", + "imgpatch.cpp", + ], + + static_libs: [ + "libbase", + "libbspatch", + "libbz", + "libcrypto", + "libedify", + "libotautil", + "libz", + ], + + target: { + darwin: { + enabled: false, + }, + }, +} diff --git a/applypatch/Makefile b/applypatch/Makefile deleted file mode 100644 index fb4984303..000000000 --- a/applypatch/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This file is for building imgdiff in Chrome OS. - -CPPFLAGS += -iquote.. -Iinclude -CXXFLAGS += -std=c++11 -O3 -Wall -Werror -LDLIBS += -lbz2 -lz - -.PHONY: all clean - -all: imgdiff libimgpatch.a - -clean: - rm -f *.o imgdiff libimgpatch.a - -imgdiff: imgdiff.o bsdiff.o utils.o - $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDLIBS) -o $@ $^ - -libimgpatch.a utils.o: CXXFLAGS += -fPIC -libimgpatch.a: imgpatch.o bspatch.o utils.o - ${AR} rcs $@ $^ diff --git a/applypatch/applypatch.cpp b/applypatch/applypatch.cpp index 43e9b80dc..5c6c83f4f 100644 --- a/applypatch/applypatch.cpp +++ b/applypatch/applypatch.cpp @@ -42,8 +42,9 @@ #include "mtdutils/mtdutils.h" #include "edify/expr.h" -#include "ota_io.h" -#include "print_sha1.h" +#include "otafault/ota_io.h" +#include "otautil/cache_location.h" +#include "otautil/print_sha1.h" static int LoadPartitionContents(const std::string& filename, FileContents* file); static size_t FileSink(const unsigned char* data, size_t len, int fd); @@ -64,12 +65,13 @@ int LoadFileContents(const char* filename, FileContents* file) { return LoadPartitionContents(filename, file); } - if (stat(filename, &file->st) == -1) { + struct stat sb; + if (stat(filename, &sb) == -1) { printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); return -1; } - std::vector<unsigned char> data(file->st.st_size); + std::vector<unsigned char> data(sb.st_size); unique_file f(ota_fopen(filename, "rb")); if (!f) { printf("failed to open \"%s\": %s\n", filename, strerror(errno)); @@ -208,10 +210,6 @@ static int LoadPartitionContents(const std::string& filename, FileContents* file buffer.resize(buffer_size); file->data = std::move(buffer); - // Fake some stat() info. - file->st.st_mode = 0644; - file->st.st_uid = 0; - file->st.st_gid = 0; return 0; } @@ -240,15 +238,6 @@ int SaveFileContents(const char* filename, const FileContents* file) { return -1; } - if (chmod(filename, file->st.st_mode) != 0) { - printf("chmod of \"%s\" failed: %s\n", filename, strerror(errno)); - return -1; - } - if (chown(filename, file->st.st_uid, file->st.st_gid) != 0) { - printf("chown of \"%s\" failed: %s\n", filename, strerror(errno)); - return -1; - } - return 0; } @@ -509,12 +498,10 @@ int applypatch_check(const char* filename, const std::vector<std::string>& patch (!patch_sha1_str.empty() && FindMatchingPatch(file.sha1, patch_sha1_str) < 0)) { printf("file \"%s\" doesn't have any of expected sha1 sums; checking cache\n", filename); - // If the source file is missing or corrupted, it might be because - // we were killed in the middle of patching it. A copy of it - // should have been made in CACHE_TEMP_SOURCE. If that file - // exists and matches the sha1 we're looking for, the check still - // passes. - if (LoadFileContents(CACHE_TEMP_SOURCE, &file) != 0) { + // If the source file is missing or corrupted, it might be because we were killed in the middle + // of patching it. A copy of it should have been made in cache_temp_source. If that file + // exists and matches the sha1 we're looking for, the check still passes. + if (LoadFileContents(CacheLocation::location().cache_temp_source().c_str(), &file) != 0) { printf("failed to load cache file\n"); return 1; } @@ -560,9 +547,8 @@ int CacheSizeCheck(size_t bytes) { if (MakeFreeSpaceOnCache(bytes) < 0) { printf("unable to make %zu bytes available on /cache\n", bytes); return 1; - } else { - return 0; } + return 0; } // This function applies binary patches to EMMC target files in a way that is safe (the original @@ -587,7 +573,7 @@ int CacheSizeCheck(size_t bytes) { // become obsolete since we have dropped the support for patching non-EMMC targets (EMMC targets // have the size embedded in the filename). int applypatch(const char* source_filename, const char* target_filename, - const char* target_sha1_str, size_t target_size __unused, + const char* target_sha1_str, size_t /* target_size */, const std::vector<std::string>& patch_sha1_str, const std::vector<std::unique_ptr<Value>>& patch_data, const Value* bonus_data) { printf("patch %s: ", source_filename); @@ -637,7 +623,7 @@ int applypatch(const char* source_filename, const char* target_filename, printf("source file is bad; trying copy\n"); FileContents copy_file; - if (LoadFileContents(CACHE_TEMP_SOURCE, ©_file) < 0) { + if (LoadFileContents(CacheLocation::location().cache_temp_source().c_str(), ©_file) < 0) { printf("failed to read copy file\n"); return 1; } @@ -732,7 +718,7 @@ static int GenerateTarget(const FileContents& source_file, const std::unique_ptr printf("not enough free space on /cache\n"); return 1; } - if (SaveFileContents(CACHE_TEMP_SOURCE, &source_file) < 0) { + if (SaveFileContents(CacheLocation::location().cache_temp_source().c_str(), &source_file) < 0) { printf("failed to back up source file\n"); return 1; } @@ -749,11 +735,11 @@ static int GenerateTarget(const FileContents& source_file, const std::unique_ptr int result; if (use_bsdiff) { - result = ApplyBSDiffPatch(source_file.data.data(), source_file.data.size(), patch.get(), 0, - sink, &ctx); + result = + ApplyBSDiffPatch(source_file.data.data(), source_file.data.size(), *patch, 0, sink, &ctx); } else { - result = ApplyImagePatch(source_file.data.data(), source_file.data.size(), patch.get(), sink, - &ctx, bonus_data); + result = ApplyImagePatch(source_file.data.data(), source_file.data.size(), *patch, sink, &ctx, + bonus_data); } if (result != 0) { @@ -778,7 +764,7 @@ static int GenerateTarget(const FileContents& source_file, const std::unique_ptr } // Delete the backup copy of the source. - unlink(CACHE_TEMP_SOURCE); + unlink(CacheLocation::location().cache_temp_source().c_str()); // Success! return 0; diff --git a/applypatch/bspatch.cpp b/applypatch/bspatch.cpp index 65ee614ef..912dbbdd8 100644 --- a/applypatch/bspatch.cpp +++ b/applypatch/bspatch.cpp @@ -26,11 +26,12 @@ #include <string> #include <android-base/logging.h> -#include <bspatch.h> +#include <bsdiff/bspatch.h> #include <openssl/sha.h> #include "applypatch/applypatch.h" -#include "print_sha1.h" +#include "edify/expr.h" +#include "otautil/print_sha1.h" void ShowBSDiffLicense() { puts("The bsdiff library used herein is:\n" @@ -64,7 +65,7 @@ void ShowBSDiffLicense() { ); } -int ApplyBSDiffPatch(const unsigned char* old_data, size_t old_size, const Value* patch, +int ApplyBSDiffPatch(const unsigned char* old_data, size_t old_size, const Value& patch, size_t patch_offset, SinkFn sink, SHA_CTX* ctx) { auto sha_sink = [&sink, &ctx](const uint8_t* data, size_t len) { len = sink(data, len); @@ -72,23 +73,21 @@ int ApplyBSDiffPatch(const unsigned char* old_data, size_t old_size, const Value return len; }; - CHECK(patch != nullptr); - CHECK_LE(patch_offset, patch->data.size()); + CHECK_LE(patch_offset, patch.data.size()); int result = bsdiff::bspatch(old_data, old_size, - reinterpret_cast<const uint8_t*>(&patch->data[patch_offset]), - patch->data.size() - patch_offset, sha_sink); + reinterpret_cast<const uint8_t*>(&patch.data[patch_offset]), + patch.data.size() - patch_offset, sha_sink); if (result != 0) { LOG(ERROR) << "bspatch failed, result: " << result; // print SHA1 of the patch in the case of a data error. if (result == 2) { uint8_t digest[SHA_DIGEST_LENGTH]; - SHA1(reinterpret_cast<const uint8_t*>(patch->data.data() + patch_offset), - patch->data.size() - patch_offset, digest); + SHA1(reinterpret_cast<const uint8_t*>(patch.data.data() + patch_offset), + patch.data.size() - patch_offset, digest); std::string patch_sha1 = print_sha1(digest); - LOG(ERROR) << "Patch may be corrupted, offset: " << patch_offset << ", SHA1: " - << patch_sha1; + LOG(ERROR) << "Patch may be corrupted, offset: " << patch_offset << ", SHA1: " << patch_sha1; } } return result; -}
\ No newline at end of file +} diff --git a/applypatch/freecache.cpp b/applypatch/freecache.cpp index 331cae265..ea364d8e6 100644 --- a/applypatch/freecache.cpp +++ b/applypatch/freecache.cpp @@ -33,6 +33,7 @@ #include <android-base/stringprintf.h> #include "applypatch/applypatch.h" +#include "otautil/cache_location.h" static int EliminateOpenFiles(std::set<std::string>* files) { std::unique_ptr<DIR, decltype(&closedir)> d(opendir("/proc"), closedir); @@ -90,10 +91,9 @@ static std::set<std::string> FindExpendableFiles() { while ((de = readdir(d.get())) != 0) { std::string path = std::string(dirs[i]) + "/" + de->d_name; - // We can't delete CACHE_TEMP_SOURCE; if it's there we might have - // restarted during installation and could be depending on it to - // be there. - if (path == CACHE_TEMP_SOURCE) { + // We can't delete cache_temp_source; if it's there we might have restarted during + // installation and could be depending on it to be there. + if (path == CacheLocation::location().cache_temp_source()) { continue; } @@ -112,6 +112,12 @@ static std::set<std::string> FindExpendableFiles() { } int MakeFreeSpaceOnCache(size_t bytes_needed) { +#ifndef __ANDROID__ + // TODO (xunchang) implement a heuristic cache size check during host simulation. + printf("Skip making (%zu) bytes free space on cache; program is running on host\n", bytes_needed); + return 0; +#endif + size_t free_now = FreeSpaceForFile("/cache"); printf("%zu bytes free on /cache (%zu needed)\n", free_now, bytes_needed); diff --git a/applypatch/imgdiff.cpp b/applypatch/imgdiff.cpp index fc240644f..674cc2b16 100644 --- a/applypatch/imgdiff.cpp +++ b/applypatch/imgdiff.cpp @@ -15,53 +15,44 @@ */ /* - * This program constructs binary patches for images -- such as boot.img - * and recovery.img -- that consist primarily of large chunks of gzipped - * data interspersed with uncompressed data. Doing a naive bsdiff of - * these files is not useful because small changes in the data lead to - * large changes in the compressed bitstream; bsdiff patches of gzipped - * data are typically as large as the data itself. + * This program constructs binary patches for images -- such as boot.img and recovery.img -- that + * consist primarily of large chunks of gzipped data interspersed with uncompressed data. Doing a + * naive bsdiff of these files is not useful because small changes in the data lead to large + * changes in the compressed bitstream; bsdiff patches of gzipped data are typically as large as + * the data itself. * - * To patch these usefully, we break the source and target images up into - * chunks of two types: "normal" and "gzip". Normal chunks are simply - * patched using a plain bsdiff. Gzip chunks are first expanded, then a - * bsdiff is applied to the uncompressed data, then the patched data is - * gzipped using the same encoder parameters. Patched chunks are - * concatenated together to create the output file; the output image - * should be *exactly* the same series of bytes as the target image used - * originally to generate the patch. + * To patch these usefully, we break the source and target images up into chunks of two types: + * "normal" and "gzip". Normal chunks are simply patched using a plain bsdiff. Gzip chunks are + * first expanded, then a bsdiff is applied to the uncompressed data, then the patched data is + * gzipped using the same encoder parameters. Patched chunks are concatenated together to create + * the output file; the output image should be *exactly* the same series of bytes as the target + * image used originally to generate the patch. * - * To work well with this tool, the gzipped sections of the target - * image must have been generated using the same deflate encoder that - * is available in applypatch, namely, the one in the zlib library. - * In practice this means that images should be compressed using the - * "minigzip" tool included in the zlib distribution, not the GNU gzip - * program. + * To work well with this tool, the gzipped sections of the target image must have been generated + * using the same deflate encoder that is available in applypatch, namely, the one in the zlib + * library. In practice this means that images should be compressed using the "minigzip" tool + * included in the zlib distribution, not the GNU gzip program. * - * An "imgdiff" patch consists of a header describing the chunk structure - * of the file and any encoding parameters needed for the gzipped - * chunks, followed by N bsdiff patches, one per chunk. + * An "imgdiff" patch consists of a header describing the chunk structure of the file and any + * encoding parameters needed for the gzipped chunks, followed by N bsdiff patches, one per chunk. * - * For a diff to be generated, the source and target images must have the - * same "chunk" structure: that is, the same number of gzipped and normal - * chunks in the same order. Android boot and recovery images currently - * consist of five chunks: a small normal header, a gzipped kernel, a - * small normal section, a gzipped ramdisk, and finally a small normal - * footer. + * For a diff to be generated, the source and target must be in well-formed zip archive format; + * or they are image files with the same "chunk" structure: that is, the same number of gzipped and + * normal chunks in the same order. Android boot and recovery images currently consist of five + * chunks: a small normal header, a gzipped kernel, a small normal section, a gzipped ramdisk, and + * finally a small normal footer. * - * Caveats: we locate gzipped sections within the source and target - * images by searching for the byte sequence 1f8b0800: 1f8b is the gzip - * magic number; 08 specifies the "deflate" encoding [the only encoding - * supported by the gzip standard]; and 00 is the flags byte. We do not - * currently support any extra header fields (which would be indicated by - * a nonzero flags byte). We also don't handle the case when that byte - * sequence appears spuriously in the file. (Note that it would have to - * occur spuriously within a normal chunk to be a problem.) + * Caveats: we locate gzipped sections within the source and target images by searching for the + * byte sequence 1f8b0800: 1f8b is the gzip magic number; 08 specifies the "deflate" encoding + * [the only encoding supported by the gzip standard]; and 00 is the flags byte. We do not + * currently support any extra header fields (which would be indicated by a nonzero flags byte). + * We also don't handle the case when that byte sequence appears spuriously in the file. (Note + * that it would have to occur spuriously within a normal chunk to be a problem.) * * * The imgdiff patch header looks like this: * - * "IMGDIFF1" (8) [magic number and version] + * "IMGDIFF2" (8) [magic number and version] * chunk count (4) * for each chunk: * chunk type (4) [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}] @@ -98,33 +89,62 @@ * target len (4) * data (target len) * - * All integers are little-endian. "source start" and "source len" - * specify the section of the input image that comprises this chunk, - * including the gzip header and footer for gzip chunks. "source - * expanded len" is the size of the uncompressed source data. "target - * expected len" is the size of the uncompressed data after applying - * the bsdiff patch. The next five parameters specify the zlib - * parameters to be used when compressing the patched data, and the - * next three specify the header and footer to be wrapped around the - * compressed data to create the output chunk (so that header contents - * like the timestamp are recreated exactly). + * All integers are little-endian. "source start" and "source len" specify the section of the + * input image that comprises this chunk, including the gzip header and footer for gzip chunks. + * "source expanded len" is the size of the uncompressed source data. "target expected len" is the + * size of the uncompressed data after applying the bsdiff patch. The next five parameters + * specify the zlib parameters to be used when compressing the patched data, and the next three + * specify the header and footer to be wrapped around the compressed data to create the output + * chunk (so that header contents like the timestamp are recreated exactly). * - * After the header there are 'chunk count' bsdiff patches; the offset - * of each from the beginning of the file is specified in the header. + * After the header there are 'chunk count' bsdiff patches; the offset of each from the beginning + * of the file is specified in the header. * - * This tool can take an optional file of "bonus data". This is an - * extra file of data that is appended to chunk #1 after it is - * compressed (it must be a CHUNK_DEFLATE chunk). The same file must - * be available (and passed to applypatch with -b) when applying the - * patch. This is used to reduce the size of recovery-from-boot - * patches by combining the boot image with recovery ramdisk + * This tool can take an optional file of "bonus data". This is an extra file of data that is + * appended to chunk #1 after it is compressed (it must be a CHUNK_DEFLATE chunk). The same file + * must be available (and passed to applypatch with -b) when applying the patch. This is used to + * reduce the size of recovery-from-boot patches by combining the boot image with recovery ramdisk * information that is stored on the system partition. + * + * When generating the patch between two zip files, this tool has an option "--block-limit" to + * split the large source/target files into several pair of pieces, with each piece has at most + * *limit* blocks. When this option is used, we also need to output the split info into the file + * path specified by "--split-info". + * + * Format of split info file: + * 2 [version of imgdiff] + * n [count of split pieces] + * <patch_size>, <tgt_size>, <src_range> [size and ranges for split piece#1] + * ... + * <patch_size>, <tgt_size>, <src_range> [size and ranges for split piece#n] + * + * To split a pair of large zip files, we walk through the chunks in target zip and search by its + * entry_name in the source zip. If the entry_name is non-empty and a matching entry in source + * is found, we'll add the source entry to the current split source image; otherwise we'll skip + * this chunk and later do bsdiff between all the skipped trunks and the whole split source image. + * We move on to the next pair of pieces if the size of the split source image reaches the block + * limit. + * + * After the split, the target pieces are continuous and block aligned, while the source pieces + * are mutually exclusive. Some of the source blocks may not be used if there's no matching + * entry_name in the target; as a result, they won't be included in any of these split source + * images. Then we will generate patches accordingly between each split image pairs; in particular, + * the unmatched trunks in the split target will diff against the entire split source image. + * + * For example: + * Input: [src_image, tgt_image] + * Split: [src-0, tgt-0; src-1, tgt-1, src-2, tgt-2] + * Diff: [ patch-0; patch-1; patch-2] + * + * Patch: [(src-0, patch-0) = tgt-0; (src-1, patch-1) = tgt-1; (src-2, patch-2) = tgt-2] + * Concatenate: [tgt-0 + tgt-1 + tgt-2 = tgt_image] */ #include "applypatch/imgdiff.h" #include <errno.h> #include <fcntl.h> +#include <getopt.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -139,15 +159,26 @@ #include <android-base/file.h> #include <android-base/logging.h> #include <android-base/memory.h> +#include <android-base/parseint.h> +#include <android-base/stringprintf.h> +#include <android-base/strings.h> #include <android-base/unique_fd.h> +#include <bsdiff/bsdiff.h> #include <ziparchive/zip_archive.h> - -#include <bsdiff.h> #include <zlib.h> +#include "applypatch/imgdiff_image.h" +#include "otautil/rangeset.h" + using android::base::get_unaligned; -static constexpr auto BUFFER_SIZE = 0x8000; +static constexpr size_t VERSION = 2; + +// We assume the header "IMGDIFF#" is 8 bytes. +static_assert(VERSION <= 9, "VERSION occupies more than one byte"); + +static constexpr size_t BLOCK_SIZE = 4096; +static constexpr size_t BUFFER_SIZE = 0x8000; // If we use this function to write the offset and length (type size_t), their values should not // exceed 2^63; because the signed bit will be casted away. @@ -161,99 +192,80 @@ static inline bool Write4(int fd, int32_t value) { return android::base::WriteFully(fd, &value, sizeof(int32_t)); } -class ImageChunk { - public: - static constexpr auto WINDOWBITS = -15; // 32kb window; negative to indicate a raw stream. - static constexpr auto MEMLEVEL = 8; // the default value. - static constexpr auto METHOD = Z_DEFLATED; - static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY; - - ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len) - : type_(type), - start_(start), - input_file_ptr_(file_content), - raw_data_len_(raw_data_len), - compress_level_(6), - source_start_(0), - source_len_(0), - source_uncompressed_len_(0) { - CHECK(file_content != nullptr) << "input file container can't be nullptr"; - } - - int GetType() const { - return type_; - } - size_t GetRawDataLength() const { - return raw_data_len_; - } - const std::string& GetEntryName() const { - return entry_name_; - } - - // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return - // the raw data. - const uint8_t * DataForPatch() const; - size_t DataLengthForPatch() const; - - void Dump() const { - printf("type %d start %zu len %zu\n", type_, start_, DataLengthForPatch()); - } - - void SetSourceInfo(const ImageChunk& other); - void SetEntryName(std::string entryname); - void SetUncompressedData(std::vector<uint8_t> data); - bool SetBonusData(const std::vector<uint8_t>& bonus_data); - - bool operator==(const ImageChunk& other) const; - bool operator!=(const ImageChunk& other) const { - return !(*this == other); - } - - size_t GetHeaderSize(size_t patch_size) const; - // Return the offset of the next patch into the patch data. - size_t WriteHeaderToFd(int fd, const std::vector<uint8_t>& patch, size_t offset); - - /* - * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob - * of uninterpreted data). The resulting patch will likely be about - * as big as the target file, but it lets us handle the case of images - * where some gzip chunks are reconstructible but others aren't (by - * treating the ones that aren't as normal chunks). - */ - void ChangeDeflateChunkToNormal(); - bool ChangeChunkToRaw(size_t patch_size); - - /* - * Verify that we can reproduce exactly the same compressed data that - * we started with. Sets the level, method, windowBits, memLevel, and - * strategy fields in the chunk to the encoding parameters needed to - * produce the right output. - */ - bool ReconstructDeflateChunk(); - bool IsAdjacentNormal(const ImageChunk& other) const; - void MergeAdjacentNormal(const ImageChunk& other); - - private: - int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW - size_t start_; // offset of chunk in the original input file - const std::vector<uint8_t>* input_file_ptr_; // ptr to the full content of original input file - size_t raw_data_len_; - - // --- for CHUNK_DEFLATE chunks only: --- - std::vector<uint8_t> uncompressed_data_; - std::string entry_name_; // used for zip entries - - // deflate encoder parameters - int compress_level_; - - size_t source_start_; - size_t source_len_; - size_t source_uncompressed_len_; - - const uint8_t* GetRawData() const; - bool TryReconstruction(int level); +// Trim the head or tail to align with the block size. Return false if the chunk has nothing left +// after alignment. +static bool AlignHead(size_t* start, size_t* length) { + size_t residual = (*start % BLOCK_SIZE == 0) ? 0 : BLOCK_SIZE - *start % BLOCK_SIZE; + + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the beginning. + *start += residual; + *length -= residual; + return true; +} + +static bool AlignTail(size_t* start, size_t* length) { + size_t residual = (*start + *length) % BLOCK_SIZE; + if (*length <= residual) { + *length = 0; + return false; + } + + // Trim the data in the end. + *length -= residual; + return true; +} + +// Remove the used blocks from the source chunk to make sure the source ranges are mutually +// exclusive after split. Return false if we fail to get the non-overlapped ranges. In such +// a case, we'll skip the entire source chunk. +static bool RemoveUsedBlocks(size_t* start, size_t* length, const SortedRangeSet& used_ranges) { + if (!used_ranges.Overlaps(*start, *length)) { + return true; + } + + // TODO find the largest non-overlap chunk. + LOG(INFO) << "Removing block " << used_ranges.ToString() << " from " << *start << " - " + << *start + *length - 1; + + // If there's no duplicate entry name, we should only overlap in the head or tail block. Try to + // trim both blocks. Skip this source chunk in case it still overlaps with the used ranges. + if (AlignHead(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + if (AlignTail(start, length) && !used_ranges.Overlaps(*start, *length)) { + return true; + } + + LOG(WARNING) << "Failed to remove the overlapped block ranges; skip the source"; + return false; +} + +static const struct option OPTIONS[] = { + { "zip-mode", no_argument, nullptr, 'z' }, + { "bonus-file", required_argument, nullptr, 'b' }, + { "block-limit", required_argument, nullptr, 0 }, + { "debug-dir", required_argument, nullptr, 0 }, + { "split-info", required_argument, nullptr, 0 }, + { "verbose", no_argument, nullptr, 'v' }, + { nullptr, 0, nullptr, 0 }, }; +ImageChunk::ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, + size_t raw_data_len, std::string entry_name) + : type_(type), + start_(start), + input_file_ptr_(file_content), + raw_data_len_(raw_data_len), + compress_level_(6), + entry_name_(std::move(entry_name)) { + CHECK(file_content != nullptr) << "input file container can't be nullptr"; +} + const uint8_t* ImageChunk::GetRawData() const { CHECK_LE(start_ + raw_data_len_, input_file_ptr_->size()); return input_file_ptr_->data() + start_; @@ -273,6 +285,11 @@ size_t ImageChunk::DataLengthForPatch() const { return raw_data_len_; } +void ImageChunk::Dump(size_t index) const { + LOG(INFO) << "chunk: " << index << ", type: " << type_ << ", start: " << start_ + << ", len: " << DataLengthForPatch() << ", name: " << entry_name_; +} + bool ImageChunk::operator==(const ImageChunk& other) const { if (type_ != other.type_) { return false; @@ -281,20 +298,6 @@ bool ImageChunk::operator==(const ImageChunk& other) const { memcmp(GetRawData(), other.GetRawData(), raw_data_len_) == 0); } -void ImageChunk::SetSourceInfo(const ImageChunk& src) { - source_start_ = src.start_; - if (type_ == CHUNK_NORMAL) { - source_len_ = src.raw_data_len_; - } else if (type_ == CHUNK_DEFLATE) { - source_len_ = src.raw_data_len_; - source_uncompressed_len_ = src.uncompressed_data_.size(); - } -} - -void ImageChunk::SetEntryName(std::string entryname) { - entry_name_ = std::move(entryname); -} - void ImageChunk::SetUncompressedData(std::vector<uint8_t> data) { uncompressed_data_ = std::move(data); } @@ -307,80 +310,13 @@ bool ImageChunk::SetBonusData(const std::vector<uint8_t>& bonus_data) { return true; } -// Convert CHUNK_NORMAL & CHUNK_DEFLATE to CHUNK_RAW if the target size is -// smaller. Also take the header size into account during size comparison. -bool ImageChunk::ChangeChunkToRaw(size_t patch_size) { - if (type_ == CHUNK_RAW) { - return true; - } else if (type_ == CHUNK_NORMAL && (raw_data_len_ <= 160 || raw_data_len_ < patch_size)) { - type_ = CHUNK_RAW; - return true; - } - return false; -} - void ImageChunk::ChangeDeflateChunkToNormal() { if (type_ != CHUNK_DEFLATE) return; type_ = CHUNK_NORMAL; - entry_name_.clear(); + // No need to clear the entry name. uncompressed_data_.clear(); } -// Header size: -// header_type 4 bytes -// CHUNK_NORMAL 8*3 = 24 bytes -// CHUNK_DEFLATE 8*5 + 4*5 = 60 bytes -// CHUNK_RAW 4 bytes + patch_size -size_t ImageChunk::GetHeaderSize(size_t patch_size) const { - switch (type_) { - case CHUNK_NORMAL: - return 4 + 8 * 3; - case CHUNK_DEFLATE: - return 4 + 8 * 5 + 4 * 5; - case CHUNK_RAW: - return 4 + 4 + patch_size; - default: - CHECK(false) << "unexpected chunk type: " << type_; // Should not reach here. - return 0; - } -} - -size_t ImageChunk::WriteHeaderToFd(int fd, const std::vector<uint8_t>& patch, size_t offset) { - Write4(fd, type_); - switch (type_) { - case CHUNK_NORMAL: - printf("normal (%10zu, %10zu) %10zu\n", start_, raw_data_len_, patch.size()); - Write8(fd, static_cast<int64_t>(source_start_)); - Write8(fd, static_cast<int64_t>(source_len_)); - Write8(fd, static_cast<int64_t>(offset)); - return offset + patch.size(); - case CHUNK_DEFLATE: - printf("deflate (%10zu, %10zu) %10zu %s\n", start_, raw_data_len_, patch.size(), - entry_name_.c_str()); - Write8(fd, static_cast<int64_t>(source_start_)); - Write8(fd, static_cast<int64_t>(source_len_)); - Write8(fd, static_cast<int64_t>(offset)); - Write8(fd, static_cast<int64_t>(source_uncompressed_len_)); - Write8(fd, static_cast<int64_t>(uncompressed_data_.size())); - Write4(fd, compress_level_); - Write4(fd, METHOD); - Write4(fd, WINDOWBITS); - Write4(fd, MEMLEVEL); - Write4(fd, STRATEGY); - return offset + patch.size(); - case CHUNK_RAW: - printf("raw (%10zu, %10zu)\n", start_, raw_data_len_); - Write4(fd, static_cast<int32_t>(patch.size())); - if (!android::base::WriteFully(fd, patch.data(), patch.size())) { - CHECK(false) << "failed to write " << patch.size() <<" bytes patch"; - } - return offset; - default: - CHECK(false) << "unexpected chunk type: " << type_; - return offset; - } -} - bool ImageChunk::IsAdjacentNormal(const ImageChunk& other) const { if (type_ != CHUNK_NORMAL || other.type_ != CHUNK_NORMAL) { return false; @@ -393,14 +329,62 @@ void ImageChunk::MergeAdjacentNormal(const ImageChunk& other) { raw_data_len_ = raw_data_len_ + other.raw_data_len_; } +bool ImageChunk::MakePatch(const ImageChunk& tgt, const ImageChunk& src, + std::vector<uint8_t>* patch_data, + bsdiff::SuffixArrayIndexInterface** bsdiff_cache) { +#if defined(__ANDROID__) + char ptemp[] = "/data/local/tmp/imgdiff-patch-XXXXXX"; +#else + char ptemp[] = "/tmp/imgdiff-patch-XXXXXX"; +#endif + + int fd = mkstemp(ptemp); + if (fd == -1) { + PLOG(ERROR) << "MakePatch failed to create a temporary file"; + return false; + } + close(fd); + + int r = bsdiff::bsdiff(src.DataForPatch(), src.DataLengthForPatch(), tgt.DataForPatch(), + tgt.DataLengthForPatch(), ptemp, bsdiff_cache); + if (r != 0) { + LOG(ERROR) << "bsdiff() failed: " << r; + return false; + } + + android::base::unique_fd patch_fd(open(ptemp, O_RDONLY)); + if (patch_fd == -1) { + PLOG(ERROR) << "Failed to open " << ptemp; + return false; + } + struct stat st; + if (fstat(patch_fd, &st) != 0) { + PLOG(ERROR) << "Failed to stat patch file " << ptemp; + return false; + } + + size_t sz = static_cast<size_t>(st.st_size); + + patch_data->resize(sz); + if (!android::base::ReadFully(patch_fd, patch_data->data(), sz)) { + PLOG(ERROR) << "Failed to read " << ptemp; + unlink(ptemp); + return false; + } + + unlink(ptemp); + + return true; +} + bool ImageChunk::ReconstructDeflateChunk() { if (type_ != CHUNK_DEFLATE) { - printf("attempt to reconstruct non-deflate chunk\n"); + LOG(ERROR) << "Attempted to reconstruct non-deflate chunk"; return false; } - // We only check two combinations of encoder parameters: level 6 - // (the default) and level 9 (the maximum). + // We only check two combinations of encoder parameters: level 6 (the default) and level 9 + // (the maximum). for (int level = 6; level <= 9; level += 3) { if (TryReconstruction(level)) { compress_level_ = level; @@ -412,10 +396,9 @@ bool ImageChunk::ReconstructDeflateChunk() { } /* - * Takes the uncompressed data stored in the chunk, compresses it - * using the zlib parameters stored in the chunk, and checks that it - * matches exactly the compressed data we started with (also stored in - * the chunk). + * Takes the uncompressed data stored in the chunk, compresses it using the zlib parameters stored + * in the chunk, and checks that it matches exactly the compressed data we started with (also + * stored in the chunk). */ bool ImageChunk::TryReconstruction(int level) { z_stream strm; @@ -426,7 +409,7 @@ bool ImageChunk::TryReconstruction(int level) { strm.next_in = uncompressed_data_.data(); int ret = deflateInit2(&strm, level, METHOD, WINDOWBITS, MEMLEVEL, STRATEGY); if (ret < 0) { - printf("failed to initialize deflate: %d\n", ret); + LOG(ERROR) << "Failed to initialize deflate: " << ret; return false; } @@ -437,7 +420,7 @@ bool ImageChunk::TryReconstruction(int level) { strm.next_out = buffer.data(); ret = deflate(&strm, Z_FINISH); if (ret < 0) { - printf("failed to deflate: %d\n", ret); + LOG(ERROR) << "Failed to deflate: " << ret; return false; } @@ -458,195 +441,830 @@ bool ImageChunk::TryReconstruction(int level) { return true; } -// EOCD record -// offset 0: signature 0x06054b50, 4 bytes -// offset 4: number of this disk, 2 bytes -// ... -// offset 20: comment length, 2 bytes -// offset 22: comment, n bytes -static bool GetZipFileSize(const std::vector<uint8_t>& zip_file, size_t* input_file_size) { - if (zip_file.size() < 22) { - printf("file is too small to be a zip file\n"); - return false; +PatchChunk::PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data) + : type_(tgt.GetType()), + source_start_(src.GetStartOffset()), + source_len_(src.GetRawDataLength()), + source_uncompressed_len_(src.DataLengthForPatch()), + target_start_(tgt.GetStartOffset()), + target_len_(tgt.GetRawDataLength()), + target_uncompressed_len_(tgt.DataLengthForPatch()), + target_compress_level_(tgt.GetCompressLevel()), + data_(std::move(data)) {} + +// Construct a CHUNK_RAW patch from the target data directly. +PatchChunk::PatchChunk(const ImageChunk& tgt) + : type_(CHUNK_RAW), + source_start_(0), + source_len_(0), + source_uncompressed_len_(0), + target_start_(tgt.GetStartOffset()), + target_len_(tgt.GetRawDataLength()), + target_uncompressed_len_(tgt.DataLengthForPatch()), + target_compress_level_(tgt.GetCompressLevel()), + data_(tgt.DataForPatch(), tgt.DataForPatch() + tgt.DataLengthForPatch()) {} + +// Return true if raw data is smaller than the patch size. +bool PatchChunk::RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size) { + size_t target_len = tgt.GetRawDataLength(); + return (tgt.GetType() == CHUNK_NORMAL && (target_len <= 160 || target_len < patch_size)); +} + +void PatchChunk::UpdateSourceOffset(const SortedRangeSet& src_range) { + if (type_ == CHUNK_DEFLATE) { + source_start_ = src_range.GetOffsetInRangeSet(source_start_); } +} - // Look for End of central directory record of the zip file, and calculate the actual - // zip_file size. - for (int i = zip_file.size() - 22; i >= 0; i--) { - if (zip_file[i] == 0x50) { - if (get_unaligned<uint32_t>(&zip_file[i]) == 0x06054b50) { - // double-check: this archive consists of a single "disk". - CHECK_EQ(get_unaligned<uint16_t>(&zip_file[i + 4]), 0); +// Header size: +// header_type 4 bytes +// CHUNK_NORMAL 8*3 = 24 bytes +// CHUNK_DEFLATE 8*5 + 4*5 = 60 bytes +// CHUNK_RAW 4 bytes + patch_size +size_t PatchChunk::GetHeaderSize() const { + switch (type_) { + case CHUNK_NORMAL: + return 4 + 8 * 3; + case CHUNK_DEFLATE: + return 4 + 8 * 5 + 4 * 5; + case CHUNK_RAW: + return 4 + 4 + data_.size(); + default: + CHECK(false) << "unexpected chunk type: " << type_; // Should not reach here. + return 0; + } +} - uint16_t comment_length = get_unaligned<uint16_t>(&zip_file[i + 20]); - size_t file_size = i + 22 + comment_length; - CHECK_LE(file_size, zip_file.size()); - *input_file_size = file_size; - return true; +// Return the offset of the next patch into the patch data. +size_t PatchChunk::WriteHeaderToFd(int fd, size_t offset, size_t index) const { + Write4(fd, type_); + switch (type_) { + case CHUNK_NORMAL: + LOG(INFO) << android::base::StringPrintf("chunk %zu: normal (%10zu, %10zu) %10zu", index, + target_start_, target_len_, data_.size()); + Write8(fd, static_cast<int64_t>(source_start_)); + Write8(fd, static_cast<int64_t>(source_len_)); + Write8(fd, static_cast<int64_t>(offset)); + return offset + data_.size(); + case CHUNK_DEFLATE: + LOG(INFO) << android::base::StringPrintf("chunk %zu: deflate (%10zu, %10zu) %10zu", index, + target_start_, target_len_, data_.size()); + Write8(fd, static_cast<int64_t>(source_start_)); + Write8(fd, static_cast<int64_t>(source_len_)); + Write8(fd, static_cast<int64_t>(offset)); + Write8(fd, static_cast<int64_t>(source_uncompressed_len_)); + Write8(fd, static_cast<int64_t>(target_uncompressed_len_)); + Write4(fd, target_compress_level_); + Write4(fd, ImageChunk::METHOD); + Write4(fd, ImageChunk::WINDOWBITS); + Write4(fd, ImageChunk::MEMLEVEL); + Write4(fd, ImageChunk::STRATEGY); + return offset + data_.size(); + case CHUNK_RAW: + LOG(INFO) << android::base::StringPrintf("chunk %zu: raw (%10zu, %10zu)", index, + target_start_, target_len_); + Write4(fd, static_cast<int32_t>(data_.size())); + if (!android::base::WriteFully(fd, data_.data(), data_.size())) { + CHECK(false) << "Failed to write " << data_.size() << " bytes patch"; } + return offset; + default: + CHECK(false) << "unexpected chunk type: " << type_; + return offset; + } +} + +size_t PatchChunk::PatchSize() const { + if (type_ == CHUNK_RAW) { + return GetHeaderSize(); + } + return GetHeaderSize() + data_.size(); +} + +// Write the contents of |patch_chunks| to |patch_fd|. +bool PatchChunk::WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd) { + // Figure out how big the imgdiff file header is going to be, so that we can correctly compute + // the offset of each bsdiff patch within the file. + size_t total_header_size = 12; + for (const auto& patch : patch_chunks) { + total_header_size += patch.GetHeaderSize(); + } + + size_t offset = total_header_size; + + // Write out the headers. + if (!android::base::WriteStringToFd("IMGDIFF" + std::to_string(VERSION), patch_fd)) { + PLOG(ERROR) << "Failed to write \"IMGDIFF" << VERSION << "\""; + return false; + } + + Write4(patch_fd, static_cast<int32_t>(patch_chunks.size())); + LOG(INFO) << "Writing " << patch_chunks.size() << " patch headers..."; + for (size_t i = 0; i < patch_chunks.size(); ++i) { + offset = patch_chunks[i].WriteHeaderToFd(patch_fd, offset, i); + } + + // Append each chunk's bsdiff patch, in order. + for (const auto& patch : patch_chunks) { + if (patch.type_ == CHUNK_RAW) { + continue; + } + if (!android::base::WriteFully(patch_fd, patch.data_.data(), patch.data_.size())) { + PLOG(ERROR) << "Failed to write " << patch.data_.size() << " bytes patch to patch_fd"; + return false; } } - // EOCD not found, this file is likely not a valid zip file. - return false; + return true; +} + +ImageChunk& Image::operator[](size_t i) { + CHECK_LT(i, chunks_.size()); + return chunks_[i]; +} + +const ImageChunk& Image::operator[](size_t i) const { + CHECK_LT(i, chunks_.size()); + return chunks_[i]; +} + +void Image::MergeAdjacentNormalChunks() { + size_t merged_last = 0, cur = 0; + while (cur < chunks_.size()) { + // Look for normal chunks adjacent to the current one. If such chunk exists, extend the + // length of the current normal chunk. + size_t to_check = cur + 1; + while (to_check < chunks_.size() && chunks_[cur].IsAdjacentNormal(chunks_[to_check])) { + chunks_[cur].MergeAdjacentNormal(chunks_[to_check]); + to_check++; + } + + if (merged_last != cur) { + chunks_[merged_last] = std::move(chunks_[cur]); + } + merged_last++; + cur = to_check; + } + if (merged_last < chunks_.size()) { + chunks_.erase(chunks_.begin() + merged_last, chunks_.end()); + } +} + +void Image::DumpChunks() const { + std::string type = is_source_ ? "source" : "target"; + LOG(INFO) << "Dumping chunks for " << type; + for (size_t i = 0; i < chunks_.size(); ++i) { + chunks_[i].Dump(i); + } } -static bool ReadZip(const char* filename, std::vector<ImageChunk>* chunks, - std::vector<uint8_t>* zip_file, bool include_pseudo_chunk) { - CHECK(chunks != nullptr && zip_file != nullptr); +bool Image::ReadFile(const std::string& filename, std::vector<uint8_t>* file_content) { + CHECK(file_content != nullptr); - android::base::unique_fd fd(open(filename, O_RDONLY)); + android::base::unique_fd fd(open(filename.c_str(), O_RDONLY)); if (fd == -1) { - printf("failed to open \"%s\" %s\n", filename, strerror(errno)); + PLOG(ERROR) << "Failed to open " << filename; return false; } struct stat st; if (fstat(fd, &st) != 0) { - printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); + PLOG(ERROR) << "Failed to stat " << filename; return false; } size_t sz = static_cast<size_t>(st.st_size); - zip_file->resize(sz); - if (!android::base::ReadFully(fd, zip_file->data(), sz)) { - printf("failed to read \"%s\" %s\n", filename, strerror(errno)); + file_content->resize(sz); + if (!android::base::ReadFully(fd, file_content->data(), sz)) { + PLOG(ERROR) << "Failed to read " << filename; return false; } fd.reset(); - // Trim the trailing zeros before we pass the file to ziparchive handler. + return true; +} + +bool ZipModeImage::Initialize(const std::string& filename) { + if (!ReadFile(filename, &file_content_)) { + return false; + } + + // Omit the trailing zeros before we pass the file to ziparchive handler. size_t zipfile_size; - if (!GetZipFileSize(*zip_file, &zipfile_size)) { - printf("failed to parse the actual size of %s\n", filename); + if (!GetZipFileSize(&zipfile_size)) { + LOG(ERROR) << "Failed to parse the actual size of " << filename; return false; } ZipArchiveHandle handle; - int err = OpenArchiveFromMemory(zip_file->data(), zipfile_size, filename, &handle); + int err = OpenArchiveFromMemory(const_cast<uint8_t*>(file_content_.data()), zipfile_size, + filename.c_str(), &handle); if (err != 0) { - printf("failed to open zip file %s: %s\n", filename, ErrorCodeString(err)); + LOG(ERROR) << "Failed to open zip file " << filename << ": " << ErrorCodeString(err); CloseArchive(handle); return false; } - // Create a list of deflated zip entries, sorted by offset. - std::vector<std::pair<std::string, ZipEntry>> temp_entries; + if (!InitializeChunks(filename, handle)) { + CloseArchive(handle); + return false; + } + + CloseArchive(handle); + return true; +} + +// Iterate the zip entries and compose the image chunks accordingly. +bool ZipModeImage::InitializeChunks(const std::string& filename, ZipArchiveHandle handle) { void* cookie; int ret = StartIteration(handle, &cookie, nullptr, nullptr); if (ret != 0) { - printf("failed to iterate over entries in %s: %s\n", filename, ErrorCodeString(ret)); - CloseArchive(handle); + LOG(ERROR) << "Failed to iterate over entries in " << filename << ": " << ErrorCodeString(ret); return false; } + // Create a list of deflated zip entries, sorted by offset. + std::vector<std::pair<std::string, ZipEntry>> temp_entries; ZipString name; ZipEntry entry; while ((ret = Next(cookie, &entry, &name)) == 0) { - if (entry.method == kCompressDeflated) { - std::string entryname(name.name, name.name + name.name_length); - temp_entries.push_back(std::make_pair(entryname, entry)); + if (entry.method == kCompressDeflated || limit_ > 0) { + std::string entry_name(name.name, name.name + name.name_length); + temp_entries.emplace_back(entry_name, entry); } } if (ret != -1) { - printf("Error while iterating over zip entries: %s\n", ErrorCodeString(ret)); - CloseArchive(handle); + LOG(ERROR) << "Error while iterating over zip entries: " << ErrorCodeString(ret); return false; } std::sort(temp_entries.begin(), temp_entries.end(), - [](auto& entry1, auto& entry2) { - return entry1.second.offset < entry2.second.offset; - }); + [](auto& entry1, auto& entry2) { return entry1.second.offset < entry2.second.offset; }); EndIteration(cookie); - if (include_pseudo_chunk) { - chunks->emplace_back(CHUNK_NORMAL, 0, zip_file, zip_file->size()); + // For source chunks, we don't need to compose chunks for the metadata. + if (is_source_) { + for (auto& entry : temp_entries) { + if (!AddZipEntryToChunks(handle, entry.first, &entry.second)) { + LOG(ERROR) << "Failed to add " << entry.first << " to source chunks"; + return false; + } + } + + // Add the end of zip file (mainly central directory) as a normal chunk. + size_t entries_end = 0; + if (!temp_entries.empty()) { + entries_end = static_cast<size_t>(temp_entries.back().second.offset + + temp_entries.back().second.compressed_length); + } + CHECK_LT(entries_end, file_content_.size()); + chunks_.emplace_back(CHUNK_NORMAL, entries_end, &file_content_, + file_content_.size() - entries_end); + + return true; } + // For target chunks, add the deflate entries as CHUNK_DEFLATE and the contents between two + // deflate entries as CHUNK_NORMAL. size_t pos = 0; size_t nextentry = 0; - while (pos < zip_file->size()) { + while (pos < file_content_.size()) { if (nextentry < temp_entries.size() && static_cast<off64_t>(pos) == temp_entries[nextentry].second.offset) { - // compose the next deflate chunk. - std::string entryname = temp_entries[nextentry].first; - size_t uncompressed_len = temp_entries[nextentry].second.uncompressed_length; - std::vector<uint8_t> uncompressed_data(uncompressed_len); - if ((ret = ExtractToMemory(handle, &temp_entries[nextentry].second, uncompressed_data.data(), - uncompressed_len)) != 0) { - printf("failed to extract %s with size %zu: %s\n", entryname.c_str(), uncompressed_len, - ErrorCodeString(ret)); - CloseArchive(handle); + // Add the next zip entry. + std::string entry_name = temp_entries[nextentry].first; + if (!AddZipEntryToChunks(handle, entry_name, &temp_entries[nextentry].second)) { + LOG(ERROR) << "Failed to add " << entry_name << " to target chunks"; return false; } - size_t compressed_len = temp_entries[nextentry].second.compressed_length; - ImageChunk curr(CHUNK_DEFLATE, pos, zip_file, compressed_len); - curr.SetEntryName(std::move(entryname)); - curr.SetUncompressedData(std::move(uncompressed_data)); - chunks->push_back(curr); - - pos += compressed_len; + pos += temp_entries[nextentry].second.compressed_length; ++nextentry; continue; } - // Use a normal chunk to take all the data up to the start of the next deflate section. + // Use a normal chunk to take all the data up to the start of the next entry. size_t raw_data_len; if (nextentry < temp_entries.size()) { raw_data_len = temp_entries[nextentry].second.offset - pos; } else { - raw_data_len = zip_file->size() - pos; + raw_data_len = file_content_.size() - pos; } - chunks->emplace_back(CHUNK_NORMAL, pos, zip_file, raw_data_len); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, raw_data_len); pos += raw_data_len; } - CloseArchive(handle); return true; } -// Read the given file and break it up into chunks, and putting the data in to a vector. -static bool ReadImage(const char* filename, std::vector<ImageChunk>* chunks, - std::vector<uint8_t>* img) { - CHECK(chunks != nullptr && img != nullptr); +bool ZipModeImage::AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, + ZipEntry* entry) { + size_t compressed_len = entry->compressed_length; + if (compressed_len == 0) return true; + + // Split the entry into several normal chunks if it's too large. + if (limit_ > 0 && compressed_len > limit_) { + int count = 0; + while (compressed_len > 0) { + size_t length = std::min(limit_, compressed_len); + std::string name = entry_name + "-" + std::to_string(count); + chunks_.emplace_back(CHUNK_NORMAL, entry->offset + limit_ * count, &file_content_, length, + name); + + count++; + compressed_len -= length; + } + } else if (entry->method == kCompressDeflated) { + size_t uncompressed_len = entry->uncompressed_length; + std::vector<uint8_t> uncompressed_data(uncompressed_len); + int ret = ExtractToMemory(handle, entry, uncompressed_data.data(), uncompressed_len); + if (ret != 0) { + LOG(ERROR) << "Failed to extract " << entry_name << " with size " << uncompressed_len << ": " + << ErrorCodeString(ret); + return false; + } + ImageChunk curr(CHUNK_DEFLATE, entry->offset, &file_content_, compressed_len, entry_name); + curr.SetUncompressedData(std::move(uncompressed_data)); + chunks_.push_back(std::move(curr)); + } else { + chunks_.emplace_back(CHUNK_NORMAL, entry->offset, &file_content_, compressed_len, entry_name); + } + + return true; +} - android::base::unique_fd fd(open(filename, O_RDONLY)); - if (fd == -1) { - printf("failed to open \"%s\" %s\n", filename, strerror(errno)); +// EOCD record +// offset 0: signature 0x06054b50, 4 bytes +// offset 4: number of this disk, 2 bytes +// ... +// offset 20: comment length, 2 bytes +// offset 22: comment, n bytes +bool ZipModeImage::GetZipFileSize(size_t* input_file_size) { + if (file_content_.size() < 22) { + LOG(ERROR) << "File is too small to be a zip file"; return false; } - struct stat st; - if (fstat(fd, &st) != 0) { - printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); + + // Look for End of central directory record of the zip file, and calculate the actual + // zip_file size. + for (int i = file_content_.size() - 22; i >= 0; i--) { + if (file_content_[i] == 0x50) { + if (get_unaligned<uint32_t>(&file_content_[i]) == 0x06054b50) { + // double-check: this archive consists of a single "disk". + CHECK_EQ(get_unaligned<uint16_t>(&file_content_[i + 4]), 0); + + uint16_t comment_length = get_unaligned<uint16_t>(&file_content_[i + 20]); + size_t file_size = i + 22 + comment_length; + CHECK_LE(file_size, file_content_.size()); + *input_file_size = file_size; + return true; + } + } + } + + // EOCD not found, this file is likely not a valid zip file. + return false; +} + +ImageChunk ZipModeImage::PseudoSource() const { + CHECK(is_source_); + return ImageChunk(CHUNK_NORMAL, 0, &file_content_, file_content_.size()); +} + +const ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool find_normal) const { + if (name.empty()) { + return nullptr; + } + for (auto& chunk : chunks_) { + if (chunk.GetType() != CHUNK_DEFLATE && !find_normal) { + continue; + } + + if (chunk.GetEntryName() == name) { + return &chunk; + } + + // Edge case when target chunk is split due to size limit but source chunk isn't. + if (name == (chunk.GetEntryName() + "-0") || chunk.GetEntryName() == (name + "-0")) { + return &chunk; + } + + // TODO handle the .so files with incremental version number. + // (e.g. lib/arm64-v8a/libcronet.59.0.3050.4.so) + } + + return nullptr; +} + +ImageChunk* ZipModeImage::FindChunkByName(const std::string& name, bool find_normal) { + return const_cast<ImageChunk*>( + static_cast<const ZipModeImage*>(this)->FindChunkByName(name, find_normal)); +} + +bool ZipModeImage::CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image) { + for (auto& tgt_chunk : *tgt_image) { + if (tgt_chunk.GetType() != CHUNK_DEFLATE) { + continue; + } + + ImageChunk* src_chunk = src_image->FindChunkByName(tgt_chunk.GetEntryName()); + if (src_chunk == nullptr) { + tgt_chunk.ChangeDeflateChunkToNormal(); + } else if (tgt_chunk == *src_chunk) { + // If two deflate chunks are identical (eg, the kernel has not changed between two builds), + // treat them as normal chunks. This makes applypatch much faster -- it can apply a trivial + // patch to the compressed data, rather than uncompressing and recompressing to apply the + // trivial patch to the uncompressed data. + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk->ChangeDeflateChunkToNormal(); + } else if (!tgt_chunk.ReconstructDeflateChunk()) { + // We cannot recompress the data and get exactly the same bits as are in the input target + // image. Treat the chunk as a normal non-deflated chunk. + LOG(WARNING) << "Failed to reconstruct target deflate chunk [" << tgt_chunk.GetEntryName() + << "]; treating as normal"; + + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk->ChangeDeflateChunkToNormal(); + } + } + + // For zips, we only need merge normal chunks for the target: deflated chunks are matched via + // filename, and normal chunks are patched using the entire source file as the source. + if (tgt_image->limit_ == 0) { + tgt_image->MergeAdjacentNormalChunks(); + tgt_image->DumpChunks(); + } + + return true; +} + +// For each target chunk, look for the corresponding source chunk by the zip_entry name. If +// found, add the range of this chunk in the original source file to the block aligned source +// ranges. Construct the split src & tgt image once the size of source range reaches limit. +bool ZipModeImage::SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images, + std::vector<SortedRangeSet>* split_src_ranges) { + CHECK_EQ(tgt_image.limit_, src_image.limit_); + size_t limit = tgt_image.limit_; + + src_image.DumpChunks(); + LOG(INFO) << "Splitting " << tgt_image.NumOfChunks() << " tgt chunks..."; + + SortedRangeSet used_src_ranges; // ranges used for previous split source images. + + // Reserve the central directory in advance for the last split image. + const auto& central_directory = src_image.cend() - 1; + CHECK_EQ(CHUNK_NORMAL, central_directory->GetType()); + used_src_ranges.Insert(central_directory->GetStartOffset(), + central_directory->DataLengthForPatch()); + + SortedRangeSet src_ranges; + std::vector<ImageChunk> split_src_chunks; + std::vector<ImageChunk> split_tgt_chunks; + for (auto tgt = tgt_image.cbegin(); tgt != tgt_image.cend(); tgt++) { + const ImageChunk* src = src_image.FindChunkByName(tgt->GetEntryName(), true); + if (src == nullptr) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + continue; + } + + size_t src_offset = src->GetStartOffset(); + size_t src_length = src->GetRawDataLength(); + + CHECK(src_length > 0); + CHECK_LE(src_length, limit); + + // Make sure this source range hasn't been used before so that the src_range pieces don't + // overlap with each other. + if (!RemoveUsedBlocks(&src_offset, &src_length, used_src_ranges)) { + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } else if (src_ranges.blocks() * BLOCK_SIZE + src_length <= limit) { + src_ranges.Insert(src_offset, src_length); + + // Add the deflate source chunk if it hasn't been aligned. + if (src->GetType() == CHUNK_DEFLATE && src_length == src->GetRawDataLength()) { + split_src_chunks.push_back(*src); + split_tgt_chunks.push_back(*tgt); + } else { + // TODO split smarter to avoid alignment of large deflate chunks + split_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt->GetStartOffset(), &tgt_image.file_content_, + tgt->GetRawDataLength()); + } + } else { + bool added_image = ZipModeImage::AddSplitImageFromChunkList( + tgt_image, src_image, src_ranges, split_tgt_chunks, split_src_chunks, split_tgt_images, + split_src_images); + + split_tgt_chunks.clear(); + split_src_chunks.clear(); + // No need to update the split_src_ranges if we don't update the split source images. + if (added_image) { + used_src_ranges.Insert(src_ranges); + split_src_ranges->push_back(std::move(src_ranges)); + } + src_ranges.Clear(); + + // We don't have enough space for the current chunk; start a new split image and handle + // this chunk there. + tgt--; + } + } + + // TODO Trim it in case the CD exceeds limit too much. + src_ranges.Insert(central_directory->GetStartOffset(), central_directory->DataLengthForPatch()); + bool added_image = ZipModeImage::AddSplitImageFromChunkList(tgt_image, src_image, src_ranges, + split_tgt_chunks, split_src_chunks, + split_tgt_images, split_src_images); + if (added_image) { + split_src_ranges->push_back(std::move(src_ranges)); + } + + ValidateSplitImages(*split_tgt_images, *split_src_images, *split_src_ranges, + tgt_image.file_content_.size()); + + return true; +} + +bool ZipModeImage::AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector<ImageChunk>& split_tgt_chunks, + const std::vector<ImageChunk>& split_src_chunks, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images) { + CHECK(!split_tgt_chunks.empty()); + + std::vector<ImageChunk> aligned_tgt_chunks; + + // Align the target chunks in the beginning with BLOCK_SIZE. + size_t i = 0; + while (i < split_tgt_chunks.size()) { + size_t tgt_start = split_tgt_chunks[i].GetStartOffset(); + size_t tgt_length = split_tgt_chunks[i].GetRawDataLength(); + + // Current ImageChunk is long enough to align. + if (AlignHead(&tgt_start, &tgt_length)) { + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, tgt_start, &tgt_image.file_content_, + tgt_length); + break; + } + + i++; + } + + // Nothing left after alignment in the current split tgt chunks; skip adding the split_tgt_image. + if (i == split_tgt_chunks.size()) { return false; } - size_t sz = static_cast<size_t>(st.st_size); - img->resize(sz); - if (!android::base::ReadFully(fd, img->data(), sz)) { - printf("failed to read \"%s\" %s\n", filename, strerror(errno)); + aligned_tgt_chunks.insert(aligned_tgt_chunks.end(), split_tgt_chunks.begin() + i + 1, + split_tgt_chunks.end()); + CHECK(!aligned_tgt_chunks.empty()); + + // Add a normal chunk to align the contents in the end. + size_t end_offset = + aligned_tgt_chunks.back().GetStartOffset() + aligned_tgt_chunks.back().GetRawDataLength(); + if (end_offset % BLOCK_SIZE != 0 && end_offset < tgt_image.file_content_.size()) { + size_t tail_block_length = std::min<size_t>(tgt_image.file_content_.size() - end_offset, + BLOCK_SIZE - (end_offset % BLOCK_SIZE)); + aligned_tgt_chunks.emplace_back(CHUNK_NORMAL, end_offset, &tgt_image.file_content_, + tail_block_length); + } + + ZipModeImage split_tgt_image(false); + split_tgt_image.Initialize(std::move(aligned_tgt_chunks), {}); + split_tgt_image.MergeAdjacentNormalChunks(); + + // Construct the dummy source file based on the src_ranges. + std::vector<uint8_t> src_content; + for (const auto& r : split_src_ranges) { + size_t end = std::min(src_image.file_content_.size(), r.second * BLOCK_SIZE); + src_content.insert(src_content.end(), src_image.file_content_.begin() + r.first * BLOCK_SIZE, + src_image.file_content_.begin() + end); + } + + // We should not have an empty src in our design; otherwise we will encounter an error in + // bsdiff since src_content.data() == nullptr. + CHECK(!src_content.empty()); + + ZipModeImage split_src_image(true); + split_src_image.Initialize(split_src_chunks, std::move(src_content)); + + split_tgt_images->push_back(std::move(split_tgt_image)); + split_src_images->push_back(std::move(split_src_image)); + + return true; +} + +void ZipModeImage::ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + std::vector<SortedRangeSet>& split_src_ranges, + size_t total_tgt_size) { + CHECK_EQ(split_tgt_images.size(), split_src_images.size()); + + LOG(INFO) << "Validating " << split_tgt_images.size() << " images"; + + // Verify that the target image pieces is continuous and can add up to the total size. + size_t last_offset = 0; + for (const auto& tgt_image : split_tgt_images) { + CHECK(!tgt_image.chunks_.empty()); + + CHECK_EQ(last_offset, tgt_image.chunks_.front().GetStartOffset()); + CHECK(last_offset % BLOCK_SIZE == 0); + + // Check the target chunks within the split image are continuous. + for (const auto& chunk : tgt_image.chunks_) { + CHECK_EQ(last_offset, chunk.GetStartOffset()); + last_offset += chunk.GetRawDataLength(); + } + } + CHECK_EQ(total_tgt_size, last_offset); + + // Verify that the source ranges are mutually exclusive. + CHECK_EQ(split_src_images.size(), split_src_ranges.size()); + SortedRangeSet used_src_ranges; + for (size_t i = 0; i < split_src_ranges.size(); i++) { + CHECK(!used_src_ranges.Overlaps(split_src_ranges[i])) + << "src range " << split_src_ranges[i].ToString() << " overlaps " + << used_src_ranges.ToString(); + used_src_ranges.Insert(split_src_ranges[i]); + } +} + +bool ZipModeImage::GeneratePatchesInternal(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector<PatchChunk>* patch_chunks) { + LOG(INFO) << "Constructing patches for " << tgt_image.NumOfChunks() << " chunks..."; + patch_chunks->clear(); + + bsdiff::SuffixArrayIndexInterface* bsdiff_cache = nullptr; + for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { + const auto& tgt_chunk = tgt_image[i]; + + if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { + patch_chunks->emplace_back(tgt_chunk); + continue; + } + + const ImageChunk* src_chunk = (tgt_chunk.GetType() != CHUNK_DEFLATE) + ? nullptr + : src_image.FindChunkByName(tgt_chunk.GetEntryName()); + + const auto& src_ref = (src_chunk == nullptr) ? src_image.PseudoSource() : *src_chunk; + bsdiff::SuffixArrayIndexInterface** bsdiff_cache_ptr = + (src_chunk == nullptr) ? &bsdiff_cache : nullptr; + + std::vector<uint8_t> patch_data; + if (!ImageChunk::MakePatch(tgt_chunk, src_ref, &patch_data, bsdiff_cache_ptr)) { + LOG(ERROR) << "Failed to generate patch, name: " << tgt_chunk.GetEntryName(); + return false; + } + + LOG(INFO) << "patch " << i << " is " << patch_data.size() << " bytes (of " + << tgt_chunk.GetRawDataLength() << ")"; + + if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { + patch_chunks->emplace_back(tgt_chunk); + } else { + patch_chunks->emplace_back(tgt_chunk, src_ref, std::move(patch_data)); + } + } + delete bsdiff_cache; + + CHECK_EQ(patch_chunks->size(), tgt_image.NumOfChunks()); + return true; +} + +bool ZipModeImage::GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + const std::string& patch_name) { + std::vector<PatchChunk> patch_chunks; + + ZipModeImage::GeneratePatchesInternal(tgt_image, src_image, &patch_chunks); + + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + PLOG(ERROR) << "Failed to open " << patch_name; return false; } - size_t pos = 0; + return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); +} + +bool ZipModeImage::GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + const std::vector<SortedRangeSet>& split_src_ranges, + const std::string& patch_name, + const std::string& split_info_file, + const std::string& debug_dir) { + LOG(INFO) << "Constructing patches for " << split_tgt_images.size() << " split images..."; + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + PLOG(ERROR) << "Failed to open " << patch_name; + return false; + } + std::vector<std::string> split_info_list; + for (size_t i = 0; i < split_tgt_images.size(); i++) { + std::vector<PatchChunk> patch_chunks; + if (!ZipModeImage::GeneratePatchesInternal(split_tgt_images[i], split_src_images[i], + &patch_chunks)) { + LOG(ERROR) << "Failed to generate split patch"; + return false; + } + + size_t total_patch_size = 12; + for (auto& p : patch_chunks) { + p.UpdateSourceOffset(split_src_ranges[i]); + total_patch_size += p.PatchSize(); + } + + if (!PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd)) { + return false; + } + + size_t split_tgt_size = split_tgt_images[i].chunks_.back().GetStartOffset() + + split_tgt_images[i].chunks_.back().GetRawDataLength() - + split_tgt_images[i].chunks_.front().GetStartOffset(); + std::string split_info = android::base::StringPrintf( + "%zu %zu %s", total_patch_size, split_tgt_size, split_src_ranges[i].ToString().c_str()); + split_info_list.push_back(split_info); + + // Write the split source & patch into the debug directory. + if (!debug_dir.empty()) { + std::string src_name = android::base::StringPrintf("%s/src-%zu", debug_dir.c_str(), i); + android::base::unique_fd fd( + open(src_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + PLOG(ERROR) << "Failed to open " << src_name; + return false; + } + if (!android::base::WriteFully(fd, split_src_images[i].PseudoSource().DataForPatch(), + split_src_images[i].PseudoSource().DataLengthForPatch())) { + PLOG(ERROR) << "Failed to write split source data into " << src_name; + return false; + } + + std::string patch_name = android::base::StringPrintf("%s/patch-%zu", debug_dir.c_str(), i); + fd.reset(open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + + if (fd == -1) { + PLOG(ERROR) << "Failed to open " << patch_name; + return false; + } + if (!PatchChunk::WritePatchDataToFd(patch_chunks, fd)) { + return false; + } + } + } + + // Store the split in the following format: + // Line 0: imgdiff version# + // Line 1: number of pieces + // Line 2: patch_size_1 tgt_size_1 src_range_1 + // ... + // Line n+1: patch_size_n tgt_size_n src_range_n + std::string split_info_string = android::base::StringPrintf( + "%zu\n%zu\n", VERSION, split_info_list.size()) + android::base::Join(split_info_list, '\n'); + if (!android::base::WriteStringToFile(split_info_string, split_info_file)) { + PLOG(ERROR) << "Failed to write split info to " << split_info_file; + return false; + } + + return true; +} + +bool ImageModeImage::Initialize(const std::string& filename) { + if (!ReadFile(filename, &file_content_)) { + return false; + } + + size_t sz = file_content_.size(); + size_t pos = 0; while (pos < sz) { // 0x00 no header flags, 0x08 deflate compression, 0x1f8b gzip magic number - if (sz - pos >= 4 && get_unaligned<uint32_t>(img->data() + pos) == 0x00088b1f) { + if (sz - pos >= 4 && get_unaligned<uint32_t>(file_content_.data() + pos) == 0x00088b1f) { // 'pos' is the offset of the start of a gzip chunk. size_t chunk_offset = pos; // The remaining data is too small to be a gzip chunk; treat them as a normal chunk. if (sz - pos < GZIP_HEADER_LEN + GZIP_FOOTER_LEN) { - chunks->emplace_back(CHUNK_NORMAL, pos, img, sz - pos); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, sz - pos); break; } // We need three chunks for the deflated image in total, one normal chunk for the header, // one deflated chunk for the body, and another normal chunk for the footer. - chunks->emplace_back(CHUNK_NORMAL, pos, img, GZIP_HEADER_LEN); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, GZIP_HEADER_LEN); pos += GZIP_HEADER_LEN; // We must decompress this chunk in order to discover where it ends, and so we can update @@ -657,13 +1275,13 @@ static bool ReadImage(const char* filename, std::vector<ImageChunk>* chunks, strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = sz - pos; - strm.next_in = img->data() + pos; + strm.next_in = file_content_.data() + pos; // -15 means we are decoding a 'raw' deflate stream; zlib will // not expect zlib headers. int ret = inflateInit2(&strm, -15); if (ret < 0) { - printf("failed to initialize inflate: %d\n", ret); + LOG(ERROR) << "Failed to initialize inflate: " << ret; return false; } @@ -675,8 +1293,8 @@ static bool ReadImage(const char* filename, std::vector<ImageChunk>* chunks, strm.next_out = uncompressed_data.data() + uncompressed_len; ret = inflate(&strm, Z_NO_FLUSH); if (ret < 0) { - printf("Warning: inflate failed [%s] at offset [%zu], treating as a normal chunk\n", - strm.msg, chunk_offset); + LOG(WARNING) << "Inflate failed [" << strm.msg << "] at offset [" << chunk_offset + << "]; treating as a normal chunk"; break; } uncompressed_len = allocated - strm.avail_out; @@ -697,25 +1315,25 @@ static bool ReadImage(const char* filename, std::vector<ImageChunk>* chunks, // matches the size of the data we got when we actually did the decompression. size_t footer_index = pos + raw_data_len + GZIP_FOOTER_LEN - 4; if (sz - footer_index < 4) { - printf("Warning: invalid footer position; treating as a nomal chunk\n"); + LOG(WARNING) << "invalid footer position; treating as a normal chunk"; continue; } - size_t footer_size = get_unaligned<uint32_t>(img->data() + footer_index); + size_t footer_size = get_unaligned<uint32_t>(file_content_.data() + footer_index); if (footer_size != uncompressed_len) { - printf("Warning: footer size %zu != decompressed size %zu; treating as a nomal chunk\n", - footer_size, uncompressed_len); + LOG(WARNING) << "footer size " << footer_size << " != " << uncompressed_len + << "; treating as a normal chunk"; continue; } - ImageChunk body(CHUNK_DEFLATE, pos, img, raw_data_len); + ImageChunk body(CHUNK_DEFLATE, pos, &file_content_, raw_data_len); uncompressed_data.resize(uncompressed_len); body.SetUncompressedData(std::move(uncompressed_data)); - chunks->push_back(body); + chunks_.push_back(std::move(body)); pos += raw_data_len; // create a normal chunk for the footer - chunks->emplace_back(CHUNK_NORMAL, pos, img, GZIP_FOOTER_LEN); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, GZIP_FOOTER_LEN); pos += GZIP_FOOTER_LEN; } else { @@ -726,12 +1344,12 @@ static bool ReadImage(const char* filename, std::vector<ImageChunk>* chunks, size_t data_len = 0; while (data_len + pos < sz) { if (data_len + pos + 4 <= sz && - get_unaligned<uint32_t>(img->data() + pos + data_len) == 0x00088b1f) { + get_unaligned<uint32_t>(file_content_.data() + pos + data_len) == 0x00088b1f) { break; } data_len++; } - chunks->emplace_back(CHUNK_NORMAL, pos, img, data_len); + chunks_.emplace_back(CHUNK_NORMAL, pos, &file_content_, data_len); pos += data_len; } @@ -740,346 +1358,256 @@ static bool ReadImage(const char* filename, std::vector<ImageChunk>* chunks, return true; } -/* - * Given source and target chunks, compute a bsdiff patch between them. - * Store the result in the patch_data. - * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk - * is used repeatedly, pass nullptr if not needed. - */ -static bool MakePatch(const ImageChunk* src, ImageChunk* tgt, std::vector<uint8_t>* patch_data, - saidx_t** bsdiff_cache) { - if (tgt->ChangeChunkToRaw(0)) { - size_t patch_size = tgt->DataLengthForPatch(); - patch_data->resize(patch_size); - std::copy(tgt->DataForPatch(), tgt->DataForPatch() + patch_size, patch_data->begin()); - return true; - } - -#if defined(__ANDROID__) - char ptemp[] = "/data/local/tmp/imgdiff-patch-XXXXXX"; -#else - char ptemp[] = "/tmp/imgdiff-patch-XXXXXX"; -#endif - - int fd = mkstemp(ptemp); - if (fd == -1) { - printf("MakePatch failed to create a temporary file: %s\n", strerror(errno)); +bool ImageModeImage::SetBonusData(const std::vector<uint8_t>& bonus_data) { + CHECK(is_source_); + if (chunks_.size() < 2 || !chunks_[1].SetBonusData(bonus_data)) { + LOG(ERROR) << "Failed to set bonus data"; + DumpChunks(); return false; } - close(fd); - int r = bsdiff::bsdiff(src->DataForPatch(), src->DataLengthForPatch(), tgt->DataForPatch(), - tgt->DataLengthForPatch(), ptemp, bsdiff_cache); - if (r != 0) { - printf("bsdiff() failed: %d\n", r); - return false; - } + LOG(INFO) << " using " << bonus_data.size() << " bytes of bonus data"; + return true; +} - android::base::unique_fd patch_fd(open(ptemp, O_RDONLY)); - if (patch_fd == -1) { - printf("failed to open %s: %s\n", ptemp, strerror(errno)); +// In Image Mode, verify that the source and target images have the same chunk structure (ie, the +// same sequence of deflate and normal chunks). +bool ImageModeImage::CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image) { + // In image mode, merge the gzip header and footer in with any adjacent normal chunks. + tgt_image->MergeAdjacentNormalChunks(); + src_image->MergeAdjacentNormalChunks(); + + if (tgt_image->NumOfChunks() != src_image->NumOfChunks()) { + LOG(ERROR) << "Source and target don't have same number of chunks!"; + tgt_image->DumpChunks(); + src_image->DumpChunks(); return false; } - struct stat st; - if (fstat(patch_fd, &st) != 0) { - printf("failed to stat patch file %s: %s\n", ptemp, strerror(errno)); - return false; + for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { + if ((*tgt_image)[i].GetType() != (*src_image)[i].GetType()) { + LOG(ERROR) << "Source and target don't have same chunk structure! (chunk " << i << ")"; + tgt_image->DumpChunks(); + src_image->DumpChunks(); + return false; + } } - size_t sz = static_cast<size_t>(st.st_size); - // Change the chunk type to raw if the patch takes less space that way. - if (tgt->ChangeChunkToRaw(sz)) { - unlink(ptemp); - size_t patch_size = tgt->DataLengthForPatch(); - patch_data->resize(patch_size); - std::copy(tgt->DataForPatch(), tgt->DataForPatch() + patch_size, patch_data->begin()); - return true; + for (size_t i = 0; i < tgt_image->NumOfChunks(); ++i) { + auto& tgt_chunk = (*tgt_image)[i]; + auto& src_chunk = (*src_image)[i]; + if (tgt_chunk.GetType() != CHUNK_DEFLATE) { + continue; + } + + // If two deflate chunks are identical treat them as normal chunks. + if (tgt_chunk == src_chunk) { + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk.ChangeDeflateChunkToNormal(); + } else if (!tgt_chunk.ReconstructDeflateChunk()) { + // We cannot recompress the data and get exactly the same bits as are in the input target + // image, fall back to normal + LOG(WARNING) << "Failed to reconstruct target deflate chunk " << i << " [" + << tgt_chunk.GetEntryName() << "]; treating as normal"; + tgt_chunk.ChangeDeflateChunkToNormal(); + src_chunk.ChangeDeflateChunkToNormal(); + } } - patch_data->resize(sz); - if (!android::base::ReadFully(patch_fd, patch_data->data(), sz)) { - printf("failed to read \"%s\" %s\n", ptemp, strerror(errno)); + + // For images, we need to maintain the parallel structure of the chunk lists, so do the merging + // in both the source and target lists. + tgt_image->MergeAdjacentNormalChunks(); + src_image->MergeAdjacentNormalChunks(); + if (tgt_image->NumOfChunks() != src_image->NumOfChunks()) { + // This shouldn't happen. + LOG(ERROR) << "Merging normal chunks went awry"; return false; } - unlink(ptemp); - tgt->SetSourceInfo(*src); - return true; } -/* - * Look for runs of adjacent normal chunks and compress them down into - * a single chunk. (Such runs can be produced when deflate chunks are - * changed to normal chunks.) - */ -static void MergeAdjacentNormalChunks(std::vector<ImageChunk>* chunks) { - size_t merged_last = 0, cur = 0; - while (cur < chunks->size()) { - // Look for normal chunks adjacent to the current one. If such chunk exists, extend the - // length of the current normal chunk. - size_t to_check = cur + 1; - while (to_check < chunks->size() && chunks->at(cur).IsAdjacentNormal(chunks->at(to_check))) { - chunks->at(cur).MergeAdjacentNormal(chunks->at(to_check)); - to_check++; +// In image mode, generate patches against the given source chunks and bonus_data; write the +// result to |patch_name|. +bool ImageModeImage::GeneratePatches(const ImageModeImage& tgt_image, + const ImageModeImage& src_image, + const std::string& patch_name) { + LOG(INFO) << "Constructing patches for " << tgt_image.NumOfChunks() << " chunks..."; + std::vector<PatchChunk> patch_chunks; + patch_chunks.reserve(tgt_image.NumOfChunks()); + + for (size_t i = 0; i < tgt_image.NumOfChunks(); i++) { + const auto& tgt_chunk = tgt_image[i]; + const auto& src_chunk = src_image[i]; + + if (PatchChunk::RawDataIsSmaller(tgt_chunk, 0)) { + patch_chunks.emplace_back(tgt_chunk); + continue; } - if (merged_last != cur) { - chunks->at(merged_last) = std::move(chunks->at(cur)); + std::vector<uint8_t> patch_data; + if (!ImageChunk::MakePatch(tgt_chunk, src_chunk, &patch_data, nullptr)) { + LOG(ERROR) << "Failed to generate patch for target chunk " << i; + return false; } - merged_last++; - cur = to_check; - } - if (merged_last < chunks->size()) { - chunks->erase(chunks->begin() + merged_last, chunks->end()); - } -} + LOG(INFO) << "patch " << i << " is " << patch_data.size() << " bytes (of " + << tgt_chunk.GetRawDataLength() << ")"; -static ImageChunk* FindChunkByName(const std::string& name, std::vector<ImageChunk>& chunks) { - for (size_t i = 0; i < chunks.size(); ++i) { - if (chunks[i].GetType() == CHUNK_DEFLATE && chunks[i].GetEntryName() == name) { - return &chunks[i]; + if (PatchChunk::RawDataIsSmaller(tgt_chunk, patch_data.size())) { + patch_chunks.emplace_back(tgt_chunk); + } else { + patch_chunks.emplace_back(tgt_chunk, src_chunk, std::move(patch_data)); } } - return nullptr; -} -static void DumpChunks(const std::vector<ImageChunk>& chunks) { - for (size_t i = 0; i < chunks.size(); ++i) { - printf("chunk %zu: ", i); - chunks[i].Dump(); + CHECK_EQ(tgt_image.NumOfChunks(), patch_chunks.size()); + + android::base::unique_fd patch_fd( + open(patch_name.c_str(), O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); + if (patch_fd == -1) { + PLOG(ERROR) << "Failed to open " << patch_name; + return false; } + + return PatchChunk::WritePatchDataToFd(patch_chunks, patch_fd); } int imgdiff(int argc, const char** argv) { + bool verbose = false; bool zip_mode = false; - - if (argc >= 2 && strcmp(argv[1], "-z") == 0) { - zip_mode = true; - --argc; - ++argv; - } - std::vector<uint8_t> bonus_data; - if (argc >= 3 && strcmp(argv[1], "-b") == 0) { - android::base::unique_fd fd(open(argv[2], O_RDONLY)); - if (fd == -1) { - printf("failed to open bonus file %s: %s\n", argv[2], strerror(errno)); - return 1; - } - struct stat st; - if (fstat(fd, &st) != 0) { - printf("failed to stat bonus file %s: %s\n", argv[2], strerror(errno)); - return 1; - } + size_t blocks_limit = 0; + std::string split_info_file; + std::string debug_dir; + + int opt; + int option_index; + optind = 0; // Reset the getopt state so that we can call it multiple times for test. + + while ((opt = getopt_long(argc, const_cast<char**>(argv), "zb:v", OPTIONS, &option_index)) != + -1) { + switch (opt) { + case 'z': + zip_mode = true; + break; + case 'b': { + android::base::unique_fd fd(open(optarg, O_RDONLY)); + if (fd == -1) { + PLOG(ERROR) << "Failed to open bonus file " << optarg; + return 1; + } + struct stat st; + if (fstat(fd, &st) != 0) { + PLOG(ERROR) << "Failed to stat bonus file " << optarg; + return 1; + } - size_t bonus_size = st.st_size; - bonus_data.resize(bonus_size); - if (!android::base::ReadFully(fd, bonus_data.data(), bonus_size)) { - printf("failed to read bonus file %s: %s\n", argv[2], strerror(errno)); - return 1; + size_t bonus_size = st.st_size; + bonus_data.resize(bonus_size); + if (!android::base::ReadFully(fd, bonus_data.data(), bonus_size)) { + PLOG(ERROR) << "Failed to read bonus file " << optarg; + return 1; + } + break; + } + case 'v': + verbose = true; + break; + case 0: { + std::string name = OPTIONS[option_index].name; + if (name == "block-limit" && !android::base::ParseUint(optarg, &blocks_limit)) { + LOG(ERROR) << "Failed to parse size blocks_limit: " << optarg; + return 1; + } else if (name == "split-info") { + split_info_file = optarg; + } else if (name == "debug-dir") { + debug_dir = optarg; + } + break; + } + default: + LOG(ERROR) << "unexpected opt: " << static_cast<char>(opt); + return 2; } + } - argc -= 2; - argv += 2; + if (!verbose) { + android::base::SetMinimumLogSeverity(android::base::WARNING); } - if (argc != 4) { - printf("usage: %s [-z] [-b <bonus-file>] <src-img> <tgt-img> <patch-file>\n", - argv[0]); + if (argc - optind != 3) { + LOG(ERROR) << "usage: " << argv[0] << " [options] <src-img> <tgt-img> <patch-file>"; + LOG(ERROR) + << " -z <zip-mode>, Generate patches in zip mode, src and tgt should be zip files.\n" + " -b <bonus-file>, Bonus file in addition to src, image mode only.\n" + " --block-limit, For large zips, split the src and tgt based on the block limit;\n" + " and generate patches between each pair of pieces. Concatenate " + "these\n" + " patches together and output them into <patch-file>.\n" + " --split-info, Output the split information (patch_size, tgt_size, src_ranges);\n" + " zip mode with block-limit only.\n" + " --debug-dir, Debug directory to put the split srcs and patches, zip mode only.\n" + " -v, --verbose, Enable verbose logging."; return 2; } - std::vector<ImageChunk> src_chunks; - std::vector<ImageChunk> tgt_chunks; - std::vector<uint8_t> src_file; - std::vector<uint8_t> tgt_file; - if (zip_mode) { - if (!ReadZip(argv[1], &src_chunks, &src_file, true)) { - printf("failed to break apart source zip file\n"); - return 1; - } - if (!ReadZip(argv[2], &tgt_chunks, &tgt_file, false)) { - printf("failed to break apart target zip file\n"); - return 1; - } - } else { - if (!ReadImage(argv[1], &src_chunks, &src_file)) { - printf("failed to break apart source image\n"); + ZipModeImage src_image(true, blocks_limit * BLOCK_SIZE); + ZipModeImage tgt_image(false, blocks_limit * BLOCK_SIZE); + + if (!src_image.Initialize(argv[optind])) { return 1; } - if (!ReadImage(argv[2], &tgt_chunks, &tgt_file)) { - printf("failed to break apart target image\n"); + if (!tgt_image.Initialize(argv[optind + 1])) { return 1; } - // Verify that the source and target images have the same chunk - // structure (ie, the same sequence of deflate and normal chunks). - - // Merge the gzip header and footer in with any adjacent normal chunks. - MergeAdjacentNormalChunks(&tgt_chunks); - MergeAdjacentNormalChunks(&src_chunks); - - if (src_chunks.size() != tgt_chunks.size()) { - printf("source and target don't have same number of chunks!\n"); - printf("source chunks:\n"); - DumpChunks(src_chunks); - printf("target chunks:\n"); - DumpChunks(tgt_chunks); + if (!ZipModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { return 1; } - for (size_t i = 0; i < src_chunks.size(); ++i) { - if (src_chunks[i].GetType() != tgt_chunks[i].GetType()) { - printf("source and target don't have same chunk structure! (chunk %zu)\n", i); - printf("source chunks:\n"); - DumpChunks(src_chunks); - printf("target chunks:\n"); - DumpChunks(tgt_chunks); + + // Compute bsdiff patches for each chunk's data (the uncompressed data, in the case of + // deflate chunks). + if (blocks_limit > 0) { + if (split_info_file.empty()) { + LOG(ERROR) << "split-info path cannot be empty when generating patches with a block-limit"; return 1; } - } - } - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - if (tgt_chunks[i].GetType() == CHUNK_DEFLATE) { - // Confirm that given the uncompressed chunk data in the target, we - // can recompress it and get exactly the same bits as are in the - // input target image. If this fails, treat the chunk as a normal - // non-deflated chunk. - if (!tgt_chunks[i].ReconstructDeflateChunk()) { - printf("failed to reconstruct target deflate chunk %zu [%s]; treating as normal\n", i, - tgt_chunks[i].GetEntryName().c_str()); - tgt_chunks[i].ChangeDeflateChunkToNormal(); - if (zip_mode) { - ImageChunk* src = FindChunkByName(tgt_chunks[i].GetEntryName(), src_chunks); - if (src != nullptr) { - src->ChangeDeflateChunkToNormal(); - } - } else { - src_chunks[i].ChangeDeflateChunkToNormal(); - } - continue; - } + std::vector<ZipModeImage> split_tgt_images; + std::vector<ZipModeImage> split_src_images; + std::vector<SortedRangeSet> split_src_ranges; + ZipModeImage::SplitZipModeImageWithLimit(tgt_image, src_image, &split_tgt_images, + &split_src_images, &split_src_ranges); - // If two deflate chunks are identical (eg, the kernel has not - // changed between two builds), treat them as normal chunks. - // This makes applypatch much faster -- it can apply a trivial - // patch to the compressed data, rather than uncompressing and - // recompressing to apply the trivial patch to the uncompressed - // data. - ImageChunk* src; - if (zip_mode) { - src = FindChunkByName(tgt_chunks[i].GetEntryName(), src_chunks); - } else { - src = &src_chunks[i]; + if (!ZipModeImage::GeneratePatches(split_tgt_images, split_src_images, split_src_ranges, + argv[optind + 2], split_info_file, debug_dir)) { + return 1; } - if (src == nullptr) { - tgt_chunks[i].ChangeDeflateChunkToNormal(); - } else if (tgt_chunks[i] == *src) { - tgt_chunks[i].ChangeDeflateChunkToNormal(); - src->ChangeDeflateChunkToNormal(); - } + } else if (!ZipModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { + return 1; } - } - - // Merging neighboring normal chunks. - if (zip_mode) { - // For zips, we only need to do this to the target: deflated - // chunks are matched via filename, and normal chunks are patched - // using the entire source file as the source. - MergeAdjacentNormalChunks(&tgt_chunks); - } else { - // For images, we need to maintain the parallel structure of the - // chunk lists, so do the merging in both the source and target - // lists. - MergeAdjacentNormalChunks(&tgt_chunks); - MergeAdjacentNormalChunks(&src_chunks); - if (src_chunks.size() != tgt_chunks.size()) { - // This shouldn't happen. - printf("merging normal chunks went awry\n"); + ImageModeImage src_image(true); + ImageModeImage tgt_image(false); + + if (!src_image.Initialize(argv[optind])) { return 1; } - } - - // Compute bsdiff patches for each chunk's data (the uncompressed - // data, in the case of deflate chunks). - - DumpChunks(src_chunks); - - printf("Construct patches for %zu chunks...\n", tgt_chunks.size()); - std::vector<std::vector<uint8_t>> patch_data(tgt_chunks.size()); - saidx_t* bsdiff_cache = nullptr; - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - if (zip_mode) { - ImageChunk* src; - if (tgt_chunks[i].GetType() == CHUNK_DEFLATE && - (src = FindChunkByName(tgt_chunks[i].GetEntryName(), src_chunks))) { - if (!MakePatch(src, &tgt_chunks[i], &patch_data[i], nullptr)) { - printf("Failed to generate patch for target chunk %zu: ", i); - return 1; - } - } else { - if (!MakePatch(&src_chunks[0], &tgt_chunks[i], &patch_data[i], &bsdiff_cache)) { - printf("Failed to generate patch for target chunk %zu: ", i); - return 1; - } - } - } else { - if (i == 1 && !bonus_data.empty()) { - printf(" using %zu bytes of bonus data for chunk %zu\n", bonus_data.size(), i); - src_chunks[i].SetBonusData(bonus_data); - } - - if (!MakePatch(&src_chunks[i], &tgt_chunks[i], &patch_data[i], nullptr)) { - printf("Failed to generate patch for target chunk %zu: ", i); - return 1; - } + if (!tgt_image.Initialize(argv[optind + 1])) { + return 1; } - printf("patch %3zu is %zu bytes (of %zu)\n", i, patch_data[i].size(), - src_chunks[i].GetRawDataLength()); - } - - if (bsdiff_cache != nullptr) { - free(bsdiff_cache); - } - - // Figure out how big the imgdiff file header is going to be, so - // that we can correctly compute the offset of each bsdiff patch - // within the file. - size_t total_header_size = 12; - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - total_header_size += tgt_chunks[i].GetHeaderSize(patch_data[i].size()); - } - - size_t offset = total_header_size; - - android::base::unique_fd patch_fd(open(argv[3], O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR)); - if (patch_fd == -1) { - printf("failed to open \"%s\": %s\n", argv[3], strerror(errno)); - return 1; - } + if (!ImageModeImage::CheckAndProcessChunks(&tgt_image, &src_image)) { + return 1; + } - // Write out the headers. - if (!android::base::WriteStringToFd("IMGDIFF2", patch_fd)) { - printf("failed to write \"IMGDIFF2\" to \"%s\": %s\n", argv[3], strerror(errno)); - return 1; - } - Write4(patch_fd, static_cast<int32_t>(tgt_chunks.size())); - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - printf("chunk %zu: ", i); - offset = tgt_chunks[i].WriteHeaderToFd(patch_fd, patch_data[i], offset); - } + if (!bonus_data.empty() && !src_image.SetBonusData(bonus_data)) { + return 1; + } - // Append each chunk's bsdiff patch, in order. - for (size_t i = 0; i < tgt_chunks.size(); ++i) { - if (tgt_chunks[i].GetType() != CHUNK_RAW) { - if (!android::base::WriteFully(patch_fd, patch_data[i].data(), patch_data[i].size())) { - CHECK(false) << "failed to write " << patch_data[i].size() << " bytes patch for chunk " - << i; - } + if (!ImageModeImage::GeneratePatches(tgt_image, src_image, argv[optind + 2])) { + return 1; } } diff --git a/applypatch/imgpatch.cpp b/applypatch/imgpatch.cpp index df75f98d4..3682d6115 100644 --- a/applypatch/imgpatch.cpp +++ b/applypatch/imgpatch.cpp @@ -37,6 +37,8 @@ #include <openssl/sha.h> #include <zlib.h> +#include "edify/expr.h" + static inline int64_t Read8(const void *address) { return android::base::get_unaligned<int64_t>(address); } @@ -48,7 +50,7 @@ static inline int32_t Read4(const void *address) { // This function is a wrapper of ApplyBSDiffPatch(). It has a custom sink function to deflate the // patched data and stream the deflated data to output. static bool ApplyBSDiffPatchAndStreamOutput(const uint8_t* src_data, size_t src_len, - const Value* patch, size_t patch_offset, + const Value& patch, size_t patch_offset, const char* deflate_header, SinkFn sink, SHA_CTX* ctx) { size_t expected_target_length = static_cast<size_t>(Read8(deflate_header + 32)); int level = Read4(deflate_header + 40); @@ -57,13 +59,13 @@ static bool ApplyBSDiffPatchAndStreamOutput(const uint8_t* src_data, size_t src_ int mem_level = Read4(deflate_header + 52); int strategy = Read4(deflate_header + 56); - std::unique_ptr<z_stream, decltype(&deflateEnd)> strm(new z_stream(), deflateEnd); - strm->zalloc = Z_NULL; - strm->zfree = Z_NULL; - strm->opaque = Z_NULL; - strm->avail_in = 0; - strm->next_in = nullptr; - int ret = deflateInit2(strm.get(), level, method, window_bits, mem_level, strategy); + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = nullptr; + int ret = deflateInit2(&strm, level, method, window_bits, mem_level, strategy); if (ret != Z_OK) { LOG(ERROR) << "Failed to init uncompressed data deflation: " << ret; return false; @@ -74,18 +76,19 @@ static bool ApplyBSDiffPatchAndStreamOutput(const uint8_t* src_data, size_t src_ size_t actual_target_length = 0; size_t total_written = 0; static constexpr size_t buffer_size = 32768; - auto compression_sink = [&](const uint8_t* data, size_t len) -> size_t { + auto compression_sink = [&strm, &actual_target_length, &expected_target_length, &total_written, + &ret, &ctx, &sink](const uint8_t* data, size_t len) -> size_t { // The input patch length for an update never exceeds INT_MAX. - strm->avail_in = len; - strm->next_in = data; + strm.avail_in = len; + strm.next_in = data; do { std::vector<uint8_t> buffer(buffer_size); - strm->avail_out = buffer_size; - strm->next_out = buffer.data(); + strm.avail_out = buffer_size; + strm.next_out = buffer.data(); if (actual_target_length + len < expected_target_length) { - ret = deflate(strm.get(), Z_NO_FLUSH); + ret = deflate(&strm, Z_NO_FLUSH); } else { - ret = deflate(strm.get(), Z_FINISH); + ret = deflate(&strm, Z_FINISH); } if (ret != Z_OK && ret != Z_STREAM_END) { LOG(ERROR) << "Failed to deflate stream: " << ret; @@ -93,20 +96,24 @@ static bool ApplyBSDiffPatchAndStreamOutput(const uint8_t* src_data, size_t src_ return 0; } - size_t have = buffer_size - strm->avail_out; + size_t have = buffer_size - strm.avail_out; total_written += have; if (sink(buffer.data(), have) != have) { LOG(ERROR) << "Failed to write " << have << " compressed bytes to output."; return 0; } if (ctx) SHA1_Update(ctx, buffer.data(), have); - } while ((strm->avail_in != 0 || strm->avail_out == 0) && ret != Z_STREAM_END); + } while ((strm.avail_in != 0 || strm.avail_out == 0) && ret != Z_STREAM_END); actual_target_length += len; return len; }; - if (ApplyBSDiffPatch(src_data, src_len, patch, patch_offset, compression_sink, nullptr) != 0) { + int bspatch_result = + ApplyBSDiffPatch(src_data, src_len, patch, patch_offset, compression_sink, nullptr); + deflateEnd(&strm); + + if (bspatch_result != 0) { return false; } @@ -128,48 +135,39 @@ static bool ApplyBSDiffPatchAndStreamOutput(const uint8_t* src_data, size_t src_ int ApplyImagePatch(const unsigned char* old_data, size_t old_size, const unsigned char* patch_data, size_t patch_size, SinkFn sink) { Value patch(VAL_BLOB, std::string(reinterpret_cast<const char*>(patch_data), patch_size)); - - return ApplyImagePatch(old_data, old_size, &patch, sink, nullptr, nullptr); + return ApplyImagePatch(old_data, old_size, patch, sink, nullptr, nullptr); } -/* - * Apply the patch given in 'patch_filename' to the source data given - * by (old_data, old_size). Write the patched output to the 'output' - * file, and update the SHA context with the output data as well. - * Return 0 on success. - */ -int ApplyImagePatch(const unsigned char* old_data, size_t old_size, const Value* patch, SinkFn sink, +int ApplyImagePatch(const unsigned char* old_data, size_t old_size, const Value& patch, SinkFn sink, SHA_CTX* ctx, const Value* bonus_data) { - if (patch->data.size() < 12) { + if (patch.data.size() < 12) { printf("patch too short to contain header\n"); return -1; } - // IMGDIFF2 uses CHUNK_NORMAL, CHUNK_DEFLATE, and CHUNK_RAW. - // (IMGDIFF1, which is no longer supported, used CHUNK_NORMAL and - // CHUNK_GZIP.) - size_t pos = 12; - const char* header = &patch->data[0]; - if (memcmp(header, "IMGDIFF2", 8) != 0) { + // IMGDIFF2 uses CHUNK_NORMAL, CHUNK_DEFLATE, and CHUNK_RAW. (IMGDIFF1, which is no longer + // supported, used CHUNK_NORMAL and CHUNK_GZIP.) + const char* const patch_header = patch.data.data(); + if (memcmp(patch_header, "IMGDIFF2", 8) != 0) { printf("corrupt patch file header (magic number)\n"); return -1; } - int num_chunks = Read4(header + 8); - + int num_chunks = Read4(patch_header + 8); + size_t pos = 12; for (int i = 0; i < num_chunks; ++i) { // each chunk's header record starts with 4 bytes. - if (pos + 4 > patch->data.size()) { + if (pos + 4 > patch.data.size()) { printf("failed to read chunk %d record\n", i); return -1; } - int type = Read4(&patch->data[pos]); + int type = Read4(patch_header + pos); pos += 4; if (type == CHUNK_NORMAL) { - const char* normal_header = &patch->data[pos]; + const char* normal_header = patch_header + pos; pos += 24; - if (pos > patch->data.size()) { + if (pos > patch.data.size()) { printf("failed to read chunk %d normal header data\n", i); return -1; } @@ -187,30 +185,32 @@ int ApplyImagePatch(const unsigned char* old_data, size_t old_size, const Value* return -1; } } else if (type == CHUNK_RAW) { - const char* raw_header = &patch->data[pos]; + const char* raw_header = patch_header + pos; pos += 4; - if (pos > patch->data.size()) { + if (pos > patch.data.size()) { printf("failed to read chunk %d raw header data\n", i); return -1; } size_t data_len = static_cast<size_t>(Read4(raw_header)); - if (pos + data_len > patch->data.size()) { + if (pos + data_len > patch.data.size()) { printf("failed to read chunk %d raw data\n", i); return -1; } - if (ctx) SHA1_Update(ctx, &patch->data[pos], data_len); - if (sink(reinterpret_cast<const unsigned char*>(&patch->data[pos]), data_len) != data_len) { + if (ctx) { + SHA1_Update(ctx, patch_header + pos, data_len); + } + if (sink(reinterpret_cast<const unsigned char*>(patch_header + pos), data_len) != data_len) { printf("failed to write chunk %d raw data\n", i); return -1; } pos += data_len; } else if (type == CHUNK_DEFLATE) { // deflate chunks have an additional 60 bytes in their chunk header. - const char* deflate_header = &patch->data[pos]; + const char* deflate_header = patch_header + pos; pos += 60; - if (pos > patch->data.size()) { + if (pos > patch.data.size()) { printf("failed to read chunk %d deflate header data\n", i); return -1; } diff --git a/applypatch/include/applypatch/applypatch.h b/applypatch/include/applypatch/applypatch.h index 581360ef1..912ead1fa 100644 --- a/applypatch/include/applypatch/applypatch.h +++ b/applypatch/include/applypatch/applypatch.h @@ -18,7 +18,6 @@ #define _APPLYPATCH_H #include <stdint.h> -#include <sys/stat.h> #include <functional> #include <memory> @@ -27,24 +26,18 @@ #include <openssl/sha.h> -#include "edify/expr.h" +// Forward declaration to avoid including "edify/expr.h" in the header. +struct Value; struct FileContents { uint8_t sha1[SHA_DIGEST_LENGTH]; std::vector<unsigned char> data; - struct stat st; }; -// When there isn't enough room on the target filesystem to hold the -// patched version of the file, we copy the original here and delete -// it to free up space. If the expected source file doesn't exist, or -// is corrupted, we look to see if this file contains the bits we want -// and use it as the source instead. -#define CACHE_TEMP_SOURCE "/cache/saved.file" - using SinkFn = std::function<size_t(const unsigned char*, size_t)>; // applypatch.cpp + int ShowLicenses(); size_t FreeSpaceForFile(const char* filename); int CacheSizeCheck(size_t bytes); @@ -66,15 +59,25 @@ int LoadFileContents(const char* filename, FileContents* file); int SaveFileContents(const char* filename, const FileContents* file); // bspatch.cpp + void ShowBSDiffLicense(); -int ApplyBSDiffPatch(const unsigned char* old_data, size_t old_size, const Value* patch, + +// Applies the bsdiff-patch given in 'patch' (from offset 'patch_offset' to the end) to the source +// data given by (old_data, old_size). Writes the patched output through the given 'sink', and +// updates the SHA-1 context with the output data. Returns 0 on success. +int ApplyBSDiffPatch(const unsigned char* old_data, size_t old_size, const Value& patch, size_t patch_offset, SinkFn sink, SHA_CTX* ctx); // imgpatch.cpp -int ApplyImagePatch(const unsigned char* old_data, size_t old_size, const Value* patch, SinkFn sink, + +// Applies the imgdiff-patch given in 'patch' to the source data given by (old_data, old_size), with +// the optional bonus data. Writes the patched output through the given 'sink', and updates the +// SHA-1 context with the output data. Returns 0 on success. +int ApplyImagePatch(const unsigned char* old_data, size_t old_size, const Value& patch, SinkFn sink, SHA_CTX* ctx, const Value* bonus_data); // freecache.cpp + int MakeFreeSpaceOnCache(size_t bytes_needed); #endif diff --git a/applypatch/include/applypatch/imgdiff_image.h b/applypatch/include/applypatch/imgdiff_image.h new file mode 100644 index 000000000..084807237 --- /dev/null +++ b/applypatch/include/applypatch/imgdiff_image.h @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _APPLYPATCH_IMGDIFF_IMAGE_H +#define _APPLYPATCH_IMGDIFF_IMAGE_H + +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +#include <string> +#include <vector> + +#include <bsdiff/bsdiff.h> +#include <ziparchive/zip_archive.h> +#include <zlib.h> + +#include "imgdiff.h" +#include "otautil/rangeset.h" + +class ImageChunk { + public: + static constexpr auto WINDOWBITS = -15; // 32kb window; negative to indicate a raw stream. + static constexpr auto MEMLEVEL = 8; // the default value. + static constexpr auto METHOD = Z_DEFLATED; + static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY; + + ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len, + std::string entry_name = {}); + + int GetType() const { + return type_; + } + size_t GetRawDataLength() const { + return raw_data_len_; + } + const std::string& GetEntryName() const { + return entry_name_; + } + size_t GetStartOffset() const { + return start_; + } + int GetCompressLevel() const { + return compress_level_; + } + + // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return + // the raw data. + const uint8_t* DataForPatch() const; + size_t DataLengthForPatch() const; + + void Dump(size_t index) const; + + void SetUncompressedData(std::vector<uint8_t> data); + bool SetBonusData(const std::vector<uint8_t>& bonus_data); + + bool operator==(const ImageChunk& other) const; + bool operator!=(const ImageChunk& other) const { + return !(*this == other); + } + + /* + * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data). + * The resulting patch will likely be about as big as the target file, but it lets us handle + * the case of images where some gzip chunks are reconstructible but others aren't (by treating + * the ones that aren't as normal chunks). + */ + void ChangeDeflateChunkToNormal(); + + /* + * Verify that we can reproduce exactly the same compressed data that we started with. Sets the + * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding + * parameters needed to produce the right output. + */ + bool ReconstructDeflateChunk(); + bool IsAdjacentNormal(const ImageChunk& other) const; + void MergeAdjacentNormal(const ImageChunk& other); + + /* + * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data. + * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used + * repeatedly, pass nullptr if not needed. + */ + static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src, + std::vector<uint8_t>* patch_data, + bsdiff::SuffixArrayIndexInterface** bsdiff_cache); + + private: + const uint8_t* GetRawData() const; + bool TryReconstruction(int level); + + int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW + size_t start_; // offset of chunk in the original input file + const std::vector<uint8_t>* input_file_ptr_; // ptr to the full content of original input file + size_t raw_data_len_; + + // deflate encoder parameters + int compress_level_; + + // --- for CHUNK_DEFLATE chunks only: --- + std::vector<uint8_t> uncompressed_data_; + std::string entry_name_; // used for zip entries +}; + +// PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track +// of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length). +class PatchChunk { + public: + PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data); + + // Construct a CHUNK_RAW patch from the target data directly. + explicit PatchChunk(const ImageChunk& tgt); + + // Return true if raw data size is smaller than the patch size. + static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); + + // Update the source start with the new offset within the source range. + void UpdateSourceOffset(const SortedRangeSet& src_range); + + // Return the total size (header + data) of the patch. + size_t PatchSize() const; + + static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd); + + private: + size_t GetHeaderSize() const; + size_t WriteHeaderToFd(int fd, size_t offset, size_t index) const; + + // The patch chunk type is the same as the target chunk type. The only exception is we change + // the |type_| to CHUNK_RAW if target length is smaller than the patch size. + int type_; + + size_t source_start_; + size_t source_len_; + size_t source_uncompressed_len_; + + size_t target_start_; // offset of the target chunk within the target file + size_t target_len_; + size_t target_uncompressed_len_; + size_t target_compress_level_; // the deflate compression level of the target chunk. + + std::vector<uint8_t> data_; // storage for the patch data +}; + +// Interface for zip_mode and image_mode images. We initialize the image from an input file and +// split the file content into a list of image chunks. +class Image { + public: + explicit Image(bool is_source) : is_source_(is_source) {} + + virtual ~Image() {} + + // Create a list of image chunks from input file. + virtual bool Initialize(const std::string& filename) = 0; + + // Look for runs of adjacent normal chunks and compress them down into a single chunk. (Such + // runs can be produced when deflate chunks are changed to normal chunks.) + void MergeAdjacentNormalChunks(); + + void DumpChunks() const; + + // Non const iterators to access the stored ImageChunks. + std::vector<ImageChunk>::iterator begin() { + return chunks_.begin(); + } + + std::vector<ImageChunk>::iterator end() { + return chunks_.end(); + } + + std::vector<ImageChunk>::const_iterator cbegin() const { + return chunks_.cbegin(); + } + + std::vector<ImageChunk>::const_iterator cend() const { + return chunks_.cend(); + } + + ImageChunk& operator[](size_t i); + const ImageChunk& operator[](size_t i) const; + + size_t NumOfChunks() const { + return chunks_.size(); + } + + protected: + bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content); + + bool is_source_; // True if it's for source chunks. + std::vector<ImageChunk> chunks_; // Internal storage of ImageChunk. + std::vector<uint8_t> file_content_; // Store the whole input file in memory. +}; + +class ZipModeImage : public Image { + public: + explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {} + + bool Initialize(const std::string& filename) override; + + // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we + // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img + // pieces; because for each chunk both the data and their offset within the file are unchanged. + void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) { + chunks_ = chunks; + file_content_ = file_content; + } + + // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in + // fact the whole source file. + ImageChunk PseudoSource() const; + + // Find the matching deflate source chunk by entry name. Search for normal chunks also if + // |find_normal| is true. + ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false); + + const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const; + + // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if + // src and tgt are identical. + static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image); + + // Compute the patch between tgt & src images, and write the data into |patch_name|. + static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + const std::string& patch_name); + + // Compute the patch based on the lists of split src and tgt images. Generate patches for each + // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write + // each split src data and patch data into that directory. + static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + const std::vector<SortedRangeSet>& split_src_ranges, + const std::string& patch_name, const std::string& split_info_file, + const std::string& debug_dir); + + // Split the tgt chunks and src chunks based on the size limit. + static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images, + std::vector<SortedRangeSet>* split_src_ranges); + + private: + // Initialize image chunks based on the zip entries. + bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); + // Add the a zip entry to the list. + bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); + // Return the real size of the zip file. (omit the trailing zeros that used for alignment) + bool GetZipFileSize(size_t* input_file_size); + + static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images, + const std::vector<ZipModeImage>& split_src_images, + std::vector<SortedRangeSet>& split_src_ranges, + size_t total_tgt_size); + // Construct the dummy split images based on the chunks info and source ranges; and move them into + // the given vectors. Return true if we add a new split image into |split_tgt_images|, and + // false otherwise. + static bool AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector<ImageChunk>& split_tgt_chunks, + const std::vector<ImageChunk>& split_src_chunks, + std::vector<ZipModeImage>* split_tgt_images, + std::vector<ZipModeImage>* split_src_images); + + // Function that actually iterates the tgt_chunks and makes patches. + static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + std::vector<PatchChunk>* patch_chunks); + + // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit, + // we'll split that entry into several smaller chunks in advance. + size_t limit_; +}; + +class ImageModeImage : public Image { + public: + explicit ImageModeImage(bool is_source) : Image(is_source) {} + + // Initialize the image chunks list by searching the magic numbers in an image file. + bool Initialize(const std::string& filename) override; + + bool SetBonusData(const std::vector<uint8_t>& bonus_data); + + // In Image Mode, verify that the source and target images have the same chunk structure (ie, the + // same sequence of deflate and normal chunks). + static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image); + + // In image mode, generate patches against the given source chunks and bonus_data; write the + // result to |patch_name|. + static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image, + const std::string& patch_name); +}; + +#endif // _APPLYPATCH_IMGDIFF_IMAGE_H diff --git a/applypatch/libimgpatch.pc b/applypatch/libimgpatch.pc deleted file mode 100644 index e5002934f..000000000 --- a/applypatch/libimgpatch.pc +++ /dev/null @@ -1,6 +0,0 @@ -# This file is for libimgpatch in Chrome OS. - -Name: libimgpatch -Description: Apply imgdiff patch -Version: 0.0.1 -Libs: -limgpatch -lbz2 -lz |