From 2903cddb58f6ee99116e0751a2305f75f9a86461 Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Fri, 18 Aug 2017 18:15:47 -0700 Subject: Improve imgdiff for large zip files Due to the cache size limit for OTA generation, we used to split large zip files linearly into pieces and do bsdiff on them. As a result, i) we lose the advantage of imgdiff; ii) if there's an accidental order change of some huge files inside the zip, we'll create an insanely large patch. This patch splits the src&tgt more smartly based on the zip entry_name. If the entry_name is empty or no matching source is found for a target chunk, we'll skip adding its source and later do a bsdiff against the whole split source image (this rarely happens in our use cases except for the metadata inside a ziparchive). After the split, the target pieces are continuous and block aligned, while the sources pieces are mutually exclusive. (Some of the source blocks may not be used if there's no matching entry_name in the target.) Then we will generate patches accordingly between each split image pairs. Afterwards, if we apply imgpatch to each pair of split source/target images and add up the patched result, we can get back the original target image. For example: Input: [src_image, tgt_image] Split: [src-0,tgt-0; src-1,tgt-1, src-2,tgt-2] Diff: [ patch-0; patch-1; patch-2] Patch: [(src-0,patch-0)=tgt-0; (src-1,patch-1)=tgt-1; (src-2,patch-2)=tgt-2;] Append: [tgt-0 + tgt-1 + tgt-2 = tgt_image] Peformance: For the small package in b/34220646, we decrease the patch size of chrome.apk dramatically from 30M to 400K due to the order change of two big .so files. On two versions of angler, I also observe decent patch size decrease. For chrome.apk, we reduced the size from 5.9M to 3.2M; and for vevlet.apk from 8.0M to 6.5M. Bug: 34220646 Test: recovery component test && apply imgdiff & imgpatch on two chrome.apk Change-Id: I145d802984fa805efbbac9d01a2e64d82ef9728b --- applypatch/include/applypatch/imgdiff_image.h | 58 ++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'applypatch/include') diff --git a/applypatch/include/applypatch/imgdiff_image.h b/applypatch/include/applypatch/imgdiff_image.h index 221dd5ab5..9fb844b24 100644 --- a/applypatch/include/applypatch/imgdiff_image.h +++ b/applypatch/include/applypatch/imgdiff_image.h @@ -129,6 +129,9 @@ class PatchChunk { // Return true if raw data size is smaller than the patch size. static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); + // Update the source start with the new offset within the source range. + void UpdateSourceOffset(const SortedRangeSet& src_range); + static bool WritePatchDataToFd(const std::vector& patch_chunks, int patch_fd); private: @@ -177,6 +180,14 @@ class Image { return chunks_.end(); } + std::vector::const_iterator cbegin() const { + return chunks_.cbegin(); + } + + std::vector::const_iterator cend() const { + return chunks_.cend(); + } + ImageChunk& operator[](size_t i); const ImageChunk& operator[](size_t i) const; @@ -194,10 +205,18 @@ class Image { class ZipModeImage : public Image { public: - explicit ZipModeImage(bool is_source) : Image(is_source) {} + explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {} bool Initialize(const std::string& filename) override; + // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we + // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img + // pieces; because for each chunk both the data and their offset within the file are unchanged. + void Initialize(const std::vector& chunks, const std::vector& file_content) { + chunks_ = chunks; + file_content_ = file_content; + } + // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in // fact the whole source file. ImageChunk PseudoSource() const; @@ -216,6 +235,21 @@ class ZipModeImage : public Image { static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, const std::string& patch_name); + // Compute the patch based on the lists of split src and tgt images. Generate patches for each + // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write + // each split src data and patch data into that directory. + static bool GeneratePatches(const std::vector& split_tgt_images, + const std::vector& split_src_images, + const std::vector& split_src_ranges, + const std::string& patch_name, const std::string& debug_dir); + + // Split the tgt chunks and src chunks based on the size limit. + static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + std::vector* split_tgt_images, + std::vector* split_src_images, + std::vector* split_src_ranges); + private: // Initialize image chunks based on the zip entries. bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); @@ -223,6 +257,28 @@ class ZipModeImage : public Image { bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); // Return the real size of the zip file. (omit the trailing zeros that used for alignment) bool GetZipFileSize(size_t* input_file_size); + + static void ValidateSplitImages(const std::vector& split_tgt_images, + const std::vector& split_src_images, + std::vector& split_src_ranges, + size_t total_tgt_size); + // Construct the dummy split images based on the chunks info and source ranges; and move them into + // the given vectors. + static void AddSplitImageFromChunkList(const ZipModeImage& tgt_image, + const ZipModeImage& src_image, + const SortedRangeSet& split_src_ranges, + const std::vector& split_tgt_chunks, + const std::vector& split_src_chunks, + std::vector* split_tgt_images, + std::vector* split_src_images); + + // Function that actually iterates the tgt_chunks and makes patches. + static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, + std::vector* patch_chunks); + + // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit, + // we'll split that entry into several smaller chunks in advance. + size_t limit_; }; class ImageModeImage : public Image { -- cgit v1.2.3