From b9e7fc7fa9c70df36881b2166e8bd2bcfa87fe2e Mon Sep 17 00:00:00 2001 From: Tianjie Xu Date: Wed, 26 Jul 2017 16:41:24 -0700 Subject: Add implemention of SortedRangeSet This is useful in imgdiff to maintain the block ranges of splitted source image. Bug: 34220646 Test: mma && unit tests pass Change-Id: I6427f2ea50f0e3b0aa3dd01880ec0206679b7429 --- updater/include/updater/rangeset.h | 116 ++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) (limited to 'updater') diff --git a/updater/include/updater/rangeset.h b/updater/include/updater/rangeset.h index fad038043..b67c98724 100644 --- a/updater/include/updater/rangeset.h +++ b/updater/include/updater/rangeset.h @@ -24,6 +24,7 @@ #include #include +#include #include using Range = std::pair; @@ -74,6 +75,18 @@ class RangeSet { return RangeSet(std::move(pairs)); } + std::string ToString() const { + if (ranges_.empty()) { + return ""; + } + std::string result = std::to_string(ranges_.size() * 2); + for (const auto& r : ranges_) { + result += android::base::StringPrintf(",%zu,%zu", r.first, r.second); + } + + return result; + } + // Get the block number for the i-th (starting from 0) block in the RangeSet. size_t GetBlockNumber(size_t idx) const { CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")"; @@ -157,8 +170,109 @@ class RangeSet { return ranges_ != other.ranges_; } - private: + protected: // Actual limit for each value and the total number are both INT_MAX. std::vector ranges_; size_t blocks_; }; + +static constexpr size_t kBlockSize = 4096; + +// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input +// files when we're handling large zip files. Specifically, we can treat the input file as a +// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into +// several smaller chunks based on the zip entries. + +// For example, [source: 0-99] can be split into +// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89] +// Here "10-29" simply means block 10th to block 29th with respect to the original input file. +// Also, note that the split sources should be mutual exclusive, but they don't need to cover +// every block in the original source. +class SortedRangeSet : public RangeSet { + public: + SortedRangeSet() {} + + // Ranges in the the set should be mutually exclusive; and they're sorted by the start block. + explicit SortedRangeSet(std::vector&& pairs) : RangeSet(std::move(pairs)) { + std::sort(ranges_.begin(), ranges_.end()); + } + + void Insert(const Range& to_insert) { + SortedRangeSet rs({ to_insert }); + Insert(rs); + } + + // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges. + void Insert(const SortedRangeSet& rs) { + if (rs.size() == 0) { + return; + } + // Merge and sort the two RangeSets. + std::vector temp = std::move(ranges_); + std::copy(rs.begin(), rs.end(), std::back_inserter(temp)); + std::sort(temp.begin(), temp.end()); + + Clear(); + // Trim overlaps and insert the result back to ranges_. + Range to_insert = temp.front(); + for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) { + if (it->first <= to_insert.second) { + to_insert.second = std::max(to_insert.second, it->second); + } else { + ranges_.push_back(to_insert); + blocks_ += (to_insert.second - to_insert.first); + to_insert = *it; + } + } + ranges_.push_back(to_insert); + blocks_ += (to_insert.second - to_insert.first); + } + + void Clear() { + blocks_ = 0; + ranges_.clear(); + } + + using RangeSet::Overlaps; + bool Overlaps(size_t start, size_t len) const { + RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } }); + return Overlaps(rs); + } + + // Compute the block range the file occupies, and insert that range. + void Insert(size_t start, size_t len) { + Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 }; + Insert(to_insert); + } + + // Given an offset of the file, checks if the corresponding block (by considering the file as + // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset + // within this SortedRangeSet. + // + // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte. + // The mapped offset within a SortedRangeSet("1-9 15-19") is 10. + // + // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th + // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10 + // + 10) in a range represented by this SortedRangeSet. + size_t GetOffsetInRangeSet(size_t old_offset) const { + size_t old_block_start = old_offset / kBlockSize; + size_t new_block_start = 0; + for (const auto& range : ranges_) { + // Find the index of old_block_start. + if (old_block_start >= range.second) { + new_block_start += (range.second - range.first); + } else if (old_block_start >= range.first) { + new_block_start += (old_block_start - range.first); + return (new_block_start * kBlockSize + old_offset % kBlockSize); + } else { + CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: " + << this->ToString(); + return 0; + } + } + CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: " + << this->ToString(); + return 0; + } +}; \ No newline at end of file -- cgit v1.2.3