| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2020 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #pragma once |
| 18 | |
| 19 | #include <stdint.h> |
| 20 | |
| 21 | #include <map> |
| 22 | #include <memory> |
| 23 | #include <string_view> |
| 24 | #include <utility> |
| 25 | |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 26 | #include <android-base/logging.h> |
| 27 | #include <log/log.h> |
| 28 | |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 29 | #include "zip_error.h" |
| 30 | |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 31 | /* |
| 32 | * Round up to the next highest power of 2. |
| 33 | * |
| 34 | * Found on http://graphics.stanford.edu/~seander/bithacks.html. |
| 35 | * |
| 36 | * TODO: could switch to use std::bit_ceil() once ready |
| 37 | */ |
| 38 | static constexpr uint32_t RoundUpPower2(uint32_t val) { |
| 39 | val--; |
| 40 | val |= val >> 1; |
| 41 | val |= val >> 2; |
| 42 | val |= val >> 4; |
| 43 | val |= val >> 8; |
| 44 | val |= val >> 16; |
| 45 | val++; |
| 46 | |
| 47 | return val; |
| 48 | } |
| 49 | |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 50 | // This class is the interface of the central directory entries map. The map |
| 51 | // helps to locate a particular cd entry based on the filename. |
| 52 | class CdEntryMapInterface { |
| 53 | public: |
| 54 | virtual ~CdEntryMapInterface() = default; |
| 55 | // Adds an entry to the map. The |name| should internally points to the |
| 56 | // filename field of a cd entry. And |start| points to the beginning of the |
| 57 | // central directory. Returns 0 on success. |
| 58 | virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0; |
| 59 | // For the zip entry |entryName|, finds the offset of its filename field in |
| 60 | // the central directory. Returns a pair of [status, offset]. The value of |
| 61 | // the status is 0 on success. |
| 62 | virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name, |
| 63 | const uint8_t* cd_start) const = 0; |
| 64 | // Resets the iterator to the beginning of the map. |
| 65 | virtual void ResetIteration() = 0; |
| 66 | // Returns the [name, cd offset] of the current element. Also increments the |
| 67 | // iterator to points to the next element. Returns an empty pair we have read |
| 68 | // past boundary. |
| 69 | virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0; |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 70 | |
| 71 | static std::unique_ptr<CdEntryMapInterface> Create(uint64_t num_entries, |
| 72 | size_t cd_length, uint16_t max_file_name_length); |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 73 | }; |
| 74 | |
| 75 | /** |
| 76 | * More space efficient string representation of strings in an mmaped zipped |
| 77 | * file than std::string_view. Using std::string_view as an entry in the |
| 78 | * ZipArchive hash table wastes space. std::string_view stores a pointer to a |
| 79 | * string (on 64 bit, 8 bytes) and the length to read from that pointer, |
| 80 | * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting |
| 81 | * 6 bytes. |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 82 | */ |
| 83 | |
| 84 | /** |
| 85 | * ZipStringOffset20 stores 20-bit for offset from a fixed location in the memory |
| 86 | * mapped file instead of the entire address, with 12-bit for filename length (i.e. |
| 87 | * typical PATH_MAX), consuming 4 bytes in total |
| 88 | */ |
| 89 | struct ZipStringOffset20 { |
| 90 | static constexpr size_t offset_max = (1u << 20) - 1; |
| 91 | static constexpr size_t length_max = (1u << 12) - 1; |
| 92 | uint32_t name_offset : 20; |
| 93 | uint16_t name_length : 12; |
| 94 | }; |
| 95 | |
| 96 | static_assert(sizeof(struct ZipStringOffset20) == 4); |
| 97 | |
| 98 | /** |
| 99 | * ZipStringOffset32 stores a 4 byte offset from a fixed location in the memory |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 100 | * mapped file instead of the entire address, consuming 8 bytes with alignment. |
| 101 | */ |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 102 | struct ZipStringOffset32 { |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 103 | uint32_t name_offset; |
| 104 | uint16_t name_length; |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 105 | }; |
| 106 | |
| 107 | // This implementation of CdEntryMap uses an array hash table. It uses less |
| 108 | // memory than std::map; and it's used as the default implementation for zip |
| 109 | // archives without zip64 extension. |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 110 | template <typename ZipStringOffset> |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 111 | class CdEntryMapZip32 : public CdEntryMapInterface { |
| 112 | public: |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 113 | ZipError AddToMap(std::string_view name, const uint8_t* start) override; |
| 114 | std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name, |
| 115 | const uint8_t* cd_start) const override; |
| 116 | void ResetIteration() override; |
| 117 | std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override; |
| 118 | |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 119 | explicit CdEntryMapZip32(uint16_t num_entries) { |
| 120 | /* |
| 121 | * Create hash table. We have a minimum 75% load factor, possibly as |
| 122 | * low as 50% after we round off to a power of 2. There must be at |
| 123 | * least one unused entry to avoid an infinite loop during creation. |
| 124 | */ |
| 125 | hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3); |
| 126 | hash_table_.reset(static_cast<ZipStringOffset*>( |
| 127 | calloc(hash_table_size_, sizeof(ZipStringOffset)))); |
| 128 | |
| 129 | CHECK(hash_table_ != nullptr) |
| 130 | << "Zip: unable to allocate the " << hash_table_size_ |
| 131 | << " entry hash_table, entry size: " << sizeof(ZipStringOffset); |
| 132 | } |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 133 | |
| 134 | // We know how many entries are in the Zip archive, so we can have a |
| 135 | // fixed-size hash table. We define a load factor of 0.75 and over |
| 136 | // allocate so the maximum number entries can never be higher than |
| 137 | // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. |
| Yurii Zubrytskyi | d02278f | 2022-11-11 14:55:07 -0800 | [diff] [blame] | 138 | struct FreeDeleter { |
| 139 | void operator()(void* ptr) const { ::free(ptr); } |
| 140 | }; |
| 141 | std::unique_ptr<ZipStringOffset[], FreeDeleter> hash_table_; |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 142 | uint32_t hash_table_size_{0}; |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 143 | |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 144 | private: |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 145 | // The position of element for the current iteration. |
| 146 | uint32_t current_position_{0}; |
| 147 | }; |
| 148 | |
| 149 | // This implementation of CdEntryMap uses a std::map |
| 150 | class CdEntryMapZip64 : public CdEntryMapInterface { |
| 151 | public: |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 152 | ZipError AddToMap(std::string_view name, const uint8_t* start) override; |
| 153 | std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name, |
| 154 | const uint8_t* cd_start) const override; |
| 155 | void ResetIteration() override; |
| 156 | std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override; |
| 157 | |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 158 | CdEntryMapZip64() = default; |
| Eric Miao | 561aac0 | 2022-12-05 16:14:57 -0800 | [diff] [blame] | 159 | private: |
| Tianjie Xu | 91caafe | 2020-03-13 16:16:24 -0700 | [diff] [blame] | 160 | |
| 161 | std::map<std::string_view, uint64_t> entry_table_; |
| 162 | |
| 163 | std::map<std::string_view, uint64_t>::iterator iterator_; |
| 164 | }; |