blob: 0ae60b90702d07ca8d7519cb1721edb75e1dd5ee [file] [log] [blame]
David Srbecky052f8ca2018-04-26 15:42:54 +01001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_LIBARTBASE_BASE_BIT_TABLE_H_
18#define ART_LIBARTBASE_BASE_BIT_TABLE_H_
19
David Srbeckydd966bc2018-05-24 13:55:52 +010020#include <array>
21#include <numeric>
22#include <string.h>
23#include <type_traits>
24#include <unordered_map>
David Srbecky052f8ca2018-04-26 15:42:54 +010025
26#include "base/bit_memory_region.h"
David Srbeckydd966bc2018-05-24 13:55:52 +010027#include "base/casts.h"
David Srbecky052f8ca2018-04-26 15:42:54 +010028#include "base/memory_region.h"
David Srbeckydd966bc2018-05-24 13:55:52 +010029#include "base/scoped_arena_containers.h"
30#include "base/stl_util.h"
David Srbecky052f8ca2018-04-26 15:42:54 +010031
32namespace art {
33
34constexpr uint32_t kVarintHeaderBits = 4;
35constexpr uint32_t kVarintSmallValue = 11; // Maximum value which is stored as-is.
36
37// Load variable-length bit-packed integer from `data` starting at `bit_offset`.
38// The first four bits determine the variable length of the encoded integer:
39// Values 0..11 represent the result as-is, with no further following bits.
40// Values 12..15 mean the result is in the next 8/16/24/32-bits respectively.
41ALWAYS_INLINE static inline uint32_t DecodeVarintBits(BitMemoryRegion region, size_t* bit_offset) {
42 uint32_t x = region.LoadBitsAndAdvance(bit_offset, kVarintHeaderBits);
43 if (x > kVarintSmallValue) {
44 x = region.LoadBitsAndAdvance(bit_offset, (x - kVarintSmallValue) * kBitsPerByte);
45 }
46 return x;
47}
48
49// Store variable-length bit-packed integer from `data` starting at `bit_offset`.
50template<typename Vector>
51ALWAYS_INLINE static inline void EncodeVarintBits(Vector* out, size_t* bit_offset, uint32_t value) {
52 if (value <= kVarintSmallValue) {
53 out->resize(BitsToBytesRoundUp(*bit_offset + kVarintHeaderBits));
54 BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
55 region.StoreBitsAndAdvance(bit_offset, value, kVarintHeaderBits);
56 } else {
57 uint32_t num_bits = RoundUp(MinimumBitsToStore(value), kBitsPerByte);
58 out->resize(BitsToBytesRoundUp(*bit_offset + kVarintHeaderBits + num_bits));
59 BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
60 uint32_t header = kVarintSmallValue + num_bits / kBitsPerByte;
61 region.StoreBitsAndAdvance(bit_offset, header, kVarintHeaderBits);
62 region.StoreBitsAndAdvance(bit_offset, value, num_bits);
63 }
64}
65
66template<uint32_t kNumColumns>
67class BitTable {
68 public:
69 class Accessor {
70 public:
David Srbeckyd97e0822018-06-03 12:00:24 +010071 static constexpr uint32_t kCount = kNumColumns;
David Srbecky052f8ca2018-04-26 15:42:54 +010072 static constexpr uint32_t kNoValue = std::numeric_limits<uint32_t>::max();
73
David Srbeckyd97e0822018-06-03 12:00:24 +010074 Accessor() {}
David Srbecky052f8ca2018-04-26 15:42:54 +010075 Accessor(const BitTable* table, uint32_t row) : table_(table), row_(row) {}
76
77 ALWAYS_INLINE uint32_t Row() const { return row_; }
78
79 ALWAYS_INLINE bool IsValid() const { return table_ != nullptr && row_ < table_->NumRows(); }
80
81 template<uint32_t Column>
82 ALWAYS_INLINE uint32_t Get() const {
83 static_assert(Column < kNumColumns, "Column out of bounds");
84 return table_->Get(row_, Column);
85 }
86
87 ALWAYS_INLINE bool Equals(const Accessor& other) {
88 return this->table_ == other.table_ && this->row_ == other.row_;
89 }
90
David Srbeckyd97e0822018-06-03 12:00:24 +010091// Helper macro to create constructors and per-table utilities in derived class.
92#define BIT_TABLE_HEADER() \
93 using BitTable<kCount>::Accessor::Accessor; /* inherit the constructors */ \
94 template<int COLUMN, int UNUSED /*needed to compile*/> struct ColumnName; \
95
96// Helper macro to create named column accessors in derived class.
97#define BIT_TABLE_COLUMN(COLUMN, NAME) \
98 static constexpr uint32_t k##NAME = COLUMN; \
99 ALWAYS_INLINE uint32_t Get##NAME() const { \
100 return table_->Get(row_, COLUMN); \
101 } \
102 ALWAYS_INLINE bool Has##NAME() const { \
103 return table_->Get(row_, COLUMN) != kNoValue; \
104 } \
105 template<int UNUSED> struct ColumnName<COLUMN, UNUSED> { \
106 static constexpr const char* Value = #NAME; \
107 }; \
David Srbecky052f8ca2018-04-26 15:42:54 +0100108
109 protected:
David Srbeckyd97e0822018-06-03 12:00:24 +0100110 const BitTable* table_ = nullptr;
111 uint32_t row_ = -1;
David Srbecky052f8ca2018-04-26 15:42:54 +0100112 };
113
114 static constexpr uint32_t kValueBias = -1;
115
116 BitTable() {}
117 BitTable(void* data, size_t size, size_t* bit_offset = 0) {
118 Decode(BitMemoryRegion(MemoryRegion(data, size)), bit_offset);
119 }
120
121 ALWAYS_INLINE void Decode(BitMemoryRegion region, size_t* bit_offset) {
122 // Decode row count and column sizes from the table header.
123 num_rows_ = DecodeVarintBits(region, bit_offset);
124 if (num_rows_ != 0) {
125 column_offset_[0] = 0;
126 for (uint32_t i = 0; i < kNumColumns; i++) {
127 size_t column_end = column_offset_[i] + DecodeVarintBits(region, bit_offset);
David Srbeckydd966bc2018-05-24 13:55:52 +0100128 column_offset_[i + 1] = dchecked_integral_cast<uint16_t>(column_end);
David Srbecky052f8ca2018-04-26 15:42:54 +0100129 }
130 }
131
132 // Record the region which contains the table data and skip past it.
133 table_data_ = region.Subregion(*bit_offset, num_rows_ * NumRowBits());
134 *bit_offset += table_data_.size_in_bits();
135 }
136
137 ALWAYS_INLINE uint32_t Get(uint32_t row, uint32_t column = 0) const {
138 DCHECK_LT(row, num_rows_);
139 DCHECK_LT(column, kNumColumns);
140 size_t offset = row * NumRowBits() + column_offset_[column];
141 return table_data_.LoadBits(offset, NumColumnBits(column)) + kValueBias;
142 }
143
David Srbecky5513c2b2018-05-24 15:03:20 +0100144 ALWAYS_INLINE BitMemoryRegion GetBitMemoryRegion(uint32_t row, uint32_t column = 0) const {
145 DCHECK_LT(row, num_rows_);
146 DCHECK_LT(column, kNumColumns);
147 size_t offset = row * NumRowBits() + column_offset_[column];
148 return table_data_.Subregion(offset, NumColumnBits(column));
149 }
150
David Srbecky052f8ca2018-04-26 15:42:54 +0100151 size_t NumRows() const { return num_rows_; }
152
153 uint32_t NumRowBits() const { return column_offset_[kNumColumns]; }
154
155 constexpr size_t NumColumns() const { return kNumColumns; }
156
157 uint32_t NumColumnBits(uint32_t column) const {
158 return column_offset_[column + 1] - column_offset_[column];
159 }
160
161 size_t DataBitSize() const { return num_rows_ * column_offset_[kNumColumns]; }
162
163 protected:
164 BitMemoryRegion table_data_;
165 size_t num_rows_ = 0;
166
167 uint16_t column_offset_[kNumColumns + 1] = {};
168};
169
David Srbeckyd97e0822018-06-03 12:00:24 +0100170// Template meta-programming helper.
171template<typename Accessor, size_t... Columns>
172static const char** GetBitTableColumnNamesImpl(std::index_sequence<Columns...>) {
173 static const char* names[] = { Accessor::template ColumnName<Columns, 0>::Value... };
174 return names;
175}
David Srbecky052f8ca2018-04-26 15:42:54 +0100176
David Srbeckyd97e0822018-06-03 12:00:24 +0100177template<typename Accessor>
178static const char** GetBitTableColumnNames() {
179 return GetBitTableColumnNamesImpl<Accessor>(std::make_index_sequence<Accessor::kCount>());
180}
David Srbecky052f8ca2018-04-26 15:42:54 +0100181
David Srbeckydd966bc2018-05-24 13:55:52 +0100182// Helper class for encoding BitTable. It can optionally de-duplicate the inputs.
183// Type 'T' must be POD type consisting of uint32_t fields (one for each column).
184template<typename T>
David Srbecky052f8ca2018-04-26 15:42:54 +0100185class BitTableBuilder {
186 public:
David Srbeckydd966bc2018-05-24 13:55:52 +0100187 static_assert(std::is_pod<T>::value, "Type 'T' must be POD");
188 static constexpr size_t kNumColumns = sizeof(T) / sizeof(uint32_t);
David Srbecky052f8ca2018-04-26 15:42:54 +0100189
David Srbeckydd966bc2018-05-24 13:55:52 +0100190 explicit BitTableBuilder(ScopedArenaAllocator* allocator)
David Srbecky159c9dd2018-05-24 14:56:51 +0100191 : rows_(allocator->Adapter(kArenaAllocBitTableBuilder)),
192 dedup_(8, allocator->Adapter(kArenaAllocBitTableBuilder)) {
David Srbeckydd966bc2018-05-24 13:55:52 +0100193 }
194
195 T& operator[](size_t row) { return rows_[row]; }
196 const T& operator[](size_t row) const { return rows_[row]; }
197 size_t size() const { return rows_.size(); }
198
David Srbecky159c9dd2018-05-24 14:56:51 +0100199 // Append given value to the vector without de-duplication.
200 // This will not add the element to the dedup map to avoid its associated costs.
David Srbeckydd966bc2018-05-24 13:55:52 +0100201 void Add(T value) {
202 rows_.push_back(value);
David Srbecky052f8ca2018-04-26 15:42:54 +0100203 }
204
David Srbecky159c9dd2018-05-24 14:56:51 +0100205 // Append given list of values and return the index of the first value.
206 // If the exact same set of values was already added, return the old index.
207 uint32_t Dedup(T* values, size_t count = 1) {
208 FNVHash<MemoryRegion> hasher;
209 uint32_t hash = hasher(MemoryRegion(values, sizeof(T) * count));
210
211 // Check if we have already added identical set of values.
212 auto range = dedup_.equal_range(hash);
213 for (auto it = range.first; it != range.second; ++it) {
214 uint32_t index = it->second;
215 if (count <= size() - index &&
216 std::equal(values,
217 values + count,
David Srbecky5f937102018-06-02 10:24:25 +0100218 rows_.begin() + index,
David Srbecky159c9dd2018-05-24 14:56:51 +0100219 [](const T& lhs, const T& rhs) {
220 return memcmp(&lhs, &rhs, sizeof(T)) == 0;
221 })) {
222 return index;
223 }
224 }
225
226 // Add the set of values and add the index to the dedup map.
227 uint32_t index = size();
228 rows_.insert(rows_.end(), values, values + count);
229 dedup_.emplace(hash, index);
230 return index;
231 }
232
David Srbecky052f8ca2018-04-26 15:42:54 +0100233 ALWAYS_INLINE uint32_t Get(uint32_t row, uint32_t column) const {
David Srbeckydd966bc2018-05-24 13:55:52 +0100234 DCHECK_LT(row, size());
235 DCHECK_LT(column, kNumColumns);
236 const uint32_t* data = reinterpret_cast<const uint32_t*>(&rows_[row]);
237 return data[column];
David Srbecky052f8ca2018-04-26 15:42:54 +0100238 }
239
David Srbeckydd966bc2018-05-24 13:55:52 +0100240 // Calculate the column bit widths based on the current data.
241 void Measure(/*out*/ std::array<uint32_t, kNumColumns>* column_bits) const {
242 uint32_t max_column_value[kNumColumns];
243 std::fill_n(max_column_value, kNumColumns, 0);
244 for (uint32_t r = 0; r < size(); r++) {
245 for (uint32_t c = 0; c < kNumColumns; c++) {
246 max_column_value[c] |= Get(r, c) - BitTable<kNumColumns>::kValueBias;
247 }
248 }
249 for (uint32_t c = 0; c < kNumColumns; c++) {
250 (*column_bits)[c] = MinimumBitsToStore(max_column_value[c]);
251 }
252 }
253
254 // Encode the stored data into a BitTable.
David Srbecky052f8ca2018-04-26 15:42:54 +0100255 template<typename Vector>
David Srbeckydd966bc2018-05-24 13:55:52 +0100256 void Encode(Vector* out, size_t* bit_offset) const {
David Srbecky052f8ca2018-04-26 15:42:54 +0100257 constexpr uint32_t bias = BitTable<kNumColumns>::kValueBias;
258 size_t initial_bit_offset = *bit_offset;
David Srbeckydd966bc2018-05-24 13:55:52 +0100259
260 std::array<uint32_t, kNumColumns> column_bits;
261 Measure(&column_bits);
262 EncodeVarintBits(out, bit_offset, size());
263 if (size() != 0) {
264 // Write table header.
David Srbecky052f8ca2018-04-26 15:42:54 +0100265 for (uint32_t c = 0; c < kNumColumns; c++) {
David Srbecky052f8ca2018-04-26 15:42:54 +0100266 EncodeVarintBits(out, bit_offset, column_bits[c]);
David Srbeckydd966bc2018-05-24 13:55:52 +0100267 }
268
269 // Write table data.
270 uint32_t row_bits = std::accumulate(column_bits.begin(), column_bits.end(), 0u);
271 out->resize(BitsToBytesRoundUp(*bit_offset + row_bits * size()));
272 BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
273 for (uint32_t r = 0; r < size(); r++) {
274 for (uint32_t c = 0; c < kNumColumns; c++) {
275 region.StoreBitsAndAdvance(bit_offset, Get(r, c) - bias, column_bits[c]);
276 }
David Srbecky052f8ca2018-04-26 15:42:54 +0100277 }
278 }
David Srbeckydd966bc2018-05-24 13:55:52 +0100279
David Srbecky052f8ca2018-04-26 15:42:54 +0100280 // Verify the written data.
281 if (kIsDebugBuild) {
282 BitTable<kNumColumns> table;
David Srbeckydd966bc2018-05-24 13:55:52 +0100283 BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
David Srbecky052f8ca2018-04-26 15:42:54 +0100284 table.Decode(region, &initial_bit_offset);
David Srbeckydd966bc2018-05-24 13:55:52 +0100285 DCHECK_EQ(size(), table.NumRows());
David Srbecky052f8ca2018-04-26 15:42:54 +0100286 for (uint32_t c = 0; c < kNumColumns; c++) {
287 DCHECK_EQ(column_bits[c], table.NumColumnBits(c));
288 }
David Srbeckydd966bc2018-05-24 13:55:52 +0100289 for (uint32_t r = 0; r < size(); r++) {
David Srbecky052f8ca2018-04-26 15:42:54 +0100290 for (uint32_t c = 0; c < kNumColumns; c++) {
David Srbeckydd966bc2018-05-24 13:55:52 +0100291 DCHECK_EQ(Get(r, c), table.Get(r, c)) << " (" << r << ", " << c << ")";
David Srbecky052f8ca2018-04-26 15:42:54 +0100292 }
293 }
294 }
295 }
296
297 protected:
David Srbeckydd966bc2018-05-24 13:55:52 +0100298 ScopedArenaDeque<T> rows_;
David Srbecky159c9dd2018-05-24 14:56:51 +0100299 ScopedArenaUnorderedMultimap<uint32_t, uint32_t> dedup_; // Hash -> row index.
David Srbecky052f8ca2018-04-26 15:42:54 +0100300};
301
David Srbecky5513c2b2018-05-24 15:03:20 +0100302// Helper class for encoding single-column BitTable of bitmaps (allows more than 32 bits).
303class BitmapTableBuilder {
304 public:
305 explicit BitmapTableBuilder(ScopedArenaAllocator* const allocator)
306 : allocator_(allocator),
307 rows_(allocator->Adapter(kArenaAllocBitTableBuilder)),
308 dedup_(8, allocator_->Adapter(kArenaAllocBitTableBuilder)) {
309 }
310
311 MemoryRegion operator[](size_t row) { return rows_[row]; }
312 const MemoryRegion operator[](size_t row) const { return rows_[row]; }
313 size_t size() const { return rows_.size(); }
314
315 // Add the given bitmap to the table and return its index.
316 // If the bitmap was already added it will be deduplicated.
317 // The last bit must be set and any padding bits in the last byte must be zero.
318 uint32_t Dedup(const void* bitmap, size_t num_bits) {
319 MemoryRegion region(const_cast<void*>(bitmap), BitsToBytesRoundUp(num_bits));
320 DCHECK(num_bits == 0 || BitMemoryRegion(region).LoadBit(num_bits - 1) == 1);
321 DCHECK_EQ(BitMemoryRegion(region).LoadBits(num_bits, region.size_in_bits() - num_bits), 0u);
322 FNVHash<MemoryRegion> hasher;
323 uint32_t hash = hasher(region);
324
325 // Check if we have already added identical bitmap.
326 auto range = dedup_.equal_range(hash);
327 for (auto it = range.first; it != range.second; ++it) {
328 if (MemoryRegion::ContentEquals()(region, rows_[it->second])) {
329 return it->second;
330 }
331 }
332
333 // Add the bitmap and add the index to the dedup map.
334 uint32_t index = size();
335 void* copy = allocator_->Alloc(region.size(), kArenaAllocBitTableBuilder);
336 memcpy(copy, region.pointer(), region.size());
337 rows_.push_back(MemoryRegion(copy, region.size()));
338 dedup_.emplace(hash, index);
339 max_num_bits_ = std::max(max_num_bits_, num_bits);
340 return index;
341 }
342
343 // Encode the stored data into a BitTable.
344 template<typename Vector>
345 void Encode(Vector* out, size_t* bit_offset) const {
346 size_t initial_bit_offset = *bit_offset;
347
348 EncodeVarintBits(out, bit_offset, size());
349 if (size() != 0) {
350 EncodeVarintBits(out, bit_offset, max_num_bits_);
351
352 // Write table data.
353 out->resize(BitsToBytesRoundUp(*bit_offset + max_num_bits_ * size()));
354 BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
355 for (MemoryRegion row : rows_) {
356 BitMemoryRegion src(row);
357 region.StoreBits(*bit_offset, src, std::min(max_num_bits_, src.size_in_bits()));
358 *bit_offset += max_num_bits_;
359 }
360 }
361
362 // Verify the written data.
363 if (kIsDebugBuild) {
364 BitTable<1> table;
365 BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
366 table.Decode(region, &initial_bit_offset);
367 DCHECK_EQ(size(), table.NumRows());
368 DCHECK_EQ(max_num_bits_, table.NumColumnBits(0));
369 for (uint32_t r = 0; r < size(); r++) {
370 BitMemoryRegion expected(rows_[r]);
371 BitMemoryRegion seen = table.GetBitMemoryRegion(r);
372 size_t num_bits = std::max(expected.size_in_bits(), seen.size_in_bits());
373 for (size_t b = 0; b < num_bits; b++) {
374 bool e = b < expected.size_in_bits() && expected.LoadBit(b);
375 bool s = b < seen.size_in_bits() && seen.LoadBit(b);
376 DCHECK_EQ(e, s) << " (" << r << ")[" << b << "]";
377 }
378 }
379 }
380 }
381
382 private:
383 ScopedArenaAllocator* const allocator_;
384 ScopedArenaDeque<MemoryRegion> rows_;
385 ScopedArenaUnorderedMultimap<uint32_t, uint32_t> dedup_; // Hash -> row index.
386 size_t max_num_bits_ = 0u;
387};
388
David Srbecky052f8ca2018-04-26 15:42:54 +0100389} // namespace art
390
391#endif // ART_LIBARTBASE_BASE_BIT_TABLE_H_