| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2020 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ |
| 19 | |
| 20 | #include <algorithm> |
| 21 | #include <sstream> |
| 22 | |
| 23 | #include "base/arena_allocator.h" |
| 24 | #include "base/arena_bit_vector.h" |
| 25 | #include "base/arena_containers.h" |
| 26 | #include "base/array_ref.h" |
| 27 | #include "base/bit_vector-inl.h" |
| 28 | #include "base/globals.h" |
| 29 | #include "base/iteration_range.h" |
| Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 30 | #include "base/mutex.h" |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 31 | #include "base/scoped_arena_allocator.h" |
| 32 | #include "base/scoped_arena_containers.h" |
| 33 | #include "base/stl_util.h" |
| 34 | #include "base/transform_iterator.h" |
| 35 | #include "nodes.h" |
| 36 | |
| 37 | namespace art { |
| 38 | |
| Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 39 | // Helper for transforming blocks to block_ids. |
| 40 | class BlockToBlockIdTransformer { |
| 41 | public: |
| 42 | BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default; |
| 43 | BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default; |
| 44 | BlockToBlockIdTransformer() {} |
| 45 | |
| 46 | inline uint32_t operator()(const HBasicBlock* b) const { |
| 47 | return b->GetBlockId(); |
| 48 | } |
| 49 | }; |
| 50 | |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 51 | // Helper for transforming block ids to blocks. |
| 52 | class BlockIdToBlockTransformer { |
| 53 | public: |
| 54 | BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default; |
| 55 | BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default; |
| 56 | explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {} |
| 57 | |
| 58 | inline const HGraph* GetGraph() const { |
| 59 | return graph_; |
| 60 | } |
| 61 | |
| 62 | inline HBasicBlock* GetBlock(uint32_t id) const { |
| 63 | DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod(); |
| 64 | HBasicBlock* blk = graph_->GetBlocks()[id]; |
| 65 | DCHECK(blk != nullptr); |
| 66 | return blk; |
| 67 | } |
| 68 | |
| 69 | inline HBasicBlock* operator()(uint32_t id) const { |
| 70 | return GetBlock(id); |
| 71 | } |
| 72 | |
| 73 | private: |
| 74 | const HGraph* const graph_; |
| 75 | }; |
| 76 | |
| Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 77 | class BlockIdFilterThunk { |
| 78 | public: |
| 79 | explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {} |
| 80 | BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default; |
| 81 | BlockIdFilterThunk(const BlockIdFilterThunk&) = default; |
| 82 | |
| 83 | bool operator()(const HBasicBlock* b) const { |
| 84 | return inner_.IsBitSet(b->GetBlockId()); |
| 85 | } |
| 86 | |
| 87 | private: |
| 88 | const BitVector& inner_; |
| 89 | }; |
| 90 | |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 91 | // A representation of a particular section of the graph. The graph is split |
| 92 | // into an excluded and included area and is used to track escapes. |
| 93 | // |
| 94 | // This object is a view of the graph and is not updated as the graph is |
| 95 | // changed. |
| 96 | // |
| 97 | // This is implemented by removing various escape points from the subgraph using |
| 98 | // the 'RemoveBlock' function. Once all required blocks are removed one will |
| 99 | // 'Finalize' the subgraph. This will extend the removed area to include: |
| 100 | // (1) Any block which inevitably leads to (post-dominates) a removed block |
| 101 | // (2) any block which is between 2 removed blocks |
| 102 | // |
| 103 | // This allows us to create a set of 'ExcludedCohorts' which are the |
| 104 | // well-connected subsets of the graph made up of removed blocks. These cohorts |
| 105 | // have a set of entry and exit blocks which act as the boundary of the cohort. |
| 106 | // Since we removed blocks between 2 excluded blocks it is impossible for any |
| 107 | // cohort-exit block to reach any cohort-entry block. This means we can use the |
| 108 | // boundary between the cohort and the rest of the graph to insert |
| 109 | // materialization blocks for partial LSE. |
| Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 110 | // |
| 111 | // TODO We really should expand this to take into account where the object |
| 112 | // allocation takes place directly. Currently we always act as though it were |
| 113 | // allocated in the entry block. This is a massively simplifying assumption but |
| 114 | // means we can't partially remove objects that are repeatedly allocated in a |
| 115 | // loop. |
| Vladimir Marko | 5c82493 | 2021-06-02 15:54:17 +0100 | [diff] [blame] | 116 | class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> { |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 117 | public: |
| 118 | using BitVecBlockRange = |
| 119 | IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>; |
| Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 120 | using FilteredBitVecBlockRange = IterationRange< |
| 121 | FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>; |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 122 | |
| 123 | // A set of connected blocks which are connected and removed from the |
| 124 | // ExecutionSubgraph. See above comment for explanation. |
| 125 | class ExcludedCohort : public ArenaObject<kArenaAllocLSA> { |
| 126 | public: |
| 127 | ExcludedCohort(ExcludedCohort&&) = default; |
| 128 | ExcludedCohort(const ExcludedCohort&) = delete; |
| 129 | explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph) |
| 130 | : graph_(graph), |
| 131 | entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), |
| 132 | exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), |
| 133 | blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {} |
| 134 | |
| 135 | ~ExcludedCohort() = default; |
| 136 | |
| 137 | // All blocks in the cohort. |
| 138 | BitVecBlockRange Blocks() const { |
| 139 | return BlockIterRange(blocks_); |
| 140 | } |
| 141 | |
| 142 | // Blocks that have predecessors outside of the cohort. These blocks will |
| 143 | // need to have PHIs/control-flow added to create the escaping value. |
| 144 | BitVecBlockRange EntryBlocks() const { |
| 145 | return BlockIterRange(entry_blocks_); |
| 146 | } |
| 147 | |
| Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 148 | FilteredBitVecBlockRange EntryBlocksReversePostOrder() const { |
| 149 | return Filter(MakeIterationRange(graph_->GetReversePostOrder()), |
| 150 | BlockIdFilterThunk(entry_blocks_)); |
| 151 | } |
| 152 | |
| 153 | bool IsEntryBlock(const HBasicBlock* blk) const { |
| 154 | return entry_blocks_.IsBitSet(blk->GetBlockId()); |
| 155 | } |
| 156 | |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 157 | // Blocks that have successors outside of the cohort. The successors of |
| 158 | // these blocks will need to have PHI's to restore state. |
| 159 | BitVecBlockRange ExitBlocks() const { |
| 160 | return BlockIterRange(exit_blocks_); |
| 161 | } |
| 162 | |
| 163 | bool operator==(const ExcludedCohort& other) const { |
| 164 | return blocks_.Equal(&other.blocks_); |
| 165 | } |
| 166 | |
| 167 | bool ContainsBlock(const HBasicBlock* blk) const { |
| 168 | return blocks_.IsBitSet(blk->GetBlockId()); |
| 169 | } |
| 170 | |
| 171 | // Returns true if there is a path from 'blk' to any block in this cohort. |
| 172 | // NB blocks contained within the cohort are not considered to be succeeded |
| 173 | // by the cohort (i.e. this function will return false). |
| 174 | bool SucceedsBlock(const HBasicBlock* blk) const { |
| 175 | if (ContainsBlock(blk)) { |
| 176 | return false; |
| 177 | } |
| 178 | auto idxs = entry_blocks_.Indexes(); |
| 179 | return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool { |
| 180 | return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry); |
| 181 | }); |
| 182 | } |
| 183 | |
| 184 | // Returns true if there is a path from any block in this cohort to 'blk'. |
| 185 | // NB blocks contained within the cohort are not considered to be preceded |
| 186 | // by the cohort (i.e. this function will return false). |
| 187 | bool PrecedesBlock(const HBasicBlock* blk) const { |
| 188 | if (ContainsBlock(blk)) { |
| 189 | return false; |
| 190 | } |
| 191 | auto idxs = exit_blocks_.Indexes(); |
| 192 | return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool { |
| 193 | return blk->GetGraph()->PathBetween(exit, blk->GetBlockId()); |
| 194 | }); |
| 195 | } |
| 196 | |
| 197 | void Dump(std::ostream& os) const; |
| 198 | |
| 199 | private: |
| 200 | BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const { |
| 201 | auto indexes = bv.Indexes(); |
| 202 | BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_)); |
| 203 | return res; |
| 204 | } |
| 205 | |
| 206 | ExcludedCohort() = delete; |
| 207 | |
| 208 | HGraph* graph_; |
| 209 | ArenaBitVector entry_blocks_; |
| 210 | ArenaBitVector exit_blocks_; |
| 211 | ArenaBitVector blocks_; |
| 212 | |
| 213 | friend class ExecutionSubgraph; |
| 214 | friend class LoadStoreAnalysisTest; |
| 215 | }; |
| 216 | |
| 217 | // The number of successors we can track on a single block. Graphs which |
| 218 | // contain a block with a branching factor greater than this will not be |
| 219 | // analysed. This is used to both limit the memory usage of analysis to |
| 220 | // reasonable levels and ensure that the analysis will complete in a |
| 221 | // reasonable amount of time. It also simplifies the implementation somewhat |
| 222 | // to have a constant branching factor. |
| 223 | static constexpr uint32_t kMaxFilterableSuccessors = 8; |
| 224 | |
| Vladimir Marko | 5c82493 | 2021-06-02 15:54:17 +0100 | [diff] [blame] | 225 | // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape |
| 226 | // analysis is desired (eg not when being used for instruction scheduling) and |
| 227 | // when the branching factor in the graph is not too high. These conditions |
| 228 | // are determined once and passed down for performance reasons. |
| 229 | ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator); |
| Alex Light | 86fe9b8 | 2020-11-16 16:54:01 +0000 | [diff] [blame] | 230 | |
| 231 | void Invalidate() { |
| 232 | valid_ = false; |
| 233 | } |
| 234 | |
| 235 | // A block is contained by the ExecutionSubgraph if it is reachable. This |
| 236 | // means it has not been removed explicitly or via pruning/concavity removal. |
| 237 | // Finalization is needed to call this function. |
| 238 | // See RemoveConcavity and Prune for more information. |
| 239 | bool ContainsBlock(const HBasicBlock* blk) const { |
| 240 | DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_; |
| 241 | if (!valid_) { |
| 242 | return false; |
| 243 | } |
| 244 | return !unreachable_blocks_.IsBitSet(blk->GetBlockId()); |
| 245 | } |
| 246 | |
| 247 | // Mark the block as removed from the subgraph. |
| 248 | void RemoveBlock(const HBasicBlock* to_remove); |
| 249 | |
| 250 | // Called when no more updates will be done to the subgraph. Calculate the |
| 251 | // final subgraph |
| 252 | void Finalize() { |
| 253 | Prune(); |
| 254 | RemoveConcavity(); |
| 255 | finalized_ = true; |
| 256 | } |
| 257 | |
| 258 | BitVecBlockRange UnreachableBlocks() const { |
| 259 | auto idxs = unreachable_blocks_.Indexes(); |
| 260 | return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_)); |
| 261 | } |
| 262 | |
| 263 | // Returns true if all allowed execution paths from start eventually reach the |
| 264 | // graph's exit block (or diverge). |
| 265 | bool IsValid() const { |
| 266 | return valid_; |
| 267 | } |
| 268 | |
| 269 | ArrayRef<const ExcludedCohort> GetExcludedCohorts() const { |
| 270 | DCHECK(!valid_ || !needs_prune_); |
| 271 | if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) { |
| 272 | return ArrayRef<const ExcludedCohort>(); |
| 273 | } else { |
| 274 | return ArrayRef<const ExcludedCohort>(*excluded_list_); |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | // Helper class to create reachable blocks iterator. |
| 279 | class ContainsFunctor { |
| 280 | public: |
| 281 | bool operator()(HBasicBlock* blk) const { |
| 282 | return subgraph_->ContainsBlock(blk); |
| 283 | } |
| 284 | |
| 285 | private: |
| 286 | explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {} |
| 287 | const ExecutionSubgraph* const subgraph_; |
| 288 | friend class ExecutionSubgraph; |
| 289 | }; |
| 290 | // Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing. |
| 291 | IterationRange< |
| 292 | FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>> |
| 293 | ReachableBlocks() const { |
| 294 | return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this)); |
| 295 | } |
| 296 | |
| 297 | static bool CanAnalyse(HGraph* graph) { |
| 298 | // If there are any blocks with more than kMaxFilterableSuccessors we can't |
| 299 | // analyse the graph. We avoid this case to prevent excessive memory and |
| 300 | // time usage while allowing a simpler algorithm with a fixed-width |
| 301 | // branching factor. |
| 302 | return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) { |
| 303 | return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors; |
| 304 | }); |
| 305 | } |
| 306 | |
| 307 | private: |
| 308 | std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const { |
| 309 | DCHECK(valid_); |
| 310 | return allowed_successors_[blk->GetBlockId()]; |
| 311 | } |
| 312 | |
| 313 | void LimitBlockSuccessors(const HBasicBlock* block, |
| 314 | std::bitset<kMaxFilterableSuccessors> allowed) { |
| 315 | needs_prune_ = true; |
| 316 | allowed_successors_[block->GetBlockId()] &= allowed; |
| 317 | } |
| 318 | |
| 319 | // Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal |
| 320 | // with only conditionally materializing objects depending on if we already materialized them |
| 321 | // Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) && |
| 322 | // PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures |
| 323 | // that no execution can leave and then re-enter any exclusion. |
| 324 | void RemoveConcavity(); |
| 325 | |
| 326 | // Removes sink nodes. Sink nodes are nodes where there is no execution which |
| 327 | // avoids all removed nodes. |
| 328 | void Prune(); |
| 329 | |
| 330 | void RecalculateExcludedCohort(); |
| 331 | |
| 332 | HGraph* graph_; |
| 333 | ScopedArenaAllocator* allocator_; |
| 334 | // The map from block_id -> allowed-successors. |
| 335 | // This is the canonical representation of this subgraph. If a bit in the |
| 336 | // bitset is not set then the corresponding outgoing edge of that block is not |
| 337 | // considered traversable. |
| 338 | ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_; |
| 339 | // Helper that holds which blocks we are able to reach. Only valid if |
| 340 | // 'needs_prune_ == false'. |
| 341 | ArenaBitVector unreachable_blocks_; |
| 342 | // A list of the excluded-cohorts of this subgraph. This is only valid when |
| 343 | // 'needs_prune_ == false' |
| 344 | std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_; |
| 345 | // Bool to hold if there is at least one known path from the start block to |
| 346 | // the end in this graph. Used to short-circuit computation. |
| 347 | bool valid_; |
| 348 | // True if the subgraph is consistent and can be queried. Modifying the |
| 349 | // subgraph clears this and requires a prune to restore. |
| 350 | bool needs_prune_; |
| 351 | // True if no more modification of the subgraph is permitted. |
| 352 | bool finalized_; |
| 353 | |
| 354 | friend class ExecutionSubgraphTest; |
| 355 | friend class LoadStoreAnalysisTest; |
| 356 | |
| 357 | DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph); |
| 358 | }; |
| 359 | |
| 360 | std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex); |
| 361 | |
| 362 | } // namespace art |
| 363 | |
| 364 | #endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ |