Parallel image decompression

Add a runtime thread pool to facilitate parallel app image loading.

Use the thread pool to decompress the image; this results in a ~1%
app startup speedup.

Test: test-art-host
Test: manual
Bug: 116052292

Change-Id: If35f71ff632ac58e67d11eed4b5f5b19656cc301
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ab79b9e..0bf5967 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -34,6 +34,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
+#include <thread>
 #include <vector>
 
 #include "android-base/strings.h"
@@ -388,6 +389,11 @@
     jit_->DeleteThreadPool();
   }
 
+  // Thread pools must be deleted before the runtime shuts down to avoid hanging.
+  if (thread_pool_ != nullptr) {
+    thread_pool_.reset();
+  }
+
   // Make sure our internal threads are dead before we start tearing down things they're using.
   GetRuntimeCallbacks()->StopDebugger();
   delete signal_catcher_;
@@ -910,6 +916,14 @@
     jit_->CreateThreadPool();
   }
 
+  if (thread_pool_ == nullptr) {
+    constexpr size_t kMaxRuntimeThreads = 4u;
+    thread_pool_.reset(
+        new ThreadPool("Runtime", std::min(
+            static_cast<size_t>(std::thread::hardware_concurrency()), kMaxRuntimeThreads)));
+    thread_pool_->StartWorkers(Thread::Current());
+  }
+
   // Create the thread pools.
   heap_->CreateThreadPool();
   // Reset the gc performance data at zygote fork so that the GCs