Merge "ART: Support for SourceDebugExtension"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index ed34a8d..11af1c0 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -125,6 +125,7 @@
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
 ART_GTEST_unstarted_runtime_test_DEX_DEPS := Nested
+ART_GTEST_heap_verification_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps VerifierDepsMulti MultiDex
 ART_GTEST_dex_to_dex_decompiler_test_DEX_DEPS := VerifierDeps DexToDexDecompiler
 
@@ -655,6 +656,7 @@
 ART_GTEST_stub_test_DEX_DEPS :=
 ART_GTEST_transaction_test_DEX_DEPS :=
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS :=
+ART_GTEST_heap_verification_test_DEX_DEPS :=
 ART_GTEST_verifier_deps_test_DEX_DEPS :=
 ART_VALGRIND_DEPENDENCIES :=
 ART_VALGRIND_TARGET_DEPENDENCIES :=
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 312fc7b..dec8b57 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -29,6 +29,7 @@
         "debug/elf_debug_writer.cc",
         "dex/dex_to_dex_compiler.cc",
         "dex/dex_to_dex_decompiler.cc",
+        "dex/inline_method_analyser.cc",
         "dex/verified_method.cc",
         "dex/verification_results.cc",
         "dex/quick_compiler_callbacks.cc",
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 8b30292..39edd1e 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -18,7 +18,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
-#include "art_method.h"
+#include "art_method-inl.h"
 #include "base/enums.h"
 #include "class_linker.h"
 #include "compiled_method.h"
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 30d4b47..558c7d5 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "debug/dwarf/debug_abbrev_writer.h"
 #include "debug/dwarf/debug_info_entry_writer.h"
 #include "debug/elf_compilation_unit.h"
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 538fe93..1573062 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -27,7 +27,6 @@
 #include "dex_instruction-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
-#include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
 #include "thread-inl.h"
 
diff --git a/runtime/quick/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc
similarity index 95%
rename from runtime/quick/inline_method_analyser.cc
rename to compiler/dex/inline_method_analyser.cc
index 3347070..e691a67 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/compiler/dex/inline_method_analyser.cc
@@ -26,7 +26,6 @@
 #include "dex_instruction_utils.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
-#include "verifier/method_verifier-inl.h"
 
 /*
  * NOTE: This code is part of the quick compiler. It lives in the runtime
@@ -391,7 +390,6 @@
 #undef STORE_IPUT
 
   result->opcode = kInlineOpConstructor;
-  result->flags = kInlineSpecial;
   result->d.constructor_data.reserved = 0u;
   return true;
 }
@@ -429,25 +427,6 @@
 static_assert(InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT) ==
     InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT), "iget/iput_short variant");
 
-// This is used by compiler and debugger. We look into the dex cache for resolved methods and
-// fields. However, in the context of the debugger, not all methods and fields are resolved. Since
-// we need to be able to detect possibly inlined method, we pass a null inline method to indicate
-// we don't want to take unresolved methods and fields into account during analysis.
-bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier,
-                                             InlineMethod* result) {
-  DCHECK(verifier != nullptr);
-  if (!Runtime::Current()->UseJitCompilation()) {
-    DCHECK_EQ(verifier->CanLoadClasses(), result != nullptr);
-  }
-
-  // Note: verifier->GetMethod() may be null.
-  return AnalyseMethodCode(verifier->CodeItem(),
-                           verifier->GetMethodReference(),
-                           (verifier->GetAccessFlags() & kAccStatic) != 0u,
-                           verifier->GetMethod(),
-                           result);
-}
-
 bool InlineMethodAnalyser::AnalyseMethodCode(ArtMethod* method, InlineMethod* result) {
   const DexFile::CodeItem* code_item = method->GetCodeItem();
   if (code_item == nullptr) {
@@ -473,7 +452,6 @@
     case Instruction::RETURN_VOID:
       if (result != nullptr) {
         result->opcode = kInlineOpNop;
-        result->flags = kInlineSpecial;
         result->d.data = 0u;
       }
       return true;
@@ -549,7 +527,6 @@
 
   if (result != nullptr) {
     result->opcode = kInlineOpReturnArg;
-    result->flags = kInlineSpecial;
     InlineReturnArgData* data = &result->d.return_data;
     data->arg = reg - arg_start;
     data->is_wide = (return_opcode == Instruction::RETURN_WIDE) ? 1u : 0u;
@@ -586,7 +563,6 @@
   }
   if (result != nullptr) {
     result->opcode = kInlineOpNonWideConst;
-    result->flags = kInlineSpecial;
     result->d.data = static_cast<uint64_t>(const_value);
   }
   return true;
@@ -647,7 +623,6 @@
       return false;
     }
     result->opcode = kInlineOpIGet;
-    result->flags = kInlineSpecial;
     data->op_variant = IGetVariant(opcode);
     data->method_is_static = is_static ? 1u : 0u;
     data->object_arg = object_arg;  // Allow IGET on any register, not just "this".
@@ -716,7 +691,6 @@
       return false;
     }
     result->opcode = kInlineOpIPut;
-    result->flags = kInlineSpecial;
     data->op_variant = IPutVariant(opcode);
     data->method_is_static = is_static ? 1u : 0u;
     data->object_arg = object_arg;  // Allow IPUT on any register, not just "this".
diff --git a/runtime/quick/inline_method_analyser.h b/compiler/dex/inline_method_analyser.h
similarity index 61%
rename from runtime/quick/inline_method_analyser.h
rename to compiler/dex/inline_method_analyser.h
index 2df2ced..a35e97f 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/compiler/dex/inline_method_analyser.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_QUICK_INLINE_METHOD_ANALYSER_H_
-#define ART_RUNTIME_QUICK_INLINE_METHOD_ANALYSER_H_
+#ifndef ART_COMPILER_DEX_INLINE_METHOD_ANALYSER_H_
+#define ART_COMPILER_DEX_INLINE_METHOD_ANALYSER_H_
 
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -36,128 +36,12 @@
 class ArtMethod;
 
 enum InlineMethodOpcode : uint16_t {
-  kIntrinsicDoubleCvt,
-  kIntrinsicFloatCvt,
-  kIntrinsicFloat2Int,
-  kIntrinsicDouble2Long,
-  kIntrinsicFloatIsInfinite,
-  kIntrinsicDoubleIsInfinite,
-  kIntrinsicFloatIsNaN,
-  kIntrinsicDoubleIsNaN,
-  kIntrinsicReverseBits,
-  kIntrinsicReverseBytes,
-  kIntrinsicBitCount,
-  kIntrinsicCompare,
-  kIntrinsicHighestOneBit,
-  kIntrinsicLowestOneBit,
-  kIntrinsicNumberOfLeadingZeros,
-  kIntrinsicNumberOfTrailingZeros,
-  kIntrinsicRotateRight,
-  kIntrinsicRotateLeft,
-  kIntrinsicSignum,
-  kIntrinsicAbsInt,
-  kIntrinsicAbsLong,
-  kIntrinsicAbsFloat,
-  kIntrinsicAbsDouble,
-  kIntrinsicMinMaxInt,
-  kIntrinsicMinMaxLong,
-  kIntrinsicMinMaxFloat,
-  kIntrinsicMinMaxDouble,
-  kIntrinsicCos,
-  kIntrinsicSin,
-  kIntrinsicAcos,
-  kIntrinsicAsin,
-  kIntrinsicAtan,
-  kIntrinsicAtan2,
-  kIntrinsicCbrt,
-  kIntrinsicCosh,
-  kIntrinsicExp,
-  kIntrinsicExpm1,
-  kIntrinsicHypot,
-  kIntrinsicLog,
-  kIntrinsicLog10,
-  kIntrinsicNextAfter,
-  kIntrinsicSinh,
-  kIntrinsicTan,
-  kIntrinsicTanh,
-  kIntrinsicSqrt,
-  kIntrinsicCeil,
-  kIntrinsicFloor,
-  kIntrinsicRint,
-  kIntrinsicRoundFloat,
-  kIntrinsicRoundDouble,
-  kIntrinsicReferenceGetReferent,
-  kIntrinsicCharAt,
-  kIntrinsicCompareTo,
-  kIntrinsicEquals,
-  kIntrinsicGetCharsNoCheck,
-  kIntrinsicIsEmptyOrLength,
-  kIntrinsicIndexOf,
-  kIntrinsicNewStringFromBytes,
-  kIntrinsicNewStringFromChars,
-  kIntrinsicNewStringFromString,
-  kIntrinsicCurrentThread,
-  kIntrinsicPeek,
-  kIntrinsicPoke,
-  kIntrinsicCas,
-  kIntrinsicUnsafeGet,
-  kIntrinsicUnsafePut,
-
-  // 1.8.
-  kIntrinsicUnsafeGetAndAddInt,
-  kIntrinsicUnsafeGetAndAddLong,
-  kIntrinsicUnsafeGetAndSetInt,
-  kIntrinsicUnsafeGetAndSetLong,
-  kIntrinsicUnsafeGetAndSetObject,
-  kIntrinsicUnsafeLoadFence,
-  kIntrinsicUnsafeStoreFence,
-  kIntrinsicUnsafeFullFence,
-
-  kIntrinsicSystemArrayCopyCharArray,
-  kIntrinsicSystemArrayCopy,
-
   kInlineOpNop,
   kInlineOpReturnArg,
   kInlineOpNonWideConst,
   kInlineOpIGet,
   kInlineOpIPut,
   kInlineOpConstructor,
-  kInlineStringInit,
-};
-std::ostream& operator<<(std::ostream& os, const InlineMethodOpcode& rhs);
-
-enum InlineMethodFlags : uint16_t {
-  kNoInlineMethodFlags = 0x0000,
-  kInlineIntrinsic     = 0x0001,
-  kInlineSpecial       = 0x0002,
-};
-
-// IntrinsicFlags are stored in InlineMethod::d::raw_data
-enum IntrinsicFlags {
-  kIntrinsicFlagNone = 0,
-
-  // kIntrinsicMinMaxInt
-  kIntrinsicFlagMax = kIntrinsicFlagNone,
-  kIntrinsicFlagMin = 1,
-
-  // kIntrinsicIsEmptyOrLength
-  kIntrinsicFlagLength  = kIntrinsicFlagNone,
-  kIntrinsicFlagIsEmpty = kIntrinsicFlagMin,
-
-  // kIntrinsicIndexOf
-  kIntrinsicFlagBase0 = kIntrinsicFlagMin,
-
-  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut, kIntrinsicUnsafeCas
-  kIntrinsicFlagIsLong     = kIntrinsicFlagMin,
-  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut
-  kIntrinsicFlagIsVolatile = 2,
-  // kIntrinsicUnsafePut, kIntrinsicUnsafeCas
-  kIntrinsicFlagIsObject   = 4,
-  // kIntrinsicUnsafePut
-  kIntrinsicFlagIsOrdered  = 8,
-
-  // kIntrinsicDoubleCvt, kIntrinsicFloatCvt.
-  kIntrinsicFlagToFloatingPoint = kIntrinsicFlagMin,
 };
 
 struct InlineIGetIPutData {
@@ -198,7 +82,6 @@
 
 struct InlineMethod {
   InlineMethodOpcode opcode;
-  InlineMethodFlags flags;
   union {
     uint64_t data;
     InlineIGetIPutData ifield_data;
@@ -213,12 +96,8 @@
    * Analyse method code to determine if the method is a candidate for inlining.
    * If it is, record the inlining data.
    *
-   * @param verifier the method verifier holding data about the method to analyse.
-   * @param method placeholder for the inline method data.
    * @return true if the method is a candidate for inlining, false otherwise.
    */
-  static bool AnalyseMethodCode(verifier::MethodVerifier* verifier, InlineMethod* result)
-      REQUIRES_SHARED(Locks::mutator_lock_);
   static bool AnalyseMethodCode(ArtMethod* method, InlineMethod* result)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -274,4 +153,4 @@
 
 }  // namespace art
 
-#endif  // ART_RUNTIME_QUICK_INLINE_METHOD_ANALYSER_H_
+#endif  // ART_COMPILER_DEX_INLINE_METHOD_ANALYSER_H_
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index cbca333..608a18a 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -18,22 +18,15 @@
 
 #include <algorithm>
 #include <memory>
-#include <vector>
 
-#include "art_method-inl.h"
-#include "base/enums.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "dex_file.h"
 #include "dex_instruction-inl.h"
-#include "dex_instruction_utils.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache-inl.h"
-#include "mirror/object-inl.h"
-#include "utils.h"
+#include "runtime.h"
 #include "verifier/method_verifier-inl.h"
 #include "verifier/reg_type-inl.h"
 #include "verifier/register_line-inl.h"
+#include "verifier/verifier_deps.h"
 
 namespace art {
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e823f67..805c5da 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -61,6 +61,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/throwable.h"
 #include "scoped_thread_state_change-inl.h"
@@ -2181,6 +2182,10 @@
       CHECK(klass->ShouldVerifyAtRuntime() || klass->IsVerified() || klass->IsErroneous())
           << klass->PrettyDescriptor() << ": state=" << klass->GetStatus();
 
+      // Class has a meaningful status for the compiler now, record it.
+      ClassReference ref(manager_->GetDexFile(), class_def_index);
+      manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus());
+
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
       // we rely on things working OK without verification when the decryption dialog is brought up.
       // So abort in a debug build if we find this violated.
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index fa1b3a3..42ff1e7 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "compiled_class.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "gc/heap.h"
@@ -319,6 +320,47 @@
   CheckCompiledMethods(class_loader, "LSecond;", s);
 }
 
+// Test fixture: checks that a verify-only compiler filter still updates the CompiledClass
+// map, which will be used for OatClass.
+class CompilerDriverVerifyTest : public CompilerDriverTest {
+ protected:
+  CompilerFilter::Filter GetCompilerFilter() const OVERRIDE {
+    return CompilerFilter::kVerifyProfile;  // The verify-only filter under test.
+  }
+  // Checks that `clazz` loads via `class_loader`, is verified, and has a verified CompiledClass.
+  void CheckVerifiedClass(jobject class_loader, const std::string& clazz) const {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);  // One slot: the decoded class loader.
+    Handle<mirror::ClassLoader> h_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+    EXPECT_TRUE(klass->IsVerified());
+    // NOTE(review): driver records under class_def_index but this passes a type index - confirm.
+    CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(
+        ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_));
+    ASSERT_NE(compiled_class, nullptr);
+    EXPECT_EQ(compiled_class->GetStatus(), mirror::Class::kStatusVerified);
+  }
+};
+
+TEST_F(CompilerDriverVerifyTest, VerifyCompilation) {
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);  // Goes out of scope before CompileAll() runs.
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+  // Compile everything reachable from the loader, then check two classes by descriptor.
+  CompileAll(class_loader);
+
+  CheckVerifiedClass(class_loader, "LMain;");
+  CheckVerifiedClass(class_loader, "LSecond;");
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index 0c06090..37e4f11 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -16,17 +16,8 @@
 
 #include "elf_writer.h"
 
-#include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "dex_method_iterator.h"
-#include "driver/compiler_driver.h"
 #include "elf_file.h"
-#include "invoke_type.h"
-#include "mirror/object-inl.h"
-#include "oat.h"
-#include "scoped_thread_state_change-inl.h"
 
 namespace art {
 
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index c975944..dc880b0 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -30,7 +30,7 @@
 #include "mirror/stack_trace_element.h"
 #include "oat_quick_method_header.h"
 #include "optimizing/stack_map_stream.h"
-#include "runtime.h"
+#include "runtime-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 897d819..7e53d8d 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -22,6 +22,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d129249..952a7c6 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -59,6 +59,7 @@
 #include "mirror/executable.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
 #include "oat.h"
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 21042a3..b34d938 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -27,6 +27,7 @@
 #include "dex_file.h"
 #include "gtest/gtest.h"
 #include "indirect_reference_table.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mem_map.h"
 #include "mirror/class-inl.h"
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 53797d2..551c73b 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -383,9 +383,14 @@
   static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
-  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4, "Check field LDR offset");
-  static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == -4, "Check array LDR offset");
-  __ Sub(lr, lr, 4);  // Adjust the return address one instruction back to the LDR.
+  static_assert(
+      BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
+      "Field and array LDR offsets must be the same to reuse the same code.");
+  // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
+  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                "Field LDR must be 1 instruction (4B) before the return address label; "
+                " 2 instructions (8B) for heap poisoning.");
+  __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
   // Introduce a dependency on the lock_word including rb_state,
   // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
@@ -431,8 +436,9 @@
       __ Bind(&slow_path);
       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
-      __ Ubfx(ip0, ip0, 10, 12);            // Extract the offset.
+      __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
       __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
       __ Br(ip1);                           // Jump to the entrypoint.
       if (holder_reg.Is(base_reg)) {
         // Add null check slow path. The stack map is at the address pointed to by LR.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b7c8075..5136d7d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -42,18 +42,23 @@
 #endif
 
 #include "bytecode_utils.h"
+#include "class_linker.h"
 #include "compiled_method.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
 #include "graph_visualizer.h"
+#include "intern_table.h"
 #include "intrinsics.h"
 #include "leb128.h"
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
+#include "mirror/reference.h"
 #include "mirror/string.h"
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
 #include "utils/assembler.h"
 
 namespace art {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4955562..4629c54 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -90,9 +90,8 @@
 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
 
 // Flags controlling the use of link-time generated thunks for Baker read barriers.
-// Not yet implemented for heap poisoning.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = !kPoisonHeapReferences;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = !kPoisonHeapReferences;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
 
 // Some instructions have special requirements for a temporary, for example
 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
@@ -3053,6 +3052,11 @@
 
       if (!index.IsConstant()) {
         __ Add(temp, array, offset);
+      } else {
+        // We no longer need the `temp` here so release it as the store below may
+        // need a scratch register (if the constant index makes the offset too large)
+        // and the poisoned `source` could be using the other scratch register.
+        temps.Release(temp);
       }
       {
         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
@@ -6093,17 +6097,21 @@
     const int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
     __ Ldr(ip1, MemOperand(tr, entry_point_offset));
-    EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+    EmissionCheckScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
     vixl::aarch64::Label return_address;
     __ adr(lr, &return_address);
     __ Bind(cbnz_label);
     __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4,
-                  "Field LDR must be 1 instruction (4B) before the return address label.");
-    __ ldr(RegisterFrom(ref, Primitive::kPrimNot), MemOperand(base.X(), offset));
+    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Field LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    __ ldr(ref_reg, MemOperand(base.X(), offset));
     if (needs_null_check) {
       MaybeRecordImplicitNullCheck(instruction);
     }
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
     __ Bind(&return_address);
     return;
   }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 58feea2..332ab49 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -321,7 +321,9 @@
   vixl::aarch64::MemOperand CreateVecMemRegisters(
       HVecMemoryOperation* instruction,
       Location* reg_loc,
-      bool is_load);
+      bool is_load,
+      // This function may acquire a scratch register.
+      vixl::aarch64::UseScratchRegisterScope* temps_scope);
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
index e7f7b30..f8552dc 100644
--- a/compiler/optimizing/code_generator_vector_arm.cc
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -124,6 +124,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op helper.
+}
+
+void InstructionCodeGeneratorARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // No code is emitted on this path.
+}
+
 void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -148,6 +156,22 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderARM::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op helper.
+}
+
+void InstructionCodeGeneratorARM::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // No code is emitted on this path.
+}
+
+void LocationsBuilderARM::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op helper.
+}
+
+void InstructionCodeGeneratorARM::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // No code is emitted on this path.
+}
+
 void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -221,6 +245,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderARM::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal even when only building locations.
+}
+
+void InstructionCodeGeneratorARM::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // No code is emitted on this path.
+}
+
 void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) {
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 0923920..93befa4 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -27,6 +27,7 @@
 using helpers::InputRegisterAt;
 using helpers::Int64ConstantFrom;
 using helpers::XRegisterFrom;
+using helpers::WRegisterFrom;
 
 #define __ GetVIXLAssembler()->
 
@@ -318,6 +319,47 @@
   }
 }
 
+void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op helper.
+}
+
+void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister lhs = VRegisterFrom(locations->InAt(0));
+  VRegister rhs = VRegisterFrom(locations->InAt(1));
+  VRegister dst = VRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {  // Other packed types hit the fatal default.
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());  // V16B: 16 byte lanes.
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()  // Unsigned: URHADD if rounded, else UHADD.
+            ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+            : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+      } else {
+        instruction->IsRounded()  // Signed: SRHADD if rounded, else SHADD.
+            ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+            : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());  // V8H: 8 halfword lanes.
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()  // Unsigned: URHADD if rounded, else UHADD.
+            ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+            : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+      } else {
+        instruction->IsRounded()  // Signed: SRHADD if rounded, else SHADD.
+            ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+            : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -420,6 +462,22 @@
   }
 }
 
+void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op helper.
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();  // Nothing is emitted.
+}
+
+void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op helper.
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();  // Nothing is emitted.
+}
+
 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -624,6 +682,67 @@
   }
 }
 
+void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {  // Integral packed types only; others are fatal.
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);  // acc must be input 0.
+      locations->SetOut(Location::SameAsFirstInput());  // Output aliases the accumulator.
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
+// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
+// However, the vector multiply-accumulate instructions emitted here are not affected.
+void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LocationSummary* locations = instr->GetLocations();
+  VRegister acc = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+  VRegister left = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+  VRegister right = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
+  switch (instr->GetPackedType()) {  // kAdd selects MLA; any other op kind selects MLS.
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instr->GetVectorLength());  // V16B: 16 byte lanes.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ Mla(acc.V16B(), left.V16B(), right.V16B());  // acc += left * right, per lane.
+      } else {
+        __ Mls(acc.V16B(), left.V16B(), right.V16B());  // acc -= left * right, per lane.
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instr->GetVectorLength());  // V8H: 8 halfword lanes.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ Mla(acc.V8H(), left.V8H(), right.V8H());
+      } else {
+        __ Mls(acc.V8H(), left.V8H(), right.V8H());
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instr->GetVectorLength());  // V4S: 4 word lanes.
+      if (instr->GetOpKind() == HInstruction::kAdd) {
+        __ Mla(acc.V4S(), left.V4S(), right.V4S());
+      } else {
+        __ Mls(acc.V4S(), left.V4S(), right.V4S());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";  // NOTE(review): siblings add UNREACHABLE().
+  }
+}
+
 // Helper to set up locations for vector memory operations.
 static void CreateVecMemLocations(ArenaAllocator* arena,
                                   HVecMemoryOperation* instruction,
@@ -656,7 +775,8 @@
 MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
     HVecMemoryOperation* instruction,
     Location* reg_loc,
-    bool is_load) {
+    bool is_load,
+    UseScratchRegisterScope* temps_scope) {
   LocationSummary* locations = instruction->GetLocations();
   Register base = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
@@ -666,20 +786,18 @@
   uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
   size_t shift = Primitive::ComponentSizeShift(packed_type);
 
-  UseScratchRegisterScope temps(GetVIXLAssembler());
-  Register temp = temps.AcquireSameSizeAs(base);
+  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
+  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+
   if (index.IsConstant()) {
     offset += Int64ConstantFrom(index) << shift;
-    __ Add(temp, base, offset);
+    return HeapOperand(base, offset);
   } else {
-    if (instruction->InputAt(0)->IsIntermediateAddress()) {
-      temp = base;
-    } else {
-      __ Add(temp, base, offset);
-    }
-    __ Add(temp.X(), temp.X(), Operand(XRegisterFrom(index), LSL, shift));
+    Register temp = temps_scope->AcquireSameSizeAs(base);
+    __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift));
+
+    return HeapOperand(temp, offset);
   }
-  return HeapOperand(temp);
 }
 
 void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
@@ -688,28 +806,22 @@
 
 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
   Location reg_loc = Location::NoLocation();
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true, &temps);
   VRegister reg = VRegisterFrom(reg_loc);
+
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-      DCHECK_EQ(16u, instruction->GetVectorLength());
-      __ Ld1(reg.V16B(), mem);
-      break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
-      DCHECK_EQ(8u, instruction->GetVectorLength());
-      __ Ld1(reg.V8H(), mem);
-      break;
     case Primitive::kPrimInt:
     case Primitive::kPrimFloat:
-      DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Ld1(reg.V4S(), mem);
-      break;
     case Primitive::kPrimLong:
     case Primitive::kPrimDouble:
-      DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Ld1(reg.V2D(), mem);
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ Ldr(reg, mem);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -723,28 +835,22 @@
 
 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
   Location reg_loc = Location::NoLocation();
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false, &temps);
   VRegister reg = VRegisterFrom(reg_loc);
+
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-      DCHECK_EQ(16u, instruction->GetVectorLength());
-      __ St1(reg.V16B(), mem);
-      break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
-      DCHECK_EQ(8u, instruction->GetVectorLength());
-      __ St1(reg.V8H(), mem);
-      break;
     case Primitive::kPrimInt:
     case Primitive::kPrimFloat:
-      DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ St1(reg.V4S(), mem);
-      break;
     case Primitive::kPrimLong:
     case Primitive::kPrimDouble:
-      DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ St1(reg.V2D(), mem);
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ Str(reg, mem);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 74fa584..53f314e 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -124,6 +124,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -148,6 +156,22 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
+void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -221,6 +245,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal already when building locations.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) {
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 6969abd..c4a3225 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -124,6 +124,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -148,6 +156,22 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
+void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -221,6 +245,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal already when building locations.
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) {
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 87118ce..50b95c1 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -124,6 +124,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -148,6 +156,22 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
+void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -221,6 +245,14 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
+void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal already when building locations.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 8dabb4d..013b092 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -350,6 +350,35 @@
   }
 }
 
+void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));  // Two-operand form: dst is also lhs.
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+  DCHECK(instruction->IsRounded());  // pavgb/pavgw round up: (a + b + 1) >> 1.
+  DCHECK(instruction->IsUnsigned());  // pavgb/pavgw treat lanes as unsigned.
+
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pavgb(dst, src);
+      return;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pavgw(dst, src);
+      return;
+    default:  // Every other packed type is rejected.
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -448,6 +477,22 @@
   }
 }
 
+void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
+void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -685,6 +730,14 @@
   }
 }
 
+void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal already when building locations.
+}
+
+void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 // Helper to set up locations for vector memory operations.
 static void CreateVecMemLocations(ArenaAllocator* arena,
                                   HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e956088..66f19a4 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -343,6 +343,35 @@
   }
 }
 
+void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));  // Two-operand form: dst is also lhs.
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+  DCHECK(instruction->IsRounded());  // pavgb/pavgw round up: (a + b + 1) >> 1.
+  DCHECK(instruction->IsUnsigned());  // pavgb/pavgw treat lanes as unsigned.
+
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pavgb(dst, src);
+      return;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pavgw(dst, src);
+      return;
+    default:  // Every other packed type is rejected.
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -441,6 +470,22 @@
   }
 }
 
+void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
+void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);  // Shared binary-op setup.
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -678,6 +723,14 @@
   }
 }
 
+void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal already when building locations.
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LOG(FATAL) << "No SIMD for " << instr->GetId();  // Fatal: no SIMD codegen for this node.
+}
+
 // Helper to set up locations for vector memory operations.
 static void CreateVecMemLocations(ArenaAllocator* arena,
                                   HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index dc3d378..0b4dcd3 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -161,9 +161,15 @@
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
     HInstruction* user = use.GetUser();
     if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
-      finder.Update(user->IsPhi()
-          ? user->GetBlock()->GetPredecessors()[use.GetIndex()]
-          : user->GetBlock());
+      HBasicBlock* block = user->GetBlock();
+      if (user->IsPhi()) {
+        // Special case phis by taking the incoming block for regular ones,
+        // or the dominator for catch phis.
+        block = user->AsPhi()->IsCatchPhi()
+            ? block->GetDominator()
+            : block->GetPredecessors()[use.GetIndex()];
+      }
+      finder.Update(block);
     }
   }
   for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index cc3c143..e5d94c3 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -509,6 +509,15 @@
     StartAttributeStream("kind") << deoptimize->GetKind();
   }
 
+  void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {  // Dumps both flags as true/false.
+    StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
+    StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
+  }
+
+  void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();  // Dumps the op kind attribute.
+  }
+
 #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
   void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 1c8674d..7c833cf 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -45,18 +45,6 @@
   return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
 }
 
-/** Returns true for 32/64-bit constant instruction. */
-static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
-  if (instruction->IsIntConstant()) {
-    *value = instruction->AsIntConstant()->GetValue();
-    return true;
-  } else if (instruction->IsLongConstant()) {
-    *value = instruction->AsLongConstant()->GetValue();
-    return true;
-  }
-  return false;
-}
-
 /** Computes a * b for a,b > 0 (at least until first overflow happens). */
 static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
   if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
@@ -106,7 +94,7 @@
     }
   }
   int64_t value = -1;
-  return IsIntAndGet(instruction, &value) && value >= 0;
+  return IsInt64AndGet(instruction, &value) && value >= 0;
 }
 
 /** Hunts "under the hood" for a suitable instruction at the hint. */
@@ -149,7 +137,7 @@
     int64_t value;
     if (v.instruction->IsDiv() &&
         v.instruction->InputAt(0)->IsArrayLength() &&
-        IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+        IsInt64AndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
       return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
     }
     // If a == 1, the most suitable one suffices as maximum value.
@@ -444,7 +432,7 @@
     // any of the three requests (kExact, kAtMost, and KAtLeast).
     if (info->induction_class == HInductionVarAnalysis::kInvariant &&
         info->operation == HInductionVarAnalysis::kFetch) {
-      if (IsIntAndGet(info->fetch, value)) {
+      if (IsInt64AndGet(info->fetch, value)) {
         return true;
       }
     }
@@ -635,7 +623,7 @@
   int64_t f = 0;
   if (IsConstant(info->op_a, kExact, &a) &&
       CanLongValueFitIntoInt(a) &&
-      IsIntAndGet(info->fetch, &f) && f >= 1) {
+      IsInt64AndGet(info->fetch, &f) && f >= 1) {
     // Conservative bounds on a * f^-i + b with f >= 1 can be computed without
     // trip count. Other forms would require a much more elaborate evaluation.
     const bool is_min_a = a >= 0 ? is_min : !is_min;
@@ -663,7 +651,7 @@
   // Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
   // it becomes more likely range analysis will compare the same instructions as terminal nodes.
   int64_t value;
-  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+  if (IsInt64AndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
     // Proper constant reveals best information.
     return Value(static_cast<int32_t>(value));
   } else if (instruction == chase_hint_) {
@@ -671,10 +659,10 @@
     return Value(instruction, 1, 0);
   } else if (instruction->IsAdd()) {
     // Incorporate suitable constants in the chased value.
-    if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
+    if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(Value(static_cast<int32_t>(value)),
                       GetFetch(instruction->InputAt(1), trip, in_body, is_min));
-    } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
+    } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
                       Value(static_cast<int32_t>(value)));
     }
@@ -1074,7 +1062,7 @@
   // Detect known base and trip count (always taken).
   int64_t f = 0;
   int64_t m = 0;
-  if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
+  if (IsInt64AndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
     HInstruction* opa = nullptr;
     HInstruction* opb = nullptr;
     if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 66948eb..1f8a58c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -22,6 +22,7 @@
 #include "class_linker.h"
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
+#include "dex/inline_method_analyser.h"
 #include "dex/verified_method.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_driver-inl.h"
@@ -37,7 +38,6 @@
 #include "optimizing_compiler.h"
 #include "reference_type_propagation.h"
 #include "register_allocator_linear_scan.h"
-#include "quick/inline_method_analyser.h"
 #include "sharpening.h"
 #include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
@@ -1539,6 +1539,14 @@
   return iput;
 }
 
+template <typename T>  // Returns `hint` if it already refers to `object`, else a fresh handle.
+static inline Handle<T> NewHandleIfDifferent(T* object,
+                                             Handle<T> hint,
+                                             VariableSizedHandleScope* handles)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return (object != hint.Get()) ? handles->NewHandle(object) : hint;
+}
+
 bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                                        ArtMethod* resolved_method,
                                        ReferenceTypeInfo receiver_type,
@@ -1550,9 +1558,13 @@
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
   uint32_t method_index = resolved_method->GetDexMethodIndex();
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(handles_->NewHandle(
-      resolved_method->GetDeclaringClass()->GetClassLoader()));
+  Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(),
+                                                            caller_compilation_unit_.GetDexCache(),
+                                                            handles_);
+  Handle<mirror::ClassLoader> class_loader =
+      NewHandleIfDifferent(resolved_method->GetDeclaringClass()->GetClassLoader(),
+                           caller_compilation_unit_.GetClassLoader(),
+                           handles_);
 
   DexCompilationUnit dex_compilation_unit(
       class_loader,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 60790e5..2dcc12e 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,6 +16,8 @@
 
 #include "instruction_simplifier.h"
 
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
 #include "escape.h"
 #include "intrinsics.h"
 #include "mirror/class-inl.h"
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 5f5e29b..3fc7c50 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -19,6 +19,7 @@
 #include "instruction_simplifier_arm.h"
 #include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 #include "nodes.h"
 
 namespace art {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 73b7b2b..f16e372 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -210,5 +210,11 @@
   }
 }
 
+void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
+  if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) {
+    RecordSimplification();  // Only counted when the combine helper reports success.
+  }
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 65654f5..eec4e49 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -74,6 +74,7 @@
   void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
   void VisitUShr(HUShr* instruction) OVERRIDE;
   void VisitXor(HXor* instruction) OVERRIDE;
+  void VisitVecMul(HVecMul* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index c2b1374..7d1f146 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -278,5 +278,71 @@
   return true;
 }
 
+bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
+  Primitive::Type type = mul->GetPackedType();
+  switch (isa) {  // Filter by ISA and packed type before attempting the mul+add/sub fusion.
+    case kArm64:
+      if (!(type == Primitive::kPrimByte ||
+            type == Primitive::kPrimChar ||
+            type == Primitive::kPrimShort ||
+            type == Primitive::kPrimInt)) {
+        return false;
+      }
+      break;
+    default:
+      return false;  // No other ISA is handled.
+  }
+
+  ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();  // For the fused node.
+
+  if (mul->HasOnlyOneNonEnvironmentUse()) {
+    HInstruction* use = mul->GetUses().front().GetUser();
+    if (use->IsVecAdd() || use->IsVecSub()) {
+      // Replace code looking like
+      //    VECMUL tmp, x, y
+      //    VECADD/SUB dst, acc, tmp
+      // with
+      //    VECMULACC dst, acc, x, y
+      // Note that we do not want to (unconditionally) perform the merge when the
+      // multiplication has multiple uses and it can be merged in all of them.
+      // Multiple uses could happen on the same control-flow path, and we would
+      // then increase the amount of work. In the future we could try to evaluate
+      // whether all uses are on different control-flow paths (using dominance and
+      // reverse-dominance information) and only perform the merge when they are.
+      HInstruction* accumulator = nullptr;
+      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
+      HInstruction* binop_left = binop->GetLeft();
+      HInstruction* binop_right = binop->GetRight();
+      // This is always true since the `HVecMul` has only one use (which is checked above).
+      DCHECK_NE(binop_left, binop_right);
+      if (binop_right == mul) {  // acc + mul or acc - mul.
+        accumulator = binop_left;
+      } else if (use->IsVecAdd()) {  // mul + acc: the other add operand is the accumulator.
+        DCHECK_EQ(binop_left, mul);
+        accumulator = binop_right;
+      }  // Otherwise mul - acc: accumulator stays null and nothing is fused.
+
+      HInstruction::InstructionKind kind =
+          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+      if (accumulator != nullptr) {
+        HVecMultiplyAccumulate* mulacc =
+            new (arena) HVecMultiplyAccumulate(arena,
+                                               kind,
+                                               accumulator,
+                                               mul->GetLeft(),
+                                               mul->GetRight(),
+                                               binop->GetPackedType(),
+                                               binop->GetVectorLength());
+
+        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+        DCHECK(!mul->HasUses());
+        mul->GetBlock()->RemoveInstruction(mul);  // mul is dead after the replacement.
+        return true;
+      }
+    }
+  }
+
+  return false;  // No fusable single add/sub user was found.
+}
 
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 83e3ffc..2ea103a 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -58,6 +58,8 @@
                                   HInstruction* index,
                                   size_t data_offset);
 
+bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 8df80ad..6236bd8 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -16,7 +16,8 @@
 
 #include "intrinsics.h"
 
-#include "art_method.h"
+#include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "class_linker.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 1006a77..750f9cc 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -22,9 +22,13 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
 #include "intrinsics_utils.h"
+#include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/reference.h"
 #include "mirror/string.h"
-#include "thread.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
 #include "utils/arm/assembler_arm.h"
 
 namespace art {
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 47bcb5d..4d36015 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -22,9 +22,13 @@
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
+#include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/reference.h"
 #include "mirror/string-inl.h"
-#include "thread.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
 #include "utils/arm64/assembler_arm64.h"
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 0d933ea..fd8a37a 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -17,10 +17,16 @@
 #include "intrinsics_arm_vixl.h"
 
 #include "arch/arm/instruction_set_features_arm.h"
+#include "art_method.h"
 #include "code_generator_arm_vixl.h"
 #include "common_arm.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/reference.h"
+#include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
 
 #include "aarch32/constants-aarch32.h"
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 82d0567..b57b41f 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2093,6 +2093,199 @@
   __ Bind(&done);
 }
 
+// Location setup for the intrinsified
+//   static void java.lang.System.arraycopy(Object src, int srcPos,
+//                                          Object dest, int destPos, int length)
+void IntrinsicLocationsBuilderMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  // Tells whether the input at the given index is an int constant
+  // whose value is below zero.
+  auto is_negative_constant = [invoke](size_t index) {
+    HIntConstant* constant = invoke->InputAt(index)->AsIntConstant();
+    return constant != nullptr && constant->GetValue() < 0;
+  };
+
+  // A constant source or destination position that is negative makes the copy
+  // fail anyway, so no inline code is set up for it.
+  if (is_negative_constant(/* srcPos */ 1) || is_negative_constant(/* destPos */ 3)) {
+    return;
+  }
+
+  // Likewise for a constant negative length: just call as normal.
+  if (is_negative_constant(/* length */ 4)) {
+    return;
+  }
+
+  // Inline code can be generated; the code generator may still divert to a slow path.
+  LocationSummary* const summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+
+  // Array references (inputs 0 and 2) need registers; the positions and the
+  // length (inputs 1, 3 and 4) may be registers or constants.
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  summary->SetInAt(2, Location::RequiresRegister());
+  summary->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  summary->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+  // Three scratch registers for the code generator.
+  for (int i = 0; i < 3; ++i) {
+    summary->AddTemp(Location::RequiresRegister());
+  }
+}
+
+// Emits a check that "length(input) - pos >= length"; branches to slow_path when it fails.
+static void EnoughItems(Mips64Assembler* assembler,
+                        GpuRegister length_input_minus_pos,
+                        Location length,
+                        SlowPathCodeMIPS64* slow_path) {
+  if (length.IsConstant()) {
+    int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue();
+
+    if (IsInt<16>(length_constant)) {  // constant fits in 16 bits: use it directly in Slti
+      __ Slti(TMP, length_input_minus_pos, length_constant);
+      __ Bnezc(TMP, slow_path->GetEntryLabel());
+    } else {  // wider constant: load it into TMP first, then compare registers
+      __ LoadConst32(TMP, length_constant);
+      __ Bltc(length_input_minus_pos, TMP, slow_path->GetEntryLabel());
+    }
+  } else {
+    __ Bltc(length_input_minus_pos, length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+  }
+}
+
+static void CheckPosition(Mips64Assembler* assembler,
+                          Location pos,
+                          GpuRegister input,
+                          Location length,
+                          SlowPathCodeMIPS64* slow_path,
+                          bool length_is_input_length = false) {
+  // Emits range checks of pos/length against input; length_offset locates the array length.
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  // Calculate length(input) - pos (kept in AT) and branch to slow_path on any failed check.
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      if (!length_is_input_length) {  // otherwise no check is emitted for this case
+        // Check that length(input) >= length.
+        __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+        EnoughItems(assembler, AT, length, slow_path);
+      }
+    } else {
+      // Check that (length(input) - pos) >= zero.
+      __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+      DCHECK_GT(pos_const, 0);  // only positive constants are expected to reach this point
+      __ Addiu32(AT, AT, -pos_const);
+      __ Bltzc(AT, slow_path->GetEntryLabel());
+
+      // Verify that (length(input) - pos) >= length.
+      EnoughItems(assembler, AT, length, slow_path);
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    GpuRegister pos_reg = pos.AsRegister<GpuRegister>();
+    __ Bnezc(pos_reg, slow_path->GetEntryLabel());
+  } else {
+    // Verify that pos >= 0.
+    GpuRegister pos_reg = pos.AsRegister<GpuRegister>();
+    __ Bltzc(pos_reg, slow_path->GetEntryLabel());
+
+    // Check that (length(input) - pos) >= zero.
+    __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+    __ Subu(AT, AT, pos_reg);
+    __ Bltzc(AT, slow_path->GetEntryLabel());
+
+    // Verify that (length(input) - pos) >= length.
+    EnoughItems(assembler, AT, length, slow_path);
+  }
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+  Location src_pos = locations->InAt(1);
+  GpuRegister dest = locations->InAt(2).AsRegister<GpuRegister>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+
+  Mips64Label loop;  // head of the copy loop bound further below
+
+  GpuRegister dest_base = locations->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister src_base = locations->GetTemp(1).AsRegister<GpuRegister>();
+  GpuRegister count = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Bail out if the source and destination are the same (to handle overlap).
+  __ Beqc(src, dest, slow_path->GetEntryLabel());
+
+  // Bail out if the source is null.
+  __ Beqzc(src, slow_path->GetEntryLabel());
+
+  // Bail out if the destination is null.
+  __ Beqzc(dest, slow_path->GetEntryLabel());
+
+  // Load length into register for count.
+  if (length.IsConstant()) {
+    __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue());
+  } else {
+    // If the length is negative, bail out.
+    // We have already checked in the LocationsBuilder for the constant case.
+    __ Bltzc(length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+
+    __ Move(count, length.AsRegister<GpuRegister>());
+  }
+
+  // Validity checks: source (count now holds the length, so it is passed as a register).
+  CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path);
+
+  // Validity checks: dest.
+  CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path);
+
+  // If count is zero, we're done (the loop below always copies at least one element).
+  __ Beqzc(count, slow_path->GetExitLabel());
+
+  // Okay, everything checks out.  Finally time to do the copy.
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar);
+
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Calculate source and destination addresses: array + data_offset + pos * char_size.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+
+    __ Daddiu64(src_base, src, data_offset + char_size * src_pos_const, TMP);
+  } else {
+    __ Daddiu64(src_base, src, data_offset, TMP);
+    __ Dlsa(src_base, src_pos.AsRegister<GpuRegister>(), src_base, char_shift);
+  }
+  if (dest_pos.IsConstant()) {
+    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+
+    __ Daddiu64(dest_base, dest, data_offset + char_size * dest_pos_const, TMP);
+  } else {
+    __ Daddiu64(dest_base, dest, data_offset, TMP);
+    __ Dlsa(dest_base, dest_pos.AsRegister<GpuRegister>(), dest_base, char_shift);
+  }
+
+  __ Bind(&loop);  // each iteration moves one element through TMP and decrements count
+  __ Lh(TMP, src_base, 0);
+  __ Daddiu(src_base, src_base, char_size);
+  __ Daddiu(count, count, -1);
+  __ Sh(TMP, dest_base, 0);
+  __ Daddiu(dest_base, dest_base, char_size);
+  __ Bnezc(count, &loop);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 static void GenHighestOneBit(LocationSummary* locations,
                              Primitive::Type type,
                              Mips64Assembler* assembler) {
@@ -2372,7 +2565,6 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index ecf919b..8e45747 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -25,9 +25,13 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
 #include "intrinsics_utils.h"
+#include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/reference.h"
 #include "mirror/string.h"
-#include "thread.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/constants_x86.h"
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 13956df..8ed2ad8 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -19,15 +19,19 @@
 #include <limits>
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/bit_utils.h"
 #include "code_generator_x86_64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "intrinsics.h"
 #include "intrinsics_utils.h"
+#include "lock_word.h"
 #include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/reference.h"
 #include "mirror/string.h"
-#include "thread.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/constants_x86_64.h"
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8e88c1e..5a95abd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -63,12 +63,122 @@
   return false;
 }
 
+// Detects a sign extension from the given type: returns true and sets *operand on success.
+static bool IsSignExtensionAndGet(HInstruction* instruction,
+                                  Primitive::Type type,
+                                  /*out*/ HInstruction** operand) {
+  // Accept any already wider constant that would be handled properly by sign
+  // extension when represented in the *width* of the given narrower data type
+  // (the fact that char normally zero extends does not matter here).
+  int64_t value = 0;
+  if (IsInt64AndGet(instruction, &value)) {
+    switch (type) {
+      case Primitive::kPrimByte:
+        if (std::numeric_limits<int8_t>::min() <= value &&
+            value <= std::numeric_limits<int8_t>::max()) {
+          *operand = instruction;
+          return true;
+        }
+        return false;
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+        if (std::numeric_limits<int16_t>::min() <= value &&
+            value <= std::numeric_limits<int16_t>::max()) {  // constant must fit in 16 bits
+          *operand = instruction;
+          return true;
+        }
+        return false;
+      default:
+        return false;
+    }
+  }
+  // An implicit widening conversion of a signed integer to an integral type sign-extends
+  // the two's-complement representation of the integer value to fill the wider format.
+  if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+                                         instruction->IsStaticFieldGet() ||
+                                         instruction->IsInstanceFieldGet())) {
+    switch (type) {
+      case Primitive::kPrimByte:
+      case Primitive::kPrimShort:
+        *operand = instruction;
+        return true;
+      default:
+        return false;  // in particular char loads: those zero-extend
+    }
+  }
+  // TODO: perhaps explicit conversions later too?
+  //       (this may return something different from instruction)
+  return false;
+}
+
+// Detects a zero extension from the given type: returns true and sets *operand on success.
+static bool IsZeroExtensionAndGet(HInstruction* instruction,
+                                  Primitive::Type type,
+                                  /*out*/ HInstruction** operand) {
+  // Accept any already wider constant that would be handled properly by zero
+  // extension when represented in the *width* of the given narrower data type
+  // (the fact that byte/short normally sign extend does not matter here).
+  int64_t value = 0;
+  if (IsInt64AndGet(instruction, &value)) {
+    switch (type) {
+      case Primitive::kPrimByte:
+        if (std::numeric_limits<uint8_t>::min() <= value &&
+            value <= std::numeric_limits<uint8_t>::max()) {
+          *operand = instruction;
+          return true;
+        }
+        return false;
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+        if (std::numeric_limits<uint16_t>::min() <= value &&
+            value <= std::numeric_limits<uint16_t>::max()) {  // constant must fit in 16 bits
+          *operand = instruction;
+          return true;
+        }
+        return false;
+      default:
+        return false;
+    }
+  }
+  // An implicit widening conversion of a char to an integral type zero-extends
+  // the representation of the char value to fill the wider format.
+  if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+                                         instruction->IsStaticFieldGet() ||
+                                         instruction->IsInstanceFieldGet())) {
+    if (type == Primitive::kPrimChar) {
+      *operand = instruction;
+      return true;
+    }
+  }
+  // A sign (or zero) extension followed by an explicit removal of just the
+  // higher sign bits is equivalent to a zero extension of the underlying operand.
+  if (instruction->IsAnd()) {
+    int64_t mask = 0;
+    HInstruction* a = instruction->InputAt(0);
+    HInstruction* b = instruction->InputAt(1);
+    // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask.
+    if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) ||
+                                             IsZeroExtensionAndGet(b, type, /*out*/ operand))) ||
+        (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
+                                             IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
+      switch ((*operand)->GetType()) {  // mask must keep exactly the bits of the operand's type
+        case Primitive::kPrimByte:  return mask == std::numeric_limits<uint8_t>::max();
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+        default: return false;
+      }
+    }
+  }
+  // TODO: perhaps explicit conversions later too?
+  return false;
+}
+
 // Test vector restrictions.
 static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
   return (restrictions & tested) != 0;
 }
 
-// Inserts an instruction.
+// Insert an instruction.
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
   DCHECK(instruction != nullptr);
@@ -713,6 +823,10 @@
       return true;
     }
   } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+    // Recognize vectorization idioms.
+    if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
+      return true;
+    }
     // Deal with vector restrictions.
     if ((HasVectorRestrictions(restrictions, kNoShift)) ||
         (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
@@ -806,11 +920,11 @@
         switch (type) {
           case Primitive::kPrimBoolean:
           case Primitive::kPrimByte:
-            *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+            *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
             return TrySetVectorLength(16);
           case Primitive::kPrimChar:
           case Primitive::kPrimShort:
-            *restrictions |= kNoDiv | kNoAbs;
+            *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
             return TrySetVectorLength(8);
           case Primitive::kPrimInt:
             *restrictions |= kNoDiv;
@@ -1039,6 +1153,90 @@
 #undef GENERATE_VEC
 
 //
+// Vectorization idioms.
+//
+
+// Recognizes the following idioms, rooted at the given shift instruction:
+//   rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+//   regular  halving add (a + b)     >> 1 for unsigned/signed operands a, b
+// Provided that the operands are promoted to a wider form to do the arithmetic and
+// then cast back to narrower form, the idioms can be mapped into efficient SIMD
+// implementation that operates directly in narrower form (plus one extra bit).
+// TODO: current version recognizes implicit byte/short/char widening only;
+//       explicit widening from int to long could be added later.
+bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
+                                                 HInstruction* instruction,
+                                                 bool generate_code,
+                                                 Primitive::Type type,
+                                                 uint64_t restrictions) {
+  // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
+  // (note whether the sign bit in higher precision is shifted in has no effect
+  // on the narrow precision computed by the idiom).
+  int64_t value = 0;
+  if ((instruction->IsShr() ||
+       instruction->IsUShr()) &&
+      IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+    //
+    // TODO: make following code less sensitive to associativity and commutativity differences.
+    //
+    HInstruction* x = instruction->InputAt(0);
+    // Test for an optional rounding part (x + 1) >> 1; the constant must be the second input.
+    bool is_rounded = false;
+    if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+      x = x->InputAt(0);
+      is_rounded = true;
+    }
+    // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
+    if (x->IsAdd()) {
+      HInstruction* a = x->InputAt(0);
+      HInstruction* b = x->InputAt(1);
+      HInstruction* r = nullptr;
+      HInstruction* s = nullptr;
+      bool is_unsigned = false;
+      if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
+        is_unsigned = true;
+      } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
+        is_unsigned = false;
+      } else {
+        return false;
+      }
+      // Deal with vector restrictions.
+      if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) ||
+          (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) {
+        return false;
+      }
+      // Accept recognized halving add for vectorizable operands. Vectorized code uses the
+      // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
+      DCHECK(r != nullptr && s != nullptr);
+      if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+          VectorizeUse(node, s, generate_code, type, restrictions)) {
+        if (generate_code) {
+          if (vector_mode_ == kVector) {
+            vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
+                global_allocator_,
+                vector_map_->Get(r),
+                vector_map_->Get(s),
+                type,
+                vector_length_,
+                is_unsigned,
+                is_rounded));
+          } else {  // not kVector: process both inputs of the original shift, then the shift
+            VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
+            VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
+            GenerateVecOp(instruction,
+                          vector_map_->Get(instruction->InputAt(0)),
+                          vector_map_->Get(instruction->InputAt(1)),
+                          type);
+          }
+        }
+        return true;
+      }
+    }
+  }
+  return false;  // not a (vectorizable) halving add
+}
+
+//
 // Helpers.
 //
 
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index d8f50aa..4a7da86 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -62,13 +62,15 @@
    * Vectorization restrictions (bit mask).
    */
   enum VectorRestrictions {
-    kNone     = 0,   // no restrictions
-    kNoMul    = 1,   // no multiplication
-    kNoDiv    = 2,   // no division
-    kNoShift  = 4,   // no shift
-    kNoShr    = 8,   // no arithmetic shift right
-    kNoHiBits = 16,  // "wider" operations cannot bring in higher order bits
-    kNoAbs    = 32,  // no absolute value
+    kNone            = 0,    // no restrictions
+    kNoMul           = 1,    // no multiplication
+    kNoDiv           = 2,    // no division
+    kNoShift         = 4,    // no shift
+    kNoShr           = 8,    // no arithmetic shift right
+    kNoHiBits        = 16,   // "wider" operations cannot bring in higher order bits
+    kNoSignedHAdd    = 32,   // no signed halving add
+    kNoUnroundedHAdd = 64,   // no unrounded halving add
+    kNoAbs           = 128,  // no absolute value
   };
 
   /*
@@ -136,6 +138,13 @@
                       Primitive::Type type);
   void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
 
+  // Vectorization idioms.
+  bool VectorizeHalvingAddIdiom(LoopNode* node,
+                                HInstruction* instruction,
+                                bool generate_code,
+                                Primitive::Type type,
+                                uint64_t restrictions);
+
   // Helpers.
   bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
   bool TrySetSimpleLoopHeader(HBasicBlock* block);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e71fea9..ca953a1 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -17,6 +17,8 @@
 
 #include <cfloat>
 
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
 #include "code_generator.h"
 #include "common_dominator.h"
 #include "ssa_builder.h"
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c109369..8368026 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1362,16 +1362,18 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
   M(VecReplicateScalar, VecUnaryOperation)                              \
-  M(VecSetScalars, VecUnaryOperation)                                   \
   M(VecSumReduce, VecUnaryOperation)                                    \
   M(VecCnv, VecUnaryOperation)                                          \
   M(VecNeg, VecUnaryOperation)                                          \
   M(VecAbs, VecUnaryOperation)                                          \
   M(VecNot, VecUnaryOperation)                                          \
   M(VecAdd, VecBinaryOperation)                                         \
+  M(VecHalvingAdd, VecBinaryOperation)                                  \
   M(VecSub, VecBinaryOperation)                                         \
   M(VecMul, VecBinaryOperation)                                         \
   M(VecDiv, VecBinaryOperation)                                         \
+  M(VecMin, VecBinaryOperation)                                         \
+  M(VecMax, VecBinaryOperation)                                         \
   M(VecAnd, VecBinaryOperation)                                         \
   M(VecAndNot, VecBinaryOperation)                                      \
   M(VecOr, VecBinaryOperation)                                          \
@@ -1379,6 +1381,8 @@
   M(VecShl, VecBinaryOperation)                                         \
   M(VecShr, VecBinaryOperation)                                         \
   M(VecUShr, VecBinaryOperation)                                        \
+  M(VecSetScalars, VecOperation)                                        \
+  M(VecMultiplyAccumulate, VecOperation)                                \
   M(VecLoad, VecMemoryOperation)                                        \
   M(VecStore, VecMemoryOperation)                                       \
 
@@ -6845,6 +6849,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator);
 };
 
+// Returns int64_t value of a properly typed constant.
 inline int64_t Int64FromConstant(HConstant* constant) {
   if (constant->IsIntConstant()) {
     return constant->AsIntConstant()->GetValue();
@@ -6856,6 +6861,21 @@
   }
 }
 
+// Reports whether the instruction is an int, long, or null constant; if so, the
+// constant (zero for null) is stored through value, otherwise value is untouched.
+inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) {
+  int64_t constant = 0;  // a null constant keeps this zero
+  if (instruction->IsIntConstant()) {
+    constant = instruction->AsIntConstant()->GetValue();
+  } else if (instruction->IsLongConstant()) {
+    constant = instruction->AsLongConstant()->GetValue();
+  } else if (!instruction->IsNullConstant()) {
+    return false;
+  }
+  *value = constant;
+  return true;
+}
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
   inline const H##type* HInstruction::As##type() const {                       \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 0cbbf2a..fb9dfb7 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -116,16 +116,23 @@
 class HVecUnaryOperation : public HVecOperation {
  public:
   HVecUnaryOperation(ArenaAllocator* arena,
+                     HInstruction* input,
                      Primitive::Type packed_type,
                      size_t vector_length,
                      uint32_t dex_pc)
       : HVecOperation(arena,
                       packed_type,
                       SideEffects::None(),
-                      /*number_of_inputs*/ 1,
+                      /* number_of_inputs */ 1,
                       vector_length,
-                      dex_pc) { }
+                      dex_pc) {
+    SetRawInputAt(0, input);
+  }
+
+  HInstruction* GetInput() const { return InputAt(0); }
+
   DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation);
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation);
 };
@@ -134,16 +141,26 @@
 class HVecBinaryOperation : public HVecOperation {
  public:
   HVecBinaryOperation(ArenaAllocator* arena,
+                      HInstruction* left,
+                      HInstruction* right,
                       Primitive::Type packed_type,
                       size_t vector_length,
                       uint32_t dex_pc)
       : HVecOperation(arena,
                       packed_type,
                       SideEffects::None(),
-                      /*number_of_inputs*/ 2,
+                      /* number_of_inputs */ 2,
                       vector_length,
-                      dex_pc) { }
+                      dex_pc) {
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+
+  HInstruction* GetLeft() const { return InputAt(0); }
+  HInstruction* GetRight() const { return InputAt(1); }
+
   DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation);
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation);
 };
@@ -175,7 +192,7 @@
 };
 
 //
-// Definitions of concrete vector operations in HIR.
+// Definitions of concrete unary vector operations in HIR.
 //
 
 // Replicates the given scalar into a vector,
@@ -187,32 +204,14 @@
                       Primitive::Type packed_type,
                       size_t vector_length,
                       uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
-    SetRawInputAt(0, scalar);
+      : HVecUnaryOperation(arena, scalar, packed_type, vector_length, dex_pc) {
+    DCHECK(!scalar->IsVecOperation());
   }
   DECLARE_INSTRUCTION(VecReplicateScalar);
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
 };
 
-// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
-class HVecSetScalars FINAL : public HVecUnaryOperation {
-  HVecSetScalars(ArenaAllocator* arena,
-                 HInstruction** scalars,  // array
-                 Primitive::Type packed_type,
-                 size_t vector_length,
-                 uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
-    for (size_t i = 0; i < vector_length; i++) {
-      SetRawInputAt(0, scalars[i]);
-    }
-  }
-  DECLARE_INSTRUCTION(VecSetScalars);
- private:
-  DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
-};
-
 // Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
 // viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
 class HVecSumReduce FINAL : public HVecUnaryOperation {
@@ -221,10 +220,9 @@
                 Primitive::Type packed_type,
                 size_t vector_length,
                 uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
     DCHECK(input->IsVecOperation());
     DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, input);
   }
 
   // TODO: probably integral promotion
@@ -244,10 +242,9 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
     DCHECK(input->IsVecOperation());
     DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type);  // actual convert
-    SetRawInputAt(0, input);
   }
 
   Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
@@ -268,10 +265,9 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
     DCHECK(input->IsVecOperation());
     DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, input);
   }
   DECLARE_INSTRUCTION(VecNeg);
  private:
@@ -287,10 +283,9 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
     DCHECK(input->IsVecOperation());
     DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, input);
   }
   DECLARE_INSTRUCTION(VecAbs);
  private:
@@ -307,15 +302,18 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
     DCHECK(input->IsVecOperation());
-    SetRawInputAt(0, input);
   }
   DECLARE_INSTRUCTION(VecNot);
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecNot);
 };
 
+//
+// Definitions of concrete binary vector operations in HIR.
+//
+
 // Adds every component in the two vectors,
 // viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
 class HVecAdd FINAL : public HVecBinaryOperation {
@@ -326,18 +324,50 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
     DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecAdd);
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecAdd);
 };
 
+// Performs halving add on every component in the two vectors, viz.
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// or      [ x1, .. , xn ] hadd  [ y1, .. , yn ] = [ (x1 + y1)     >> 1, .. , (xn + yn)     >> 1 ]
+// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
+class HVecHalvingAdd FINAL : public HVecBinaryOperation {
+ public:
+  HVecHalvingAdd(ArenaAllocator* arena,
+                 HInstruction* left,
+                 HInstruction* right,
+                 Primitive::Type packed_type,
+                 size_t vector_length,
+                 bool is_unsigned,  // true: unsigned operands, false: signed operands
+                 bool is_rounded,   // true: rounded form (hradd), false: plain form (hadd)
+                 uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc),
+        is_unsigned_(is_unsigned),
+        is_rounded_(is_rounded) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());  // both operands are vectors
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);  // same packed type
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+  }
+
+  bool IsUnsigned() const { return is_unsigned_; }  // value passed as is_unsigned
+  bool IsRounded() const { return is_rounded_; }    // value passed as is_rounded
+
+  DECLARE_INSTRUCTION(VecHalvingAdd);
+
+ private:
+  bool is_unsigned_;  // set once in the constructor
+  bool is_rounded_;   // set once in the constructor
+
+  DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd);
+};
+
 // Subtracts every component in the two vectors,
 // viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
 class HVecSub FINAL : public HVecBinaryOperation {
@@ -348,12 +378,10 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
     DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecSub);
  private:
@@ -370,12 +398,10 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
     DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecMul);
  private:
@@ -392,18 +418,56 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
     DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecDiv);
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecDiv);
 };
 
+// Takes minimum of every component in the two vectors,
+// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ].
+class HVecMin FINAL : public HVecBinaryOperation {
+ public:
+  HVecMin(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());  // both operands are vectors
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);  // same packed type
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+  }
+  DECLARE_INSTRUCTION(VecMin);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecMin);
+};
+
+// Takes maximum of every component in the two vectors,
+// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ].
+class HVecMax FINAL : public HVecBinaryOperation {
+ public:
+  HVecMax(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());  // both operands are vectors
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);  // same packed type
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+  }
+  DECLARE_INSTRUCTION(VecMax);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecMax);
+};
+
 // Bitwise-ands every component in the two vectors,
 // viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
 class HVecAnd FINAL : public HVecBinaryOperation {
@@ -414,10 +478,8 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecAnd);
  private:
@@ -434,10 +496,8 @@
              Primitive::Type packed_type,
              size_t vector_length,
              uint32_t dex_pc = kNoDexPc)
-         : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+         : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecAndNot);
  private:
@@ -454,10 +514,8 @@
          Primitive::Type packed_type,
          size_t vector_length,
          uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecOr);
  private:
@@ -474,10 +532,8 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation() && right->IsVecOperation());
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecXor);
  private:
@@ -494,11 +550,9 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecShl);
  private:
@@ -515,11 +569,9 @@
           Primitive::Type packed_type,
           size_t vector_length,
           uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecShr);
  private:
@@ -536,17 +588,96 @@
            Primitive::Type packed_type,
            size_t vector_length,
            uint32_t dex_pc = kNoDexPc)
-      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+      : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
     DCHECK(left->IsVecOperation());
     DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
-    SetRawInputAt(0, left);
-    SetRawInputAt(1, right);
   }
   DECLARE_INSTRUCTION(VecUShr);
  private:
   DISALLOW_COPY_AND_ASSIGN(HVecUShr);
 };
 
+//
+// Definitions of concrete miscellaneous vector operations in HIR.
+//
+
+// Assigns the given scalar elements to a vector,
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
+class HVecSetScalars FINAL : public HVecOperation {  // NOTE(review): no "public:", ctor is private.
+  HVecSetScalars(ArenaAllocator* arena,
+                 HInstruction** scalars,  // array of vector_length non-vector inputs
+                 Primitive::Type packed_type,
+                 size_t vector_length,
+                 uint32_t dex_pc = kNoDexPc)
+      : HVecOperation(arena,
+                      packed_type,
+                      SideEffects::None(),
+                      /* number_of_inputs */ vector_length,
+                      vector_length,
+                      dex_pc) {
+    for (size_t i = 0; i < vector_length; i++) {
+      DCHECK(!scalars[i]->IsVecOperation());
+      SetRawInputAt(i, scalars[i]);  // Scalar i fills input slot i; slot 0 alone was a bug.
+    }
+  }
+  DECLARE_INSTRUCTION(VecSetScalars);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
+};
+
+// Multiplies every component in the two vectors and accumulates the products (kAdd or kSub form).
+// For kAdd, viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] =
+//     [ acc1 + x1 * y1, .. , accn + xn * yn ].
+class HVecMultiplyAccumulate FINAL : public HVecOperation {
+ public:
+  HVecMultiplyAccumulate(ArenaAllocator* arena,
+                         InstructionKind op,  // must be kAdd or kSub (DCHECKed below)
+                         HInstruction* accumulator,
+                         HInstruction* mul_left,
+                         HInstruction* mul_right,
+                         Primitive::Type packed_type,
+                         size_t vector_length,
+                         uint32_t dex_pc = kNoDexPc)
+      : HVecOperation(arena,
+                      packed_type,
+                      SideEffects::None(),
+                      /* number_of_inputs */ 3,
+                      vector_length,
+                      dex_pc),
+        op_kind_(op) {
+    DCHECK(op == InstructionKind::kAdd || op == InstructionKind::kSub);
+    DCHECK(accumulator->IsVecOperation());  // all three inputs are vectors
+    DCHECK(mul_left->IsVecOperation() && mul_right->IsVecOperation());
+    DCHECK_EQ(accumulator->AsVecOperation()->GetPackedType(), packed_type);  // same packed type
+    DCHECK_EQ(mul_left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK_EQ(mul_right->AsVecOperation()->GetPackedType(), packed_type);
+
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;  // input slot of the accumulator
+  static constexpr int kInputMulLeftIndex = 1;      // input slot of the left multiplicand
+  static constexpr int kInputMulRightIndex = 2;     // input slot of the right multiplicand
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsVecMultiplyAccumulate()->op_kind_;  // compares only the op kind
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(VecMultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate);
+};
+
 // Loads a vector from memory, viz. load(mem, 1)
 // yield the vector [ mem(1), .. , mem(n) ].
 class HVecLoad FINAL : public HVecMemoryOperation {
@@ -560,7 +691,7 @@
       : HVecMemoryOperation(arena,
                             packed_type,
                             SideEffects::ArrayReadOfType(packed_type),
-                            /*number_of_inputs*/ 2,
+                            /* number_of_inputs */ 2,
                             vector_length,
                             dex_pc) {
     SetRawInputAt(0, base);
@@ -585,7 +716,7 @@
       : HVecMemoryOperation(arena,
                             packed_type,
                             SideEffects::ArrayWriteOfType(packed_type),
-                            /*number_of_inputs*/ 3,
+                            /* number_of_inputs */ 3,
                             vector_length,
                             dex_pc) {
     DCHECK(value->IsVecOperation());
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index d5637b9..98332d3 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,6 +16,8 @@
 
 #include "reference_type_propagation.h"
 
+#include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "base/enums.h"
 #include "class_linker-inl.h"
 #include "mirror/class-inl.h"
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 071cd57..f8c4008 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5610,7 +5610,7 @@
   " 214:	ecbd 8a10 	vpop	{s16-s31}\n",
   " 218:	e8bd 8de0 	ldmia.w	sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
   " 21c:	4660      	mov	r0, ip\n",
-  " 21e:	f8d9 c2b4 	ldr.w	ip, [r9, #692]	; 0x2b4\n",
+  " 21e:	f8d9 c2b8 	ldr.w	ip, [r9, #696]	; 0x2b8\n",
   " 222:	47e0      	blx	ip\n",
   nullptr
 };
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 0cff44d..57223b5 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1703,6 +1703,7 @@
 
 // TODO: don't use rtmp, use daui, dahi, dati.
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
+  CHECK_NE(rs, rtmp);
   if (IsInt<16>(value)) {
     Daddiu(rt, rs, value);
   } else {
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index fa7e985..0b1ab75 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -17,6 +17,7 @@
 // Test is in compiler, as it uses compiler related code.
 #include "verifier/verifier_deps.h"
 
+#include "art_method-inl.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiler_callbacks.h"
@@ -207,9 +208,9 @@
     ScopedObjectAccess soa(Thread::Current());
     LoadDexFile(&soa);
     mirror::Class* klass_dst = FindClassByName(dst, &soa);
-    DCHECK(klass_dst != nullptr);
+    DCHECK(klass_dst != nullptr) << dst;
     mirror::Class* klass_src = FindClassByName(src, &soa);
-    DCHECK(klass_src != nullptr);
+    DCHECK(klass_src != nullptr) << src;
     verifier_deps_->AddAssignability(*primary_dex_file_,
                                      klass_dst,
                                      klass_src,
@@ -1536,5 +1537,16 @@
   ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "LIface;", false));
 }
 
+TEST_F(VerifierDepsTest, Assignable_Arrays) {
+  ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "[LIface;",
+                                         /* src */ "[LMyClassExtendingInterface;",
+                                         /* is_strict */ false,
+                                         /* is_assignable */ true));
+  ASSERT_FALSE(HasAssignable(  // NOTE(review): presumably queries recorded deps; confirm.
+      "LIface;", "LMyClassExtendingInterface;", /* expected_is_assignable */ true));
+  ASSERT_FALSE(HasAssignable(  // Same component-type pair, opposite expected outcome.
+      "LIface;", "LMyClassExtendingInterface;", /* expected_is_assignable */ false));
+}
+
 }  // namespace verifier
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 81566c4..a9108e0e 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -385,6 +385,8 @@
   UsageError("      This option is incompatible with read barriers (e.g., if dex2oat has been");
   UsageError("      built with the environment variable `ART_USE_READ_BARRIER` set to `true`).");
   UsageError("");
+  UsageError("  --classpath-dir=<directory-path>: directory used to resolve relative class paths.");
+  UsageError("");
   std::cerr << "See log for usage error information\n";
   exit(EXIT_FAILURE);
 }
@@ -1234,6 +1236,8 @@
           Usage("Cannot use --force-determinism with read barriers or non-CMS garbage collector");
         }
         force_determinism_ = true;
+      } else if (option.starts_with("--classpath-dir=")) {
+        classpath_dir_ = option.substr(strlen("--classpath-dir=")).data();
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
@@ -1486,12 +1490,13 @@
       }
 
       // Open dex files for class path.
-      const std::vector<std::string> class_path_locations =
+      std::vector<std::string> class_path_locations =
           GetClassPathLocations(runtime_->GetClassPathString());
       OpenClassPathFiles(class_path_locations,
                          &class_path_files_,
                          &opened_oat_files_,
-                         runtime_->GetInstructionSet());
+                         runtime_->GetInstructionSet(),
+                         classpath_dir_);
 
       // Store the classpath we have right now.
       std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
@@ -1501,7 +1506,7 @@
         // When passing the special shared library as the classpath, it is the only path.
         encoded_class_path = OatFile::kSpecialSharedLibrary;
       } else {
-        encoded_class_path = OatFile::EncodeDexFileDependencies(class_path_files);
+        encoded_class_path = OatFile::EncodeDexFileDependencies(class_path_files, classpath_dir_);
       }
       key_value_store_->Put(OatHeader::kClassPathKey, encoded_class_path);
     }
@@ -2180,18 +2185,23 @@
 
   // Opens requested class path files and appends them to opened_dex_files. If the dex files have
   // been stripped, this opens them from their oat files and appends them to opened_oat_files.
-  static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
+  static void OpenClassPathFiles(std::vector<std::string>& class_path_locations,
                                  std::vector<std::unique_ptr<const DexFile>>* opened_dex_files,
                                  std::vector<std::unique_ptr<OatFile>>* opened_oat_files,
-                                 InstructionSet isa) {
+                                 InstructionSet isa,
+                                 std::string& classpath_dir) {
     DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles dex out-param is nullptr";
     DCHECK(opened_oat_files != nullptr) << "OpenClassPathFiles oat out-param is nullptr";
-    for (const std::string& location : class_path_locations) {
+    for (std::string& location : class_path_locations) {
       // Stop early if we detect the special shared library, which may be passed as the classpath
       // for dex2oat when we want to skip the shared libraries check.
       if (location == OatFile::kSpecialSharedLibrary) {
         break;
       }
+      // If path is relative, append it to the provided base directory.
+      if (!classpath_dir.empty() && location[0] != '/') {
+        location = classpath_dir + '/' + location;
+      }
       static constexpr bool kVerifyChecksum = true;
       std::string error_msg;
       if (!DexFile::Open(
@@ -2743,6 +2753,9 @@
   // See CompilerOptions.force_determinism_.
   bool force_determinism_;
 
+  // Directory of relative classpaths.
+  std::string classpath_dir_;
+
   // Whether the given input vdex is also the output.
   bool update_input_vdex_ = false;
 
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index f307cbc..06a0f23 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -28,6 +28,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
 #include "gc/space/image_space.h"
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 0c2717f..fbb0978 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -42,6 +42,7 @@
 #include "mirror/dex_cache.h"
 #include "mirror/executable.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/method.h"
 #include "mirror/reference.h"
 #include "noop_compiler_callbacks.h"
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 6c3bc04..0860b2e 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -37,6 +37,7 @@
         "base/hex_dump.cc",
         "base/logging.cc",
         "base/mutex.cc",
+        "base/safe_copy.cc",
         "base/scoped_arena_allocator.cc",
         "base/scoped_flock.cc",
         "base/stringpiece.cc",
@@ -92,6 +93,7 @@
         "gc/space/space.cc",
         "gc/space/zygote_space.cc",
         "gc/task_processor.cc",
+        "gc/verification.cc",
         "hprof/hprof.cc",
         "image.cc",
         "indirect_reference_table.cc",
@@ -185,13 +187,13 @@
         "plugin.cc",
         "primitive.cc",
         "quick_exception_handler.cc",
-        "quick/inline_method_analyser.cc",
         "reference_table.cc",
         "reflection.cc",
         "runtime.cc",
         "runtime_callbacks.cc",
         "runtime_common.cc",
         "runtime_options.cc",
+        "scoped_thread_state_change.cc",
         "signal_catcher.cc",
         "stack.cc",
         "stack_map.cc",
@@ -450,7 +452,6 @@
         "oat.h",
         "object_callbacks.h",
         "process_state.h",
-        "quick/inline_method_analyser.h",
         "runtime.h",
         "stack.h",
         "thread.h",
@@ -522,6 +523,7 @@
         "base/hex_dump_test.cc",
         "base/histogram_test.cc",
         "base/mutex_test.cc",
+        "base/safe_copy_test.cc",
         "base/scoped_flock_test.cc",
         "base/time_utils_test.cc",
         "base/timing_logger_test.cc",
@@ -546,6 +548,7 @@
         "gc/accounting/space_bitmap_test.cc",
         "gc/collector/immune_spaces_test.cc",
         "gc/heap_test.cc",
+        "gc/heap_verification_test.cc",
         "gc/reference_queue_test.cc",
         "gc/space/dlmalloc_space_static_test.cc",
         "gc/space/dlmalloc_space_random_test.cc",
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 923ff4f..4c15450 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -19,14 +19,12 @@
 
 #include <sys/ucontext.h>
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/enums.h"
-#include "base/macros.h"
 #include "base/hex_dump.h"
-#include "globals.h"
 #include "base/logging.h"
-#include "base/hex_dump.h"
-#include "thread.h"
+#include "base/macros.h"
+#include "globals.h"
 #include "thread-inl.h"
 
 //
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index cfcd6a7..6b77200 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -32,9 +32,17 @@
 #define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0x300
 
 // The offset of the reference load LDR from the return address in LR for field loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8
+#else
 #define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4
+#endif
 // The offset of the reference load LDR from the return address in LR for array loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8
+#else
 #define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4
+#endif
 // The offset of the reference load LDR from the return address in LR for GC root loads.
 #define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8
 
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 193af58..dc4e8f3 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -19,14 +19,13 @@
 
 #include <sys/ucontext.h>
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/enums.h"
+#include "base/hex_dump.h"
+#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
-#include "base/logging.h"
-#include "base/hex_dump.h"
 #include "registers_arm64.h"
-#include "thread.h"
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c7fa7f5..d043962 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2649,7 +2649,8 @@
      *
      * For field accesses and array loads with a constant index the thunk loads
      * the reference into IP0 using introspection and calls the main entrypoint,
-     * art_quick_read_barrier_mark_introspection.
+     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
+     * the passed reference is poisoned.
      *
      * For array accesses with non-constant index, the thunk inserts the bits
      * 16-21 of the LDR instruction to the entrypoint address, effectively
@@ -2663,6 +2664,7 @@
      *
      * For GC root accesses we cannot use the main entrypoint because of the
      * different offset where the LDR instruction in generated code is located.
+     * (And even with heap poisoning enabled, GC roots are not poisoned.)
      * To re-use the same entrypoint pointer in generated code, we make sure
      * that the gc root entrypoint (a copy of the entrypoint with a different
      * offset for introspection loads) is located at a known offset (768 bytes,
@@ -2686,6 +2688,8 @@
     .balign 512
 ENTRY art_quick_read_barrier_mark_introspection
     // At this point, IP0 contains the reference, IP1 can be freely used.
+    // For heap poisoning, the reference is poisoned, so unpoison it first.
+    UNPOISON_HEAP_REF wIP0
     // If reference is null, just return it in the right register.
     cbz   wIP0, .Lmark_introspection_return
     // Use wIP1 as temp and check the mark bit of the reference.
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index f9c19e8..7072a8a 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -14,16 +14,16 @@
  * limitations under the License.
  */
 
-#include "arch/mips/quick_method_frame_info_mips.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
-#include "art_method-inl.h"
+
+#include "art_method.h"
+#include "base/hex_dump.h"
+#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
-#include "base/logging.h"
-#include "base/hex_dump.h"
+#include "quick_method_frame_info_mips.h"
 #include "registers_mips.h"
-#include "thread.h"
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index d668d3a..f9a92c8 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -14,16 +14,17 @@
  * limitations under the License.
  */
 
-#include "arch/mips64/quick_method_frame_info_mips64.h"
 #include "fault_handler.h"
+
 #include <sys/ucontext.h>
-#include "art_method-inl.h"
+
+#include "art_method.h"
+#include "base/hex_dump.h"
+#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
-#include "base/logging.h"
-#include "base/hex_dump.h"
+#include "quick_method_frame_info_mips64.h"
 #include "registers_mips64.h"
-#include "thread.h"
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index f407ebf..7d8abb8 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -19,13 +19,13 @@
 
 #include <sys/ucontext.h>
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/enums.h"
-#include "base/macros.h"
-#include "globals.h"
-#include "base/logging.h"
 #include "base/hex_dump.h"
-#include "thread.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/safe_copy.h"
+#include "globals.h"
 #include "thread-inl.h"
 
 #if defined(__APPLE__)
@@ -78,6 +78,30 @@
 // Get the size of an instruction in bytes.
 // Return 0 if the instruction is not handled.
 static uint32_t GetInstructionSize(const uint8_t* pc) {
+  // Don't segfault if pc points to garbage.
+  char buf[15];  // x86/x86-64 have a maximum instruction length of 15 bytes.
+  ssize_t bytes = SafeCopy(buf, pc, sizeof(buf));
+
+  if (bytes == 0) {
+    // Nothing was readable.
+    return 0;
+  }
+
+  if (bytes == -1) {
+    // SafeCopy not supported, assume that the entire range is readable.
+    bytes = 16;
+  } else {
+    pc = reinterpret_cast<uint8_t*>(buf);
+  }
+
+#define INCREMENT_PC()          \
+  do {                          \
+    pc++;                       \
+    if (pc - startpc > bytes) { \
+      return 0;                 \
+    }                           \
+  } while (0)
+
 #if defined(__x86_64)
   const bool x86_64 = true;
 #else
@@ -86,7 +110,8 @@
 
   const uint8_t* startpc = pc;
 
-  uint8_t opcode = *pc++;
+  uint8_t opcode = *pc;
+  INCREMENT_PC();
   uint8_t modrm;
   bool has_modrm = false;
   bool two_byte = false;
@@ -118,7 +143,8 @@
 
       // Group 4
       case 0x67:
-        opcode = *pc++;
+        opcode = *pc;
+        INCREMENT_PC();
         prefix_present = true;
         break;
     }
@@ -128,13 +154,15 @@
   }
 
   if (x86_64 && opcode >= 0x40 && opcode <= 0x4f) {
-    opcode = *pc++;
+    opcode = *pc;
+    INCREMENT_PC();
   }
 
   if (opcode == 0x0f) {
     // Two byte opcode
     two_byte = true;
-    opcode = *pc++;
+    opcode = *pc;
+    INCREMENT_PC();
   }
 
   bool unhandled_instruction = false;
@@ -147,7 +175,8 @@
       case 0xb7:
       case 0xbe:        // movsx
       case 0xbf:
-        modrm = *pc++;
+        modrm = *pc;
+        INCREMENT_PC();
         has_modrm = true;
         break;
       default:
@@ -166,28 +195,32 @@
       case 0x3c:
       case 0x3d:
       case 0x85:        // test.
-        modrm = *pc++;
+        modrm = *pc;
+        INCREMENT_PC();
         has_modrm = true;
         break;
 
       case 0x80:        // group 1, byte immediate.
       case 0x83:
       case 0xc6:
-        modrm = *pc++;
+        modrm = *pc;
+        INCREMENT_PC();
         has_modrm = true;
         immediate_size = 1;
         break;
 
       case 0x81:        // group 1, word immediate.
       case 0xc7:        // mov
-        modrm = *pc++;
+        modrm = *pc;
+        INCREMENT_PC();
         has_modrm = true;
         immediate_size = operand_size_prefix ? 2 : 4;
         break;
 
       case 0xf6:
       case 0xf7:
-        modrm = *pc++;
+        modrm = *pc;
+        INCREMENT_PC();
         has_modrm = true;
         switch ((modrm >> 3) & 7) {  // Extract "reg/opcode" from "modr/m".
           case 0:  // test
@@ -222,7 +255,7 @@
 
     // Check for SIB.
     if (mod != 3U /* 0b11 */ && (modrm & 7U /* 0b111 */) == 4) {
-      ++pc;     // SIB
+      INCREMENT_PC();     // SIB
     }
 
     switch (mod) {
@@ -238,6 +271,9 @@
   pc += displacement_size + immediate_size;
 
   VLOG(signals) << "x86 instruction length calculated as " << (pc - startpc);
+  if (pc - startpc > bytes) {
+    return 0;
+  }
   return pc - startpc;
 }
 
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 16b73c6..0de0f02 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -21,6 +21,7 @@
 
 #include "base/logging.h"
 #include "class_linker.h"
+#include "dex_file-inl.h"
 #include "gc_root-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jvalue.h"
@@ -46,16 +47,6 @@
   declaring_class_ = GcRoot<mirror::Class>(new_declaring_class);
 }
 
-inline uint32_t ArtField::GetAccessFlags() {
-  DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
-  return access_flags_;
-}
-
-inline MemberOffset ArtField::GetOffset() {
-  DCHECK(GetDeclaringClass()->IsResolved());
-  return MemberOffset(offset_);
-}
-
 inline MemberOffset ArtField::GetOffsetDuringLinking() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   return MemberOffset(offset_);
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 7e13104..bc728f4 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -78,5 +78,12 @@
   return result;
 }
 
+void ArtField::GetAccessFlagsDCheck() {
+  CHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
+}
+
+void ArtField::GetOffsetDCheck() {
+  CHECK(GetDeclaringClass()->IsResolved());
+}
 
 }  // namespace art
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 666ed8a..3789b0c 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -51,7 +51,12 @@
     return declaring_class_.AddressWithoutBarrier();
   }
 
-  uint32_t GetAccessFlags() REQUIRES_SHARED(Locks::mutator_lock_);
+  uint32_t GetAccessFlags() REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kIsDebugBuild) {
+      GetAccessFlagsDCheck();
+    }
+    return access_flags_;
+  }
 
   void SetAccessFlags(uint32_t new_access_flags) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Not called within a transaction.
@@ -80,7 +85,12 @@
   }
 
   // Offset to field within an Object.
-  MemberOffset GetOffset() REQUIRES_SHARED(Locks::mutator_lock_);
+  MemberOffset GetOffset() REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kIsDebugBuild) {
+      GetOffsetDCheck();
+    }
+    return MemberOffset(offset_);
+  }
 
   static MemberOffset OffsetOffset() {
     return MemberOffset(OFFSETOF_MEMBER(ArtField, offset_));
@@ -227,6 +237,9 @@
                                               ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void GetAccessFlagsDCheck() REQUIRES_SHARED(Locks::mutator_lock_);
+  void GetOffsetDCheck() REQUIRES_SHARED(Locks::mutator_lock_);
+
   GcRoot<mirror::Class> declaring_class_;
 
   uint32_t access_flags_ = 0;
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 5cf0e0f..59cd978 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -82,37 +82,6 @@
           expected_root, desired_root);
 }
 
-// AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
-// TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
-template <ReadBarrierOption kReadBarrierOption>
-ALWAYS_INLINE static inline void DoGetAccessFlagsHelper(ArtMethod* method)
-    NO_THREAD_SAFETY_ANALYSIS {
-  CHECK(method->IsRuntimeMethod() ||
-        method->GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
-        method->GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
-}
-
-template <ReadBarrierOption kReadBarrierOption>
-inline uint32_t ArtMethod::GetAccessFlags() {
-  if (kCheckDeclaringClassState) {
-    Thread* self = Thread::Current();
-    if (!Locks::mutator_lock_->IsSharedHeld(self)) {
-      if (self->IsThreadSuspensionAllowable()) {
-        ScopedObjectAccess soa(self);
-        CHECK(IsRuntimeMethod() ||
-              GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
-              GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
-      }
-    } else {
-      // We cannot use SOA in this case. We might be holding the lock, but may not be in the
-      // runnable state (e.g., during GC).
-      Locks::mutator_lock_->AssertSharedHeld(self);
-      DoGetAccessFlagsHelper<kReadBarrierOption>(this);
-    }
-  }
-  return access_flags_.load(std::memory_order_relaxed);
-}
-
 inline uint16_t ArtMethod::GetMethodIndex() {
   DCHECK(IsRuntimeMethod() || GetDeclaringClass()->IsResolved());
   return method_index_;
@@ -224,10 +193,6 @@
   }
 }
 
-inline bool ArtMethod::IsRuntimeMethod() {
-  return dex_method_index_ == DexFile::kDexNoIndex;
-}
-
 inline bool ArtMethod::IsCalleeSaveMethod() {
   if (!IsRuntimeMethod()) {
     return false;
@@ -273,6 +238,11 @@
   return dex_file->GetMethodDeclaringClassDescriptor(dex_file->GetMethodId(dex_method_idx));
 }
 
+inline const char* ArtMethod::GetShorty() {
+  uint32_t unused_length;
+  return GetShorty(&unused_length);
+}
+
 inline const char* ArtMethod::GetShorty(uint32_t* out_length) {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 5a71be6..7de8916 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -21,7 +21,6 @@
 #include "android-base/stringprintf.h"
 
 #include "arch/context.h"
-#include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/stringpiece.h"
 #include "class_linker-inl.h"
@@ -43,6 +42,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "oat_file-inl.h"
+#include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "well_known_classes.h"
 
@@ -55,6 +55,10 @@
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 
+// Enforce that we have the right index for runtime methods.
+static_assert(ArtMethod::kRuntimeMethodDexMethodIndex == DexFile::kDexNoIndex,
+              "Wrong runtime-method dex method index");
+
 ArtMethod* ArtMethod::GetNonObsoleteMethod() {
   DCHECK_EQ(kRuntimePointerSize, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
   if (LIKELY(!IsObsolete())) {
@@ -372,20 +376,25 @@
   self->PopManagedStackFragment(fragment);
 }
 
-void ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
+const void* ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
   CHECK(IsNative()) << PrettyMethod();
   CHECK(!IsFastNative()) << PrettyMethod();
   CHECK(native_method != nullptr) << PrettyMethod();
   if (is_fast) {
     AddAccessFlags(kAccFastNative);
   }
-  SetEntryPointFromJni(native_method);
+  void* new_native_method = nullptr;
+  Runtime::Current()->GetRuntimeCallbacks()->RegisterNativeMethod(this,
+                                                                  native_method,
+                                                                  /*out*/&new_native_method);
+  SetEntryPointFromJni(new_native_method);
+  return new_native_method;
 }
 
 void ArtMethod::UnregisterNative() {
   CHECK(IsNative() && !IsFastNative()) << PrettyMethod();
   // restore stub to lookup native pointer via dlsym
-  RegisterNative(GetJniDlsymLookupStub(), false);
+  SetEntryPointFromJni(GetJniDlsymLookupStub());
 }
 
 bool ArtMethod::IsOverridableByDefaultMethod() {
@@ -801,4 +810,35 @@
   return long_name;
 }
 
+// AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
+// TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
+template <ReadBarrierOption kReadBarrierOption>
+ALWAYS_INLINE static inline void DoGetAccessFlagsHelper(ArtMethod* method)
+    NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+        method->GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+}
+
+template <ReadBarrierOption kReadBarrierOption> void ArtMethod::GetAccessFlagsDCheck() {
+  if (kCheckDeclaringClassState) {
+    Thread* self = Thread::Current();
+    if (!Locks::mutator_lock_->IsSharedHeld(self)) {
+      if (self->IsThreadSuspensionAllowable()) {
+        ScopedObjectAccess soa(self);
+        CHECK(IsRuntimeMethod() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+              GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
+      }
+    } else {
+      // We cannot use SOA in this case. We might be holding the lock, but may not be in the
+      // runnable state (e.g., during GC).
+      Locks::mutator_lock_->AssertSharedHeld(self);
+      DoGetAccessFlagsHelper<kReadBarrierOption>(this);
+    }
+  }
+}
+template void ArtMethod::GetAccessFlagsDCheck<ReadBarrierOption::kWithReadBarrier>();
+template void ArtMethod::GetAccessFlagsDCheck<ReadBarrierOption::kWithoutReadBarrier>();
+
 }  // namespace art
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 51b6576..856bfd2 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -55,6 +55,10 @@
  public:
   static constexpr bool kCheckDeclaringClassState = kIsDebugBuild;
 
+  // The runtime dex_method_index is kDexNoIndex. To lower dependencies, we use this
+  // constexpr, and ensure that the value is correct in art_method.cc.
+  static constexpr uint32_t kRuntimeMethodDexMethodIndex = 0xFFFFFFFF;
+
   ArtMethod() : access_flags_(0), dex_code_item_offset_(0), dex_method_index_(0),
       method_index_(0), hotness_count_(0) { }
 
@@ -90,7 +94,12 @@
   // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for
   // a proxy method.
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  ALWAYS_INLINE uint32_t GetAccessFlags();
+  uint32_t GetAccessFlags() {
+    if (kCheckDeclaringClassState) {
+      GetAccessFlagsDCheck<kReadBarrierOption>();
+    }
+    return access_flags_.load(std::memory_order_relaxed);
+  }
 
   // This version should only be called when it's certain there is no
   // concurrency so there is no need to guarantee atomicity. For example,
@@ -398,8 +407,10 @@
                      pointer_size);
   }
 
-  void RegisterNative(const void* native_method, bool is_fast)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  // Registers the native method and returns the new entry point. NB The returned entry point might
+  // be different from the native_method argument if some MethodCallback modifies it.
+  const void* RegisterNative(const void* native_method, bool is_fast)
+      REQUIRES_SHARED(Locks::mutator_lock_) WARN_UNUSED;
 
   void UnregisterNative() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -502,7 +513,9 @@
 
   // Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
   // conventions for a method of managed code. Returns false for Proxy methods.
-  ALWAYS_INLINE bool IsRuntimeMethod();
+  ALWAYS_INLINE bool IsRuntimeMethod() {
+    return dex_method_index_ == kRuntimeMethodDexMethodIndex;  // Same as DexFile::kDexNoIndex.
+  }
 
   // Is this a hand crafted method used for something like describing callee saves?
   bool IsCalleeSaveMethod() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -530,10 +543,7 @@
 
   const char* GetDeclaringClassDescriptor() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  const char* GetShorty() REQUIRES_SHARED(Locks::mutator_lock_) {
-    uint32_t unused_length;
-    return GetShorty(&unused_length);
-  }
+  ALWAYS_INLINE const char* GetShorty() REQUIRES_SHARED(Locks::mutator_lock_);
 
   const char* GetShorty(uint32_t* out_length) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -741,9 +751,21 @@
     }
   }
 
+  template <ReadBarrierOption kReadBarrierOption> void GetAccessFlagsDCheck();
+
   DISALLOW_COPY_AND_ASSIGN(ArtMethod);  // Need to use CopyFrom to deal with 32 vs 64 bits.
 };
 
+class MethodCallback {
+ public:
+  virtual ~MethodCallback() {}
+
+  virtual void RegisterNativeMethod(ArtMethod* method,
+                                    const void* original_implementation,
+                                    /*out*/void** new_implementation)
+      REQUIRES_SHARED(Locks::mutator_lock_) = 0;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_METHOD_H_
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 4a2e34f..6d271ed 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -98,7 +98,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 
diff --git a/runtime/base/allocator.cc b/runtime/base/allocator.cc
index f1d0a5f..2a2790c 100644
--- a/runtime/base/allocator.cc
+++ b/runtime/base/allocator.cc
@@ -21,7 +21,6 @@
 
 #include "atomic.h"
 #include "base/logging.h"
-#include "thread-inl.h"
 
 namespace art {
 
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 44a84c8..08b370e 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -89,13 +89,14 @@
     // Check if a bad Mutex of this level or lower is held.
     bool bad_mutexes_held = false;
     for (int i = level_; i >= 0; --i) {
-      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-      if (UNLIKELY(held_mutex != nullptr)) {
+      LockLevel lock_level_i = static_cast<LockLevel>(i);
+      BaseMutex* held_mutex = self->GetHeldMutex(lock_level_i);
+      if (UNLIKELY(held_mutex != nullptr) && lock_level_i != kAbortLock) {
         LOG(ERROR) << "Lock level violation: holding \"" << held_mutex->name_ << "\" "
-                   << "(level " << LockLevel(i) << " - " << i
+                   << "(level " << lock_level_i << " - " << i
                    << ") while locking \"" << name_ << "\" "
                    << "(level " << level_ << " - " << static_cast<int>(level_) << ")";
-        if (i > kAbortLock) {
+        if (lock_level_i > kAbortLock) {
           // Only abort in the check below if this is more than abort level lock.
           bad_mutexes_held = true;
         }
diff --git a/runtime/base/safe_copy.cc b/runtime/base/safe_copy.cc
new file mode 100644
index 0000000..06249ac
--- /dev/null
+++ b/runtime/base/safe_copy.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "safe_copy.h"
+
+#include <unistd.h>
+#include <sys/uio.h>
+#include <sys/user.h>
+
+#include <algorithm>
+
+#include <android-base/macros.h>
+
+#include "runtime/base/bit_utils.h"
+
+namespace art {
+
+ssize_t SafeCopy(void *dst, const void *src, size_t len) {
+#if defined(__linux__)
+  struct iovec dst_iov = {
+    .iov_base = dst,
+    .iov_len = len,
+  };
+
+  // Split up the remote read across page boundaries.
+  // From the manpage:
+  //   A partial read/write may result if one of the remote_iov elements points to an invalid
+  //   memory region in the remote process.
+  //
+  //   Partial transfers apply at the granularity of iovec elements.  These system calls won't
+  //   perform a partial transfer that splits a single iovec element.
+  constexpr size_t kMaxIovecs = 64;
+  struct iovec src_iovs[kMaxIovecs];
+  size_t iovecs_used = 0;
+
+  const char* cur = static_cast<const char*>(src);
+  while (len > 0) {
+    if (iovecs_used == kMaxIovecs) {
+      errno = EINVAL;
+      return -1;
+    }
+
+    src_iovs[iovecs_used].iov_base = const_cast<char*>(cur);
+    if (!IsAlignedParam(cur, PAGE_SIZE)) {
+      src_iovs[iovecs_used].iov_len = AlignUp(cur, PAGE_SIZE) - cur;
+    } else {
+      src_iovs[iovecs_used].iov_len = PAGE_SIZE;
+    }
+
+    src_iovs[iovecs_used].iov_len = std::min(src_iovs[iovecs_used].iov_len, len);
+
+    len -= src_iovs[iovecs_used].iov_len;
+    cur += src_iovs[iovecs_used].iov_len;
+    ++iovecs_used;
+  }
+
+  ssize_t rc = process_vm_readv(getpid(), &dst_iov, 1, src_iovs, iovecs_used, 0);
+  if (rc == -1) {
+    return 0;
+  }
+  return rc;
+#else
+  UNUSED(dst, src, len);
+  return -1;
+#endif
+}
+
+}  // namespace art
diff --git a/test/912-classes/src/B.java b/runtime/base/safe_copy.h
similarity index 60%
copy from test/912-classes/src/B.java
copy to runtime/base/safe_copy.h
index 52ce4dd..d0f497c 100644
--- a/test/912-classes/src/B.java
+++ b/runtime/base/safe_copy.h
@@ -14,5 +14,18 @@
  * limitations under the License.
  */
 
-public class B {
-}
+#ifndef ART_RUNTIME_BASE_SAFE_COPY_H_
+#define ART_RUNTIME_BASE_SAFE_COPY_H_
+
+#include <sys/types.h>
+
+namespace art {
+
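+// Safely copy up to len bytes from src to dst, tolerating unreadable source memory.
+// Returns -1 if safe copy isn't implemented on the platform, or if the transfer is too large.
+// Returns 0 if src is unreadable; otherwise returns the number of bytes copied (may be < len).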
+ssize_t SafeCopy(void *dst, const void *src, size_t len);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_SAFE_COPY_H_
diff --git a/runtime/base/safe_copy_test.cc b/runtime/base/safe_copy_test.cc
new file mode 100644
index 0000000..987895e
--- /dev/null
+++ b/runtime/base/safe_copy_test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "safe_copy.h"
+
+#include "common_runtime_test.h"
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+
+namespace art {
+
+#if defined(__linux__)
+
+TEST(SafeCopyTest, smoke) {
+  // Map four pages, mark the second one as PROT_NONE, unmap the last one.
+  void* map = mmap(nullptr, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(MAP_FAILED, map);
+  char* page1 = static_cast<char*>(map);
+  char* page2 = page1 + PAGE_SIZE;
+  char* page3 = page2 + PAGE_SIZE;
+  char* page4 = page3 + PAGE_SIZE;
+  ASSERT_EQ(0, mprotect(page1 + PAGE_SIZE, PAGE_SIZE, PROT_NONE));
+  ASSERT_EQ(0, munmap(page4, PAGE_SIZE));
+
+  page1[0] = 'a';
+  page1[PAGE_SIZE - 1] = 'z';
+
+  page3[0] = 'b';
+  page3[PAGE_SIZE - 1] = 'y';
+
+  char buf[PAGE_SIZE];
+
+  // Completely valid read.
+  memset(buf, 0xCC, sizeof(buf));
+  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE), SafeCopy(buf, page1, PAGE_SIZE)) << strerror(errno);
+  EXPECT_EQ(0, memcmp(buf, page1, PAGE_SIZE));
+
+  // Reading into a guard page.
+  memset(buf, 0xCC, sizeof(buf));
+  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE - 1), SafeCopy(buf, page1 + 1, PAGE_SIZE));
+  EXPECT_EQ(0, memcmp(buf, page1 + 1, PAGE_SIZE - 1));
+
+  // Reading from a guard page into a real page.
+  memset(buf, 0xCC, sizeof(buf));
+  EXPECT_EQ(0, SafeCopy(buf, page2 + PAGE_SIZE - 1, PAGE_SIZE));
+
+  // Reading off of the end of a mapping.
+  memset(buf, 0xCC, sizeof(buf));
+  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE), SafeCopy(buf, page3, PAGE_SIZE * 2));
+  EXPECT_EQ(0, memcmp(buf, page3, PAGE_SIZE));
+
+  // Completely invalid.
+  EXPECT_EQ(0, SafeCopy(buf, page1 + PAGE_SIZE, PAGE_SIZE));
+
+  // Clean up.
+  ASSERT_EQ(0, munmap(map, PAGE_SIZE * 3));
+}
+
+TEST(SafeCopyTest, alignment) {
+  // Copy the middle of a mapping to the end of another one.
+  void* src_map = mmap(nullptr, PAGE_SIZE * 3, PROT_READ | PROT_WRITE,
+                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(MAP_FAILED, src_map);
+
+  // Add a guard page to make sure we don't write past the end of the mapping.
+  void* dst_map = mmap(nullptr, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(MAP_FAILED, dst_map);
+
+  char* src = static_cast<char*>(src_map);
+  char* dst = static_cast<char*>(dst_map);
+  ASSERT_EQ(0, mprotect(dst + 3 * PAGE_SIZE, PAGE_SIZE, PROT_NONE));
+
+  src[512] = 'a';
+  src[PAGE_SIZE * 3 - 512 - 1] = 'z';
+
+  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE * 3 - 1024),
+            SafeCopy(dst + 1024, src + 512, PAGE_SIZE * 3 - 1024));
+  EXPECT_EQ(0, memcmp(dst + 1024, src + 512, PAGE_SIZE * 3 - 1024));
+
+  ASSERT_EQ(0, munmap(src_map, PAGE_SIZE * 3));
+  ASSERT_EQ(0, munmap(dst_map, PAGE_SIZE * 4));
+}
+
+#endif  // defined(__linux__)
+
+}  // namespace art
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 9ddc6cf..3c51f52 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -25,7 +25,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/iftable.h"
-#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change-inl.h"
 
@@ -33,10 +33,6 @@
 
 namespace art {
 
-inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
-  return FindClass(self, descriptor, ScopedNullHandle<mirror::ClassLoader>());
-}
-
 inline mirror::Class* ClassLinker::FindArrayClass(Thread* self,
                                                   ObjPtr<mirror::Class>* element_class) {
   for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
@@ -65,19 +61,6 @@
   return array_class.Ptr();
 }
 
-inline mirror::String* ClassLinker::ResolveString(dex::StringIndex string_idx,
-                                                  ArtMethod* referrer) {
-  Thread::PoisonObjectPointersIfDebug();
-  ObjPtr<mirror::String> string = referrer->GetDexCache()->GetResolvedString(string_idx);
-  if (UNLIKELY(string == nullptr)) {
-    StackHandleScope<1> hs(Thread::Current());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    string = ResolveString(dex_file, string_idx, dex_cache);
-  }
-  return string.Ptr();
-}
-
 inline ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(
     dex::TypeIndex type_idx,
     ObjPtr<mirror::DexCache> dex_cache,
@@ -191,36 +174,6 @@
   return resolved_field;
 }
 
-inline mirror::Object* ClassLinker::AllocObject(Thread* self) {
-  return GetClassRoot(kJavaLangObject)->Alloc<true, false>(
-      self,
-      Runtime::Current()->GetHeap()->GetCurrentAllocator()).Ptr();
-}
-
-template <class T>
-inline mirror::ObjectArray<T>* ClassLinker::AllocObjectArray(Thread* self, size_t length) {
-  return mirror::ObjectArray<T>::Alloc(self, GetClassRoot(kObjectArrayClass), length);
-}
-
-inline mirror::ObjectArray<mirror::Class>* ClassLinker::AllocClassArray(Thread* self,
-                                                                        size_t length) {
-  return mirror::ObjectArray<mirror::Class>::Alloc(self, GetClassRoot(kClassArrayClass), length);
-}
-
-inline mirror::ObjectArray<mirror::String>* ClassLinker::AllocStringArray(Thread* self,
-                                                                          size_t length) {
-  return mirror::ObjectArray<mirror::String>::Alloc(self,
-                                                    GetClassRoot(kJavaLangStringArrayClass),
-                                                    length);
-}
-
-inline mirror::IfTable* ClassLinker::AllocIfTable(Thread* self, size_t ifcount) {
-  return down_cast<mirror::IfTable*>(
-      mirror::IfTable::Alloc(self,
-                             GetClassRoot(kObjectArrayClass),
-                             ifcount * mirror::IfTable::kMax));
-}
-
 inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root) {
   DCHECK(!class_roots_.IsNull());
   mirror::ObjectArray<mirror::Class>* class_roots = class_roots_.Read();
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 4bc8e8e..ee33fc4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -65,6 +65,7 @@
 #include "imtable-inl.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
+#include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jit/profile_compilation_info.h"
@@ -2592,9 +2593,25 @@
         return nullptr;
       }
 
+      // Inlined DescriptorToDot(descriptor) with extra validation.
+      //
+      // Throw NoClassDefFoundError early rather than potentially load a class only to fail
+      // the DescriptorEquals() check below and give a confusing error message. For example,
+      // when native code erroneously calls JNI GetFieldId() with signature "java/lang/String"
+      // instead of "Ljava/lang/String;", the message below using the "dot" names would be
+      // "class loader [...] returned class java.lang.String instead of java.lang.String".
+      size_t descriptor_length = strlen(descriptor);
+      if (UNLIKELY(descriptor[0] != 'L') ||
+          UNLIKELY(descriptor[descriptor_length - 1] != ';') ||
+          UNLIKELY(memchr(descriptor + 1, '.', descriptor_length - 2) != nullptr)) {
+        ThrowNoClassDefFoundError("Invalid descriptor: %s.", descriptor);
+        return nullptr;
+      }
+      std::string class_name_string(descriptor + 1, descriptor_length - 2);
+      std::replace(class_name_string.begin(), class_name_string.end(), '/', '.');
+
       ScopedLocalRef<jobject> class_loader_object(
           soa.Env(), soa.AddLocalReference<jobject>(class_loader.Get()));
-      std::string class_name_string(DescriptorToDot(descriptor));
       ScopedLocalRef<jobject> result(soa.Env(), nullptr);
       {
         ScopedThreadStateChange tsc(self, kNative);
@@ -8967,6 +8984,13 @@
   return visitor.holder_.Ptr();
 }
 
+mirror::IfTable* ClassLinker::AllocIfTable(Thread* self, size_t ifcount) {
+  return down_cast<mirror::IfTable*>(
+      mirror::IfTable::Alloc(self,
+                             GetClassRoot(kObjectArrayClass),
+                             ifcount * mirror::IfTable::kMax));
+}
+
 // Instantiate ResolveMethod.
 template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
     const DexFile& dex_file,
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index a26e63b..1c280a4 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -185,7 +185,9 @@
   // boot_class_path_.
   mirror::Class* FindSystemClass(Thread* self, const char* descriptor)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::dex_lock_);
+      REQUIRES(!Locks::dex_lock_) {
+    return FindClass(self, descriptor, ScopedNullHandle<mirror::ClassLoader>());
+  }
 
   // Finds the array class given for the element class.
   mirror::Class* FindArrayClass(Thread* self, ObjPtr<mirror::Class>* element_class)
@@ -231,12 +233,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
-  // result in the DexCache. The referrer is used to identify the
-  // target DexCache and ClassLoader to use for resolution.
-  mirror::String* ResolveString(dex::StringIndex string_idx, ArtMethod* referrer)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
   mirror::String* ResolveString(const DexFile& dex_file,
                                 dex::StringIndex string_idx,
@@ -436,25 +432,6 @@
       REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Allocate an instance of a java.lang.Object.
-  mirror::Object* AllocObject(Thread* self)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Roles::uninterruptible_);
-
-  // TODO: replace this with multiple methods that allocate the correct managed type.
-  template <class T>
-  mirror::ObjectArray<T>* AllocObjectArray(Thread* self, size_t length)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Roles::uninterruptible_);
-
-  mirror::ObjectArray<mirror::Class>* AllocClassArray(Thread* self, size_t length)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Roles::uninterruptible_);
-
-  mirror::ObjectArray<mirror::String>* AllocStringArray(Thread* self, size_t length)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Roles::uninterruptible_);
-
   LengthPrefixedArray<ArtField>* AllocArtFieldArray(Thread* self,
                                                     LinearAlloc* allocator,
                                                     size_t length);
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 374b711..0891d3f 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -17,6 +17,7 @@
 #include "class_table.h"
 
 #include "mirror/class-inl.h"
+#include "oat_file.h"
 
 namespace art {
 
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 15724a1..01c6641 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -39,6 +39,7 @@
 #include "gtest/gtest.h"
 #include "handle_scope-inl.h"
 #include "interpreter/unstarted_runtime.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 3ca526c..d0b50fe 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2442,7 +2442,9 @@
   // Suspend thread to build stack trace.
   bool timed_out;
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  Thread* thread = thread_list->SuspendThreadByPeer(peer.get(), request_suspension, true,
+  Thread* thread = thread_list->SuspendThreadByPeer(peer.get(),
+                                                    request_suspension,
+                                                    /* debug_suspension */ true,
                                                     &timed_out);
   if (thread != nullptr) {
     return JDWP::ERR_NONE;
@@ -2470,7 +2472,7 @@
   bool needs_resume;
   {
     MutexLock mu2(soa.Self(), *Locks::thread_suspend_count_lock_);
-    needs_resume = thread->GetSuspendCount() > 0;
+    needs_resume = thread->GetDebugSuspendCount() > 0;
   }
   if (needs_resume) {
     Runtime::Current()->GetThreadList()->Resume(thread, true);
@@ -3686,7 +3688,10 @@
           jobject thread_peer = Dbg::GetObjectRegistry()->GetJObject(thread_id);
           bool timed_out;
           ThreadList* const thread_list = Runtime::Current()->GetThreadList();
-          suspended_thread = thread_list->SuspendThreadByPeer(thread_peer, true, true, &timed_out);
+          suspended_thread = thread_list->SuspendThreadByPeer(thread_peer,
+                                                              /* request_suspension */ true,
+                                                              /* debug_suspension */ true,
+                                                              &timed_out);
         }
         if (suspended_thread == nullptr) {
           // Thread terminated from under us while suspending.
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ba8cec3..37734e8 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,6 +19,7 @@
 
 #include "entrypoint_utils.h"
 
+#include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "class_linker-inl.h"
@@ -39,6 +40,7 @@
 #include "runtime.h"
 #include "stack_map.h"
 #include "thread.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -779,9 +781,32 @@
   return h_class.Get();
 }
 
+static inline mirror::String* ResolveString(ClassLinker* class_linker,
+                                            dex::StringIndex string_idx,
+                                            ArtMethod* referrer)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Thread::PoisonObjectPointersIfDebug();
+  ObjPtr<mirror::String> string = referrer->GetDexCache()->GetResolvedString(string_idx);
+  if (UNLIKELY(string == nullptr)) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
+    const DexFile& dex_file = *dex_cache->GetDexFile();
+    string = class_linker->ResolveString(dex_file, string_idx, dex_cache);
+  }
+  return string.Ptr();
+}
+
 inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  return class_linker->ResolveString(string_idx, referrer);
+  Thread::PoisonObjectPointersIfDebug();
+  ObjPtr<mirror::String> string = referrer->GetDexCache()->GetResolvedString(string_idx);
+  if (UNLIKELY(string == nullptr)) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
+    const DexFile& dex_file = *dex_cache->GetDexFile();
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    string = class_linker->ResolveString(dex_file, string_idx, dex_cache);
+  }
+  return string.Ptr();
 }
 
 inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) {
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index b5130d7..c340a88 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -26,6 +26,7 @@
 #include "entrypoints/quick/callee_save_frame.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
+#include "java_vm_ext.h"
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index fd23ced..eeb138b 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -17,6 +17,7 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "java_vm_ext.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
@@ -25,10 +26,10 @@
 
 // Used by the JNI dlsym stub to find the native method to invoke if none is registered.
 #if defined(__arm__) || defined(__aarch64__)
-extern "C" void* artFindNativeMethod() {
+extern "C" const void* artFindNativeMethod() {
   Thread* self = Thread::Current();
 #else
-extern "C" void* artFindNativeMethod(Thread* self) {
+extern "C" const void* artFindNativeMethod(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
 #endif
   Locks::mutator_lock_->AssertNotHeld(self);  // We come here as Native.
@@ -45,8 +46,7 @@
     return nullptr;
   } else {
     // Register so that future calls don't come here
-    method->RegisterNative(native_code, false);
-    return native_code;
+    return method->RegisterNative(native_code, false);
   }
 }
 
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 343343f..3820d85 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -14,22 +14,18 @@
  * limitations under the License.
  */
 
-#include "art_method-inl.h"
 #include "base/logging.h"
+#include "base/mutex.h"
 #include "callee_save_frame.h"
-#include "dex_file-inl.h"
 #include "interpreter/interpreter.h"
-#include "mirror/class-inl.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
+#include "obj_ptr-inl.h"  // TODO: Find the other include that isn't complete, and clean this up.
 #include "quick_exception_handler.h"
 #include "thread.h"
-#include "verifier/method_verifier.h"
 
 namespace art {
 
 NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
     if (single_frame) {
       // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index aa547bf..81560cc 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/enums.h"
 #include "callee_save_frame.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 354ae20..2b349e3 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2025,9 +2025,9 @@
 }
 
 #if defined(__arm__) || defined(__aarch64__)
-extern "C" void* artFindNativeMethod();
+extern "C" const void* artFindNativeMethod();
 #else
-extern "C" void* artFindNativeMethod(Thread* self);
+extern "C" const void* artFindNativeMethod(Thread* self);
 #endif
 
 static uint64_t artQuickGenericJniEndJNIRef(Thread* self,
@@ -2126,7 +2126,7 @@
   }
 
   // Retrieve the stored native code.
-  void* nativeCode = called->GetEntryPointFromJni();
+  void const* nativeCode = called->GetEntryPointFromJni();
 
   // There are two cases for the content of nativeCode:
   // 1) Pointer to the native function.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 55a4625..a3c3981 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -121,7 +121,8 @@
                         sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_limit, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_limit, thread_local_objects, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, jni_entrypoints, sizeof(size_t));
 
     // Skip across the entrypoints structures.
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 4220250..7f738bf 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -21,8 +21,10 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/safe_copy.h"
 #include "base/stl_util.h"
 #include "mirror/class.h"
+#include "mirror/object_reference.h"
 #include "oat_quick_method_header.h"
 #include "sigchain.h"
 #include "thread-inl.h"
@@ -42,6 +44,82 @@
   return fault_manager.HandleFault(sig, info, context);
 }
 
+#if defined(__linux__)
+
+// Set to true to verify the safe implementations against the original ones.
+constexpr bool kVerifySafeImpls = false;
+
+// Provide implementations of ArtMethod::GetDeclaringClass and VerifyClassClass that use SafeCopy
+// to safely dereference pointers which are potentially garbage.
+// Only available on Linux due to availability of SafeCopy.
+
+static mirror::Class* SafeGetDeclaringClass(ArtMethod* method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  char* method_declaring_class =
+      reinterpret_cast<char*>(method) + ArtMethod::DeclaringClassOffset().SizeValue();
+
+  // ArtMethod::declaring_class_ is a GcRoot<mirror::Class>.
+  // Read it out as a CompressedReference directly for simplicity's sake.
+  mirror::CompressedReference<mirror::Class> cls;
+  ssize_t rc = SafeCopy(&cls, method_declaring_class, sizeof(cls));
+  CHECK_NE(-1, rc);
+
+  if (kVerifySafeImpls) {
+    mirror::Class* actual_class = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
+    CHECK_EQ(actual_class, cls.AsMirrorPtr());
+  }
+
+  if (rc != sizeof(cls)) {
+    return nullptr;
+  }
+
+  return cls.AsMirrorPtr();
+}
+
+static mirror::Class* SafeGetClass(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
+  char* obj_cls = reinterpret_cast<char*>(obj) + mirror::Object::ClassOffset().SizeValue();
+
+  mirror::HeapReference<mirror::Class> cls =
+      mirror::HeapReference<mirror::Class>::FromMirrorPtr(nullptr);
+  ssize_t rc = SafeCopy(&cls, obj_cls, sizeof(cls));
+  CHECK_NE(-1, rc);
+
+  if (kVerifySafeImpls) {
+    mirror::Class* actual_class = obj->GetClass<kVerifyNone>();
+    CHECK_EQ(actual_class, cls.AsMirrorPtr());
+  }
+
+  if (rc != sizeof(cls)) {
+    return nullptr;
+  }
+
+  return cls.AsMirrorPtr();
+}
+
+static bool SafeVerifyClassClass(mirror::Class* cls) REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::Class* c_c = SafeGetClass(cls);
+  bool result = c_c != nullptr && c_c == SafeGetClass(c_c);
+
+  if (kVerifySafeImpls) {
+    CHECK_EQ(VerifyClassClass(cls), result);
+  }
+
+  return result;
+}
+
+#else
+
+static mirror::Class* SafeGetDeclaringClass(ArtMethod* method_obj)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
+}
+
+static bool SafeVerifyClassClass(mirror::Class* cls) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return VerifyClassClass(cls);
+}
+#endif
+
+
 FaultManager::FaultManager() : initialized_(false) {
   sigaction(SIGSEGV, nullptr, &oldaction_);
 }
@@ -191,20 +269,19 @@
   // Verify that the potential method is indeed a method.
   // TODO: check the GC maps to make sure it's an object.
   // Check that the class pointer inside the object is not null and is aligned.
-  // TODO: Method might be not a heap address, and GetClass could fault.
   // No read barrier because method_obj may not be a real object.
-  mirror::Class* cls = method_obj->GetDeclaringClassUnchecked<kWithoutReadBarrier>();
+  mirror::Class* cls = SafeGetDeclaringClass(method_obj);
   if (cls == nullptr) {
     VLOG(signals) << "not a class";
     return false;
   }
+
   if (!IsAligned<kObjectAlignment>(cls)) {
     VLOG(signals) << "not aligned";
     return false;
   }
 
-
-  if (!VerifyClassClass(cls)) {
+  if (!SafeVerifyClassClass(cls)) {
     VLOG(signals) << "not a class class";
     return false;
   }
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index a5bb91a..34e30c1 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -26,6 +26,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread-inl.h"
 
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 7b1e2b8..f2fe58a 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -26,8 +26,9 @@
 #include "gc/collector/semi_space.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
-#include "mirror/object-inl.h"
 #include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
diff --git a/runtime/gc/allocator_type.h b/runtime/gc/allocator_type.h
index 185a9b7..2f1f577 100644
--- a/runtime/gc/allocator_type.h
+++ b/runtime/gc/allocator_type.h
@@ -35,6 +35,10 @@
 };
 std::ostream& operator<<(std::ostream& os, const AllocatorType& rhs);
 
+inline constexpr bool IsTLABAllocator(AllocatorType allocator) {
+  return allocator == kAllocatorTypeTLAB || allocator == kAllocatorTypeRegionTLAB;
+}
+
 }  // namespace gc
 }  // namespace art
 
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 854d0a5..dd449f9 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -22,7 +22,7 @@
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/space/region_space.h"
-#include "mirror/object-inl.h"
+#include "mirror/object-readbarrier-inl.h"
 #include "lock_word.h"
 
 namespace art {
@@ -96,7 +96,9 @@
 }
 
 template<bool kGrayImmuneObject, bool kFromGCThread>
-inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
+inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref,
+                                               mirror::Object* holder,
+                                               MemberOffset offset) {
   if (from_ref == nullptr) {
     return nullptr;
   }
@@ -141,7 +143,7 @@
       if (immune_spaces_.ContainsObject(from_ref)) {
         return MarkImmuneSpace<kGrayImmuneObject>(from_ref);
       } else {
-        return MarkNonMoving(from_ref);
+        return MarkNonMoving(from_ref, holder, offset);
       }
     default:
       UNREACHABLE();
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index bcf5008..d5af29e 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -29,10 +29,12 @@
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
+#include "gc/verification.h"
 #include "image-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
@@ -374,6 +376,15 @@
         cc->VerifyGrayImmuneObjects();
       }
     }
+    // WellKnownClasses::java_lang_Object may be null during runtime creation; in this case leave
+    // java_lang_Object_ null. This is safe since single threaded behavior should mean
+    // FillDummyObject does not happen when java_lang_Object_ is null.
+    if (WellKnownClasses::java_lang_Object != nullptr) {
+      cc->java_lang_Object_ = down_cast<mirror::Class*>(cc->Mark(
+          WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object).Ptr()));
+    } else {
+      cc->java_lang_Object_ = nullptr;
+    }
   }
 
  private:
@@ -1031,13 +1042,15 @@
   explicit VerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
-  void operator()(mirror::Object* ref) const
+  void operator()(mirror::Object* ref,
+                  MemberOffset offset = MemberOffset(0),
+                  mirror::Object* holder = nullptr) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     if (ref == nullptr) {
       // OK.
       return;
     }
-    collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
+    collector_->AssertToSpaceInvariant(holder, offset, ref);
     if (kUseBakerReadBarrier) {
       CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState())
           << "Ref " << ref << " " << ref->PrettyTypeOf()
@@ -1067,7 +1080,7 @@
     mirror::Object* ref =
         obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset);
     VerifyNoFromSpaceRefsVisitor visitor(collector_);
-    visitor(ref);
+    visitor(ref, offset, obj.Ptr());
   }
   void operator()(ObjPtr<mirror::Class> klass,
                   ObjPtr<mirror::Reference> ref) const
@@ -2065,11 +2078,10 @@
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
     // An int array is too big. Use java.lang.Object.
-    ObjPtr<mirror::Class> java_lang_Object =
-        WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object);
-    AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object.Ptr());
-    CHECK_EQ(byte_size, (java_lang_Object->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
-    dummy_obj->SetClass(java_lang_Object.Ptr());
+    CHECK(java_lang_Object_ != nullptr);
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object_);
+    CHECK_EQ(byte_size, (java_lang_Object_->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
+    dummy_obj->SetClass(java_lang_Object_);
     CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone>()));
   } else {
     // Use an int array.
@@ -2359,7 +2371,9 @@
   return alloc_stack->Contains(ref);
 }
 
-mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
+mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref,
+                                                 mirror::Object* holder,
+                                                 MemberOffset offset) {
   // ref is in a non-moving space (from_ref == to_ref).
   DCHECK(!region_space_->HasAddress(ref)) << ref;
   DCHECK(!immune_spaces_.ContainsObject(ref));
@@ -2405,6 +2419,11 @@
           return ref;
         }
       }
+      if (is_los && !IsAligned<kPageSize>(ref)) {
+        // Ref is a large object that is not aligned; it must be heap corruption. Dump data before
+        // AtomicSetReadBarrierState since it will fault if the address is not valid.
+        heap_->GetVerification()->LogHeapCorruption(ref, offset, holder, /* fatal */ true);
+      }
       // Not marked or on the allocation stack. Try to mark it.
       // This may or may not succeed, which is ok.
       bool cas_success = false;
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index a0da9fc..c21520d 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -106,7 +106,9 @@
     return IsMarked(ref) == ref;
   }
   template<bool kGrayImmuneObject = true, bool kFromGCThread = false>
-  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref,
+                                     mirror::Object* holder = nullptr,
+                                     MemberOffset offset = MemberOffset(0))
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
@@ -224,7 +226,10 @@
   void DisableMarking() REQUIRES_SHARED(Locks::mutator_lock_);
   void IssueDisableMarkingCheckpoint() REQUIRES_SHARED(Locks::mutator_lock_);
   void ExpandGcMarkStack() REQUIRES_SHARED(Locks::mutator_lock_);
-  mirror::Object* MarkNonMoving(mirror::Object* from_ref) REQUIRES_SHARED(Locks::mutator_lock_)
+  mirror::Object* MarkNonMoving(mirror::Object* from_ref,
+                                mirror::Object* holder = nullptr,
+                                MemberOffset offset = MemberOffset(0))
+      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
   ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegion(mirror::Object* from_ref,
       accounting::SpaceBitmap<kObjectAlignment>* bitmap)
@@ -316,6 +321,11 @@
   Mutex immune_gray_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<mirror::Object*> immune_gray_stack_ GUARDED_BY(immune_gray_stack_lock_);
 
+  // Class of java.lang.Object. Filled in from WellKnownClasses in FlipCallback. Must
+  // be filled in before flipping thread roots so that FillDummyObject can run. Not
+  // ObjPtr since the GC may transition to suspended and runnable between phases.
+  mirror::Class* java_lang_Object_;
+
   class AssertToSpaceInvariantFieldVisitor;
   class AssertToSpaceInvariantObjectVisitor;
   class AssertToSpaceInvariantRefsVisitor;
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index c61f69d..cab293f 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -29,6 +29,7 @@
 #include "gc/space/space-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "runtime.h"
 #include "stack.h"
 #include "thread-inl.h"
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index e72277f..e4993ce 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -21,6 +21,7 @@
 
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 67e7383..41e6051 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -44,6 +44,7 @@
 #include "monitor.h"
 #include "mirror/reference-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "runtime.h"
 #include "thread-inl.h"
 #include "thread_list.h"
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index c1c1cad..c35ec7c 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -29,6 +29,7 @@
     case kGcCauseBackground: return "Background";
     case kGcCauseExplicit: return "Explicit";
     case kGcCauseForNativeAlloc: return "NativeAlloc";
+    case kGcCauseForNativeAllocBackground: return "NativeAllocBackground";
     case kGcCauseCollectorTransition: return "CollectorTransition";
     case kGcCauseDisableMovingGc: return "DisableMovingGc";
     case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact";
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index eb27547..41c8943 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -33,6 +33,8 @@
   kGcCauseExplicit,
   // GC triggered for a native allocation.
   kGcCauseForNativeAlloc,
+  // Background GC triggered for a native allocation.
+  kGcCauseForNativeAllocBackground,
   // GC triggered for a collector transition.
   kGcCauseCollectorTransition,
   // Not a real GC cause, used when we disable moving GC (currently for GetPrimitiveArrayCritical).
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 394e541..a50d125 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -77,12 +77,11 @@
   size_t bytes_allocated;
   size_t usable_size;
   size_t new_num_bytes_allocated = 0;
-  if (allocator == kAllocatorTypeTLAB || allocator == kAllocatorTypeRegionTLAB) {
+  if (IsTLABAllocator(allocator)) {
     byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
   }
   // If we have a thread local allocation we don't need to update bytes allocated.
-  if ((allocator == kAllocatorTypeTLAB || allocator == kAllocatorTypeRegionTLAB) &&
-      byte_count <= self->TlabSize()) {
+  if (IsTLABAllocator(allocator) && byte_count <= self->TlabSize()) {
     obj = self->AllocTlab(byte_count);
     DCHECK(obj != nullptr) << "AllocTlab can't fail";
     obj->SetClass(klass);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a853b98..64dce5f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -60,16 +60,19 @@
 #include "gc/space/space-inl.h"
 #include "gc/space/zygote_space.h"
 #include "gc/task_processor.h"
+#include "gc/verification.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "gc_pause_listener.h"
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
+#include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "obj_ptr-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/reference-inl.h"
 #include "os.h"
@@ -135,6 +138,13 @@
 
 static const char* kRegionSpaceName = "main space (region space)";
 
+// If true, we log all GCs in both the foreground and background. Used for debugging.
+static constexpr bool kLogAllGCs = false;
+
+// How much we grow the TLAB if we can do it.
+static constexpr size_t kPartialTlabSize = 16 * KB;
+static constexpr bool kUsePartialTlabs = true;
+
 #if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
 // 300 MB (0x12c00000) - (default non-moving space capacity).
 static uint8_t* const kPreferredAllocSpaceBegin =
@@ -278,6 +288,7 @@
     CHECK_EQ(foreground_collector_type_, kCollectorTypeCC);
     CHECK_EQ(background_collector_type_, kCollectorTypeCCBackground);
   }
+  verification_.reset(new Verification(this));
   CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
   ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
@@ -2762,7 +2773,7 @@
   const std::vector<uint64_t>& pause_times = GetCurrentGcIteration()->GetPauseTimes();
   // Print the GC if it is an explicit GC (e.g. Runtime.gc()) or a slow GC
   // (mutator time blocked >= long_pause_log_threshold_).
-  bool log_gc = gc_cause == kGcCauseExplicit;
+  bool log_gc = kLogAllGCs || gc_cause == kGcCauseExplicit;
   if (!log_gc && CareAboutPauseTimes()) {
     // GC for alloc pauses the allocating thread, so consider it as a pause.
     log_gc = duration > long_gc_log_threshold_ ||
@@ -3686,20 +3697,21 @@
                                             ObjPtr<mirror::Object>* obj) {
   StackHandleScope<1> hs(self);
   HandleWrapperObjPtr<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
-  RequestConcurrentGC(self, force_full);
+  RequestConcurrentGC(self, kGcCauseBackground, force_full);
 }
 
 class Heap::ConcurrentGCTask : public HeapTask {
  public:
-  ConcurrentGCTask(uint64_t target_time, bool force_full)
-      : HeapTask(target_time), force_full_(force_full) { }
+  ConcurrentGCTask(uint64_t target_time, GcCause cause, bool force_full)
+      : HeapTask(target_time), cause_(cause), force_full_(force_full) {}
   virtual void Run(Thread* self) OVERRIDE {
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    heap->ConcurrentGC(self, force_full_);
+    heap->ConcurrentGC(self, cause_, force_full_);
     heap->ClearConcurrentGCRequest();
   }
 
  private:
+  const GcCause cause_;
   const bool force_full_;  // If true, force full (or partial) collection.
 };
 
@@ -3713,18 +3725,19 @@
   concurrent_gc_pending_.StoreRelaxed(false);
 }
 
-void Heap::RequestConcurrentGC(Thread* self, bool force_full) {
+void Heap::RequestConcurrentGC(Thread* self, GcCause cause, bool force_full) {
   if (CanAddHeapTask(self) &&
       concurrent_gc_pending_.CompareExchangeStrongSequentiallyConsistent(false, true)) {
     task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
+                                                        cause,
                                                         force_full));
   }
 }
 
-void Heap::ConcurrentGC(Thread* self, bool force_full) {
+void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full) {
   if (!Runtime::Current()->IsShuttingDown(self)) {
     // Wait for any GCs currently running to finish.
-    if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) {
+    if (WaitForGcToComplete(cause, self) == collector::kGcTypeNone) {
       // If the we can't run the GC type we wanted to run, find the next appropriate one and try that
       // instead. E.g. can't do partial, so do full instead.
       collector::GcType next_gc_type = next_gc_type_;
@@ -3732,13 +3745,11 @@
       if (force_full && next_gc_type == collector::kGcTypeSticky) {
         next_gc_type = NonStickyGcType();
       }
-      if (CollectGarbageInternal(next_gc_type, kGcCauseBackground, false) ==
-          collector::kGcTypeNone) {
+      if (CollectGarbageInternal(next_gc_type, cause, false) == collector::kGcTypeNone) {
         for (collector::GcType gc_type : gc_plan_) {
           // Attempt to run the collector, if we succeed, we are done.
           if (gc_type > next_gc_type &&
-              CollectGarbageInternal(gc_type, kGcCauseBackground, false) !=
-                  collector::kGcTypeNone) {
+              CollectGarbageInternal(gc_type, cause, false) != collector::kGcTypeNone) {
             break;
           }
         }
@@ -3940,7 +3951,7 @@
     // Trigger another GC because there have been enough native bytes
     // allocated since the last GC.
     if (IsGcConcurrent()) {
-      RequestConcurrentGC(ThreadForEnv(env), /*force_full*/true);
+      RequestConcurrentGC(ThreadForEnv(env), kGcCauseForNativeAllocBackground, /*force_full*/true);
     } else {
       CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
     }
@@ -4182,7 +4193,21 @@
                                        size_t* usable_size,
                                        size_t* bytes_tl_bulk_allocated) {
   const AllocatorType allocator_type = GetCurrentAllocator();
-  if (allocator_type == kAllocatorTypeTLAB) {
+  if (kUsePartialTlabs && alloc_size <= self->TlabRemainingCapacity()) {
+    DCHECK_GT(alloc_size, self->TlabSize());
+    // There is enough space if we grow the TLAB. Let's do that. This increases the
+    // TLAB bytes.
+    const size_t min_expand_size = alloc_size - self->TlabSize();
+    const size_t expand_bytes = std::max(
+        min_expand_size,
+        std::min(self->TlabRemainingCapacity() - self->TlabSize(), kPartialTlabSize));
+    if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, expand_bytes, grow))) {
+      return nullptr;
+    }
+    *bytes_tl_bulk_allocated = expand_bytes;
+    self->ExpandTlab(expand_bytes);
+    DCHECK_LE(alloc_size, self->TlabSize());
+  } else if (allocator_type == kAllocatorTypeTLAB) {
     DCHECK(bump_pointer_space_ != nullptr);
     const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
     if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
@@ -4202,15 +4227,18 @@
       if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
                                             space::RegionSpace::kRegionSize,
                                             grow))) {
+        const size_t new_tlab_size = kUsePartialTlabs
+            ? std::max(alloc_size, kPartialTlabSize)
+            : gc::space::RegionSpace::kRegionSize;
         // Try to allocate a tlab.
-        if (!region_space_->AllocNewTlab(self)) {
+        if (!region_space_->AllocNewTlab(self, new_tlab_size)) {
           // Failed to allocate a tlab. Try non-tlab.
           return region_space_->AllocNonvirtual<false>(alloc_size,
                                                        bytes_allocated,
                                                        usable_size,
                                                        bytes_tl_bulk_allocated);
         }
-        *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+        *bytes_tl_bulk_allocated = new_tlab_size;
         // Fall-through to using the TLAB below.
       } else {
         // Check OOME for a non-tlab allocation.
@@ -4242,5 +4270,9 @@
   return ret;
 }
 
+const Verification* Heap::GetVerification() const {
+  return verification_.get();
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 1a782b4..aa123d8 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -64,6 +64,7 @@
 class GcPauseListener;
 class ReferenceProcessor;
 class TaskProcessor;
+class Verification;
 
 namespace accounting {
   class HeapBitmap;
@@ -330,7 +331,7 @@
 
   // Does a concurrent GC, should only be called by the GC daemon thread
   // through runtime.
-  void ConcurrentGC(Thread* self, bool force_full)
+  void ConcurrentGC(Thread* self, GcCause cause, bool force_full)
       REQUIRES(!Locks::runtime_shutdown_lock_, !*gc_complete_lock_, !*pending_task_lock_);
 
   // Implements VMDebug.countInstancesOfClass and JDWP VM_InstanceCount.
@@ -743,7 +744,8 @@
   void RequestTrim(Thread* self) REQUIRES(!*pending_task_lock_);
 
   // Request asynchronous GC.
-  void RequestConcurrentGC(Thread* self, bool force_full) REQUIRES(!*pending_task_lock_);
+  void RequestConcurrentGC(Thread* self, GcCause cause, bool force_full)
+      REQUIRES(!*pending_task_lock_);
 
   // Whether or not we may use a garbage collector, used so that we only create collectors we need.
   bool MayUseCollector(CollectorType type) const;
@@ -820,6 +822,8 @@
   // reasons, we assume it stays valid when we read it (so that we don't require a lock).
   void RemoveGcPauseListener();
 
+  const Verification* GetVerification() const;
+
  private:
   class ConcurrentGCTask;
   class CollectorTransitionTask;
@@ -1432,6 +1436,8 @@
   // An installed GC Pause listener.
   Atomic<GcPauseListener*> gc_pause_listener_;
 
+  std::unique_ptr<Verification> verification_;
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
diff --git a/runtime/gc/heap_verification_test.cc b/runtime/gc/heap_verification_test.cc
new file mode 100644
index 0000000..c8233e3
--- /dev/null
+++ b/runtime/gc/heap_verification_test.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+
+#include "class_linker.h"
+#include "handle_scope-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/string.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "verification.h"
+
+namespace art {
+namespace gc {
+
+class VerificationTest : public CommonRuntimeTest {  // Fixture for the gc::Verification tests.
+ protected:
+  VerificationTest() {}
+
+  template <class T>  // Allocates an array of `length` references via kObjectArrayClass.
+  mirror::ObjectArray<T>* AllocObjectArray(Thread* self, size_t length)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+    return mirror::ObjectArray<T>::Alloc(
+        self,
+        class_linker->GetClassRoot(ClassLinker::ClassRoot::kObjectArrayClass),
+        length);
+  }
+};
+
+TEST_F(VerificationTest, IsValidHeapObjectAddress) {  // Bogus, null, real and stack addresses.
+  ScopedObjectAccess soa(Thread::Current());
+  const Verification* const v = Runtime::Current()->GetHeap()->GetVerification();
+  EXPECT_FALSE(v->IsValidHeapObjectAddress(reinterpret_cast<const void*>(1)));
+  EXPECT_FALSE(v->IsValidHeapObjectAddress(reinterpret_cast<const void*>(4)));
+  EXPECT_FALSE(v->IsValidHeapObjectAddress(nullptr));  // Null must be rejected.
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::String> string(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "test")));
+  EXPECT_TRUE(v->IsValidHeapObjectAddress(string.Get()));
+  EXPECT_TRUE(v->IsValidHeapObjectAddress(string->GetClass()));
+  const uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass());
+  // Not actually a valid object but the verification can't know that: only alignment and heap
+  // space membership are checked. Guaranteed to be inside a heap space.
+  EXPECT_TRUE(v->IsValidHeapObjectAddress(
+      reinterpret_cast<const void*>(uint_klass + kObjectAlignment)));
+  EXPECT_FALSE(v->IsValidHeapObjectAddress(
+      reinterpret_cast<const void*>(&uint_klass)));  // Address of a local variable.
+}
+
+TEST_F(VerificationTest, IsValidClass) {  // Only the real class pointer should be accepted.
+  ScopedObjectAccess soa(Thread::Current());
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::String> string(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "test")));
+  const Verification* const v = Runtime::Current()->GetHeap()->GetVerification();
+  EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(1)));
+  EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(4)));
+  EXPECT_FALSE(v->IsValidClass(nullptr));
+  EXPECT_FALSE(v->IsValidClass(string.Get()));  // A String instance is an object, not a class.
+  EXPECT_TRUE(v->IsValidClass(string->GetClass()));
+  const uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass());
+  EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(uint_klass - kObjectAlignment)));
+  EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(&uint_klass)));  // Local variable.
+}
+
+TEST_F(VerificationTest, DumpObjectInfo) {  // Smoke test: output is only logged, not checked.
+  ScopedLogSeverity sls(LogSeverity::INFO);
+  ScopedObjectAccess soa(Thread::Current());
+  Runtime* const runtime = Runtime::Current();
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::String> string(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "obj")));
+  Handle<mirror::ObjectArray<mirror::Object>> arr(
+      hs.NewHandle(AllocObjectArray<mirror::Object>(soa.Self(), 256)));
+  const Verification* const v = runtime->GetHeap()->GetVerification();
+  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(1), "obj");
+  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(4), "obj");
+  LOG(INFO) << v->DumpObjectInfo(nullptr, "obj");
+  LOG(INFO) << v->DumpObjectInfo(string.Get(), "test");
+  LOG(INFO) << v->DumpObjectInfo(string->GetClass(), "obj");
+  const uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass());
+  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(uint_klass - kObjectAlignment),
+                                 "obj");
+  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(&uint_klass), "obj");
+  LOG(INFO) << v->DumpObjectInfo(arr.Get(), "arr");  // Array object: exercises the length dump.
+}
+
+TEST_F(VerificationTest, LogHeapCorruption) {  // Smoke test: the calls must simply not crash.
+  ScopedLogSeverity sls(LogSeverity::INFO);
+  ScopedObjectAccess soa(Thread::Current());
+  Runtime* const runtime = Runtime::Current();
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::String> string(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "obj")));
+  using ObjArray = mirror::ObjectArray<mirror::Object>;
+  Handle<ObjArray> arr(
+      hs.NewHandle(AllocObjectArray<mirror::Object>(soa.Self(), 256)));
+  const Verification* const v = runtime->GetHeap()->GetVerification();
+  arr->Set(0, string.Get());
+  // Test normal cases. fatal is false throughout, so nothing here goes through LOG(FATAL).
+  v->LogHeapCorruption(arr.Get(), ObjArray::DataOffset(kHeapReferenceSize), string.Get(), false);
+  v->LogHeapCorruption(string.Get(), mirror::Object::ClassOffset(), string->GetClass(), false);
+  // Test null holder cases: only the ref is dumped, the offset is not used.
+  v->LogHeapCorruption(nullptr, MemberOffset(0), string.Get(), false);
+  v->LogHeapCorruption(nullptr, MemberOffset(0), arr.Get(), false);
+}
+
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 65a550e..886c950 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -18,6 +18,7 @@
 
 #include "base/time_utils.h"
 #include "collector/garbage_collector.h"
+#include "java_vm_ext.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 1303d77..426b332 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -249,7 +249,7 @@
 void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
   objects_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalObjectsAllocated());
   bytes_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalBytesAllocated());
-  thread->SetTlab(nullptr, nullptr);
+  thread->SetTlab(nullptr, nullptr, nullptr);
 }
 
 bool BumpPointerSpace::AllocNewTlab(Thread* self, size_t bytes) {
@@ -259,7 +259,7 @@
   if (start == nullptr) {
     return false;
   }
-  self->SetTlab(start, start + bytes);
+  self->SetTlab(start, start + bytes, start + bytes);
   return true;
 }
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 662efe2..e9f0758 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -25,7 +25,8 @@
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
 
-#include "art_method.h"
+#include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
@@ -38,6 +39,7 @@
 #include "image_space_fs.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "oat_file.h"
 #include "os.h"
 #include "space-inl.h"
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 5809027..3910a03 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_
 
 #include "region_space.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace gc {
@@ -335,6 +336,28 @@
   return nullptr;
 }
 
+inline size_t RegionSpace::Region::BytesAllocated() const {  // Bytes in use, by region kind.
+  if (IsLarge()) {
+    DCHECK_LT(begin_ + kRegionSize, Top());
+    return static_cast<size_t>(Top() - begin_);
+  } else if (IsLargeTail()) {
+    DCHECK_EQ(begin_, Top());
+    return 0;  // A tail region reports no bytes of its own.
+  } else {
+    DCHECK(IsAllocated()) << static_cast<uint>(state_);
+    DCHECK_LE(begin_, Top());
+    size_t bytes;
+    if (is_a_tlab_) {  // TLAB regions use the owning thread's byte count instead of Top().
+      bytes = thread_->GetThreadLocalBytesAllocated();
+    } else {
+      bytes = static_cast<size_t>(Top() - begin_);
+    }
+    DCHECK_LE(bytes, kRegionSize);
+    return bytes;
+  }
+}
+
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 1ad4843..09b4a3a 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -427,7 +427,7 @@
   r->objects_allocated_.FetchAndAddSequentiallyConsistent(1);
 }
 
-bool RegionSpace::AllocNewTlab(Thread* self) {
+bool RegionSpace::AllocNewTlab(Thread* self, size_t min_bytes) {
   MutexLock mu(self, region_lock_);
   RevokeThreadLocalBuffersLocked(self);
   // Retain sufficient free regions for full evacuation.
@@ -443,7 +443,7 @@
       r->SetTop(r->End());
       r->is_a_tlab_ = true;
       r->thread_ = self;
-      self->SetTlab(r->Begin(), r->End());
+      self->SetTlab(r->Begin(), r->Begin() + min_bytes, r->End());
       return true;
     }
   }
@@ -463,13 +463,13 @@
     DCHECK_ALIGNED(tlab_start, kRegionSize);
     Region* r = RefToRegionLocked(reinterpret_cast<mirror::Object*>(tlab_start));
     DCHECK(r->IsAllocated());
-    DCHECK_EQ(thread->GetThreadLocalBytesAllocated(), kRegionSize);
+    DCHECK_LE(thread->GetThreadLocalBytesAllocated(), kRegionSize);
     r->RecordThreadLocalAllocations(thread->GetThreadLocalObjectsAllocated(),
                                     thread->GetThreadLocalBytesAllocated());
     r->is_a_tlab_ = false;
     r->thread_ = nullptr;
   }
-  thread->SetTlab(nullptr, nullptr);
+  thread->SetTlab(nullptr, nullptr, nullptr);
 }
 
 size_t RegionSpace::RevokeAllThreadLocalBuffers() {
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 2537929..80eecca 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -234,7 +234,7 @@
   }
 
   void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
-  bool AllocNewTlab(Thread* self) REQUIRES(!region_lock_);
+  bool AllocNewTlab(Thread* self, size_t min_bytes) REQUIRES(!region_lock_);
 
   uint32_t Time() {
     return time_;
@@ -417,21 +417,7 @@
       return live_bytes_;
     }
 
-    size_t BytesAllocated() const {
-      if (IsLarge()) {
-        DCHECK_LT(begin_ + kRegionSize, Top());
-        return static_cast<size_t>(Top() - begin_);
-      } else if (IsLargeTail()) {
-        DCHECK_EQ(begin_, Top());
-        return 0;
-      } else {
-        DCHECK(IsAllocated()) << static_cast<uint>(state_);
-        DCHECK_LE(begin_, Top());
-        size_t bytes = static_cast<size_t>(Top() - begin_);
-        DCHECK_LE(bytes, kRegionSize);
-        return bytes;
-      }
-    }
+    size_t BytesAllocated() const;
 
     size_t ObjectsAllocated() const {
       if (IsLarge()) {
@@ -476,7 +462,7 @@
       DCHECK_EQ(Top(), end_);
       objects_allocated_.StoreRelaxed(num_objects);
       top_.StoreRelaxed(begin_ + num_bytes);
-      DCHECK_EQ(Top(), end_);
+      DCHECK_LE(Top(), end_);
     }
 
    private:
diff --git a/runtime/gc/system_weak_test.cc b/runtime/gc/system_weak_test.cc
index 9b601c0..dfbbd2a 100644
--- a/runtime/gc/system_weak_test.cc
+++ b/runtime/gc/system_weak_test.cc
@@ -23,6 +23,7 @@
 #include "base/mutex.h"
 #include "collector_type.h"
 #include "common_runtime_test.h"
+#include "gc_root-inl.h"
 #include "handle_scope-inl.h"
 #include "heap.h"
 #include "mirror/string.h"
diff --git a/runtime/gc/verification.cc b/runtime/gc/verification.cc
new file mode 100644
index 0000000..7b31c8a
--- /dev/null
+++ b/runtime/gc/verification.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "verification.h"
+
+#include <iomanip>
+#include <sstream>
+
+#include "art_field-inl.h"
+#include "mirror/class-inl.h"
+
+namespace art {
+namespace gc {
+
+std::string Verification::DumpObjectInfo(const void* addr, const char* tag) const {
+  std::ostringstream oss;
+  oss << tag << "=" << addr;  // Always emitted, even when addr turns out to be invalid.
+  if (IsValidHeapObjectAddress(addr)) {
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<void*>(addr));
+    mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+    oss << " klass=" << klass;
+    if (IsValidClass(klass)) {
+      oss << "(" << klass->PrettyClass() << ")";
+      if (klass->IsArrayClass<kVerifyNone, kWithoutReadBarrier>()) {
+        oss << " length=" << obj->AsArray<kVerifyNone, kWithoutReadBarrier>()->GetLength();
+      }
+    } else {
+      oss << " <invalid address>";  // The klass pointer failed IsValidClass().
+    }
+    space::Space* const space = heap_->FindSpaceFromAddress(addr);
+    if (space != nullptr) {
+      oss << " space=" << *space;
+    }
+    accounting::CardTable* card_table = heap_->GetCardTable();
+    if (card_table->AddrIsInCardTable(addr)) {
+      oss << " card=" << static_cast<size_t>(
+          card_table->GetCard(reinterpret_cast<const mirror::Object*>(addr)));
+    }
+    // Dump adjacent RAM as hex bytes: kBytesBeforeAfter bytes on each side of addr.
+    const uintptr_t uint_addr = reinterpret_cast<uintptr_t>(addr);
+    static constexpr size_t kBytesBeforeAfter = 2 * kObjectAlignment;
+    const uintptr_t dump_start = uint_addr - kBytesBeforeAfter;
+    const uintptr_t dump_end = uint_addr + kBytesBeforeAfter;
+    if (dump_start < dump_end &&  // False if the address arithmetic wrapped around.
+        IsValidHeapObjectAddress(reinterpret_cast<const void*>(dump_start)) &&
+        IsValidHeapObjectAddress(reinterpret_cast<const void*>(dump_end - kObjectAlignment))) {
+      oss << " adjacent_ram=";
+      for (uintptr_t p = dump_start; p < dump_end; ++p) {
+        if (p == uint_addr) {
+          // Marker of where the object is: "|" is written just before the byte at addr.
+          oss << "|";
+        }
+        uint8_t* ptr = reinterpret_cast<uint8_t*>(p);
+        oss << std::hex << std::setfill('0') << std::setw(2) << static_cast<uintptr_t>(*ptr);
+      }
+    }
+  } else {
+    oss << " <invalid address>";  // addr itself failed IsValidHeapObjectAddress().
+  }
+  return oss.str();
+}
+
+void Verification::LogHeapCorruption(ObjPtr<mirror::Object> holder,
+                                     MemberOffset offset,
+                                     mirror::Object* ref,
+                                     bool fatal) const {
+  // Lowest priority logging first: the process memory maps.
+  PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
+  MemMap::DumpMaps(LOG_STREAM(FATAL_WITHOUT_ABORT), true);
+  // Buffer the output in the string stream since it is more important than the stack traces
+  // and we want it to have log priority. The stack traces are printed from Runtime::Abort
+  // which is called from LOG(FATAL) but before the abort message.
+  std::ostringstream oss;
+  oss << "GC tried to mark invalid reference " << ref << "\n";
+  oss << DumpObjectInfo(ref, "ref") << "\n";
+  if (holder != nullptr) {  // A null holder is allowed; then only the ref is described.
+    oss << DumpObjectInfo(holder.Ptr(), "holder") << "\n";  // End the line after the hex dump.
+    mirror::Class* holder_klass = holder->GetClass<kVerifyNone, kWithoutReadBarrier>();
+    if (IsValidClass(holder_klass)) {  // Field lookup is only attempted on a plausible class.
+      oss << "field_offset=" << offset.Uint32Value();
+      ArtField* field = holder->FindFieldByOffset(offset);
+      if (field != nullptr) {
+        oss << " name=" << field->GetName();
+      }
+    }
+  }
+
+  if (fatal) {  // The buffered report is emitted in one piece at the chosen severity.
+    LOG(FATAL) << oss.str();
+  } else {
+    LOG(FATAL_WITHOUT_ABORT) << oss.str();
+  }
+}
+
+bool Verification::IsValidHeapObjectAddress(const void* addr, space::Space** out_space) const {
+  if (!IsAligned<kObjectAlignment>(addr)) {  // Misaligned addresses are rejected outright.
+    return false;
+  }
+  space::Space* const space = heap_->FindSpaceFromAddress(addr);
+  if (space != nullptr) {
+    if (out_space != nullptr) {  // out_space is optional and only written on success.
+      *out_space = space;
+    }
+    return true;
+  }
+  return false;
+}
+
+bool Verification::IsValidClass(const void* addr) const {
+  if (!IsValidHeapObjectAddress(addr)) {
+    return false;
+  }
+  mirror::Class* klass = reinterpret_cast<mirror::Class*>(const_cast<void*>(addr));
+  mirror::Class* k1 = klass->GetClass<kVerifyNone, kWithoutReadBarrier>();
+  if (!IsValidHeapObjectAddress(k1)) {
+    return false;
+  }
+  // k1 should be the class class, whose class is itself; take the class again to verify.
+  // Note that this check may not be valid for the no image space since the class class might move
+  // around from moving GC.
+  mirror::Class* k2 = k1->GetClass<kVerifyNone, kWithoutReadBarrier>();
+  if (!IsValidHeapObjectAddress(k2)) {
+    return false;
+  }
+  return k1 == k2;
+}
+
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/verification.h b/runtime/gc/verification.h
new file mode 100644
index 0000000..3d95d93
--- /dev/null
+++ b/runtime/gc/verification.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_VERIFICATION_H_
+#define ART_RUNTIME_GC_VERIFICATION_H_
+
+#include "obj_ptr.h"
+#include "offsets.h"
+
+namespace art {
+
+namespace mirror {
+class Class;
+class Object;
+}  // namespace mirror
+
+namespace gc {
+
+namespace space {
+class Space;
+}  // namespace space
+
+class Heap;
+
+class Verification {  // Heap debugging helpers: address/class sanity checks and object dumps.
+ public:
+  explicit Verification(gc::Heap* heap) : heap_(heap) {}
+
+  // Dump some debugging-relevant info about an object, prefixed with "tag=<address>".
+  std::string DumpObjectInfo(const void* obj, const char* tag) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Don't use ObjPtr for things that might not be aligned like the invalid reference (ref).
+  void LogHeapCorruption(ObjPtr<mirror::Object> holder,
+                         MemberOffset offset,
+                         mirror::Object* ref,
+                         bool fatal) const REQUIRES_SHARED(Locks::mutator_lock_);
+
+
+  // Return true if the klass is likely to be a valid mirror::Class.
+  bool IsValidClass(const void* klass) const REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Does not allow null. On success the containing space is stored in out_space if non-null.
+  bool IsValidHeapObjectAddress(const void* addr, space::Space** out_space = nullptr) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  gc::Heap* const heap_;  // Raw pointer supplied at construction; never deleted by this class.
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_VERIFICATION_H_
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 495fec7..4f390fd 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -40,6 +40,7 @@
 #include "android-base/stringprintf.h"
 
 #include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
@@ -58,7 +59,7 @@
 #include "jdwp/jdwp_priv.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "os.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/image.cc b/runtime/image.cc
index b153ea0..b2486a1 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -17,6 +17,7 @@
 #include "image.h"
 
 #include "base/bit_utils.h"
+#include "base/length_prefixed_array.h"
 #include "mirror/object_array.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index 24ee227..2128f8c 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -24,7 +24,6 @@
 #include "base/dumpable.h"
 #include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
-#include "runtime-inl.h"
 #include "verify_object.h"
 
 namespace art {
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 9fbb2e9..c852d5a 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -18,6 +18,7 @@
 
 #include "base/dumpable-inl.h"
 #include "base/systrace.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "nth_caller_visitor.h"
 #include "reference_table.h"
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index f0d0260..311515c 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -18,6 +18,7 @@
 
 #include "base/hash_set.h"
 #include "common_runtime_test.h"
+#include "gc_root-inl.h"
 #include "mirror/object.h"
 #include "handle_scope-inl.h"
 #include "mirror/string.h"
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 8978bfd..326f5c9 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -28,7 +28,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/class.h"
 #include "mirror/emulated_stack_frame.h"
-#include "mirror/method_handle_impl.h"
+#include "mirror/method_handle_impl-inl.h"
 #include "reflection.h"
 #include "reflection-inl.h"
 #include "stack.h"
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index 56e261c..c314f3c 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -1341,7 +1341,8 @@
   ASSERT_TRUE(cons != nullptr);
 
   Handle<mirror::ObjectArray<mirror::Object>> args = hs.NewHandle(
-      class_linker->AllocObjectArray<mirror::Object>(self, 1));
+      mirror::ObjectArray<mirror::Object>::Alloc(
+          self, class_linker_->GetClassRoot(ClassLinker::ClassRoot::kObjectArrayClass), 1));
   ASSERT_TRUE(args != nullptr);
   args->Set(0, input.Get());
 
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index b93b8f2..6d3118e 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -20,7 +20,7 @@
 
 #include "android-base/stringprintf.h"
 
-#include "art_method.h"
+#include "art_method-inl.h"
 #include "base/dumpable.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -40,6 +40,7 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "sigchain.h"
+#include "ti/agent.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
@@ -268,7 +269,6 @@
     detail += "No implementation found for ";
     detail += m->PrettyMethod();
     detail += " (tried " + jni_short_name + " and " + jni_long_name + ")";
-    LOG(ERROR) << detail;
     return nullptr;
   }
 
@@ -929,6 +929,26 @@
   return was_successful;
 }
 
+static void* FindCodeForNativeMethodInAgents(ArtMethod* m) REQUIRES_SHARED(Locks::mutator_lock_) {
+  std::string jni_short_name(m->JniShortName());
+  std::string jni_long_name(m->JniLongName());
+  for (const ti::Agent& agent : Runtime::Current()->GetAgents()) {  // First match wins.
+    void* fn = agent.FindSymbol(jni_short_name);  // The short name is tried before the long one.
+    if (fn != nullptr) {
+      VLOG(jni) << "Found implementation for " << m->PrettyMethod()
+                << " (symbol: " << jni_short_name << ") in " << agent;
+      return fn;
+    }
+    fn = agent.FindSymbol(jni_long_name);
+    if (fn != nullptr) {
+      VLOG(jni) << "Found implementation for " << m->PrettyMethod()
+                << " (symbol: " << jni_long_name << ") in " << agent;
+      return fn;
+    }
+  }
+  return nullptr;  // No agent exports either symbol.
+}
+
 void* JavaVMExt::FindCodeForNativeMethod(ArtMethod* m) {
   CHECK(m->IsNative());
   mirror::Class* c = m->GetDeclaringClass();
@@ -941,8 +961,14 @@
     MutexLock mu(self, *Locks::jni_libraries_lock_);
     native_method = libraries_->FindNativeMethod(m, detail);
   }
+  if (native_method == nullptr) {
+    // Lookup JNI native methods from native TI Agent libraries. See runtime/ti/agent.h for more
+    // information. Agent libraries are searched for native methods after all jni libraries.
+    native_method = FindCodeForNativeMethodInAgents(m);
+  }
   // Throwing can cause libraries_lock to be reacquired.
   if (native_method == nullptr) {
+    LOG(ERROR) << detail;
     self->ThrowNewException("Ljava/lang/UnsatisfiedLinkError;", detail.c_str());
   }
   return native_method;
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 3631a9d..b32b272 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -23,6 +23,7 @@
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
+#include "java_vm_ext.h"
 #include "jit_code_cache.h"
 #include "oat_file_manager.h"
 #include "oat_quick_method_header.h"
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 2d1601e..52649c7 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -24,7 +24,6 @@
 #include <sys/stat.h>
 #include <sys/uio.h>
 
-#include "art_method-inl.h"
 #include "base/mutex.h"
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
@@ -33,6 +32,7 @@
 #include "jit/profiling_info.h"
 #include "os.h"
 #include "safe_map.h"
+#include "utils.h"
 
 namespace art {
 
diff --git a/runtime/jni_env_ext-inl.h b/runtime/jni_env_ext-inl.h
index 004f824..25893b7 100644
--- a/runtime/jni_env_ext-inl.h
+++ b/runtime/jni_env_ext-inl.h
@@ -19,9 +19,7 @@
 
 #include "jni_env_ext.h"
 
-#include "indirect_reference_table-inl.h"
-#include "obj_ptr-inl.h"
-#include "utils.h"
+#include "mirror/object.h"
 
 namespace art {
 
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 4004c45..60e4295 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -23,12 +23,17 @@
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
 #include "object_callbacks.h"
+#include "obj_ptr.h"
 #include "reference_table.h"
 
 namespace art {
 
 class JavaVMExt;
 
+namespace mirror {
+class Object;
+}  // namespace mirror
+
 // Number of local references in the indirect reference table. The value is arbitrary but
 // low enough that it forces sanity checks.
 static constexpr size_t kLocalsInitial = 512;
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 5418d35..2626eef 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2159,10 +2159,11 @@
     }
     CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", java_class, JNI_ERR);
     ScopedObjectAccess soa(env);
-    ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::Class> c = hs.NewHandle(soa.Decode<mirror::Class>(java_class));
     if (UNLIKELY(method_count == 0)) {
       LOG(WARNING) << "JNI RegisterNativeMethods: attempt to register 0 native methods for "
-          << mirror::Class::PrettyDescriptor(c);
+          << c->PrettyDescriptor();
       return JNI_OK;
     }
     CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", methods, JNI_ERR);
@@ -2171,13 +2172,13 @@
       const char* sig = methods[i].signature;
       const void* fnPtr = methods[i].fnPtr;
       if (UNLIKELY(name == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c, "method name", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "method name", i, return_errors);
         return JNI_ERR;
       } else if (UNLIKELY(sig == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c, "method signature", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "method signature", i, return_errors);
         return JNI_ERR;
       } else if (UNLIKELY(fnPtr == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c, "native function", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "native function", i, return_errors);
         return JNI_ERR;
       }
       bool is_fast = false;
@@ -2220,7 +2221,7 @@
       // the parent.
       ArtMethod* m = nullptr;
       bool warn_on_going_to_parent = down_cast<JNIEnvExt*>(env)->vm->IsCheckJniEnabled();
-      for (ObjPtr<mirror::Class> current_class = c;
+      for (ObjPtr<mirror::Class> current_class = c.Get();
            current_class != nullptr;
            current_class = current_class->GetSuperClass()) {
         // Search first only comparing methods which are native.
@@ -2252,14 +2253,14 @@
             << "Failed to register native method "
             << c->PrettyDescriptor() << "." << name << sig << " in "
             << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
-        ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
+        ThrowNoSuchMethodError(soa, c.Get(), name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
         LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
             << "Failed to register non-native method "
             << c->PrettyDescriptor() << "." << name << sig
             << " as native";
-        ThrowNoSuchMethodError(soa, c, name, sig, "native");
+        ThrowNoSuchMethodError(soa, c.Get(), name, sig, "native");
         return JNI_ERR;
       }
 
@@ -2277,7 +2278,8 @@
         // TODO: make this a hard register error in the future.
       }
 
-      m->RegisterNative(fnPtr, is_fast);
+      const void* final_function_ptr = m->RegisterNative(fnPtr, is_fast);
+      UNUSED(final_function_ptr);
     }
     return JNI_OK;
   }
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
index bd7c4ad..54d45b1 100644
--- a/runtime/method_handles.cc
+++ b/runtime/method_handles.cc
@@ -22,7 +22,7 @@
 #include "jvalue.h"
 #include "jvalue-inl.h"
 #include "mirror/emulated_stack_frame.h"
-#include "mirror/method_handle_impl.h"
+#include "mirror/method_handle_impl-inl.h"
 #include "mirror/method_type.h"
 #include "reflection.h"
 #include "reflection-inl.h"
@@ -925,8 +925,17 @@
     case mirror::MethodHandle::kInstancePut: {
       size_t obj_reg = is_range ? first_arg : args[0];
       size_t value_reg = is_range ? (first_arg + 1) : args[1];
-      JValue value = GetValueFromShadowFrame(shadow_frame, field_type, value_reg);
-      if (do_conversions && !ConvertArgumentValue(callsite_type, handle_type, 1, &value)) {
+      const size_t kPTypeIndex = 1;
+      // Use ptypes instead of field type since we may be unboxing a reference for a primitive
+      // field. The field type is incorrect for this case.
+      JValue value = GetValueFromShadowFrame(
+          shadow_frame,
+          callsite_type->GetPTypes()->Get(kPTypeIndex)->GetPrimitiveType(),
+          value_reg);
+      if (do_conversions && !ConvertArgumentValue(callsite_type,
+                                                  handle_type,
+                                                  kPTypeIndex,
+                                                  &value)) {
         DCHECK(self->IsExceptionPending());
         return false;
       }
@@ -940,8 +949,17 @@
         return false;
       }
       size_t value_reg = is_range ? first_arg : args[0];
-      JValue value = GetValueFromShadowFrame(shadow_frame, field_type, value_reg);
-      if (do_conversions && !ConvertArgumentValue(callsite_type, handle_type, 0, &value)) {
+      const size_t kPTypeIndex = 0;
+      // Use ptypes instead of field type since we may be unboxing a reference for a primitive
+      // field. The field type is incorrect for this case.
+      JValue value = GetValueFromShadowFrame(
+          shadow_frame,
+          callsite_type->GetPTypes()->Get(kPTypeIndex)->GetPrimitiveType(),
+          value_reg);
+      if (do_conversions && !ConvertArgumentValue(callsite_type,
+                                                  handle_type,
+                                                  kPTypeIndex,
+                                                  &value)) {
         DCHECK(self->IsExceptionPending());
         return false;
       }
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 04c80c5..bfbd4df 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -24,8 +24,9 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/logging.h"
-#include "class-inl.h"
+#include "class.h"
 #include "gc/heap-inl.h"
+#include "object-inl.h"
 #include "obj_ptr-inl.h"
 #include "thread.h"
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index be3b937..6c723ef 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -19,18 +19,18 @@
 
 #include "class.h"
 
-#include "art_field-inl.h"
+#include "art_field.h"
 #include "art_method.h"
-#include "art_method-inl.h"
 #include "base/array_slice.h"
 #include "base/length_prefixed_array.h"
+#include "class_linker-inl.h"
 #include "class_loader.h"
 #include "common_throws.h"
-#include "dex_file.h"
+#include "dex_file-inl.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "class_ext-inl.h"
 #include "object_array-inl.h"
+#include "object-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
 #include "runtime.h"
@@ -343,6 +343,21 @@
   return false;
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Class::IsVariableSize() {
+  // Classes, arrays, and strings vary in size, and so the object_size_ field cannot
+  // be used to get their instance size.
+  return IsClassClass<kVerifyFlags, kReadBarrierOption>() ||
+         IsArrayClass<kVerifyFlags, kReadBarrierOption>() ||
+         IsStringClass();
+}
+
+inline void Class::SetObjectSize(uint32_t new_object_size) {
+  DCHECK(!IsVariableSize());
+  // Not called within a transaction.
+  return SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size);
+}
+
 // Determine whether "this" is assignable from "src", where both of these
 // are array classes.
 //
@@ -784,32 +799,6 @@
   return size;
 }
 
-template <bool kVisitNativeRoots,
-          VerifyObjectFlags kVerifyFlags,
-          ReadBarrierOption kReadBarrierOption,
-          typename Visitor>
-inline void Class::VisitReferences(ObjPtr<Class> klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass.Ptr(), visitor);
-  // Right after a class is allocated, but not yet loaded
-  // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
-  // and scan it. IsTemp() may call Class::GetAccessFlags() but may
-  // fail in the DCHECK in Class::GetAccessFlags() because the class
-  // status is kStatusNotReady. To avoid it, rely on IsResolved()
-  // only. This is fine because a temp class never goes into the
-  // kStatusResolved state.
-  if (IsResolved<kVerifyFlags>()) {
-    // Temp classes don't ever populate imt/vtable or static fields and they are not even
-    // allocated with the right size for those. Also, unresolved classes don't have fields
-    // linked yet.
-    VisitStaticFieldsReferences<kVerifyFlags, kReadBarrierOption>(this, visitor);
-  }
-  if (kVisitNativeRoots) {
-    // Since this class is reachable, we must also visit the associated roots when we scan it.
-    VisitNativeRoots<kReadBarrierOption>(
-        visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
-  }
-}
-
 template<ReadBarrierOption kReadBarrierOption>
 inline bool Class::IsReferenceClass() const {
   return this == Reference::GetJavaLangRefReference<kReadBarrierOption>();
@@ -823,7 +812,10 @@
 }
 
 inline const DexFile& Class::GetDexFile() {
-  return *GetDexCache()->GetDexFile();
+  // From-space version is the same as the to-space version since the dex file never changes.
+  // Avoiding the read barrier here is important to prevent recursive AssertToSpaceInvariant issues
+  // from PrettyTypeOf.
+  return *GetDexCache<kDefaultVerifyFlags, kWithoutReadBarrier>()->GetDexFile();
 }
 
 inline bool Class::DescriptorEquals(const char* match) {
@@ -939,31 +931,6 @@
   }
 }
 
-template<ReadBarrierOption kReadBarrierOption, class Visitor>
-void Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
-  for (ArtField& field : GetSFieldsUnchecked()) {
-    // Visit roots first in case the declaring class gets moved.
-    field.VisitRoots(visitor);
-    if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
-    }
-  }
-  for (ArtField& field : GetIFieldsUnchecked()) {
-    // Visit roots first in case the declaring class gets moved.
-    field.VisitRoots(visitor);
-    if (kIsDebugBuild && IsResolved()) {
-      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
-    }
-  }
-  for (ArtMethod& method : GetMethods(pointer_size)) {
-    method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
-  }
-  ObjPtr<ClassExt> ext(GetExtData<kDefaultVerifyFlags, kReadBarrierOption>());
-  if (!ext.IsNull()) {
-    ext->VisitNativeRoots<kReadBarrierOption, Visitor>(visitor, pointer_size);
-  }
-}
-
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   return GetDirectMethodsSliceUnchecked(pointer_size).AsRange();
@@ -1033,6 +1000,12 @@
   return GetComponentType<kVerifyFlags, kReadBarrierOption>() != nullptr;
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Class::IsObjectArrayClass() {
+  ObjPtr<Class> const component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>();
+  return component_type != nullptr && !component_type->IsPrimitive();
+}
+
 inline bool Class::IsAssignableFrom(ObjPtr<Class> src) {
   DCHECK(src != nullptr);
   if (this == src) {
diff --git a/runtime/mirror/class-refvisitor-inl.h b/runtime/mirror/class-refvisitor-inl.h
new file mode 100644
index 0000000..3d52ead
--- /dev/null
+++ b/runtime/mirror/class-refvisitor-inl.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_CLASS_REFVISITOR_INL_H_
+#define ART_RUNTIME_MIRROR_CLASS_REFVISITOR_INL_H_
+
+#include "class-inl.h"
+
+#include "art_field-inl.h"
+#include "class_ext-inl.h"
+
+namespace art {
+namespace mirror {
+
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
+inline void Class::VisitReferences(ObjPtr<Class> klass, const Visitor& visitor) {
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass.Ptr(), visitor);
+  // Right after a class is allocated, but not yet loaded
+  // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
+  // and scan it. IsTemp() may call Class::GetAccessFlags() but may
+  // fail in the DCHECK in Class::GetAccessFlags() because the class
+  // status is kStatusNotReady. To avoid it, rely on IsResolved()
+  // only. This is fine because a temp class never goes into the
+  // kStatusResolved state.
+  if (IsResolved<kVerifyFlags>()) {
+    // Temp classes don't ever populate imt/vtable or static fields and they are not even
+    // allocated with the right size for those. Also, unresolved classes don't have fields
+    // linked yet.
+    VisitStaticFieldsReferences<kVerifyFlags, kReadBarrierOption>(this, visitor);
+  }
+  if (kVisitNativeRoots) {
+    // Since this class is reachable, we must also visit the associated roots when we scan it.
+    VisitNativeRoots<kReadBarrierOption>(
+        visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  }
+}
+
+template<ReadBarrierOption kReadBarrierOption, class Visitor>
+void Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
+  for (ArtField& field : GetSFieldsUnchecked()) {
+    // Visit roots first in case the declaring class gets moved.
+    field.VisitRoots(visitor);
+    if (kIsDebugBuild && IsResolved()) {
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
+    }
+  }
+  for (ArtField& field : GetIFieldsUnchecked()) {
+    // Visit roots first in case the declaring class gets moved.
+    field.VisitRoots(visitor);
+    if (kIsDebugBuild && IsResolved()) {
+      CHECK_EQ(field.GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus();
+    }
+  }
+  for (ArtMethod& method : GetMethods(pointer_size)) {
+    method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
+  }
+  ObjPtr<ClassExt> ext(GetExtData<kDefaultVerifyFlags, kReadBarrierOption>());
+  if (!ext.IsNull()) {
+    ext->VisitNativeRoots<kReadBarrierOption, Visitor>(visitor, pointer_size);
+  }
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_CLASS_REFVISITOR_INL_H_
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 26af488..06ee3d3 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -32,6 +32,7 @@
 #include "method.h"
 #include "object_array-inl.h"
 #include "object-inl.h"
+#include "object-refvisitor-inl.h"
 #include "object_lock.h"
 #include "runtime.h"
 #include "thread.h"
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 27aecd5..dfb2788 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -530,10 +530,7 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsObjectArrayClass() REQUIRES_SHARED(Locks::mutator_lock_) {
-    ObjPtr<Class> const component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>();
-    return component_type != nullptr && !component_type->IsPrimitive();
-  }
+  ALWAYS_INLINE bool IsObjectArrayClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsIntArrayClass() REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -561,12 +558,7 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsVariableSize() REQUIRES_SHARED(Locks::mutator_lock_) {
-    // Classes, arrays, and strings vary in size, and so the object_size_ field cannot
-    // be used to Get their instance size
-    return IsClassClass<kVerifyFlags, kReadBarrierOption>() ||
-        IsArrayClass<kVerifyFlags, kReadBarrierOption>() || IsStringClass();
-  }
+  ALWAYS_INLINE bool IsVariableSize() REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -614,11 +606,7 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, object_size_alloc_fast_path_);
   }
 
-  void SetObjectSize(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(!IsVariableSize());
-    // Not called within a transaction.
-    return SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size);
-  }
+  ALWAYS_INLINE void SetObjectSize(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetObjectSizeAllocFastPath(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 5d3af50..18e22ef 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -19,13 +19,15 @@
 
 #include "dex_cache.h"
 
-#include "art_field-inl.h"
-#include "art_method-inl.h"
+#include "art_field.h"
+#include "art_method.h"
 #include "base/casts.h"
 #include "base/enums.h"
 #include "base/logging.h"
+#include "class_linker.h"
 #include "dex_file.h"
 #include "gc_root.h"
+#include "gc/heap-inl.h"
 #include "mirror/class.h"
 #include "mirror/call_site.h"
 #include "mirror/method_type.h"
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 71a47f6..a110ed7 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -18,6 +18,7 @@
 
 #include <stdio.h>
 
+#include "art_method-inl.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "linear_alloc.h"
diff --git a/runtime/mirror/method_handle_impl-inl.h b/runtime/mirror/method_handle_impl-inl.h
new file mode 100644
index 0000000..0840d16
--- /dev/null
+++ b/runtime/mirror/method_handle_impl-inl.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_METHOD_HANDLE_IMPL_INL_H_
+#define ART_RUNTIME_MIRROR_METHOD_HANDLE_IMPL_INL_H_
+
+#include "method_handle_impl.h"
+
+#include "art_method-inl.h"
+#include "object-inl.h"
+
+namespace art {
+namespace mirror {
+
+inline mirror::MethodType* MethodHandle::GetMethodType() {
+  return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, method_type_));
+}
+
+inline mirror::MethodType* MethodHandle::GetNominalType() {
+  return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, nominal_type_));
+}
+
+inline ObjPtr<mirror::Class> MethodHandle::GetTargetClass() {
+  Kind kind = GetHandleKind();
+  return (kind <= kLastValidKind) ?
+      GetTargetMethod()->GetDeclaringClass() : GetTargetField()->GetDeclaringClass();
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_METHOD_HANDLE_IMPL_INL_H_
diff --git a/runtime/mirror/method_handle_impl.cc b/runtime/mirror/method_handle_impl.cc
index fa4d25a..42b8473 100644
--- a/runtime/mirror/method_handle_impl.cc
+++ b/runtime/mirror/method_handle_impl.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "method_handle_impl.h"
+#include "method_handle_impl-inl.h"
 
 #include "class-inl.h"
 #include "gc_root-inl.h"
@@ -42,6 +42,10 @@
 
 GcRoot<mirror::Class> MethodHandleImpl::static_class_;
 
+mirror::Class* MethodHandleImpl::StaticClass()  {
+  return static_class_.Read();
+}
+
 void MethodHandleImpl::SetClass(Class* klass) {
   CHECK(static_class_.IsNull()) << static_class_.Read() << " " << klass;
   CHECK(klass != nullptr);
diff --git a/runtime/mirror/method_handle_impl.h b/runtime/mirror/method_handle_impl.h
index 9938af8..c598fa3 100644
--- a/runtime/mirror/method_handle_impl.h
+++ b/runtime/mirror/method_handle_impl.h
@@ -21,7 +21,7 @@
 #include "art_method.h"
 #include "class.h"
 #include "gc_root.h"
-#include "object-inl.h"
+#include "object.h"
 #include "method_type.h"
 
 namespace art {
@@ -65,13 +65,9 @@
     return static_cast<Kind>(handle_kind);
   }
 
-  mirror::MethodType* GetMethodType() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, method_type_));
-  }
+  ALWAYS_INLINE mirror::MethodType* GetMethodType() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::MethodType* GetNominalType() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, nominal_type_));
-  }
+  ALWAYS_INLINE mirror::MethodType* GetNominalType() REQUIRES_SHARED(Locks::mutator_lock_);
 
   ArtField* GetTargetField() REQUIRES_SHARED(Locks::mutator_lock_) {
     return reinterpret_cast<ArtField*>(
@@ -83,11 +79,7 @@
         GetField64(OFFSET_OF_OBJECT_MEMBER(MethodHandle, art_field_or_method_)));
   }
 
-  ObjPtr<mirror::Class> GetTargetClass() REQUIRES_SHARED(Locks::mutator_lock_) {
-    Kind kind = GetHandleKind();
-    return (kind <= kLastValidKind) ?
-        GetTargetMethod()->GetDeclaringClass() : GetTargetField()->GetDeclaringClass();
-  }
+  ALWAYS_INLINE ObjPtr<mirror::Class> GetTargetClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
   static mirror::Class* StaticClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -132,9 +124,7 @@
                                           Handle<MethodType> method_type)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  static mirror::Class* StaticClass() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return static_class_.Read();
-  }
+  static mirror::Class* StaticClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
   static void SetClass(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
   static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/mirror/method_handles_lookup.cc b/runtime/mirror/method_handles_lookup.cc
index c758e54..0c25fa8 100644
--- a/runtime/mirror/method_handles_lookup.cc
+++ b/runtime/mirror/method_handles_lookup.cc
@@ -16,7 +16,7 @@
 
 #include "method_handles_lookup.h"
 
-#include "class.h"
+#include "class-inl.h"
 #include "gc_root-inl.h"
 #include "object-inl.h"
 #include "handle_scope.h"
diff --git a/runtime/mirror/method_type_test.cc b/runtime/mirror/method_type_test.cc
index 41231ef..a361772 100644
--- a/runtime/mirror/method_type_test.cc
+++ b/runtime/mirror/method_type_test.cc
@@ -20,7 +20,7 @@
 #include <vector>
 
 #include "class-inl.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "class_loader.h"
 #include "common_runtime_test.h"
 #include "handle_scope-inl.h"
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index f83645e..baed5f1 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -23,7 +23,7 @@
 #include "art_method.h"
 #include "atomic.h"
 #include "array-inl.h"
-#include "class.h"
+#include "class-inl.h"
 #include "class_flags.h"
 #include "class_linker.h"
 #include "class_loader-inl.h"
@@ -32,6 +32,7 @@
 #include "monitor.h"
 #include "object_array-inl.h"
 #include "object_reference-inl.h"
+#include "object-readbarrier-inl.h"
 #include "obj_ptr-inl.h"
 #include "read_barrier-inl.h"
 #include "reference.h"
@@ -66,14 +67,6 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline LockWord Object::GetLockWord(bool as_volatile) {
-  if (as_volatile) {
-    return LockWord(GetField32Volatile<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  }
-  return LockWord(GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-}
-
-template<VerifyObjectFlags kVerifyFlags>
 inline void Object::SetLockWord(LockWord new_val, bool as_volatile) {
   // Force use of non-transactional mode and do not check.
   if (as_volatile) {
@@ -91,24 +84,12 @@
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
-inline bool Object::CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val) {
-  // Force use of non-transactional mode and do not check.
-  return CasFieldWeakRelaxed32<false, false>(
-      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
-}
-
 inline bool Object::CasLockWordWeakAcquire(LockWord old_val, LockWord new_val) {
   // Force use of non-transactional mode and do not check.
   return CasFieldWeakAcquire32<false, false>(
       OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
 }
 
-inline bool Object::CasLockWordWeakRelease(LockWord old_val, LockWord new_val) {
-  // Force use of non-transactional mode and do not check.
-  return CasFieldWeakRelease32<false, false>(
-      OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(), new_val.GetValue());
-}
-
 inline uint32_t Object::GetLockOwnerThreadId() {
   return Monitor::GetLockOwnerThreadId(this);
 }
@@ -141,84 +122,6 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-#if defined(__arm__)
-  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
-  uintptr_t result;
-  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
-  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
-  __asm__ __volatile__(
-      "ldr %[result], [%[obj], #4]\n\t"
-      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
-      // null, without them being able to assume that fact.
-      "eor %[fad], %[result], %[result]\n\t"
-      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
-      : [obj] "r" (obj));
-  DCHECK_EQ(*fake_address_dependency, 0U);
-  LockWord lw(static_cast<uint32_t>(result));
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#elif defined(__aarch64__)
-  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
-  uintptr_t result;
-  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
-  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
-  __asm__ __volatile__(
-      "ldr %w[result], [%[obj], #4]\n\t"
-      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
-      // null, without them being able to assume that fact.
-      "eor %[fad], %[result], %[result]\n\t"
-      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
-      : [obj] "r" (obj));
-  DCHECK_EQ(*fake_address_dependency, 0U);
-  LockWord lw(static_cast<uint32_t>(result));
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#elif defined(__i386__) || defined(__x86_64__)
-  LockWord lw = GetLockWord(false);
-  // i386/x86_64 don't need fake address dependency. Use a compiler fence to avoid compiler
-  // reordering.
-  *fake_address_dependency = 0;
-  std::atomic_signal_fence(std::memory_order_acquire);
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#else
-  // MIPS32/MIPS64: use a memory barrier to prevent load-load reordering.
-  LockWord lw = GetLockWord(false);
-  *fake_address_dependency = 0;
-  std::atomic_thread_fence(std::memory_order_acquire);
-  uint32_t rb_state = lw.ReadBarrierState();
-  return rb_state;
-#endif
-}
-
-inline uint32_t Object::GetReadBarrierState() {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  DCHECK(kUseBakerReadBarrier);
-  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  uint32_t rb_state = lw.ReadBarrierState();
-  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
-  return rb_state;
-}
-
-inline uint32_t Object::GetReadBarrierStateAcquire() {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  uint32_t rb_state = lw.ReadBarrierState();
-  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
-  return rb_state;
-}
-
 inline uint32_t Object::GetMarkBit() {
 #ifdef USE_READ_BARRIER
   return GetLockWord(false).MarkBitState();
@@ -239,54 +142,6 @@
   SetLockWord(lw, false);
 }
 
-template<bool kCasRelease>
-inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
-  if (!kUseBakerReadBarrier) {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
-  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
-  LockWord expected_lw;
-  LockWord new_lw;
-  do {
-    LockWord lw = GetLockWord(false);
-    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
-      // Lost the race.
-      return false;
-    }
-    expected_lw = lw;
-    expected_lw.SetReadBarrierState(expected_rb_state);
-    new_lw = lw;
-    new_lw.SetReadBarrierState(rb_state);
-    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
-    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
-    // an object and then changes the object from gray to black, the field updates (stores) will be
-    // visible (won't be reordered after this CAS.)
-  } while (!(kCasRelease ?
-             CasLockWordWeakRelease(expected_lw, new_lw) :
-             CasLockWordWeakRelaxed(expected_lw, new_lw)));
-  return true;
-}
-
-inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
-  LockWord expected_lw;
-  LockWord new_lw;
-  do {
-    LockWord lw = GetLockWord(false);
-    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
-      // Lost the race.
-      return false;
-    }
-    expected_lw = lw;
-    new_lw = lw;
-    new_lw.SetMarkBitState(mark_bit);
-    // Since this is only set from the mutator, we can use the non release Cas.
-  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
-  return true;
-}
-
-
 inline void Object::AssertReadBarrierState() const {
   CHECK(kUseBakerReadBarrier);
   Object* obj = const_cast<Object*>(this);
@@ -727,24 +582,6 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasFieldWeakRelaxed32(MemberOffset field_offset,
-                                          int32_t old_value, int32_t new_value) {
-  if (kCheckTransaction) {
-    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
-  }
-  if (kTransactionActive) {
-    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
-  }
-  if (kVerifyFlags & kVerifyThis) {
-    VerifyObject(this);
-  }
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
-  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
-
-  return atomic_addr->CompareExchangeWeakRelaxed(old_value, new_value);
-}
-
-template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakAcquire32(MemberOffset field_offset,
                                           int32_t old_value, int32_t new_value) {
   if (kCheckTransaction) {
@@ -798,19 +635,6 @@
   return atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_value, new_value);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
-inline int64_t Object::GetField64(MemberOffset field_offset) {
-  if (kVerifyFlags & kVerifyThis) {
-    VerifyObject(this);
-  }
-  return GetField<int64_t, kIsVolatile>(field_offset);
-}
-
-template<VerifyObjectFlags kVerifyFlags>
-inline int64_t Object::GetField64Volatile(MemberOffset field_offset) {
-  return GetField64<kVerifyFlags, true>(field_offset);
-}
-
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
     bool kIsVolatile>
 inline void Object::SetField64(MemberOffset field_offset, int64_t new_value) {
@@ -1075,36 +899,6 @@
   return success;
 }
 
-template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
-    MemberOffset field_offset,
-    ObjPtr<Object> old_value,
-    ObjPtr<Object> new_value) {
-  if (kCheckTransaction) {
-    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
-  }
-  if (kVerifyFlags & kVerifyThis) {
-    VerifyObject(this);
-  }
-  if (kVerifyFlags & kVerifyWrites) {
-    VerifyObject(new_value);
-  }
-  if (kVerifyFlags & kVerifyReads) {
-    VerifyObject(old_value);
-  }
-  if (kTransactionActive) {
-    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
-  }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
-  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-
-  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
-                                                           new_ref.reference_);
-  return success;
-}
-
 template<bool kIsStatic,
          VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption,
@@ -1186,67 +980,6 @@
   return down_cast<mirror::DexCache*>(this);
 }
 
-template <bool kVisitNativeRoots,
-          VerifyObjectFlags kVerifyFlags,
-          ReadBarrierOption kReadBarrierOption,
-          typename Visitor,
-          typename JavaLangRefVisitor>
-inline void Object::VisitReferences(const Visitor& visitor,
-                                    const JavaLangRefVisitor& ref_visitor) {
-  ObjPtr<Class> klass = GetClass<kVerifyFlags, kReadBarrierOption>();
-  visitor(this, ClassOffset(), false);
-  const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
-  if (LIKELY(class_flags == kClassFlagNormal)) {
-    DCHECK((!klass->IsVariableSize<kVerifyFlags, kReadBarrierOption>()));
-    VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
-    DCHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
-    DCHECK(!klass->IsStringClass());
-    DCHECK(!klass->IsClassLoaderClass());
-    DCHECK((!klass->IsArrayClass<kVerifyFlags, kReadBarrierOption>()));
-  } else {
-    if ((class_flags & kClassFlagNoReferenceFields) == 0) {
-      DCHECK(!klass->IsStringClass());
-      if (class_flags == kClassFlagClass) {
-        DCHECK((klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
-        ObjPtr<Class> as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
-        as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass,
-                                                                                       visitor);
-      } else if (class_flags == kClassFlagObjectArray) {
-        DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
-        AsObjectArray<mirror::Object, kVerifyNone, kReadBarrierOption>()->VisitReferences(visitor);
-      } else if ((class_flags & kClassFlagReference) != 0) {
-        VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
-        ref_visitor(klass, AsReference<kVerifyFlags, kReadBarrierOption>());
-      } else if (class_flags == kClassFlagDexCache) {
-        mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>();
-        dex_cache->VisitReferences<kVisitNativeRoots,
-                                   kVerifyFlags,
-                                   kReadBarrierOption>(klass, visitor);
-      } else {
-        mirror::ClassLoader* const class_loader = AsClassLoader<kVerifyFlags, kReadBarrierOption>();
-        class_loader->VisitReferences<kVisitNativeRoots,
-                                      kVerifyFlags,
-                                      kReadBarrierOption>(klass, visitor);
-      }
-    } else if (kIsDebugBuild) {
-      CHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
-      CHECK((!klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
-      // String still has instance fields for reflection purposes but these don't exist in
-      // actual string instances.
-      if (!klass->IsStringClass()) {
-        size_t total_reference_instance_fields = 0;
-        ObjPtr<Class> super_class = klass;
-        do {
-          total_reference_instance_fields += super_class->NumReferenceInstanceFields();
-          super_class = super_class->GetSuperClass<kVerifyFlags, kReadBarrierOption>();
-        } while (super_class != nullptr);
-        // The only reference field should be the object's class. This field is handled at the
-        // beginning of the function.
-        CHECK_EQ(total_reference_instance_fields, 1u);
-      }
-    }
-  }
-}
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h
new file mode 100644
index 0000000..58e7c20
--- /dev/null
+++ b/runtime/mirror/object-readbarrier-inl.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_OBJECT_READBARRIER_INL_H_
+#define ART_RUNTIME_MIRROR_OBJECT_READBARRIER_INL_H_
+
+#include "object.h"
+
+#include "atomic.h"
+#include "lock_word-inl.h"
+#include "object_reference-inl.h"
+#include "read_barrier.h"
+#include "runtime.h"
+
+namespace art {
+namespace mirror {
+
+template<VerifyObjectFlags kVerifyFlags>
+inline LockWord Object::GetLockWord(bool as_volatile) {
+  // Load the monitor_ word (through the volatile getter on request) and wrap it in a LockWord.
+  const MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
+  return as_volatile ? LockWord(GetField32Volatile<kVerifyFlags>(offset))
+                     : LockWord(GetField32<kVerifyFlags>(offset));
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakRelaxed32(MemberOffset field_offset,
+                                          int32_t old_value, int32_t new_value) {
+  if (kCheckTransaction) {  // The template flag must agree with the runtime's transaction state.
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kTransactionActive) {  // Hand the expected old value to the runtime before the CAS.
+    Runtime::Current()->RecordWriteField32(this, field_offset, old_value, true);
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  AtomicInteger* atomic_addr = reinterpret_cast<AtomicInteger*>(raw_addr);
+  // One weak, relaxed compare-exchange on the field's address; its result is returned as is.
+  return atomic_addr->CompareExchangeWeakRelaxed(old_value, new_value);
+}
+
+inline bool Object::CasLockWordWeakRelaxed(LockWord old_val, LockWord new_val) {
+  // <false, false>: non-transactional, and the transaction-state DCHECK is skipped.
+  const MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
+  return CasFieldWeakRelaxed32<false, false>(offset, old_val.GetValue(), new_val.GetValue());
+}
+
+inline bool Object::CasLockWordWeakRelease(LockWord old_val, LockWord new_val) {
+  // Force non-transactional mode without checking; the CAS targets the monitor_ word.
+  const MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
+  return CasFieldWeakRelease32<false, false>(offset, old_val.GetValue(), new_val.GetValue());
+}
+
+inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
+  if (!kUseBakerReadBarrier) {  // Every path below requires kUseBakerReadBarrier.
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+#if defined(__arm__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;  // NOTE(review): uninitialized, yet bound as "+r" below - confirm intent.
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);  // The asm below hardcodes offset #4.
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // null, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);  // Sanity check: x eor x == 0.
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__aarch64__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;  // NOTE(review): uninitialized, yet bound as "+r" below - confirm intent.
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);  // The asm below hardcodes offset #4.
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %w[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // null, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);  // Sanity check: x eor x == 0.
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__i386__) || defined(__x86_64__)
+  LockWord lw = GetLockWord(false);  // Non-volatile read of the lock word.
+  // i386/x86_64 don't need fake address dependency. Use a compiler fence to avoid compiler
+  // reordering.
+  *fake_address_dependency = 0;
+  std::atomic_signal_fence(std::memory_order_acquire);  // The compiler fence mentioned above.
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#else
+  // MIPS32/MIPS64: use a memory barrier to prevent load-load reordering.
+  LockWord lw = GetLockWord(false);  // Non-volatile read of the lock word.
+  *fake_address_dependency = 0;
+  std::atomic_thread_fence(std::memory_order_acquire);  // The memory barrier mentioned above.
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#endif
+}
+
+inline uint32_t Object::GetReadBarrierState() {
+  if (!kUseBakerReadBarrier) {  // Only valid when kUseBakerReadBarrier is set.
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+  DCHECK(kUseBakerReadBarrier);  // Redundant with the fatal check just above.
+  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();  // lw comes from a non-volatile read of monitor_.
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
+}
+
+inline uint32_t Object::GetReadBarrierStateAcquire() {
+  if (!kUseBakerReadBarrier) {  // Only valid when kUseBakerReadBarrier is set.
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+  LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();  // lw was read through GetFieldAcquire above.
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
+}
+
+template<bool kCasRelease>
+inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
+  if (!kUseBakerReadBarrier) {  // Only valid when kUseBakerReadBarrier is set.
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {  // Loop until the CAS succeeds or the observed state differs from expected_rb_state.
+    LockWord lw = GetLockWord(false);  // Fresh non-volatile read on every iteration.
+    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    expected_lw.SetReadBarrierState(expected_rb_state);
+    new_lw = lw;  // Start from lw so that only SetReadBarrierState() below alters it.
+    new_lw.SetReadBarrierState(rb_state);
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS.)
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
+  return true;  // The CAS installed new_lw.
+}
+
+inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) {
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {  // Loop until the CAS succeeds or the observed mark bit differs from expected_mark_bit.
+    LockWord lw = GetLockWord(false);  // Fresh non-volatile read on every iteration.
+    if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    new_lw = lw;  // Start from lw so that only SetMarkBitState() below alters it.
+    new_lw.SetMarkBitState(mark_bit);
+    // Since this is only set from the mutator, we can use the non release Cas.
+  } while (!CasLockWordWeakRelaxed(expected_lw, new_lw));
+  return true;  // The CAS installed new_lw.
+}
+
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
+  if (kCheckTransaction) {  // The template flag must agree with the runtime's transaction state.
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {  // Hand the expected old reference to the runtime before the CAS.
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+  // One strong, relaxed compare-exchange of the two 32-bit reference_ values.
+  bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_,
+                                                           new_ref.reference_);
+  return success;  // No write barrier is issued in this function.
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_OBJECT_READBARRIER_INL_H_
diff --git a/runtime/mirror/object-refvisitor-inl.h b/runtime/mirror/object-refvisitor-inl.h
new file mode 100644
index 0000000..49ab7c2
--- /dev/null
+++ b/runtime/mirror/object-refvisitor-inl.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_OBJECT_REFVISITOR_INL_H_
+#define ART_RUNTIME_MIRROR_OBJECT_REFVISITOR_INL_H_
+
+#include "object-inl.h"
+
+#include "class-refvisitor-inl.h"
+
+namespace art {
+namespace mirror {
+
+template <bool kVisitNativeRoots,
+          VerifyObjectFlags kVerifyFlags,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor,
+          typename JavaLangRefVisitor>
+inline void Object::VisitReferences(const Visitor& visitor,
+                                    const JavaLangRefVisitor& ref_visitor) {
+  ObjPtr<Class> klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  visitor(this, ClassOffset(), false);  // The class reference is visited unconditionally.
+  const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
+  if (LIKELY(class_flags == kClassFlagNormal)) {  // Only instance fields are visited here.
+    DCHECK((!klass->IsVariableSize<kVerifyFlags, kReadBarrierOption>()));
+    VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+    DCHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+    DCHECK(!klass->IsStringClass());
+    DCHECK(!klass->IsClassLoaderClass());
+    DCHECK((!klass->IsArrayClass<kVerifyFlags, kReadBarrierOption>()));
+  } else {
+    if ((class_flags & kClassFlagNoReferenceFields) == 0) {  // Dispatch on the class flags.
+      DCHECK(!klass->IsStringClass());
+      if (class_flags == kClassFlagClass) {
+        DCHECK((klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+        ObjPtr<Class> as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
+        as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass,
+                                                                                       visitor);
+      } else if (class_flags == kClassFlagObjectArray) {
+        DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
+        AsObjectArray<mirror::Object, kVerifyNone, kReadBarrierOption>()->VisitReferences(visitor);
+      } else if ((class_flags & kClassFlagReference) != 0) {
+        VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+        ref_visitor(klass, AsReference<kVerifyFlags, kReadBarrierOption>());
+      } else if (class_flags == kClassFlagDexCache) {
+        mirror::DexCache* const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>();
+        dex_cache->VisitReferences<kVisitNativeRoots,
+                                   kVerifyFlags,
+                                   kReadBarrierOption>(klass, visitor);
+      } else {  // Every remaining flag value is handled as a class loader.
+        mirror::ClassLoader* const class_loader = AsClassLoader<kVerifyFlags, kReadBarrierOption>();
+        class_loader->VisitReferences<kVisitNativeRoots,
+                                      kVerifyFlags,
+                                      kReadBarrierOption>(klass, visitor);
+      }
+    } else if (kIsDebugBuild) {  // kClassFlagNoReferenceFields is set: only run sanity checks.
+      CHECK((!klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
+      CHECK((!klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>()));
+      // String still has instance fields for reflection purposes but these don't exist in
+      // actual string instances.
+      if (!klass->IsStringClass()) {
+        size_t total_reference_instance_fields = 0;
+        ObjPtr<Class> super_class = klass;
+        do {  // Sum the reference instance fields of klass and of all its superclasses.
+          total_reference_instance_fields += super_class->NumReferenceInstanceFields();
+          super_class = super_class->GetSuperClass<kVerifyFlags, kReadBarrierOption>();
+        } while (super_class != nullptr);
+        // The only reference field should be the object's class. This field is handled at the
+        // beginning of the function.
+        CHECK_EQ(total_reference_instance_fields, 1u);
+      }
+    }
+  }
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_OBJECT_REFVISITOR_INL_H_
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index f5b9ab3..6e5fdb7 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -30,6 +30,7 @@
 #include "iftable-inl.h"
 #include "monitor.h"
 #include "object-inl.h"
+#include "object-refvisitor-inl.h"
 #include "object_array-inl.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
@@ -281,12 +282,16 @@
 }
 
 std::string Object::PrettyTypeOf() {
-  if (GetClass() == nullptr) {
+  // From-space version is the same as the to-space version since the dex file never changes.
+  // Avoiding the read barrier here is important to prevent recursive AssertToSpaceInvariant
+  // issues.
+  ObjPtr<mirror::Class> klass = GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>();
+  if (klass == nullptr) {
     return "(raw)";
   }
   std::string temp;
-  std::string result(PrettyDescriptor(GetClass()->GetDescriptor(&temp)));
-  if (IsClass()) {
+  std::string result(PrettyDescriptor(klass->GetDescriptor(&temp)));
+  if (klass->IsClassClass()) {
     result += "<" + PrettyDescriptor(AsClass()->GetDescriptor(&temp)) + ">";
   }
   return result;
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 417a22d..35a1b73 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -487,11 +487,18 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   ALWAYS_INLINE int64_t GetField64(MemberOffset field_offset)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kVerifyFlags & kVerifyThis) {
+      VerifyObject(this);
+    }
+    return GetField<int64_t, kIsVolatile>(field_offset);
+  }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE int64_t GetField64Volatile(MemberOffset field_offset)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetField64<kVerifyFlags, true>(field_offset);
+  }
 
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 3e04bf6..dbec40c 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -24,8 +24,9 @@
 #include "android-base/stringprintf.h"
 
 #include "array-inl.h"
+#include "class.h"
 #include "gc/heap.h"
-#include "mirror/class.h"
+#include "object-inl.h"
 #include "obj_ptr-inl.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index d306f9c..d7527d5 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -73,6 +73,13 @@
     }
     EXPECT_EQ(expected_hash, string->GetHashCode());
   }
+
+  template <class T>
+  mirror::ObjectArray<T>* AllocObjectArray(Thread* self, size_t length)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    // Allocate a `length`-element ObjectArray<T> using the kObjectArrayClass class root.
+    auto klass = class_linker_->GetClassRoot(ClassLinker::ClassRoot::kObjectArrayClass);
+    return mirror::ObjectArray<T>::Alloc(self, klass, length);
+  }
 };
 
 // Keep constants in sync.
@@ -100,8 +107,7 @@
 TEST_F(ObjectTest, Clone) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
-  Handle<ObjectArray<Object>> a1(
-      hs.NewHandle(class_linker_->AllocObjectArray<Object>(soa.Self(), 256)));
+  Handle<ObjectArray<Object>> a1(hs.NewHandle(AllocObjectArray<Object>(soa.Self(), 256)));
   size_t s1 = a1->SizeOf();
   Object* clone = a1->Clone(soa.Self());
   EXPECT_EQ(s1, clone->SizeOf());
@@ -111,8 +117,7 @@
 TEST_F(ObjectTest, AllocObjectArray) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
-  Handle<ObjectArray<Object>> oa(
-      hs.NewHandle(class_linker_->AllocObjectArray<Object>(soa.Self(), 2)));
+  Handle<ObjectArray<Object>> oa(hs.NewHandle(AllocObjectArray<Object>(soa.Self(), 2)));
   EXPECT_EQ(2, oa->GetLength());
   EXPECT_TRUE(oa->Get(0) == nullptr);
   EXPECT_TRUE(oa->Get(1) == nullptr);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f3cb0df..e365b42 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -31,7 +31,6 @@
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 0e61cf6..0617dae 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -254,10 +254,11 @@
   return CreateSingleDexFileCookie(env, std::move(dex_mem_map));
 }
 
+// TODO(calin): clean up the unused parameters (here and in libcore).
 static jobject DexFile_openDexFileNative(JNIEnv* env,
                                          jclass,
                                          jstring javaSourceName,
-                                         jstring javaOutputName,
+                                         jstring javaOutputName ATTRIBUTE_UNUSED,
                                          jint flags ATTRIBUTE_UNUSED,
                                          jobject class_loader,
                                          jobjectArray dex_elements) {
@@ -265,10 +266,7 @@
   if (sourceName.c_str() == nullptr) {
     return 0;
   }
-  NullableScopedUtfChars outputName(env, javaOutputName);
-  if (env->ExceptionCheck()) {
-    return 0;
-  }
+
   Runtime* const runtime = Runtime::Current();
   ClassLinker* linker = runtime->GetClassLinker();
   std::vector<std::unique_ptr<const DexFile>> dex_files;
@@ -276,7 +274,6 @@
   const OatFile* oat_file = nullptr;
 
   dex_files = runtime->GetOatFileManager().OpenDexFilesFromOat(sourceName.c_str(),
-                                                               outputName.c_str(),
                                                                class_loader,
                                                                dex_elements,
                                                                /*out*/ &oat_file,
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index f6a73a8..5c4e242 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -31,9 +31,12 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "gc/space/zygote_space.h"
+#include "handle_scope-inl.h"
 #include "hprof/hprof.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
+#include "mirror/object_array-inl.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "scoped_fast_native_object_access-inl.h"
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 11f8505..ff4d931 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -46,6 +46,7 @@
 #include "gc/space/image_space.h"
 #include "gc/task_processor.h"
 #include "intern_table.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -252,7 +253,7 @@
 }
 
 static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
-  Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env), true);
+  Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env), gc::kGcCauseBackground, true);
 }
 
 static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) {
@@ -260,7 +261,9 @@
 }
 
 static void VMRuntime_requestConcurrentGC(JNIEnv* env, jobject) {
-  Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env), true);
+  Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env),
+                                                     gc::kGcCauseBackground,
+                                                     true);
 }
 
 static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) {
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 836ba81..2eaa8c7 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -21,6 +21,7 @@
 #include "android-base/stringprintf.h"
 
 #include "arch/instruction_set.h"
+#include "art_method-inl.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
 #include "jit/jit.h"
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 381dc7b..4f99947 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -19,8 +19,9 @@
 #include <iostream>
 
 #include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "base/enums.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "dex_file_annotations.h"
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index 54ab861..a9ba33e 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -23,6 +23,7 @@
 #include "obj_ptr.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedUtfChars.h"
+#include "well_known_classes.h"
 #include "zip_archive.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_Void.cc b/runtime/native/java_lang_Void.cc
index 96bfd1b..e2b4b82 100644
--- a/runtime/native/java_lang_Void.cc
+++ b/runtime/native/java_lang_Void.cc
@@ -16,7 +16,7 @@
 
 #include "java_lang_Void.h"
 
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "jni_internal.h"
 #include "runtime.h"
 #include "scoped_fast_native_object_access-inl.h"
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 9198964..0fb3903 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -18,6 +18,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "art_field-inl.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
@@ -29,6 +30,7 @@
 #include "reflection-inl.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "utils.h"
+#include "well_known_classes.h"
 
 namespace art {
 
diff --git a/runtime/native/scoped_fast_native_object_access-inl.h b/runtime/native/scoped_fast_native_object_access-inl.h
index 50a554c..b2abc46 100644
--- a/runtime/native/scoped_fast_native_object_access-inl.h
+++ b/runtime/native/scoped_fast_native_object_access-inl.h
@@ -19,7 +19,7 @@
 
 #include "scoped_fast_native_object_access.h"
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
diff --git a/runtime/oat.h b/runtime/oat.h
index 58ea91b..05706252 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '8', '\0' };  // ARM64 Read barriers thunks.
+  static constexpr uint8_t kOatVersion[] = { '1', '1', '9', '\0' };  // Add thread_local_limit.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 493da27..4a85d479 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -34,7 +34,7 @@
 
 #include "android-base/stringprintf.h"
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/bit_vector.h"
 #include "base/enums.h"
 #include "base/stl_util.h"
@@ -1497,11 +1497,18 @@
 
 static constexpr char kDexClassPathEncodingSeparator = '*';
 
-std::string OatFile::EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files) {
+std::string OatFile::EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files,
+                                               std::string& base_dir) {
   std::ostringstream out;
 
   for (const DexFile* dex_file : dex_files) {
-    out << dex_file->GetLocation().c_str();
+    const std::string& location = dex_file->GetLocation();
+    // Strip a whole "<base_dir>/" prefix only; this also keeps substr() below within bounds.
+    if (!base_dir.empty() && location.compare(0, base_dir.size() + 1, base_dir + '/') == 0) {
+      out << location.substr(base_dir.length() + 1).c_str();
+    } else {
+      out << location.c_str();
+    }
     out << kDexClassPathEncodingSeparator;
     out << dex_file->GetLocationChecksum();
     out << kDexClassPathEncodingSeparator;
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index d24283a..06c76b5 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -288,7 +288,9 @@
       const char* abs_dex_location, const std::string& rel_dex_location);
 
   // Create a dependency list (dex locations and checksums) for the given dex files.
-  static std::string EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files);
+  // Removes dex file paths prefixed with base_dir to convert them back to relative paths.
+  static std::string EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files,
+                                               std::string& base_dir);
 
   // Finds the associated oat class for a dex_file and descriptor. Returns an invalid OatClass on
   // error and sets found to false.
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index db6f8ee..a7be73a 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -20,6 +20,7 @@
 
 #include <sys/stat.h>
 
+#include "android-base/stringprintf.h"
 #include "android-base/strings.h"
 
 #include "base/logging.h"
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 9b35489..4a738ab 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -897,7 +897,6 @@
     const OatFile* oat_file = nullptr;
     dex_files = Runtime::Current()->GetOatFileManager().OpenDexFilesFromOat(
         dex_location_.c_str(),
-        oat_location_.c_str(),
         /*class_loader*/nullptr,
         /*dex_elements*/nullptr,
         &oat_file,
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index a950980..6799918 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -440,8 +440,12 @@
     return false;
   }
 
+  // Check that the loaded dex files have the same order and checksums as the shared libraries.
   for (size_t i = 0; i < dex_files.size(); ++i) {
-    if (dex_files[i]->GetLocation() != shared_libraries_split[i * 2]) {
+    std::string absolute_library_path =
+        OatFile::ResolveRelativeEncodedDexLocation(dex_files[i]->GetLocation().c_str(),
+                                                   shared_libraries_split[i * 2]);
+    if (dex_files[i]->GetLocation() != absolute_library_path) {
       return false;
     }
     char* end;
@@ -611,7 +615,6 @@
 
 std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
     const char* dex_location,
-    const char* oat_location,
     jobject class_loader,
     jobjectArray dex_elements,
     const OatFile** out_oat_file,
@@ -626,8 +629,9 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   Runtime* const runtime = Runtime::Current();
 
+  // TODO(calin): remove the explicit oat_location for OatFileAssistant
   OatFileAssistant oat_file_assistant(dex_location,
-                                      oat_location,
+                                      /*oat_location*/ nullptr,
                                       kRuntimeISA,
                                       !runtime->IsAotCompiler());
 
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 45ac4b7..05a5f5b 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -96,7 +96,6 @@
   // files.
   std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
       const char* dex_location,
-      const char* oat_location,
       jobject class_loader,
       jobjectArray dex_elements,
       /*out*/ const OatFile** out_oat_file,
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
index bdaad20..0b93b07 100644
--- a/runtime/openjdkjvm/OpenjdkJvm.cc
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -35,28 +35,31 @@
 #include<stdio.h>
 #include <dlfcn.h>
 #include <limits.h>
-#include <unistd.h>
-
-#include "common_throws.h"
-#include "gc/heap.h"
-#include "thread.h"
-#include "thread_list.h"
-#include "runtime.h"
-#include "handle_scope-inl.h"
-#include "scoped_thread_state_change-inl.h"
-#include "ScopedUtfChars.h"
-#include "mirror/class_loader.h"
-#include "verify_object.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
-#include "jni_internal.h"
-#include "mirror/string-inl.h"
-#include "native/scoped_fast_native_object_access-inl.h"
-#include "ScopedLocalRef.h"
 #include <sys/time.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "common_throws.h"
+#include "gc/heap.h"
+#include "handle_scope-inl.h"
+#include "java_vm_ext.h"
+#include "jni_internal.h"
+#include "mirror/class_loader.h"
+#include "mirror/string-inl.h"
+#include "monitor.h"
+#include "native/scoped_fast_native_object_access-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "scoped_thread_state_change-inl.h"
+#include "ScopedLocalRef.h"
+#include "ScopedUtfChars.h"
+#include "verify_object.h"
 
 #undef LOG_TAG
 #define LOG_TAG "artopenjdk"
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 39e603e..c3a94b9 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1556,6 +1556,7 @@
   ThreadUtil::Register(&gEventHandler);
   ClassUtil::Register(&gEventHandler);
   DumpUtil::Register(&gEventHandler);
+  MethodUtil::Register(&gEventHandler);
   SearchUtil::Register();
   HeapUtil::Register();
 
@@ -1569,6 +1570,7 @@
   ThreadUtil::Unregister();
   ClassUtil::Unregister();
   DumpUtil::Unregister();
+  MethodUtil::Unregister();
   SearchUtil::Unregister();
   HeapUtil::Unregister();
 
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index 2ff3a47..2a2aa4c 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -223,7 +223,7 @@
     .can_generate_compiled_method_load_events        = 0,
     .can_generate_monitor_events                     = 0,
     .can_generate_vm_object_alloc_events             = 1,
-    .can_generate_native_method_bind_events          = 0,
+    .can_generate_native_method_bind_events          = 1,
     .can_generate_garbage_collection_events          = 1,
     .can_generate_object_free_events                 = 1,
     .can_force_early_return                          = 0,
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
index 233b45c..57abf31 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -191,6 +191,27 @@
   }
 }
 
+// Custom specialization for NativeMethodBind: agents may rebind the method through the out
+// variable, so the current binding is threaded through the envs and null results are ignored.
+template <>
+inline void EventHandler::DispatchEvent<ArtJvmtiEvent::kNativeMethodBind>(art::Thread* thread,
+                                                                          JNIEnv* jnienv,
+                                                                          jthread jni_thread,
+                                                                          jmethodID method,
+                                                                          void* cur_method,
+                                                                          void** new_method) const {
+  for (ArtJvmTiEnv* env : envs) {
+    if (env != nullptr && ShouldDispatch<ArtJvmtiEvent::kNativeMethodBind>(env, thread)) {
+      auto callback = impl::GetCallback<ArtJvmtiEvent::kNativeMethodBind>(env);
+      *new_method = cur_method;  // Every agent starts from the binding chosen so far.
+      (*callback)(env, jnienv, jni_thread, method, cur_method, new_method);
+      // An agent that nulls the out-value leaves the binding unchanged.
+      cur_method = (*new_method != nullptr) ? *new_method : cur_method;
+    }
+  }
+  *new_method = cur_method;  // Last non-null choice wins, even if the final agent nulled it.
+}
+
 // C++ does not allow partial template function specialization. The dispatch for our separated
 // ClassFileLoadHook event types is the same, and in the DispatchClassFileLoadHookEvent helper.
 // The following two DispatchEvent specializations dispatch to it.
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 521494a..0ec92b7 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -36,10 +36,11 @@
 #include "gc/allocation_listener.h"
 #include "gc/gc_pause_listener.h"
 #include "gc/heap.h"
+#include "handle_scope-inl.h"
 #include "instrumentation.h"
 #include "jni_env_ext-inl.h"
 #include "mirror/class.h"
-#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index e94c4e6..e0af6e8 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -55,6 +55,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
 #include "mirror/object-inl.h"
+#include "mirror/object-refvisitor-inl.h"
 #include "mirror/reference.h"
 #include "primitive.h"
 #include "reflection.h"
@@ -68,6 +69,7 @@
 #include "ti_phase.h"
 #include "ti_redefine.h"
 #include "utils.h"
+#include "well_known_classes.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_class_definition.cc b/runtime/openjdkjvmti/ti_class_definition.cc
index 0671105..180895b 100644
--- a/runtime/openjdkjvmti/ti_class_definition.cc
+++ b/runtime/openjdkjvmti/ti_class_definition.cc
@@ -32,10 +32,12 @@
 #include "ti_class_definition.h"
 
 #include "base/array_slice.h"
+#include "class_linker-inl.h"
 #include "dex_file.h"
 #include "fixed_up_dex_file.h"
 #include "handle_scope-inl.h"
 #include "handle.h"
+#include "mirror/class_ext.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "reflection.h"
diff --git a/runtime/openjdkjvmti/ti_class_loader.cc b/runtime/openjdkjvmti/ti_class_loader.cc
index 66357eb..5544dde 100644
--- a/runtime/openjdkjvmti/ti_class_loader.cc
+++ b/runtime/openjdkjvmti/ti_class_loader.cc
@@ -35,6 +35,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "base/array_slice.h"
 #include "base/logging.h"
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
index 01bf21d..f7e5347 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -35,14 +35,62 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "dex_file_annotations.h"
+#include "events-inl.h"
 #include "jni_internal.h"
 #include "mirror/object_array-inl.h"
 #include "modifiers.h"
+#include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
+#include "ScopedLocalRef.h"
 #include "thread-inl.h"
+#include "thread_list.h"
+#include "ti_phase.h"
 
 namespace openjdkjvmti {
 
+// Runtime method callback that turns native method registration into JVMTI NativeMethodBind
+// events; agents may substitute the implementation through new_method.
+struct TiMethodCallback : public art::MethodCallback {
+  void RegisterNativeMethod(art::ArtMethod* method,
+                            const void* cur_method,
+                            /*out*/void** new_method)
+      OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (!event_handler->IsEventEnabledAnywhere(ArtJvmtiEvent::kNativeMethodBind)) {
+      return;
+    }
+    art::Thread* self = art::Thread::Current();
+    art::JNIEnvExt* env = self->GetJniEnv();
+    // The thread peer is only exposed in the live phase; otherwise agents get a null jthread.
+    jthread peer = PhaseUtil::IsLivePhase() ? env->AddLocalReference<jthread>(self->GetPeer())
+                                            : nullptr;
+    ScopedLocalRef<jthread> peer_ref(env, peer);
+    art::ScopedThreadSuspension sts(self, art::ThreadState::kNative);
+    event_handler->DispatchEvent<ArtJvmtiEvent::kNativeMethodBind>(
+        self, static_cast<JNIEnv*>(env), peer_ref.get(), art::jni::EncodeArtMethod(method),
+        const_cast<void*>(cur_method), new_method);
+  }
+
+  EventHandler* event_handler = nullptr;
+};
+
+TiMethodCallback gMethodCallback;
+
+void MethodUtil::Register(EventHandler* handler) {
+  art::Thread* self = art::Thread::Current();
+  gMethodCallback.event_handler = handler;  // Dereferenced whenever the callback fires.
+  art::ScopedThreadStateChange stsc(self, art::ThreadState::kWaitingForDebuggerToAttach);
+  art::ScopedSuspendAll ssa("Add method callback");
+  art::Runtime::Current()->GetRuntimeCallbacks()->AddMethodCallback(&gMethodCallback);
+}
+
+// Detaches gMethodCallback from the runtime; mirrors Register().
+void MethodUtil::Unregister() {
+  art::ScopedThreadStateChange stsc(art::Thread::Current(),
+                                    art::ThreadState::kWaitingForDebuggerToAttach);
+  art::ScopedSuspendAll ssa("Remove method callback");
+  art::Runtime::Current()->GetRuntimeCallbacks()->RemoveMethodCallback(&gMethodCallback);
+}
+
 jvmtiError MethodUtil::GetArgumentsSize(jvmtiEnv* env ATTRIBUTE_UNUSED,
                                         jmethodID method,
                                         jint* size_ptr) {
diff --git a/runtime/openjdkjvmti/ti_method.h b/runtime/openjdkjvmti/ti_method.h
index e5c1705..cc161c8 100644
--- a/runtime/openjdkjvmti/ti_method.h
+++ b/runtime/openjdkjvmti/ti_method.h
@@ -37,8 +37,13 @@
 
 namespace openjdkjvmti {
 
+class EventHandler;
+
 class MethodUtil {
  public:
+  static void Register(EventHandler* event_handler);
+  static void Unregister();
+
   static jvmtiError GetArgumentsSize(jvmtiEnv* env, jmethodID method, jint* size_ptr);
 
   static jvmtiError GetMaxLocals(jvmtiEnv* env, jmethodID method, jint* max_ptr);
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index 0655079..41ed862 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -35,9 +35,12 @@
 
 #include "android-base/stringprintf.h"
 
+#include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "art_jvmti.h"
 #include "base/array_slice.h"
 #include "base/logging.h"
+#include "class_linker-inl.h"
 #include "debugger.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -1418,14 +1421,18 @@
   art::mirror::Class* klass = GetMirrorClass();
   art::mirror::ClassExt* ext = klass->GetExtData();
   art::mirror::PointerArray* methods = ext->GetObsoleteMethods();
-  int32_t old_length =
-      cur_data->GetOldDexCaches() == nullptr ? 0 : cur_data->GetOldDexCaches()->GetLength();
+  art::mirror::PointerArray* old_methods = cur_data->GetOldObsoleteMethods();
+  int32_t old_length = old_methods == nullptr ? 0 : old_methods->GetLength();
   int32_t expected_length =
       old_length + klass->NumDirectMethods() + klass->NumDeclaredVirtualMethods();
   // Check to make sure we are only undoing this one.
   if (expected_length == methods->GetLength()) {
-    for (int32_t i = old_length; i < expected_length; i++) {
-      if (methods->GetElementPtrSize<art::ArtMethod*>(i, art::kRuntimePointerSize) != nullptr) {
+    for (int32_t i = 0; i < expected_length; i++) {
+      art::ArtMethod* expected = nullptr;
+      if (i < old_length) {
+        expected = old_methods->GetElementPtrSize<art::ArtMethod*>(i, art::kRuntimePointerSize);
+      }
+      if (methods->GetElementPtrSize<art::ArtMethod*>(i, art::kRuntimePointerSize) != expected) {
         // We actually have some new obsolete methods. Just abort since we cannot safely shrink the
         // obsolete methods array.
         return;
diff --git a/runtime/openjdkjvmti/ti_search.cc b/runtime/openjdkjvmti/ti_search.cc
index f51a98f..ec139f2 100644
--- a/runtime/openjdkjvmti/ti_search.cc
+++ b/runtime/openjdkjvmti/ti_search.cc
@@ -33,6 +33,7 @@
 
 #include "jni.h"
 
+#include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "base/enums.h"
 #include "base/macros.h"
@@ -50,6 +51,7 @@
 #include "ti_phase.h"
 #include "thread-inl.h"
 #include "thread_list.h"
+#include "well_known_classes.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 067c7c1..1ddf04f 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -36,8 +36,9 @@
 #include <unordered_map>
 #include <vector>
 
-#include "art_jvmti.h"
+#include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "art_jvmti.h"
 #include "base/bit_utils.h"
 #include "base/enums.h"
 #include "base/mutex.h"
@@ -54,6 +55,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "thread_pool.h"
+#include "well_known_classes.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index e5ff090..3dfa633 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -32,7 +32,7 @@
 #include "ti_thread.h"
 
 #include "android-base/strings.h"
-#include "art_field.h"
+#include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "base/logging.h"
 #include "base/mutex.h"
diff --git a/runtime/openjdkjvmti/ti_threadgroup.cc b/runtime/openjdkjvmti/ti_threadgroup.cc
index df14333..dd7be11 100644
--- a/runtime/openjdkjvmti/ti_threadgroup.cc
+++ b/runtime/openjdkjvmti/ti_threadgroup.cc
@@ -31,7 +31,7 @@
 
 #include "ti_threadgroup.h"
 
-#include "art_field.h"
+#include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "base/logging.h"
 #include "base/macros.h"
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 5748475..4e95b01 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -21,6 +21,7 @@
 #include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "mirror/class-inl.h"
 #include "mirror/field-inl.h"
 #include "mirror/method.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index c102fb0..d3859b0 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -22,6 +22,7 @@
 #include "gc/collector/concurrent_copying-inl.h"
 #include "gc/heap.h"
 #include "mirror/object_reference.h"
+#include "mirror/object-readbarrier-inl.h"
 #include "mirror/reference.h"
 #include "runtime.h"
 #include "utils.h"
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index 4ccfb6d..e809ecf 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -18,6 +18,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "art_method-inl.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "handle_scope-inl.h"
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 87bc7df..e16ef1d 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -23,6 +23,7 @@
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/executable.h"
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 2f70ded..1ba4b7b 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "common_compiler_test.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "scoped_thread_state_change-inl.h"
 
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 8346550..75c25dd 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -21,6 +21,7 @@
 
 #include "art_method.h"
 #include "class_linker.h"
+#include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
 #include "read_barrier-inl.h"
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index e563027..93b416c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -83,6 +83,7 @@
 #include "instrumentation.h"
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
+#include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jni_internal.h"
@@ -1722,6 +1723,7 @@
   mirror::MethodHandlesLookup::VisitRoots(visitor);
   mirror::EmulatedStackFrame::VisitRoots(visitor);
   mirror::ClassExt::VisitRoots(visitor);
+  mirror::CallSite::VisitRoots(visitor);
   // Visit all the primitive array types classes.
   mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor);   // BooleanArray
   mirror::PrimitiveArray<int8_t>::VisitRoots(visitor);    // ByteArray
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 20db628..b91cb0c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -657,6 +657,10 @@
 
   void AttachAgent(const std::string& agent_arg);
 
+  const std::list<ti::Agent>& GetAgents() const {
+    return agents_;
+  }
+
   RuntimeCallbacks* GetRuntimeCallbacks();
 
   void InitThreadGroups(Thread* self);
diff --git a/runtime/runtime_callbacks.cc b/runtime/runtime_callbacks.cc
index 25324b5..16d6c13 100644
--- a/runtime/runtime_callbacks.cc
+++ b/runtime/runtime_callbacks.cc
@@ -18,6 +18,7 @@
 
 #include <algorithm>
 
+#include "art_method.h"
 #include "base/macros.h"
 #include "class_linker.h"
 #include "thread.h"
@@ -131,4 +132,25 @@
   }
 }
 
+void RuntimeCallbacks::AddMethodCallback(MethodCallback* cb) {
+  method_callbacks_.push_back(cb);  // Appended, so callbacks are invoked in registration order.
+}
+
+void RuntimeCallbacks::RemoveMethodCallback(MethodCallback* cb) {
+  Remove(cb, &method_callbacks_);
+}
+
+// Offers the binding to each callback in turn; *new_method receives the last non-null choice.
+void RuntimeCallbacks::RegisterNativeMethod(ArtMethod* method,
+                                            const void* in_cur_method,
+                                            /*out*/void** new_method) {
+  void* cur_method = const_cast<void*>(in_cur_method);
+  for (MethodCallback* cb : method_callbacks_) {
+    *new_method = cur_method;  // Each callback starts from the binding chosen so far.
+    cb->RegisterNativeMethod(method, cur_method, new_method);
+    cur_method = (*new_method != nullptr) ? *new_method : cur_method;  // Null means unchanged.
+  }
+  *new_method = cur_method;  // Also covers an empty list and a final callback that nulled it.
+}
+
 }  // namespace art
diff --git a/runtime/runtime_callbacks.h b/runtime/runtime_callbacks.h
index d321254..e8f1824 100644
--- a/runtime/runtime_callbacks.h
+++ b/runtime/runtime_callbacks.h
@@ -31,8 +31,10 @@
 class ClassLoader;
 }  // namespace mirror
 
+class ArtMethod;
 class ClassLoadCallback;
 class Thread;
+class MethodCallback;
 class ThreadLifecycleCallback;
 
 // Note: RuntimeCallbacks uses the mutator lock to synchronize the callback lists. A thread must
@@ -110,6 +112,14 @@
                       /*out*/DexFile::ClassDef const** final_class_def)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void AddMethodCallback(MethodCallback* cb) REQUIRES(Locks::mutator_lock_);
+  void RemoveMethodCallback(MethodCallback* cb) REQUIRES(Locks::mutator_lock_);
+
+  void RegisterNativeMethod(ArtMethod* method,
+                            const void* original_implementation,
+                            /*out*/void** new_implementation)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   std::vector<ThreadLifecycleCallback*> thread_callbacks_
       GUARDED_BY(Locks::mutator_lock_);
@@ -118,7 +128,9 @@
   std::vector<RuntimeSigQuitCallback*> sigquit_callbacks_
       GUARDED_BY(Locks::mutator_lock_);
   std::vector<RuntimePhaseCallback*> phase_callbacks_
-        GUARDED_BY(Locks::mutator_lock_);
+      GUARDED_BY(Locks::mutator_lock_);
+  std::vector<MethodCallback*> method_callbacks_
+      GUARDED_BY(Locks::mutator_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index c817a9e..ed6e349 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -19,6 +19,7 @@
 
 #include "scoped_thread_state_change.h"
 
+#include "base/casts.h"
 #include "jni_env_ext-inl.h"
 #include "obj_ptr-inl.h"
 #include "thread-inl.h"
@@ -74,8 +75,10 @@
 template<typename T>
 inline T ScopedObjectAccessAlreadyRunnable::AddLocalReference(ObjPtr<mirror::Object> obj) const {
   Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal());
+  if (kIsDebugBuild) {
+    CHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
+    DCheckObjIsNotClearedJniWeakGlobal(obj);
+  }
   return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj);
 }
 
diff --git a/runtime/scoped_thread_state_change.cc b/runtime/scoped_thread_state_change.cc
new file mode 100644
index 0000000..94354fc
--- /dev/null
+++ b/runtime/scoped_thread_state_change.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_thread_state_change.h"
+
+#include <type_traits>
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "java_vm_ext.h"
+#include "obj_ptr-inl.h"
+#include "runtime-inl.h"
+
+namespace art {
+
+// See ScopedObjectAccessAlreadyRunnable::ScopedObjectAccessAlreadyRunnable(JavaVM*).
+static_assert(std::is_base_of<JavaVM, JavaVMExt>::value, "JavaVMExt does not extend JavaVM");
+
+void ScopedObjectAccessAlreadyRunnable::DCheckObjIsNotClearedJniWeakGlobal(
+    ObjPtr<mirror::Object> obj) {
+  DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal());
+}
+
+bool ScopedObjectAccessAlreadyRunnable::ForceCopy() const {
+  return vm_->ForceCopy();  // Out of line: needs the complete JavaVMExt type from java_vm_ext.h.
+}
+
+}  // namespace art
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 5f03741..02b6124 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -17,17 +17,23 @@
 #ifndef ART_RUNTIME_SCOPED_THREAD_STATE_CHANGE_H_
 #define ART_RUNTIME_SCOPED_THREAD_STATE_CHANGE_H_
 
-#include "art_field.h"
-#include "base/casts.h"
+#include "jni.h"
+
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "base/value_object.h"
-#include "java_vm_ext.h"
 #include "thread_state.h"
-#include "verify_object.h"
 
 namespace art {
 
+class JavaVMExt;
 struct JNIEnvExt;
 template<class MirrorType> class ObjPtr;
+class Thread;
+
+namespace mirror {
+class Object;
+}  // namespace mirror
 
 // Scoped change into and out of a particular state. Handles Runnable transitions that require
 // more complicated suspension checking. The subclasses ScopedObjectAccessUnchecked and
@@ -74,9 +80,7 @@
     return vm_;
   }
 
-  bool ForceCopy() const {
-    return vm_->ForceCopy();
-  }
+  bool ForceCopy() const;
 
   /*
    * Add a local reference for an object to the indirect reference table associated with the
@@ -105,12 +109,17 @@
 
   // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
   // change into Runnable or acquire a share on the mutator_lock_.
+  // Note: The reinterpret_cast is backed by a static_assert in the cc file. Avoid a down_cast,
+  //       as it prevents forward declaration of JavaVMExt.
   explicit ScopedObjectAccessAlreadyRunnable(JavaVM* vm)
-      : self_(nullptr), env_(nullptr), vm_(down_cast<JavaVMExt*>(vm)) {}
+      : self_(nullptr), env_(nullptr), vm_(reinterpret_cast<JavaVMExt*>(vm)) {}
 
   // Here purely to force inlining.
   ALWAYS_INLINE ~ScopedObjectAccessAlreadyRunnable() {}
 
+  static void DCheckObjIsNotClearedJniWeakGlobal(ObjPtr<mirror::Object> obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Self thread, can be null.
   Thread* const self_;
   // The full JNIEnv.
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 02a1e4d..aa769fa 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -303,10 +303,6 @@
   }
 }
 
-inline size_t Thread::TlabSize() const {
-  return tlsPtr_.thread_local_end - tlsPtr_.thread_local_pos;
-}
-
 inline mirror::Object* Thread::AllocTlab(size_t bytes) {
   DCHECK_GE(TlabSize(), bytes);
   ++tlsPtr_.thread_local_objects;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 008c388..201701a 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -56,6 +56,7 @@
 #include "gc/space/space-inl.h"
 #include "handle_scope-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/class-inl.h"
@@ -1451,7 +1452,8 @@
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
 
       DCHECK_NE(GetState(), ThreadState::kRunnable);
-      CHECK(ModifySuspendCount(self, -1, nullptr, false));
+      bool updated = ModifySuspendCount(self, -1, nullptr, false);
+      DCHECK(updated);
     }
 
     return;  // We're done, break out of the loop.
@@ -1928,6 +1930,23 @@
   Thread::Current()->AssertNoPendingException();
 
   Runtime::Current()->GetClassLinker()->RunRootClinits();
+
+  // The thread counts as started from now on. We need to add it to the ThreadGroup. For regular
+  // threads, this is done in Thread.start() on the Java side.
+  {
+    // This is only ever done once. There's no benefit in caching the method.
+    jmethodID thread_group_add = soa.Env()->GetMethodID(WellKnownClasses::java_lang_ThreadGroup,
+                                                        "add",
+                                                        "(Ljava/lang/Thread;)V");
+    CHECK(thread_group_add != nullptr);
+    ScopedLocalRef<jobject> thread_jobject(
+        soa.Env(), soa.Env()->AddLocalReference<jobject>(Thread::Current()->GetPeer()));
+    soa.Env()->CallNonvirtualVoidMethod(runtime->GetMainThreadGroup(),
+                                        WellKnownClasses::java_lang_ThreadGroup,
+                                        thread_group_add,
+                                        thread_jobject.get());
+    Thread::Current()->AssertNoPendingException();
+  }
 }
 
 void Thread::Shutdown() {
@@ -3450,11 +3469,13 @@
   }
 }
 
-void Thread::SetTlab(uint8_t* start, uint8_t* end) {
+void Thread::SetTlab(uint8_t* start, uint8_t* end, uint8_t* limit) {
   DCHECK_LE(start, end);
+  DCHECK_LE(end, limit);
   tlsPtr_.thread_local_start = start;
   tlsPtr_.thread_local_pos  = tlsPtr_.thread_local_start;
   tlsPtr_.thread_local_end = end;
+  tlsPtr_.thread_local_limit = limit;
   tlsPtr_.thread_local_objects = 0;
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index de0b892..5251012 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -244,6 +244,7 @@
                           int delta,
                           AtomicInteger* suspend_barrier,
                           bool for_debugger)
+      WARN_UNUSED
       REQUIRES(Locks::thread_suspend_count_lock_);
 
   bool RequestCheckpoint(Closure* function)
@@ -1034,10 +1035,24 @@
   void ResetQuickAllocEntryPointsForThread(bool is_marking);
 
   // Returns the remaining space in the TLAB.
-  size_t TlabSize() const;
+  size_t TlabSize() const {
+    return tlsPtr_.thread_local_end - tlsPtr_.thread_local_pos;
+  }
+
+  // Returns the remaining space in the TLAB if we were to expand it to maximum capacity.
+  size_t TlabRemainingCapacity() const {
+    return tlsPtr_.thread_local_limit - tlsPtr_.thread_local_pos;
+  }
+
+  // Expand the TLAB by a fixed number of bytes. There must be enough capacity to do so.
+  void ExpandTlab(size_t bytes) {
+    tlsPtr_.thread_local_end += bytes;
+    DCHECK_LE(tlsPtr_.thread_local_end, tlsPtr_.thread_local_limit);
+  }
+
   // Doesn't check that there is room.
   mirror::Object* AllocTlab(size_t bytes);
-  void SetTlab(uint8_t* start, uint8_t* end);
+  void SetTlab(uint8_t* start, uint8_t* end, uint8_t* limit);
   bool HasTlab() const;
   uint8_t* GetTlabStart() {
     return tlsPtr_.thread_local_start;
@@ -1276,6 +1291,7 @@
                                   int delta,
                                   AtomicInteger* suspend_barrier,
                                   bool for_debugger)
+      WARN_UNUSED
       REQUIRES(Locks::thread_suspend_count_lock_);
 
   void RunCheckpointFunction();
@@ -1449,6 +1465,7 @@
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
       thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      thread_local_limit(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr),
@@ -1575,6 +1592,10 @@
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
 
+    // The limit up to which the thread local buffer can be expanded. It is always greater than or
+    // equal to thread_local_end.
+    uint8_t* thread_local_limit;
+
     size_t thread_local_objects;
 
     // Entrypoint function pointers.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8d72fe8..2e0d866 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -323,7 +323,8 @@
               // Spurious fail, try again.
               continue;
             }
-            thread->ModifySuspendCount(self, +1, nullptr, false);
+            bool updated = thread->ModifySuspendCount(self, +1, nullptr, false);
+            DCHECK(updated);
             suspended_count_modified_threads.push_back(thread);
             break;
           }
@@ -365,7 +366,8 @@
     checkpoint_function->Run(thread);
     {
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
-      thread->ModifySuspendCount(self, -1, nullptr, false);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
+      DCHECK(updated);
     }
   }
 
@@ -565,7 +567,8 @@
       if ((state == kWaitingForGcThreadFlip || thread->IsTransitioningToRunnable()) &&
           thread->GetSuspendCount() == 1) {
         // The thread will resume right after the broadcast.
-        thread->ModifySuspendCount(self, -1, nullptr, false);
+        bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
+        DCHECK(updated);
         ++runnable_thread_count;
       } else {
         other_threads.push_back(thread);
@@ -598,7 +601,8 @@
     TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : other_threads) {
-      thread->ModifySuspendCount(self, -1, nullptr, false);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
+      DCHECK(updated);
     }
     Thread::resume_cond_->Broadcast(self);
   }
@@ -708,7 +712,8 @@
         continue;
       }
       VLOG(threads) << "requesting thread suspend: " << *thread;
-      thread->ModifySuspendCount(self, +1, &pending_threads, debug_suspend);
+      bool updated = thread->ModifySuspendCount(self, +1, &pending_threads, debug_suspend);
+      DCHECK(updated);
 
       // Must install the pending_threads counter first, then check thread->IsSuspend() and clear
       // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
@@ -786,7 +791,8 @@
       if (thread == self) {
         continue;
       }
-      thread->ModifySuspendCount(self, -1, nullptr, false);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
+      DCHECK(updated);
     }
 
     // Broadcast a notification to all suspended threads, some or all of
@@ -828,7 +834,8 @@
           << ") thread not within thread list";
       return;
     }
-    thread->ModifySuspendCount(self, -1, nullptr, for_debugger);
+    bool updated = thread->ModifySuspendCount(self, -1, nullptr, for_debugger);
+    DCHECK(updated);
   }
 
   {
@@ -884,7 +891,11 @@
           // If we incremented the suspend count but the thread reset its peer, we need to
           // re-decrement it since it is shutting down and may deadlock the runtime in
           // ThreadList::WaitForOtherNonDaemonThreadsToExit.
-          suspended_thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
+          bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
+                                                              -1,
+                                                              nullptr,
+                                                              debug_suspension);
+          DCHECK(updated);
         }
         ThreadSuspendByPeerWarning(self,
                                    ::android::base::WARNING,
@@ -910,7 +921,8 @@
           }
           CHECK(suspended_thread == nullptr);
           suspended_thread = thread;
-          suspended_thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
+          bool updated = suspended_thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
+          DCHECK(updated);
           request_suspension = false;
         } else {
           // If the caller isn't requesting suspension, a suspension should have already occurred.
@@ -942,7 +954,11 @@
                                      peer);
           if (suspended_thread != nullptr) {
             CHECK_EQ(suspended_thread, thread);
-            suspended_thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
+            bool updated = suspended_thread->ModifySuspendCount(soa.Self(),
+                                                                -1,
+                                                                nullptr,
+                                                                debug_suspension);
+            DCHECK(updated);
           }
           *timed_out = true;
           return nullptr;
@@ -1015,7 +1031,8 @@
             // which will allow this thread to be suspended.
             continue;
           }
-          thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
+          bool updated = thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
+          DCHECK(updated);
           suspended_thread = thread;
         } else {
           CHECK_EQ(suspended_thread, thread);
@@ -1046,7 +1063,8 @@
                                          "Thread suspension timed out",
                                          thread_id);
           if (suspended_thread != nullptr) {
-            thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
+            bool updated = thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
+            DCHECK(updated);
           }
           *timed_out = true;
           return nullptr;
@@ -1123,7 +1141,8 @@
     // to ensure that we're the only one fiddling with the suspend count
     // though.
     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
-    self->ModifySuspendCount(self, +1, nullptr, true);
+    bool updated = self->ModifySuspendCount(self, +1, nullptr, true);
+    DCHECK(updated);
     CHECK_GT(self->GetSuspendCount(), 0);
 
     VLOG(threads) << *self << " self-suspending (debugger)";
@@ -1207,7 +1226,8 @@
           continue;
         }
         VLOG(threads) << "requesting thread resume: " << *thread;
-        thread->ModifySuspendCount(self, -1, nullptr, true);
+        bool updated = thread->ModifySuspendCount(self, -1, nullptr, true);
+        DCHECK(updated);
       }
     }
   }
@@ -1236,7 +1256,11 @@
       if (thread == self || thread->GetDebugSuspendCount() == 0) {
         continue;
       }
-      thread->ModifySuspendCount(self, -thread->GetDebugSuspendCount(), nullptr, true);
+      bool suspended = thread->ModifySuspendCount(self,
+                                                  -thread->GetDebugSuspendCount(),
+                                                  nullptr,
+                                                  true);
+      DCHECK(suspended);
     }
   }
 
@@ -1293,7 +1317,8 @@
       // daemons.
       CHECK(thread->IsDaemon()) << *thread;
       if (thread != self) {
-        thread->ModifySuspendCount(self, +1, nullptr, false);
+        bool updated = thread->ModifySuspendCount(self, +1, nullptr, false);
+        DCHECK(updated);
         ++daemons_left;
       }
       // We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be
@@ -1352,10 +1377,12 @@
   // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
   // this isn't particularly efficient the suspend counts are most commonly 0 or 1.
   for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
-    self->ModifySuspendCount(self, +1, nullptr, true);
+    bool updated = self->ModifySuspendCount(self, +1, nullptr, true);
+    DCHECK(updated);
   }
   for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
-    self->ModifySuspendCount(self, +1, nullptr, false);
+    bool updated = self->ModifySuspendCount(self, +1, nullptr, false);
+    DCHECK(updated);
   }
   CHECK(!Contains(self));
   list_.push_back(self);
@@ -1450,11 +1477,13 @@
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (Thread* thread : list_) {
-      thread->ModifySuspendCount(self, +1, nullptr, false);
+      bool suspended = thread->ModifySuspendCount(self, +1, nullptr, false);
+      DCHECK(suspended);
       if (thread == self || thread->IsSuspended()) {
         threads_to_visit.push_back(thread);
       } else {
-        thread->ModifySuspendCount(self, -1, nullptr, false);
+        bool resumed = thread->ModifySuspendCount(self, -1, nullptr, false);
+        DCHECK(resumed);
       }
     }
   }
@@ -1469,7 +1498,8 @@
   {
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (Thread* thread : threads_to_visit) {
-      thread->ModifySuspendCount(self, -1, nullptr, false);
+      bool updated = thread->ModifySuspendCount(self, -1, nullptr, false);
+      DCHECK(updated);
     }
   }
 }
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
index 0bba44c..86f5282 100644
--- a/runtime/ti/agent.cc
+++ b/runtime/ti/agent.cc
@@ -72,6 +72,11 @@
   }
 }
 
+void* Agent::FindSymbol(const std::string& name) const {
+  CHECK(IsStarted()) << "Cannot find symbols in an unloaded agent library " << this;
+  return dlsym(dlopen_handle_, name.c_str());
+}
+
 Agent::LoadError Agent::DoDlOpen(/*out*/std::string* error_msg) {
   DCHECK(error_msg != nullptr);
 
@@ -86,18 +91,15 @@
     return kLoadingError;
   }
 
-  onload_ = reinterpret_cast<AgentOnLoadFunction>(dlsym(dlopen_handle_,
-                                                        AGENT_ON_LOAD_FUNCTION_NAME));
+  onload_ = reinterpret_cast<AgentOnLoadFunction>(FindSymbol(AGENT_ON_LOAD_FUNCTION_NAME));
   if (onload_ == nullptr) {
     VLOG(agents) << "Unable to find 'Agent_OnLoad' symbol in " << this;
   }
-  onattach_ = reinterpret_cast<AgentOnLoadFunction>(dlsym(dlopen_handle_,
-                                                            AGENT_ON_ATTACH_FUNCTION_NAME));
+  onattach_ = reinterpret_cast<AgentOnLoadFunction>(FindSymbol(AGENT_ON_ATTACH_FUNCTION_NAME));
   if (onattach_ == nullptr) {
     VLOG(agents) << "Unable to find 'Agent_OnAttach' symbol in " << this;
   }
-  onunload_= reinterpret_cast<AgentOnUnloadFunction>(dlsym(dlopen_handle_,
-                                                           AGENT_ON_UNLOAD_FUNCTION_NAME));
+  onunload_= reinterpret_cast<AgentOnUnloadFunction>(FindSymbol(AGENT_ON_UNLOAD_FUNCTION_NAME));
   if (onunload_ == nullptr) {
     VLOG(agents) << "Unable to find 'Agent_OnUnload' symbol in " << this;
   }
diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h
index 7408aee..b5ecba1 100644
--- a/runtime/ti/agent.h
+++ b/runtime/ti/agent.h
@@ -29,8 +29,14 @@
 using AgentOnLoadFunction = jint (*)(JavaVM*, const char*, void*);
 using AgentOnUnloadFunction = void (*)(JavaVM*);
 
+// Agents are native libraries that will be loaded by the runtime for the purpose of
+// instrumentation. They will be entered by Agent_OnLoad or Agent_OnAttach depending on whether the
+// agent is being attached during runtime startup or later.
+//
+// The agent's Agent_OnUnload function will be called during runtime shutdown.
+//
 // TODO: consider splitting ti::Agent into command line, agent and shared library handler classes
-
+// TODO: Support native-bridge. Currently agents must match the actual runtime ISA of the device.
 class Agent {
  public:
   enum LoadError {
@@ -56,6 +62,8 @@
     return !GetArgs().empty();
   }
 
+  void* FindSymbol(const std::string& name) const;
+
   LoadError Load(/*out*/jint* call_res, /*out*/std::string* error_msg) {
     VLOG(agents) << "Loading agent: " << name_ << " " << args_;
     return DoLoadHelper(false, call_res, error_msg);
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 38d151b..e1c6af4 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3732,7 +3732,7 @@
     self_->ClearException();
   }
   const RegType* result = nullptr;
-  if (klass != nullptr && !klass->IsErroneous()) {
+  if (klass != nullptr) {
     bool precise = klass->CannotBeAssignedFromOtherTypes();
     if (precise && !IsInstantiableOrPrimitive(klass)) {
       const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
@@ -4137,6 +4137,12 @@
 }
 
 bool MethodVerifier::CheckCallSite(uint32_t call_site_idx) {
+  if (call_site_idx >= dex_file_->NumCallSiteIds()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Bad call site id #" << call_site_idx
+                                      << " >= " << dex_file_->NumCallSiteIds();
+    return false;
+  }
+
   CallSiteArrayValueIterator it(*dex_file_, dex_file_->GetCallSiteId(call_site_idx));
   // Check essential arguments are provided. The dex file verifier has verified indicies of the
   // main values (method handle, name, method_type).
@@ -4147,9 +4153,11 @@
     return false;
   }
 
-  // Get and check the first argument: the method handle.
+  // Get and check the first argument: the method handle (index range
+  // checked by the dex file verifier).
   uint32_t method_handle_idx = static_cast<uint32_t>(it.GetJavaValue().i);
   it.Next();
+
   const DexFile::MethodHandleItem& mh = dex_file_->GetMethodHandle(method_handle_idx);
   if (mh.method_handle_type_ != static_cast<uint16_t>(DexFile::MethodHandleType::kInvokeStatic)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Call site #" << call_site_idx
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index be5c18b..bdb6b68 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -23,7 +23,7 @@
 
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
-#include "dex_file.h"
+#include "dex_file-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "utils.h"
 #include "verifier_log_mode.h"
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 8e4c166..0497a6d 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -18,8 +18,11 @@
 
 #include <cstring>
 
+#include "art_field-inl.h"
+#include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "compiler_callbacks.h"
+#include "dex_file-inl.h"
 #include "leb128.h"
 #include "mirror/class-inl.h"
 #include "obj_ptr-inl.h"
@@ -428,8 +431,6 @@
     return;
   }
 
-  DCHECK_EQ(is_assignable, destination->IsAssignableFrom(source));
-
   if (destination->IsArrayClass() && source->IsArrayClass()) {
     // Both types are arrays. Break down to component types and add recursively.
     // This helps filter out destinations from compiled DEX files (see below)
@@ -447,6 +448,10 @@
                        is_assignable);
       return;
     }
+  } else {
+    // We only do this check for non-array types, as arrays might have erroneous
+    // component types which makes the IsAssignableFrom check unreliable.
+    DCHECK_EQ(is_assignable, destination->IsAssignableFrom(source));
   }
 
   DexFileDeps* dex_deps = GetDexFileDeps(dex_file);
diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt
index 3fc3492..c8af963 100644
--- a/test/051-thread/expected.txt
+++ b/test/051-thread/expected.txt
@@ -12,4 +12,6 @@
 testSetName finished
 testThreadPriorities starting
 testThreadPriorities finished
+Found current Thread in ThreadGroup
+Found expected stack in getAllStackTraces()
 thread test done
diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java
index 82fc0d4..08cb5de 100644
--- a/test/051-thread/src/Main.java
+++ b/test/051-thread/src/Main.java
@@ -15,6 +15,9 @@
  */
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
 
 /**
  * Test some basic thread stuff.
@@ -28,6 +31,8 @@
         testSleepZero();
         testSetName();
         testThreadPriorities();
+        testMainThreadGroup();
+        testMainThreadAllStackTraces();
         System.out.println("thread test done");
     }
 
@@ -159,6 +164,49 @@
         System.out.print("testThreadPriorities finished\n");
     }
 
+    private static void testMainThreadGroup() {
+      Thread threads[] = new Thread[10];
+      Thread current = Thread.currentThread();
+      current.getThreadGroup().enumerate(threads);
+
+      for (Thread t : threads) {
+        if (t == current) {
+          System.out.println("Found current Thread in ThreadGroup");
+          return;
+        }
+      }
+      throw new RuntimeException("Did not find main thread: " + Arrays.toString(threads));
+    }
+
+    private static void testMainThreadAllStackTraces() {
+      StackTraceElement[] trace = Thread.getAllStackTraces().get(Thread.currentThread());
+      if (trace == null) {
+        throw new RuntimeException("Did not find main thread: " + Thread.getAllStackTraces());
+      }
+      List<StackTraceElement> list = Arrays.asList(trace);
+      Iterator<StackTraceElement> it = list.iterator();
+      while (it.hasNext()) {
+        StackTraceElement ste = it.next();
+        if (ste.getClassName().equals("Main")) {
+          if (!ste.getMethodName().equals("testMainThreadAllStackTraces")) {
+            throw new RuntimeException(list.toString());
+          }
+
+          StackTraceElement ste2 = it.next();
+          if (!ste2.getClassName().equals("Main")) {
+            throw new RuntimeException(list.toString());
+          }
+          if (!ste2.getMethodName().equals("main")) {
+            throw new RuntimeException(list.toString());
+          }
+
+          System.out.println("Found expected stack in getAllStackTraces()");
+          return;
+        }
+      }
+      throw new RuntimeException(list.toString());
+    }
+
     private static native int getNativePriority();
     private static native boolean supportsThreadPriorities();
 
diff --git a/test/161-final-abstract-class/expected.txt b/test/161-final-abstract-class/expected.txt
new file mode 100644
index 0000000..1e63584
--- /dev/null
+++ b/test/161-final-abstract-class/expected.txt
@@ -0,0 +1 @@
+java.lang.InstantiationError: AbstractFinal
diff --git a/test/161-final-abstract-class/info.txt b/test/161-final-abstract-class/info.txt
new file mode 100644
index 0000000..2b7bee7
--- /dev/null
+++ b/test/161-final-abstract-class/info.txt
@@ -0,0 +1 @@
+Regression test for verifier crash when processing a final abstract (erroneous) class.
diff --git a/test/161-final-abstract-class/smali/AbstractFinal.smali b/test/161-final-abstract-class/smali/AbstractFinal.smali
new file mode 100644
index 0000000..796fc40
--- /dev/null
+++ b/test/161-final-abstract-class/smali/AbstractFinal.smali
@@ -0,0 +1,16 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract final LAbstractFinal;
+.super Ljava/lang/Object;
diff --git a/test/161-final-abstract-class/smali/TestClass.smali b/test/161-final-abstract-class/smali/TestClass.smali
new file mode 100644
index 0000000..fa38f59
--- /dev/null
+++ b/test/161-final-abstract-class/smali/TestClass.smali
@@ -0,0 +1,22 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class LTestClass;
+.super Ljava/lang/Object;
+
+.method public static test()V
+    .registers 1
+    new-instance v0, LAbstractFinal;
+    return-void
+.end method
diff --git a/test/161-final-abstract-class/src/Main.java b/test/161-final-abstract-class/src/Main.java
new file mode 100644
index 0000000..2452490
--- /dev/null
+++ b/test/161-final-abstract-class/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            // Make sure that the abstract final class is marked as erroneous.
+            Class.forName("AbstractFinal");
+            System.out.println("UNREACHABLE!");
+        } catch (VerifyError expected) {
+        } catch (Throwable t) {
+            t.printStackTrace(System.out);
+        }
+        try {
+            // Verification of TestClass.test() used to crash when processing
+            // the final abstract (erroneous) class.
+            Class<?> tc = Class.forName("TestClass");
+            Method test = tc.getDeclaredMethod("test");
+            test.invoke(null);
+            System.out.println("UNREACHABLE!");
+        } catch (InvocationTargetException ite) {
+            if (ite.getCause() instanceof InstantiationError) {
+                System.out.println(
+                    ite.getCause().getClass().getName() + ": " + ite.getCause().getMessage());
+            } else {
+                ite.printStackTrace(System.out);
+            }
+        } catch (Throwable t) {
+            t.printStackTrace(System.out);
+        }
+    }
+}
diff --git a/test/497-inlining-and-class-loader/clear_dex_cache.cc b/test/497-inlining-and-class-loader/clear_dex_cache.cc
index 6c73d7d..9ba05bc 100644
--- a/test/497-inlining-and-class-loader/clear_dex_cache.cc
+++ b/test/497-inlining-and-class-loader/clear_dex_cache.cc
@@ -14,9 +14,12 @@
  * limitations under the License.
  */
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "base/enums.h"
 #include "jni.h"
+#include "mirror/array-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "thread.h"
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 09376a2..810f0fa 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -424,6 +424,88 @@
     return - (left * right);
   }
 
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecAdd
+  public static void SimdMulAdd(int[] array1, int[] array2) {
+    for (int j = 0; j < 100; j++) {
+      array2[j] += 12345 * array1[j];
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:     VecMul
+  /// CHECK-NOT:     VecSub
+  public static void SimdMulSub(int[] array1, int[] array2) {
+    for (int j = 0; j < 100; j++) {
+      array2[j] -= 12345 * array1[j];
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT: VecMultiplyAccumulate
+  public static void SimdMulMultipleUses(int[] array1, int[] array2) {
+    for (int j = 0; j < 100; j++) {
+       int temp = 12345 * array1[j];
+       array2[j] -= temp;
+       array1[j] = temp;
+    }
+  }
+
+  public static final int ARRAY_SIZE = 1000;
+
+  public static void initArray(int[] array) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      array[i] = i;
+    }
+  }
+
+  public static int calcArraySum(int[] array) {
+    int sum = 0;
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      sum += array[i];
+    }
+    return sum;
+  }
+
+  public static void testSimdMultiplyAccumulate() {
+    int[] array1 = new int[ARRAY_SIZE];
+    int[] array2 = new int[ARRAY_SIZE];
+
+    initArray(array1);
+    initArray(array2);
+    SimdMulSub(array1, array2);
+    assertIntEquals(-60608250, calcArraySum(array2));
+
+    initArray(array1);
+    initArray(array2);
+    SimdMulAdd(array1, array2);
+    assertIntEquals(61607250, calcArraySum(array2));
+  }
+
   public static void main(String[] args) {
     assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
     assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6));
@@ -433,5 +515,7 @@
     assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
     assertIntEquals(-306, $opt$noinline$mulNeg(17, 18));
     assertLongEquals(-380, $opt$noinline$mulNeg(19, 20));
+
+    testSimdMultiplyAccumulate();
   }
 }
diff --git a/test/646-checker-hadd-alt-byte/expected.txt b/test/646-checker-hadd-alt-byte/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-hadd-alt-byte/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-hadd-alt-byte/info.txt b/test/646-checker-hadd-alt-byte/info.txt
new file mode 100644
index 0000000..46e7334
--- /dev/null
+++ b/test/646-checker-hadd-alt-byte/info.txt
@@ -0,0 +1 @@
+Functional tests on halving-add SIMD vectorization.
diff --git a/test/646-checker-hadd-alt-byte/src/Main.java b/test/646-checker-hadd-alt-byte/src/Main.java
new file mode 100644
index 0000000..d1b33ea
--- /dev/null
+++ b/test/646-checker-hadd-alt-byte/src/Main.java
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for halving-add idiomatic vectorization.
+ *
+ * Alternative version expressed with logical shift right
+ * in the higher precision (has no impact on idiom).
+ */
+public class Main {
+
+  private static final int N = 256;
+  private static final int M = N * N + 15;
+
+  static byte[] sB1 = new byte[M];
+  static byte[] sB2 = new byte[M];
+  static byte[] sBo = new byte[M];
+
+  /// CHECK-START: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) ((b1[i] + b2[i]) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff)) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) ((b1[i] + b2[i] + 1) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff) + 1) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<I127>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
+    int min_length = Math.min(bo.length, b1.length);
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) ((b1[i] + 0x7f) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
+    int min_length = Math.min(bo.length, b1.length);
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) (((b1[i] & 0xff) + 0xff) >>> 1);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Initialize cross-values to test all cases, and also
+    // set up some extra values to exercise the cleanup loop.
+    int k = 0;
+    for (int i = 0; i < N; i++) {
+      for (int j = 0; j < N; j++) {
+        sB1[k] = (byte) i;
+        sB2[k] = (byte) j;
+        k++;
+      }
+    }
+    for (int i = 0; i < 15; i++) {
+      sB1[k] = (byte) i;
+      sB2[k] = 100;
+      k++;
+    }
+    expectEquals(k, M);
+
+    // Test halving add idioms. Note that the expected result is computed
+    // with the arithmetic >> to demonstrate the computed narrower result
+    // does not depend on the wider >> or >>>.
+    halving_add_signed(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff)) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_signed(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff) + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_signed_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) ((sB1[i] + 0x7f) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) (((sB1[i] & 0xff) + 0xff) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/646-checker-hadd-alt-char/expected.txt b/test/646-checker-hadd-alt-char/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-hadd-alt-char/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-hadd-alt-char/info.txt b/test/646-checker-hadd-alt-char/info.txt
new file mode 100644
index 0000000..46e7334
--- /dev/null
+++ b/test/646-checker-hadd-alt-char/info.txt
@@ -0,0 +1 @@
+Functional tests on halving-add SIMD vectorization.
diff --git a/test/646-checker-hadd-alt-char/src/Main.java b/test/646-checker-hadd-alt-char/src/Main.java
new file mode 100644
index 0000000..1ea8d3f
--- /dev/null
+++ b/test/646-checker-hadd-alt-char/src/Main.java
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for halving-add idiomatic vectorization.
+ *
+ * Alternative version expressed with logical shift right
+ * in the higher precision (has no impact on idiom).
+ */
+public class Main {
+
+  private static final int N = 64 * 1024;
+  private static final int M = N + 31;
+
+  static char[] sB1 = new char[M];
+  static char[] sB2 = new char[M];
+  static char[] sBo = new char[M];
+
+  /// CHECK-START: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (char) ((b1[i] + b2[i]) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Note: HAnd has no impact (already a zero extension).
+  //
+  private static void halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (char) (((b1[i] & 0xffff) + (b2[i] & 0xffff)) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (char) ((b1[i] + b2[i] + 1) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Note: HAnd has no impact (already a zero extension).
+  //
+  private static void rounding_halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (char) (((b1[i] & 0xffff) + (b2[i] & 0xffff) + 1) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:c\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
+    int min_length = Math.min(bo.length, b1.length);
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (char) ((b1[i] + 0xffff) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:c\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Note: HAnd has no impact (already a zero extension).
+  //
+  private static void halving_add_also_unsigned_constant(char[] b1, char[] bo) {
+    int min_length = Math.min(bo.length, b1.length);
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (char) (((b1[i] & 0xffff) + 0xffff) >>> 1);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Some interesting values.
+    char[] interesting = {
+      (char) 0x0000,
+      (char) 0x0001,
+      (char) 0x0002,
+      (char) 0x1234,
+      (char) 0x8000,
+      (char) 0x8001,
+      (char) 0x7fff,
+      (char) 0xffff
+    };
+    // Pair every char value with one of the interesting values above, and
+    // also set up some extra values to exercise the cleanup loop.
+    for (int i = 0; i < M; i++) {
+      sB1[i] = (char) i;
+      sB2[i] = interesting[i & 7];
+    }
+
+    // Test halving add idioms. Note that the expected result is computed
+    // with the arithmetic >> to demonstrate the computed narrower result
+    // does not depend on the wider >> or >>>.
+    halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      char e = (char) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_also_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      char e = (char) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      char e = (char) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_also_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      char e = (char) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      char e = (char) ((sB1[i] + 0xffff) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_also_unsigned_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      char e = (char) ((sB1[i] + 0xffff) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/646-checker-hadd-alt-short/expected.txt b/test/646-checker-hadd-alt-short/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-hadd-alt-short/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-hadd-alt-short/info.txt b/test/646-checker-hadd-alt-short/info.txt
new file mode 100644
index 0000000..46e7334
--- /dev/null
+++ b/test/646-checker-hadd-alt-short/info.txt
@@ -0,0 +1 @@
+Functional tests on halving-add SIMD vectorization.
diff --git a/test/646-checker-hadd-alt-short/src/Main.java b/test/646-checker-hadd-alt-short/src/Main.java
new file mode 100644
index 0000000..269e618
--- /dev/null
+++ b/test/646-checker-hadd-alt-short/src/Main.java
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for halving-add idiomatic vectorization.
+ *
+ * Alternative version expressed with logical shift right
+ * in the higher precision (has no impact on idiom).
+ */
+public class Main {
+
+  private static final int N = 64 * 1024;
+  private static final int M = N + 31;
+
+  static short[] sB1 = new short[M];
+  static short[] sB2 = new short[M];
+  static short[] sBo = new short[M];
+
+  /// CHECK-START: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Only indices valid in all three arrays.
+      bo[i] = (short) ((b1[i] + b2[i]) >>> 1);  // Sign-extended int sum, >>> 1, narrowed.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Only indices valid in all three arrays.
+      bo[i] = (short) (((b1[i] & 0xffff) + (b2[i] & 0xffff)) >>> 1);  // 0xffff zero-extends.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Only indices valid in all three arrays.
+      bo[i] = (short) ((b1[i] + b2[i] + 1) >>> 1);  // The + 1 rounds the halved sum up.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Zero-extended operands; the + 1 rounds up.
+      bo[i] = (short) (((b1[i] & 0xffff) + (b2[i] & 0xffff) + 1) >>> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<SMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed_constant(short[] b1, short[] bo) {
+    int min_length = Math.min(bo.length, b1.length);  // Stay in bounds of both arrays.
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (short) ((b1[i] + 0x7fff) >>> 1);  // 0x7fff is the largest short value.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
+    int min_length = Math.min(bo.length, b1.length);  // Stay in bounds of both arrays.
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (short) (((b1[i] & 0xffff) + 0xffff) >>> 1);  // Masked operand plus 0xffff.
+    }
+  }
+
+  public static void main(String[] args) {
+    // Some interesting values.
+    short[] interesting = {
+      (short) 0x0000,
+      (short) 0x0001,
+      (short) 0x0002,
+      (short) 0x1234,
+      (short) 0x8000,
+      (short) 0x8001,
+      (short) 0x7fff,
+      (short) 0xffff
+    };
+    // Pair each 16-bit value in sB1 with one of the interesting values in sB2;
+    // the elements beyond N are extra values to exercise the cleanup loop.
+    for (int i = 0; i < M; i++) {
+      sB1[i] = (short) i;  // Wraps around once i exceeds the short range.
+      sB2[i] = interesting[i & 7];  // Cycles through the eight entries above.
+    }
+
+    // Test halving add idioms. Note that the expected result is computed
+    // with the arithmetic >> to demonstrate the computed narrower result
+    // does not depend on the wider >> or >>>.
+    halving_add_signed(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff)) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_signed(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_signed_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + 0x7fff) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((sB1[i] & 0xffff) + 0xffff) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+
+    System.out.println("passed");  // Must match expected.txt.
+  }
+
+  private static void expectEquals(int expected, int result) {  // Throws Error on mismatch.
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/646-checker-hadd-byte/expected.txt b/test/646-checker-hadd-byte/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-hadd-byte/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-hadd-byte/info.txt b/test/646-checker-hadd-byte/info.txt
new file mode 100644
index 0000000..46e7334
--- /dev/null
+++ b/test/646-checker-hadd-byte/info.txt
@@ -0,0 +1 @@
+Functional tests on halving-add SIMD vectorization.
diff --git a/test/646-checker-hadd-byte/src/Main.java b/test/646-checker-hadd-byte/src/Main.java
new file mode 100644
index 0000000..7e29a7e
--- /dev/null
+++ b/test/646-checker-hadd-byte/src/Main.java
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for halving-add idiomatic vectorization.
+ */
+public class Main {
+
+  private static final int N = 256;
+  private static final int M = N * N + 15;
+
+  static byte[] sB1 = new byte[M];
+  static byte[] sB2 = new byte[M];
+  static byte[] sBo = new byte[M];
+
+  /// CHECK-START: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Only indices valid in all three arrays.
+      bo[i] = (byte) ((b1[i] + b2[i]) >> 1);  // Sign-extended int sum, >> 1, narrowed.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Only indices valid in all three arrays.
+      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff)) >> 1);  // 0xff zero-extends.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Only indices valid in all three arrays.
+      bo[i] = (byte) ((b1[i] + b2[i] + 1) >> 1);  // The + 1 rounds the halved sum up.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {  // Zero-extended operands; the + 1 rounds up.
+      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff) + 1) >> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<I127>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
+    int min_length = Math.min(bo.length, b1.length);  // Stay in bounds of both arrays.
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) ((b1[i] + 0x7f) >> 1);  // 0x7f is the largest byte value.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
+    int min_length = Math.min(bo.length, b1.length);  // Stay in bounds of both arrays.
+    for (int i = 0; i < min_length; i++) {
+      bo[i] = (byte) (((b1[i] & 0xff) + 0xff) >> 1);  // Masked operand plus 0xff.
+    }
+  }
+
+  public static void main(String[] args) {
+    // Initialize cross-values to test all cases, and also
+    // set up some extra values to exercise the cleanup loop.
+    int k = 0;
+    for (int i = 0; i < N; i++) {  // First operand: every byte value.
+      for (int j = 0; j < N; j++) {  // Second operand: every byte value.
+        sB1[k] = (byte) i;
+        sB2[k] = (byte) j;
+        k++;
+      }
+    }
+    for (int i = 0; i < 15; i++) {  // Extra tail elements (M = N * N + 15).
+      sB1[k] = (byte) i;
+      sB2[k] = 100;
+      k++;
+    }
+    expectEquals(M, k);  // Expected count first: all M slots must have been filled.
+
+    // Test halving add idioms.
+    halving_add_signed(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff)) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_signed(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_unsigned(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff) + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_signed_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) ((sB1[i] + 0x7f) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    halving_add_unsigned_constant(sB1, sBo);
+    for (int i = 0; i < M; i++) {
+      byte e = (byte) (((sB1[i] & 0xff) + 0xff) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+
+    System.out.println("passed");  // Must match expected.txt.
+  }
+
+  private static void expectEquals(int expected, int result) {  // Throws Error on mismatch.
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/646-checker-hadd-char/expected.txt b/test/646-checker-hadd-char/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-hadd-char/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-hadd-char/info.txt b/test/646-checker-hadd-char/info.txt
new file mode 100644
index 0000000..46e7334
--- /dev/null
+++ b/test/646-checker-hadd-char/info.txt
@@ -0,0 +1 @@
+Functional tests on halving-add SIMD vectorization.
diff --git a/test/646-checker-hadd-char/src/Main.java b/test/646-checker-hadd-char/src/Main.java
new file mode 100644
index 0000000..d24608f
--- /dev/null
+++ b/test/646-checker-hadd-char/src/Main.java
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker and run tests for vectorization of the halving-add idiom on char arrays.
+ */
+public class Main {
+  // M extends N by a few elements so that the cleanup loop is exercised as well.
+  private static final int N = 64 * 1024;
+  private static final int M = N + 31;
+
+  static char[] sB1 = new char[M];  // First input operand.
+  static char[] sB2 = new char[M];  // Second input operand.
+  static char[] sBo = new char[M];  // Kernel output.
+
+  /// CHECK-START: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (char) ((b1[k] + b2[k]) >> 1);  // Truncating average.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Note: HAnd has no impact (already a zero extension).
+  //
+  private static void halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (char) (((b1[k] & 0xffff) + (b2[k] & 0xffff)) >> 1);  // Masks are no-ops on char.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (char) ((b1[k] + b2[k] + 1) >> 1);  // Average, rounded up.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:c\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Note: HAnd has no impact (already a zero extension).
+  //
+  private static void rounding_halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (char) (((b1[k] & 0xffff) + (b2[k] & 0xffff) + 1) >> 1);  // Rounded up.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:c\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
+    final int limit = Math.min(bo.length, b1.length);
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (char) ((b1[k] + 0xffff) >> 1);  // Average with the largest char value.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:c\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  // Note: HAnd has no impact (already a zero extension).
+  //
+  private static void halving_add_also_unsigned_constant(char[] b1, char[] bo) {
+    final int limit = Math.min(bo.length, b1.length);
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (char) (((b1[k] & 0xffff) + 0xffff) >> 1);  // Mask is a no-op on char.
+    }
+  }
+
+  public static void main(String[] args) {
+    // Operand values chosen to hit the edges of the 16-bit range.
+    final char[] probes = {
+      // Zero and small values.
+      (char) 0x0000, (char) 0x0001, (char) 0x0002,
+      // Arbitrary mid-range value.
+      (char) 0x1234,
+      // Values on either side of the top-bit boundary.
+      (char) 0x8000, (char) 0x8001, (char) 0x7fff,
+      // Largest char value.
+      (char) 0xffff
+    };
+    // Fill the inputs: sB1 holds the index truncated to char, sB2 cycles through the
+    // probes. The elements beyond N exercise the cleanup loop.
+    for (int k = 0; k < M; k++) {
+      sB1[k] = (char) k;
+      sB2[k] = probes[k & 7];
+    }
+
+    // Run each kernel and compare every element against a scalar reference.
+    // Truncating average.
+    halving_add_unsigned(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((char) ((sB1[k] + sB2[k]) >> 1), sBo[k]);
+    }
+    // Truncating average with redundant masks; same reference as above.
+    halving_add_also_unsigned(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((char) ((sB1[k] + sB2[k]) >> 1), sBo[k]);
+    }
+    // Rounding average.
+    rounding_halving_add_unsigned(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((char) ((sB1[k] + sB2[k] + 1) >> 1), sBo[k]);
+    }
+    // Rounding average with redundant masks; same reference as above.
+    rounding_halving_add_also_unsigned(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((char) ((sB1[k] + sB2[k] + 1) >> 1), sBo[k]);
+    }
+    // Truncating average against the constant 0xffff.
+    halving_add_unsigned_constant(sB1, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((char) ((sB1[k] + 0xffff) >> 1), sBo[k]);
+    }
+    // Constant variant with a redundant mask; same reference as above.
+    halving_add_also_unsigned_constant(sB1, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((char) ((sB1[k] + 0xffff) >> 1), sBo[k]);
+    }
+
+    System.out.println("passed");
+  }
+
+  // Raises an Error naming both values when they differ; otherwise does nothing.
+  private static void expectEquals(int expected, int result) {
+    if (expected == result) return;
+    throw new Error("Expected: " + expected + ", found: " + result);
+  }
+}
diff --git a/test/646-checker-hadd-short/expected.txt b/test/646-checker-hadd-short/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-hadd-short/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-hadd-short/info.txt b/test/646-checker-hadd-short/info.txt
new file mode 100644
index 0000000..46e7334
--- /dev/null
+++ b/test/646-checker-hadd-short/info.txt
@@ -0,0 +1 @@
+Functional tests on halving-add SIMD vectorization.
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
new file mode 100644
index 0000000..db495f6
--- /dev/null
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker and run tests for vectorization of the halving-add idiom on short arrays.
+ */
+public class Main {
+  // M extends N by a few elements so that the cleanup loop is exercised as well.
+  private static final int N = 64 * 1024;
+  private static final int M = N + 31;
+
+  static short[] sB1 = new short[M];  // First input operand.
+  static short[] sB2 = new short[M];  // Second input operand.
+  static short[] sBo = new short[M];  // Kernel output.
+
+  /// CHECK-START: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (short) ((b1[k] + b2[k]) >> 1);  // Signed truncating average.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (short) (((b1[k] & 0xffff) + (b2[k] & 0xffff)) >> 1);  // Masks zero-extend.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (short) ((b1[k] + b2[k] + 1) >> 1);  // Signed average, rounded up.
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
+    final int limit = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (short) (((b1[k] & 0xffff) + (b2[k] & 0xffff) + 1) >> 1);  // Rounded up.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<SMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed_constant(short[] b1, short[] bo) {
+    final int limit = Math.min(bo.length, b1.length);
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (short) ((b1[k] + 0x7fff) >> 1);  // Average with the largest short value.
+    }
+  }
+
+  /// CHECK-START: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:s\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<UMAX>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
+    final int limit = Math.min(bo.length, b1.length);
+    for (int k = 0; k < limit; k++) {
+      bo[k] = (short) (((b1[k] & 0xffff) + 0xffff) >> 1);  // Mask zero-extends the element.
+    }
+  }
+
+  public static void main(String[] args) {
+    // Operand values chosen to hit the edges of the 16-bit range.
+    final short[] probes = {
+      // Zero and small positive values.
+      (short) 0x0000, (short) 0x0001, (short) 0x0002,
+      // Arbitrary mid-range value.
+      (short) 0x1234,
+      // Most negative short, its successor, and the most positive short.
+      (short) 0x8000, (short) 0x8001, (short) 0x7fff,
+      // All bits set (-1).
+      (short) 0xffff
+    };
+    // Fill the inputs: sB1 holds the index truncated to short, sB2 cycles through the
+    // probes. The elements beyond N exercise the cleanup loop.
+    for (int k = 0; k < M; k++) {
+      sB1[k] = (short) k;
+      sB2[k] = probes[k & 7];
+    }
+
+    // Run each kernel and compare every element against a scalar reference.
+    // Signed truncating average.
+    halving_add_signed(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((short) ((sB1[k] + sB2[k]) >> 1), sBo[k]);
+    }
+    // Unsigned truncating average.
+    halving_add_unsigned(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((short) (((sB1[k] & 0xffff) + (sB2[k] & 0xffff)) >> 1), sBo[k]);
+    }
+    // Signed rounding average.
+    rounding_halving_add_signed(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((short) ((sB1[k] + sB2[k] + 1) >> 1), sBo[k]);
+    }
+    // Unsigned rounding average.
+    rounding_halving_add_unsigned(sB1, sB2, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((short) (((sB1[k] & 0xffff) + (sB2[k] & 0xffff) + 1) >> 1), sBo[k]);
+    }
+    // Signed truncating average against the constant 0x7fff.
+    halving_add_signed_constant(sB1, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((short) ((sB1[k] + 0x7fff) >> 1), sBo[k]);
+    }
+    // Unsigned truncating average against the constant 0xffff.
+    halving_add_unsigned_constant(sB1, sBo);
+    for (int k = 0; k < M; k++) {
+      expectEquals((short) (((sB1[k] & 0xffff) + 0xffff) >> 1), sBo[k]);
+    }
+
+    System.out.println("passed");
+  }
+
+  // Raises an Error naming both values when they differ; otherwise does nothing.
+  private static void expectEquals(int expected, int result) {
+    if (expected == result) return;
+    throw new Error("Expected: " + expected + ", found: " + result);
+  }
+}
diff --git a/test/647-jni-get-field-id/expected.txt b/test/647-jni-get-field-id/expected.txt
new file mode 100644
index 0000000..9506dd7
--- /dev/null
+++ b/test/647-jni-get-field-id/expected.txt
@@ -0,0 +1,26 @@
+JNI_OnLoad called
+getFieldId(class TestClass, "intField", "I")
+Result: true
+getFieldId(class TestClass, "intField", "int")
+Caught java.lang.NoSuchFieldError
+  caused by java.lang.NoClassDefFoundError
+getFieldId(class TestClass, "intField", "Lint;")
+Caught java.lang.NoSuchFieldError
+  caused by java.lang.ClassNotFoundException
+getFieldId(class TestClass, "stringField", "I")
+Caught java.lang.NoSuchFieldError
+getFieldId(class TestClass, "stringField", "Ljava/lang/String;")
+Result: true
+getFieldId(class TestClass, "stringField", "java/lang/String")
+Caught java.lang.NoSuchFieldError
+  caused by java.lang.NoClassDefFoundError
+getFieldId(class TestClass, "stringField", "Ljava.lang.String;")
+Caught java.lang.NoSuchFieldError
+  caused by java.lang.NoClassDefFoundError
+getFieldId(class TestClass, "stringField", "java.lang.String")
+Caught java.lang.NoSuchFieldError
+  caused by java.lang.NoClassDefFoundError
+Test that MyClassLoader.loadClass("Bad.Class") shall not be called.
+  Error message for Bad/Class: Invalid descriptor: Bad/Class.
+  Error message for Bad.Class: Invalid descriptor: Bad.Class.
+  Error message for LBad.Class;: Invalid descriptor: LBad.Class;.
diff --git a/test/647-jni-get-field-id/get_field_id.cc b/test/647-jni-get-field-id/get_field_id.cc
new file mode 100644
index 0000000..2056cfb
--- /dev/null
+++ b/test/647-jni-get-field-id/get_field_id.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include "ScopedUtfChars.h"
+
+namespace art {
+
+// Native side of Main.getFieldId(): asks JNI for the field ID of `name` with type
+// descriptor `signature` on `cls` and reports whether one was returned.
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_getFieldId(JNIEnv* env,
+                                                           jclass,
+                                                           jclass cls,
+                                                           jstring name,
+                                                           jstring signature) {
+  ScopedUtfChars utf_name(env, name);
+  if (utf_name.c_str() == nullptr) {
+    return false;
+  }
+  ScopedUtfChars utf_signature(env, signature);
+  if (utf_signature.c_str() == nullptr) {
+    return false;
+  }
+  // A null ID means the lookup failed.
+  jfieldID id = env->GetFieldID(cls, utf_name.c_str(), utf_signature.c_str());
+  return id != nullptr;
+}
+
+}  // namespace art
diff --git a/test/647-jni-get-field-id/info.txt b/test/647-jni-get-field-id/info.txt
new file mode 100644
index 0000000..00a2b20
--- /dev/null
+++ b/test/647-jni-get-field-id/info.txt
@@ -0,0 +1 @@
+Test for native calls to JNI GetFieldID() with odd signatures.
diff --git a/test/647-jni-get-field-id/src/DefiningLoader.java b/test/647-jni-get-field-id/src/DefiningLoader.java
new file mode 100644
index 0000000..8597c11
--- /dev/null
+++ b/test/647-jni-get-field-id/src/DefiningLoader.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * A class loader with atypical behavior: we try to load a private
+ * class implementation before asking the system or boot loader.  This
+ * is used to create multiple classes with identical names in a single VM.
+ *
+ * If DexFile is available, we use that; if not, we assume we're not in
+ * Dalvik and instantiate the class with defineClass().
+ *
+ * The location of the DEX files and class data is dependent upon the
+ * test framework.
+ */
+public class DefiningLoader extends ClassLoader {
+    static {
+        // For JVM, register as parallel capable.
+        // Android treats all class loaders as parallel capable and makes this a no-op.
+        registerAsParallelCapable();
+    }
+
+    /* this is where the .class files live */
+    static final String CLASS_PATH1 = "classes/";
+    static final String CLASS_PATH2 = "classes2/";
+
+    /* this is the DEX/Jar file */
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/647-jni-get-field-id.jar";
+
+    /* on Dalvik, this is a DexFile; otherwise, it's null */
+    private Class<?> mDexClass;
+
+    private Object mDexFile;
+
+    /**
+     * Construct DefiningLoader, grabbing a reference to the DexFile class
+     * if we're running under Dalvik.
+     */
+    public DefiningLoader(ClassLoader parent) {
+        super(parent);
+
+        try {
+            mDexClass = parent.loadClass("dalvik.system.DexFile");
+        } catch (ClassNotFoundException cnfe) {
+            // ignore -- not running Dalvik
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name.
+     *
+     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
+     * If we don't find a match, we throw an exception.
+     */
+    protected Class<?> findClass(String name) throws ClassNotFoundException
+    {
+        if (mDexClass != null) {
+            return findClassDalvik(name);
+        } else {
+            return findClassNonDalvik(name);
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name, from a DEX file.
+     */
+    private Class<?> findClassDalvik(String name)
+        throws ClassNotFoundException {
+
+        if (mDexFile == null) {
+            synchronized (DefiningLoader.class) {
+                Constructor<?> ctor;
+                /*
+                 * Construct a DexFile object through reflection.
+                 */
+                try {
+                    ctor = mDexClass.getConstructor(String.class);
+                } catch (NoSuchMethodException nsme) {
+                    throw new ClassNotFoundException("getConstructor failed",
+                        nsme);
+                }
+
+                try {
+                    mDexFile = ctor.newInstance(DEX_FILE);
+                } catch (InstantiationException ie) {
+                    throw new ClassNotFoundException("newInstance failed", ie);
+                } catch (IllegalAccessException iae) {
+                    throw new ClassNotFoundException("newInstance failed", iae);
+                } catch (InvocationTargetException ite) {
+                    throw new ClassNotFoundException("newInstance failed", ite);
+                }
+            }
+        }
+
+        /*
+         * Call DexFile.loadClass(String, ClassLoader).
+         */
+        Method meth;
+
+        try {
+            meth = mDexClass.getMethod("loadClass", String.class, ClassLoader.class);
+        } catch (NoSuchMethodException nsme) {
+            throw new ClassNotFoundException("getMethod failed", nsme);
+        }
+
+        try {
+            meth.invoke(mDexFile, name, this);
+        } catch (IllegalAccessException iae) {
+            throw new ClassNotFoundException("loadClass failed", iae);
+        } catch (InvocationTargetException ite) {
+            throw new ClassNotFoundException("loadClass failed",
+                ite.getCause());
+        }
+
+        return null;
+    }
+
+    /**
+     * Finds the class with the specified binary name, from .class files.
+     */
+    private Class<?> findClassNonDalvik(String name)
+        throws ClassNotFoundException {
+
+        String[] pathNames = { CLASS_PATH1 + name + ".class", CLASS_PATH2 + name + ".class" };
+
+        String pathName = null;
+        RandomAccessFile raf = null;
+
+        for (String pn : pathNames) {
+            pathName = pn;
+            try {
+                //System.out.println("--- Defining: looking for " + pathName);
+                raf = new RandomAccessFile(new File(pathName), "r");
+                break;
+            } catch (FileNotFoundException fnfe) {
+            }
+        }
+        if (raf == null) {
+            throw new ClassNotFoundException("Not found: " + pathNames[0] + ":" + pathNames[1]);
+        }
+
+        /* read the entire file in */
+        byte[] fileData;
+        try {
+            fileData = new byte[(int) raf.length()];
+            raf.readFully(fileData);
+        } catch (IOException ioe) {
+            throw new ClassNotFoundException("Read error: " + pathName);
+        } finally {
+            try {
+                raf.close();
+            } catch (IOException ioe) {
+                // drop
+            }
+        }
+
+        /* create the class */
+        //System.out.println("--- Defining: defining " + name);
+        try {
+            return defineClass(name, fileData, 0, fileData.length);
+        } catch (Throwable th) {
+            throw new ClassNotFoundException("defineClass failed", th);
+        }
+    }
+
+    /**
+     * Load a class.
+     *
+     * Normally a class loader wouldn't override this, but we want our
+     * version of the class to take precedence over an already-loaded
+     * version.
+     *
+     * We still want the system classes (e.g. java.lang.Object) from the
+     * bootstrap class loader.
+     */
+    synchronized protected Class<?> loadClass(String name, boolean resolve)
+        throws ClassNotFoundException
+    {
+        Class<?> res;
+
+        /*
+         * 1. Invoke findLoadedClass(String) to check if the class has
+         * already been loaded.
+         *
+         * This doesn't change.
+         */
+        res = findLoadedClass(name);
+        if (res != null) {
+            // System.out.println("FancyLoader.loadClass: " + name + " already loaded");
+            if (resolve)
+                resolveClass(res);
+            return res;
+        }
+
+        /*
+         * 3. Invoke the findClass(String) method to find the class.
+         */
+        try {
+            res = findClass(name);
+            if (resolve)
+                resolveClass(res);
+        }
+        catch (ClassNotFoundException e) {
+            // we couldn't find it, so eat the exception and keep going
+        }
+
+        /*
+         * 2. Invoke the loadClass method on the parent class loader.  If
+         * the parent loader is null the class loader built-in to the
+         * virtual machine is used, instead.
+         *
+         * (Since we're not in java.lang, we can't actually invoke the
+         * parent's loadClass() method, but we passed our parent to the
+         * super-class which can take care of it for us.)
+         */
+        res = super.loadClass(name, resolve);   // returns class or throws
+        return res;
+    }
+}
diff --git a/test/647-jni-get-field-id/src/Main.java b/test/647-jni-get-field-id/src/Main.java
new file mode 100644
index 0000000..590ee8a
--- /dev/null
+++ b/test/647-jni-get-field-id/src/Main.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+    public static void main(String[] args) {
+        System.loadLibrary(args[0]);
+
+        testGetFieldId(TestClass.class, "intField", "I");
+        testGetFieldId(TestClass.class, "intField", "int");
+        testGetFieldId(TestClass.class, "intField", "Lint;");
+        testGetFieldId(TestClass.class, "stringField", "I");
+        testGetFieldId(TestClass.class, "stringField", "Ljava/lang/String;");
+        testGetFieldId(TestClass.class, "stringField", "java/lang/String");
+        testGetFieldId(TestClass.class, "stringField", "Ljava.lang.String;");
+        testGetFieldId(TestClass.class, "stringField", "java.lang.String");
+
+        try {
+            Method get = Main.class.getDeclaredMethod("getFieldId",
+                                                      Class.class,
+                                                      String.class,
+                                                      String.class);
+            MyClassLoader loader = new MyClassLoader(Main.class.getClassLoader());
+            Class<?> otherMain = Class.forName("Main", true, loader);
+            Method m = otherMain.getDeclaredMethod("testClassLoading", Method.class);
+            m.invoke(null, get);
+        } catch (Throwable t) {
+            t.printStackTrace(System.out);
+        }
+    }
+
+    public static void testClassLoading(Method get) throws Exception {
+        System.out.println("Test that MyClassLoader.loadClass(\"Bad.Class\") shall not be called.");
+        String[] bad_class_names = { "Bad/Class", "Bad.Class", "LBad.Class;" };
+        for (String signature : bad_class_names) {
+            try {
+                get.invoke(null, TestClass.class, "bogus", signature);
+                System.out.println("FAIL!");
+            } catch (InvocationTargetException ite) {
+                if (!(ite.getCause() instanceof NoSuchFieldError) ||
+                    !(ite.getCause().getCause() instanceof NoClassDefFoundError)) {
+                  throw ite;
+                }
+                NoClassDefFoundError ncdfe = (NoClassDefFoundError) ite.getCause().getCause();
+                System.out.println("  Error message for " + signature + ": " + ncdfe.getMessage());
+            }
+        }
+    }
+
+    public static void testGetFieldId(Class<?> cls, String name, String signature) {
+        System.out.println("getFieldId(" + cls + ", \"" + name + "\", \"" + signature + "\")");
+        try {
+            boolean result = getFieldId(cls, name, signature);
+            System.out.println("Result: " + result);
+        } catch (Throwable t) {
+            System.out.println("Caught " + DescribeThrowable(t));
+            for (Throwable cause = t.getCause(); cause != null; cause = cause.getCause()) {
+                System.out.println("  caused by " + DescribeThrowable(cause));
+            }
+        }
+    }
+
+    public static String DescribeThrowable(Throwable t) {
+        return PRINT_MESSAGE ? t.getClass().getName() + ": " + t.getMessage()
+                             : t.getClass().getName();
+    }
+
+    public static native boolean getFieldId(Class<?> cls, String name, String signature);
+
+    // Set to true to see actual messages.
+    public static final boolean PRINT_MESSAGE = false;
+}
+
+class TestClass {  // Field holder probed through JNI GetFieldID() by Main.
+    public int intField;        // per expected.txt, found only with signature "I"
+    public String stringField;  // per expected.txt, found only with "Ljava/lang/String;"
+}
+
+class MyClassLoader extends DefiningLoader {
+  public MyClassLoader(ClassLoader parent) {
+      super(parent);
+  }
+
+  public Class<?> loadClass(String name) throws ClassNotFoundException
+  {
+      if (name.equals("Bad.Class")) {
+          throw new Error("findClass(\"Bad.Class\")");
+      }
+      return super.loadClass(name);
+  }
+}
diff --git a/test/647-sinking-catch/expected.txt b/test/647-sinking-catch/expected.txt
new file mode 100644
index 0000000..b2cde18
--- /dev/null
+++ b/test/647-sinking-catch/expected.txt
@@ -0,0 +1 @@
+Three
diff --git a/test/647-sinking-catch/info.txt b/test/647-sinking-catch/info.txt
new file mode 100644
index 0000000..7a8c6a9
--- /dev/null
+++ b/test/647-sinking-catch/info.txt
@@ -0,0 +1,2 @@
+Regression test for the code sinking optimization, which used
+to incorrectly use catch phis.
diff --git a/test/647-sinking-catch/smali/TestCase.smali b/test/647-sinking-catch/smali/TestCase.smali
new file mode 100644
index 0000000..49a3060
--- /dev/null
+++ b/test/647-sinking-catch/smali/TestCase.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static foo()V
+  .registers 6
+  new-instance v0, Ljava/lang/Exception;
+  invoke-direct {v0}, Ljava/lang/Exception;-><init>()V
+  const-string v1, "Zero"
+  :try_start
+  const-string v1, "One"
+  const-string v1, "Two"
+  const-string v1, "Three"  # value held by v1 when the throw below executes
+  throw v0
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+  # Handler prints v1 (the test expects "Three") and then rethrows the exception in v0.
+  :catch_all
+  sget-object v5, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  invoke-virtual {v5, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+  throw v0
+.end method
diff --git a/test/647-sinking-catch/src/Main.java b/test/647-sinking-catch/src/Main.java
new file mode 100644
index 0000000..0e59056
--- /dev/null
+++ b/test/647-sinking-catch/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  /** Invokes the named static TestCase method reflectively; the call is required to throw. */
+  public static void testMethod(String method) throws Exception {
+    Method target = Class.forName("TestCase").getMethod(method);
+    try {
+      target.invoke(null, new Object[0]);
+    } catch (InvocationTargetException expected) {
+      return;  // expected
+    }
+    // Reaching this point means the invoked method returned normally.
+    throw new Error();
+  }
+
+  // Runs the single regression case.
+  public static void main(String[] args) throws Exception {
+    testMethod("foo");
+  }
+}
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
index 290997a..19312b4 100644
--- a/test/900-hello-plugin/load_unload.cc
+++ b/test/900-hello-plugin/load_unload.cc
@@ -20,6 +20,8 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "java_vm_ext.h"
+#include "runtime.h"
 
 namespace art {
 
diff --git a/test/911-get-stack-trace/expected.txt b/test/911-get-stack-trace/expected.txt
index 2318414..fb5f71b 100644
--- a/test/911-get-stack-trace/expected.txt
+++ b/test/911-get-stack-trace/expected.txt
@@ -4,7 +4,7 @@
 From top
 ---------
  getStackTrace (Ljava/lang/Thread;II)[[Ljava/lang/String; -1 -2
- print (Ljava/lang/Thread;II)V 0 36
+ print (Ljava/lang/Thread;II)V 0 38
  printOrWait (IILart/ControlData;)V 6 41
  baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
  bar (IIILart/ControlData;)J 0 26
@@ -22,10 +22,9 @@
  bar (IIILart/ControlData;)J 0 26
  foo (IIILart/ControlData;)I 0 21
  doTest ()V 38 25
- run ()V 20 26
- main ([Ljava/lang/String;)V 0 19
+ run ()V 0 30
 ---------
- print (Ljava/lang/Thread;II)V 0 36
+ print (Ljava/lang/Thread;II)V 0 38
  printOrWait (IILart/ControlData;)V 6 41
  baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
  bar (IIILart/ControlData;)J 0 26
@@ -43,11 +42,10 @@
  bar (IIILart/ControlData;)J 0 26
  foo (IIILart/ControlData;)I 0 21
  doTest ()V 42 26
- run ()V 20 26
- main ([Ljava/lang/String;)V 0 19
+ run ()V 0 30
 ---------
  getStackTrace (Ljava/lang/Thread;II)[[Ljava/lang/String; -1 -2
- print (Ljava/lang/Thread;II)V 0 36
+ print (Ljava/lang/Thread;II)V 0 38
  printOrWait (IILart/ControlData;)V 6 41
  baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
  bar (IIILart/ControlData;)J 0 26
@@ -59,19 +57,19 @@
  baz (IIILart/ControlData;)Ljava/lang/Object; 9 34
 From bottom
 ---------
- main ([Ljava/lang/String;)V 0 19
+ run ()V 0 30
 ---------
+ baz (IIILart/ControlData;)Ljava/lang/Object; 9 34
  bar (IIILart/ControlData;)J 0 26
  foo (IIILart/ControlData;)I 0 21
  doTest ()V 65 32
- run ()V 20 26
- main ([Ljava/lang/String;)V 0 19
+ run ()V 0 30
 ---------
+ bar (IIILart/ControlData;)J 0 26
  foo (IIILart/ControlData;)I 0 21
  baz (IIILart/ControlData;)Ljava/lang/Object; 9 34
  bar (IIILart/ControlData;)J 0 26
  foo (IIILart/ControlData;)I 0 21
- doTest ()V 69 33
 
 ################################
 ### Other thread (suspended) ###
@@ -258,9 +256,12 @@
 Signal Catcher
 
 ---------
-main
+Test911
 
 ---------
+main
+<not printed>
+---------
 AllTraces Thread 0
  wait ()V -1 -2
  printOrWait (IILart/ControlData;)V 24 47
@@ -356,14 +357,16 @@
 Signal Catcher
 
 ---------
-main
+Test911
  getAllStackTraces (I)[[Ljava/lang/Object; -1 -2
  printAll (I)V 0 75
  doTest ()V 128 59
- run ()V 44 38
- main ([Ljava/lang/String;)V 0 19
+ run ()V 24 42
 
 ---------
+main
+<not printed>
+---------
 AllTraces Thread 0
  wait ()V -1 -2
  printOrWait (IILart/ControlData;)V 24 47
@@ -589,18 +592,23 @@
 Signal Catcher
 
 ---------
-main
+Test911
  getAllStackTraces (I)[[Ljava/lang/Object; -1 -2
  printAll (I)V 0 75
  doTest ()V 133 61
- run ()V 44 38
- main ([Ljava/lang/String;)V 0 19
+ run ()V 24 42
 
+---------
+main
+<not printed>
 
 ########################################
 ### Other select threads (suspended) ###
 ########################################
 ---------
+Test911
+
+---------
 ThreadListTraces Thread 0
 
 ---------
@@ -616,55 +624,58 @@
 ThreadListTraces Thread 8
 
 ---------
-main
-
----------
-ThreadListTraces Thread 0
- wait ()V -1 -2
- printOrWait (IILart/ControlData;)V 24 47
- baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
- bar (IIILart/ControlData;)J 0 26
- foo (IIILart/ControlData;)I 0 21
-
----------
-ThreadListTraces Thread 2
- wait ()V -1 -2
- printOrWait (IILart/ControlData;)V 24 47
- baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
- bar (IIILart/ControlData;)J 0 26
- foo (IIILart/ControlData;)I 0 21
-
----------
-ThreadListTraces Thread 4
- wait ()V -1 -2
- printOrWait (IILart/ControlData;)V 24 47
- baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
- bar (IIILart/ControlData;)J 0 26
- foo (IIILart/ControlData;)I 0 21
-
----------
-ThreadListTraces Thread 6
- wait ()V -1 -2
- printOrWait (IILart/ControlData;)V 24 47
- baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
- bar (IIILart/ControlData;)J 0 26
- foo (IIILart/ControlData;)I 0 21
-
----------
-ThreadListTraces Thread 8
- wait ()V -1 -2
- printOrWait (IILart/ControlData;)V 24 47
- baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
- bar (IIILart/ControlData;)J 0 26
- foo (IIILart/ControlData;)I 0 21
-
----------
-main
+Test911
  getThreadListStackTraces ([Ljava/lang/Thread;I)[[Ljava/lang/Object; -1 -2
  printList ([Ljava/lang/Thread;I)V 0 68
  doTest ()V 116 54
- run ()V 52 42
- main ([Ljava/lang/String;)V 0 19
+ run ()V 32 46
+
+---------
+ThreadListTraces Thread 0
+ wait ()V -1 -2
+ printOrWait (IILart/ControlData;)V 24 47
+ baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
+ bar (IIILart/ControlData;)J 0 26
+ foo (IIILart/ControlData;)I 0 21
+
+---------
+ThreadListTraces Thread 2
+ wait ()V -1 -2
+ printOrWait (IILart/ControlData;)V 24 47
+ baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
+ bar (IIILart/ControlData;)J 0 26
+ foo (IIILart/ControlData;)I 0 21
+
+---------
+ThreadListTraces Thread 4
+ wait ()V -1 -2
+ printOrWait (IILart/ControlData;)V 24 47
+ baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
+ bar (IIILart/ControlData;)J 0 26
+ foo (IIILart/ControlData;)I 0 21
+
+---------
+ThreadListTraces Thread 6
+ wait ()V -1 -2
+ printOrWait (IILart/ControlData;)V 24 47
+ baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
+ bar (IIILart/ControlData;)J 0 26
+ foo (IIILart/ControlData;)I 0 21
+
+---------
+ThreadListTraces Thread 8
+ wait ()V -1 -2
+ printOrWait (IILart/ControlData;)V 24 47
+ baz (IIILart/ControlData;)Ljava/lang/Object; 2 32
+ bar (IIILart/ControlData;)J 0 26
+ foo (IIILart/ControlData;)I 0 21
+
+---------
+Test911
+ getThreadListStackTraces ([Ljava/lang/Thread;I)[[Ljava/lang/Object; -1 -2
+ printList ([Ljava/lang/Thread;I)V 0 68
+ doTest ()V 121 56
+ run ()V 32 46
 
 ---------
 ThreadListTraces Thread 0
@@ -771,25 +782,16 @@
  foo (IIILart/ControlData;)I 0 21
  run ()V 4 37
 
----------
-main
- getThreadListStackTraces ([Ljava/lang/Thread;I)[[Ljava/lang/Object; -1 -2
- printList ([Ljava/lang/Thread;I)V 0 68
- doTest ()V 121 56
- run ()V 52 42
- main ([Ljava/lang/String;)V 0 19
-
 
 ###################
 ### Same thread ###
 ###################
-5
+4
 JVMTI_ERROR_ILLEGAL_ARGUMENT
 [public static native java.lang.Object[] art.Frames.getFrameLocation(java.lang.Thread,int), ffffffff]
 [public static void art.Frames.doTestSameThread(), 38]
 [public static void art.Frames.doTest() throws java.lang.Exception, 0]
-[public static void art.Test911.run() throws java.lang.Exception, 3c]
-[public static void Main.main(java.lang.String[]) throws java.lang.Exception, 0]
+[public void art.Test911$1.run(), 28]
 JVMTI_ERROR_NO_MORE_FRAMES
 
 ################################
diff --git a/test/911-get-stack-trace/src/art/PrintThread.java b/test/911-get-stack-trace/src/art/PrintThread.java
index de1da9c..f50a66b 100644
--- a/test/911-get-stack-trace/src/art/PrintThread.java
+++ b/test/911-get-stack-trace/src/art/PrintThread.java
@@ -19,6 +19,8 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class PrintThread {
   public static void print(String[][] stack) {
@@ -36,6 +38,20 @@
     print(getStackTrace(t, start, max));
   }
 
+  // We have to ignore some threads when printing all stack traces. These are threads that may or
+  // may not exist depending on the environment.
+  public final static String IGNORE_THREAD_NAME_REGEX =
+      "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main";
+  public final static Matcher IGNORE_THREADS =
+      Pattern.compile(IGNORE_THREAD_NAME_REGEX).matcher("");
+
+  // We have to skip the stack of some threads when printing all stack traces. These are threads
+  // that may have a different call stack (e.g., when run as an app), or may be in a
+  // non-deterministic state.
+  public final static String CUT_STACK_THREAD_NAME_REGEX = "Daemon|main";
+  public final static Matcher CUT_STACK_THREADS =
+      Pattern.compile(CUT_STACK_THREAD_NAME_REGEX).matcher("");
+
   public static void printAll(Object[][] stacks) {
     List<String> stringified = new ArrayList<String>(stacks.length);
 
@@ -43,11 +59,11 @@
       Thread t = (Thread)stackInfo[0];
       String name = (t != null) ? t.getName() : "null";
       String stackSerialization;
-      if (name.contains("Daemon")) {
+      if (CUT_STACK_THREADS.reset(name).find()) {
         // Do not print daemon stacks, as they're non-deterministic.
         stackSerialization = "<not printed>";
-      } else if (name.startsWith("Jit thread pool worker")) {
-        // Skip JIT thread pool. It may or may not be there depending on configuration.
+      } else if (IGNORE_THREADS.reset(name).find()) {
+        // Skip IGNORE_THREADS.
         continue;
       } else {
         StringBuilder sb = new StringBuilder();
diff --git a/test/911-get-stack-trace/src/art/Test911.java b/test/911-get-stack-trace/src/art/Test911.java
index 71a5196..ee59368 100644
--- a/test/911-get-stack-trace/src/art/Test911.java
+++ b/test/911-get-stack-trace/src/art/Test911.java
@@ -23,27 +23,38 @@
     Main.bindAgentJNIForClass(PrintThread.class);
     Main.bindAgentJNIForClass(ThreadListTraces.class);
 
-    SameThread.doTest();
+    Thread t = new Thread("Test911") {
+      @Override
+      public void run() {
+        try {
+          SameThread.doTest();
 
-    System.out.println();
+          System.out.println();
 
-    OtherThread.doTestOtherThreadWait();
+          OtherThread.doTestOtherThreadWait();
 
-    System.out.println();
+          System.out.println();
 
-    OtherThread.doTestOtherThreadBusyLoop();
+          OtherThread.doTestOtherThreadBusyLoop();
 
-    System.out.println();
+          System.out.println();
 
-    AllTraces.doTest();
+          AllTraces.doTest();
 
-    System.out.println();
+          System.out.println();
 
-    ThreadListTraces.doTest();
+          ThreadListTraces.doTest();
 
-    System.out.println();
+          System.out.println();
 
-    Frames.doTest();
+          Frames.doTest();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
+    t.start();
+    t.join();
 
     System.out.println("Done");
   }
diff --git a/test/912-classes/classes.cc b/test/912-classes/classes.cc
index 2636367..869eacd 100644
--- a/test/912-classes/classes.cc
+++ b/test/912-classes/classes.cc
@@ -16,50 +16,39 @@
 
 #include <stdio.h>
 
-#include "android-base/macros.h"
+#include <mutex>
+#include <vector>
 
-#include "class_linker.h"
+#include "android-base/macros.h"
+#include "android-base/stringprintf.h"
+
 #include "jni.h"
-#include "mirror/class_loader.h"
 #include "jvmti.h"
-#include "runtime.h"
-#include "scoped_local_ref.h"
-#include "scoped_utf_chars.h"
-#include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
 
 // Test infrastructure
 #include "jni_helper.h"
 #include "jvmti_helper.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
 #include "test_env.h"
 
 namespace art {
 namespace Test912Classes {
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isModifiableClass(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test912_isModifiableClass(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jboolean res = JNI_FALSE;
   jvmtiError result = jvmti_env->IsModifiableClass(klass, &res);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running IsModifiableClass: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-    return JNI_FALSE;
-  }
+  JvmtiErrorToException(env, jvmti_env, result);
   return res;
 }
 
-extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getClassSignature(
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test912_getClassSignature(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   char* sig;
   char* gen;
   jvmtiError result = jvmti_env->GetClassSignature(klass, &sig, &gen);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetClassSignature: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -83,57 +72,36 @@
   return ret;
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterface(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test912_isInterface(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jboolean is_interface = JNI_FALSE;
   jvmtiError result = jvmti_env->IsInterface(klass, &is_interface);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running IsInterface: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-    return JNI_FALSE;
-  }
+  JvmtiErrorToException(env, jvmti_env, result);
   return is_interface;
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isArrayClass(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test912_isArrayClass(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jboolean is_array_class = JNI_FALSE;
   jvmtiError result = jvmti_env->IsArrayClass(klass, &is_array_class);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running IsArrayClass: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-    return JNI_FALSE;
-  }
+  JvmtiErrorToException(env, jvmti_env, result);
   return is_array_class;
 }
 
-extern "C" JNIEXPORT jint JNICALL Java_Main_getClassModifiers(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+extern "C" JNIEXPORT jint JNICALL Java_art_Test912_getClassModifiers(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint mod;
   jvmtiError result = jvmti_env->GetClassModifiers(klass, &mod);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetClassModifiers: %s\n", err);
-    return JNI_FALSE;
-  }
+  JvmtiErrorToException(env, jvmti_env, result);
   return mod;
 }
 
-extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getClassFields(
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test912_getClassFields(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint count = 0;
   jfieldID* fields = nullptr;
   jvmtiError result = jvmti_env->GetClassFields(klass, &count, &fields);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetClassFields: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -153,15 +121,12 @@
   return ret;
 }
 
-extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getClassMethods(
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test912_getClassMethods(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint count = 0;
   jmethodID* methods = nullptr;
   jvmtiError result = jvmti_env->GetClassMethods(klass, &count, &methods);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetClassMethods: %s\n", err);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -181,15 +146,12 @@
   return ret;
 }
 
-extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getImplementedInterfaces(
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test912_getImplementedInterfaces(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint count = 0;
   jclass* classes = nullptr;
   jvmtiError result = jvmti_env->GetImplementedInterfaces(klass, &count, &classes);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetImplementedInterfaces: %s\n", err);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -203,35 +165,23 @@
   return ret;
 }
 
-extern "C" JNIEXPORT jint JNICALL Java_Main_getClassStatus(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+extern "C" JNIEXPORT jint JNICALL Java_art_Test912_getClassStatus(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint status;
   jvmtiError result = jvmti_env->GetClassStatus(klass, &status);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetClassStatus: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-    return JNI_FALSE;
-  }
+  JvmtiErrorToException(env, jvmti_env, result);
   return status;
 }
 
-extern "C" JNIEXPORT jobject JNICALL Java_Main_getClassLoader(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+extern "C" JNIEXPORT jobject JNICALL Java_art_Test912_getClassLoader(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jobject classloader;
   jvmtiError result = jvmti_env->GetClassLoader(klass, &classloader);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetClassLoader: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-    return nullptr;
-  }
+  JvmtiErrorToException(env, jvmti_env, result);
   return classloader;
 }
 
-extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getClassLoaderClasses(
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test912_getClassLoaderClasses(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jobject jclassloader) {
   jint count = 0;
   jclass* classes = nullptr;
@@ -250,7 +200,7 @@
   return ret;
 }
 
-extern "C" JNIEXPORT jintArray JNICALL Java_Main_getClassVersion(
+extern "C" JNIEXPORT jintArray JNICALL Java_art_Test912_getClassVersion(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint major, minor;
   jvmtiError result = jvmti_env->GetClassVersionNumbers(klass, &minor, &major);
@@ -325,6 +275,22 @@
   JvmtiErrorToException(env, jvmti_env, ret);
 }
 
+static std::mutex gEventsMutex;
+static std::vector<std::string> gEvents;
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test912_getClassLoadMessages(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
+  std::lock_guard<std::mutex> guard(gEventsMutex);
+  jobjectArray ret = CreateObjectArray(env,
+                                       static_cast<jint>(gEvents.size()),
+                                       "java/lang/String",
+                                       [&](jint i) {
+    return env->NewStringUTF(gEvents[i].c_str());
+  });
+  gEvents.clear();
+  return ret;
+}
+
 class ClassLoadPreparePrinter {
  public:
   static void JNICALL ClassLoadCallback(jvmtiEnv* jenv,
@@ -339,7 +305,14 @@
     if (thread_name == "") {
       return;
     }
-    printf("Load: %s on %s\n", name.c_str(), thread_name.c_str());
+    if (thread_name_filter_ != "" && thread_name_filter_ != thread_name) {
+      return;
+    }
+
+    std::lock_guard<std::mutex> guard(gEventsMutex);
+    gEvents.push_back(android::base::StringPrintf("Load: %s on %s",
+                                                  name.c_str(),
+                                                  thread_name.c_str()));
   }
 
   static void JNICALL ClassPrepareCallback(jvmtiEnv* jenv,
@@ -354,14 +327,18 @@
     if (thread_name == "") {
       return;
     }
-    std::string cur_thread_name = GetThreadName(Thread::Current());
-    printf("Prepare: %s on %s (cur=%s)\n",
-           name.c_str(),
-           thread_name.c_str(),
-           cur_thread_name.c_str());
+    if (thread_name_filter_ != "" && thread_name_filter_ != thread_name) {
+      return;
+    }
+    std::string cur_thread_name = GetThreadName(jenv, jni_env, nullptr);
+
+    std::lock_guard<std::mutex> guard(gEventsMutex);
+    gEvents.push_back(android::base::StringPrintf("Prepare: %s on %s (cur=%s)",
+                                                  name.c_str(),
+                                                  thread_name.c_str(),
+                                                  cur_thread_name.c_str()));
   }
 
- private:
   static std::string GetThreadName(jvmtiEnv* jenv, JNIEnv* jni_env, jthread thread) {
     jvmtiThreadInfo info;
     jvmtiError result = jenv->GetThreadInfo(thread, &info);
@@ -382,60 +359,28 @@
     return tmp;
   }
 
-  static std::string GetThreadName(Thread* thread) {
-    std::string tmp;
-    thread->GetThreadName(tmp);
-    return tmp;
-  }
+  static std::string thread_name_filter_;
 };
+std::string ClassLoadPreparePrinter::thread_name_filter_;
 
-extern "C" JNIEXPORT void JNICALL Java_Main_enableClassLoadPreparePrintEvents(
-    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jboolean enable) {
+extern "C" JNIEXPORT void JNICALL Java_art_Test912_enableClassLoadPreparePrintEvents(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jboolean enable, jthread thread) {
+  if (thread != nullptr) {
+    ClassLoadPreparePrinter::thread_name_filter_ =
+        ClassLoadPreparePrinter::GetThreadName(jvmti_env, env, thread);
+  } else {
+    ClassLoadPreparePrinter::thread_name_filter_ = "";
+  }
+
   EnableEvents(env,
                enable,
                ClassLoadPreparePrinter::ClassLoadCallback,
                ClassLoadPreparePrinter::ClassPrepareCallback);
 }
 
-struct ClassLoadSeen {
-  static void JNICALL ClassLoadSeenCallback(jvmtiEnv* jenv ATTRIBUTE_UNUSED,
-                                            JNIEnv* jni_env ATTRIBUTE_UNUSED,
-                                            jthread thread ATTRIBUTE_UNUSED,
-                                            jclass klass ATTRIBUTE_UNUSED) {
-    saw_event = true;
-  }
-
-  static bool saw_event;
-};
-bool ClassLoadSeen::saw_event = false;
-
-extern "C" JNIEXPORT void JNICALL Java_Main_enableClassLoadSeenEvents(
-    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jboolean b) {
-  EnableEvents(env, b, ClassLoadSeen::ClassLoadSeenCallback, nullptr);
-}
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_hadLoadEvent(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED) {
-  return ClassLoadSeen::saw_event ? JNI_TRUE : JNI_FALSE;
-}
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isLoadedClass(
-    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jstring class_name) {
-  ScopedUtfChars name(env, class_name);
-  ScopedObjectAccess soa(Thread::Current());
-  Runtime* current = Runtime::Current();
-  ClassLinker* class_linker = current->GetClassLinker();
-  bool found =
-      class_linker->LookupClass(
-          soa.Self(),
-          name.c_str(),
-          soa.Decode<mirror::ClassLoader>(current->GetSystemClassLoader())) != nullptr;
-  return found ? JNI_TRUE : JNI_FALSE;
-}
-
 class ClassLoadPrepareEquality {
  public:
-  static constexpr const char* kClassName = "LMain$ClassE;";
+  static constexpr const char* kClassName = "Lart/Test912$ClassE;";
   static constexpr const char* kStorageFieldName = "STATIC";
   static constexpr const char* kStorageFieldSig = "Ljava/lang/Object;";
   static constexpr const char* kStorageWeakFieldName = "WEAK";
@@ -553,13 +498,13 @@
 bool ClassLoadPrepareEquality::found_ = false;
 bool ClassLoadPrepareEquality::compared_ = false;
 
-extern "C" JNIEXPORT void JNICALL Java_Main_setEqualityEventStorageClass(
+extern "C" JNIEXPORT void JNICALL Java_art_Test912_setEqualityEventStorageClass(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   ClassLoadPrepareEquality::storage_class_ =
       reinterpret_cast<jclass>(env->NewGlobalRef(klass));
 }
 
-extern "C" JNIEXPORT void JNICALL Java_Main_enableClassLoadPrepareEqualityEvents(
+extern "C" JNIEXPORT void JNICALL Java_art_Test912_enableClassLoadPrepareEqualityEvents(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jboolean b) {
   EnableEvents(env,
                b,
diff --git a/test/912-classes/classes_art.cc b/test/912-classes/classes_art.cc
new file mode 100644
index 0000000..de2e456
--- /dev/null
+++ b/test/912-classes/classes_art.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <mutex>
+#include <vector>
+
+#include "android-base/macros.h"
+#include "android-base/stringprintf.h"
+
+#include "jni.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
+#include "test_env.h"
+
+namespace art {
+namespace Test912ArtClasses {
+
+// Enables or disables delivery of the JVMTI ClassLoad and ClassPrepare events.
+// Disabling ignores the callback arguments. Enabling first installs a zeroed
+// callback table containing only `class_load` and `class_prepare` (either may
+// be null), then switches both events on. Processing stops at the first JVMTI
+// error, which is handed to JvmtiErrorToException.
+static void EnableEvents(JNIEnv* env,
+                         jboolean enable,
+                         decltype(jvmtiEventCallbacks().ClassLoad) class_load,
+                         decltype(jvmtiEventCallbacks().ClassPrepare) class_prepare) {
+  if (enable == JNI_FALSE) {
+    jvmtiError ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
+                                                         JVMTI_EVENT_CLASS_LOAD,
+                                                         nullptr);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+    ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
+                                              JVMTI_EVENT_CLASS_PREPARE,
+                                              nullptr);
+    JvmtiErrorToException(env, jvmti_env, ret);
+    return;
+  }
+
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.ClassLoad = class_load;
+  callbacks.ClassPrepare = class_prepare;
+  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
+    return;
+  }
+
+  ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                            JVMTI_EVENT_CLASS_LOAD,
+                                            nullptr);
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
+    return;
+  }
+  ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                            JVMTI_EVENT_CLASS_PREPARE,
+                                            nullptr);
+  JvmtiErrorToException(env, jvmti_env, ret);
+}
+
+// Remembers whether a ClassLoad event was ever delivered to its callback.
+// `saw_event` is set by the callback and never cleared in this file.
+struct ClassLoadSeen {
+  static void JNICALL ClassLoadSeenCallback(jvmtiEnv* jenv ATTRIBUTE_UNUSED,
+                                            JNIEnv* jni_env ATTRIBUTE_UNUSED,
+                                            jthread thread ATTRIBUTE_UNUSED,
+                                            jclass klass ATTRIBUTE_UNUSED) {
+    saw_event = true;
+  }
+
+  static bool saw_event;
+};
+bool ClassLoadSeen::saw_event = false;
+
+// Toggles class-load tracking; only a ClassLoad callback is installed (no ClassPrepare callback).
+extern "C" JNIEXPORT void JNICALL Java_art_Test912Art_enableClassLoadSeenEvents(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jboolean b) {
+  EnableEvents(env, b, ClassLoadSeen::ClassLoadSeenCallback, nullptr);
+}
+
+// Returns whether ClassLoadSeenCallback has run at least once.
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test912Art_hadLoadEvent(
+    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass ATTRIBUTE_UNUSED) {
+  return ClassLoadSeen::saw_event ? JNI_TRUE : JNI_FALSE;
+}
+
+// Returns JNI_TRUE iff some class reported by GetLoadedClasses has a JVMTI
+// class signature exactly equal to `class_name` (compared with strcmp).
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test912Art_isLoadedClass(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jstring class_name) {
+  ScopedUtfChars name(env, class_name);
+  if (name.c_str() == nullptr) {
+    // No usable UTF-8 chars (e.g. null class_name); nothing to compare against.
+    return JNI_FALSE;
+  }
+
+  jint class_count = 0;
+  jclass* classes = nullptr;
+  jvmtiError res = jvmti_env->GetLoadedClasses(&class_count, &classes);
+  if (JvmtiErrorToException(env, jvmti_env, res)) {
+    return JNI_FALSE;
+  }
+
+  bool found = false;
+  for (jint i = 0; !found && i < class_count; ++i) {
+    char* sig = nullptr;
+    jvmtiError res2 = jvmti_env->GetClassSignature(classes[i], &sig, nullptr);
+    if (JvmtiErrorToException(env, jvmti_env, res2)) {
+      // `found` is still false here. Leave the loop instead of returning so
+      // that the `classes` array below is released on this path as well.
+      break;
+    }
+
+    found = strcmp(name.c_str(), sig) == 0;
+
+    CheckJvmtiError(jvmti_env, jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(sig)));
+  }
+
+  CheckJvmtiError(jvmti_env, jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(classes)));
+
+  return found ? JNI_TRUE : JNI_FALSE;
+}
+
+// We use the implementations from runtime_state.cc.
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
+                                                             jclass,
+                                                             jclass cls,
+                                                             jstring method_name);
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasJit(JNIEnv*, jclass);
+
+// Forwards to Java_Main_ensureJitCompiled with the arguments unchanged.
+extern "C" JNIEXPORT void JNICALL Java_art_Test912Art_ensureJitCompiled(
+    JNIEnv* env, jclass klass, jclass test_class, jstring name) {
+  Java_Main_ensureJitCompiled(env, klass, test_class, name);
+}
+
+// Forwards to Java_Main_hasJit and returns its result.
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test912Art_hasJit(JNIEnv* env, jclass klass) {
+  return Java_Main_hasJit(env, klass);
+}
+
+}  // namespace Test912ArtClasses
+}  // namespace art
diff --git a/test/912-classes/expected.txt b/test/912-classes/expected.txt
index 0f2920a..9dcc5f9 100644
--- a/test/912-classes/expected.txt
+++ b/test/912-classes/expected.txt
@@ -6,14 +6,14 @@
 11
 [Ljava/util/List;, <E:Ljava/lang/Object;>Ljava/lang/Object;Ljava/util/Collection<TE;>;]
 601
-[L$Proxy0;, null]
+[L$Proxy20;, null]
 11
 [I, null]
 411
 [[D, null]
 411
 int interface=false array=false modifiable=false
-$Proxy0 interface=false array=false modifiable=false
+$Proxy20 interface=false array=false modifiable=false
 java.lang.Runnable interface=true array=false modifiable=false
 java.lang.String interface=false array=false modifiable=false
 java.util.ArrayList interface=false array=false modifiable=true
@@ -29,70 +29,65 @@
 int 100000
 class [Ljava.lang.String; 10000
 class java.lang.Object 111
-class Main$TestForNonInit 11
-class Main$TestForInitFail 1011
+class art.Test912$TestForNonInit 11
+class art.Test912$TestForInitFail 1011
 int []
 class [Ljava.lang.String; []
 class java.lang.Object []
-interface Main$InfA []
-interface Main$InfB [interface Main$InfA]
-interface Main$InfC [interface Main$InfB]
-class Main$ClassA [interface Main$InfA]
-class Main$ClassB [interface Main$InfB]
-class Main$ClassC [interface Main$InfA, interface Main$InfC]
+interface art.Test912$InfA []
+interface art.Test912$InfB [interface art.Test912$InfA]
+interface art.Test912$InfC [interface art.Test912$InfB]
+class art.Test912$ClassA [interface art.Test912$InfA]
+class art.Test912$ClassB [interface art.Test912$InfB]
+class art.Test912$ClassC [interface art.Test912$InfA, interface art.Test912$InfC]
 class java.lang.String null
 class [Ljava.lang.String; null
-interface Main$InfA dalvik.system.PathClassLoader
-class $Proxy0 dalvik.system.PathClassLoader
+interface art.Test912$InfA dalvik.system.PathClassLoader
+class $Proxy20 dalvik.system.PathClassLoader
 
-boot <- src <- src-ex (A,B)
-912-classes-ex.jar+ -> 912-classes.jar+ -> 
+boot <- (B) <- (A,C)
 [class A, class B, class java.lang.Object]
-912-classes.jar+ -> 
 [class B, class java.lang.Object]
 
-boot <- src (B) <- src-ex (A, List)
-912-classes-ex.jar+ -> 912-classes.jar+ -> 
+boot <- (B) <- (A, List)
 [class A, class java.lang.Object, interface java.util.List]
-912-classes.jar+ -> 
 [class B, class java.lang.Object]
 
-boot <- src+src-ex (A,B)
-912-classes.jar+ -> 
+boot <- 1+2 (A,B)
 [class A, class B, class java.lang.Object]
 
 [37, 0]
 
 B, false
-Load: LB; on main
-Prepare: LB; on main (cur=main)
+Load: LB; on ClassEvents
+Prepare: LB; on ClassEvents (cur=ClassEvents)
 B, true
-Load: LB; on main
-Prepare: LB; on main (cur=main)
+Load: LB; on ClassEvents
+Prepare: LB; on ClassEvents (cur=ClassEvents)
 C, false
-Load: LA; on main
-Prepare: LA; on main (cur=main)
-Load: LC; on main
-Prepare: LC; on main (cur=main)
+Load: LA; on ClassEvents
+Prepare: LA; on ClassEvents (cur=ClassEvents)
+Load: LC; on ClassEvents
+Prepare: LC; on ClassEvents (cur=ClassEvents)
 A, false
 C, true
-Load: LA; on main
-Prepare: LA; on main (cur=main)
-Load: LC; on main
-Prepare: LC; on main (cur=main)
+Load: LA; on ClassEvents
+Prepare: LA; on ClassEvents (cur=ClassEvents)
+Load: LC; on ClassEvents
+Prepare: LC; on ClassEvents (cur=ClassEvents)
 A, true
 A, true
-Load: LA; on main
-Prepare: LA; on main (cur=main)
+Load: LA; on ClassEvents
+Prepare: LA; on ClassEvents (cur=ClassEvents)
 C, true
-Load: LC; on main
-Prepare: LC; on main (cur=main)
+Load: LC; on ClassEvents
+Prepare: LC; on ClassEvents (cur=ClassEvents)
 C, true
 Load: LA; on TestRunner
 Prepare: LA; on TestRunner (cur=TestRunner)
 Load: LC; on TestRunner
 Prepare: LC; on TestRunner (cur=TestRunner)
-Load: L$Proxy1; on main
-Prepare: L$Proxy1; on main (cur=main)
-Load: [LMain; on main
-Prepare: [LMain; on main (cur=main)
+Load: L$Proxy21; on ClassEvents
+Prepare: L$Proxy21; on ClassEvents (cur=ClassEvents)
+Load: [Lart/Test912; on ClassEvents
+Prepare: [Lart/Test912; on ClassEvents (cur=ClassEvents)
diff --git a/test/912-classes/src-ex/A.java b/test/912-classes/src-ex/A.java
deleted file mode 100644
index 2c43cfb..0000000
--- a/test/912-classes/src-ex/A.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class A {
-}
diff --git a/test/912-classes/src-ex/C.java b/test/912-classes/src-ex/C.java
deleted file mode 100644
index 97f8021..0000000
--- a/test/912-classes/src-ex/C.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class C extends A {
-}
diff --git a/test/912-classes/src/Main.java b/test/912-classes/src/Main.java
index 6c8858a..395cf6f 100644
--- a/test/912-classes/src/Main.java
+++ b/test/912-classes/src/Main.java
@@ -14,452 +14,9 @@
  * limitations under the License.
  */
 
-import java.lang.ref.Reference;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Proxy;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-
 public class Main {
   public static void main(String[] args) throws Exception {
-    art.Main.bindAgentJNIForClass(Main.class);
-    doTest();
-  }
-
-  public static void doTest() throws Exception {
-    testClass("java.lang.Object");
-    testClass("java.lang.String");
-    testClass("java.lang.Math");
-    testClass("java.util.List");
-
-    testClass(getProxyClass());
-
-    testClass(int.class);
-    testClass(double[].class);
-
-    testClassType(int.class);
-    testClassType(getProxyClass());
-    testClassType(Runnable.class);
-    testClassType(String.class);
-    testClassType(ArrayList.class);
-
-    testClassType(int[].class);
-    testClassType(Runnable[].class);
-    testClassType(String[].class);
-
-    testClassFields(Integer.class);
-    testClassFields(int.class);
-    testClassFields(String[].class);
-
-    testClassMethods(Integer.class);
-    testClassMethods(int.class);
-    testClassMethods(String[].class);
-
-    testClassStatus(int.class);
-    testClassStatus(String[].class);
-    testClassStatus(Object.class);
-    testClassStatus(TestForNonInit.class);
-    try {
-      System.out.println(TestForInitFail.dummy);
-    } catch (ExceptionInInitializerError e) {
-    }
-    testClassStatus(TestForInitFail.class);
-
-    testInterfaces(int.class);
-    testInterfaces(String[].class);
-    testInterfaces(Object.class);
-    testInterfaces(InfA.class);
-    testInterfaces(InfB.class);
-    testInterfaces(InfC.class);
-    testInterfaces(ClassA.class);
-    testInterfaces(ClassB.class);
-    testInterfaces(ClassC.class);
-
-    testClassLoader(String.class);
-    testClassLoader(String[].class);
-    testClassLoader(InfA.class);
-    testClassLoader(getProxyClass());
-
-    testClassLoaderClasses();
-
-    System.out.println();
-
-    testClassVersion();
-
-    System.out.println();
-
-    testClassEvents();
-  }
-
-  private static Class<?> proxyClass = null;
-
-  private static Class<?> getProxyClass() throws Exception {
-    if (proxyClass != null) {
-      return proxyClass;
-    }
-
-    proxyClass = Proxy.getProxyClass(Main.class.getClassLoader(), new Class[] { Runnable.class });
-    return proxyClass;
-  }
-
-  private static void testClass(String className) throws Exception {
-    Class<?> base = Class.forName(className);
-    testClass(base);
-  }
-
-  private static void testClass(Class<?> base) throws Exception {
-    String[] result = getClassSignature(base);
-    System.out.println(Arrays.toString(result));
-    int mod = getClassModifiers(base);
-    if (mod != base.getModifiers()) {
-      throw new RuntimeException("Unexpected modifiers: " + base.getModifiers() + " vs " + mod);
-    }
-    System.out.println(Integer.toHexString(mod));
-  }
-
-  private static void testClassType(Class<?> c) throws Exception {
-    boolean isInterface = isInterface(c);
-    boolean isArray = isArrayClass(c);
-    boolean isModifiable = isModifiableClass(c);
-    System.out.println(c.getName() + " interface=" + isInterface + " array=" + isArray +
-        " modifiable=" + isModifiable);
-  }
-
-  private static void testClassFields(Class<?> c) throws Exception {
-    System.out.println(Arrays.toString(getClassFields(c)));
-  }
-
-  private static void testClassMethods(Class<?> c) throws Exception {
-    System.out.println(Arrays.toString(getClassMethods(c)));
-  }
-
-  private static void testClassStatus(Class<?> c) {
-    System.out.println(c + " " + Integer.toBinaryString(getClassStatus(c)));
-  }
-
-  private static void testInterfaces(Class<?> c) {
-    System.out.println(c + " " + Arrays.toString(getImplementedInterfaces(c)));
-  }
-
-  private static boolean IsBootClassLoader(ClassLoader l) {
-    // Hacky check for Android's fake boot classloader.
-    return l.getClass().getName().equals("java.lang.BootClassLoader");
-  }
-
-  private static void testClassLoader(Class<?> c) {
-    Object cl = getClassLoader(c);
-    System.out.println(c + " " + (cl != null ? cl.getClass().getName() : "null"));
-    if (cl == null) {
-      if (c.getClassLoader() != null && !IsBootClassLoader(c.getClassLoader())) {
-        throw new RuntimeException("Expected " + c.getClassLoader() + ", but got null.");
-      }
-    } else {
-      if (!(cl instanceof ClassLoader)) {
-        throw new RuntimeException("Unexpected \"classloader\": " + cl + " (" + cl.getClass() +
-            ")");
-      }
-      if (cl != c.getClassLoader()) {
-        throw new RuntimeException("Unexpected classloader: " + c.getClassLoader() + " vs " + cl);
-      }
-    }
-  }
-
-  private static void testClassLoaderClasses() throws Exception {
-    ClassLoader boot = ClassLoader.getSystemClassLoader().getParent();
-    while (boot.getParent() != null) {
-      boot = boot.getParent();
-    }
-
-    System.out.println();
-    System.out.println("boot <- src <- src-ex (A,B)");
-    ClassLoader cl1 = create(create(boot, DEX1), DEX2);
-    Class.forName("B", false, cl1);
-    Class.forName("A", false, cl1);
-    printClassLoaderClasses(cl1);
-
-    System.out.println();
-    System.out.println("boot <- src (B) <- src-ex (A, List)");
-    ClassLoader cl2 = create(create(boot, DEX1), DEX2);
-    Class.forName("A", false, cl2);
-    Class.forName("java.util.List", false, cl2);
-    Class.forName("B", false, cl2.getParent());
-    printClassLoaderClasses(cl2);
-
-    System.out.println();
-    System.out.println("boot <- src+src-ex (A,B)");
-    ClassLoader cl3 = create(boot, DEX1, DEX2);
-    Class.forName("B", false, cl3);
-    Class.forName("A", false, cl3);
-    printClassLoaderClasses(cl3);
-
-    // Check that the boot classloader dumps something non-empty.
-    Class<?>[] bootClasses = getClassLoaderClasses(boot);
-    if (bootClasses.length == 0) {
-      throw new RuntimeException("No classes initiated by boot classloader.");
-    }
-    // Check that at least java.util.List is loaded.
-    boolean foundList = false;
-    for (Class<?> c : bootClasses) {
-      if (c == java.util.List.class) {
-        foundList = true;
-        break;
-      }
-    }
-    if (!foundList) {
-      System.out.println(Arrays.toString(bootClasses));
-      throw new RuntimeException("Could not find class java.util.List.");
-    }
-  }
-
-  private static void testClassVersion() {
-    System.out.println(Arrays.toString(getClassVersion(Main.class)));
-  }
-
-  private static void testClassEvents() throws Exception {
-    ClassLoader cl = Main.class.getClassLoader();
-    while (cl.getParent() != null) {
-      cl = cl.getParent();
-    }
-    final ClassLoader boot = cl;
-
-    // The JIT may deeply inline and load some classes. Preload these for test determinism.
-    final String PRELOAD_FOR_JIT[] = {
-        "java.nio.charset.CoderMalfunctionError",
-        "java.util.NoSuchElementException"
-    };
-    for (String s : PRELOAD_FOR_JIT) {
-      Class.forName(s);
-    }
-
-    Runnable r = new Runnable() {
-      @Override
-      public void run() {
-        try {
-          ClassLoader cl6 = create(boot, DEX1, DEX2);
-          System.out.println("C, true");
-          Class.forName("C", true, cl6);
-        } catch (Exception e) {
-          throw new RuntimeException(e);
-        }
-      }
-    };
-
-    Thread dummyThread = new Thread();
-    dummyThread.start();
-    dummyThread.join();
-
-    ensureJitCompiled(Main.class, "testClassEvents");
-
-    enableClassLoadPreparePrintEvents(true);
-
-    ClassLoader cl1 = create(boot, DEX1, DEX2);
-    System.out.println("B, false");
-    Class.forName("B", false, cl1);
-
-    ClassLoader cl2 = create(boot, DEX1, DEX2);
-    System.out.println("B, true");
-    Class.forName("B", true, cl2);
-
-    ClassLoader cl3 = create(boot, DEX1, DEX2);
-    System.out.println("C, false");
-    Class.forName("C", false, cl3);
-    System.out.println("A, false");
-    Class.forName("A", false, cl3);
-
-    ClassLoader cl4 = create(boot, DEX1, DEX2);
-    System.out.println("C, true");
-    Class.forName("C", true, cl4);
-    System.out.println("A, true");
-    Class.forName("A", true, cl4);
-
-    ClassLoader cl5 = create(boot, DEX1, DEX2);
-    System.out.println("A, true");
-    Class.forName("A", true, cl5);
-    System.out.println("C, true");
-    Class.forName("C", true, cl5);
-
-    Thread t = new Thread(r, "TestRunner");
-    t.start();
-    t.join();
-
-    // Check creation of arrays and proxies.
-    Proxy.getProxyClass(Main.class.getClassLoader(), new Class[] { Comparable.class });
-    Class.forName("[LMain;");
-
-    enableClassLoadPreparePrintEvents(false);
-
-    // Note: the JIT part of this test is about the JIT pulling in a class not yet touched by
-    //       anything else in the system. This could be the verifier or the interpreter. We
-    //       block the interpreter by calling ensureJitCompiled. The verifier, however, must
-    //       run in configurations where dex2oat didn't verify the class itself. So explicitly
-    //       check whether the class has been already loaded, and skip then.
-    // TODO: Add multiple configurations to the run script once that becomes easier to do.
-    if (hasJit() && !isLoadedClass("Main$ClassD")) {
-      testClassEventsJit();
-    }
-
-    testClassLoadPrepareEquality();
-  }
-
-  private static void testClassEventsJit() throws Exception {
-    enableClassLoadSeenEvents(true);
-
-    testClassEventsJitImpl();
-
-    enableClassLoadSeenEvents(false);
-
-    if (!hadLoadEvent()) {
-      throw new RuntimeException("Did not get expected load event.");
-    }
-  }
-
-  private static void testClassEventsJitImpl() throws Exception {
-    ensureJitCompiled(Main.class, "testClassEventsJitImpl");
-
-    if (ClassD.x != 1) {
-      throw new RuntimeException("Unexpected value");
-    }
-  }
-
-  private static void testClassLoadPrepareEquality() throws Exception {
-    setEqualityEventStorageClass(ClassF.class);
-
-    enableClassLoadPrepareEqualityEvents(true);
-
-    Class.forName("Main$ClassE");
-
-    enableClassLoadPrepareEqualityEvents(false);
-  }
-
-  private static void printClassLoaderClasses(ClassLoader cl) {
-    for (;;) {
-      if (cl == null || !cl.getClass().getName().startsWith("dalvik.system")) {
-        break;
-      }
-
-      ClassLoader saved = cl;
-      for (;;) {
-        if (cl == null || !cl.getClass().getName().startsWith("dalvik.system")) {
-          break;
-        }
-        String s = cl.toString();
-        int index1 = s.indexOf("zip file");
-        int index2 = s.indexOf(']', index1);
-        if (index2 < 0) {
-          throw new RuntimeException("Unexpected classloader " + s);
-        }
-        String zip_file = s.substring(index1, index2);
-        int index3 = zip_file.indexOf('"');
-        int index4 = zip_file.indexOf('"', index3 + 1);
-        if (index4 < 0) {
-          throw new RuntimeException("Unexpected classloader " + s);
-        }
-        String paths = zip_file.substring(index3 + 1, index4);
-        String pathArray[] = paths.split(":");
-        for (String path : pathArray) {
-          int index5 = path.lastIndexOf('/');
-          System.out.print(path.substring(index5 + 1));
-          System.out.print('+');
-        }
-        System.out.print(" -> ");
-        cl = cl.getParent();
-      }
-      System.out.println();
-      Class<?> classes[] = getClassLoaderClasses(saved);
-      Arrays.sort(classes, new ClassNameComparator());
-      System.out.println(Arrays.toString(classes));
-
-      cl = saved.getParent();
-    }
-  }
-
-  private static native boolean isModifiableClass(Class<?> c);
-  private static native String[] getClassSignature(Class<?> c);
-
-  private static native boolean isInterface(Class<?> c);
-  private static native boolean isArrayClass(Class<?> c);
-
-  private static native int getClassModifiers(Class<?> c);
-
-  private static native Object[] getClassFields(Class<?> c);
-  private static native Object[] getClassMethods(Class<?> c);
-  private static native Class<?>[] getImplementedInterfaces(Class<?> c);
-
-  private static native int getClassStatus(Class<?> c);
-
-  private static native Object getClassLoader(Class<?> c);
-
-  private static native Class<?>[] getClassLoaderClasses(ClassLoader cl);
-
-  private static native int[] getClassVersion(Class<?> c);
-
-  private static native void enableClassLoadPreparePrintEvents(boolean b);
-
-  private static native void ensureJitCompiled(Class<?> c, String name);
-
-  private static native boolean hasJit();
-  private static native boolean isLoadedClass(String name);
-  private static native void enableClassLoadSeenEvents(boolean b);
-  private static native boolean hadLoadEvent();
-
-  private static native void setEqualityEventStorageClass(Class<?> c);
-  private static native void enableClassLoadPrepareEqualityEvents(boolean b);
-
-  private static class TestForNonInit {
-    public static double dummy = Math.random();  // So it can't be compile-time initialized.
-  }
-
-  private static class TestForInitFail {
-    public static int dummy = ((int)Math.random())/0;  // So it throws when initializing.
-  }
-
-  public static interface InfA {
-  }
-  public static interface InfB extends InfA {
-  }
-  public static interface InfC extends InfB {
-  }
-
-  public abstract static class ClassA implements InfA {
-  }
-  public abstract static class ClassB extends ClassA implements InfB {
-  }
-  public abstract static class ClassC implements InfA, InfC {
-  }
-
-  public static class ClassD {
-    static int x = 1;
-  }
-
-  public static class ClassE {
-    public void foo() {
-    }
-    public void bar() {
-    }
-  }
-
-  public static class ClassF {
-    public static Object STATIC = null;
-    public static Reference<Object> WEAK = null;
-  }
-
-  private static final String DEX1 = System.getenv("DEX_LOCATION") + "/912-classes.jar";
-  private static final String DEX2 = System.getenv("DEX_LOCATION") + "/912-classes-ex.jar";
-
-  private static ClassLoader create(ClassLoader parent, String... elements) throws Exception {
-    // Note: We use a PathClassLoader, as we do not care about code performance. We only load
-    //       the classes, and they're empty.
-    Class<?> pathClassLoaderClass = Class.forName("dalvik.system.PathClassLoader");
-    Constructor<?> pathClassLoaderInit = pathClassLoaderClass.getConstructor(String.class,
-                                                                             ClassLoader.class);
-    String path = String.join(":", elements);
-    return (ClassLoader) pathClassLoaderInit.newInstance(path, parent);
-  }
-
-  private static class ClassNameComparator implements Comparator<Class<?>> {
-    public int compare(Class<?> c1, Class<?> c2) {
-      return c1.getName().compareTo(c2.getName());
-    }
+    art.Test912.run();
+    art.Test912Art.run();
   }
 }
diff --git a/test/912-classes/src/art/DexData.java b/test/912-classes/src/art/DexData.java
new file mode 100644
index 0000000..7d15032
--- /dev/null
+++ b/test/912-classes/src/art/DexData.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.nio.ByteBuffer;
+import java.util.Base64;
+
+import dalvik.system.InMemoryDexClassLoader;
+
+public class DexData {
+  public static ClassLoader getBootClassLoader() {  // Topmost ancestor of DexData's loader.
+    ClassLoader root = DexData.class.getClassLoader();
+    for (ClassLoader up = root.getParent(); up != null; up = up.getParent()) {
+      root = up;
+    }
+    return root;
+  }
+
+  public static ClassLoader create1() {  // Parent defaults to getBootClassLoader().
+    return create1(getBootClassLoader());
+  }
+  public static ClassLoader create1(ClassLoader parent) {  // Loader over DEX_DATA_B only.
+    return create(parent, DEX_DATA_B);
+  }
+
+  public static ClassLoader create2() {  // Parent defaults to getBootClassLoader().
+    return create2(getBootClassLoader());
+  }
+  public static ClassLoader create2(ClassLoader parent) {  // Loader over DEX_DATA_AC only.
+    return create(parent, DEX_DATA_AC);
+  }
+
+  public static ClassLoader create12() {  // Parent defaults to getBootClassLoader().
+    return create12(getBootClassLoader());
+  }
+  public static ClassLoader create12(ClassLoader parent) {  // DEX_DATA_AC first, then DEX_DATA_B.
+    return create(parent, DEX_DATA_AC, DEX_DATA_B);
+  }
+
+  private static ClassLoader create(ClassLoader parent, String... stringData) {
+    ByteBuffer[] dexBuffers = new ByteBuffer[stringData.length];  // One per Base64 string.
+    for (int idx = 0; idx < dexBuffers.length; idx++) {
+      dexBuffers[idx] = ByteBuffer.wrap(Base64.getDecoder().decode(stringData[idx]));
+    }
+    return new InMemoryDexClassLoader(dexBuffers, parent);
+  }
+
+  /*
+   * Derived from:
+   *
+   *   public class A {
+   *   }
+   *
+   *   public class C extends A {
+   *   }
+   *
+   */
+  private final static String DEX_DATA_AC =
+      "ZGV4CjAzNQD5KyH7WmGuqVEyL+2aKG1nyb27UJaCjFwQAgAAcAAAAHhWNBIAAAAAAAAAAIgBAAAH" +
+      "AAAAcAAAAAQAAACMAAAAAQAAAJwAAAAAAAAAAAAAAAMAAACoAAAAAgAAAMAAAAAQAQAAAAEAADAB" +
+      "AAA4AQAAQAEAAEgBAABNAQAAUgEAAGYBAAADAAAABAAAAAUAAAAGAAAABgAAAAMAAAAAAAAAAAAA" +
+      "AAAAAAABAAAAAAAAAAIAAAAAAAAAAAAAAAEAAAACAAAAAAAAAAEAAAAAAAAAcwEAAAAAAAABAAAA" +
+      "AQAAAAAAAAAAAAAAAgAAAAAAAAB9AQAAAAAAAAEAAQABAAAAaQEAAAQAAABwEAIAAAAOAAEAAQAB" +
+      "AAAAbgEAAAQAAABwEAAAAAAOAAY8aW5pdD4ABkEuamF2YQAGQy5qYXZhAANMQTsAA0xDOwASTGph" +
+      "dmEvbGFuZy9PYmplY3Q7AAFWABEABw4AEQAHDgAAAAEAAIGABIACAAABAAGBgASYAgALAAAAAAAA" +
+      "AAEAAAAAAAAAAQAAAAcAAABwAAAAAgAAAAQAAACMAAAAAwAAAAEAAACcAAAABQAAAAMAAACoAAAA" +
+      "BgAAAAIAAADAAAAAASAAAAIAAAAAAQAAAiAAAAcAAAAwAQAAAyAAAAIAAABpAQAAACAAAAIAAABz" +
+      "AQAAABAAAAEAAACIAQAA";
+
+  /*
+   * Derived from:
+   *
+   *   public class B {
+   *   }
+   *
+   */
+  private final static String DEX_DATA_B =
+      "ZGV4CjAzNQBgKV6iWFG4aOm5WEy8oGtDZjqsftBgwJ2oAQAAcAAAAHhWNBIAAAAAAAAAACABAAAF" +
+      "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAADcAAAAzAAAAOQA" +
+      "AADsAAAA9AAAAPkAAAANAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAAAA" +
+      "AAAAAAABAAAAAQAAAAAAAAABAAAAAAAAABUBAAAAAAAAAQABAAEAAAAQAQAABAAAAHAQAQAAAA4A" +
+      "Bjxpbml0PgAGQi5qYXZhAANMQjsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgARAAcOAAAAAQAAgYAE" +
+      "zAEACwAAAAAAAAABAAAAAAAAAAEAAAAFAAAAcAAAAAIAAAADAAAAhAAAAAMAAAABAAAAkAAAAAUA" +
+      "AAACAAAAnAAAAAYAAAABAAAArAAAAAEgAAABAAAAzAAAAAIgAAAFAAAA5AAAAAMgAAABAAAAEAEA" +
+      "AAAgAAABAAAAFQEAAAAQAAABAAAAIAEAAA==";
+}
diff --git a/test/912-classes/src/art/Test912.java b/test/912-classes/src/art/Test912.java
new file mode 100644
index 0000000..f3ff2b0
--- /dev/null
+++ b/test/912-classes/src/art/Test912.java
@@ -0,0 +1,454 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.ref.Reference;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Proxy;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+
+public class Test912 {
+  public static void run() throws Exception {
+    art.Main.bindAgentJNIForClass(Test912.class);
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    testClass("java.lang.Object");
+    testClass("java.lang.String");
+    testClass("java.lang.Math");
+    testClass("java.util.List");
+
+    testClass(getProxyClass());
+
+    testClass(int.class);
+    testClass(double[].class);
+
+    testClassType(int.class);
+    testClassType(getProxyClass());
+    testClassType(Runnable.class);
+    testClassType(String.class);
+    testClassType(ArrayList.class);
+
+    testClassType(int[].class);
+    testClassType(Runnable[].class);
+    testClassType(String[].class);
+
+    testClassFields(Integer.class);
+    testClassFields(int.class);
+    testClassFields(String[].class);
+
+    testClassMethods(Integer.class);
+    testClassMethods(int.class);
+    testClassMethods(String[].class);
+
+    testClassStatus(int.class);
+    testClassStatus(String[].class);
+    testClassStatus(Object.class);
+    testClassStatus(TestForNonInit.class);
+    try {
+      System.out.println(TestForInitFail.dummy);
+    } catch (ExceptionInInitializerError e) {
+    }
+    testClassStatus(TestForInitFail.class);
+
+    testInterfaces(int.class);
+    testInterfaces(String[].class);
+    testInterfaces(Object.class);
+    testInterfaces(InfA.class);
+    testInterfaces(InfB.class);
+    testInterfaces(InfC.class);
+    testInterfaces(ClassA.class);
+    testInterfaces(ClassB.class);
+    testInterfaces(ClassC.class);
+
+    testClassLoader(String.class);
+    testClassLoader(String[].class);
+    testClassLoader(InfA.class);
+    testClassLoader(getProxyClass());
+
+    testClassLoaderClasses();
+
+    System.out.println();
+
+    testClassVersion();
+
+    System.out.println();
+
+    // Use a dedicated thread to have a well-defined current thread.
+    Thread classEventsThread = new Thread("ClassEvents") {
+      @Override
+      public void run() {
+        try {
+          testClassEvents();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
+    classEventsThread.start();
+    classEventsThread.join();
+  }
+
+  private static void testClass(String className) throws Exception {
+    Class<?> base = Class.forName(className);
+    testClass(base);
+  }
+
+  private static void testClass(Class<?> base) throws Exception {
+    System.out.println(Arrays.toString(getClassSignature(base)));
+    // The natively reported modifiers must agree with what reflection reports.
+    int flags = getClassModifiers(base);
+    if (flags != base.getModifiers()) {
+      throw new RuntimeException("Unexpected modifiers: " + base.getModifiers() + " vs " + flags);
+    }
+    System.out.println(Integer.toHexString(flags));
+  }
+
+  private static void testClassType(Class<?> c) throws Exception {
+    // Query the three native predicates first, then print them on a single line.
+    String interfacePart = " interface=" + isInterface(c);
+    String arrayPart = " array=" + isArrayClass(c);
+    String modifiablePart = " modifiable=" + isModifiableClass(c);
+    System.out.println(c.getName() + interfacePart + arrayPart + modifiablePart);
+  }
+
+  private static void testClassFields(Class<?> c) throws Exception {
+    System.out.println(Arrays.toString(getClassFields(c)));
+  }
+
+  private static void testClassMethods(Class<?> c) throws Exception {
+    System.out.println(Arrays.toString(getClassMethods(c)));
+  }
+
+  private static void testClassStatus(Class<?> c) {
+    System.out.println(c + " " + Integer.toBinaryString(getClassStatus(c)));
+  }
+
+  private static void testInterfaces(Class<?> c) {
+    System.out.println(c + " " + Arrays.toString(getImplementedInterfaces(c)));
+  }
+
+  private static boolean IsBootClassLoader(ClassLoader l) {
+    // Hacky check for Android's fake boot classloader.
+    return l.getClass().getName().equals("java.lang.BootClassLoader");
+  }
+
+  private static void testClassLoader(Class<?> c) {
+    Object got = getClassLoader(c);
+    System.out.println(c + " " + (got == null ? "null" : got.getClass().getName()));
+    ClassLoader expected = c.getClassLoader();
+    if (got == null) {
+      // A null answer is only acceptable when reflection also reports no loader, or reports
+      // the boot class loader object.
+      if (expected != null && !IsBootClassLoader(expected)) {
+        throw new RuntimeException("Expected " + expected + ", but got null.");
+      }
+    } else if (!(got instanceof ClassLoader)) {
+      throw new RuntimeException(
+          "Unexpected \"classloader\": " + got + " (" + got.getClass() + ")");
+    } else if (got != expected) {
+      throw new RuntimeException("Unexpected classloader: " + expected + " vs " + got);
+    }
+  }
+
+  private static void testClassLoaderClasses() throws Exception {
+    System.out.println();
+    System.out.println("boot <- (B) <- (A,C)");
+    ClassLoader chained = DexData.create2(DexData.create1());
+    Class.forName("B", false, chained);
+    Class.forName("A", false, chained);
+    printClassLoaderClasses(chained);
+
+    System.out.println();
+    System.out.println("boot <- (B) <- (A, List)");
+    ClassLoader child = DexData.create2(DexData.create1());
+    Class.forName("A", false, child);
+    Class.forName("java.util.List", false, child);
+    Class.forName("B", false, child.getParent());
+    printClassLoaderClasses(child);
+
+    System.out.println();
+    System.out.println("boot <- 1+2 (A,B)");
+    ClassLoader combined = DexData.create12();
+    Class.forName("B", false, combined);
+    Class.forName("A", false, combined);
+    printClassLoaderClasses(combined);
+
+    // Walk up from the system class loader's parent to the root loader, then make sure the
+    // native query reports at least one class for it.
+    ClassLoader root = ClassLoader.getSystemClassLoader().getParent();
+    for (ClassLoader up = root.getParent(); up != null; up = up.getParent()) {
+      root = up;
+    }
+
+    Class<?>[] bootClasses = getClassLoaderClasses(root);
+    if (bootClasses.length == 0) {
+      throw new RuntimeException("No classes initiated by boot classloader.");
+    }
+
+    // java.util.List in particular has to be among the reported classes; dump the whole list
+    // before failing so the mismatch can be diagnosed.
+    boolean listReported = false;
+    for (int idx = 0; idx < bootClasses.length && !listReported; idx++) {
+      listReported = (bootClasses[idx] == java.util.List.class);
+    }
+    if (!listReported) {
+      System.out.println(Arrays.toString(bootClasses));
+      throw new RuntimeException("Could not find class java.util.List.");
+    }
+  }
+
+  private static void testClassVersion() {
+    System.out.println(Arrays.toString(getClassVersion(Main.class)));
+  }
+
+  private static void testClassEvents() throws Exception {
+    ClassLoader cl = Main.class.getClassLoader();
+    while (cl.getParent() != null) {
+      cl = cl.getParent();
+    }
+    final ClassLoader boot = cl;
+
+    // The JIT may deeply inline and load some classes. Preload these for test determinism.
+    final String PRELOAD_FOR_JIT[] = {
+        "java.nio.charset.CoderMalfunctionError",
+        "java.util.NoSuchElementException"
+    };
+    for (String s : PRELOAD_FOR_JIT) {
+      Class.forName(s);
+    }
+
+    Runnable r = new Runnable() {
+      @Override
+      public void run() {
+        try {
+          ClassLoader cl6 = DexData.create12();
+          System.out.println("C, true");
+          Class.forName("C", true, cl6);
+          printClassLoadMessages();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
+
+    Thread dummyThread = new Thread();
+    dummyThread.start();
+    dummyThread.join();
+
+    enableClassLoadPreparePrintEvents(true, Thread.currentThread());
+
+    ClassLoader cl1 = DexData.create12();
+    System.out.println("B, false");
+    Class.forName("B", false, cl1);
+    printClassLoadMessages();
+
+    ClassLoader cl2 = DexData.create12();
+    System.out.println("B, true");
+    Class.forName("B", true, cl2);
+    printClassLoadMessages();
+
+    ClassLoader cl3 = DexData.create12();
+    System.out.println("C, false");
+    Class.forName("C", false, cl3);
+    printClassLoadMessages();
+    System.out.println("A, false");
+    Class.forName("A", false, cl3);
+    printClassLoadMessages();
+
+    ClassLoader cl4 = DexData.create12();
+    System.out.println("C, true");
+    Class.forName("C", true, cl4);
+    printClassLoadMessages();
+    System.out.println("A, true");
+    Class.forName("A", true, cl4);
+    printClassLoadMessages();
+
+    ClassLoader cl5 = DexData.create12();
+    System.out.println("A, true");
+    Class.forName("A", true, cl5);
+    printClassLoadMessages();
+    System.out.println("C, true");
+    Class.forName("C", true, cl5);
+    printClassLoadMessages();
+
+    enableClassLoadPreparePrintEvents(false, null);
+
+    Thread t = new Thread(r, "TestRunner");
+    enableClassLoadPreparePrintEvents(true, t);
+    t.start();
+    t.join();
+    enableClassLoadPreparePrintEvents(false, null);
+
+    enableClassLoadPreparePrintEvents(true, Thread.currentThread());
+
+    // Check creation of arrays and proxies.
+    Proxy.getProxyClass(Main.class.getClassLoader(), new Class[] { Comparable.class, I0.class });
+    Class.forName("[Lart.Test912;");
+    printClassLoadMessages();
+
+    enableClassLoadPreparePrintEvents(false, null);
+
+    testClassLoadPrepareEquality();
+  }
+
+  private static void testClassLoadPrepareEquality() throws Exception {
+    setEqualityEventStorageClass(ClassF.class);
+
+    enableClassLoadPrepareEqualityEvents(true);
+
+    Class.forName("art.Test912$ClassE");
+
+    enableClassLoadPrepareEqualityEvents(false);
+  }
+
+  // Prints the classes reported by the native getClassLoaderClasses query, sorted by name,
+  // for cl and then for each of its parents in turn. The walk stops at the first loader
+  // that is null or whose class name does not start with "dalvik.system" (which keeps the
+  // boot class loader out of the output).
+  private static void printClassLoaderClasses(ClassLoader cl) {
+    ClassLoader current = cl;
+    while (current != null && current.getClass().getName().startsWith("dalvik.system")) {
+      Class<?>[] reported = getClassLoaderClasses(current);
+      Arrays.sort(reported, new ClassNameComparator());
+      System.out.println(Arrays.toString(reported));
+      current = current.getParent();
+    }
+  }
+
+  private static void printClassLoadMessages() {
+    for (String s : getClassLoadMessages()) {
+      System.out.println(s);
+    }
+  }
+
+  private static native boolean isModifiableClass(Class<?> c);
+  private static native String[] getClassSignature(Class<?> c);
+
+  private static native boolean isInterface(Class<?> c);
+  private static native boolean isArrayClass(Class<?> c);
+
+  private static native int getClassModifiers(Class<?> c);
+
+  private static native Object[] getClassFields(Class<?> c);
+  private static native Object[] getClassMethods(Class<?> c);
+  private static native Class<?>[] getImplementedInterfaces(Class<?> c);
+
+  private static native int getClassStatus(Class<?> c);
+
+  private static native Object getClassLoader(Class<?> c);
+
+  private static native Class<?>[] getClassLoaderClasses(ClassLoader cl);
+
+  private static native int[] getClassVersion(Class<?> c);
+
+  private static native void enableClassLoadPreparePrintEvents(boolean b, Thread filter);
+  private static native String[] getClassLoadMessages();
+
+  private static native void setEqualityEventStorageClass(Class<?> c);
+  private static native void enableClassLoadPrepareEqualityEvents(boolean b);
+
+  private static class TestForNonInit {
+    public static double dummy = Math.random();  // So it can't be compile-time initialized.
+  }
+
+  private static class TestForInitFail {
+    public static int dummy = ((int)Math.random())/0;  // So it throws when initializing.
+  }
+
+  public static interface InfA {
+  }
+  public static interface InfB extends InfA {
+  }
+  public static interface InfC extends InfB {
+  }
+
+  public abstract static class ClassA implements InfA {
+  }
+  public abstract static class ClassB extends ClassA implements InfB {
+  }
+  public abstract static class ClassC implements InfA, InfC {
+  }
+
+  public static class ClassE {
+    public void foo() {
+    }
+    public void bar() {
+    }
+  }
+
+  public static class ClassF {
+    public static Object STATIC = null;
+    public static Reference<Object> WEAK = null;
+  }
+
+  private static class ClassNameComparator implements Comparator<Class<?>> {
+    public int compare(Class<?> c1, Class<?> c2) {
+      return c1.getName().compareTo(c2.getName());
+    }
+  }
+
+  // See run-test 910 for an explanation.
+
+  private static Class<?> proxyClass = null;
+
+  private static Class<?> getProxyClass() throws Exception {
+    if (proxyClass == null) {
+      // Create proxies for interface combinations 1..21 until one of them is named
+      // "$Proxy20"; if none is, the last one created is kept. Either way the result stays
+      // cached in proxyClass for later calls.
+      for (int combo = 1; combo <= 21; combo++) {
+        proxyClass = createProxyClass(combo);
+        if (proxyClass.getName().equals("$Proxy20")) {
+          break;
+        }
+      }
+    }
+    return proxyClass;
+  }
+
+  private static Class<?> createProxyClass(int i) throws Exception {
+    // Runnable always comes first; every set bit n of i then adds the nested interface I<n>,
+    // in ascending bit order.
+    Class<?>[] interfaces = new Class<?>[Integer.bitCount(i) + 1];
+    interfaces[0] = Runnable.class;
+    int next = 1;
+    for (int bit = 0; i != 0; bit++) {
+      if ((i & 1) != 0) {
+        interfaces[next] = Class.forName("art.Test912$I" + bit);
+        next++;
+      }
+      i >>>= 1;
+    }
+    return Proxy.getProxyClass(Test912.class.getClassLoader(), interfaces);
+  }
+
+  // Need this for the proxy naming.
+  public static interface I0 {
+  }
+  public static interface I1 {
+  }
+  public static interface I2 {
+  }
+  public static interface I3 {
+  }
+  public static interface I4 {
+  }
+}
diff --git a/test/912-classes/src/art/Test912Art.java b/test/912-classes/src/art/Test912Art.java
new file mode 100644
index 0000000..e438473
--- /dev/null
+++ b/test/912-classes/src/art/Test912Art.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.ref.Reference;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Proxy;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+
+public class Test912Art {
+  public static void run() throws Exception {
+    art.Main.bindAgentJNIForClass(Test912Art.class);
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    testClassEvents();
+  }
+
+  private static void testClassEvents() throws Exception {
+    // Note: the JIT part of this test is about the JIT pulling in a class not yet touched by
+    //       anything else in the system. This could be the verifier or the interpreter. We
+    //       block the interpreter by calling ensureJitCompiled. The verifier, however, must
+    //       run in configurations where dex2oat didn't verify the class itself. So explicitly
+    //       check whether the class has been already loaded, and skip then.
+    // TODO: Add multiple configurations to the run script once that becomes easier to do.
+    if (hasJit() && !isLoadedClass("art.Test912Art$ClassD")) {
+      testClassEventsJit();
+    }
+  }
+
+  private static void testClassEventsJit() throws Exception {
+    // Record load events only while the JIT-compiled helper runs.
+    enableClassLoadSeenEvents(true);
+    testClassEventsJitImpl();
+    enableClassLoadSeenEvents(false);
+    boolean sawLoad = hadLoadEvent();
+    if (sawLoad) {
+      return;
+    }
+    throw new RuntimeException("Did not get expected load event.");
+  }
+
+  private static void testClassEventsJitImpl() throws Exception {
+    ensureJitCompiled(Test912Art.class, "testClassEventsJitImpl");
+
+    if (ClassD.x != 1) {
+      throw new RuntimeException("Unexpected value");
+    }
+  }
+
+  private static native void ensureJitCompiled(Class<?> c, String name);
+
+  private static native boolean hasJit();
+  private static native boolean isLoadedClass(String name);
+  private static native void enableClassLoadSeenEvents(boolean b);
+  private static native boolean hadLoadEvent();
+
+  public static class ClassD {
+    static int x = 1;
+  }
+}
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 19e12ae..e319f7d 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -137,9 +137,9 @@
       if (reference_kind == JVMTI_HEAP_REFERENCE_JNI_GLOBAL && class_tag == 0) {
         return 0;
       }
-      // Ignore classes (1000-1002@0) for thread objects. These can be held by the JIT.
+      // Ignore classes (1000 <= tag < 3000) for thread objects. These can be held by the JIT.
       if (reference_kind == JVMTI_HEAP_REFERENCE_THREAD && class_tag == 0 &&
-              (1000 <= *tag_ptr &&  *tag_ptr <= 1002)) {
+              (1000 <= *tag_ptr &&  *tag_ptr < 3000)) {
         return 0;
       }
       // Ignore stack-locals of untagged threads. That is the environment.
diff --git a/test/924-threads/expected.txt b/test/924-threads/expected.txt
index 4c0f4ea..1eb2e1b 100644
--- a/test/924-threads/expected.txt
+++ b/test/924-threads/expected.txt
@@ -1,10 +1,10 @@
 currentThread OK
-main
+TestThread
 5
 false
 java.lang.ThreadGroup[name=main,maxpri=10]
 class dalvik.system.PathClassLoader
-main
+TestThread
 5
 false
 java.lang.ThreadGroup[name=main,maxpri=10]
@@ -33,10 +33,11 @@
 e1 = ALIVE|WAITING_WITH_TIMEOUT|SLEEPING|WAITING
 5 = ALIVE|RUNNABLE
 2 = TERMINATED
-[Thread[FinalizerDaemon,5,system], Thread[FinalizerWatchdogDaemon,5,system], Thread[HeapTaskDaemon,5,system], Thread[ReferenceQueueDaemon,5,system], Thread[Signal Catcher,5,system], Thread[main,5,main]]
+[Thread[FinalizerDaemon,5,system], Thread[FinalizerWatchdogDaemon,5,system], Thread[HeapTaskDaemon,5,system], Thread[ReferenceQueueDaemon,5,system], Thread[TestThread,5,main], Thread[main,5,main]]
 JVMTI_ERROR_THREAD_NOT_ALIVE
 JVMTI_ERROR_THREAD_NOT_ALIVE
 Constructed thread
-Thread(EventTestThread): start
-Thread(EventTestThread): end
+[]
+[Thread(EventTestThread): start]
+[Thread(EventTestThread): end]
 Thread joined
diff --git a/test/924-threads/src/art/Test924.java b/test/924-threads/src/art/Test924.java
index 160bf8e..5445939 100644
--- a/test/924-threads/src/art/Test924.java
+++ b/test/924-threads/src/art/Test924.java
@@ -25,17 +25,35 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 public class Test924 {
   public static void run() throws Exception {
     Main.bindAgentJNIForClass(Test924.class);
-    doTest();
+
+    // Run the test on its own thread, so we have a known state for the "current" thread.
+    Thread t = new Thread("TestThread") {
+      @Override
+      public void run() {
+        try {
+          doTest();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
+    t.start();
+    t.join();
   }
 
   private static void doTest() throws Exception {
     Thread t1 = Thread.currentThread();
     Thread t2 = getCurrentThread();
 
+    // Need to adjust priority, as on-device this may be unexpected (and we prefer not
+    // to special-case this.)
+    t1.setPriority(5);
+
     if (t1 != t2) {
       throw new RuntimeException("Expected " + t1 + " but got " + t2);
     }
@@ -188,7 +206,32 @@
     }
 
     Collections.sort(threadList, THREAD_COMP);
-    System.out.println(threadList);
+
+    List<Thread> expectedList = new ArrayList<>();
+    Set<Thread> threadsFromTraces = Thread.getAllStackTraces().keySet();
+
+    expectedList.add(findThreadByName(threadsFromTraces, "FinalizerDaemon"));
+    expectedList.add(findThreadByName(threadsFromTraces, "FinalizerWatchdogDaemon"));
+    expectedList.add(findThreadByName(threadsFromTraces, "HeapTaskDaemon"));
+    expectedList.add(findThreadByName(threadsFromTraces, "ReferenceQueueDaemon"));
+    // We can't get the signal catcher through getAllStackTraces. So ignore it.
+    // expectedList.add(findThreadByName(threadsFromTraces, "Signal Catcher"));
+    expectedList.add(findThreadByName(threadsFromTraces, "TestThread"));
+    expectedList.add(findThreadByName(threadsFromTraces, "main"));
+
+    if (!threadList.containsAll(expectedList)) {
+      throw new RuntimeException("Expected " + expectedList + " as subset, got " + threadList);
+    }
+    System.out.println(expectedList);
+  }
+
+  private static Thread findThreadByName(Set<Thread> threads, String name) {
+    for (Thread candidate : threads) {  // First match in the set's iteration order wins.
+      if (candidate.getName().equals(name)) {
+        return candidate;
+      }
+    }
+    throw new RuntimeException("Did not find thread " + name + ": " + threads);
   }
 
   private static void doTLSTests() throws Exception {
@@ -256,13 +299,35 @@
   private static void doTestEvents() throws Exception {
     enableThreadEvents(true);
 
-    Thread t = new Thread("EventTestThread");
+    final CountDownLatch cdl1 = new CountDownLatch(1);
+    final CountDownLatch cdl2 = new CountDownLatch(1);
+
+    Runnable r = new Runnable() {
+      @Override
+      public void run() {
+        try {
+          cdl1.countDown();
+          cdl2.await();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
+        }
+      }
+    };
+    Thread t = new Thread(r, "EventTestThread");
 
     System.out.println("Constructed thread");
     Thread.yield();
+    Thread.sleep(100);
+    System.out.println(Arrays.toString(getThreadEventMessages()));
 
     t.start();
+    cdl1.await();
+
+    System.out.println(Arrays.toString(getThreadEventMessages()));
+
+    cdl2.countDown();
     t.join();
+    System.out.println(Arrays.toString(getThreadEventMessages()));
 
     System.out.println("Thread joined");
 
@@ -337,4 +402,5 @@
   private static native void setTLS(Thread t, long l);
   private static native long getTLS(Thread t);
   private static native void enableThreadEvents(boolean b);
+  private static native String[] getThreadEventMessages();
 }
diff --git a/test/924-threads/threads.cc b/test/924-threads/threads.cc
index 701ab1d..e21dcc2 100644
--- a/test/924-threads/threads.cc
+++ b/test/924-threads/threads.cc
@@ -16,6 +16,10 @@
 
 #include <stdio.h>
 
+#include <mutex>
+#include <string>
+#include <vector>
+
 #include "android-base/logging.h"
 #include "android-base/stringprintf.h"
 #include "jni.h"
@@ -139,17 +143,27 @@
   JvmtiErrorToException(env, jvmti_env, result);
 }
 
+static std::mutex gEventsMutex;
+static std::vector<std::string> gEvents;
+
 static void JNICALL ThreadEvent(jvmtiEnv* jvmti_env,
                                 JNIEnv* jni_env,
                                 jthread thread,
                                 bool is_start) {
   jvmtiThreadInfo info;
-  jvmtiError result = jvmti_env->GetThreadInfo(thread, &info);
-  if (result != JVMTI_ERROR_NONE) {
-    printf("Error getting thread info");
-    return;
+  {
+    std::lock_guard<std::mutex> guard(gEventsMutex);
+
+    jvmtiError result = jvmti_env->GetThreadInfo(thread, &info);
+    if (result != JVMTI_ERROR_NONE) {
+      gEvents.push_back("Error getting thread info");
+      return;
+    }
+
+    gEvents.push_back(android::base::StringPrintf("Thread(%s): %s",
+                                                  info.name,
+                                                  is_start ? "start" : "end"));
   }
-  printf("Thread(%s): %s\n", info.name, is_start ? "start" : "end");
 
   jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(info.name));
   jni_env->DeleteLocalRef(info.thread_group);
@@ -205,5 +219,18 @@
   JvmtiErrorToException(env, jvmti_env, ret);
 }
 
+// Builds a java/lang/String object array from gEvents via CreateObjectArray, then empties
+// gEvents. gEventsMutex is held for the whole call.
+extern "C" JNIEXPORT jobjectArray JNICALL Java_art_Test924_getThreadEventMessages(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
+  std::lock_guard<std::mutex> events_lock(gEventsMutex);
+  const jint count = static_cast<jint>(gEvents.size());
+  // Generator handed to CreateObjectArray: index i yields gEvents[i] as a new Java string.
+  auto to_jstring = [&](jint i) { return env->NewStringUTF(gEvents[i].c_str()); };
+  jobjectArray messages = CreateObjectArray(env, count, "java/lang/String", to_jstring);
+  gEvents.clear();
+  return messages;
+}
+
 }  // namespace Test924Threads
 }  // namespace art
diff --git a/test/959-invoke-polymorphic-accessors/src/Main.java b/test/959-invoke-polymorphic-accessors/src/Main.java
index b7ecf8e..59db807 100644
--- a/test/959-invoke-polymorphic-accessors/src/Main.java
+++ b/test/959-invoke-polymorphic-accessors/src/Main.java
@@ -794,6 +794,7 @@
             ValueHolder valueHolder = new ValueHolder();
             MethodHandles.Lookup lookup = MethodHandles.lookup();
             MethodHandle h0 = lookup.findSetter(ValueHolder.class, "m_f", float.class);
+            MethodHandle s0 = lookup.findSetter(ValueHolder.class, "m_s", short.class);
             h0.invoke(valueHolder, 0.22f);
             h0.invoke(valueHolder, new Float(1.11f));
             Number floatNumber = getFloatAsNumber();
@@ -807,6 +808,11 @@
               unreachable();
             } catch (NullPointerException e) {}
 
+            // Test that type conversion checks work on small field types.
+            short temp = (short)s0.invoke(valueHolder, new Byte((byte)45));
+            assertTrue(temp == 0);
+            assertTrue(valueHolder.m_s == 45);
+
             h0.invoke(valueHolder, (byte)1);
             h0.invoke(valueHolder, (short)2);
             h0.invoke(valueHolder, 3);
@@ -848,6 +854,7 @@
 
         private static void testStaticSetter() throws Throwable {
             MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle s0 = lookup.findStaticSetter(ValueHolder.class, "s_s", short.class);
             MethodHandle h0 = lookup.findStaticSetter(ValueHolder.class, "s_f", float.class);
             h0.invoke(0.22f);
             h0.invoke(new Float(1.11f));
@@ -860,6 +867,11 @@
               unreachable();
             } catch (NullPointerException e) {}
 
+            // Test that type conversion checks work on small field types.
+            short temp = (short)s0.invoke(new Byte((byte)45));
+            assertTrue(temp == 0);
+            assertTrue(ValueHolder.s_s == 45);
+
             h0.invoke((byte)1);
             h0.invoke((short)2);
             h0.invoke(3);
diff --git a/test/985-re-obsolete/expected.txt b/test/985-re-obsolete/expected.txt
new file mode 100644
index 0000000..5159a00
--- /dev/null
+++ b/test/985-re-obsolete/expected.txt
@@ -0,0 +1,35 @@
+Pre Start private method call
+hello - private
+Post Start private method call
+Not doing anything here
+Pre Finish private method call
+goodbye - private
+Post Finish private method call
+Pre Start private method call
+hello - private
+Post Start private method call
+transforming calling function
+Pre Finish private method call
+Goodbye - private - Transformed
+Post Finish private method call
+Pre Start private method call - Transformed
+Hello - private - Transformed
+Post Start private method call - Transformed
+Not doing anything here
+Pre Finish private method call - Transformed
+Goodbye - private - Transformed
+Post Finish private method call - Transformed
+Pre Start private method call - Transformed
+Hello - private - Transformed
+Post Start private method call - Transformed
+transforming calling function
+Pre Finish private method call - Transformed
+second - Goodbye - private - Transformed
+Post Finish private method call - Transformed
+second - Pre Start private method call - Transformed
+second - Hello - private - Transformed
+second - Post Start private method call - Transformed
+Not doing anything here
+second - Pre Finish private method call - Transformed
+second - Goodbye - private - Transformed
+second - Post Finish private method call - Transformed
diff --git a/test/985-re-obsolete/info.txt b/test/985-re-obsolete/info.txt
new file mode 100644
index 0000000..c8eafdc
--- /dev/null
+++ b/test/985-re-obsolete/info.txt
@@ -0,0 +1,4 @@
+Tests basic obsolete method support
+
+Regression test for b/37475600 which was caused by incorrectly checking for
+differences in the obsolete methods map.
diff --git a/test/985-re-obsolete/run b/test/985-re-obsolete/run
new file mode 100755
index 0000000..e92b873
--- /dev/null
+++ b/test/985-re-obsolete/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/912-classes/src/B.java b/test/985-re-obsolete/src/Main.java
similarity index 84%
rename from test/912-classes/src/B.java
rename to test/985-re-obsolete/src/Main.java
index 52ce4dd..d78d591 100644
--- a/test/912-classes/src/B.java
+++ b/test/985-re-obsolete/src/Main.java
@@ -14,5 +14,8 @@
  * limitations under the License.
  */
 
-public class B {
+public class Main {
+  public static void main(String[] args) throws Exception {
+    art.Test985.run();
+  }
 }
diff --git a/test/985-re-obsolete/src/art/Main.java b/test/985-re-obsolete/src/art/Main.java
new file mode 100644
index 0000000..8b01920
--- /dev/null
+++ b/test/985-re-obsolete/src/art/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+// Binder class so the agent's C code has something that can be bound and exposed to tests.
+// In a package to separate cleanly and work around CTS reference issues (though this class
+// should be replaced in the CTS version).
+public class Main {
+  // Load the given class with the given classloader, and bind all native methods to corresponding
+  // C methods in the agent. Will abort if any of the steps fail.
+  public static native void bindAgentJNI(String className, ClassLoader classLoader);
+  // Same as above, giving the class directly.
+  public static native void bindAgentJNIForClass(Class<?> klass);
+}
diff --git a/test/985-re-obsolete/src/art/Redefinition.java b/test/985-re-obsolete/src/art/Redefinition.java
new file mode 100644
index 0000000..0350ab4
--- /dev/null
+++ b/test/985-re-obsolete/src/art/Redefinition.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.ArrayList;
+// Common Redefinition functions. Placed here for use by CTS
+public class Redefinition {
+  // Bind native functions.
+  static {
+    Main.bindAgentJNIForClass(Redefinition.class);
+  }
+
+  public static final class CommonClassDefinition {
+    public final Class<?> target;
+    public final byte[] class_file_bytes;
+    public final byte[] dex_file_bytes;
+
+    public CommonClassDefinition(Class<?> target, byte[] class_file_bytes, byte[] dex_file_bytes) {
+      this.target = target;
+      this.class_file_bytes = class_file_bytes;
+      this.dex_file_bytes = dex_file_bytes;
+    }
+  }
+
+  // A set of possible test configurations. Tests should set this if they need to.
+  // This must be kept in sync with the defines in ti-agent/common_helper.cc
+  public static enum Config {
+    COMMON_REDEFINE(0),
+    COMMON_RETRANSFORM(1),
+    COMMON_TRANSFORM(2);
+
+    private final int val;
+    private Config(int val) {
+      this.val = val;
+    }
+  }
+
+  public static void setTestConfiguration(Config type) {
+    nativeSetTestConfiguration(type.val);
+  }
+
+  private static native void nativeSetTestConfiguration(int type);
+
+  // Transforms the class
+  public static native void doCommonClassRedefinition(Class<?> target,
+                                                      byte[] classfile,
+                                                      byte[] dexfile);
+
+  // Hands every definition in defs to doCommonMultiClassRedefinition in a single call, as three
+  // parallel arrays: targets, class-file bytes and dex-file bytes (same order as defs).
+  public static void doMultiClassRedefinition(CommonClassDefinition... defs) {
+    final int count = defs.length;
+    Class<?>[] targets = new Class<?>[count];
+    byte[][] classFiles = new byte[count][];
+    byte[][] dexFiles = new byte[count][];
+    for (int i = 0; i < count; i++) {
+      targets[i] = defs[i].target;
+      classFiles[i] = defs[i].class_file_bytes;
+      dexFiles[i] = defs[i].dex_file_bytes;
+    }
+    doCommonMultiClassRedefinition(targets, classFiles, dexFiles);
+  }
+
+  public static void addMultiTransformationResults(CommonClassDefinition... defs) {
+    for (CommonClassDefinition d : defs) {
+      addCommonTransformationResult(d.target.getCanonicalName(),
+                                    d.class_file_bytes,
+                                    d.dex_file_bytes);
+    }
+  }
+
+  public static native void doCommonMultiClassRedefinition(Class<?>[] targets,
+                                                           byte[][] classfiles,
+                                                           byte[][] dexfiles);
+  public static native void doCommonClassRetransformation(Class<?>... target);
+  public static native void setPopRetransformations(boolean pop);
+  public static native void popTransformationFor(String name);
+  public static native void enableCommonRetransformation(boolean enable);
+  public static native void addCommonTransformationResult(String target_name,
+                                                          byte[] class_bytes,
+                                                          byte[] dex_bytes);
+}
diff --git a/test/985-re-obsolete/src/art/Test985.java b/test/985-re-obsolete/src/art/Test985.java
new file mode 100644
index 0000000..405abd5
--- /dev/null
+++ b/test/985-re-obsolete/src/art/Test985.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.Base64;
+
+public class Test985 {
+
+  static class Transform {
+    private void Start() {
+      System.out.println("hello - private");
+    }
+
+    private void Finish() {
+      System.out.println("goodbye - private");
+    }
+
+    public void sayHi(Runnable r) {
+      System.out.println("Pre Start private method call");
+      Start();
+      System.out.println("Post Start private method call");
+      r.run();
+      System.out.println("Pre Finish private method call");
+      Finish();
+      System.out.println("Post Finish private method call");
+    }
+  }
+
+  // static class Transform {
+  //   private void Start() {
+  //     System.out.println("Hello - private - Transformed");
+  //   }
+  //
+  //   private void Finish() {
+  //     System.out.println("Goodbye - private - Transformed");
+  //   }
+  //
+  //   public void sayHi(Runnable r) {
+  //     System.out.println("Pre Start private method call - Transformed");
+  //     Start();
+  //     System.out.println("Post Start private method call - Transformed");
+  //     r.run();
+  //     System.out.println("Pre Finish private method call - Transformed");
+  //     Finish();
+  //     System.out.println("Post Finish private method call - Transformed");
+  //   }
+  // }
+  private static final byte[] CLASS_BYTES_1 = Base64.getDecoder().decode(
+    "yv66vgAAADQANgoADgAZCQAaABsIABwKAB0AHggAHwgAIAoADQAhCAAiCwAjACQIACUKAA0AJggA" +
+    "JwcAKQcALAEABjxpbml0PgEAAygpVgEABENvZGUBAA9MaW5lTnVtYmVyVGFibGUBAAVTdGFydAEA" +
+    "BkZpbmlzaAEABXNheUhpAQAXKExqYXZhL2xhbmcvUnVubmFibGU7KVYBAApTb3VyY2VGaWxlAQAM" +
+    "VGVzdDk4NS5qYXZhDAAPABAHAC0MAC4ALwEAHUhlbGxvIC0gcHJpdmF0ZSAtIFRyYW5zZm9ybWVk" +
+    "BwAwDAAxADIBAB9Hb29kYnllIC0gcHJpdmF0ZSAtIFRyYW5zZm9ybWVkAQArUHJlIFN0YXJ0IHBy" +
+    "aXZhdGUgbWV0aG9kIGNhbGwgLSBUcmFuc2Zvcm1lZAwAEwAQAQAsUG9zdCBTdGFydCBwcml2YXRl" +
+    "IG1ldGhvZCBjYWxsIC0gVHJhbnNmb3JtZWQHADMMADQAEAEALFByZSBGaW5pc2ggcHJpdmF0ZSBt" +
+    "ZXRob2QgY2FsbCAtIFRyYW5zZm9ybWVkDAAUABABAC1Qb3N0IEZpbmlzaCBwcml2YXRlIG1ldGhv" +
+    "ZCBjYWxsIC0gVHJhbnNmb3JtZWQHADUBABVhcnQvVGVzdDk4NSRUcmFuc2Zvcm0BAAlUcmFuc2Zv" +
+    "cm0BAAxJbm5lckNsYXNzZXMBABBqYXZhL2xhbmcvT2JqZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEA" +
+    "A291dAEAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwEAE2phdmEvaW8vUHJpbnRTdHJlYW0BAAdwcmlu" +
+    "dGxuAQAVKExqYXZhL2xhbmcvU3RyaW5nOylWAQASamF2YS9sYW5nL1J1bm5hYmxlAQADcnVuAQAL" +
+    "YXJ0L1Rlc3Q5ODUAIAANAA4AAAAAAAQAAAAPABAAAQARAAAAHQABAAEAAAAFKrcAAbEAAAABABIA" +
+    "AAAGAAEAAAAEAAIAEwAQAAEAEQAAACUAAgABAAAACbIAAhIDtgAEsQAAAAEAEgAAAAoAAgAAAAYA" +
+    "CAAHAAIAFAAQAAEAEQAAACUAAgABAAAACbIAAhIFtgAEsQAAAAEAEgAAAAoAAgAAAAkACAAKAAEA" +
+    "FQAWAAEAEQAAAGMAAgACAAAAL7IAAhIGtgAEKrcAB7IAAhIItgAEK7kACQEAsgACEgq2AAQqtwAL" +
+    "sgACEgy2AASxAAAAAQASAAAAIgAIAAAADAAIAA0ADAAOABQADwAaABAAIgARACYAEgAuABMAAgAX" +
+    "AAAAAgAYACsAAAAKAAEADQAoACoACA==");
+  private static final byte[] DEX_BYTES_1 = Base64.getDecoder().decode(
+    "ZGV4CjAzNQAh+CJbAAAAAAAAAAAAAAAAAAAAAAAAAADUBQAAcAAAAHhWNBIAAAAAAAAAABAFAAAd" +
+    "AAAAcAAAAAoAAADkAAAAAwAAAAwBAAABAAAAMAEAAAcAAAA4AQAAAQAAAHABAABEBAAAkAEAAJAB" +
+    "AACYAQAAoAEAAMEBAADgAQAA+QEAAAgCAAAsAgAATAIAAGMCAAB3AgAAjQIAAKECAAC1AgAA5AIA" +
+    "ABIDAABAAwAAbQMAAHQDAACCAwAAjQMAAJADAACUAwAAoQMAAKcDAACsAwAAtQMAALoDAADBAwAA" +
+    "BAAAAAUAAAAGAAAABwAAAAgAAAAJAAAACgAAAAsAAAAMAAAAFAAAABQAAAAJAAAAAAAAABUAAAAJ" +
+    "AAAA0AMAABUAAAAJAAAAyAMAAAgABAAYAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAARAAAAAAABABsA" +
+    "AAAEAAIAGQAAAAUAAAAAAAAABgAAABoAAAAAAAAAAAAAAAUAAAAAAAAAEgAAAAAFAADMBAAAAAAA" +
+    "AAY8aW5pdD4ABkZpbmlzaAAfR29vZGJ5ZSAtIHByaXZhdGUgLSBUcmFuc2Zvcm1lZAAdSGVsbG8g" +
+    "LSBwcml2YXRlIC0gVHJhbnNmb3JtZWQAF0xhcnQvVGVzdDk4NSRUcmFuc2Zvcm07AA1MYXJ0L1Rl" +
+    "c3Q5ODU7ACJMZGFsdmlrL2Fubm90YXRpb24vRW5jbG9zaW5nQ2xhc3M7AB5MZGFsdmlrL2Fubm90" +
+    "YXRpb24vSW5uZXJDbGFzczsAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwASTGphdmEvbGFuZy9PYmpl" +
+    "Y3Q7ABRMamF2YS9sYW5nL1J1bm5hYmxlOwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9sYW5n" +
+    "L1N5c3RlbTsALVBvc3QgRmluaXNoIHByaXZhdGUgbWV0aG9kIGNhbGwgLSBUcmFuc2Zvcm1lZAAs" +
+    "UG9zdCBTdGFydCBwcml2YXRlIG1ldGhvZCBjYWxsIC0gVHJhbnNmb3JtZWQALFByZSBGaW5pc2gg" +
+    "cHJpdmF0ZSBtZXRob2QgY2FsbCAtIFRyYW5zZm9ybWVkACtQcmUgU3RhcnQgcHJpdmF0ZSBtZXRo" +
+    "b2QgY2FsbCAtIFRyYW5zZm9ybWVkAAVTdGFydAAMVGVzdDk4NS5qYXZhAAlUcmFuc2Zvcm0AAVYA" +
+    "AlZMAAthY2Nlc3NGbGFncwAEbmFtZQADb3V0AAdwcmludGxuAANydW4ABXNheUhpAAV2YWx1ZQAB" +
+    "AAAABwAAAAEAAAAGAAAABAAHDgAJAAcOAQgPAAYABw4BCA8ADAEABw4BCA8BAw8BCA8BAw8BCA8B" +
+    "Aw8BCA8AAQABAAEAAADYAwAABAAAAHAQBQAAAA4AAwABAAIAAADdAwAACQAAAGIAAAAbAQIAAABu" +
+    "IAQAEAAOAAAAAwABAAIAAADlAwAACQAAAGIAAAAbAQMAAABuIAQAEAAOAAAABAACAAIAAADtAwAA" +
+    "KgAAAGIAAAAbARAAAABuIAQAEABwEAIAAgBiAAAAGwEOAAAAbiAEABAAchAGAAMAYgAAABsBDwAA" +
+    "AG4gBAAQAHAQAQACAGIAAAAbAQ0AAABuIAQAEAAOAAAAAwEAgIAEiAgBAqAIAQLECAMB6AgAAAIC" +
+    "ARwYAQIDAhYECBcXEwACAAAA5AQAAOoEAAD0BAAAAAAAAAAAAAAAAAAAEAAAAAAAAAABAAAAAAAA" +
+    "AAEAAAAdAAAAcAAAAAIAAAAKAAAA5AAAAAMAAAADAAAADAEAAAQAAAABAAAAMAEAAAUAAAAHAAAA" +
+    "OAEAAAYAAAABAAAAcAEAAAIgAAAdAAAAkAEAAAEQAAACAAAAyAMAAAMgAAAEAAAA2AMAAAEgAAAE" +
+    "AAAACAQAAAAgAAABAAAAzAQAAAQgAAACAAAA5AQAAAMQAAABAAAA9AQAAAYgAAABAAAAAAUAAAAQ" +
+    "AAABAAAAEAUAAA==");
+
+  // static class Transform {
+  //   private void Start() {
+  //     System.out.println("second - Hello - private - Transformed");
+  //   }
+  //
+  //   private void Finish() {
+  //     System.out.println("second - Goodbye - private - Transformed");
+  //   }
+  //
+  //   public void sayHi(Runnable r) {
+  //     System.out.println("second - Pre Start private method call - Transformed");
+  //     Start();
+  //     System.out.println("second - Post Start private method call - Transformed");
+  //     r.run();
+  //     System.out.println("second - Pre Finish private method call - Transformed");
+  //     Finish();
+  //     System.out.println("second - Post Finish private method call - Transformed");
+  //   }
+  // }
+  private static final byte[] CLASS_BYTES_2 = Base64.getDecoder().decode(
+    "yv66vgAAADQANgoADgAZCQAaABsIABwKAB0AHggAHwgAIAoADQAhCAAiCwAjACQIACUKAA0AJggA" +
+    "JwcAKQcALAEABjxpbml0PgEAAygpVgEABENvZGUBAA9MaW5lTnVtYmVyVGFibGUBAAVTdGFydAEA" +
+    "BkZpbmlzaAEABXNheUhpAQAXKExqYXZhL2xhbmcvUnVubmFibGU7KVYBAApTb3VyY2VGaWxlAQAM" +
+    "VGVzdDk4NS5qYXZhDAAPABAHAC0MAC4ALwEAJnNlY29uZCAtIEhlbGxvIC0gcHJpdmF0ZSAtIFRy" +
+    "YW5zZm9ybWVkBwAwDAAxADIBAChzZWNvbmQgLSBHb29kYnllIC0gcHJpdmF0ZSAtIFRyYW5zZm9y" +
+    "bWVkAQA0c2Vjb25kIC0gUHJlIFN0YXJ0IHByaXZhdGUgbWV0aG9kIGNhbGwgLSBUcmFuc2Zvcm1l" +
+    "ZAwAEwAQAQA1c2Vjb25kIC0gUG9zdCBTdGFydCBwcml2YXRlIG1ldGhvZCBjYWxsIC0gVHJhbnNm" +
+    "b3JtZWQHADMMADQAEAEANXNlY29uZCAtIFByZSBGaW5pc2ggcHJpdmF0ZSBtZXRob2QgY2FsbCAt" +
+    "IFRyYW5zZm9ybWVkDAAUABABADZzZWNvbmQgLSBQb3N0IEZpbmlzaCBwcml2YXRlIG1ldGhvZCBj" +
+    "YWxsIC0gVHJhbnNmb3JtZWQHADUBABVhcnQvVGVzdDk4NSRUcmFuc2Zvcm0BAAlUcmFuc2Zvcm0B" +
+    "AAxJbm5lckNsYXNzZXMBABBqYXZhL2xhbmcvT2JqZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEAA291" +
+    "dAEAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwEAE2phdmEvaW8vUHJpbnRTdHJlYW0BAAdwcmludGxu" +
+    "AQAVKExqYXZhL2xhbmcvU3RyaW5nOylWAQASamF2YS9sYW5nL1J1bm5hYmxlAQADcnVuAQALYXJ0" +
+    "L1Rlc3Q5ODUAIAANAA4AAAAAAAQAAAAPABAAAQARAAAAHQABAAEAAAAFKrcAAbEAAAABABIAAAAG" +
+    "AAEAAAAEAAIAEwAQAAEAEQAAACUAAgABAAAACbIAAhIDtgAEsQAAAAEAEgAAAAoAAgAAAAYACAAH" +
+    "AAIAFAAQAAEAEQAAACUAAgABAAAACbIAAhIFtgAEsQAAAAEAEgAAAAoAAgAAAAkACAAKAAEAFQAW" +
+    "AAEAEQAAAGMAAgACAAAAL7IAAhIGtgAEKrcAB7IAAhIItgAEK7kACQEAsgACEgq2AAQqtwALsgAC" +
+    "Egy2AASxAAAAAQASAAAAIgAIAAAADAAIAA0ADAAOABQADwAaABAAIgARACYAEgAuABMAAgAXAAAA" +
+    "AgAYACsAAAAKAAEADQAoACoACA==");
+  private static final byte[] DEX_BYTES_2 = Base64.getDecoder().decode(
+    "ZGV4CjAzNQBw/x+UAAAAAAAAAAAAAAAAAAAAAAAAAAAMBgAAcAAAAHhWNBIAAAAAAAAAAEgFAAAd" +
+    "AAAAcAAAAAoAAADkAAAAAwAAAAwBAAABAAAAMAEAAAcAAAA4AQAAAQAAAHABAAB8BAAAkAEAAJAB" +
+    "AACYAQAAoAEAALkBAADIAQAA7AEAAAwCAAAjAgAANwIAAE0CAABhAgAAdQIAAHwCAACKAgAAlQIA" +
+    "AJgCAACcAgAAqQIAAK8CAAC0AgAAvQIAAMICAADJAgAA8wIAABsDAABTAwAAigMAAMEDAAD3AwAA" +
+    "AgAAAAMAAAAEAAAABQAAAAYAAAAHAAAACAAAAAkAAAAKAAAADgAAAA4AAAAJAAAAAAAAAA8AAAAJ" +
+    "AAAACAQAAA8AAAAJAAAAAAQAAAgABAASAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAALAAAAAAABABUA" +
+    "AAAEAAIAEwAAAAUAAAAAAAAABgAAABQAAAAAAAAAAAAAAAUAAAAAAAAADAAAADgFAAAEBQAAAAAA" +
+    "AAY8aW5pdD4ABkZpbmlzaAAXTGFydC9UZXN0OTg1JFRyYW5zZm9ybTsADUxhcnQvVGVzdDk4NTsA" +
+    "IkxkYWx2aWsvYW5ub3RhdGlvbi9FbmNsb3NpbmdDbGFzczsAHkxkYWx2aWsvYW5ub3RhdGlvbi9J" +
+    "bm5lckNsYXNzOwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABJMamF2YS9sYW5nL09iamVjdDsAFExq" +
+    "YXZhL2xhbmcvUnVubmFibGU7ABJMamF2YS9sYW5nL1N0cmluZzsAEkxqYXZhL2xhbmcvU3lzdGVt" +
+    "OwAFU3RhcnQADFRlc3Q5ODUuamF2YQAJVHJhbnNmb3JtAAFWAAJWTAALYWNjZXNzRmxhZ3MABG5h" +
+    "bWUAA291dAAHcHJpbnRsbgADcnVuAAVzYXlIaQAoc2Vjb25kIC0gR29vZGJ5ZSAtIHByaXZhdGUg" +
+    "LSBUcmFuc2Zvcm1lZAAmc2Vjb25kIC0gSGVsbG8gLSBwcml2YXRlIC0gVHJhbnNmb3JtZWQANnNl" +
+    "Y29uZCAtIFBvc3QgRmluaXNoIHByaXZhdGUgbWV0aG9kIGNhbGwgLSBUcmFuc2Zvcm1lZAA1c2Vj" +
+    "b25kIC0gUG9zdCBTdGFydCBwcml2YXRlIG1ldGhvZCBjYWxsIC0gVHJhbnNmb3JtZWQANXNlY29u" +
+    "ZCAtIFByZSBGaW5pc2ggcHJpdmF0ZSBtZXRob2QgY2FsbCAtIFRyYW5zZm9ybWVkADRzZWNvbmQg" +
+    "LSBQcmUgU3RhcnQgcHJpdmF0ZSBtZXRob2QgY2FsbCAtIFRyYW5zZm9ybWVkAAV2YWx1ZQAAAAEA" +
+    "AAAHAAAAAQAAAAYAAAAEAAcOAAkABw4BCA8ABgAHDgEIDwAMAQAHDgEIDwEDDwEIDwEDDwEIDwED" +
+    "DwEIDwABAAEAAQAAABAEAAAEAAAAcBAFAAAADgADAAEAAgAAABUEAAAJAAAAYgAAABsBFgAAAG4g" +
+    "BAAQAA4AAAADAAEAAgAAAB0EAAAJAAAAYgAAABsBFwAAAG4gBAAQAA4AAAAEAAIAAgAAACUEAAAq" +
+    "AAAAYgAAABsBGwAAAG4gBAAQAHAQAgACAGIAAAAbARkAAABuIAQAEAByEAYAAwBiAAAAGwEaAAAA" +
+    "biAEABAAcBABAAIAYgAAABsBGAAAAG4gBAAQAA4AAAADAQCAgATACAEC2AgBAvwIAwGgCQAAAgIB" +
+    "HBgBAgMCEAQIERcNAAIAAAAcBQAAIgUAACwFAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAEAAAAAAAAA" +
+    "AQAAAB0AAABwAAAAAgAAAAoAAADkAAAAAwAAAAMAAAAMAQAABAAAAAEAAAAwAQAABQAAAAcAAAA4" +
+    "AQAABgAAAAEAAABwAQAAAiAAAB0AAACQAQAAARAAAAIAAAAABAAAAyAAAAQAAAAQBAAAASAAAAQA" +
+    "AABABAAAACAAAAEAAAAEBQAABCAAAAIAAAAcBQAAAxAAAAEAAAAsBQAABiAAAAEAAAA4BQAAABAA" +
+    "AAEAAABIBQAA");
+
+  public static void run() {
+    Redefinition.setTestConfiguration(Redefinition.Config.COMMON_REDEFINE);
+    doTest(new Transform());
+  }
+
+  public static void doTest(Transform t) {
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+    t.sayHi(() -> {
+      System.out.println("transforming calling function");
+      Redefinition.doCommonClassRedefinition(Transform.class, CLASS_BYTES_1, DEX_BYTES_1);
+    });
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+    t.sayHi(() -> {
+      System.out.println("transforming calling function");
+      Redefinition.doCommonClassRedefinition(Transform.class, CLASS_BYTES_2, DEX_BYTES_2);
+    });
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+  }
+}
diff --git a/test/986-native-method-bind/expected.txt b/test/986-native-method-bind/expected.txt
new file mode 100644
index 0000000..3376e6f
--- /dev/null
+++ b/test/986-native-method-bind/expected.txt
@@ -0,0 +1,10 @@
+private static native void art.Test986$Transform.sayHi2() = Java_art_Test986_00024Transform_sayHi2 -> Java_art_Test986_00024Transform_sayHi2
+Hello - 2
+private static native void art.Test986$Transform.sayHi() = Java_art_Test986_00024Transform_sayHi__ -> NoReallySayGoodbye
+Bye
+private static native void art.Test986$Transform.sayHi() = Java_art_Test986_00024Transform_sayHi__ -> Java_art_Test986_00024Transform_sayHi2
+private static native void art.Test986$Transform.sayHi2() = Java_art_Test986_00024Transform_sayHi2 -> Java_art_Test986_00024Transform_sayHi2
+Hello - 2
+private static native void art.Test986$Transform.sayHi() = Java_art_Test986_00024Transform_sayHi__ -> Java_art_Test986_00024Transform_sayHi__
+private static native void art.Test986$Transform.sayHi2() = Java_art_Test986_00024Transform_sayHi2 -> Java_art_Test986_00024Transform_sayHi2
+Hello
diff --git a/test/986-native-method-bind/info.txt b/test/986-native-method-bind/info.txt
new file mode 100644
index 0000000..1939936
--- /dev/null
+++ b/test/986-native-method-bind/info.txt
@@ -0,0 +1 @@
+Tests native-method-bind callback and native method replacement.
diff --git a/test/986-native-method-bind/native_bind.cc b/test/986-native-method-bind/native_bind.cc
new file mode 100644
index 0000000..eec635b
--- /dev/null
+++ b/test/986-native-method-bind/native_bind.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <memory>
+#include <stdio.h>
+#include <dlfcn.h>
+
+#include "android-base/stringprintf.h"
+#include "jni.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "scoped_local_ref.h"
+
+namespace art {
+namespace Test986NativeBind {
+
+static void doUpPrintCall(JNIEnv* env, const char* function) {
+  ScopedLocalRef<jclass> klass(env, env->FindClass("art/Test986"));
+  jmethodID targetMethod = env->GetStaticMethodID(klass.get(), function, "()V");
+  env->CallStaticVoidMethod(klass.get(), targetMethod);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test986_00024Transform_sayHi__(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  doUpPrintCall(env, "doSayHi");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test986_00024Transform_sayHi2(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  doUpPrintCall(env, "doSayHi2");
+}
+
+extern "C" JNIEXPORT void JNICALL NoReallySayGoodbye(JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  doUpPrintCall(env, "doSayBye");
+}
+
+// NativeMethodBind callback: reports the bound symbol to art.Test986.doNativeMethodBind and, if
+// the upcall answers with a different symbol name, binds the method to that symbol instead.
+static void doJvmtiMethodBind(jvmtiEnv* jvmtienv ATTRIBUTE_UNUSED,
+                              JNIEnv* env,
+                              jthread thread ATTRIBUTE_UNUSED,
+                              jmethodID m,
+                              void* address,
+                              /*out*/void** out_address) {
+  ScopedLocalRef<jclass> method_class(env, env->FindClass("java/lang/reflect/Method"));
+  ScopedLocalRef<jobject> method_obj(env, env->ToReflectedMethod(method_class.get(), m, false));
+  Dl_info addr_info;
+  if (dladdr(address, &addr_info) == 0 || addr_info.dli_sname == nullptr) {
+    ScopedLocalRef<jclass> exception_class(env, env->FindClass("java/lang/Exception"));
+    env->ThrowNew(exception_class.get(), "dladdr failure!");
+    return;
+  }
+  ScopedLocalRef<jstring> sym_name(env, env->NewStringUTF(addr_info.dli_sname));
+  ScopedLocalRef<jclass> klass(env, env->FindClass("art/Test986"));
+  jmethodID upcallMethod = env->GetStaticMethodID(
+      klass.get(), "doNativeMethodBind",
+      "(Ljava/lang/reflect/Method;Ljava/lang/String;)Ljava/lang/String;");
+  if (env->ExceptionCheck()) {
+    return;
+  }
+  ScopedLocalRef<jstring> new_symbol(env, reinterpret_cast<jstring>(env->CallStaticObjectMethod(
+      klass.get(), upcallMethod, method_obj.get(), sym_name.get())));
+  if (env->ExceptionCheck() || new_symbol.get() == nullptr) {
+    return;  // The upcall threw or returned null; leave out_address untouched.
+  }
+  const char* new_symbol_chars = env->GetStringUTFChars(new_symbol.get(), nullptr);
+  if (new_symbol_chars == nullptr) { return; }  // GetStringUTFChars failed; nothing to release.
+  if (strcmp(new_symbol_chars, addr_info.dli_sname) != 0) {
+    *out_address = dlsym(RTLD_DEFAULT, new_symbol_chars);
+    if (*out_address == nullptr) {
+      ScopedLocalRef<jclass> exception_class(env, env->FindClass("java/lang/Exception"));
+      env->ThrowNew(exception_class.get(), "dlsym failure!");  // No return: release below.
+    }
+  }
+  env->ReleaseStringUTFChars(new_symbol.get(), new_symbol_chars);
+}
+
+// Installs doJvmtiMethodBind as the NativeMethodBind callback; every other slot is zeroed.
+extern "C" JNIEXPORT void JNICALL Java_art_Test986_setupNativeBindNotify(
+    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED) {
+  jvmtiEventCallbacks callbacks = {};
+  callbacks.NativeMethodBind = doJvmtiMethodBind;
+  jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+}
+
+// Switches JVMTI_EVENT_NATIVE_METHOD_BIND notification on or off according to `enable`; a
+// failing JVMTI call is handed to JvmtiErrorToException.
+extern "C" JNIEXPORT void JNICALL Java_art_Test986_setNativeBindNotify(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jboolean enable) {
+  const jvmtiEventMode mode = enable ? JVMTI_ENABLE : JVMTI_DISABLE;
+  const jvmtiError res =
+      jvmti_env->SetEventNotificationMode(mode, JVMTI_EVENT_NATIVE_METHOD_BIND, nullptr);
+  if (res != JVMTI_ERROR_NONE) { JvmtiErrorToException(env, jvmti_env, res); }
+}
+
+// Registers Java_art_Test986_00024Transform_sayHi__ and Java_art_Test986_00024Transform_sayHi2
+// as the implementations of the ()V natives sayHi and sayHi2 on class k.
+extern "C" JNIEXPORT void JNICALL Java_art_Test986_rebindTransformClass(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jclass k) {
+  // Table layout: { Java method name, JNI signature, native entry point }.
+  JNINativeMethod methods[] = {
+    { "sayHi", "()V", reinterpret_cast<void*>(Java_art_Test986_00024Transform_sayHi__) },
+    { "sayHi2", "()V", reinterpret_cast<void*>(Java_art_Test986_00024Transform_sayHi2) },
+  };
+  env->RegisterNatives(k, methods, 2);
+}
+
+}  // namespace Test986NativeBind
+}  // namespace art
diff --git a/test/986-native-method-bind/run b/test/986-native-method-bind/run
new file mode 100755
index 0000000..e92b873
--- /dev/null
+++ b/test/986-native-method-bind/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/912-classes/src/B.java b/test/986-native-method-bind/src/Main.java
similarity index 84%
copy from test/912-classes/src/B.java
copy to test/986-native-method-bind/src/Main.java
index 52ce4dd..fac9d8e 100644
--- a/test/912-classes/src/B.java
+++ b/test/986-native-method-bind/src/Main.java
@@ -14,5 +14,8 @@
  * limitations under the License.
  */
 
-public class B {
+public class Main {
+  public static void main(String[] args) throws Exception {
+    art.Test986.run();
+  }
 }
diff --git a/test/986-native-method-bind/src/art/Main.java b/test/986-native-method-bind/src/art/Main.java
new file mode 100644
index 0000000..8b01920
--- /dev/null
+++ b/test/986-native-method-bind/src/art/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+// Binder class so the agent's C code has something that can be bound and exposed to tests.
+// In a package to separate cleanly and work around CTS reference issues (though this class
+// should be replaced in the CTS version).
+public class Main {
+  // Load the given class with the given classloader, and bind all native methods to corresponding
+  // C methods in the agent. Will abort if any of the steps fail.
+  public static native void bindAgentJNI(String className, ClassLoader classLoader);
+  // Same as above, giving the class directly.
+  public static native void bindAgentJNIForClass(Class<?> klass);
+}
diff --git a/test/986-native-method-bind/src/art/Test986.java b/test/986-native-method-bind/src/art/Test986.java
new file mode 100644
index 0000000..aac73d3
--- /dev/null
+++ b/test/986-native-method-bind/src/art/Test986.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+import java.util.HashMap;
+
+public class Test986 {
+  static {
+    // NB This is called before any setup is done so we don't need to worry about getting bind
+    // events.
+    Main.bindAgentJNIForClass(Test986.class);
+  }
+
+  // Method -> name of the native symbol that doNativeMethodBind should answer for it.
+  private static final HashMap<Method, String> SymbolMap = new HashMap<>();
+
+  // A class with native methods whose binding this test redirects.
+  static class Transform {
+    private static native void sayHi();
+    private static native void sayHi2();
+  }
+
+  public static void run() throws Exception {
+    setupNativeBindNotify();
+    setNativeBindNotify(true);
+    doTest();
+  }
+
+  private static void setNativeTransform(Method method, String dest) {
+    SymbolMap.put(method, dest);
+  }
+
+  private static void removeNativeTransform(Method method) {
+    SymbolMap.remove(method);
+  }
+
+  /**
+   * Notified of a native method bind. Returns the method's transform symbol, else nativeSym.
+   */
+  public static String doNativeMethodBind(Method method, String nativeSym) {
+    // Disable native bind notify for now to avoid infinite loops.
+    setNativeBindNotify(false);
+    String transSym = SymbolMap.getOrDefault(method, nativeSym);
+    System.out.println(method + " = " + nativeSym + " -> " + transSym);
+    setNativeBindNotify(true);
+    return transSym;
+  }
+
+  public static void doTest() throws Exception {
+    Method say_hi_method = Transform.class.getDeclaredMethod("sayHi");
+
+    // Test we will bind fine if we make no changes.
+    Transform.sayHi2();
+
+    // Test we can get in the middle of an automatic bind (first call of the native method).
+    setNativeTransform(say_hi_method, "NoReallySayGoodbye");
+    Transform.sayHi();
+
+    // Test we can get in between a manual bind (explicit re-registration of the natives).
+    setNativeTransform(say_hi_method, "Java_art_Test986_00024Transform_sayHi2");
+    rebindTransformClass();
+    Transform.sayHi();
+
+    // Test we can get rid of the transform again.
+    removeNativeTransform(say_hi_method);
+    rebindTransformClass();
+    Transform.sayHi();
+  }
+
+  // Functions called from native code.
+  public static void doSayHi() {
+    System.out.println("Hello");
+  }
+
+  public static void doSayHi2() {
+    System.out.println("Hello - 2");
+  }
+
+  public static void doSayBye() {
+    System.out.println("Bye");
+  }
+
+  private static native void setNativeBindNotify(boolean enable);
+  private static native void setupNativeBindNotify();
+  private static void rebindTransformClass() {
+    rebindTransformClass(Transform.class);
+  }
+  private static native void rebindTransformClass(Class<?> trans);
+}
diff --git a/test/987-agent-bind/agent_bind.cc b/test/987-agent-bind/agent_bind.cc
new file mode 100644
index 0000000..44366c1
--- /dev/null
+++ b/test/987-agent-bind/agent_bind.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <memory>
+#include <stdio.h>
+#include <dlfcn.h>
+
+#include "android-base/stringprintf.h"
+#include "jni.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "scoped_local_ref.h"
+
+namespace art {
+namespace Test987AgentBind {
+
+static void doUpPrintCall(JNIEnv* env, const char* function) {
+  // Up-call: invoke the static no-arg void method |function| on art.Test987.
+  ScopedLocalRef<jclass> cls(env, env->FindClass("art/Test987"));
+  env->CallStaticVoidMethod(cls.get(), env->GetStaticMethodID(cls.get(), function, "()V"));
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test987_00024Transform_sayHi__(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  doUpPrintCall(env, "doSayHi");
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test987_00024Transform_sayHi2(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  doUpPrintCall(env, "doSayHi2");
+}
+
+}  // namespace Test987AgentBind
+}  // namespace art
diff --git a/test/987-agent-bind/expected.txt b/test/987-agent-bind/expected.txt
new file mode 100644
index 0000000..ee4a424
--- /dev/null
+++ b/test/987-agent-bind/expected.txt
@@ -0,0 +1,2 @@
+Hello
+Hello - 2
diff --git a/test/987-agent-bind/info.txt b/test/987-agent-bind/info.txt
new file mode 100644
index 0000000..ae4a651
--- /dev/null
+++ b/test/987-agent-bind/info.txt
@@ -0,0 +1 @@
+Tests that native methods are bound from agent libs.
diff --git a/test/987-agent-bind/run b/test/987-agent-bind/run
new file mode 100755
index 0000000..e92b873
--- /dev/null
+++ b/test/987-agent-bind/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/912-classes/src/B.java b/test/987-agent-bind/src/Main.java
similarity index 84%
copy from test/912-classes/src/B.java
copy to test/987-agent-bind/src/Main.java
index 52ce4dd..9ce6242 100644
--- a/test/912-classes/src/B.java
+++ b/test/987-agent-bind/src/Main.java
@@ -14,5 +14,8 @@
  * limitations under the License.
  */
 
-public class B {
+public class Main {
+  public static void main(String[] args) throws Exception {
+    art.Test987.run();
+  }
 }
diff --git a/test/987-agent-bind/src/art/Main.java b/test/987-agent-bind/src/art/Main.java
new file mode 100644
index 0000000..8b01920
--- /dev/null
+++ b/test/987-agent-bind/src/art/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+// Binder class so the agent's C code has something that can be bound and exposed to tests.
+// In a package to separate cleanly and work around CTS reference issues (though this class
+// should be replaced in the CTS version).
+public class Main {
+  // Load the given class with the given classloader, and bind all native methods to corresponding
+  // C methods in the agent. Will abort if any of the steps fail.
+  public static native void bindAgentJNI(String className, ClassLoader classLoader);
+  // Same as above, giving the class directly.
+  public static native void bindAgentJNIForClass(Class<?> klass);
+}
diff --git a/test/987-agent-bind/src/art/Test987.java b/test/987-agent-bind/src/art/Test987.java
new file mode 100644
index 0000000..ae97ff2
--- /dev/null
+++ b/test/987-agent-bind/src/art/Test987.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+public class Test987 {
+  // Natives that nothing here registers explicitly; they are expected to bind from the agent lib.
+  static class Transform {
+    private static native void sayHi();
+    private static native void sayHi2();
+  }
+
+  public static void run() throws Exception {
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    Transform.sayHi();
+    Transform.sayHi2();
+  }
+  // Up-call targets: the native sayHi/sayHi2 implementations invoke these by name.
+  public static void doSayHi() {
+    System.out.println("Hello");
+  }
+
+  public static void doSayHi2() {
+    System.out.println("Hello - 2");
+  }
+}
diff --git a/test/Android.bp b/test/Android.bp
index c5d96da..1679669 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -261,6 +261,7 @@
         "908-gc-start-finish/gc_callbacks.cc",
         "910-methods/methods.cc",
         "911-get-stack-trace/stack_trace.cc",
+        "912-classes/classes.cc",
         "913-heaps/heaps.cc",
         "918-fields/fields.cc",
         "920-objects/objects.cc",
@@ -275,6 +276,8 @@
         "933-misc-events/misc_events.cc",
         "945-obsolete-native/obsolete_native.cc",
         "984-obsolete-invoke/obsolete_invoke.cc",
+        "986-native-method-bind/native_bind.cc",
+        "987-agent-bind/agent_bind.cc",
     ],
     shared_libs: [
         "libbase",
@@ -295,7 +298,7 @@
         // make this list smaller.
         "901-hello-ti-agent/basics.cc",
         "909-attach-agent/attach.cc",
-        "912-classes/classes.cc",
+        "912-classes/classes_art.cc",
         "936-search-onload/search_onload.cc",
         "983-source-transform-verify/source_transform.cc",
     ],
@@ -389,6 +392,7 @@
         "597-deopt-new-string/deopt.cc",
         "626-const-class-linking/clear_dex_cache_types.cc",
         "642-fp-callees/fp_callees.cc",
+        "647-jni-get-field-id/get_field_id.cc",
     ],
     shared_libs: [
         "libbacktrace",
diff --git a/test/Android.run-test-jvmti-java-library.mk b/test/Android.run-test-jvmti-java-library.mk
index dcb238c..70ee693 100644
--- a/test/Android.run-test-jvmti-java-library.mk
+++ b/test/Android.run-test-jvmti-java-library.mk
@@ -45,6 +45,8 @@
     911-get-stack-trace/src/art/Recurse.java \
     911-get-stack-trace/src/art/SameThread.java \
     911-get-stack-trace/src/art/ThreadListTraces.java \
+  912-classes/src/art/Test912.java \
+    912-classes/src/art/DexData.java \
   913-heaps/src/art/Test913.java \
   914-hello-obsolescence/src/art/Test914.java \
   915-obsolete-2/src/art/Test915.java \
@@ -84,6 +86,7 @@
   908 \
   910 \
   911 \
+  912 \
   913 \
   914 \
   915 \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 4415b2c..afd9144 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -17,22 +17,6 @@
 
 include art/build/Android.common_test.mk
 
-# List of all tests of the form 003-omnibus-opcodes.
-TEST_ART_RUN_TESTS := $(wildcard $(LOCAL_PATH)/[0-9]*)
-TEST_ART_RUN_TESTS := $(subst $(LOCAL_PATH)/,, $(TEST_ART_RUN_TESTS))
-
-########################################################################
-# The art-run-tests module, used to build all run-tests into an image.
-
-# The path where build only targets will be output, e.g.
-# out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA
-art_run_tests_build_dir := $(call intermediates-dir-for,JAVA_LIBRARIES,art-run-tests)/DATA
-art_run_tests_install_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA
-
-# A generated list of prerequisites that call 'run-test --build-only', the actual prerequisite is
-# an empty file touched in the intermediate directory.
-TEST_ART_RUN_TEST_BUILD_RULES :=
-
 # Dependencies for actually running a run-test.
 TEST_ART_RUN_TEST_DEPENDENCIES := \
   $(DX) \
@@ -41,54 +25,6 @@
   $(HOST_OUT_EXECUTABLES)/dexmerger \
   $(JACK)
 
-TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES := setup-jack-server
-
-# Helper to create individual build targets for tests. Must be called with $(eval).
-# $(1): the test number
-define define-build-art-run-test
-  dmart_target := $(art_run_tests_build_dir)/art-run-tests/$(1)/touch
-  dmart_install_target := $(art_run_tests_install_dir)/art-run-tests/$(1)/touch
-  run_test_options = --build-only
-  ifeq ($(ART_TEST_QUIET),true)
-    run_test_options += --quiet
-  endif
-$$(dmart_target): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
-ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
-$$(dmart_target):  $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
-endif
-$$(dmart_target):
-	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
-	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
-	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
-	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
-	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
-	  JACK=$(abspath $(JACK)) \
-	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
-	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
-	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
-	$(hide) touch $$@
-
-$$(dmart_install_target): $$(dmart_target)
-	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
-	$(hide) cp $$(dir $$<)/* $$(dir $$@)/
-
-  TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_install_target)
-  dmart_target :=
-  dmart_install_target :=
-  run_test_options :=
-endef
-$(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-build-art-run-test,$(test))))
-
-include $(CLEAR_VARS)
-LOCAL_MODULE_TAGS := tests
-LOCAL_MODULE := art-run-tests
-LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_BUILD_RULES)
-# The build system use this flag to pick up files generated by declare-make-art-run-test.
-LOCAL_PICKUP_FILES := $(art_run_tests_install_dir)
-
-include $(BUILD_PHONY_PACKAGE)
-
 # Convert's a rule name to the form used in variables, e.g. no-relocate to NO_RELOCATE
 define name-to-var
 $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_')
diff --git a/test/VerifierDeps/MyClassExtendingInterface.smali b/test/VerifierDeps/MyClassExtendingInterface.smali
new file mode 100644
index 0000000..43cf13b
--- /dev/null
+++ b/test/VerifierDeps/MyClassExtendingInterface.smali
@@ -0,0 +1,16 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMyClassExtendingInterface;
+.super LIface;
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index c7a57ce..b683a27 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -16,12 +16,14 @@
 
 #include "jni.h"
 
+#include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/logging.h"
 #include "dex_file-inl.h"
 #include "instrumentation.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "oat_quick_method_header.h"
 #include "runtime.h"
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index df7fa20..ceb4ba2 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -144,22 +144,11 @@
   }
 }
 
-static jboolean IsManaged(JNIEnv* env, jclass cls, size_t level) {
+static jboolean IsManaged(JNIEnv* env, jclass, size_t level) {
   ScopedObjectAccess soa(env);
-
-  ObjPtr<mirror::Class> klass = soa.Decode<mirror::Class>(cls);
-  const DexFile& dex_file = klass->GetDexFile();
-  const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
-  if (oat_dex_file == nullptr) {
-    // No oat file, this must be a test configuration that doesn't compile at all. Ignore that the
-    // result will be that we're running the interpreter.
-    return JNI_FALSE;
-  }
-
   NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
-
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
 }
 
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 56cfd24..f8b008b 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -372,16 +372,18 @@
 fi
 
 if [[ "$JVMTI_STRESS" = "y" ]]; then
-  if [[ "$USE_JVM" = "n" ]]; then
-    plugin=libopenjdkjvmtid.so
-    agent=libtistressd.so
-    if  [[ "$TEST_IS_NDEBUG" = "y" ]]; then
-      agent=libtistress.so
-      plugin=libopenjdkjvmti.so
-    fi
+  plugin=libopenjdkjvmtid.so
+  agent=libtistressd.so
+  if  [[ "$TEST_IS_NDEBUG" = "y" ]]; then
+    agent=libtistress.so
+    plugin=libopenjdkjvmti.so
+  fi
 
-    file_1=$(mktemp --tmpdir=${DEX_LOCATION})
-    file_2=$(mktemp --tmpdir=${DEX_LOCATION})
+  file_1=$(mktemp --tmpdir=${DEX_LOCATION})
+  file_2=$(mktemp --tmpdir=${DEX_LOCATION})
+  if [[ "$USE_JVM" = "y" ]]; then
+    FLAGS="${FLAGS} -agentpath:${ANDROID_HOST_OUT}/nativetest64/${agent}=/bin/false,${file_1},${file_2}"
+  else
     # TODO Remove need for DEXTER_BINARY!
     FLAGS="${FLAGS} -agentpath:${agent}=${DEXTER_BINARY},${file_1},${file_2}"
     if [ "$IS_JVMTI_TEST" = "n" ]; then
diff --git a/test/knownfailures.json b/test/knownfailures.json
index e7343a0..0e42a29 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -220,7 +220,6 @@
         "tests": ["604-hot-static-interface",
                   "612-jit-dex-cache",
                   "613-inlining-dex-cache",
-                  "616-cha",
                   "626-set-resolved-string"],
         "variant": "trace  | stream",
         "description": ["These tests expect JIT compilation, which is",
@@ -330,14 +329,8 @@
         "variant": "interpreter | optimizing | regalloc_gc | jit"
     },
     {
-        "tests": ["912-classes",
-                  "616-cha",
-                  "616-cha-abstract",
-                  "616-cha-interface",
-                  "616-cha-interface-default",
-                  "616-cha-miranda",
-                  "616-cha-proxy-method-inline"],
-        "bug": "http://b/36344364 http://b/36344221",
+        "tests": ["912-classes"],
+        "bug": "http://b/36344364",
         "variant": "no-dex2oat | relocate-npatchoat"
     },
     {
diff --git a/test/testrunner/run_build_test_target.py b/test/testrunner/run_build_test_target.py
index 0ab50af..b1274c9 100755
--- a/test/testrunner/run_build_test_target.py
+++ b/test/testrunner/run_build_test_target.py
@@ -62,7 +62,7 @@
 print custom_env
 os.environ.update(custom_env)
 
-if target.get('make'):
+if target.has_key('make'):
   build_command = 'make'
   build_command += ' -j' + str(n_threads)
   build_command += ' -C ' + env.ANDROID_BUILD_TOP
@@ -74,7 +74,7 @@
   if subprocess.call(build_command.split()):
     sys.exit(1)
 
-if target.get('golem'):
+if target.has_key('golem'):
   machine_type = target.get('golem')
   # use art-opt-cc by default since it mimics the default preopt config.
   default_golem_config = 'art-opt-cc'
@@ -92,7 +92,7 @@
   if subprocess.call(cmd):
     sys.exit(1)
 
-if target.get('run-test'):
+if target.has_key('run-test'):
   run_test_command = [os.path.join(env.ANDROID_BUILD_TOP,
                                    'art/test/testrunner/testrunner.py')]
   run_test_command += target.get('run-test', [])
diff --git a/test/testrunner/target_config.py b/test/testrunner/target_config.py
index 95ab2e7..6e47c5e 100644
--- a/test/testrunner/target_config.py
+++ b/test/testrunner/target_config.py
@@ -26,7 +26,7 @@
         'make' : 'test-art-host-gtest',
         'run-test' : [],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false'
+            'ART_USE_READ_BARRIER' : 'true'
         }
     },
 
@@ -45,19 +45,19 @@
     'art-interpreter' : {
         'run-test' : ['--interpreter'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false'
+            'ART_USE_READ_BARRIER' : 'true'
         }
     },
     'art-interpreter-access-checks' : {
         'run-test' : ['--interp-ac'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false'
+            'ART_USE_READ_BARRIER' : 'true'
         }
     },
     'art-jit' : {
         'run-test' : ['--jit'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false'
+            'ART_USE_READ_BARRIER' : 'true'
         }
     },
     'art-gcstress-gcverify': {
@@ -167,51 +167,51 @@
     'art-tracing' : {
         'run-test' : ['--trace'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false'
+            'ART_USE_READ_BARRIER' : 'true'
         }
     },
     'art-interpreter-tracing' : {
         'run-test' : ['--interpreter',
                       '--trace'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-forcecopy' : {
         'run-test' : ['--forcecopy'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-no-prebuild' : {
         'run-test' : ['--no-prebuild'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-no-image' : {
         'run-test' : ['--no-image'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-interpreter-no-image' : {
         'run-test' : ['--interpreter',
                       '--no-image'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-relocate-no-patchoat' : {
         'run-test' : ['--relocate-npatchoat'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-no-dex2oat' : {
         'run-test' : ['--no-dex2oat'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false',
+            'ART_USE_READ_BARRIER' : 'true',
         }
     },
     'art-heap-poisoning' : {
@@ -231,7 +231,7 @@
                       '--relocate',
                       '--jit'],
         'env' : {
-            'ART_USE_READ_BARRIER' : 'false'
+            'ART_USE_READ_BARRIER' : 'true'
         }
     },
 
diff --git a/test/ti-agent/scoped_local_ref.h b/test/ti-agent/scoped_local_ref.h
index daa1583..ba9725f 100644
--- a/test/ti-agent/scoped_local_ref.h
+++ b/test/ti-agent/scoped_local_ref.h
@@ -44,7 +44,7 @@
     }
   }
 
-  T release() __attribute__((warn_unused_result)) {
+  T release() WARN_UNUSED {
     T localRef = mLocalRef;
     mLocalRef = nullptr;
     return localRef;
diff --git a/test/ti-stress/stress.cc b/test/ti-stress/stress.cc
index fa49a35..e8e3cc7 100644
--- a/test/ti-stress/stress.cc
+++ b/test/ti-stress/stress.cc
@@ -84,6 +84,48 @@
   return ReadIntoBuffer(data->out_temp_dex, dex);
 }
 
+static void doJvmtiMethodBind(jvmtiEnv* jvmtienv,
+                              JNIEnv* env,
+                              jthread thread,
+                              jmethodID m,
+                              void* address,
+                              /*out*/void** out_address) {
+  // Logging-only NativeMethodBind callback: the method keeps the address the runtime chose.
+  *out_address = address;
+  // Strings handed out by JVMTI start as nullptr (Deallocate ignores null) and are all released
+  // at the end, so no exit path leaks them. Literal fallback labels are never deallocated.
+  char *tname = nullptr, *fname = nullptr, *fsig = nullptr, *fgen = nullptr;
+  char *cname = nullptr, *cgen = nullptr;
+  const char* thread_label = "<NULLPTR>";
+  jvmtiThreadInfo info;
+  if (thread != nullptr) {
+    if (jvmtienv->GetThreadInfo(thread, &info) == JVMTI_ERROR_NONE) {
+      thread_label = tname = info.name;
+    } else {
+      LOG(WARNING) << "Unable to get thread info!";
+      thread_label = "<UNKNOWN THREAD>";
+    }
+  }
+  jclass klass = nullptr;
+  if (jvmtienv->GetMethodDeclaringClass(m, &klass) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method declaring class!";
+  } else if (jvmtienv->GetMethodName(m, &fname, &fsig, &fgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method name!";
+  } else if (jvmtienv->GetClassSignature(klass, &cname, &cgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+  } else {
+    LOG(INFO) << "Loading native method \"" << cname << "->" << fname << fsig << "\". Thread is "
+              << thread_label;
+  }
+  char* const owned[] = {tname, cname, cgen, fname, fsig, fgen};
+  for (char* str : owned) {
+    jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(str));
+  }
+  if (klass != nullptr) {
+    env->DeleteLocalRef(klass);
+  }
+}
+
 // The hook we are using.
 void JNICALL ClassFileLoadHookSecretNoOp(jvmtiEnv* jvmti,
                                          JNIEnv* jni_env ATTRIBUTE_UNUSED,
@@ -143,7 +185,10 @@
   LOG(INFO) << "manual load & initialization of class java/lang/VMClassLoader!";
   jclass klass = jni_env->FindClass("java/lang/VMClassLoader");
   if (klass == nullptr) {
-    LOG(ERROR) << "Unable to find VMClassLoader class!";
+    // Probably on RI. Clear the exception so we can continue but don't mark vmclassloader as
+    // initialized.
+    LOG(WARNING) << "Unable to find VMClassLoader class!";
+    jni_env->ExceptionClear();
   } else {
     // GetMethodID is spec'd to cause the class to be initialized.
     jni_env->GetMethodID(klass, "hashCode", "()I");
@@ -187,12 +232,19 @@
   jvmtiEventCallbacks cb;
   memset(&cb, 0, sizeof(cb));
   cb.ClassFileLoadHook = ClassFileLoadHookSecretNoOp;
+  cb.NativeMethodBind = doJvmtiMethodBind;
   cb.VMInit = EnsureVMClassloaderInitializedCB;
   if (jvmti->SetEventCallbacks(&cb, sizeof(cb)) != JVMTI_ERROR_NONE) {
     LOG(ERROR) << "Unable to set class file load hook cb!";
     return 1;
   }
   if (jvmti->SetEventNotificationMode(JVMTI_ENABLE,
+                                      JVMTI_EVENT_NATIVE_METHOD_BIND,
+                                      nullptr) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to enable JVMTI_EVENT_NATIVE_METHOD_BIND event!";
+    return 1;
+  }
+  if (jvmti->SetEventNotificationMode(JVMTI_ENABLE,
                                       JVMTI_EVENT_VM_INIT,
                                       nullptr) != JVMTI_ERROR_NONE) {
     LOG(ERROR) << "Unable to enable JVMTI_EVENT_VM_INIT event!";
diff --git a/test/valgrind-suppressions.txt b/test/valgrind-suppressions.txt
index c775f98..086a856 100644
--- a/test/valgrind-suppressions.txt
+++ b/test/valgrind-suppressions.txt
@@ -69,3 +69,9 @@
    fun:_ZN12BacktraceMap6CreateEib
 }
 
+{
+   process_vm_readv
+   Memcheck:Param
+   process_vm_readv(lvec[...])
+   fun:process_vm_readv
+}
diff --git a/test/valgrind-target-suppressions.txt b/test/valgrind-target-suppressions.txt
index 452a174..0d63a1c 100644
--- a/test/valgrind-target-suppressions.txt
+++ b/test/valgrind-target-suppressions.txt
@@ -67,3 +67,10 @@
   fun:msync
   fun:_ZN3art6MemMap11MapInternalEPvmiiilb
 }
+
+{
+   process_vm_readv
+   Memcheck:Param
+   process_vm_readv(lvec[...])
+   fun:process_vm_readv
+}
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 049d4fd..720b1d2 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -51,6 +51,12 @@
 # Use JIT compiling by default.
 use_jit=true
 variant_cmdline_parameter="--variant=X32"
+# Timeout of JDWP test in ms.
+#
+# Note: some tests expect a timeout to check that *no* reply/event is received for a specific case.
+# A lower timeout can save several minutes when running the whole test suite, especially for
+# continuous testing. This value can be adjusted to fit the configuration of the host machine(s).
+jdwp_test_timeout=10000
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
@@ -161,6 +167,8 @@
       $image_compiler_option \
       --timeout 800 \
       --vm-arg -Djpda.settings.verbose=true \
+      --vm-arg -Djpda.settings.timeout=$jdwp_test_timeout \
+      --vm-arg -Djpda.settings.waitingTime=$jdwp_test_timeout \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
       --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jack --resource-classpath $jsr45_dex \