Merge "Now we have a proper C++ library, use std::unique_ptr."
diff --git a/Android.mk b/Android.mk
index a7a1556..363e49c 100644
--- a/Android.mk
+++ b/Android.mk
@@ -40,20 +40,18 @@
 
 .PHONY: clean-oat-host
 clean-oat-host:
-	rm -f $(ART_NATIVETEST_OUT)/*.odex
-	rm -f $(ART_NATIVETEST_OUT)/*.oat
-	rm -f $(ART_NATIVETEST_OUT)/*.art
-	rm -f $(ART_TEST_OUT)/*.odex
-	rm -f $(ART_TEST_OUT)/*.oat
-	rm -f $(ART_TEST_OUT)/*.art
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.odex
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.oat
-	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.art
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.odex
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.oat
-	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.art
-	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.art
+	rm -rf $(ART_NATIVETEST_OUT)
+	rm -rf $(ART_TEST_OUT)
+	rm -f $(HOST_CORE_IMG_OUT)
+	rm -f $(HOST_CORE_OAT_OUT)
+	rm -f $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/*.odex
+	rm -f $(TARGET_CORE_IMG_OUT)
+	rm -f $(TARGET_CORE_OAT_OUT)
+ifdef TARGET_2ND_ARCH
+	rm -f $(2ND_TARGET_CORE_IMG_OUT)
+	rm -f $(2ND_TARGET_CORE_OAT_OUT)
+endif
+	rm -rf $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.odex
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.oat
 	rm -f $(TARGET_OUT_APPS)/*.odex
@@ -69,24 +67,19 @@
 .PHONY: clean-oat-target
 clean-oat-target:
 	adb remount
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.odex
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.oat
-	adb shell rm -f $(ART_NATIVETEST_DIR)/*.art
-	adb shell rm -f $(ART_TEST_DIR)/*.odex
-	adb shell rm -f $(ART_TEST_DIR)/*.oat
-	adb shell rm -f $(ART_TEST_DIR)/*.art
+	adb shell rm -rf $(ART_NATIVETEST_DIR)
+	adb shell rm -rf $(ART_TEST_DIR)
 ifdef TARGET_2ND_ARCH
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.odex
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.oat
-	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.art
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.odex
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.oat
-	adb shell rm -f $(2ND_ART_TEST_DIR)/*.art
+	adb shell rm -rf $(2ND_ART_NATIVETEST_DIR)
+	adb shell rm -rf $(2ND_ART_TEST_DIR)
 endif
 	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
-	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.art
-	adb shell rm -f system/app/*.odex
+	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)
+	adb shell rm -rf system/app/$(DEX2OAT_TARGET_ARCH)
+ifdef TARGET_2ND_ARCH
+	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+	adb shell rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+endif
 	adb shell rm -rf data/run-test/test-*/dalvik-cache/*
 
 ifneq ($(art_dont_bother),true)
diff --git a/build/Android.common.mk b/build/Android.common.mk
index a8b27b5..bb17c15 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -386,5 +386,24 @@
   endif
 endef
 
+# Paths of the core oat file: the host copy and the copy under the target test directory.
+HOST_CORE_OAT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
+TARGET_CORE_OAT := $(ART_TEST_DIR)/$(DEX2OAT_TARGET_ARCH)/core.oat
+
+# Output paths of core.oat and the core.art image for the host and the primary target arch.
+HOST_CORE_OAT_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
+HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.art
+TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.oat
+TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.art
+
+# The same three target paths for the secondary architecture, when one is configured.
+ifdef TARGET_2ND_ARCH
+2ND_TARGET_CORE_OAT := $(2ND_ART_TEST_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
+2ND_TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
+2ND_TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.art
+endif
+
+# Host core image location; unlike HOST_CORE_IMG_OUT it has no $(ART_HOST_ARCH) component.
+HOST_CORE_IMG_LOCATION := $(HOST_OUT_JAVA_LIBRARIES)/core.art
 
 endif # ANDROID_COMMON_MK
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 4e8d1cc..bf07ecc 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -29,26 +29,6 @@
 HOST_CORE_DEX_FILES   := $(foreach jar,$(HOST_CORE_JARS),  $(call intermediates-dir-for,JAVA_LIBRARIES,$(jar),t,COMMON)/javalib.jar)
 TARGET_CORE_DEX_FILES := $(foreach jar,$(TARGET_CORE_JARS),$(call intermediates-dir-for,JAVA_LIBRARIES,$(jar), ,COMMON)/javalib.jar)
 
-HOST_CORE_OAT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
-TARGET_CORE_OAT := $(ART_TEST_DIR)/$(DEX2OAT_TARGET_ARCH)/core.oat
-ifdef TARGET_2ND_ARCH
-2ND_TARGET_CORE_OAT := $(2ND_ART_TEST_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
-endif
-
-HOST_CORE_OAT_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.oat
-TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.oat
-ifdef TARGET_2ND_ARCH
-2ND_TARGET_CORE_OAT_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.oat
-endif
-
-HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/core.art
-TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$(DEX2OAT_TARGET_ARCH)/core.art
-ifdef TARGET_2ND_ARCH
-2ND_TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)/core.art
-endif
-
-HOST_CORE_IMG_LOCATION := $(HOST_OUT_JAVA_LIBRARIES)/core.art
-
 TARGET_INSTRUCTION_SET_FEATURES := $(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
 
 # Use dex2oat debug version for better error reporting
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 1ee59c6..876419c 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -68,7 +68,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2d4834c..384a008 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -284,10 +284,10 @@
     ccode = FlipComparisonOrder(ccode);
   }
   if (rl_src2.is_const) {
-    RegLocation rl_temp = UpdateLocWide(rl_src2);
+    rl_src2 = UpdateLocWide(rl_src2);
     // Do special compare/branch against simple const operand if not already in registers.
     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-    if ((rl_temp.location != kLocPhysReg) &&
+    if ((rl_src2.location != kLocPhysReg) &&
         ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
       GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
       return;
@@ -1092,6 +1092,8 @@
         DCHECK(!res_hi.Valid());
         DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
         DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
+        // Will force free src1_hi, so must clobber.
+        Clobber(rl_src1.reg);
         FreeTemp(rl_src1.reg.GetHigh());
         res_hi = AllocTemp();
       }
@@ -1103,9 +1105,7 @@
               tmp1.GetReg());
       NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
       if (reg_status == 2) {
-        // Clobber rl_src1 since it was corrupted.
-        FreeTemp(rl_src1.reg);
-        Clobber(rl_src1.reg);
+        FreeTemp(rl_src1.reg.GetLow());
       }
     }
 
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 8cf1f86..f7a7fe8 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -609,18 +609,6 @@
   reg_pool_->next_dp_reg_ = 0;
 }
 
-void ArmMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
-}
-
 /*
  * TUNING: is true leaf?  Can't just use METHOD_IS_LEAF to determine as some
  * instructions might call out to C/assembly helper functions.  Until
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index b0211d6..86d32f4 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -853,7 +853,7 @@
         load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(),
                        encoded_disp);
       }
-      if ((displacement & ~1020) != 0 && !r_dest.IsFloat()) {
+      if ((displacement & ~1020) != 0 && r_dest.IsFloat()) {
         FreeTemp(r_ptr);
       }
       already_generated = true;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 418a989..6a27a7e 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -68,7 +68,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 10be0d6..18e605c 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -632,10 +632,6 @@
   reg_pool_->next_dp_reg_ = 0;
 }
 
-void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  LOG(FATAL) << "Unexpected call to FreeRegLocTemps for Arm64";
-}
-
 /*
  * TUNING: is true leaf?  Can't just use METHOD_IS_LEAF to determine as some
  * instructions might call out to C/assembly helper functions.  Until
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 24ed4a3..3662592 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1368,6 +1368,7 @@
   OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), sign_reg);
   OpRegReg(kOpXor, rl_result.reg.GetLow(), sign_reg);
   OpRegReg(kOpXor, rl_result.reg.GetHigh(), sign_reg);
+  FreeTemp(sign_reg);
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 8fcb09b..f5e7e63 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -145,10 +145,11 @@
       // Wrong register class, realloc, copy and transfer ownership.
       RegStorage new_reg = AllocTypedTemp(rl_src.fp, op_kind);
       OpRegCopy(new_reg, rl_src.reg);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_reg)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+      // Clobber the old reg.
       Clobber(rl_src.reg);
+      // ...and mark the new one live.
       rl_src.reg = new_reg;
+      MarkLive(rl_src);
     }
     return rl_src;
   }
@@ -222,10 +223,11 @@
       // Wrong register class, realloc, copy and transfer ownership.
       RegStorage new_regs = AllocTypedTempWide(rl_src.fp, op_kind);
       OpRegCopyWide(new_regs, rl_src.reg);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_regs)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+      // Clobber the old regs.
       Clobber(rl_src.reg);
+      // ...and mark the new ones live.
       rl_src.reg = new_regs;
+      MarkLive(rl_src);
     }
     return rl_src;
   }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index c5b40da..0c59465 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -68,7 +68,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 35345e8..7a3da71 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -492,17 +492,6 @@
   reg_pool_->next_dp_reg_ = 1;
 }
 
-void MipsMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
-      (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
-    // No overlap, free.
-    FreeTemp(rl_free.reg);
-  }
-}
 /*
  * In the Arm code a it is typical to use the link register
  * to hold the target address.  However, for Mips we must
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 2973e14..ba6865d 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -26,10 +26,9 @@
 /* Mark a temp register as dead.  Does not affect allocation state. */
 inline void Mir2Lir::ClobberBody(RegisterInfo* p) {
   DCHECK(p->IsTemp());
-  if (!p->IsDead()) {
+  if (p->SReg() != INVALID_SREG) {
     DCHECK(!(p->IsLive() && p->IsDirty()))  << "Live & dirty temp in clobber";
     p->MarkDead();
-    p->ResetDefBody();
     if (p->IsWide()) {
       p->SetIsWide(false);
       if (p->GetReg() != p->Partner()) {
@@ -37,7 +36,6 @@
         p = GetRegInfo(p->Partner());
         p->SetIsWide(false);
         p->MarkDead();
-        p->ResetDefBody();
       }
     }
   }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 10c2459..ea8071d 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -318,6 +318,8 @@
   int opt_flags = mir->optimization_flags;
   uint32_t vB = mir->dalvikInsn.vB;
   uint32_t vC = mir->dalvikInsn.vC;
+  DCHECK(CheckCorePoolSanity()) << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " @ 0x:"
+                                << std::hex << current_dalvik_offset_;
 
   // Prep Src and Dest locations.
   int next_sreg = 0;
@@ -946,6 +948,7 @@
     default:
       LOG(FATAL) << "Unexpected opcode: " << opcode;
   }
+  DCHECK(CheckCorePoolSanity());
 }  // NOLINT(readability/fn_size)
 
 // Process extended MIR instructions
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3201b60..687c41d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -339,23 +339,35 @@
       bool IsDead() { return (master_->liveness_ & storage_mask_) == 0; }
       // Liveness of this view matches.  Note: not equivalent to !IsDead().
       bool IsLive() { return (master_->liveness_ & storage_mask_) == storage_mask_; }
-      void MarkLive() { master_->liveness_ |= storage_mask_; }
+      void MarkLive(int s_reg) {
+        // TODO: Anything useful to assert here?
+        s_reg_ = s_reg;
+        master_->liveness_ |= storage_mask_;
+      }
       void MarkDead() {
-        master_->liveness_ &= ~storage_mask_;
-        SetSReg(INVALID_SREG);
+        if (SReg() != INVALID_SREG) {
+          s_reg_ = INVALID_SREG;
+          master_->liveness_ &= ~storage_mask_;
+          ResetDefBody();
+        }
       }
       RegStorage GetReg() { return reg_; }
       void SetReg(RegStorage reg) { reg_ = reg; }
       bool IsTemp() { return is_temp_; }
       void SetIsTemp(bool val) { is_temp_ = val; }
       bool IsWide() { return wide_value_; }
-      void SetIsWide(bool val) { wide_value_ = val; }
+      void SetIsWide(bool val) {
+        wide_value_ = val;
+        if (!val) {
+          // If not wide, reset partner to self.
+          SetPartner(GetReg());
+        }
+      }
       bool IsDirty() { return dirty_; }
       void SetIsDirty(bool val) { dirty_ = val; }
       RegStorage Partner() { return partner_; }
       void SetPartner(RegStorage partner) { partner_ = partner; }
-      int SReg() { return s_reg_; }
-      void SetSReg(int s_reg) { s_reg_ = s_reg; }
+      int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; }
       uint64_t DefUseMask() { return def_use_mask_; }
       void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; }
       RegisterInfo* Master() { return master_; }
@@ -653,6 +665,7 @@
     RegStorage AllocLiveReg(int s_reg, int reg_class, bool wide);
     RegStorage FindLiveReg(GrowableArray<RegisterInfo*> &regs, int s_reg);
     void FreeTemp(RegStorage reg);
+    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     bool IsLive(RegStorage reg);
     bool IsTemp(RegStorage reg);
     bool IsPromoted(RegStorage reg);
@@ -671,10 +684,10 @@
     void FlushAllRegs();
     bool RegClassMatches(int reg_class, RegStorage reg);
     void MarkLive(RegLocation loc);
-    void MarkLiveReg(RegStorage reg, int s_reg);
     void MarkTemp(RegStorage reg);
     void UnmarkTemp(RegStorage reg);
     void MarkWide(RegStorage reg);
+    void MarkNarrow(RegStorage reg);
     void MarkClean(RegLocation loc);
     void MarkDirty(RegLocation loc);
     void MarkInUse(RegStorage reg);
@@ -1074,7 +1087,6 @@
     virtual void AdjustSpillMask() = 0;
     virtual void ClobberCallerSave() = 0;
     virtual void FreeCallTemps() = 0;
-    virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) = 0;
     virtual void LockCallTemps() = 0;
     virtual void MarkPreservedSingle(int v_reg, RegStorage reg) = 0;
     virtual void MarkPreservedDouble(int v_reg, RegStorage reg) = 0;
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index bcc077b..06d05e2 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -152,6 +152,9 @@
   } else {
     RegisterInfo* info = GetRegInfo(reg);
     if (info->IsTemp() && !info->IsDead()) {
+      if (info->GetReg() != info->Partner()) {
+        ClobberBody(GetRegInfo(info->Partner()));
+      }
       ClobberBody(info);
       if (info->IsAliased()) {
         ClobberAliases(info);
@@ -169,19 +172,7 @@
   for (RegisterInfo* alias = info->GetAliasChain(); alias != nullptr;
        alias = alias->GetAliasChain()) {
     DCHECK(!alias->IsAliased());  // Only the master should be marked as alised.
-    if (alias->SReg() != INVALID_SREG) {
-      alias->SetSReg(INVALID_SREG);
-      alias->ResetDefBody();
-      if (alias->IsWide()) {
-        alias->SetIsWide(false);
-        if (alias->GetReg() != alias->Partner()) {
-          RegisterInfo* p = GetRegInfo(alias->Partner());
-          p->SetIsWide(false);
-          p->MarkDead();
-          p->ResetDefBody();
-        }
-      }
-    }
+    ClobberBody(alias);
   }
 }
 
@@ -204,6 +195,11 @@
     GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
     for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
       if (info->SReg() == s_reg) {
+        if (info->GetReg() != info->Partner()) {
+          // Dealing with a pair - clobber the other half.
+          DCHECK(!info->IsAliased());
+          ClobberBody(GetRegInfo(info->Partner()));
+        }
         ClobberBody(info);
         if (info->IsAliased()) {
           ClobberAliases(info);
@@ -325,7 +321,7 @@
       next = 0;
     RegisterInfo* info = regs.Get(next);
     // Try to allocate a register that doesn't hold a live value.
-    if (info->IsTemp() && !info->InUse() && !info->IsLive()) {
+    if (info->IsTemp() && !info->InUse() && info->IsDead()) {
       Clobber(info->GetReg());
       info->MarkInUse();
       /*
@@ -349,7 +345,13 @@
       ClobberSReg(info->SReg());
       Clobber(info->GetReg());
       info->MarkInUse();
-      info->SetIsWide(false);
+      if (info->IsWide()) {
+        RegisterInfo* partner = GetRegInfo(info->Partner());
+        DCHECK_EQ(info->GetReg().GetRegNum(), partner->Partner().GetRegNum());
+        DCHECK(partner->IsWide());
+        info->SetIsWide(false);
+        partner->SetIsWide(false);
+      }
       *next_temp = next + 1;
       return info->GetReg();
     }
@@ -463,6 +465,20 @@
   }
 }
 
+// Frees rl_free's temps unless either of its halves is also used by rl_keep.
+void Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
+  DCHECK(rl_keep.wide);
+  DCHECK(rl_free.wide);
+  const int keep_lo = rl_keep.reg.GetLowReg();
+  const int keep_hi = rl_keep.reg.GetHighReg();
+  const int free_lo = rl_free.reg.GetLowReg();
+  const int free_hi = rl_free.reg.GetHighReg();
+  const bool shares_lo = (free_lo == keep_lo) || (free_lo == keep_hi);
+  const bool shares_hi = (free_hi == keep_lo) || (free_hi == keep_hi);
+  if (!shares_lo && !shares_hi) {
+    FreeTemp(rl_free.reg);  // No overlap with rl_keep, so release the whole location.
+  }
+
 bool Mir2Lir::IsLive(RegStorage reg) {
   bool res;
   if (reg.IsPair()) {
@@ -725,8 +741,6 @@
       FlushSpecificReg(info);
     }
     info->MarkDead();
-    info->SetSReg(INVALID_SREG);
-    info->ResetDefBody();
     info->SetIsWide(false);
   }
 }
@@ -742,35 +756,48 @@
   }
 }
 
-void Mir2Lir::MarkLiveReg(RegStorage reg, int s_reg) {
-  RegisterInfo* info = GetRegInfo(reg);
-  if ((info->SReg() == s_reg) && info->IsLive()) {
-    return;  // Already live.
-  }
-  if (s_reg != INVALID_SREG) {
-    ClobberSReg(s_reg);
-    if (info->IsTemp()) {
-      info->MarkLive();
-    }
-  } else {
-    // Can't be live if no associated s_reg.
-    DCHECK(info->IsTemp());
-    info->MarkDead();
-  }
-  info->SetSReg(s_reg);
-}
-
 void Mir2Lir::MarkLive(RegLocation loc) {
   RegStorage reg = loc.reg;
+  if (!IsTemp(reg)) {
+    return;
+  }
   int s_reg = loc.s_reg_low;
-  if (reg.IsPair()) {
-    MarkLiveReg(reg.GetLow(), s_reg);
-    MarkLiveReg(reg.GetHigh(), s_reg+1);
-  } else {
-    if (loc.wide) {
-      ClobberSReg(s_reg + 1);
+  if (s_reg == INVALID_SREG) {
+    // Can't be live if no associated sreg.
+    if (reg.IsPair()) {
+      GetRegInfo(reg.GetLow())->MarkDead();
+      GetRegInfo(reg.GetHigh())->MarkDead();
+    } else {
+      GetRegInfo(reg)->MarkDead();
     }
-    MarkLiveReg(reg, s_reg);
+  } else {
+    if (reg.IsPair()) {
+      RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
+      RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+      if (info_lo->IsLive() && (info_lo->SReg() == s_reg) && info_hi->IsLive() &&
+          (info_hi->SReg() == s_reg + 1)) {  // The high half is recorded under s_reg + 1.
+        return;  // Already live.
+      }
+      ClobberSReg(s_reg);
+      ClobberSReg(s_reg + 1);
+      info_lo->MarkLive(s_reg);
+      info_hi->MarkLive(s_reg + 1);
+    } else {
+      RegisterInfo* info = GetRegInfo(reg);
+      if (info->IsLive() && (info->SReg() == s_reg)) {
+        return;  // Already live.
+      }
+      ClobberSReg(s_reg);
+      if (loc.wide) {
+        ClobberSReg(s_reg + 1);
+      }
+      info->MarkLive(s_reg);
+    }
+    if (loc.wide) {
+      MarkWide(reg);
+    } else {
+      MarkNarrow(reg);
+    }
   }
 }
 
@@ -792,6 +819,13 @@
   if (reg.IsPair()) {
     RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
     RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
+    // Unpair any old partners.
+    if (info_lo->IsWide() && info_lo->Partner() != info_hi->GetReg()) {
+      GetRegInfo(info_lo->Partner())->SetIsWide(false);
+    }
+    if (info_hi->IsWide() && info_hi->Partner() != info_lo->GetReg()) {
+      GetRegInfo(info_hi->Partner())->SetIsWide(false);
+    }
     info_lo->SetIsWide(true);
     info_hi->SetIsWide(true);
     info_lo->SetPartner(reg.GetHigh());
@@ -803,6 +837,13 @@
   }
 }
 
+void Mir2Lir::MarkNarrow(RegStorage reg) {
+  DCHECK(!reg.IsPair());  // Register pairs are not accepted here.
+  RegisterInfo* const reg_info = GetRegInfo(reg);
+  reg_info->SetIsWide(false);  // Drop the wide flag...
+  reg_info->SetPartner(reg);   // ...and make the register its own partner.
+}
+
 void Mir2Lir::MarkClean(RegLocation loc) {
   if (loc.reg.IsPair()) {
     RegisterInfo* info = GetRegInfo(loc.reg.GetLow());
@@ -842,16 +883,17 @@
 }
 
 bool Mir2Lir::CheckCorePoolSanity() {
-  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
+  GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    RegStorage my_reg = info->GetReg();
-    if (info->IsWide() && my_reg.IsPair()) {
+    if (info->IsTemp() && info->IsLive() && info->IsWide()) {
+      RegStorage my_reg = info->GetReg();
       int my_sreg = info->SReg();
       RegStorage partner_reg = info->Partner();
       RegisterInfo* partner = GetRegInfo(partner_reg);
       DCHECK(partner != NULL);
       DCHECK(partner->IsWide());
       DCHECK_EQ(my_reg.GetReg(), partner->Partner().GetReg());
+      DCHECK(partner->IsLive());
       int partner_sreg = partner->SReg();
       if (my_sreg == INVALID_SREG) {
         DCHECK_EQ(partner_sreg, INVALID_SREG);
@@ -859,13 +901,41 @@
         int diff = my_sreg - partner_sreg;
         DCHECK((diff == 0) || (diff == -1) || (diff == 1));
       }
-    } else {
-      // TODO: add whatever sanity checks might be useful for 64BitSolo regs here.
-      // TODO: sanity checks for floating point pools?
     }
-    if (!info->IsLive()) {
-      DCHECK(info->DefStart() == NULL);
-      DCHECK(info->DefEnd() == NULL);
+    if (info->Master() != info) {
+      // Aliased.
+      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
+        // If I'm live, master should not be live, but should show liveness in alias set.
+        DCHECK_EQ(info->Master()->SReg(), INVALID_SREG);
+        DCHECK(!info->Master()->IsDead());
+      } else if (!info->IsDead()) {
+        // If I'm not live, but there is liveness in the set master must be live.
+        DCHECK_EQ(info->SReg(), INVALID_SREG);
+        DCHECK(info->Master()->IsLive());
+      }
+    }
+    if (info->IsAliased()) {
+      // Has child aliases.
+      DCHECK_EQ(info->Master(), info);
+      if (info->IsLive() && (info->SReg() != INVALID_SREG)) {
+        // Master live, no child should be dead - all should show liveness in set.
+        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
+          DCHECK(!p->IsDead());
+          DCHECK_EQ(p->SReg(), INVALID_SREG);
+        }
+      } else if (!info->IsDead()) {
+        // Master not live, one or more aliases must be.
+        bool live_alias = false;
+        for (RegisterInfo* p = info->GetAliasChain(); p != nullptr; p = p->GetAliasChain()) {
+          live_alias |= p->IsLive();
+        }
+        DCHECK(live_alias);
+      }
+    }
+    if (info->IsLive() && (info->SReg() == INVALID_SREG)) {
+      // If not fully live, should have INVALID_SREG and def's should be null.
+      DCHECK(info->DefStart() == nullptr);
+      DCHECK(info->DefEnd() == nullptr);
     }
   }
   return true;
@@ -956,11 +1026,12 @@
     if (!RegClassMatches(reg_class, loc.reg)) {
       // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_regs = AllocTypedTempWide(loc.fp, reg_class);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_regs)->SetSReg(GetRegInfo(loc.reg)->SReg());
+      // Clobber the old regs.
       Clobber(loc.reg);
+      // ...and mark the new ones live.
       loc.reg = new_regs;
       MarkWide(loc.reg);
+      MarkLive(loc);
     }
     return loc;
   }
@@ -989,10 +1060,11 @@
     if (!RegClassMatches(reg_class, loc.reg)) {
       // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
-      // Associate the old sreg with the new register and clobber the old register.
-      GetRegInfo(new_reg)->SetSReg(GetRegInfo(loc.reg)->SReg());
+      // Clobber the old reg.
       Clobber(loc.reg);
+      // ...and mark the new one live.
       loc.reg = new_reg;
+      MarkLive(loc);
     }
     return loc;
   }
@@ -1220,19 +1292,9 @@
   RegLocation gpr_res = LocCReturnWide();
   RegLocation fpr_res = LocCReturnDouble();
   RegLocation res = is_double ? fpr_res : gpr_res;
-  if (res.reg.IsPair()) {
-    Clobber(res.reg);
-    LockTemp(res.reg);
-    // Does this wide value live in two registers or one vector register?
-    if (res.reg.GetLowReg() != res.reg.GetHighReg()) {
-      // FIXME: I think we want to mark these as wide as well.
-      MarkWide(res.reg);
-    }
-  } else {
-    Clobber(res.reg);
-    LockTemp(res.reg);
-    MarkWide(res.reg);
-  }
+  Clobber(res.reg);
+  LockTemp(res.reg);
+  MarkWide(res.reg);
   return res;
 }
 
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index cc0e1f2..52c870b 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -68,7 +68,6 @@
     void AdjustSpillMask();
     void ClobberCallerSave();
     void FreeCallTemps();
-    void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free);
     void LockCallTemps();
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index fbb1785..71a3962 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1542,7 +1542,6 @@
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else if (shift_amount > 31) {
         OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        FreeTemp(rl_src.reg.GetHigh());
         NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
         LoadConstant(rl_result.reg.GetLow(), 0);
       } else {
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 237c68c..8f06791 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -572,20 +572,6 @@
   reg_pool_->next_dp_reg_ = 1;
 }
 
-void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
-  DCHECK(rl_keep.wide);
-  DCHECK(rl_free.wide);
-  int free_low = rl_free.reg.GetLowReg();
-  int free_high = rl_free.reg.GetHighReg();
-  int keep_low = rl_keep.reg.GetLowReg();
-  int keep_high = rl_keep.reg.GetHighReg();
-  if ((free_low != keep_low) && (free_low != keep_high) &&
-      (free_high != keep_low) && (free_high != keep_high)) {
-    // No overlap, free both
-    FreeTemp(rl_free.reg);
-  }
-}
-
 void X86Mir2Lir::SpillCoreRegs() {
   if (num_core_spills_ == 0) {
     return;
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index fae44af..09e8b59 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -99,14 +99,33 @@
   gprs_[X14] = NULL;
   gprs_[X15] = NULL;
 
-  fprs_[D8] = NULL;
-  fprs_[D9] = NULL;
-  fprs_[D10] = NULL;
-  fprs_[D11] = NULL;
-  fprs_[D12] = NULL;
-  fprs_[D13] = NULL;
-  fprs_[D14] = NULL;
-  fprs_[D15] = NULL;
+  // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
+
+  fprs_[D0] = NULL;
+  fprs_[D1] = NULL;
+  fprs_[D2] = NULL;
+  fprs_[D3] = NULL;
+  fprs_[D4] = NULL;
+  fprs_[D5] = NULL;
+  fprs_[D6] = NULL;
+  fprs_[D7] = NULL;
+
+  fprs_[D16] = NULL;
+  fprs_[D17] = NULL;
+  fprs_[D18] = NULL;
+  fprs_[D19] = NULL;
+  fprs_[D20] = NULL;
+  fprs_[D21] = NULL;
+  fprs_[D22] = NULL;
+  fprs_[D23] = NULL;
+  fprs_[D24] = NULL;
+  fprs_[D25] = NULL;
+  fprs_[D26] = NULL;
+  fprs_[D27] = NULL;
+  fprs_[D28] = NULL;
+  fprs_[D29] = NULL;
+  fprs_[D30] = NULL;
+  fprs_[D31] = NULL;
 }
 
 extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index f2050b3..346b08c 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -42,7 +42,7 @@
 #endif
 
     // FP args
-    stp d1, d2,   [sp, #8]
+    stp d0, d1, [sp, #8]
     stp d2, d3, [sp, #24]
     stp d4, d5, [sp, #40]
     stp d6, d7, [sp, #56]
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 32f313f..b22ca82 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -59,116 +59,7 @@
 
  public:
   size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
-    // Push a transition back into managed code onto the linked list in thread.
-    ManagedStack fragment;
-    self->PushManagedStackFragment(&fragment);
-
-    size_t result;
-#if defined(__i386__)
-    // TODO: Set the thread?
-    __asm__ __volatile__(
-        "pushl $0\n\t"               // Push nullptr to terminate quick stack
-        "call *%%edi\n\t"           // Call the stub
-        "addl $4, %%esp"               // Pop nullptr
-        : "=a" (result)
-          // Use the result from eax
-        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code)
-          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-        : );  // clobber.
-    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
-    //       but compilation fails when declaring that.
-#elif defined(__arm__)
-    __asm__ __volatile__(
-        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
-        ".cfi_adjust_cfa_offset 52\n\t"
-        "push {r9}\n\t"
-        ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, #0\n\n"
-        "str r9, [sp, #-8]!\n\t"   // Push nullptr to terminate stack, +8B padding so 16B aligned
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "ldr r9, [sp, #8]\n\t"
-
-        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
-        "sub sp, sp, #20\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #4]\n\t"
-        "str %[arg2], [sp, #8]\n\t"
-        "str %[code], [sp, #12]\n\t"
-        "str %[self], [sp, #16]\n\t"
-        "ldr r0, [sp]\n\t"
-        "ldr r1, [sp, #4]\n\t"
-        "ldr r2, [sp, #8]\n\t"
-        "ldr r3, [sp, #12]\n\t"
-        "ldr r9, [sp, #16]\n\t"
-        "add sp, sp, #20\n\t"
-
-        "blx r3\n\t"                // Call the stub
-        "add sp, sp, #12\n\t"       // Pop nullptr and padding
-        ".cfi_adjust_cfa_offset -12\n\t"
-        "pop {r1-r12, lr}\n\t"      // Restore state
-        ".cfi_adjust_cfa_offset -52\n\t"
-        "mov %[result], r0\n\t"     // Save the result
-        : [result] "=r" (result)
-          // Use the result from r0
-        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : );  // clobber.
-#elif defined(__aarch64__)
-    __asm__ __volatile__(
-        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
-        ".cfi_adjust_cfa_offset 48\n\t"
-        "stp xzr, x1,  [sp]\n\t"        // nullptr(end of quick stack), x1
-        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
-        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
-
-        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
-        "sub sp, sp, #48\n\t"
-        "str %[arg0], [sp]\n\t"
-        "str %[arg1], [sp, #8]\n\t"
-        "str %[arg2], [sp, #16]\n\t"
-        "str %[code], [sp, #24]\n\t"
-        "str %[self], [sp, #32]\n\t"
-        "ldr x0, [sp]\n\t"
-        "ldr x1, [sp, #8]\n\t"
-        "ldr x2, [sp, #16]\n\t"
-        "ldr x3, [sp, #24]\n\t"
-        "ldr x18, [sp, #32]\n\t"
-        "add sp, sp, #48\n\t"
-
-        "blr x3\n\t"              // Call the stub
-        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
-        "ldr x30, [sp, #40]\n\t"      // Restore xLR
-        "add sp, sp, #48\n\t"          // Free stack space
-        ".cfi_adjust_cfa_offset -48\n\t"
-
-        "mov %[result], x0\n\t"        // Save the result
-        : [result] "=r" (result)
-          // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
-#elif defined(__x86_64__)
-    // Note: Uses the native convention
-    // TODO: Set the thread?
-    __asm__ __volatile__(
-        "pushq $0\n\t"                 // Push nullptr to terminate quick stack
-        "pushq $0\n\t"                 // 16B alignment padding
-        ".cfi_adjust_cfa_offset 16\n\t"
-        "call *%%rax\n\t"              // Call the stub
-        "addq $16, %%rsp\n\t"              // Pop nullptr and padding
-        ".cfi_adjust_cfa_offset -16\n\t"
-        : "=a" (result)
-          // Use the result from rax
-        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
-          // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
-    // TODO: Should we clobber the other registers?
-#else
-    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
-    result = 0;
-#endif
-    // Pop transition.
-    self->PopManagedStackFragment(fragment);
-    return result;
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, nullptr);
   }
 
   // TODO: Set up a frame according to referrer's specs.
@@ -179,6 +70,7 @@
     self->PushManagedStackFragment(&fragment);
 
     size_t result;
+    size_t fpr_result = 0;
 #if defined(__i386__)
     // TODO: Set the thread?
     __asm__ __volatile__(
@@ -230,6 +122,14 @@
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
+        // Spill space for d8 - d15
+        "sub sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset 64\n\t"
+        "stp d8, d9,   [sp]\n\t"
+        "stp d10, d11, [sp, #16]\n\t"
+        "stp d12, d13, [sp, #32]\n\t"
+        "stp d14, d15, [sp, #48]\n\t"
+
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 48\n\t"
         "stp %[referrer], x1, [sp]\n\t"// referrer, x1
@@ -238,27 +138,118 @@
 
         // Push everything on the stack, so we don't rely on the order. What a mess. :-(
         "sub sp, sp, #48\n\t"
+        ".cfi_adjust_cfa_offset 48\n\t"
         "str %[arg0], [sp]\n\t"
         "str %[arg1], [sp, #8]\n\t"
         "str %[arg2], [sp, #16]\n\t"
         "str %[code], [sp, #24]\n\t"
         "str %[self], [sp, #32]\n\t"
+
+        // Now we definitely have x0-x3 free, use it to garble d8 - d15
+        "movk x0, #0xfad0\n\t"
+        "movk x0, #0xebad, lsl #16\n\t"
+        "movk x0, #0xfad0, lsl #32\n\t"
+        "movk x0, #0xebad, lsl #48\n\t"
+        "fmov d8, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d9, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d10, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d11, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d12, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d13, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d14, x0\n\t"
+        "add x0, x0, 1\n\t"
+        "fmov d15, x0\n\t"
+
+        // Load call params
         "ldr x0, [sp]\n\t"
         "ldr x1, [sp, #8]\n\t"
         "ldr x2, [sp, #16]\n\t"
         "ldr x3, [sp, #24]\n\t"
         "ldr x18, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
-
-        "blr x3\n\t"              // Call the stub
-        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
-        "ldr x30, [sp, #40]\n\t"      // Restore xLR
-        "add sp, sp, #48\n\t"          // Free stack space
         ".cfi_adjust_cfa_offset -48\n\t"
 
+
+        "blr x3\n\t"              // Call the stub
+
+        // Test d8 - d15. We can use x1 and x2.
+        "movk x1, #0xfad0\n\t"
+        "movk x1, #0xebad, lsl #16\n\t"
+        "movk x1, #0xfad0, lsl #32\n\t"
+        "movk x1, #0xebad, lsl #48\n\t"
+        "fmov x2, d8\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d9\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d10\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d11\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d12\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d13\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d14\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+        "add x1, x1, 1\n\t"
+
+        "fmov x2, d15\n\t"
+        "cmp x1, x2\n\t"
+        "b.ne 1f\n\t"
+
+        "mov %[fpr_result], #0\n\t"
+
+        // Finish up.
+        "2:\n\t"
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"       // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
         "mov %[result], x0\n\t"        // Save the result
-        : [result] "=r" (result)
+
+        "ldp d8, d9,   [sp]\n\t"       // Restore d8 - d15
+        "ldp d10, d11, [sp, #16]\n\t"
+        "ldp d12, d13, [sp, #32]\n\t"
+        "ldp d14, d15, [sp, #48]\n\t"
+        "add sp, sp, #64\n\t"
+        ".cfi_adjust_cfa_offset -64\n\t"
+
+        "b 3f\n\t"                     // Goto end
+
+        // Failed fpr verification.
+        "1:\n\t"
+        "mov %[fpr_result], #1\n\t"
+        "b 2b\n\t"                     // Goto finish-up
+
+        // End
+        "3:\n\t"
+        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer)
@@ -285,6 +276,10 @@
 #endif
     // Pop transition.
     self->PopManagedStackFragment(fragment);
+
+    fp_result = fpr_result;
+    EXPECT_EQ(0U, fp_result);
+
     return result;
   }
 
@@ -314,6 +309,9 @@
     return 0;
 #endif
   }
+
+ protected:
+  size_t fp_result;
 };
 
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index def98dd..0cc6b41 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -2801,7 +2801,7 @@
 static void CheckProxyMethod(mirror::ArtMethod* method,
                              Handle<mirror::ArtMethod>& prototype);
 
-mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccess& soa, jstring name,
+mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
                                              jobjectArray interfaces, jobject loader,
                                              jobjectArray methods, jobjectArray throws) {
   Thread* self = soa.Self();
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index c89fedf..54805be 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -46,7 +46,7 @@
 
 class InternTable;
 template<class T> class ObjectLock;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 template<class T> class Handle;
 
 typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
@@ -326,8 +326,9 @@
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, mirror::ArtMethod* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
-                                  jobject loader, jobjectArray methods, jobjectArray throws)
+  mirror::Class* CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
+                                  jobjectArray interfaces, jobject loader, jobjectArray methods,
+                                  jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index c81706f..39b2ec2 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -139,7 +139,7 @@
   self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
-JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
+JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_method_jobj,
                                     std::vector<jvalue>& args) {
   DCHECK(soa.Env()->IsInstanceOf(rcvr_jobj, WellKnownClasses::java_lang_reflect_Proxy));
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index bfcb58f..f1795a5 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -699,7 +699,7 @@
   }
 }
 
-JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
+JValue InvokeProxyInvocationHandler(ScopedObjectAccessAlreadyRunnable& soa, const char* shorty,
                                     jobject rcvr_jobj, jobject interface_art_method_jobj,
                                     std::vector<jvalue>& args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index 86c5c3f..b3b1b71 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -32,7 +32,8 @@
 // TODO: Get global references for these
 Class* ArtField::java_lang_reflect_ArtField_ = NULL;
 
-ArtField* ArtField::FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field) {
+ArtField* ArtField::FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
+                                       jobject jlr_field) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_reflect_Field_artField);
   mirror::ArtField* field = f->GetObject(soa.Decode<mirror::Object*>(jlr_field))->AsArtField();
   DCHECK(field != nullptr);
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 029bd5a..36e62c2 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -27,14 +27,15 @@
 namespace art {
 
 struct ArtFieldOffsets;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 
 namespace mirror {
 
 // C++ mirror of java.lang.reflect.ArtField
 class MANAGED ArtField : public Object {
  public:
-  static ArtField* FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field)
+  static ArtField* FromReflectedField(const ScopedObjectAccessAlreadyRunnable& soa,
+                                      jobject jlr_field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 495ae2d..6af4cdb 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -48,7 +48,8 @@
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
 
-ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method) {
+ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
+                                          jobject jlr_method) {
   mirror::ArtField* f =
       soa.DecodeField(WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
   mirror::ArtMethod* method = f->GetObject(soa.Decode<mirror::Object*>(jlr_method))->AsArtMethod();
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 37f26a2..bb43328 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -32,7 +32,7 @@
 union JValue;
 struct MethodClassOffsets;
 class MethodHelper;
-class ScopedObjectAccess;
+class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
 class ShadowFrame;
 
@@ -46,7 +46,8 @@
 // C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
 class MANAGED ArtMethod : public Object {
  public:
-  static ArtMethod* FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method)
+  static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
+                                        jobject jlr_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Class* GetDeclaringClass() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index 744ac05..822aefa 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -24,12 +24,12 @@
 
 // Variant of ScopedObjectAccess that does no runnable transitions. Should only be used by "fast"
 // JNI methods.
-class ScopedFastNativeObjectAccess : public ScopedObjectAccess {
+class ScopedFastNativeObjectAccess : public ScopedObjectAccessAlreadyRunnable {
  public:
   explicit ScopedFastNativeObjectAccess(JNIEnv* env)
     LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
     SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
-     : ScopedObjectAccess(env) {
+     : ScopedObjectAccessAlreadyRunnable(env) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK((*Self()->GetManagedStack()->GetTopQuickFrame())->IsFastNative());
     // Don't work with raw objects in non-runnable states.
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 55d35d1..c08cc30 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -100,7 +100,8 @@
     AppendWide(jv.j);
   }
 
-  void BuildArgArrayFromVarArgs(const ScopedObjectAccess& soa, mirror::Object* receiver, va_list ap)
+  void BuildArgArrayFromVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::Object* receiver, va_list ap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
@@ -135,8 +136,8 @@
     }
   }
 
-  void BuildArgArrayFromJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
-                                jvalue* args)
+  void BuildArgArrayFromJValues(const ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::Object* receiver, jvalue* args)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
@@ -217,7 +218,8 @@
                      PrettyDescriptor(found_descriptor.as_string()).c_str()).c_str());
   }
 
-  bool BuildArgArrayFromObjectArray(const ScopedObjectAccess& soa, mirror::Object* receiver,
+  bool BuildArgArrayFromObjectArray(const ScopedObjectAccessAlreadyRunnable& soa,
+                                    mirror::Object* receiver,
                                     mirror::ObjectArray<mirror::Object>* args, MethodHelper& mh)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile::TypeList* classes = mh.GetParameterTypeList();
@@ -396,8 +398,9 @@
 }
 
 
-static void InvokeWithArgArray(const ScopedObjectAccessUnchecked& soa, mirror::ArtMethod* method,
-                               ArgArray* arg_array, JValue* result, const char* shorty)
+static void InvokeWithArgArray(const ScopedObjectAccessAlreadyRunnable& soa,
+                               mirror::ArtMethod* method, ArgArray* arg_array, JValue* result,
+                               const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
@@ -406,7 +409,8 @@
   method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
 
-JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj, jmethodID mid, va_list args)
+JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
+                         va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
@@ -418,7 +422,7 @@
   return result;
 }
 
-JValue InvokeWithJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
+JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   MethodHelper mh(method);
@@ -429,7 +433,7 @@
   return result;
 }
 
-JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args) {
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
@@ -440,7 +444,7 @@
   return result;
 }
 
-JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args) {
   mirror::Object* receiver = soa.Decode<mirror::Object*>(obj);
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
@@ -460,7 +464,7 @@
                                     mh.GetShorty());
 }
 
-jobject InvokeMethod(const ScopedObjectAccess& soa, jobject javaMethod,
+jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, bool accessible) {
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
diff --git a/runtime/reflection.h b/runtime/reflection.h
index d9a7228..2c54c06 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -29,8 +29,7 @@
 }  // namespace mirror
 union JValue;
 class MethodHelper;
-class ScopedObjectAccess;
-class ScopedObjectAccessUnchecked;
+class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class ThrowLocation;
 
@@ -48,18 +47,19 @@
                            const JValue& src, JValue* dst)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj, jmethodID mid, va_list args)
+JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
+                         va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeWithJValues(const ScopedObjectAccessUnchecked& soa, mirror::Object* receiver,
+JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -67,7 +67,7 @@
                            MethodHelper& mh, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-jobject InvokeMethod(const ScopedObjectAccess& soa, jobject method, jobject receiver,
+jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver,
                      jobject args, bool accessible)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index dbd961f..d56495e 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -93,50 +93,15 @@
   ThreadState old_thread_state_;
   const bool expected_has_no_thread_;
 
+  friend class ScopedObjectAccessUnchecked;
   DISALLOW_COPY_AND_ASSIGN(ScopedThreadStateChange);
 };
 
-// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
-//
-// This class performs the necessary thread state switching to and from Runnable and lets us
-// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
-// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
-// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
-// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
-// is also manipulating the Object.
-//
-// The destructor transitions back to the previous thread state, typically Native. In this state
-// GC and thread suspension may occur.
-//
-// For annotalysis the subclass ScopedObjectAccess (below) makes it explicit that a shared of
-// the mutator_lock_ will be acquired on construction.
-class ScopedObjectAccessUnchecked : public ScopedThreadStateChange {
+// Assumes we are already runnable.
+class ScopedObjectAccessAlreadyRunnable {
  public:
-  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
-      : ScopedThreadStateChange(ThreadForEnv(env), kRunnable),
-        env_(down_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
-    self_->VerifyStack();
-    Locks::mutator_lock_->AssertSharedHeld(self_);
-  }
-
-  explicit ScopedObjectAccessUnchecked(Thread* self)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      : ScopedThreadStateChange(self, kRunnable),
-        env_(down_cast<JNIEnvExt*>(self->GetJniEnv())),
-        vm_(env_ != NULL ? env_->vm : NULL) {
-    self_->VerifyStack();
-    Locks::mutator_lock_->AssertSharedHeld(self_);
-  }
-
-  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
-  // change into Runnable or acquire a share on the mutator_lock_.
-  explicit ScopedObjectAccessUnchecked(JavaVM* vm)
-      : ScopedThreadStateChange(), env_(NULL), vm_(down_cast<JavaVMExt*>(vm)) {}
-
-  // Here purely to force inlining.
-  ~ScopedObjectAccessUnchecked() ALWAYS_INLINE {
-    Locks::mutator_lock_->AssertSharedHeld(self_);
+  Thread* Self() const {
+    return self_;
   }
 
   JNIEnvExt* Env() const {
@@ -159,13 +124,11 @@
   template<typename T>
   T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     if (obj == NULL) {
       return NULL;
     }
-
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
-
     return Env()->AddLocalReference<T>(obj);
   }
 
@@ -173,14 +136,14 @@
   T Decode(jobject obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     return down_cast<T>(Self()->DecodeJObject(obj));
   }
 
   mirror::ArtField* DecodeField(jfieldID fid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
@@ -188,7 +151,7 @@
   jfieldID EncodeField(mirror::ArtField* field) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingFields);
     return reinterpret_cast<jfieldID>(field);
   }
@@ -196,7 +159,7 @@
   mirror::ArtMethod* DecodeMethod(jmethodID mid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
     return reinterpret_cast<mirror::ArtMethod*>(mid);
   }
@@ -204,16 +167,83 @@
   jmethodID EncodeMethod(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
     CHECK(!kMovingMethods);
     return reinterpret_cast<jmethodID>(method);
   }
 
- private:
+  bool IsRunnable() const {
+    return self_->GetState() == kRunnable;
+  }
+
+ protected:
+  explicit ScopedObjectAccessAlreadyRunnable(JNIEnv* env)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : self_(ThreadForEnv(env)), env_(down_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
+  }
+
+  explicit ScopedObjectAccessAlreadyRunnable(Thread* self)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : self_(self), env_(down_cast<JNIEnvExt*>(self->GetJniEnv())),
+        vm_(env_ != nullptr ? env_->vm : nullptr) {
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessAlreadyRunnable(JavaVM* vm)
+      : self_(nullptr), env_(nullptr), vm_(down_cast<JavaVMExt*>(vm)) {}
+
+  // Here purely to force inlining.
+  ~ScopedObjectAccessAlreadyRunnable() ALWAYS_INLINE {
+  }
+
+  // Self thread, can be null.
+  Thread* const self_;
   // The full JNIEnv.
   JNIEnvExt* const env_;
   // The full JavaVM.
   JavaVMExt* const vm_;
+};
+
+// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
+//
+// This class performs the necessary thread state switching to and from Runnable and lets us
+// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
+// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
+// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
+// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
+// is also manipulating the Object.
+//
+// The destructor transitions back to the previous thread state, typically Native. In this state
+// GC and thread suspension may occur.
+//
+// For annotalysis the subclass ScopedObjectAccess (below) makes it explicit that a share of
+// the mutator_lock_ will be acquired on construction.
+class ScopedObjectAccessUnchecked : public ScopedObjectAccessAlreadyRunnable {
+ public:
+  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(env), tsc_(Self(), kRunnable) {
+    Self()->VerifyStack();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
+  }
+
+  explicit ScopedObjectAccessUnchecked(Thread* self)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(self), tsc_(self, kRunnable) {
+    Self()->VerifyStack();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessUnchecked(JavaVM* vm) ALWAYS_INLINE
+      : ScopedObjectAccessAlreadyRunnable(vm), tsc_() {}
+
+ private:
+  // The scoped thread state change makes sure that we are runnable and restores the thread state
+  // in the destructor.
+  const ScopedThreadStateChange tsc_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedObjectAccessUnchecked);
 };
@@ -229,7 +259,7 @@
 
   explicit ScopedObjectAccess(Thread* self)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
+      SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
       : ScopedObjectAccessUnchecked(self) {
   }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 326e02e..ebf9078 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -171,7 +171,7 @@
   return nullptr;
 }
 
-Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa,
+Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
@@ -186,7 +186,8 @@
   return result;
 }
 
-Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa, jobject java_thread) {
+Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
+                                  jobject java_thread) {
   return FromManagedThread(soa, soa.Decode<mirror::Object*>(java_thread));
 }
 
@@ -556,7 +557,7 @@
   DumpStack(os);
 }
 
-mirror::String* Thread::GetThreadName(const ScopedObjectAccessUnchecked& soa) const {
+mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
   return (tlsPtr_.opeer != nullptr) ? reinterpret_cast<mirror::String*>(f->GetObject(tlsPtr_.opeer)) : nullptr;
 }
@@ -1432,7 +1433,7 @@
 };
 
 template<bool kTransactionActive>
-jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessUnchecked& soa) const {
+jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const {
   // Compute depth of stack
   CountStackDepthVisitor count_visitor(const_cast<Thread*>(this));
   count_visitor.WalkStack();
@@ -1455,11 +1456,14 @@
   }
   return soa.AddLocalReference<jobjectArray>(trace);
 }
-template jobject Thread::CreateInternalStackTrace<false>(const ScopedObjectAccessUnchecked& soa) const;
-template jobject Thread::CreateInternalStackTrace<true>(const ScopedObjectAccessUnchecked& soa) const;
+template jobject Thread::CreateInternalStackTrace<false>(
+    const ScopedObjectAccessAlreadyRunnable& soa) const;
+template jobject Thread::CreateInternalStackTrace<true>(
+    const ScopedObjectAccessAlreadyRunnable& soa) const;
 
-jobjectArray Thread::InternalStackTraceToStackTraceElementArray(const ScopedObjectAccess& soa,
-    jobject internal, jobjectArray output_array, int* stack_depth) {
+jobjectArray Thread::InternalStackTraceToStackTraceElementArray(
+    const ScopedObjectAccessAlreadyRunnable& soa, jobject internal, jobjectArray output_array,
+    int* stack_depth) {
   // Decode the internal stack trace into the depth, method trace and PC trace
   int32_t depth = soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal)->GetLength() - 1;
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 6d8af51..62fa323 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -72,8 +72,7 @@
 struct JNIEnvExt;
 class Monitor;
 class Runtime;
-class ScopedObjectAccess;
-class ScopedObjectAccessUnchecked;
+class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 struct SingleStepControl;
 class Thread;
@@ -140,12 +139,12 @@
 
   static Thread* Current();
 
-  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts,
+  static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts,
                                    mirror::Object* thread_peer)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts, jobject thread)
+  static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts, jobject thread)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -276,7 +275,7 @@
   }
 
   // Returns the java.lang.Thread's name, or NULL if this Thread* doesn't have a peer.
-  mirror::String* GetThreadName(const ScopedObjectAccessUnchecked& ts) const
+  mirror::String* GetThreadName(const ScopedObjectAccessAlreadyRunnable& ts) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
@@ -458,15 +457,16 @@
   // Create the internal representation of a stack trace, that is more time
   // and space efficient to compute than the StackTraceElement[].
   template<bool kTransactionActive>
-  jobject CreateInternalStackTrace(const ScopedObjectAccessUnchecked& soa) const
+  jobject CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Convert an internal stack trace representation (returned by CreateInternalStackTrace) to a
   // StackTraceElement[]. If output_array is NULL, a new array is created, otherwise as many
   // frames as will fit are written into the given array. If stack_depth is non-NULL, it's updated
   // with the number of valid frames in the returned array.
-  static jobjectArray InternalStackTraceToStackTraceElementArray(const ScopedObjectAccess& soa,
-      jobject internal, jobjectArray output_array = nullptr, int* stack_depth = nullptr)
+  static jobjectArray InternalStackTraceToStackTraceElementArray(
+      const ScopedObjectAccessAlreadyRunnable& soa, jobject internal,
+      jobjectArray output_array = nullptr, int* stack_depth = nullptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/test/Android.mk b/test/Android.mk
index a3aabe6..0684c90 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -134,11 +134,11 @@
   endif
   $(call declare-test-art-oat-targets-impl,$(1),)
 
-$(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex: $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT) | $(DEX2OATD)
-	$(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --boot-image=$(HOST_CORE_IMG_LOCATION) --dex-file=$$(realpath $$<) --oat-file=$$(realpath $(HOST_OUT_JAVA_LIBRARIES))/oat-test-dex-$(1).odex --instruction-set=$(ART_HOST_ARCH) --host --android-root=$(HOST_OUT)
+$(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex: $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT) | $(DEX2OATD)
+	$(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --boot-image=$(HOST_CORE_IMG_LOCATION) --dex-file=$$(realpath $$<) --oat-file=$$@ --instruction-set=$(ART_HOST_ARCH) --host --android-root=$(HOST_OUT)
 
 .PHONY: test-art-host-oat-default-$(1)
-test-art-host-oat-default-$(1): $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex test-art-host-dependencies
+test-art-host-oat-default-$(1): $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex test-art-host-dependencies
 	mkdir -p /tmp/android-data/test-art-host-oat-default-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-default-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
@@ -148,7 +148,7 @@
 	$(hide) rm -r /tmp/android-data/test-art-host-oat-default-$(1)
 
 .PHONY: test-art-host-oat-interpreter-$(1)
-test-art-host-oat-interpreter-$(1): $(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).odex test-art-host-dependencies
+test-art-host-oat-interpreter-$(1): $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/oat-test-dex-$(1).odex test-art-host-dependencies
 	mkdir -p /tmp/android-data/test-art-host-oat-interpreter-$(1)
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-interpreter-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \