Inline IRT frame push/pop into JNI stubs.

Golem results for art-opt-cc (higher is better):
linux-ia32                       before after
NativeDowncallStaticNormal       25.704 26.839 (+4.414%)
NativeDowncallStaticNormal6      23.857 25.086 (+5.152%)
NativeDowncallStaticNormalRefs6  23.704 25.248 (+6.513%)
NativeDowncallVirtualNormal      25.578 27.000 (+5.560%)
NativeDowncallVirtualNormal6     23.704 24.925 (+5.153%)
NativeDowncallVirtualNormalRefs6 23.704 25.074 (+5.870%)
NativeDowncallStaticFast         100.65 149.13 (+48.17%)
NativeDowncallStaticFast6        78.304 107.39 (+37.71%)
NativeDowncallStaticFastRefs6    76.962 104.45 (+35.71%)
NativeDowncallVirtualFast        100.40 147.28 (+46.69%)
NativeDowncallVirtualFast6       79.302 106.34 (+34.10%)
NativeDowncallVirtualFastRefs6   76.617 103.29 (+34.82%)
linux-x64                        before after
NativeDowncallStaticNormal       26.083 26.987 (+3.465%)
NativeDowncallStaticNormal6      24.606 25.411 (+3.271%)
NativeDowncallStaticNormalRefs6  24.150 25.086 (+3.877%)
NativeDowncallVirtualNormal      25.743 26.812 (+4.156%)
NativeDowncallVirtualNormal6     24.294 25.248 (+3.927%)
NativeDowncallVirtualNormalRefs6 23.857 25.086 (+5.152%)
NativeDowncallStaticFast         109.95 133.10 (+21.06%)
NativeDowncallStaticFast6        90.274 109.12 (+20.87%)
NativeDowncallStaticFastRefs6    87.282 105.29 (+20.63%)
NativeDowncallVirtualFast        104.00 127.55 (+22.65%)
NativeDowncallVirtualFast6       88.191 106.73 (+21.02%)
NativeDowncallVirtualFastRefs6   85.530 102.09 (+19.36%)
linux-armv7                      before after
NativeDowncallStaticNormal       6.1148 6.3694 (+4.316%)
NativeDowncallStaticNormal6      5.6845 5.9026 (+3.837%)
NativeDowncallStaticNormalRefs6  5.4054 5.6022 (+3.641%)
NativeDowncallVirtualNormal      5.4726 5.7088 (+4.316%)
NativeDowncallVirtualNormal6     5.1789 5.3685 (+3.660%)
NativeDowncallVirtualNormalRefs6 4.9140 5.0902 (+3.586%)
NativeDowncallStaticFast         16.683 18.058 (+8.239%)
NativeDowncallStaticFast6        13.951 14.896 (+6.770%)
NativeDowncallStaticFastRefs6    12.279 13.006 (+5.919%)
NativeDowncallVirtualFast        16.161 17.848 (+10.44%)
NativeDowncallVirtualFast6       14.085 15.196 (+7.892%)
NativeDowncallVirtualFastRefs6   12.089 12.897 (+6.683%)
linux-armv8                      before after
NativeDowncallStaticNormal       6.0663 6.4229 (+5.879%)
NativeDowncallStaticNormal6      5.7252 6.0437 (+5.563%)
NativeDowncallStaticNormalRefs6  5.3114 5.5814 (+5.082%)
NativeDowncallVirtualNormal      5.8795 6.2651 (+6.558%)
NativeDowncallVirtualNormal6     5.6232 5.9494 (+5.801%)
NativeDowncallVirtualNormalRefs6 5.1862 5.4429 (+4.948%)
NativeDowncallStaticFast         17.638 19.183 (+8.760%)
NativeDowncallStaticFast6        14.903 16.161 (+8.438%)
NativeDowncallStaticFastRefs6    12.475 13.235 (+6.094%)
NativeDowncallVirtualFast        15.826 17.848 (+12.78%)
NativeDowncallVirtualFast6       14.064 15.504 (+10.24%)
NativeDowncallVirtualFastRefs6   11.628 12.475 (+7.285%)

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I5ecfa7a661f08ab63dd2a75d666e1c1b9121935f
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 3ac7eca..d09e21d 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -60,27 +60,15 @@
 }
 
-// Called on entry to fast JNI, push a new local reference table only.
+// Called on entry to fast JNI. The local reference frame is pushed by the JNI stub itself.
-extern uint32_t JniMethodFastStart(Thread* self) {
-  JNIEnvExt* env = self->GetJniEnv();
-  DCHECK(env != nullptr);
-  uint32_t saved_local_ref_cookie = bit_cast<uint32_t>(env->GetLocalRefCookie());
-  env->SetLocalRefCookie(env->GetLocalsSegmentState());
-
+extern void JniMethodFastStart(Thread* self) {
   if (kIsDebugBuild) {
     ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
     CHECK(native_method->IsFastNative()) << native_method->PrettyMethod();
   }
-
-  return saved_local_ref_cookie;
 }
 
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
-extern uint32_t JniMethodStart(Thread* self) {
-  JNIEnvExt* env = self->GetJniEnv();
-  DCHECK(env != nullptr);
-  uint32_t saved_local_ref_cookie = bit_cast<uint32_t>(env->GetLocalRefCookie());
-  env->SetLocalRefCookie(env->GetLocalsSegmentState());
-
+extern void JniMethodStart(Thread* self) {
   if (kIsDebugBuild) {
     ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
     CHECK(!native_method->IsFastNative()) << native_method->PrettyMethod();
@@ -88,12 +76,11 @@
 
   // Transition out of runnable.
   self->TransitionFromRunnableToSuspended(kNative);
-  return saved_local_ref_cookie;
 }
 
-extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self) {
+extern void JniMethodStartSynchronized(jobject to_lock, Thread* self) {
   self->DecodeJObject(to_lock)->MonitorEnter(self);
-  return JniMethodStart(self);
+  JniMethodStart(self);
 }
 
 // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
@@ -159,35 +146,27 @@
 // TODO: These should probably be templatized or macro-ized.
 // Otherwise there's just too much repetitive boilerplate.
 
-extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
+extern void JniMethodEnd(Thread* self) {
   GoToRunnable(self);
-  PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
+extern void JniMethodFastEnd(Thread* self) {
   GoToRunnableFast(self);
-  PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie,
-                                     jobject locked,
-                                     Thread* self) {
+extern void JniMethodEndSynchronized(jobject locked, Thread* self) {
   GoToRunnable(self);
   UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
-  PopLocalReferences(saved_local_ref_cookie, self);
 }
 
 // Common result handling for EndWithReference.
-static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
-                                                             uint32_t saved_local_ref_cookie,
-                                                             Thread* self)
+static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS {
   // Must decode before pop. The 'result' may not be valid in case of an exception, though.
   ObjPtr<mirror::Object> o;
   if (!self->IsExceptionPending()) {
     o = self->DecodeJObject(result);
   }
-  PopLocalReferences(saved_local_ref_cookie, self);
   // Process result.
   if (UNLIKELY(self->GetJniEnv()->IsCheckJniEnabled())) {
     // CheckReferenceResult can resolve types.
@@ -199,27 +178,22 @@
   return o.Ptr();
 }
 
-extern mirror::Object* JniMethodFastEndWithReference(jobject result,
-                                                     uint32_t saved_local_ref_cookie,
-                                                     Thread* self) {
+extern mirror::Object* JniMethodFastEndWithReference(jobject result, Thread* self) {
   GoToRunnableFast(self);
-  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
+  return JniMethodEndWithReferenceHandleResult(result, self);
 }
 
-extern mirror::Object* JniMethodEndWithReference(jobject result,
-                                                 uint32_t saved_local_ref_cookie,
-                                                 Thread* self) {
+extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) {
   GoToRunnable(self);
-  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
+  return JniMethodEndWithReferenceHandleResult(result, self);
 }
 
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
-                                                             uint32_t saved_local_ref_cookie,
                                                              jobject locked,
                                                              Thread* self) {
   GoToRunnable(self);
   UnlockJniSynchronizedMethod(locked, self);
-  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
+  return JniMethodEndWithReferenceHandleResult(result, self);
 }
 
 extern uint64_t GenericJniMethodEnd(Thread* self,
@@ -251,8 +225,10 @@
   }
   char return_shorty_char = called->GetShorty()[0];
   if (return_shorty_char == 'L') {
-    return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceHandleResult(
-        result.l, saved_local_ref_cookie, self));
+    uint64_t ret =
+        reinterpret_cast<uint64_t>(JniMethodEndWithReferenceHandleResult(result.l, self));
+    PopLocalReferences(saved_local_ref_cookie, self);
+    return ret;
   } else {
     if (LIKELY(!critical_native)) {
       PopLocalReferences(saved_local_ref_cookie, self);
@@ -290,44 +266,37 @@
   }
 }
 
-extern uint32_t JniMonitoredMethodStart(Thread* self) {
-  uint32_t result = JniMethodStart(self);
+extern void JniMonitoredMethodStart(Thread* self) {
+  JniMethodStart(self);
   MONITOR_JNI(PaletteNotifyBeginJniInvocation);
-  return result;
 }
 
-extern uint32_t JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) {
-  uint32_t result = JniMethodStartSynchronized(to_lock, self);
+extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) {
+  JniMethodStartSynchronized(to_lock, self);
   MONITOR_JNI(PaletteNotifyBeginJniInvocation);
-  return result;
 }
 
-extern void JniMonitoredMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
+extern void JniMonitoredMethodEnd(Thread* self) {
   MONITOR_JNI(PaletteNotifyEndJniInvocation);
-  return JniMethodEnd(saved_local_ref_cookie, self);
+  JniMethodEnd(self);
 }
 
-extern void JniMonitoredMethodEndSynchronized(uint32_t saved_local_ref_cookie,
-                                             jobject locked,
-                                             Thread* self) {
+extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self) {
   MONITOR_JNI(PaletteNotifyEndJniInvocation);
-  return JniMethodEndSynchronized(saved_local_ref_cookie, locked, self);
+  JniMethodEndSynchronized(locked, self);
 }
 
-extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result,
-                                                          uint32_t saved_local_ref_cookie,
-                                                          Thread* self) {
+extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) {
   MONITOR_JNI(PaletteNotifyEndJniInvocation);
-  return JniMethodEndWithReference(result, saved_local_ref_cookie, self);
+  return JniMethodEndWithReference(result, self);
 }
 
 extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized(
     jobject result,
-    uint32_t saved_local_ref_cookie,
     jobject locked,
     Thread* self) {
   MONITOR_JNI(PaletteNotifyEndJniInvocation);
-  return JniMethodEndWithReferenceSynchronized(result, saved_local_ref_cookie, locked, self);
+  return JniMethodEndWithReferenceSynchronized(result, locked, self);
 }
 
 }  // namespace art