Check for forwarding address in READ_BARRIER_MARK_REG

When the object is in the from-space, the mark bit is not set.
In this case, we can also check whether the lock word holds a
forwarding address. The forwarding address case happens around 25% of
the time. This CL adds handling for forwarding address lock words to
READ_BARRIER_MARK_REG.

Reduces the total number of read barriers reaching the runtime on ritzperf:
Slow paths: 20758783 -> 15457783

Deleted the mark bit check in MarkFromReadBarrier since most of the
callers check the bit now.

Perf:
ReadBarrier::Mark: 2.59% -> 2.12%
art_quick_read_barrier_mark_reg01: 0.79% -> 0.78%
art_quick_read_barrier_mark_reg00: 0.54% -> 0.50%
art_quick_read_barrier_mark_reg02: 0.31% -> 0.25%

Only X86_64 for now; other architectures will follow in later changes.

Bug: 30162165

Test: test-art-host

Change-Id: Ie7289d684d0e37a887943d77710092e380457860
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 538b6eb..e9d06b3 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -61,7 +61,7 @@
  */
 class LockWord {
  public:
-  enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
+  enum SizeShiftsAndMasks : uint32_t {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
     kReadBarrierStateSize = 1,
@@ -91,6 +91,8 @@
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+    kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
+    kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,
 
     // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
@@ -140,7 +142,7 @@
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | kStateForwardingAddressShifted);
   }
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
@@ -256,6 +258,11 @@
   LockWord();
 
   explicit LockWord(uint32_t val) : value_(val) {
+    // Make sure adding the overflow causes an overflow.
+    constexpr uint64_t overflow = static_cast<uint64_t>(kStateForwardingAddressShifted) +
+        static_cast<uint64_t>(kStateForwardingAddressOverflow);
+    constexpr bool is_larger = overflow > static_cast<uint64_t>(0xFFFFFFFF);
+    static_assert(is_larger, "should have overflowed");
     CheckReadBarrierState();
   }