[optimizing] Implement more x86/x86_64 intrinsics

Implement the CAS, bit-reverse, and byte-reverse intrinsics that were
missing from the x86 and x86_64 implementations.

Add assembler tests and a compareAndSwapLong test.

Change-Id: Iabb2ff46036645df0a91f640288ef06090a64ee3
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 3d0f074..708f61f 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -104,6 +104,16 @@
     if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
         System.out.println("Unexpectedly not succeeding compareAndSwap...");
     }
+
+    if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+        System.out.println("Unexpectedly succeeding compareAndSwapLong...");
+    }
+    if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+    }
+    if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+    }
   }
 
   private static class TestClass {