| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| Andreas Gampe | a14100c | 2017-04-24 15:09:56 -0700 | [diff] [blame] | 17 | #include "libcore_util_CharsetUtils.h" |
| 18 | |
| 19 | #include <string.h> |
| 20 | |
| Vladimir Marko | 09bfdf1 | 2021-07-19 12:17:20 +0100 | [diff] [blame^] | 21 | #include "dex/utf-inl.h" |
| Andreas Gampe | e15b9b1 | 2018-10-29 12:54:27 -0700 | [diff] [blame] | 22 | #include "handle_scope-inl.h" |
| Vladimir Marko | a3ad0cd | 2018-05-04 10:06:38 +0100 | [diff] [blame] | 23 | #include "jni/jni_internal.h" |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 24 | #include "mirror/string-inl.h" |
| Steven Moreland | e431e27 | 2017-07-18 16:53:49 -0700 | [diff] [blame] | 25 | #include "mirror/string.h" |
| Andreas Gampe | 87583b3 | 2017-05-25 11:22:18 -0700 | [diff] [blame] | 26 | #include "native_util.h" |
| Andreas Gampe | 373a9b5 | 2017-10-18 09:01:57 -0700 | [diff] [blame] | 27 | #include "nativehelper/scoped_primitive_array.h" |
| Steven Moreland | e431e27 | 2017-07-18 16:53:49 -0700 | [diff] [blame] | 28 | #include "nativehelper/jni_macros.h" |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 29 | #include "scoped_fast_native_object_access-inl.h" |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 30 | |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 31 | namespace art { |
| 32 | |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 33 | static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, |
| 34 | jint length, jcharArray javaChars) { |
| 35 | ScopedByteArrayRO bytes(env, javaBytes); |
| 36 | if (bytes.get() == nullptr) { |
| 37 | return; |
| 38 | } |
| 39 | ScopedCharArrayRW chars(env, javaChars); |
| 40 | if (chars.get() == nullptr) { |
| 41 | return; |
| 42 | } |
| 43 | |
| 44 | const jbyte* src = &bytes[offset]; |
| 45 | jchar* dst = &chars[0]; |
| 46 | static const jchar REPLACEMENT_CHAR = 0xfffd; |
| 47 | for (int i = length - 1; i >= 0; --i) { |
| 48 | jchar ch = static_cast<jchar>(*src++ & 0xff); |
| 49 | *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, |
| 54 | jint offset, jint length, jcharArray javaChars) { |
| 55 | ScopedByteArrayRO bytes(env, javaBytes); |
| 56 | if (bytes.get() == nullptr) { |
| 57 | return; |
| 58 | } |
| 59 | ScopedCharArrayRW chars(env, javaChars); |
| 60 | if (chars.get() == nullptr) { |
| 61 | return; |
| 62 | } |
| 63 | |
| 64 | const jbyte* src = &bytes[offset]; |
| 65 | jchar* dst = &chars[0]; |
| 66 | for (int i = length - 1; i >= 0; --i) { |
| 67 | *dst++ = static_cast<jchar>(*src++ & 0xff); |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | /** |
| 72 | * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that |
| 73 | * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while |
| 74 | * U+0000 to U+00ff inclusive are identical to ISO-8859-1. |
| 75 | */ |
| 76 | static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length, |
| 77 | jchar maxValidChar) { |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 78 | ScopedFastNativeObjectAccess soa(env); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 79 | StackHandleScope<1> hs(soa.Self()); |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 80 | Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string))); |
| Andreas Gampe | fa4333d | 2017-02-14 11:10:34 -0800 | [diff] [blame] | 81 | if (string == nullptr) { |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 82 | return nullptr; |
| 83 | } |
| 84 | |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 85 | ObjPtr<mirror::ByteArray> result = mirror::ByteArray::Alloc(soa.Self(), length); |
| 86 | if (result == nullptr) { |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 87 | return nullptr; |
| 88 | } |
| 89 | |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 90 | if (string->IsCompressed()) { |
| 91 | // All characters in a compressed string are ASCII and therefore do not need a replacement. |
| 92 | DCHECK_GE(maxValidChar, 0x7f); |
| 93 | memcpy(result->GetData(), string->GetValueCompressed() + offset, length); |
| 94 | } else { |
| 95 | const uint16_t* src = string->GetValue() + offset; |
| 96 | auto clamp = [maxValidChar](uint16_t c) { |
| 97 | return static_cast<jbyte>(dchecked_integral_cast<uint8_t>((c > maxValidChar) ? '?' : c)); |
| 98 | }; |
| 99 | std::transform(src, src + length, result->GetData(), clamp); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 100 | } |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 101 | return soa.AddLocalReference<jbyteArray>(result); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 102 | } |
| 103 | |
| 104 | static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset, |
| 105 | jint length) { |
| 106 | return charsToBytes(env, java_string, offset, length, 0x7f); |
| 107 | } |
| 108 | |
| 109 | static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string, |
| 110 | jint offset, jint length) { |
| 111 | return charsToBytes(env, java_string, offset, length, 0xff); |
| 112 | } |
| 113 | |
| 114 | static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset, |
| 115 | jint length) { |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 116 | ScopedFastNativeObjectAccess soa(env); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 117 | StackHandleScope<1> hs(soa.Self()); |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 118 | Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string))); |
| Andreas Gampe | fa4333d | 2017-02-14 11:10:34 -0800 | [diff] [blame] | 119 | if (string == nullptr) { |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 120 | return nullptr; |
| 121 | } |
| 122 | |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 123 | DCHECK_GE(offset, 0); |
| 124 | DCHECK_LE(offset, string->GetLength()); |
| 125 | DCHECK_GE(length, 0); |
| 126 | DCHECK_LE(length, string->GetLength() - offset); |
| 127 | |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 128 | bool compressed = string->IsCompressed(); |
| 129 | size_t utf8_length = 0; |
| 130 | if (compressed) { |
| 131 | utf8_length = length; |
| 132 | } else { |
| Vladimir Marko | 09bfdf1 | 2021-07-19 12:17:20 +0100 | [diff] [blame^] | 133 | const uint16_t* utf16 = string->GetValue() + offset; |
| 134 | auto count_length = [&utf8_length](jbyte c ATTRIBUTE_UNUSED) ALWAYS_INLINE { ++utf8_length; }; |
| 135 | ConvertUtf16ToUtf8</*kUseShortZero=*/ true, |
| 136 | /*kUse4ByteSequence=*/ true, |
| 137 | /*kReplaceBadSurrogates=*/ true>(utf16, length, count_length); |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 138 | } |
| 139 | ObjPtr<mirror::ByteArray> result = |
| 140 | mirror::ByteArray::Alloc(soa.Self(), dchecked_integral_cast<int32_t>(utf8_length)); |
| 141 | if (result == nullptr) { |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 142 | return nullptr; |
| 143 | } |
| 144 | |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 145 | if (compressed) { |
| 146 | memcpy(result->GetData(), string->GetValueCompressed() + offset, length); |
| 147 | } else { |
| Vladimir Marko | 09bfdf1 | 2021-07-19 12:17:20 +0100 | [diff] [blame^] | 148 | const uint16_t* utf16 = string->GetValue() + offset; |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 149 | int8_t* data = result->GetData(); |
| Vladimir Marko | 09bfdf1 | 2021-07-19 12:17:20 +0100 | [diff] [blame^] | 150 | auto store_data = [&data](jbyte c) ALWAYS_INLINE { *data++ = c; }; |
| 151 | ConvertUtf16ToUtf8</*kUseShortZero=*/ true, |
| 152 | /*kUse4ByteSequence=*/ true, |
| 153 | /*kReplaceBadSurrogates=*/ true>(utf16, length, store_data); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 154 | } |
| Vladimir Marko | 645083c | 2020-10-26 11:44:29 +0000 | [diff] [blame] | 155 | return soa.AddLocalReference<jbyteArray>(result); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 156 | } |
| 157 | |
// Table of the native methods this file provides for the Java class CharsetUtils. Each entry
// gives the Java method name and its JNI signature:
//   "([BII[C)V"                 -> void m(byte[], int, int, char[])
//   "(Ljava/lang/String;II)[B"  -> byte[] m(String, int, int)
// NOTE(review): FAST_NATIVE_METHOD presumably binds each name to the matching
// CharsetUtils_<name> function in this file — confirm in nativehelper/jni_macros.h.
static JNINativeMethod gMethods[] = {
  FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"),
  FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"),
  FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"),
  FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"),
  FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"),
};
| 165 | |
// Registers this file's native methods for the Java class libcore/util/CharsetUtils.
// NOTE(review): REGISTER_NATIVE_METHODS presumably picks up `env` and the file-local `gMethods`
// table implicitly — confirm against the macro definition in native_util.h.
void register_libcore_util_CharsetUtils(JNIEnv* env) {
  REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
}
| 169 | |
| 170 | } // namespace art |