| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2008 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "java_lang_StringFactory.h" |
| 18 | |
| 19 | #include "common_throws.h" |
| Andreas Gampe | e15b9b1 | 2018-10-29 12:54:27 -0700 | [diff] [blame] | 20 | #include "handle_scope-inl.h" |
| Vladimir Marko | a3ad0cd | 2018-05-04 10:06:38 +0100 | [diff] [blame] | 21 | #include "jni/jni_internal.h" |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 22 | #include "mirror/object-inl.h" |
| Andreas Gampe | fd63bbf | 2018-10-29 12:55:35 -0700 | [diff] [blame] | 23 | #include "mirror/string-alloc-inl.h" |
| Andreas Gampe | 87583b3 | 2017-05-25 11:22:18 -0700 | [diff] [blame] | 24 | #include "native_util.h" |
| Steven Moreland | e431e27 | 2017-07-18 16:53:49 -0700 | [diff] [blame] | 25 | #include "nativehelper/jni_macros.h" |
| Andreas Gampe | 373a9b5 | 2017-10-18 09:01:57 -0700 | [diff] [blame] | 26 | #include "nativehelper/scoped_local_ref.h" |
| 27 | #include "nativehelper/scoped_primitive_array.h" |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 28 | #include "scoped_fast_native_object_access-inl.h" |
| 29 | #include "scoped_thread_state_change-inl.h" |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 30 | |
| 31 | namespace art { |
| 32 | |
| 33 | static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data, |
| 34 | jint high, jint offset, jint byte_count) { |
| 35 | ScopedFastNativeObjectAccess soa(env); |
| 36 | if (UNLIKELY(java_data == nullptr)) { |
| 37 | ThrowNullPointerException("data == null"); |
| 38 | return nullptr; |
| 39 | } |
| 40 | StackHandleScope<1> hs(soa.Self()); |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 41 | Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data))); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 42 | int32_t data_size = byte_array->GetLength(); |
| 43 | if ((offset | byte_count) < 0 || byte_count > data_size - offset) { |
| 44 | soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;", |
| 45 | "length=%d; regionStart=%d; regionLength=%d", data_size, |
| 46 | offset, byte_count); |
| 47 | return nullptr; |
| 48 | } |
| 49 | gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); |
| Vladimir Marko | 9b81ac3 | 2019-05-16 16:47:08 +0100 | [diff] [blame] | 50 | ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(), |
| 51 | byte_count, |
| 52 | byte_array, |
| 53 | offset, |
| 54 | high, |
| 55 | allocator_type); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 56 | return soa.AddLocalReference<jstring>(result); |
| 57 | } |
| 58 | |
| Roland Levillain | cc3839c | 2016-02-29 16:23:48 +0000 | [diff] [blame] | 59 | // The char array passed as `java_data` must not be a null reference. |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 60 | static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset, |
| 61 | jint char_count, jcharArray java_data) { |
| Roland Levillain | cc3839c | 2016-02-29 16:23:48 +0000 | [diff] [blame] | 62 | DCHECK(java_data != nullptr); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 63 | ScopedFastNativeObjectAccess soa(env); |
| 64 | StackHandleScope<1> hs(soa.Self()); |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 65 | Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data))); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 66 | gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); |
| Vladimir Marko | 9b81ac3 | 2019-05-16 16:47:08 +0100 | [diff] [blame] | 67 | ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(), |
| 68 | char_count, |
| 69 | char_array, |
| 70 | offset, |
| 71 | allocator_type); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 72 | return soa.AddLocalReference<jstring>(result); |
| 73 | } |
| 74 | |
| 75 | static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) { |
| 76 | ScopedFastNativeObjectAccess soa(env); |
| 77 | if (UNLIKELY(to_copy == nullptr)) { |
| 78 | ThrowNullPointerException("toCopy == null"); |
| 79 | return nullptr; |
| 80 | } |
| 81 | StackHandleScope<1> hs(soa.Self()); |
| Mathieu Chartier | 0795f23 | 2016-09-27 18:43:30 -0700 | [diff] [blame] | 82 | Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy))); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 83 | gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); |
| Vladimir Marko | 9b81ac3 | 2019-05-16 16:47:08 +0100 | [diff] [blame] | 84 | ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(), |
| 85 | string->GetLength(), |
| 86 | string, |
| 87 | /*offset=*/ 0, |
| 88 | allocator_type); |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 89 | return soa.AddLocalReference<jstring>(result); |
| 90 | } |
| 91 | |
| Rock.Yeh | 755b533 | 2021-01-07 10:54:12 +0800 | [diff] [blame] | 92 | static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data, |
| 93 | jint offset, jint byte_count) { |
| 94 | // Local Define in here |
| 95 | static const jchar kReplacementChar = 0xfffd; |
| 96 | static const int kDefaultBufferSize = 256; |
| 97 | static const int kTableUtf8Needed[] = { |
| 98 | // 0 1 2 3 4 5 6 7 8 9 a b c d e f |
| 99 | 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf |
| 100 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf |
| 101 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef |
| 102 | 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff |
| 103 | }; |
| 104 | |
| 105 | ScopedFastNativeObjectAccess soa(env); |
| 106 | if (UNLIKELY(java_data == nullptr)) { |
| 107 | ThrowNullPointerException("data == null"); |
| 108 | return nullptr; |
| 109 | } |
| 110 | |
| 111 | StackHandleScope<1> hs(soa.Self()); |
| 112 | Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data))); |
| 113 | int32_t data_size = byte_array->GetLength(); |
| 114 | if ((offset | byte_count) < 0 || byte_count > data_size - offset) { |
| 115 | soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;", |
| 116 | "length=%d; regionStart=%d; regionLength=%d", data_size, |
| 117 | offset, byte_count); |
| 118 | return nullptr; |
| 119 | } |
| 120 | |
| 121 | /* |
| 122 | * This code converts a UTF-8 byte sequence to a Java String (UTF-16). |
| 123 | * It implements the W3C recommended UTF-8 decoder. |
| 124 | * https://www.w3.org/TR/encoding/#utf-8-decoder |
| 125 | * |
| 126 | * Unicode 3.2 Well-Formed UTF-8 Byte Sequences |
| 127 | * Code Points First Second Third Fourth |
| 128 | * U+0000..U+007F 00..7F |
| 129 | * U+0080..U+07FF C2..DF 80..BF |
| 130 | * U+0800..U+0FFF E0 A0..BF 80..BF |
| 131 | * U+1000..U+CFFF E1..EC 80..BF 80..BF |
| 132 | * U+D000..U+D7FF ED 80..9F 80..BF |
| 133 | * U+E000..U+FFFF EE..EF 80..BF 80..BF |
| 134 | * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF |
| 135 | * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF |
| 136 | * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF |
| 137 | * |
| 138 | * Please refer to Unicode as the authority. |
| 139 | * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf |
| 140 | * |
| 141 | * Handling Malformed Input |
| 142 | * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is |
| 143 | * the longest code unit subsequence starting at an unconvertible offset that is either |
| 144 | * 1) the initial subsequence of a well-formed code unit sequence, or |
| 145 | * 2) a subsequence of length one: |
| 146 | * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix |
| 147 | * of a valid sequence, and with the conversion to restart after the incomplete sequence. |
| 148 | * |
| 149 | * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are |
| 150 | * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80", |
| 151 | * but "C0" can't be the initial subsequence of any well-formed code unit sequence. |
| 152 | * Thus, the output should be "A\ufffd\ufffdA\ufffdA". |
| 153 | * |
| 154 | * Please refer to section "Best Practices for Using U+FFFD." in |
| 155 | * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf |
| 156 | */ |
| 157 | |
| 158 | // Initial value |
| 159 | jchar stack_buffer[kDefaultBufferSize]; |
| 160 | std::unique_ptr<jchar[]> allocated_buffer; |
| 161 | jchar* v; |
| 162 | if (byte_count <= kDefaultBufferSize) { |
| 163 | v = stack_buffer; |
| 164 | } else { |
| 165 | allocated_buffer.reset(new jchar[byte_count]); |
| 166 | v = allocated_buffer.get(); |
| 167 | } |
| 168 | |
| 169 | jbyte* d = byte_array->GetData(); |
| 170 | DCHECK(d != nullptr); |
| 171 | |
| 172 | int idx = offset; |
| 173 | int last = offset + byte_count; |
| 174 | int s = 0; |
| 175 | |
| 176 | int code_point = 0; |
| 177 | int utf8_bytes_seen = 0; |
| 178 | int utf8_bytes_needed = 0; |
| 179 | int lower_bound = 0x80; |
| 180 | int upper_bound = 0xbf; |
| 181 | while (idx < last) { |
| 182 | int b = d[idx++] & 0xff; |
| 183 | if (utf8_bytes_needed == 0) { |
| 184 | if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx |
| 185 | v[s++] = (jchar) b; |
| 186 | continue; |
| 187 | } |
| 188 | |
| 189 | if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte |
| 190 | v[s++] = kReplacementChar; |
| 191 | continue; |
| 192 | } |
| 193 | |
| 194 | // 11xxxxxx |
| 195 | int tableLookupIndex = b & 0x3f; |
| 196 | utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex]; |
| 197 | if (utf8_bytes_needed == 0) { |
| 198 | v[s++] = kReplacementChar; |
| 199 | continue; |
| 200 | } |
| 201 | |
| 202 | // utf8_bytes_needed |
| 203 | // 1: b & 0x1f |
| 204 | // 2: b & 0x0f |
| 205 | // 3: b & 0x07 |
| 206 | code_point = b & (0x3f >> utf8_bytes_needed); |
| 207 | if (b == 0xe0) { |
| 208 | lower_bound = 0xa0; |
| 209 | } else if (b == 0xed) { |
| 210 | upper_bound = 0x9f; |
| 211 | } else if (b == 0xf0) { |
| 212 | lower_bound = 0x90; |
| 213 | } else if (b == 0xf4) { |
| 214 | upper_bound = 0x8f; |
| 215 | } |
| 216 | } else { |
| 217 | if (b < lower_bound || b > upper_bound) { |
| 218 | // The bytes seen are ill-formed. Substitute them with U+FFFD |
| 219 | v[s++] = kReplacementChar; |
| 220 | code_point = 0; |
| 221 | utf8_bytes_needed = 0; |
| 222 | utf8_bytes_seen = 0; |
| 223 | lower_bound = 0x80; |
| 224 | upper_bound = 0xbf; |
| 225 | /* |
| 226 | * According to the Unicode Standard, |
| 227 | * "a UTF-8 conversion process is required to never consume well-formed |
| 228 | * subsequences as part of its error handling for ill-formed subsequences" |
| 229 | * The current byte could be part of well-formed subsequences. Reduce the |
| 230 | * index by 1 to parse it in next loop. |
| 231 | */ |
| 232 | idx--; |
| 233 | continue; |
| 234 | } |
| 235 | |
| 236 | lower_bound = 0x80; |
| 237 | upper_bound = 0xbf; |
| 238 | code_point = (code_point << 6) | (b & 0x3f); |
| 239 | utf8_bytes_seen++; |
| 240 | if (utf8_bytes_needed != utf8_bytes_seen) { |
| 241 | continue; |
| 242 | } |
| 243 | |
| 244 | // Encode chars from U+10000 up as surrogate pairs |
| 245 | if (code_point < 0x10000) { |
| 246 | v[s++] = (jchar) code_point; |
| 247 | } else { |
| 248 | v[s++] = (jchar) ((code_point >> 10) + 0xd7c0); |
| 249 | v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00); |
| 250 | } |
| 251 | |
| 252 | utf8_bytes_seen = 0; |
| 253 | utf8_bytes_needed = 0; |
| 254 | code_point = 0; |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | // The bytes seen are ill-formed. Substitute them by U+FFFD |
| 259 | if (utf8_bytes_needed != 0) { |
| 260 | v[s++] = kReplacementChar; |
| 261 | } |
| 262 | |
| 263 | ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v); |
| 264 | return soa.AddLocalReference<jstring>(result); |
| 265 | } |
| 266 | |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 267 | static JNINativeMethod gMethods[] = { |
| Igor Murashkin | 3b6f440 | 2017-02-16 16:13:17 -0800 | [diff] [blame] | 268 | FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"), |
| 269 | FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"), |
| 270 | FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"), |
| Rock.Yeh | 755b533 | 2021-01-07 10:54:12 +0800 | [diff] [blame] | 271 | FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"), |
| Jeff Hao | 848f70a | 2014-01-15 13:49:50 -0800 | [diff] [blame] | 272 | }; |
| 273 | |
| 274 | void register_java_lang_StringFactory(JNIEnv* env) { |
| 275 | REGISTER_NATIVE_METHODS("java/lang/StringFactory"); |
| 276 | } |
| 277 | |
| 278 | } // namespace art |