blob: 24298049ee2d4af6c94b064dc01f881e934797a7 [file] [log] [blame]
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Andreas Gampea14100c2017-04-24 15:09:56 -070017#include "libcore_util_CharsetUtils.h"
18
19#include <string.h>
20
Vladimir Markoa3ad0cd2018-05-04 10:06:38 +010021#include "jni/jni_internal.h"
Jeff Hao848f70a2014-01-15 13:49:50 -080022#include "mirror/string-inl.h"
Steven Morelande431e272017-07-18 16:53:49 -070023#include "mirror/string.h"
Andreas Gampe87583b32017-05-25 11:22:18 -070024#include "native_util.h"
Andreas Gampe373a9b52017-10-18 09:01:57 -070025#include "nativehelper/scoped_primitive_array.h"
Steven Morelande431e272017-07-18 16:53:49 -070026#include "nativehelper/jni_macros.h"
Mathieu Chartier0795f232016-09-27 18:43:30 -070027#include "scoped_fast_native_object_access-inl.h"
Jeff Hao848f70a2014-01-15 13:49:50 -080028#include "unicode/utf16.h"
29
Jeff Hao848f70a2014-01-15 13:49:50 -080030namespace art {
31
32/**
33 * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
34 * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
35 * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
36 *
37 * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
38 * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
39 * to the garbage collector (nor hide potentially large allocations from it).
40 *
41 * Because a call to append might require an allocation, it might fail. Callers should always
42 * check the return value of append.
43 */
44class NativeUnsafeByteSequence {
45 public:
46 explicit NativeUnsafeByteSequence(JNIEnv* env)
47 : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) {
48 }
49
50 ~NativeUnsafeByteSequence() {
51 // Release our pointer to the raw array, copying changes back to the Java heap.
52 if (mRawArray != nullptr) {
53 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
54 }
55 }
56
57 bool append(jbyte b) {
58 if (mOffset == mSize && !resize(mSize * 2)) {
59 return false;
60 }
61 mRawArray[mOffset++] = b;
62 return true;
63 }
64
65 bool resize(int newSize) {
66 if (newSize == mSize) {
67 return true;
68 }
69
70 // Allocate a new array.
71 jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
72 if (newJavaArray == nullptr) {
73 return false;
74 }
75 jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr);
76 if (newRawArray == nullptr) {
77 return false;
78 }
79
80 // Copy data out of the old array and then let go of it.
81 // Note that we may be trimming the array.
82 if (mRawArray != nullptr) {
83 memcpy(newRawArray, mRawArray, mOffset);
84 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
85 mEnv->DeleteLocalRef(mJavaArray);
86 }
87
88 // Point ourselves at the new array.
89 mJavaArray = newJavaArray;
90 mRawArray = newRawArray;
91 mSize = newSize;
92 return true;
93 }
94
95 jbyteArray toByteArray() {
96 // Trim any unused space, if necessary.
97 bool okay = resize(mOffset);
98 return okay ? mJavaArray : nullptr;
99 }
100
101 private:
102 JNIEnv* mEnv;
103 jbyteArray mJavaArray;
104 jbyte* mRawArray;
105 jint mSize;
106 jint mOffset;
107
108 // Disallow copy and assignment.
109 NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
110 void operator=(const NativeUnsafeByteSequence&);
111};
112
113static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset,
114 jint length, jcharArray javaChars) {
115 ScopedByteArrayRO bytes(env, javaBytes);
116 if (bytes.get() == nullptr) {
117 return;
118 }
119 ScopedCharArrayRW chars(env, javaChars);
120 if (chars.get() == nullptr) {
121 return;
122 }
123
124 const jbyte* src = &bytes[offset];
125 jchar* dst = &chars[0];
126 static const jchar REPLACEMENT_CHAR = 0xfffd;
127 for (int i = length - 1; i >= 0; --i) {
128 jchar ch = static_cast<jchar>(*src++ & 0xff);
129 *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
130 }
131}
132
133static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes,
134 jint offset, jint length, jcharArray javaChars) {
135 ScopedByteArrayRO bytes(env, javaBytes);
136 if (bytes.get() == nullptr) {
137 return;
138 }
139 ScopedCharArrayRW chars(env, javaChars);
140 if (chars.get() == nullptr) {
141 return;
142 }
143
144 const jbyte* src = &bytes[offset];
145 jchar* dst = &chars[0];
146 for (int i = length - 1; i >= 0; --i) {
147 *dst++ = static_cast<jchar>(*src++ & 0xff);
148 }
149}
150
151/**
152 * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
153 * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
154 * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
155 */
156static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length,
157 jchar maxValidChar) {
158 ScopedObjectAccess soa(env);
159 StackHandleScope<1> hs(soa.Self());
Mathieu Chartier0795f232016-09-27 18:43:30 -0700160 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
Andreas Gampefa4333d2017-02-14 11:10:34 -0800161 if (string == nullptr) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800162 return nullptr;
163 }
164
165 jbyteArray javaBytes = env->NewByteArray(length);
166 ScopedByteArrayRW bytes(env, javaBytes);
167 if (bytes.get() == nullptr) {
168 return nullptr;
169 }
170
Jeff Hao848f70a2014-01-15 13:49:50 -0800171 jbyte* dst = &bytes[0];
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700172 for (int i = 0; i < length; ++i) {
173 jchar ch = string->CharAt(offset + i);
Jeff Hao848f70a2014-01-15 13:49:50 -0800174 if (ch > maxValidChar) {
175 ch = '?';
176 }
177 *dst++ = static_cast<jbyte>(ch);
178 }
179
180 return javaBytes;
181}
182
183static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset,
184 jint length) {
185 return charsToBytes(env, java_string, offset, length, 0x7f);
186}
187
188static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string,
189 jint offset, jint length) {
190 return charsToBytes(env, java_string, offset, length, 0xff);
191}
192
193static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset,
194 jint length) {
195 ScopedObjectAccess soa(env);
196 StackHandleScope<1> hs(soa.Self());
Mathieu Chartier0795f232016-09-27 18:43:30 -0700197 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
Andreas Gampefa4333d2017-02-14 11:10:34 -0800198 if (string == nullptr) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800199 return nullptr;
200 }
201
202 NativeUnsafeByteSequence out(env);
203 if (!out.resize(length)) {
204 return nullptr;
205 }
206
207 const int end = offset + length;
208 for (int i = offset; i < end; ++i) {
209 jint ch = string->CharAt(i);
210 if (ch < 0x80) {
211 // One byte.
212 if (!out.append(ch)) {
213 return nullptr;
214 }
215 } else if (ch < 0x800) {
216 // Two bytes.
217 if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
218 return nullptr;
219 }
220 } else if (U16_IS_SURROGATE(ch)) {
221 // A supplementary character.
222 jchar high = static_cast<jchar>(ch);
223 jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0;
224 if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
225 if (!out.append('?')) {
226 return nullptr;
227 }
228 continue;
229 }
230 // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
231 ++i;
232 ch = U16_GET_SUPPLEMENTARY(high, low);
233 // Four bytes.
234 jbyte b1 = (ch >> 18) | 0xf0;
235 jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
236 jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
237 jbyte b4 = (ch & 0x3f) | 0x80;
238 if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
239 return nullptr;
240 }
241 } else {
242 // Three bytes.
243 jbyte b1 = (ch >> 12) | 0xe0;
244 jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
245 jbyte b3 = (ch & 0x3f) | 0x80;
246 if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
247 return nullptr;
248 }
249 }
250 }
251 return out.toByteArray();
252}
253
254static JNINativeMethod gMethods[] = {
Igor Murashkin3b6f4402017-02-16 16:13:17 -0800255 FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"),
256 FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"),
257 FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"),
258 FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"),
259 FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"),
Jeff Hao848f70a2014-01-15 13:49:50 -0800260};
261
262void register_libcore_util_CharsetUtils(JNIEnv* env) {
263 REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
264}
265
266} // namespace art