1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.util;
18 
19 import android.annotation.NonNull;
20 
21 import com.android.modules.utils.ModifiedUtf8;
22 
23 import dalvik.annotation.optimization.FastNative;
24 
25 /**
26  * Specializations of {@code libcore.util.CharsetUtils} which enable efficient
27  * in-place encoding without making any new allocations.
28  * <p>
29  * These methods purposefully accept only non-movable byte array addresses to
30  * avoid extra JNI overhead.
31  * <p>
32  * Callers are cautioned that there is a long-standing ART bug that emits
33  * non-standard 4-byte sequences, as described by {@code kUtfUse4ByteSequence}
34  * in {@code art/runtime/jni/jni_internal.cc}. If precise modified UTF-8
35  * encoding is required, use {@link ModifiedUtf8} instead.
36  *
37  * @hide
38  */
39 public class CharsetUtils {
40     /**
41      * Attempt to encode the given string as modified UTF-8 into the destination
42      * byte array without making any new allocations.
43      * <p>
44      * Callers are cautioned that there is a long-standing ART bug that emits
45      * non-standard 4-byte sequences, as described by
46      * {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
47      * If precise modified UTF-8 encoding is required, use {@link ModifiedUtf8}
48      * instead.
49      *
50      * @param src string value to be encoded
51      * @param dest destination byte array to encode into
52      * @param destOff offset into destination where encoding should begin
53      * @param destLen length of destination
54      * @return positive value when encoding succeeded, or negative value when
55      *         failed; the magnitude of the value is the number of bytes
56      *         required to encode the string.
57      */
toModifiedUtf8Bytes(@onNull String src, long dest, int destOff, int destLen)58     public static int toModifiedUtf8Bytes(@NonNull String src,
59             long dest, int destOff, int destLen) {
60         return toModifiedUtf8Bytes(src, src.length(), dest, destOff, destLen);
61     }
62 
63     /**
64      * Attempt to encode the given string as modified UTF-8 into the destination
65      * byte array without making any new allocations.
66      * <p>
67      * Callers are cautioned that there is a long-standing ART bug that emits
68      * non-standard 4-byte sequences, as described by
69      * {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
70      * If precise modified UTF-8 encoding is required, use {@link ModifiedUtf8}
71      * instead.
72      *
73      * @param src string value to be encoded
74      * @param srcLen exact length of string to be encoded
75      * @param dest destination byte array to encode into
76      * @param destOff offset into destination where encoding should begin
77      * @param destLen length of destination
78      * @return positive value when encoding succeeded, or negative value when
79      *         failed; the magnitude of the value is the number of bytes
80      *         required to encode the string.
81      */
82     @FastNative
toModifiedUtf8Bytes(@onNull String src, int srcLen, long dest, int destOff, int destLen)83     private static native int toModifiedUtf8Bytes(@NonNull String src, int srcLen,
84             long dest, int destOff, int destLen);
85 
86     /**
87      * Attempt to decode a modified UTF-8 string from the source byte array.
88      * <p>
89      * Callers are cautioned that there is a long-standing ART bug that emits
90      * non-standard 4-byte sequences, as described by
91      * {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
92      * If precise modified UTF-8 encoding is required, use {@link ModifiedUtf8}
93      * instead.
94      *
95      * @param src source byte array to decode from
96      * @param srcOff offset into source where decoding should begin
97      * @param srcLen length of source that should be decoded
98      * @return the successfully decoded string
99      */
100     @FastNative
fromModifiedUtf8Bytes( long src, int srcOff, int srcLen)101     public static native @NonNull String fromModifiedUtf8Bytes(
102             long src, int srcOff, int srcLen);
103 }
104