1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "compile/PseudolocaleGenerator.h"
18
19 #include <algorithm>
20
21 #include "ResourceTable.h"
22 #include "ResourceValues.h"
23 #include "ValueVisitor.h"
24 #include "androidfw/Util.h"
25 #include "compile/Pseudolocalizer.h"
26 #include "util/Util.h"
27
28 using ::android::ConfigDescription;
29 using ::android::StringPiece;
30 using ::android::StringPiece16;
31
32 namespace aapt {
33
// Common representation for both styled Span objects and UntranslatableSections, so that the two
// kinds of ranges can be sorted, merged and walked as a single sequence.
struct UnifiedSpan {
  // Only present for Span objects. If not present, this was an UntranslatableSection.
  std::optional<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  // Zero-initialized so that a default-constructed UnifiedSpan never holds indeterminate values.
  uint32_t first_char = 0u;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char = 0u;
};
45
operator <(const UnifiedSpan & left,const UnifiedSpan & right)46 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
47 if (left.first_char < right.first_char) {
48 return true;
49 } else if (left.first_char > right.first_char) {
50 return false;
51 } else if (left.last_char < right.last_char) {
52 return true;
53 }
54 return false;
55 }
56
SpanToUnifiedSpan(const android::StringPool::Span & span)57 inline static UnifiedSpan SpanToUnifiedSpan(const android::StringPool::Span& span) {
58 return UnifiedSpan{*span.name, span.first_char, span.last_char};
59 }
60
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)61 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
62 return UnifiedSpan{
63 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
64 }
65
66 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
67 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)68 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
69 // Ensure the Spans are sorted and converted.
70 std::vector<UnifiedSpan> sorted_spans;
71 sorted_spans.reserve(string.value->spans.size());
72 std::transform(string.value->spans.begin(), string.value->spans.end(),
73 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
74
75 // Stable sort to ensure tag sequences like "<b><i>" are preserved.
76 std::stable_sort(sorted_spans.begin(), sorted_spans.end());
77
78 // Ensure the UntranslatableSections are sorted and converted.
79 std::vector<UnifiedSpan> sorted_untranslatable_sections;
80 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
81 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
82 std::back_inserter(sorted_untranslatable_sections),
83 UntranslatableSectionToUnifiedSpan);
84 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
85
86 std::vector<UnifiedSpan> merged_spans;
87 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
88 auto span_iter = sorted_spans.begin();
89 auto untranslatable_iter = sorted_untranslatable_sections.begin();
90 while (span_iter != sorted_spans.end() &&
91 untranslatable_iter != sorted_untranslatable_sections.end()) {
92 if (*span_iter < *untranslatable_iter) {
93 merged_spans.push_back(std::move(*span_iter));
94 ++span_iter;
95 } else {
96 merged_spans.push_back(std::move(*untranslatable_iter));
97 ++untranslatable_iter;
98 }
99 }
100
101 while (span_iter != sorted_spans.end()) {
102 merged_spans.push_back(std::move(*span_iter));
103 ++span_iter;
104 }
105
106 while (untranslatable_iter != sorted_untranslatable_sections.end()) {
107 merged_spans.push_back(std::move(*untranslatable_iter));
108 ++untranslatable_iter;
109 }
110 return merged_spans;
111 }
112
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,android::StringPool * pool)113 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
114 Pseudolocalizer::Method method,
115 android::StringPool* pool) {
116 Pseudolocalizer localizer(method);
117
118 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
119 // This will effectively subdivide the string into multiple sections that can be individually
120 // pseudolocalized, while keeping the span indices synchronized.
121 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
122
123 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
124 // runtime. So we will do all our processing in UTF-16, then convert back.
125 const std::u16string text16 = android::util::Utf8ToUtf16(string->value->value);
126
127 // Convenient wrapper around the text that allows us to work with StringPieces.
128 const StringPiece16 text(text16);
129
130 // The new string.
131 std::string new_string = localizer.Start();
132
133 // The stack that keeps track of what nested Span we're in.
134 std::vector<size_t> span_stack;
135
136 // The current position in the original text.
137 uint32_t cursor = 0u;
138
139 // The current position in the new text.
140 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
141 new_string.size(), false);
142
143 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
144 bool translatable = true;
145 size_t span_idx = 0u;
146 while (span_idx < merged_spans.size() || !span_stack.empty()) {
147 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
148 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
149
150 if (span != nullptr) {
151 if (parent_span == nullptr || parent_span->last_char > span->first_char) {
152 // There is no parent, or this span is the child of the parent.
153 // Pseudolocalize all the text until this span.
154 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
155 cursor += substr.size();
156
157 // Pseudolocalize the substring.
158 std::string new_substr = android::util::Utf16ToUtf8(substr);
159 if (translatable) {
160 new_substr = localizer.Text(new_substr);
161 }
162 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
163 new_substr.size(), false);
164 new_string += new_substr;
165
166 // Rewrite the first_char.
167 span->first_char = new_cursor;
168 if (!span->tag) {
169 // An untranslatable section has begun!
170 translatable = false;
171 }
172 span_stack.push_back(span_idx);
173 ++span_idx;
174 continue;
175 }
176 }
177
178 if (parent_span != nullptr) {
179 // There is a parent, and either this span is not a child of it, or there are no more spans.
180 // Pop this off the stack.
181 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
182 cursor += substr.size();
183
184 // Pseudolocalize the substring.
185 std::string new_substr = android::util::Utf16ToUtf8(substr);
186 if (translatable) {
187 new_substr = localizer.Text(new_substr);
188 }
189 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
190 new_substr.size(), false);
191 new_string += new_substr;
192
193 parent_span->last_char = new_cursor - 1;
194 if (parent_span->tag) {
195 // An end to an untranslatable section.
196 translatable = true;
197 }
198 span_stack.pop_back();
199 }
200 }
201
202 // Finish the pseudolocalization at the end of the string.
203 new_string +=
204 localizer.Text(android::util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
205 new_string += localizer.End();
206
207 android::StyleString localized;
208 localized.str = std::move(new_string);
209
210 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
211 for (UnifiedSpan& span : merged_spans) {
212 if (span.tag) {
213 localized.spans.push_back(
214 android::Span{std::move(span.tag.value()), span.first_char, span.last_char});
215 }
216 }
217 return util::make_unique<StyledString>(pool->MakeRef(localized));
218 }
219
220 namespace {
221
222 class Visitor : public ValueVisitor {
223 public:
224 // Either value or item will be populated upon visiting the value.
225 std::unique_ptr<Value> value;
226 std::unique_ptr<Item> item;
227
Visitor(android::StringPool * pool,Pseudolocalizer::Method method)228 Visitor(android::StringPool* pool, Pseudolocalizer::Method method)
229 : pool_(pool), method_(method), localizer_(method) {
230 }
231
Visit(Plural * plural)232 void Visit(Plural* plural) override {
233 CloningValueTransformer cloner(pool_);
234 std::unique_ptr<Plural> localized = util::make_unique<Plural>();
235 for (size_t i = 0; i < plural->values.size(); i++) {
236 Visitor sub_visitor(pool_, method_);
237 if (plural->values[i]) {
238 plural->values[i]->Accept(&sub_visitor);
239 if (sub_visitor.item) {
240 localized->values[i] = std::move(sub_visitor.item);
241 } else {
242 localized->values[i] = plural->values[i]->Transform(cloner);
243 }
244 }
245 }
246 localized->SetSource(plural->GetSource());
247 localized->SetWeak(true);
248 value = std::move(localized);
249 }
250
Visit(String * string)251 void Visit(String* string) override {
252 const StringPiece original_string = *string->value;
253 std::string result = localizer_.Start();
254
255 // Pseudolocalize only the translatable sections.
256 size_t start = 0u;
257 for (const UntranslatableSection& section : string->untranslatable_sections) {
258 // Pseudolocalize the content before the untranslatable section.
259 const size_t len = section.start - start;
260 if (len > 0u) {
261 result += localizer_.Text(original_string.substr(start, len));
262 }
263
264 // Copy the untranslatable content.
265 result += original_string.substr(section.start, section.end - section.start);
266 start = section.end;
267 }
268
269 // Pseudolocalize the content after the last untranslatable section.
270 if (start != original_string.size()) {
271 const size_t len = original_string.size() - start;
272 result += localizer_.Text(original_string.substr(start, len));
273 }
274 result += localizer_.End();
275
276 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
277 localized->SetSource(string->GetSource());
278 localized->SetWeak(true);
279 item = std::move(localized);
280 }
281
Visit(StyledString * string)282 void Visit(StyledString* string) override {
283 item = PseudolocalizeStyledString(string, method_, pool_);
284 item->SetSource(string->GetSource());
285 item->SetWeak(true);
286 }
287
288 private:
289 DISALLOW_COPY_AND_ASSIGN(Visitor);
290
291 android::StringPool* pool_;
292 Pseudolocalizer::Method method_;
293 Pseudolocalizer localizer_;
294 };
295
// Returns a copy of `base` whose language/country are replaced with the pseudolocale matching the
// method: "en"/"XA" for kAccent and "ar"/"XB" for kBidi. Any other method returns an unmodified
// copy.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  ConfigDescription result = base;
  if (m == Pseudolocalizer::Method::kAccent) {
    result.language[0] = 'e';
    result.language[1] = 'n';
    result.country[0] = 'X';
    result.country[1] = 'A';
  } else if (m == Pseudolocalizer::Method::kBidi) {
    result.language[0] = 'a';
    result.language[1] = 'r';
    result.country[0] = 'X';
    result.country[1] = 'B';
  }
  return result;
}
318
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,android::StringPool * pool,ResourceEntry * entry)319 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
320 ResourceConfigValue* original_value, android::StringPool* pool,
321 ResourceEntry* entry) {
322 Visitor visitor(pool, method);
323 original_value->value->Accept(&visitor);
324
325 std::unique_ptr<Value> localized_value;
326 if (visitor.value) {
327 localized_value = std::move(visitor.value);
328 } else if (visitor.item) {
329 localized_value = std::move(visitor.item);
330 }
331
332 if (!localized_value) {
333 return;
334 }
335
336 ConfigDescription config_with_accent =
337 ModifyConfigForPseudoLocale(original_value->config, method);
338
339 ResourceConfigValue* new_config_value =
340 entry->FindOrCreateValue(config_with_accent, original_value->product);
341 if (!new_config_value->value) {
342 // Only use auto-generated pseudo-localization if none is defined.
343 new_config_value->value = std::move(localized_value);
344 }
345 }
346
347 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
348 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)349 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
350 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
351 if (diff & ConfigDescription::CONFIG_LOCALE) {
352 return false;
353 }
354 return config_value->value->IsTranslatable();
355 }
356
357 } // namespace
358
Consume(IAaptContext * context,ResourceTable * table)359 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
360 for (auto& package : table->packages) {
361 for (auto& type : package->types) {
362 for (auto& entry : type->entries) {
363 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
364 for (ResourceConfigValue* value : values) {
365 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
366 entry.get());
367 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
368 entry.get());
369 }
370 }
371 }
372 }
373 return true;
374 }
375
376 } // namespace aapt
377