1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "compile/PseudolocaleGenerator.h"
18 
19 #include <algorithm>
20 
21 #include "ResourceTable.h"
22 #include "ResourceValues.h"
23 #include "ValueVisitor.h"
24 #include "androidfw/Util.h"
25 #include "compile/Pseudolocalizer.h"
26 #include "util/Util.h"
27 
28 using ::android::ConfigDescription;
29 using ::android::StringPiece;
30 using ::android::StringPiece16;
31 
32 namespace aapt {
33 
// A common representation for both Span objects (styling tags) and UntranslatableSections, so
// that the two kinds of ranges can be sorted and walked together.
struct UnifiedSpan {
  // Only present for Span objects. If not present, this was an UntranslatableSection.
  std::optional<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  // Zero-initialized so that a default-constructed UnifiedSpan never holds indeterminate indices.
  uint32_t first_char = 0u;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char = 0u;
};
45 
operator <(const UnifiedSpan & left,const UnifiedSpan & right)46 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
47   if (left.first_char < right.first_char) {
48     return true;
49   } else if (left.first_char > right.first_char) {
50     return false;
51   } else if (left.last_char < right.last_char) {
52     return true;
53   }
54   return false;
55 }
56 
SpanToUnifiedSpan(const android::StringPool::Span & span)57 inline static UnifiedSpan SpanToUnifiedSpan(const android::StringPool::Span& span) {
58   return UnifiedSpan{*span.name, span.first_char, span.last_char};
59 }
60 
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)61 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
62   return UnifiedSpan{
63       {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
64 }
65 
66 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
67 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)68 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
69   // Ensure the Spans are sorted and converted.
70   std::vector<UnifiedSpan> sorted_spans;
71   sorted_spans.reserve(string.value->spans.size());
72   std::transform(string.value->spans.begin(), string.value->spans.end(),
73                  std::back_inserter(sorted_spans), SpanToUnifiedSpan);
74 
75   // Stable sort to ensure tag sequences like "<b><i>" are preserved.
76   std::stable_sort(sorted_spans.begin(), sorted_spans.end());
77 
78   // Ensure the UntranslatableSections are sorted and converted.
79   std::vector<UnifiedSpan> sorted_untranslatable_sections;
80   sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
81   std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
82                  std::back_inserter(sorted_untranslatable_sections),
83                  UntranslatableSectionToUnifiedSpan);
84   std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
85 
86   std::vector<UnifiedSpan> merged_spans;
87   merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
88   auto span_iter = sorted_spans.begin();
89   auto untranslatable_iter = sorted_untranslatable_sections.begin();
90   while (span_iter != sorted_spans.end() &&
91          untranslatable_iter != sorted_untranslatable_sections.end()) {
92     if (*span_iter < *untranslatable_iter) {
93       merged_spans.push_back(std::move(*span_iter));
94       ++span_iter;
95     } else {
96       merged_spans.push_back(std::move(*untranslatable_iter));
97       ++untranslatable_iter;
98     }
99   }
100 
101   while (span_iter != sorted_spans.end()) {
102     merged_spans.push_back(std::move(*span_iter));
103     ++span_iter;
104   }
105 
106   while (untranslatable_iter != sorted_untranslatable_sections.end()) {
107     merged_spans.push_back(std::move(*untranslatable_iter));
108     ++untranslatable_iter;
109   }
110   return merged_spans;
111 }
112 
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,android::StringPool * pool)113 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
114                                                          Pseudolocalizer::Method method,
115                                                          android::StringPool* pool) {
116   Pseudolocalizer localizer(method);
117 
118   // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
119   // This will effectively subdivide the string into multiple sections that can be individually
120   // pseudolocalized, while keeping the span indices synchronized.
121   std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
122 
123   // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
124   // runtime. So we will do all our processing in UTF-16, then convert back.
125   const std::u16string text16 = android::util::Utf8ToUtf16(string->value->value);
126 
127   // Convenient wrapper around the text that allows us to work with StringPieces.
128   const StringPiece16 text(text16);
129 
130   // The new string.
131   std::string new_string = localizer.Start();
132 
133   // The stack that keeps track of what nested Span we're in.
134   std::vector<size_t> span_stack;
135 
136   // The current position in the original text.
137   uint32_t cursor = 0u;
138 
139   // The current position in the new text.
140   uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
141                                              new_string.size(), false);
142 
143   // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
144   bool translatable = true;
145   size_t span_idx = 0u;
146   while (span_idx < merged_spans.size() || !span_stack.empty()) {
147     UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
148     UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
149 
150     if (span != nullptr) {
151       if (parent_span == nullptr || parent_span->last_char > span->first_char) {
152         // There is no parent, or this span is the child of the parent.
153         // Pseudolocalize all the text until this span.
154         const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
155         cursor += substr.size();
156 
157         // Pseudolocalize the substring.
158         std::string new_substr = android::util::Utf16ToUtf8(substr);
159         if (translatable) {
160           new_substr = localizer.Text(new_substr);
161         }
162         new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
163                                            new_substr.size(), false);
164         new_string += new_substr;
165 
166         // Rewrite the first_char.
167         span->first_char = new_cursor;
168         if (!span->tag) {
169           // An untranslatable section has begun!
170           translatable = false;
171         }
172         span_stack.push_back(span_idx);
173         ++span_idx;
174         continue;
175       }
176     }
177 
178     if (parent_span != nullptr) {
179       // There is a parent, and either this span is not a child of it, or there are no more spans.
180       // Pop this off the stack.
181       const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
182       cursor += substr.size();
183 
184       // Pseudolocalize the substring.
185       std::string new_substr = android::util::Utf16ToUtf8(substr);
186       if (translatable) {
187         new_substr = localizer.Text(new_substr);
188       }
189       new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
190                                          new_substr.size(), false);
191       new_string += new_substr;
192 
193       parent_span->last_char = new_cursor - 1;
194       if (parent_span->tag) {
195         // An end to an untranslatable section.
196         translatable = true;
197       }
198       span_stack.pop_back();
199     }
200   }
201 
202   // Finish the pseudolocalization at the end of the string.
203   new_string +=
204       localizer.Text(android::util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
205   new_string += localizer.End();
206 
207   android::StyleString localized;
208   localized.str = std::move(new_string);
209 
210   // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
211   for (UnifiedSpan& span : merged_spans) {
212     if (span.tag) {
213       localized.spans.push_back(
214           android::Span{std::move(span.tag.value()), span.first_char, span.last_char});
215     }
216   }
217   return util::make_unique<StyledString>(pool->MakeRef(localized));
218 }
219 
220 namespace {
221 
222 class Visitor : public ValueVisitor {
223  public:
224   // Either value or item will be populated upon visiting the value.
225   std::unique_ptr<Value> value;
226   std::unique_ptr<Item> item;
227 
Visitor(android::StringPool * pool,Pseudolocalizer::Method method)228   Visitor(android::StringPool* pool, Pseudolocalizer::Method method)
229       : pool_(pool), method_(method), localizer_(method) {
230   }
231 
Visit(Plural * plural)232   void Visit(Plural* plural) override {
233     CloningValueTransformer cloner(pool_);
234     std::unique_ptr<Plural> localized = util::make_unique<Plural>();
235     for (size_t i = 0; i < plural->values.size(); i++) {
236       Visitor sub_visitor(pool_, method_);
237       if (plural->values[i]) {
238         plural->values[i]->Accept(&sub_visitor);
239         if (sub_visitor.item) {
240           localized->values[i] = std::move(sub_visitor.item);
241         } else {
242           localized->values[i] = plural->values[i]->Transform(cloner);
243         }
244       }
245     }
246     localized->SetSource(plural->GetSource());
247     localized->SetWeak(true);
248     value = std::move(localized);
249   }
250 
Visit(String * string)251   void Visit(String* string) override {
252     const StringPiece original_string = *string->value;
253     std::string result = localizer_.Start();
254 
255     // Pseudolocalize only the translatable sections.
256     size_t start = 0u;
257     for (const UntranslatableSection& section : string->untranslatable_sections) {
258       // Pseudolocalize the content before the untranslatable section.
259       const size_t len = section.start - start;
260       if (len > 0u) {
261         result += localizer_.Text(original_string.substr(start, len));
262       }
263 
264       // Copy the untranslatable content.
265       result += original_string.substr(section.start, section.end - section.start);
266       start = section.end;
267     }
268 
269     // Pseudolocalize the content after the last untranslatable section.
270     if (start != original_string.size()) {
271       const size_t len = original_string.size() - start;
272       result += localizer_.Text(original_string.substr(start, len));
273     }
274     result += localizer_.End();
275 
276     std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
277     localized->SetSource(string->GetSource());
278     localized->SetWeak(true);
279     item = std::move(localized);
280   }
281 
Visit(StyledString * string)282   void Visit(StyledString* string) override {
283     item = PseudolocalizeStyledString(string, method_, pool_);
284     item->SetSource(string->GetSource());
285     item->SetWeak(true);
286   }
287 
288  private:
289   DISALLOW_COPY_AND_ASSIGN(Visitor);
290 
291   android::StringPool* pool_;
292   Pseudolocalizer::Method method_;
293   Pseudolocalizer localizer_;
294 };
295 
// Returns a copy of `base` whose language/country codes are replaced with those of the
// pseudolocale for `m`: "en"/"XA" for kAccent and "ar"/"XB" for kBidi. For any other method the
// copy is returned unmodified.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  // Two-character codes to write; both stay null when `m` has no pseudolocale.
  const char* language_code = nullptr;
  const char* country_code = nullptr;
  if (m == Pseudolocalizer::Method::kAccent) {
    language_code = "en";
    country_code = "XA";
  } else if (m == Pseudolocalizer::Method::kBidi) {
    language_code = "ar";
    country_code = "XB";
  }

  ConfigDescription modified = base;
  if (language_code != nullptr) {
    modified.language[0] = language_code[0];
    modified.language[1] = language_code[1];
    modified.country[0] = country_code[0];
    modified.country[1] = country_code[1];
  }
  return modified;
}
318 
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,android::StringPool * pool,ResourceEntry * entry)319 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
320                             ResourceConfigValue* original_value, android::StringPool* pool,
321                             ResourceEntry* entry) {
322   Visitor visitor(pool, method);
323   original_value->value->Accept(&visitor);
324 
325   std::unique_ptr<Value> localized_value;
326   if (visitor.value) {
327     localized_value = std::move(visitor.value);
328   } else if (visitor.item) {
329     localized_value = std::move(visitor.item);
330   }
331 
332   if (!localized_value) {
333     return;
334   }
335 
336   ConfigDescription config_with_accent =
337       ModifyConfigForPseudoLocale(original_value->config, method);
338 
339   ResourceConfigValue* new_config_value =
340       entry->FindOrCreateValue(config_with_accent, original_value->product);
341   if (!new_config_value->value) {
342     // Only use auto-generated pseudo-localization if none is defined.
343     new_config_value->value = std::move(localized_value);
344   }
345 }
346 
347 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
348 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)349 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
350   const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
351   if (diff & ConfigDescription::CONFIG_LOCALE) {
352     return false;
353   }
354   return config_value->value->IsTranslatable();
355 }
356 
357 }  // namespace
358 
Consume(IAaptContext * context,ResourceTable * table)359 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
360   for (auto& package : table->packages) {
361     for (auto& type : package->types) {
362       for (auto& entry : type->entries) {
363         std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
364         for (ResourceConfigValue* value : values) {
365           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
366                                  entry.get());
367           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
368                                  entry.get());
369         }
370       }
371     }
372   }
373   return true;
374 }
375 
376 }  // namespace aapt
377