1 /*
2  * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "anonymizer.h"
17 
18 #include <locale.h>
19 #include <securec.h>
20 #include <stdbool.h>
21 #include <stdint.h>
22 #include <wchar.h>
23 
24 #include "comm_log.h"
25 #include "softbus_error_code.h"
26 
/*
 * Longest string, in bytes, that MatchDeviceName accepts; also the element count of the
 * wide-character buffer used while anonymizing a device name.
 */
#define DEVICE_NAME_MAX_LEN 128
/* Size of the scratch buffer that receives one wide character converted back to multibyte form. */
#define WIDE_CHAR_MAX_LEN 8
29 
/*
 * One entry of the anonymizer dispatch table: a predicate that recognizes a string format,
 * paired with the routine that anonymizes strings of that format.
 */
typedef struct {
    /* Returns true when the string str of length len has the format this entry handles. */
    bool (*Matcher)(const char *str, uint32_t len);
    /* Writes a newly allocated anonymized string to *anonymized and returns a SOFTBUS_* status code. */
    int32_t (*Anonymizer)(const char *str, uint32_t len, char **anonymized);
} AnonymizeHandler;
34 
/* Character written over every position that must be hidden. */
static const char SYMBOL_ANONYMIZE = '*';
/* Accepted MAC address delimiters (colon or dash). */
static const char SYMBOL_COLON = ':';
static const char SYMBOL_DASH = '-';
/* Field separator of a dotted IP address. */
static const char SYMBOL_DOT = '.';
39 
/* Tells whether chr lies in the closed interval [left, right]. */
static inline bool InRange(char chr, char left, char right)
{
    if (chr < left) {
        return false;
    }
    return chr <= right;
}
44 
/* Tells whether chr is a decimal digit, '0' through '9'. */
static inline bool IsNum(char chr)
{
    const bool isDecimalDigit = InRange(chr, '0', '9');
    return isDecimalDigit;
}
49 
/* Tells whether chr is a hexadecimal digit: 0-9, A-F or a-f. */
static inline bool IsHex(char chr)
{
    if (IsNum(chr)) {
        return true;
    }
    if (InRange(chr, 'A', 'F')) {
        return true;
    }
    return InRange(chr, 'a', 'f');
}
54 
/* Tells whether chr is an ASCII letter, upper or lower case. */
static inline bool IsAlphabet(char chr)
{
    if (InRange(chr, 'A', 'Z')) {
        return true;
    }
    return InRange(chr, 'a', 'z');
}
59 
IsDot(char chr)60 static inline bool IsDot(char chr)
61 {
62     return chr == SYMBOL_DOT;
63 }
64 
IsColon(char chr)65 static inline bool IsColon(char chr)
66 {
67     return chr == SYMBOL_COLON;
68 }
69 
IsDash(char chr)70 static inline bool IsDash(char chr)
71 {
72     return chr == SYMBOL_DASH;
73 }
74 
/*
 * Searches str[startPos .. len - 1] for the first occurrence of chr.
 * Returns its index, or -1 when chr does not occur (including when startPos >= len).
 */
static inline int32_t FindChar(char chr, const char *str, uint32_t len, uint32_t startPos)
{
    uint32_t pos = startPos;
    while (pos < len) {
        if (str[pos] == chr) {
            return (int32_t)pos;
        }
        ++pos;
    }
    return -1; // not found
}
84 
/*
 * Checks that the character of str at every index listed in positions satisfies isValidFunc.
 * positions holds positionNum indexes; they are visited in order and the check stops at the
 * first failure. Returns true when all listed characters pass (also when positionNum is 0).
 */
static bool IsValid(const char *str, const uint32_t *positions, uint32_t positionNum, bool(*isValidFunc)(char))
{
    uint32_t checked = 0;
    while (checked < positionNum) {
        const char candidate = str[positions[checked]];
        if (!isValidFunc(candidate)) {
            return false;
        }
        ++checked;
    }
    return true;
}
94 
/* Matcher for the empty string: true only when len is 0. str is not inspected. */
static bool MatchEmpty(const char *str, uint32_t len)
{
    (void)str;
    return (len > 0) ? false : true;
}
100 
MatchIpAddr(const char * str,uint32_t len)101 static bool MatchIpAddr(const char *str, uint32_t len)
102 {
103     static const uint32_t DOT_NUM_MAX = 3;
104     static const int32_t NUM_LEN_MAX = 3;
105     static const int32_t NUM_LEN_MIN = 1;
106     static const uint32_t IP_ADDR_MAX_LEN = 15;
107 
108     if (len > IP_ADDR_MAX_LEN) {
109         return false;
110     }
111 
112     for (uint32_t i = 0; i < len; ++i) {
113         if (!IsNum(str[i]) && !IsDot(str[i])) {
114             return false;
115         }
116     }
117 
118     int32_t numLen = 0;
119     int32_t posPrevDot = -1;
120     int32_t posNextDot = -1;
121     for (uint32_t dotNum = 0; dotNum < DOT_NUM_MAX; ++dotNum) {
122         posNextDot = FindChar(SYMBOL_DOT, str, len, posPrevDot + 1);
123         numLen = posNextDot - posPrevDot - 1;
124         if (numLen < NUM_LEN_MIN || numLen > NUM_LEN_MAX) {
125             return false;
126         }
127         posPrevDot = posNextDot;
128     }
129     numLen = (int32_t)len - posPrevDot - 1;
130     if (numLen < NUM_LEN_MIN || numLen > NUM_LEN_MAX) {
131         return false;
132     }
133 
134     return true;
135 }
136 
MatchMacAddr(const char * str,uint32_t len)137 static bool MatchMacAddr(const char *str, uint32_t len)
138 {
139     static const uint32_t MAC_ADDR_LEN = 17;
140     static const uint32_t DELIMETER_POSITIONS[] = {2, 5, 8, 11, 14};
141     static const uint32_t HEX_POSITIONS[] = {0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16};
142 
143     if (len != MAC_ADDR_LEN) {
144         return false;
145     }
146 
147     return IsValid(str, HEX_POSITIONS, sizeof(HEX_POSITIONS) / sizeof(HEX_POSITIONS[0]), IsHex) &&
148         (IsValid(str, DELIMETER_POSITIONS, sizeof(DELIMETER_POSITIONS) / sizeof(DELIMETER_POSITIONS[0]), IsDash) ||
149         IsValid(str, DELIMETER_POSITIONS, sizeof(DELIMETER_POSITIONS) / sizeof(DELIMETER_POSITIONS[0]), IsColon));
150 }
151 
/* Matcher for UDID strings: exactly 64 characters, each an ASCII letter or decimal digit. */
static bool MatchUdidStr(const char *str, uint32_t len)
{
    const uint32_t UDID_LEN = 64;

    if (len != UDID_LEN) {
        return false;
    }

    uint32_t pos = 0;
    while (pos < len) {
        const char cur = str[pos];
        if (!(IsNum(cur) || IsAlphabet(cur))) {
            return false;
        }
        ++pos;
    }
    return true;
}
166 
MatchDeviceName(const char * str,uint32_t len)167 static bool MatchDeviceName(const char *str, uint32_t len)
168 {
169     (void)str;
170     return len <= DEVICE_NAME_MAX_LEN;
171 }
172 
/*
 * Allocates a buffer for a string of len characters plus the terminator and writes '\0' at
 * index len; the first len bytes are left uninitialized.
 *
 * Returns the buffer (to be released with free), or NULL when the allocation fails or when
 * len is UINT32_MAX, for which len + 1 would wrap around to 0.
 */
static char *MallocStr(uint32_t len)
{
    if (len == UINT32_MAX) {
        return NULL;
    }

    char *str = (char *)malloc(sizeof(char) * ((size_t)len + 1));
    if (str != NULL) {
        str[len] = '\0';
    }
    return str;
}
181 
CopyStr(const char * str,char ** copy)182 static int32_t CopyStr(const char *str, char **copy)
183 {
184     uint32_t len = strlen(str);
185     *copy = MallocStr(len);
186     COMM_CHECK_AND_RETURN_RET_LOGE(*copy != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
187 
188     errno_t ret = memcpy_s(*copy, len, str, len);
189     COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
190 
191     return SOFTBUS_OK;
192 }
193 
AnonymizeIpAddr(const char * str,uint32_t len,char ** anonymized)194 static int32_t AnonymizeIpAddr(const char *str, uint32_t len, char **anonymized)
195 {
196     int32_t ret = CopyStr(str, anonymized);
197     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "copy ip addr failed");
198 
199     for (uint32_t i = len - 1; i >= 0; --i) {
200         if (IsDot((*anonymized)[i])) {
201             break;
202         }
203         (*anonymized)[i] = SYMBOL_ANONYMIZE;
204     }
205     return SOFTBUS_OK;
206 }
207 
AnonymizeMacAddr(const char * str,uint32_t len,char ** anonymized)208 static int32_t AnonymizeMacAddr(const char *str, uint32_t len, char **anonymized)
209 {
210     static const uint32_t ANONYMIZE_POSITIONS[] = {9, 10, 12, 13};
211 
212     int32_t ret = CopyStr(str, anonymized);
213     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "copy mac addr failed");
214 
215     for (uint32_t i = 0; i < sizeof(ANONYMIZE_POSITIONS) / sizeof(ANONYMIZE_POSITIONS[0]); ++i) {
216         (*anonymized)[ANONYMIZE_POSITIONS[i]] = SYMBOL_ANONYMIZE;
217     }
218     return SOFTBUS_OK;
219 }
220 
AnonymizeUdidStr(const char * str,uint32_t len,char ** anonymized)221 static int32_t AnonymizeUdidStr(const char *str, uint32_t len, char **anonymized)
222 {
223     static const uint32_t ANONYMIZE_UDID_LEN = 12;
224     static const uint32_t ANONYMIZE_POSITIONS[] = {5, 6};
225     static const uint32_t UNANONYMIZE_UDID_LEN = 5;
226     static const uint32_t UNANONYMIZE_SUFFIX_POS = ANONYMIZE_UDID_LEN - UNANONYMIZE_UDID_LEN;
227     static const uint32_t UNANONYMIZE_SUFFIX_OFFSET = 64 - UNANONYMIZE_UDID_LEN;
228 
229     (void)len;
230     *anonymized = MallocStr(ANONYMIZE_UDID_LEN);
231     COMM_CHECK_AND_RETURN_RET_LOGE(*anonymized != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
232 
233     errno_t ret = memcpy_s(*anonymized, ANONYMIZE_UDID_LEN, str, UNANONYMIZE_UDID_LEN);
234     COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
235     ret = memcpy_s(*anonymized + UNANONYMIZE_SUFFIX_POS, ANONYMIZE_UDID_LEN - UNANONYMIZE_SUFFIX_POS,
236         str + UNANONYMIZE_SUFFIX_OFFSET, UNANONYMIZE_UDID_LEN);
237     COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
238 
239     for (uint32_t i = 0; i < sizeof(ANONYMIZE_POSITIONS) / sizeof(ANONYMIZE_POSITIONS[0]); ++i) {
240         (*anonymized)[ANONYMIZE_POSITIONS[i]] = SYMBOL_ANONYMIZE;
241     }
242     return SOFTBUS_OK;
243 }
244 
SetLocale(char ** localeBefore)245 static int32_t SetLocale(char **localeBefore)
246 {
247     *localeBefore = setlocale(LC_CTYPE, NULL);
248     if (*localeBefore == NULL) {
249         COMM_LOGW(COMM_DFX, "get locale failed");
250     }
251 
252     char *localeAfter = setlocale(LC_CTYPE, "C.UTF-8");
253     return (localeAfter != NULL) ? SOFTBUS_OK : SOFTBUS_LOCALE_ERR;
254 }
255 
RestoreLocale(const char * localeBefore)256 static void RestoreLocale(const char *localeBefore)
257 {
258     if (setlocale(LC_CTYPE, localeBefore) == NULL) {
259         COMM_LOGW(COMM_DFX, "restore locale failed");
260     }
261 }
262 
AnonymizeMultiByteStr(const char * str,uint32_t len,uint32_t lenRatio,uint32_t posRatio,char ** anonymized)263 static int32_t AnonymizeMultiByteStr(const char *str, uint32_t len, uint32_t lenRatio, uint32_t posRatio,
264     char **anonymized)
265 {
266     COMM_CHECK_AND_RETURN_RET_LOGE(lenRatio != 0, SOFTBUS_INVALID_PARAM, COMM_DFX, "lenRatio is 0");
267     COMM_CHECK_AND_RETURN_RET_LOGE(posRatio != 0, SOFTBUS_INVALID_PARAM, COMM_DFX, "posRatio is 0");
268 
269     *anonymized = MallocStr(len);
270     COMM_CHECK_AND_RETURN_RET_LOGE(*anonymized != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
271 
272     wchar_t wideStr[DEVICE_NAME_MAX_LEN] = {0};
273     size_t wideCharNum = mbstowcs(wideStr, str, len);
274     COMM_CHECK_AND_RETURN_RET_LOGE(wideCharNum > 0, SOFTBUS_WIDECHAR_ERR, COMM_DFX, "convert wide str failed");
275 
276     uint32_t wideStrLen = (uint32_t)wideCharNum;
277     uint32_t anonymizedNum = (wideStrLen + lenRatio - 1) / lenRatio; // +ratio-1 for round up
278     uint32_t plainPrefixPos = wideStrLen / posRatio;
279     uint32_t plainSuffixPos = plainPrefixPos + anonymizedNum;
280 
281     char multiByteChar[WIDE_CHAR_MAX_LEN] = {0};
282     uint32_t multiByteStrIndex = 0;
283     uint32_t wideStrIndex = 0;
284     errno_t ret = EOK;
285     for (; wideStrIndex < plainPrefixPos && multiByteStrIndex < len; ++wideStrIndex) {
286         int32_t multiByteCharLen = wctomb(multiByteChar, wideStr[wideStrIndex]);
287         COMM_CHECK_AND_RETURN_RET_LOGE(multiByteCharLen > 0, SOFTBUS_WIDECHAR_ERR, COMM_DFX, "convert prefix failed");
288         ret = memcpy_s(*anonymized + multiByteStrIndex, len - multiByteStrIndex, multiByteChar, multiByteCharLen);
289         COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_MEM_ERR, COMM_DFX, "copy prefix failed");
290         multiByteStrIndex += (uint32_t)multiByteCharLen;
291     }
292 
293     for (; wideStrIndex < plainSuffixPos && multiByteStrIndex < len; ++wideStrIndex) {
294         (*anonymized)[multiByteStrIndex++] = SYMBOL_ANONYMIZE;
295     }
296 
297     for (; wideStrIndex < wideStrLen && multiByteStrIndex < len; ++wideStrIndex) {
298         int32_t multiByteCharLen = wctomb(multiByteChar, wideStr[wideStrIndex]);
299         COMM_CHECK_AND_RETURN_RET_LOGE(multiByteCharLen > 0, SOFTBUS_WIDECHAR_ERR, COMM_DFX, "convert suffix failed");
300         ret = memcpy_s(*anonymized + multiByteStrIndex, len - multiByteStrIndex, multiByteChar, multiByteCharLen);
301         COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_MEM_ERR, COMM_DFX, "copy prefix failed");
302         multiByteStrIndex += (uint32_t)multiByteCharLen;
303     }
304 
305     uint32_t endPos = multiByteStrIndex < len ? multiByteStrIndex : len;
306     (*anonymized)[endPos] = '\0';
307     return SOFTBUS_OK;
308 }
309 
AnonymizeDeviceName(const char * str,uint32_t len,char ** anonymized)310 static int32_t AnonymizeDeviceName(const char *str, uint32_t len, char **anonymized)
311 {
312     static const uint32_t ANONYMIZE_LEN_RATIO = 2; // anonymize half str
313     static const uint32_t ANONYMIZE_POS_RATIO = 4; // start from 1/4 pos
314 
315     char *localeBefore = NULL;
316     int32_t ret = SetLocale(&localeBefore);
317     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "get locale failed");
318 
319     ret = AnonymizeMultiByteStr(str, len, ANONYMIZE_LEN_RATIO, ANONYMIZE_POS_RATIO, anonymized);
320     RestoreLocale(localeBefore);
321     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "anonymize multi byte str failed");
322     return ret;
323 }
324 
AnonymizeHalfStr(const char * str,uint32_t len,char ** anonymized)325 static int32_t AnonymizeHalfStr(const char *str, uint32_t len, char **anonymized)
326 {
327     uint32_t plainTextLen = len / 2;
328     uint32_t plainTextOffset = len - plainTextLen;
329     uint32_t anonymizeLen = 1 + plainTextLen;
330 
331     *anonymized = MallocStr(anonymizeLen);
332     COMM_CHECK_AND_RETURN_RET_LOGE(*anonymized != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
333 
334     if (plainTextLen > 0) {
335         errno_t ret = memcpy_s(*anonymized + 1, plainTextLen, str + plainTextOffset, plainTextLen);
336         COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
337     }
338 
339     (*anonymized)[0] = SYMBOL_ANONYMIZE;
340     return SOFTBUS_OK;
341 }
342 
/*
 * Anonymizer for the empty string: stores a copy of the literal "EMPTY" in *anonymized.
 * str and len are not used. Returns the status reported by CopyStr.
 */
static int32_t AnonymizeEmpty(const char *str, uint32_t len, char **anonymized)
{
    (void)len;
    (void)str;
    const int32_t ret = CopyStr("EMPTY", anonymized);
    return ret;
}
349 
AnonymizeInner(const char * str,char ** anonymized)350 static int32_t AnonymizeInner(const char *str, char **anonymized)
351 {
352     if (str == NULL) {
353         return CopyStr("NULL", anonymized);
354     }
355 
356     static const AnonymizeHandler ANONYMIZE_HANDLER[] = {
357         { MatchEmpty, AnonymizeEmpty },
358         { MatchIpAddr, AnonymizeIpAddr },
359         { MatchMacAddr, AnonymizeMacAddr },
360         { MatchUdidStr, AnonymizeUdidStr },
361         { MatchDeviceName, AnonymizeDeviceName },
362     };
363 
364     uint32_t len = strlen(str);
365     for (uint32_t i = 0; i < sizeof(ANONYMIZE_HANDLER) / sizeof(AnonymizeHandler); ++i) {
366         if (ANONYMIZE_HANDLER[i].Matcher(str, len)) {
367             return ANONYMIZE_HANDLER[i].Anonymizer(str, len, anonymized);
368         }
369     }
370     return AnonymizeHalfStr(str, len, anonymized);
371 }
372 
Anonymize(const char * plainStr,char ** anonymizedStr)373 void Anonymize(const char *plainStr, char **anonymizedStr)
374 {
375     COMM_CHECK_AND_RETURN_LOGE(anonymizedStr != NULL, COMM_DFX, "anonymizedStr is null");
376 
377     if (AnonymizeInner(plainStr, anonymizedStr) == SOFTBUS_OK) {
378         return;
379     }
380     if (*anonymizedStr != NULL) {
381         AnonymizeFree(*anonymizedStr);
382         *anonymizedStr = NULL;
383     }
384 }
385 
/* Releases a string produced by Anonymize. Passing NULL is allowed and does nothing. */
void AnonymizeFree(char *anonymizedStr)
{
    free(anonymizedStr); // free(NULL) is a no-op, so no separate NULL check is needed
}
393 
/* Returns anonymizedStr unchanged, or the literal "NULL" when it is a null pointer. */
const char *AnonymizeWrapper(const char *anonymizedStr)
{
    if (anonymizedStr) {
        return anonymizedStr;
    }
    return "NULL";
}
398