← Back to Overview

src/common/unicode_norm.c

Coverage: 146/153 lines (95.4%)
Total Lines
153
modified
Covered
146
95.4%
Uncovered
7
4.6%
Keyboard navigation
get_code_entry() lines 55-60
Modified Lines Coverage: 2/2 lines (100.0%)
LineHitsSourceCommit
55 - get_code_entry(char32_t code) -
56 - { -
57 192070 uint16 idx = normalization_index(code); 82fd08eRefactoring Unicode Normalization Forms, performance up.
58 - -
59 192070 return idx != 0 ? &UnicodeDecompMain[idx] : NULL; 4fa0062Improve the performance of Unicode Normalization Forms.
60 - } -
get_code_decomposition() lines 88-103
Modified Lines Coverage: 2/2 lines (100.0%)
LineHitsSourceCommit
88 - get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size) -
89 - { -
90 - static char32_t x; -
91 - -
92 - if (DECOMPOSITION_IS_INLINE(entry)) -
93 - { -
94 - Assert(DECOMPOSITION_SIZE(entry) == 1); -
95 - x = (char32_t) entry->dec_index; -
96 - *dec_size = 1; -
97 - return &x; -
98 - } -
99 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
100 3956 *dec_size = DECOMPOSITION_SIZE(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
101 3956 return &UnicodeDecompCodepoints[entry->dec_index]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
102 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
103 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
get_code_compat_decomposition() lines 106-125
Modified Lines Coverage: 11/11 lines (100.0%)
LineHitsSourceCommit
106 13994 get_code_compat_decomposition(const pg_unicode_decomposition *entry, 82fd08eRefactoring Unicode Normalization Forms, performance up.
107 - int *dec_size) 82fd08eRefactoring Unicode Normalization Forms, performance up.
108 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
109 - static char32_t x; 82fd08eRefactoring Unicode Normalization Forms, performance up.
110 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
111 13994 if (DECOMPOSITION_IS_INLINE(entry)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
112 - { -
113 7468 x = (char32_t) entry->dec_index; 82fd08eRefactoring Unicode Normalization Forms, performance up.
114 7468 *dec_size = 1; 82fd08eRefactoring Unicode Normalization Forms, performance up.
115 7468 return &x; 82fd08eRefactoring Unicode Normalization Forms, performance up.
116 - } -
117 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
118 6526 *dec_size = DECOMPOSITION_COMPAT_SIZE(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
119 6526 if (*dec_size > 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
120 256 return &UnicodeDecompCodepoints[entry->dec_index 82fd08eRefactoring Unicode Normalization Forms, performance up.
121 256 + DECOMPOSITION_SIZE(entry)]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
122 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
123 6270 *dec_size = DECOMPOSITION_SIZE(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
124 6270 return &UnicodeDecompCodepoints[entry->dec_index]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
125 - } -
get_code_size() lines 131-138
Modified Lines Coverage: 4/4 lines (100.0%)
LineHitsSourceCommit
131 6290 get_code_size(const pg_unicode_decomposition *entry) 82fd08eRefactoring Unicode Normalization Forms, performance up.
132 - { -
133 6290 if (DECOMPOSITION_IS_INLINE(entry)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
134 2334 return 1; 82fd08eRefactoring Unicode Normalization Forms, performance up.
135 - -
136 3956 return DECOMPOSITION_SIZE(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
137 - -
138 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
get_code_compat_size() lines 141-153
Modified Lines Coverage: 6/6 lines (100.0%)
LineHitsSourceCommit
141 13994 get_code_compat_size(const pg_unicode_decomposition *entry) 82fd08eRefactoring Unicode Normalization Forms, performance up.
142 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
143 - int size; 82fd08eRefactoring Unicode Normalization Forms, performance up.
144 - -
145 13994 if (DECOMPOSITION_IS_INLINE(entry)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
146 - return 1; -
147 - -
148 6526 size = DECOMPOSITION_COMPAT_SIZE(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
149 6526 if (size > 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
150 256 return size; 82fd08eRefactoring Unicode Normalization Forms, performance up.
151 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
152 6270 return DECOMPOSITION_SIZE(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
153 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
get_decomposed_size() lines 159-212
Modified Lines Coverage: 17/17 lines (100.0%)
LineHitsSourceCommit
159 80136 get_decomposed_size(const char32_t *p, bool compat) 82fd08eRefactoring Unicode Normalization Forms, performance up.
160 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
161 - char32_t code; 82fd08eRefactoring Unicode Normalization Forms, performance up.
162 - const pg_unicode_decomposition *entry; 82fd08eRefactoring Unicode Normalization Forms, performance up.
163 80136 int size = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
164 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
165 204588 while ((code = *p++)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
166 - { -
167 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
168 - * Fast path for Hangul characters not stored in tables to save memory 82fd08eRefactoring Unicode Normalization Forms, performance up.
169 - * as decomposition is algorithmic. See 82fd08eRefactoring Unicode Normalization Forms, performance up.
170 - * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for 82fd08eRefactoring Unicode Normalization Forms, performance up.
171 - * details on the matter. 82fd08eRefactoring Unicode Normalization Forms, performance up.
172 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
173 124452 if (code >= SBASE && code < SBASE + SCOUNT) 82fd08eRefactoring Unicode Normalization Forms, performance up.
174 45064 { 82fd08eRefactoring Unicode Normalization Forms, performance up.
175 - uint32 tindex, 82fd08eRefactoring Unicode Normalization Forms, performance up.
176 - sindex; 82fd08eRefactoring Unicode Normalization Forms, performance up.
177 - 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
178 45064 sindex = code - SBASE; 82fd08eRefactoring Unicode Normalization Forms, performance up.
179 45064 tindex = sindex % TCOUNT; 82fd08eRefactoring Unicode Normalization Forms, performance up.
180 - -
181 45064 if (tindex != 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
182 43092 size += 3; 82fd08eRefactoring Unicode Normalization Forms, performance up.
183 - else 82fd08eRefactoring Unicode Normalization Forms, performance up.
184 1972 size += 2; 82fd08eRefactoring Unicode Normalization Forms, performance up.
185 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
186 - else 82fd08eRefactoring Unicode Normalization Forms, performance up.
187 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
188 79388 entry = get_code_entry(code); 82fd08eRefactoring Unicode Normalization Forms, performance up.
189 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
190 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
191 - * Just count current code if no other decompositions. A NULL 82fd08eRefactoring Unicode Normalization Forms, performance up.
192 - * entry is equivalent to a character with class 0 and no 82fd08eRefactoring Unicode Normalization Forms, performance up.
193 - * decompositions. 82fd08eRefactoring Unicode Normalization Forms, performance up.
194 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
195 79388 if (entry == NULL || entry->dec_index == 0 || 82fd08eRefactoring Unicode Normalization Forms, performance up.
196 27988 (!compat && DECOMPOSITION_IS_COMPAT(entry))) 82fd08eRefactoring Unicode Normalization Forms, performance up.
197 59104 size += 1; 82fd08eRefactoring Unicode Normalization Forms, performance up.
198 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
199 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
200 - * If this entry has other decomposition codes look at them as 82fd08eRefactoring Unicode Normalization Forms, performance up.
201 - * well. First get its decomposition in the list of tables 82fd08eRefactoring Unicode Normalization Forms, performance up.
202 - * available. 82fd08eRefactoring Unicode Normalization Forms, performance up.
203 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
204 20284 else if (!compat) 82fd08eRefactoring Unicode Normalization Forms, performance up.
205 6290 size += get_code_size(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
206 - else 82fd08eRefactoring Unicode Normalization Forms, performance up.
207 13994 size += get_code_compat_size(entry); 82fd08eRefactoring Unicode Normalization Forms, performance up.
208 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
209 - } -
210 - -
211 - return size; -
212 - } -
recompose_code() lines 221-253
Modified Lines Coverage: 2/2 lines (100.0%)
LineHitsSourceCommit
221 - recompose_code(uint32 start, uint32 code, uint32 *result) -
222 - { -
223 - /* -
224 - * Handle Hangul characters algorithmically, per the Unicode spec. -
225 - * -
226 - * Check if two current characters are L and V. -
227 - */ -
228 - if (start >= LBASE && start < LBASE + LCOUNT && -
229 - code >= VBASE && code < VBASE + VCOUNT) -
230 - { -
231 - /* make syllable of form LV */ -
232 - uint32 lindex = start - LBASE; -
233 - uint32 vindex = code - VBASE; -
234 - -
235 - *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; -
236 - return true; -
237 - } -
238 - /* Check if two current characters are LV and T */ -
239 - else if (start >= SBASE && start < (SBASE + SCOUNT) && -
240 - ((start - SBASE) % TCOUNT) == 0 && -
241 - code >= TBASE && code < (TBASE + TCOUNT)) -
242 - { -
243 - /* make syllable of form LVT */ -
244 - uint32 tindex = code - TBASE; -
245 - -
246 - *result = start + tindex; -
247 - return true; -
248 - } -
249 - -
250 21559 *result = normalization_inverse(start, code); 82fd08eRefactoring Unicode Normalization Forms, performance up.
251 - 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
252 21559 return *result != 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
253 - } 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
unicode_canonical_reorder() lines 259-301
Modified Lines Coverage: 14/14 lines (100.0%)
LineHitsSourceCommit
259 12648 unicode_canonical_reorder(char32_t *decomps, char32_t *last_starter, 82fd08eRefactoring Unicode Normalization Forms, performance up.
260 - uint8 *cur_class) 82fd08eRefactoring Unicode Normalization Forms, performance up.
261 - { 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
262 - int i, 82fd08eRefactoring Unicode Normalization Forms, performance up.
263 - length; 82fd08eRefactoring Unicode Normalization Forms, performance up.
264 - uint8 ccc; 82fd08eRefactoring Unicode Normalization Forms, performance up.
265 - char32_t tmp; 82fd08eRefactoring Unicode Normalization Forms, performance up.
266 - 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
267 - /* 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
268 - * Reordering occurs from starter to starter. There cannot be another 82fd08eRefactoring Unicode Normalization Forms, performance up.
269 - * starter between starters. Therefore, there is no need to perform a 82fd08eRefactoring Unicode Normalization Forms, performance up.
270 - * combining class check on 0. 82fd08eRefactoring Unicode Normalization Forms, performance up.
271 - */ 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
272 - -
273 12648 length = (int) (last_starter - decomps); 82fd08eRefactoring Unicode Normalization Forms, performance up.
274 - -
275 80844 for (i = 1; i < length; i++) 82fd08eRefactoring Unicode Normalization Forms, performance up.
276 - { 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
277 - /* -
278 - * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html) 82fd08eRefactoring Unicode Normalization Forms, performance up.
279 - * annex 4, a sequence of two adjacent characters in a string is an 82fd08eRefactoring Unicode Normalization Forms, performance up.
280 - * exchangeable pair if the combining class (from the Unicode 82fd08eRefactoring Unicode Normalization Forms, performance up.
281 - * Character Database) for the first character is greater than the 82fd08eRefactoring Unicode Normalization Forms, performance up.
282 - * combining class for the second, and the second is not a starter. A 82fd08eRefactoring Unicode Normalization Forms, performance up.
283 - * character is a starter if its combining class is 0. 82fd08eRefactoring Unicode Normalization Forms, performance up.
284 - */ -
285 68196 if (cur_class[i - 1] <= cur_class[i]) 82fd08eRefactoring Unicode Normalization Forms, performance up.
286 36274 continue; 82fd08eRefactoring Unicode Normalization Forms, performance up.
287 - -
288 - /* exchange can happen */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
289 31922 tmp = decomps[i - 1]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
290 31922 decomps[i - 1] = decomps[i]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
291 31922 decomps[i] = tmp; 82fd08eRefactoring Unicode Normalization Forms, performance up.
292 - -
293 31922 ccc = cur_class[i - 1]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
294 31922 cur_class[i - 1] = cur_class[i]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
295 31922 cur_class[i] = ccc; 82fd08eRefactoring Unicode Normalization Forms, performance up.
296 - -
297 - /* backtrack to check again */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
298 31922 if (i > 1) 82fd08eRefactoring Unicode Normalization Forms, performance up.
299 19840 i -= 2; 82fd08eRefactoring Unicode Normalization Forms, performance up.
300 - } 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
301 12648 } 82fd08eRefactoring Unicode Normalization Forms, performance up.
decomposition() lines 304-430
Modified Lines Coverage: 50/52 lines (96.2%)
LineHitsSourceCommit
304 80136 decomposition(const char32_t *p, char32_t *decomps, 82fd08eRefactoring Unicode Normalization Forms, performance up.
305 - uint8 *cur_class, bool compat) 82fd08eRefactoring Unicode Normalization Forms, performance up.
306 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
307 - int length; 82fd08eRefactoring Unicode Normalization Forms, performance up.
308 - uint32 l, 82fd08eRefactoring Unicode Normalization Forms, performance up.
309 - v, 82fd08eRefactoring Unicode Normalization Forms, performance up.
310 - tindex, 82fd08eRefactoring Unicode Normalization Forms, performance up.
311 - sindex; 82fd08eRefactoring Unicode Normalization Forms, performance up.
312 - char32_t cp, 82fd08eRefactoring Unicode Normalization Forms, performance up.
313 - *next_after_starter; 82fd08eRefactoring Unicode Normalization Forms, performance up.
314 - const char32_t *cps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
315 - const pg_unicode_decomposition *entry; 82fd08eRefactoring Unicode Normalization Forms, performance up.
316 - uint8 *next_after_class; 82fd08eRefactoring Unicode Normalization Forms, performance up.
317 - -
318 80136 next_after_starter = decomps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
319 80136 next_after_class = cur_class; 82fd08eRefactoring Unicode Normalization Forms, performance up.
320 - -
321 204588 while ((cp = *p++)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
322 - { 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
323 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
324 - * Fast path for Hangul characters not stored in tables to save memory 82fd08eRefactoring Unicode Normalization Forms, performance up.
325 - * as decomposition is algorithmic. See 82fd08eRefactoring Unicode Normalization Forms, performance up.
326 - * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for 82fd08eRefactoring Unicode Normalization Forms, performance up.
327 - * details on the matter. 82fd08eRefactoring Unicode Normalization Forms, performance up.
328 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
329 124452 if (cp >= SBASE && cp < SBASE + SCOUNT) 82fd08eRefactoring Unicode Normalization Forms, performance up.
330 - { -
331 45064 sindex = cp - SBASE; 82fd08eRefactoring Unicode Normalization Forms, performance up.
332 45064 l = LBASE + sindex / (VCOUNT * TCOUNT); 82fd08eRefactoring Unicode Normalization Forms, performance up.
333 45064 v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT; 82fd08eRefactoring Unicode Normalization Forms, performance up.
334 45064 tindex = sindex % TCOUNT; 82fd08eRefactoring Unicode Normalization Forms, performance up.
335 - -
336 45064 if (decomps - next_after_starter > 1) 82fd08eRefactoring Unicode Normalization Forms, performance up.
337 0 unicode_canonical_reorder(next_after_starter, decomps, 82fd08eRefactoring Unicode Normalization Forms, performance up.
338 - next_after_class); 82fd08eRefactoring Unicode Normalization Forms, performance up.
339 - -
340 45064 *decomps++ = l; 82fd08eRefactoring Unicode Normalization Forms, performance up.
341 45064 *cur_class++ = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
342 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
343 45064 *decomps++ = v; 82fd08eRefactoring Unicode Normalization Forms, performance up.
344 45064 *cur_class++ = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
345 - -
346 45064 if (tindex != 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
347 - { -
348 43092 *decomps++ = TBASE + tindex; 82fd08eRefactoring Unicode Normalization Forms, performance up.
349 43092 *cur_class++ = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
350 - } -
351 - -
352 45064 next_after_class = cur_class; 82fd08eRefactoring Unicode Normalization Forms, performance up.
353 45064 next_after_starter = decomps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
354 - -
355 45064 continue; 82fd08eRefactoring Unicode Normalization Forms, performance up.
356 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
357 - -
358 79388 entry = get_code_entry(cp); 82fd08eRefactoring Unicode Normalization Forms, performance up.
359 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
360 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
361 - * Just fill in with the current decomposition if there are no 82fd08eRefactoring Unicode Normalization Forms, performance up.
362 - * decomposition codes. A NULL entry is equivalent to a character 82fd08eRefactoring Unicode Normalization Forms, performance up.
363 - * with class 0 and no decompositions, so just leave also in this 82fd08eRefactoring Unicode Normalization Forms, performance up.
364 - * case. 82fd08eRefactoring Unicode Normalization Forms, performance up.
365 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
366 79388 if (entry == NULL || entry->dec_index == 0 82fd08eRefactoring Unicode Normalization Forms, performance up.
367 27988 || (!compat && DECOMPOSITION_IS_COMPAT(entry))) 82fd08eRefactoring Unicode Normalization Forms, performance up.
368 - { -
369 59104 *decomps++ = cp; 82fd08eRefactoring Unicode Normalization Forms, performance up.
370 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
371 59104 if (entry != NULL && entry->comb_class > 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
372 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
373 34372 *cur_class++ = entry->comb_class; 82fd08eRefactoring Unicode Normalization Forms, performance up.
374 34372 continue; 82fd08eRefactoring Unicode Normalization Forms, performance up.
375 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
376 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
377 24732 *cur_class++ = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
378 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
379 24732 if (decomps - next_after_starter > 1) 82fd08eRefactoring Unicode Normalization Forms, performance up.
380 8528 unicode_canonical_reorder(next_after_starter, decomps - 1, 82fd08eRefactoring Unicode Normalization Forms, performance up.
381 - next_after_class); 82fd08eRefactoring Unicode Normalization Forms, performance up.
382 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
383 24732 next_after_class = cur_class; 82fd08eRefactoring Unicode Normalization Forms, performance up.
384 24732 next_after_starter = decomps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
385 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
386 24732 continue; 82fd08eRefactoring Unicode Normalization Forms, performance up.
387 - } -
388 - -
389 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
390 - * Recursion is not required in the decomposition; the data was 82fd08eRefactoring Unicode Normalization Forms, performance up.
391 - * expanded in advance during the formation of the decomposition 82fd08eRefactoring Unicode Normalization Forms, performance up.
392 - * tables. 82fd08eRefactoring Unicode Normalization Forms, performance up.
393 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
394 20284 if (!compat) 82fd08eRefactoring Unicode Normalization Forms, performance up.
395 6290 cps = get_code_decomposition(entry, &length); 82fd08eRefactoring Unicode Normalization Forms, performance up.
396 - else 82fd08eRefactoring Unicode Normalization Forms, performance up.
397 13994 cps = get_code_compat_decomposition(entry, &length); 82fd08eRefactoring Unicode Normalization Forms, performance up.
398 - -
399 53578 for (int i = 0; i < length; i++) 82fd08eRefactoring Unicode Normalization Forms, performance up.
400 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
401 - char32_t ccc; 82fd08eRefactoring Unicode Normalization Forms, performance up.
402 33294 const char32_t lcode = cps[i]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
403 - -
404 33294 ccc = get_canonical_class(lcode); 82fd08eRefactoring Unicode Normalization Forms, performance up.
405 - -
406 33294 *decomps++ = lcode; 82fd08eRefactoring Unicode Normalization Forms, performance up.
407 33294 *cur_class++ = ccc; 82fd08eRefactoring Unicode Normalization Forms, performance up.
408 - -
409 33294 if (ccc == 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
410 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
411 23220 char32_t *starter = decomps - 1; 82fd08eRefactoring Unicode Normalization Forms, performance up.
412 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
413 23220 if (starter - next_after_starter > 1) 82fd08eRefactoring Unicode Normalization Forms, performance up.
414 0 unicode_canonical_reorder(next_after_starter, starter, 82fd08eRefactoring Unicode Normalization Forms, performance up.
415 - next_after_class); 82fd08eRefactoring Unicode Normalization Forms, performance up.
416 - -
417 23220 next_after_starter = decomps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
418 23220 next_after_class = cur_class; 82fd08eRefactoring Unicode Normalization Forms, performance up.
419 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
420 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
421 - } -
422 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
423 80136 if (decomps - next_after_starter > 1) 82fd08eRefactoring Unicode Normalization Forms, performance up.
424 4120 unicode_canonical_reorder(next_after_starter, decomps, 82fd08eRefactoring Unicode Normalization Forms, performance up.
425 - next_after_class); 82fd08eRefactoring Unicode Normalization Forms, performance up.
426 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
427 80136 *decomps = '\0'; 82fd08eRefactoring Unicode Normalization Forms, performance up.
428 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
429 80136 return decomps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
430 - } -
unicode_normalize() lines 442-553
Modified Lines Coverage: 38/43 lines (88.4%)
LineHitsSourceCommit
442 - unicode_normalize(UnicodeNormalizationForm form, const char32_t *input) -
443 - { -
444 - bool compat = (form == UNICODE_NFKC || form == UNICODE_NFKD); -
445 - bool recompose = (form == UNICODE_NFC || form == UNICODE_NFKC); -
446 - int decomp_len, 82fd08eRefactoring Unicode Normalization Forms, performance up.
447 - i, 82fd08eRefactoring Unicode Normalization Forms, performance up.
448 - w; 82fd08eRefactoring Unicode Normalization Forms, performance up.
449 - char32_t *decomps, 82fd08eRefactoring Unicode Normalization Forms, performance up.
450 - *decomps_end, 82fd08eRefactoring Unicode Normalization Forms, performance up.
451 - *starter, 82fd08eRefactoring Unicode Normalization Forms, performance up.
452 - composed; 82fd08eRefactoring Unicode Normalization Forms, performance up.
453 - uint8 *classes, 82fd08eRefactoring Unicode Normalization Forms, performance up.
454 - prev_ccc; 82fd08eRefactoring Unicode Normalization Forms, performance up.
455 - uint8 class_buf[512]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
456 - -
457 - /* -
458 - * Calculate how many characters long the decomposed version will be. -
459 - */ -
460 80136 decomp_len = get_decomposed_size(input, compat); 82fd08eRefactoring Unicode Normalization Forms, performance up.
461 - -
462 80136 decomps = (char32_t *) ALLOC((decomp_len + 1) * sizeof(char32_t)); 82fd08eRefactoring Unicode Normalization Forms, performance up.
463 80136 if (decomps == NULL) 82fd08eRefactoring Unicode Normalization Forms, performance up.
464 - return NULL; -
465 - -
466 - /* -
467 - * We will cache all combining classes to reduce the number of visits to 82fd08eRefactoring Unicode Normalization Forms, performance up.
468 - * data tables. 82fd08eRefactoring Unicode Normalization Forms, performance up.
469 - */ -
470 80136 if (decomp_len <= sizeof(class_buf)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
471 - { 60f11b8Use SASLprep to normalize passwords for SCRAM authentication.
472 80136 classes = class_buf; 82fd08eRefactoring Unicode Normalization Forms, performance up.
473 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
474 - else 82fd08eRefactoring Unicode Normalization Forms, performance up.
475 - { -
476 0 classes = (uint8 *) ALLOC(decomp_len * sizeof(uint8)); 82fd08eRefactoring Unicode Normalization Forms, performance up.
477 0 if (classes == NULL) 82fd08eRefactoring Unicode Normalization Forms, performance up.
478 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
479 0 FREE(decomps); 82fd08eRefactoring Unicode Normalization Forms, performance up.
480 0 return NULL; 82fd08eRefactoring Unicode Normalization Forms, performance up.
481 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
482 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
483 - -
484 80136 decomps_end = decomposition(input, decomps, classes, compat); 82fd08eRefactoring Unicode Normalization Forms, performance up.
485 - -
486 80136 if (!recompose) 82fd08eRefactoring Unicode Normalization Forms, performance up.
487 40068 goto done; 82fd08eRefactoring Unicode Normalization Forms, performance up.
488 - -
489 40068 starter = NULL; 82fd08eRefactoring Unicode Normalization Forms, performance up.
490 40068 decomp_len = (int) (decomps_end - decomps); 82fd08eRefactoring Unicode Normalization Forms, performance up.
491 - -
492 - /* 82fd08eRefactoring Unicode Normalization Forms, performance up.
493 - * Find the first starter. This is necessary in order to avoid checking 82fd08eRefactoring Unicode Normalization Forms, performance up.
494 - * for the presence of a starter in the main recomposition cycle. 82fd08eRefactoring Unicode Normalization Forms, performance up.
495 - */ 82fd08eRefactoring Unicode Normalization Forms, performance up.
496 40110 for (i = 0; i < decomp_len; i++) 82fd08eRefactoring Unicode Normalization Forms, performance up.
497 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
498 40094 if (classes[i] == 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
499 - { 82fd08eRefactoring Unicode Normalization Forms, performance up.
500 40052 starter = &decomps[i]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
501 40052 i += 1; 82fd08eRefactoring Unicode Normalization Forms, performance up.
502 40052 break; 82fd08eRefactoring Unicode Normalization Forms, performance up.
503 - } 82fd08eRefactoring Unicode Normalization Forms, performance up.
504 - } -
505 - -
506 40068 if (starter == NULL) 82fd08eRefactoring Unicode Normalization Forms, performance up.
507 16 goto done; 82fd08eRefactoring Unicode Normalization Forms, performance up.
508 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
509 40052 prev_ccc = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
510 - -
511 - /* -
512 - * The last phase of NFC and NFKC is the recomposition of the reordered -
513 - * Unicode string using combining classes. The recomposed string cannot be -
514 - * longer than the decomposed one, so make the allocation of the output -
515 - * string based on that assumption. -
516 - */ -
517 112767 for (w = i; i < decomp_len; i++) 82fd08eRefactoring Unicode Normalization Forms, performance up.
518 - { -
519 72715 char32_t ch = decomps[i]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
520 72715 uint8 ccc = classes[i]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
521 - -
522 72715 if (prev_ccc != 0 && prev_ccc >= ccc) 82fd08eRefactoring Unicode Normalization Forms, performance up.
523 - { -
524 7034 if (ccc == 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
525 4240 starter = &decomps[w]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
526 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
527 7034 decomps[w++] = ch; 82fd08eRefactoring Unicode Normalization Forms, performance up.
528 7034 continue; 82fd08eRefactoring Unicode Normalization Forms, performance up.
529 - } -
530 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
531 65681 if (recompose_code(*starter, ch, &composed)) 82fd08eRefactoring Unicode Normalization Forms, performance up.
532 - { -
533 51684 *starter = composed; 82fd08eRefactoring Unicode Normalization Forms, performance up.
534 51684 prev_ccc = 0; 82fd08eRefactoring Unicode Normalization Forms, performance up.
535 51684 continue; 82fd08eRefactoring Unicode Normalization Forms, performance up.
536 - } -
537 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
538 13997 if (ccc == 0) 82fd08eRefactoring Unicode Normalization Forms, performance up.
539 1842 starter = &decomps[w]; 82fd08eRefactoring Unicode Normalization Forms, performance up.
540 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
541 13997 decomps[w++] = ch; 82fd08eRefactoring Unicode Normalization Forms, performance up.
542 13997 prev_ccc = ccc; 82fd08eRefactoring Unicode Normalization Forms, performance up.
543 - } -
544 - -
545 40052 decomps[w] = '\0'; 82fd08eRefactoring Unicode Normalization Forms, performance up.
546 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
547 80136 done: 82fd08eRefactoring Unicode Normalization Forms, performance up.
548 - 82fd08eRefactoring Unicode Normalization Forms, performance up.
549 80136 if (classes != class_buf) 82fd08eRefactoring Unicode Normalization Forms, performance up.
550 0 FREE(classes); 82fd08eRefactoring Unicode Normalization Forms, performance up.
551 - -
552 80136 return decomps; 82fd08eRefactoring Unicode Normalization Forms, performance up.
553 - } -