diff --git a/contrib/unaccent/generate_unaccent_rules.py b/contrib/unaccent/generate_unaccent_rules.py index 71932c8224..bb797fc954 100644 --- a/contrib/unaccent/generate_unaccent_rules.py +++ b/contrib/unaccent/generate_unaccent_rules.py @@ -208,8 +208,8 @@ def special_cases(): """Returns the special cases which are not handled by other methods""" charactersDict = {} - charactersDict[0x2103] = "\xb0C" # DEGREE CELSIUS - charactersDict[0x2109] = "\xb0F" # DEGREE FAHRENHEIT + # Template example (already unnecessary): + #charactersDict[0x2103] = "\xb0C" # DEGREE CELSIUS return charactersDict @@ -252,6 +252,12 @@ def main(args): charactersDict[codepoint.id] = "".join(chr(combining_codepoint.id) for combining_codepoint in get_plain_letters(codepoint, table)) + elif (codepoint.general_category.startswith('N') or codepoint.general_category.startswith('So')) and \ + len(codepoint.combining_ids) > 0 and \ + args.noLigaturesExpansion is False and is_ligature(codepoint, table): + charactersDict[codepoint.id] = "".join(chr(combining_codepoint.id) + for combining_codepoint + in get_plain_letters(codepoint, table)) elif is_mark_to_remove(codepoint): charactersDict[codepoint.id] = None diff --git a/contrib/unaccent/unaccent.rules b/contrib/unaccent/unaccent.rules index 3a5d35627a..9013014d2f 100644 --- a/contrib/unaccent/unaccent.rules +++ b/contrib/unaccent/unaccent.rules @@ -5,7 +5,10 @@ ­ - ® (R) ± +/- +² 2 +³ 3 µ μ +¹ 1 º o » >> ¼ 1/4 @@ -1207,8 +1210,25 @@ ⁈ ?! ⁉ !? ⁎ * +⁰ 0 ⁱ i +⁴ 4 +⁵ 5 +⁶ 6 +⁷ 7 +⁸ 8 +⁹ 9 ⁿ n +₀ 0 +₁ 1 +₂ 2 +₃ 3 +₄ 4 +₅ 5 +₆ 6 +₇ 7 +₈ 8 +₉ 9 ₐ a ₑ e ₒ o @@ -1261,7 +1281,9 @@ ℜ R ℝ R ℞ Rx +℠ SM ℡ TEL +™ TM ℤ Z Ω Ω ℨ Z @@ -1341,6 +1363,26 @@ ∥ || ≪ << ≫ >> +① 1 +② 2 +③ 3 +④ 4 +⑤ 5 +⑥ 6 +⑦ 7 +⑧ 8 +⑨ 9 +⑩ 10 +⑪ 11 +⑫ 12 +⑬ 13 +⑭ 14 +⑮ 15 +⑯ 16 +⑰ 17 +⑱ 18 +⑲ 19 +⑳ 20 ⑴ (1) ⑵ (2) ⑶ (3) @@ -1407,6 +1449,59 @@ ⒳ (x) ⒴ (y) ⒵ (z) +Ⓐ A +Ⓑ B +Ⓒ C +Ⓓ D +Ⓔ E +Ⓕ F +Ⓖ G +Ⓗ H +Ⓘ I +Ⓙ J +Ⓚ K +Ⓛ L +Ⓜ M +Ⓝ N +Ⓞ O +Ⓟ P +Ⓠ Q +Ⓡ R +Ⓢ S +Ⓣ T +Ⓤ U +Ⓥ V +Ⓦ W +Ⓧ X +Ⓨ Y +Ⓩ Z +ⓐ a +ⓑ b +ⓒ c +ⓓ d +ⓔ e +ⓕ f +ⓖ g +ⓗ h +ⓘ i +ⓙ j +ⓚ k +ⓛ l +ⓜ m +ⓝ n +ⓞ o +ⓟ p +ⓠ q +ⓡ r +ⓢ s +ⓣ t +ⓤ u +ⓥ v +ⓦ w +ⓧ x +ⓨ y +ⓩ z +⓪ 0 ⦅ (( ⦆ )) ⩴ ::= @@ -1451,6 +1546,41 @@ 〛 ] 〝 " 〞 " +㉐ PTE +㉑ 21 +㉒ 22 +㉓ 23 +㉔ 24 +㉕ 25 +㉖ 26 +㉗ 27 +㉘ 28 +㉙ 29 +㉚ 30 +㉛ 31 +㉜ 32 +㉝ 33 +㉞ 34 +㉟ 35 +㊱ 36 +㊲ 37 +㊳ 38 +㊴ 39 +㊵ 40 +㊶ 41 +㊷ 42 +㊸ 43 +㊹ 44 +㊺ 45 +㊻ 46 +㊼ 47 +㊽ 48 +㊾ 49 +㊿ 50 +㋌ Hg +㋍ erg +㋎ eV +㋏ LTD ㍱ hPa ㍲ da ㍳ AU @@ -1461,6 +1591,7 @@ ㍺ IU ㎀ pA ㎁ nA +㎂ μA ㎃ mA ㎄ kA ㎅ KB @@ -1470,6 +1601,8 @@ ㎉ kcal ㎊ pF ㎋ nF +㎌ μF +㎍ μg ㎎ mg ㎏ kg ㎐ Hz @@ -1479,6 +1612,7 @@ ㎔ THz ㎙ fm ㎚ nm +㎛ μm ㎜ mm ㎝ cm ㎞ km @@ -1491,17 +1625,22 @@ ㎮ rad/s ㎰ ps ㎱ ns +㎲ μs ㎳ ms ㎴ pV ㎵ nV +㎶ μV ㎷ mV ㎸ kV ㎹ MV ㎺ pW ㎻ nW +㎼ μW ㎽ mW ㎾ kW ㎿ MW +㏀ kΩ +㏁ MΩ ㏂ a.m. ㏃ Bq ㏄ cc @@ -1532,6 +1671,7 @@ ㏝ Wb ㏞ V/m ㏟ A/m +㏿ gal ꚜ ъ ꚝ ь ꜰ F @@ -2667,6 +2807,56 @@ 𝟀 χ 𝟁 ψ 𝟂 ω +𝟎 0 +𝟏 1 +𝟐 2 +𝟑 3 +𝟒 4 +𝟓 5 +𝟔 6 +𝟕 7 +𝟖 8 +𝟗 9 +𝟘 0 +𝟙 1 +𝟚 2 +𝟛 3 +𝟜 4 +𝟝 5 +𝟞 6 +𝟟 7 +𝟠 8 +𝟡 9 +𝟢 0 +𝟣 1 +𝟤 2 +𝟥 3 +𝟦 4 +𝟧 5 +𝟨 6 +𝟩 7 +𝟪 8 +𝟫 9 +𝟬 0 +𝟭 1 +𝟮 2 +𝟯 3 +𝟰 4 +𝟱 5 +𝟲 6 +𝟳 7 +𝟴 8 +𝟵 9 +𝟶 0 +𝟷 1 +𝟸 2 +𝟹 3 +𝟺 4 +𝟻 5 +𝟼 6 +𝟽 7 +𝟾 8 +𝟿 9 🄀 0. 🄁 0, 🄂 1, @@ -2704,3 +2894,53 @@ 🄧 (X) 🄨 (Y) 🄩 (Z) +🄫 C +🄬 R +🄭 CD +🄮 WZ +🄰 A +🄱 B +🄲 C +🄳 D +🄴 E +🄵 F +🄶 G +🄷 H +🄸 I +🄹 J +🄺 K +🄻 L +🄼 M +🄽 N +🄾 O +🄿 P +🅀 Q +🅁 R +🅂 S +🅃 T +🅄 U +🅅 V +🅆 W +🅇 X +🅈 Y +🅉 Z +🅊 HV +🅋 MV +🅌 SD +🅍 SS +🅎 PPV +🅏 WC +🅪 MC +🅫 MD +🅬 MR +🆐 DJ +🯰 0 +🯱 1 +🯲 2 +🯳 3 +🯴 4 +🯵 5 +🯶 6 +🯷 7 +🯸 8 +🯹 9