Set width of Hangul JUNGSEONG/JONGSEONG characters in the range U+D7B0..U+D7FB to 0 [BZ #26120]

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
author: Mike FABIAN <mfabian@redhat.com> 2020-06-16 08:29:40 +0200
committer: Mike FABIAN <mfabian@redhat.com> 2020-06-26 09:54:43 +0200
commit: 6e540caa21616d5ec5511fafb22819204525138e (patch)
tree: 47cb8da2e88d1023bfd4f64b67e28a8eadc48189 /localedata
parent: 1d21fb1061cbeb50414a8f371abb36548d90f150 (diff)
download: glibc-6e540caa21616d5ec5511fafb22819204525138e.tar
glibc-6e540caa21616d5ec5511fafb22819204525138e.tar.gz
glibc-6e540caa21616d5ec5511fafb22819204525138e.tar.bz2
glibc-6e540caa21616d5ec5511fafb22819204525138e.zip
10 files changed, 18 insertions, 9 deletions
diff --git a/localedata/charmaps/UTF-8 b/localedata/charmaps/UTF-8
index 14c5d4fa33..8cce47cd97 100644
--- a/localedata/charmaps/UTF-8
+++ b/localedata/charmaps/UTF-8
@@ -48920,6 +48920,8 @@ WIDTH
 <UABE8>	0
 <UABED>	0
 <UAC00>...<UD7A3>	2
+<UD7B0>...<UD7C6>	0
+<UD7CB>...<UD7FB>	0
 <UF900>...<UFA6D>	2
 <UFA70>...<UFAD9>	2
 <UFB1E>	0
diff --git a/localedata/locales/i18n_ctype b/localedata/locales/i18n_ctype
index 6f078a101d..c63e0790fc 100644
--- a/localedata/locales/i18n_ctype
+++ b/localedata/locales/i18n_ctype
@@ -26,7 +26,7 @@ fax       ""
 language  ""
 territory "Earth"
 revision  "13.0.0"
-date      "2020-04-14"
+date      "2020-06-25"
 category  "i18n:2012";LC_CTYPE
 END LC_IDENTIFICATION
 
diff --git a/localedata/locales/tr_TR b/localedata/locales/tr_TR
index d5785ceca1..7dbb923228 100644
--- a/localedata/locales/tr_TR
+++ b/localedata/locales/tr_TR
@@ -43,7 +43,7 @@ fax        ""
 language   "Turkish"
 territory  "Turkey"
 revision   "1.0"
-date       "2020-04-14"
+date       "2020-06-25"
 
 category "i18n:2012";LC_IDENTIFICATION
 category "i18n:2012";LC_CTYPE
diff --git a/localedata/locales/translit_circle b/localedata/locales/translit_circle
index 0f1e81541c..5c07b44532 100644
--- a/localedata/locales/translit_circle
+++ b/localedata/locales/translit_circle
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.
 
 % Transliterations of encircled characters.
-% Generated automatically from UnicodeData.txt by gen_translit_circle.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_circle.py on 2020-06-25 for Unicode 13.0.0.
 
 LC_CTYPE
 
diff --git a/localedata/locales/translit_cjk_compat b/localedata/locales/translit_cjk_compat
index 17b74134fc..ee0d7f83c6 100644
--- a/localedata/locales/translit_cjk_compat
+++ b/localedata/locales/translit_cjk_compat
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.
 
 % Transliterations of CJK compatibility characters.
-% Generated automatically from UnicodeData.txt by gen_translit_cjk_compat.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_cjk_compat.py on 2020-06-25 for Unicode 13.0.0.
 
 LC_CTYPE
 
diff --git a/localedata/locales/translit_combining b/localedata/locales/translit_combining
index d5c8bbfe8f..36128f097a 100644
--- a/localedata/locales/translit_combining
+++ b/localedata/locales/translit_combining
@@ -10,7 +10,7 @@ comment_char %
 
 % Transliterations that remove all combining characters (accents,
 % pronounciation marks, etc.).
-% Generated automatically from UnicodeData.txt by gen_translit_combining.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_combining.py on 2020-06-25 for Unicode 13.0.0.
 
 LC_CTYPE
 
diff --git a/localedata/locales/translit_compat b/localedata/locales/translit_compat
index ff18b02ea3..ac24c4e938 100644
--- a/localedata/locales/translit_compat
+++ b/localedata/locales/translit_compat
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.
 
 % Transliterations of compatibility characters and ligatures.
-% Generated automatically from UnicodeData.txt by gen_translit_compat.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_compat.py on 2020-06-25 for Unicode 13.0.0.
 
 LC_CTYPE
 
diff --git a/localedata/locales/translit_font b/localedata/locales/translit_font
index e79b0d83f5..680c4ed426 100644
--- a/localedata/locales/translit_font
+++ b/localedata/locales/translit_font
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.
 
 % Transliterations of font equivalents.
-% Generated automatically from UnicodeData.txt by gen_translit_font.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_font.py on 2020-06-25 for Unicode 13.0.0.
 
 LC_CTYPE
 
diff --git a/localedata/locales/translit_fraction b/localedata/locales/translit_fraction
index 197d57a644..b52244969e 100644
--- a/localedata/locales/translit_fraction
+++ b/localedata/locales/translit_fraction
@@ -9,7 +9,7 @@ comment_char %
 % otherwise be governed by that license.
 
 % Transliterations of fractions.
-% Generated automatically from UnicodeData.txt by gen_translit_fraction.py on 2020-04-14 for Unicode 13.0.0.
+% Generated automatically from UnicodeData.txt by gen_translit_fraction.py on 2020-06-25 for Unicode 13.0.0.
 % The replacements have been surrounded with spaces, because fractions are
 % often preceded by a decimal number and followed by a unit or a math symbol.
 
diff --git a/localedata/unicode-gen/utf8_gen.py b/localedata/unicode-gen/utf8_gen.py
index 17b99ee88d..11c906b92f 100755
--- a/localedata/unicode-gen/utf8_gen.py
+++ b/localedata/unicode-gen/utf8_gen.py
@@ -258,7 +258,13 @@ def process_width(outfile, ulines, elines, plines):
         if key in width_dict:
             del width_dict[key] # default width is 1
     for key in list(range(0x1160, 0x1200)):
-        width_dict[key] = 0
+        # Hangul jungseong and jongseong:
+        if key in unicode_utils.UNICODE_ATTRIBUTES:
+            width_dict[key] = 0
+    for key in list(range(0xD7B0, 0xD800)):
+        # Hangul jungseong and jongseong:
+        if key in unicode_utils.UNICODE_ATTRIBUTES:
+            width_dict[key] = 0
     for key in list(range(0x3248, 0x3250)):
         # These are “A” which means we can decide whether to treat them
         # as “W” or “N” based on context:
@@ -327,6 +333,7 @@ if __name__ == "__main__":
         help='The Unicode version of the input files used.')
     ARGS = PARSER.parse_args()
 
+    unicode_utils.fill_attributes(ARGS.unicode_data_file)
     with open(ARGS.unicode_data_file, mode='r') as UNIDATA_FILE:
         UNICODE_DATA_LINES = UNIDATA_FILE.readlines()
     with open(ARGS.east_asian_with_file, mode='r') as EAST_ASIAN_WIDTH_FILE:
author	Mike FABIAN <mfabian@redhat.com>	2020-06-16 08:29:40 +0200
committer	Mike FABIAN <mfabian@redhat.com>	2020-06-26 09:54:43 +0200
commit	6e540caa21616d5ec5511fafb22819204525138e (patch)
tree	47cb8da2e88d1023bfd4f64b67e28a8eadc48189 /localedata
parent	1d21fb1061cbeb50414a8f371abb36548d90f150 (diff)
download	glibc-6e540caa21616d5ec5511fafb22819204525138e.tar glibc-6e540caa21616d5ec5511fafb22819204525138e.tar.gz glibc-6e540caa21616d5ec5511fafb22819204525138e.tar.bz2 glibc-6e540caa21616d5ec5511fafb22819204525138e.zip