diff options
Diffstat (limited to 'localedata/unicode-gen')
-rw-r--r-- | localedata/unicode-gen/Makefile | 10 | ||||
-rwxr-xr-x | localedata/unicode-gen/gen_unicode_ctype.py | 18 | ||||
-rw-r--r-- | localedata/unicode-gen/unicode_utils.py | 14 |
3 files changed, 37 insertions, 5 deletions
diff --git a/localedata/unicode-gen/Makefile b/localedata/unicode-gen/Makefile
index 5b7305d54e..4cb1fabc77 100644
--- a/localedata/unicode-gen/Makefile
+++ b/localedata/unicode-gen/Makefile
@@ -41,7 +41,7 @@ PYTHON3 = python3
 WGET = wget
 
 DOWNLOADS = UnicodeData.txt DerivedCoreProperties.txt EastAsianWidth.txt
-GENERATED = i18n UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction
+GENERATED = i18n tr_TR UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction
 REPORTS = i18n-report UTF-8-report
 
 all: $(GENERATED)
@@ -50,6 +50,7 @@ check: check-i18n check-UTF-8
 
 install:
 	cp -p i18n ../locales/i18n
+	cp -p tr_TR ../locales/tr_TR
 	cp -p UTF-8 ../charmaps/UTF-8
 	cp -p translit_combining ../locales/translit_combining
 	cp -p translit_compat ../locales/translit_compat
@@ -82,6 +83,13 @@ check-i18n: i18n-report
 	  i18n-report; \
 	then echo manual verification required; false; else true; fi
 
+tr_TR: UnicodeData.txt DerivedCoreProperties.txt
+tr_TR: ../locales/tr_TR # Preserve non-ctype information.
+tr_TR: gen_unicode_ctype.py
+	$(PYTHON3) gen_unicode_ctype.py -u UnicodeData.txt \
+	  -d DerivedCoreProperties.txt -i ../locales/tr_TR -o $@ \
+	  --unicode_version $(UNICODE_VERSION) --turkish
+
 UTF-8: UnicodeData.txt EastAsianWidth.txt
 UTF-8: utf8_gen.py
 	$(PYTHON3) utf8_gen.py UnicodeData.txt EastAsianWidth.txt
diff --git a/localedata/unicode-gen/gen_unicode_ctype.py b/localedata/unicode-gen/gen_unicode_ctype.py
index 0f064f5ba5..bcb50bf9a5 100755
--- a/localedata/unicode-gen/gen_unicode_ctype.py
+++ b/localedata/unicode-gen/gen_unicode_ctype.py
@@ -196,7 +196,7 @@ def output_tail(i18n_file, tail=''):
     else:
         i18n_file.write('END LC_CTYPE\n')
 
-def output_tables(i18n_file, unicode_version):
+def output_tables(i18n_file, unicode_version, turkish):
     '''Write the new LC_CTYPE character classes to the output file'''
     i18n_file.write('% The following is the 14652 i18n fdcc-set '
                     + 'LC_CTYPE category.\n')
@@ -240,8 +240,14 @@ def output_tables(i18n_file, unicode_version):
                     + '(sections 7.25.2.1.12 and 6.4.4.1).\n')
     output_charclass(i18n_file, 'xdigit', unicode_utils.is_xdigit)
     output_charclass(i18n_file, 'blank', unicode_utils.is_blank)
-    output_charmap(i18n_file, 'toupper', unicode_utils.to_upper)
-    output_charmap(i18n_file, 'tolower', unicode_utils.to_lower)
+    if turkish:
+        i18n_file.write('% The case conversions reflect '
+                        + 'Turkish conventions.\n')
+        output_charmap(i18n_file, 'toupper', unicode_utils.to_upper_turkish)
+        output_charmap(i18n_file, 'tolower', unicode_utils.to_lower_turkish)
+    else:
+        output_charmap(i18n_file, 'toupper', unicode_utils.to_upper)
+        output_charmap(i18n_file, 'tolower', unicode_utils.to_lower)
     output_charmap(i18n_file, 'map "totitle";', unicode_utils.to_title)
     i18n_file.write('% The "combining" class reflects ISO/IEC 10646-1 '
                     + 'annex B.1\n')
@@ -298,6 +304,10 @@ if __name__ == "__main__":
         required=True,
         type=str,
         help='The Unicode version of the input files used.')
+    PARSER.add_argument(
+        '--turkish',
+        action='store_true',
+        help='Use Turkish case conversions.')
     ARGS = PARSER.parse_args()
 
     unicode_utils.fill_attributes(
@@ -310,5 +320,5 @@ if __name__ == "__main__":
     (HEAD, TAIL) = read_input_file(ARGS.input_file)
     with open(ARGS.output_file, mode='w') as I18N_FILE:
         output_head(I18N_FILE, ARGS.unicode_version, head=HEAD)
-        output_tables(I18N_FILE, ARGS.unicode_version)
+        output_tables(I18N_FILE, ARGS.unicode_version, ARGS.turkish)
         output_tail(I18N_FILE, tail=TAIL)
diff --git a/localedata/unicode-gen/unicode_utils.py b/localedata/unicode-gen/unicode_utils.py
index ee91582823..26a57ef293 100644
--- a/localedata/unicode-gen/unicode_utils.py
+++ b/localedata/unicode-gen/unicode_utils.py
@@ -220,6 +220,20 @@ def to_lower(code_point):
     else:
         return code_point
 
+def to_upper_turkish(code_point):
+    '''Returns the code point of the Turkish uppercase version
+    of the given code point'''
+    if code_point == 0x0069:
+        return 0x0130
+    return to_upper(code_point)
+
+def to_lower_turkish(code_point):
+    '''Returns the code point of the Turkish lowercase version
+    of the given code point'''
+    if code_point == 0x0049:
+        return 0x0131
+    return to_lower(code_point)
+
 def to_title(code_point):
     '''Returns the code point of the titlecase version
     of the given code point'''