aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
committer Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
commit acb5ee2e561276d64c6e26ef4b82f59a4db5ae90 (patch)
tree 1f7ebfcaf8bf2874ae5cdb6348205dccfd9499c2
parent b7cbee1cb029f6471aa069552a69f04a3d1b4d70 (diff)
download glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar
glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.gz
glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.bz2
glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.zip
Update.
2000-05-24 Ulrich Drepper <drepper@redhat.com>

 * locale/programs/ld-collate.c (struct element_t): Add mbseqorder
 and wcseqorder members.
 (struct locale_collate_t): Likewise.
 (collate_finish): Assign collation sequence value to each character.
 Create tables for output.
 (collate_output): Write out tables with collation sequence information.
 * locale/C-collate.c: Provide C locale data for collation sequence
 table.
 * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
 _NL_COLLATE_COLLSEQWC.
 * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
 _NL_COLLATE_COLLSEQWC.
 * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
 including fnmatch_loop.c.
 * posix/fnmatch_loop.c: Don't use strcoll while determining whether
 character is matched by range expression. Use collation sequence
 table. Outside glibc fall back on simple character value comparison.
-rw-r--r-- ChangeLog 20
-rw-r--r-- locale/C-collate.c 82
-rw-r--r-- locale/categories.def 2
-rw-r--r-- locale/langinfo.h 2
-rw-r--r-- localedata/ChangeLog 4
-rw-r--r-- localedata/locales/iso14651_t1 1406
-rw-r--r-- posix/fnmatch.c 15
-rw-r--r-- posix/fnmatch_loop.c 135
8 files changed, 1644 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 9440cba153..5ce40be794 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2000-05-24 Ulrich Drepper <drepper@redhat.com>
+
+ * locale/programs/ld-collate.c (struct element_t): Add mbseqorder
+ and wcseqorder members.
+ (struct locale_collate_t): Likewise.
+ (collate_finish): Assign collation sequence value to each character.
+ Create tables for output.
+ (collate_output): Write out tables with collation sequence information.
+ * locale/C-collate.c: Provide C locale data for collation sequence
+ table.
+ * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
+ _NL_COLLATE_COLLSEQWC.
+ * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
+ _NL_COLLATE_COLLSEQWC.
+ * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
+ include fnmatch_loop.c.
+ * posix/fnmatch_loop.c: Don't use strcoll while determining whether
+ character is matched by range expression. Use collation sequence
+ table. Outside glibc fall back on simple character value comparison.
+
2000-05-24 Andreas Jaeger <aj@suse.de>
* sysdeps/mips/elf/start.S (ENTRY_POINT): Align stack for double
diff --git a/locale/C-collate.c b/locale/C-collate.c
index 679ed30871..0ad0efe271 100644
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
@@ -20,12 +20,84 @@
#include <endian.h>
#include "localeinfo.h"
+static const char collseqmb[] = /* C locale collation sequence table for single-byte characters: 256 entries in ascending order, entry N being the character constant '\xNN'.  Installed below as the `string' value that precedes collseqwc in _nl_C_LC_COLLATE.  */
+{
+ '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+ '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+ '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+ '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+ '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+ '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+ '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+ '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+ '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
+ '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
+ '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
+ '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+ '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
+ '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
+ '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
+ '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
+ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
+ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
+ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
+ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
+ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
+ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
+ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
+ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
+ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
+ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
+ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
+ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
+ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
+ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
+ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
+ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
+};
+
+static const uint32_t collseqwc[] = /* Wide-character counterpart of collseqmb: 256 entries in ascending order, entry N being the wide character constant L'\xNN'.  Installed below as the final `wstr' value of _nl_C_LC_COLLATE.  */
+{
+ L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07',
+ L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f',
+ L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17',
+ L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f',
+ L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27',
+ L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f',
+ L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37',
+ L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f',
+ L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47',
+ L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f',
+ L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57',
+ L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f',
+ L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67',
+ L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f',
+ L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77',
+ L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f',
+ L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87',
+ L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f',
+ L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97',
+ L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f',
+ L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7',
+ L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf',
+ L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7',
+ L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf',
+ L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7',
+ L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf',
+ L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7',
+ L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf',
+ L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7',
+ L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef',
+ L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7',
+ L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff'
+};
+
const struct locale_data _nl_C_LC_COLLATE =
{
_nl_C_name,
NULL, 0, 0, /* no file mapped */
UNDELETABLE,
- 16,
+ 18,
{
{ word: 0 },
{ string: NULL },
@@ -40,8 +112,10 @@ const struct locale_data _nl_C_LC_COLLATE =
{ string: NULL },
{ string: NULL },
{ string: NULL },
- { word: 0 },
{ string: NULL },
- { string: NULL }
+ { string: NULL },
+ { string: NULL },
+ { string: collseqmb },
+ { wstr: collseqwc }
}
};
diff --git a/locale/categories.def b/locale/categories.def
index b02c1cac58..19e06879fd 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -58,6 +58,8 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word)
DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string)
DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, string)
), NO_POSTLOAD)
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 7d1183434a..b5ccac6b88 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -248,6 +248,8 @@ enum
_NL_COLLATE_SYMB_HASH_SIZEMB,
_NL_COLLATE_SYMB_TABLEMB,
_NL_COLLATE_SYMB_EXTRAMB,
+ _NL_COLLATE_COLLSEQMB,
+ _NL_COLLATE_COLLSEQWC,
_NL_NUM_LC_COLLATE,
/* LC_CTYPE category: character classification.
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index e59ba8317a..3f40616dd3 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,7 @@
+2000-05-24 Ulrich Drepper <drepper@redhat.com>
+
+ * locales/iso14651_t1: New file.
+
2000-05-15 Andreas Jaeger <aj@suse.de>
* tst-fmon.data: Change testcase following fixes for
diff --git a/localedata/locales/iso14651_t1 b/localedata/locales/iso14651_t1
new file mode 100644
index 0000000000..0402a1f510
--- /dev/null
+++ b/localedata/locales/iso14651_t1
@@ -0,0 +1,1406 @@
+LC_COLLATE
+
+COLL_WEIGHT_MAX=4
+
+# Déclaration des systèmes d'écriture / Declaration of scripts
+script <SPECIAL>
+script <LATIN>
+script <ARABINT>
+script <ARABFOR>
+script <HEBREU>
+script <GREC>
+script <CYRIL>
+script <HAN>
+
+# Déclaration des symboles internes / Declaration of internal symbols
+#
+# SYMB N° Expl.
+#
+collating-symbol <RES-1>
+#
+# <ARABINT>/<ARABFOR>
+#
+#
+collating-symbol <ANO> # 2 normal --> voir/see <MIN>
+collating-symbol <AIS> # 3 isol.
+collating-symbol <AFI> # 4 final
+collating-symbol <AII> # 5 initial
+collating-symbol <AME> # 6 medial/m<e'>dian
+#
+collating-symbol <MIN> # 7 minuscule/minuscule (bas de casse/lower case)
+collating-symbol <IMI> # 8 inférieur min./subscript min. (indice/index)
+collating-symbol <EMI> # 9 supér. min./superscript min. (exposant/exponent)
+collating-symbol <CAP> # 10 capitale/capital (haut de casse/upper case)
+collating-symbol <AMI> # 8 minuscule grecque/Greek lower case
+collating-symbol <ICA> # 11 inférieur en capitale/subscript capital
+collating-symbol <ECA> # 12 supérieur en capitale/superscript capital
+#
+# <ARABINT>/<ARABFOR>
+#
+collating-symbol <AMA> # 13 accent madda
+collating-symbol <AHA> # 14 accent hamza
+collating-symbol <AHW> # 14-1 accent hamza/waw
+collating-symbol <AHS> # 14-2 accent hamza under / hamza souscrit
+collating-symbol <AYE> # 14-3 accent under yeh / accent souscrit du ya'
+collating-symbol <YBA> # 14-4 accent hamza/yeh barree
+#
+collating-symbol <BAS> # 15 de base/basic (non accentué/non-accented)
+#
+collating-symbol <PCL> # 16 particulier/peculiar
+collating-symbol <LIG> # 17 ligature/ligature
+collating-symbol <ACA> # 18 accent aigu/acute accent
+collating-symbol <GRA> # 20 accent grave/grave accent
+collating-symbol <BRE> # 21 brève/breve
+collating-symbol <CIR> # 22 accent circonflexe/circumflex accent
+collating-symbol <CAR> # 23 caron/caron
+collating-symbol <RNE> # 24 rond supérieur/ring above
+collating-symbol <REU> # 25 tréma/diaeresis (ou/or umlaut)
+collating-symbol <DAC> # 26 double ac. aigu/double acute ac.
+collating-symbol <TIL> # 27 tilde/tilde
+collating-symbol <PCT> # 28 point/dot
+collating-symbol <OBL> # 29 barre oblique/oblique
+collating-symbol <CDI> # 30 cédille/cedilla
+collating-symbol <OGO> # 31 ogonek/ogonek
+collating-symbol <MAC> # 32 macron/macron
+#
+# GREC
+#
+collating-symbol <TNS> # accent aigu/tonos/acute accent
+collating-symbol <DLT> # tr<e'>ma/dialytica/diaeresis
+collating-symbol <DTT> # dialytika tonos
+#
+collating-symbol <0>
+collating-symbol <1>
+collating-symbol <2>
+collating-symbol <3>
+collating-symbol <4>
+collating-symbol <5>
+collating-symbol <6>
+collating-symbol <7>
+collating-symbol <8>
+collating-symbol <9>
+#
+collating-symbol <a>
+collating-symbol <b>
+collating-symbol <c>
+collating-symbol <d>
+collating-symbol <e>
+collating-symbol <f>
+collating-symbol <g>
+collating-symbol <h>
+collating-symbol <i>
+collating-symbol <j>
+collating-symbol <k>
+collating-symbol <l>
+collating-symbol <m>
+collating-symbol <n>
+collating-symbol <o>
+collating-symbol <p>
+collating-symbol <q>
+collating-symbol <r>
+collating-symbol <s>
+collating-symbol <t>
+collating-symbol <u>
+collating-symbol <v>
+collating-symbol <w>
+collating-symbol <x>
+collating-symbol <y>
+collating-symbol <z>
+#
+# <ARABINT>/<ARABFOR>
+#
+collating-symbol <hamza>
+collating-symbol <alef>
+collating-symbol <beh>
+collating-symbol <peh>
+collating-symbol <teh_marbuta>
+collating-symbol <teh>
+collating-symbol <tteh>
+collating-symbol <theh>
+collating-symbol <jeem>
+collating-symbol <tcheh>
+collating-symbol <hah>
+collating-symbol <khah>
+collating-symbol <dal>
+collating-symbol <ddal>
+collating-symbol <thal>
+collating-symbol <reh>
+collating-symbol <rreh>
+collating-symbol <zain>
+collating-symbol <jeh>
+collating-symbol <seen>
+collating-symbol <sheen>
+collating-symbol <sad>
+collating-symbol <dad>
+collating-symbol <tah>
+collating-symbol <zah>
+collating-symbol <ain>
+collating-symbol <ghain>
+collating-symbol <feh>
+collating-symbol <qaf>
+collating-symbol <kaf>
+collating-symbol <keheh>
+collating-symbol <gaf>
+collating-symbol <lam>
+collating-symbol <meem>
+collating-symbol <noon>
+collating-symbol <noon_ghunna>
+collating-symbol <heh>
+collating-symbol <heh_yeh>
+collating-symbol <waw>
+collating-symbol <alef_maksura>
+collating-symbol <yeh_barree>
+#
+# <HEBREU>
+#
+collating-symbol <alef>
+collating-symbol <bet>
+collating-symbol <gimel>
+collating-symbol <dalet>
+collating-symbol <he>
+collating-symbol <vav>
+collating-symbol <zayin>
+collating-symbol <het>
+collating-symbol <tet>
+collating-symbol <yod>
+collating-symbol <kaf_fin>
+collating-symbol <kaf>
+collating-symbol <lamed>
+collating-symbol <mem_fin>
+collating-symbol <mem>
+collating-symbol <nun_fin>
+collating-symbol <nun>
+collating-symbol <samekh>
+collating-symbol <ayin>
+collating-symbol <pe_fin>
+collating-symbol <pe>
+collating-symbol <tsad_fin>
+collating-symbol <tsadi>
+collating-symbol <qof>
+collating-symbol <resh>
+collating-symbol <shin>
+collating-symbol <tav>
+#
+# GREC
+#
+collating-symbol <ALPHA>
+collating-symbol <BETA>
+collating-symbol <GAMMA>
+collating-symbol <DELTA>
+collating-symbol <EPSILON>
+collating-symbol <ZETA>
+collating-symbol <ETA>
+collating-symbol <THETA>
+collating-symbol <IOTA>
+collating-symbol <KAPPA>
+collating-symbol <LAMBDA>
+collating-symbol <MU>
+collating-symbol <NU>
+collating-symbol <XI>
+collating-symbol <OMICRON>
+collating-symbol <PI>
+collating-symbol <RHO>
+collating-symbol <SIGMA>
+collating-symbol <TAU>
+collating-symbol <UPSILON>
+collating-symbol <PHI>
+collating-symbol <KHI>
+collating-symbol <PSI>
+collating-symbol <OMEGA>
+#
+# CYRIL
+#
+collating-symbol <CYR-A>
+collating-symbol <CYR-BE>
+collating-symbol <CYR-VE>
+collating-symbol <CYR-GHE>
+collating-symbol <CYR-DE>
+collating-symbol <CYR-GZHE>
+collating-symbol <CYR-DJE>
+collating-symbol <CYR-IE>
+collating-symbol <UKR-IE>
+collating-symbol <CYR-IO>
+collating-symbol <CYR-ZHE>
+collating-symbol <CYR-ZE>
+collating-symbol <CYR-DZE>
+collating-symbol <CYR-I>
+collating-symbol <UKR-I>
+collating-symbol <UKR-YI>
+collating-symbol <CYR-IBRE>
+collating-symbol <CYR-JE>
+collating-symbol <CYR-KA>
+collating-symbol <CYR-EL>
+collating-symbol <CYR-LJE>
+collating-symbol <CYR-EM>
+collating-symbol <CYR-EN>
+collating-symbol <CYR-NJE>
+collating-symbol <CYR-O>
+collating-symbol <CYR-PE>
+collating-symbol <CYR-ER>
+collating-symbol <CYR-ES>
+collating-symbol <CYR-TE>
+collating-symbol <CYR-KJE>
+collating-symbol <CYR-TSHE>
+collating-symbol <CYR-OU>
+collating-symbol <CYR-OUBRE>
+collating-symbol <CYR-EF>
+collating-symbol <CYR-HA>
+collating-symbol <CYR-TSE>
+collating-symbol <CYR-TSHE>
+collating-symbol <CYR-DCHE>
+collating-symbol <CYR-SHA>
+collating-symbol <CYR-SHTSHA>
+collating-symbol <CYR-SIGDUR>
+collating-symbol <CYR-YEROU>
+collating-symbol <CYR-SIGMOUIL>
+collating-symbol <CYR-E>
+collating-symbol <CYR-YOU>
+collating-symbol <CYR-YA>
+
+# Ordre des symboles internes / Order of internal symbols
+#
+# SYMB. N°
+#
+<RES-1>
+<MIN>
+# forme de base (bas de casse, arabe intrinsèque,
+# hébreu intrinsèque, etc.)
+# basic form (lower case, intrinsic Arabic,
+# intrinsic Hebrew and so on)
+#
+# <ARABINT>/<ARABFOR>
+#
+#
+<ANO> # voir
+<MIN>
+<AIS> # isol.
+# 3
+<AFI> # final
+# 4
+<AII> # initial
+# 5
+<AME> # medial/m<e'>dian
+# 6
+#
+<IMI> # 7
+<EMI> # 8
+<CAP> # 9
+<ICA> # 10
+<ECA> # 11
+<AMI>
+#alternate lower case/
+# 12
+#
+#minuscules spéciales après majuscules
+# <ARABINT>/<ARABFOR>
+#
+<AMA> # accent madda #13
+<AHA> # accent hamza #14
+<AHW> # accent hamza/waw #14 1
+<AHS> # accent hamza under / hamza souscrit #14 2
+<AYE> # accent under yeh / accent souscrit du ya' #14 3
+<YBA> # accent hamza/yeh barree #14 4
+#
+<BAS> # 15
+#
+<PCL> # 16
+<LIG> # 17
+<ACA> # 18
+<GRA> # 19
+<BRE> # 20
+<CIR> # 21
+<CAR> # 22
+<RNE> # 23
+<REU> # 24
+<DAC> # 25
+<TIL> # 26
+<PCT> # 27
+<OBL> # 28
+<CDI> # 29
+<OGO> # 30
+<MAC> # 31
+#
+# GREC
+#
+<TNS> # accent aigu/tonos/acute accent
+<DLT> # tr<e'>ma/dialytica/diaeresis
+<DTT> # dialytika tonos
+#
+<0> # 48
+<1> # 49
+<2> # 50
+<3> # 51
+<4> # 52
+<5> # 53
+<6> # 54
+<7> # 55
+<8> # 56
+<9> # 57
+#
+<a> # 97
+<b> # 98
+<c> # 99
+<d> # 100
+<e> # 101
+<f> # 102
+<g> # 103
+<h> # 104
+<i> # 105
+<j> # 106
+<k> # 107
+<l> # 108
+<m> # 109
+<n> # 110
+<o> # 111
+<p> # 112
+<q> # 113
+<r> # 114
+<s> # 115
+<t> # 116
+<u> # 117
+<v> # 118
+<w> # 119
+<x> # 120
+<y> # 121
+<z> # 122
+<th># 122b
+#
+# <ARABINT>/<ARABFOR>
+#
+<hamza>
+<alef>
+<beh>
+<peh>
+<teh_marbuta>
+<teh>
+<tteh>
+<theh>
+<jeem>
+<tcheh>
+<hah>
+<khah>
+<dal>
+<ddal>
+<thal>
+<reh>
+<rreh>
+<zain>
+<jeh>
+<seen>
+<sheen>
+<sad>
+<dad>
+<tah>
+<zah>
+<ain>
+<ghain>
+<feh>
+<qaf>
+<kaf>
+<keheh>
+<gaf>
+<lam>
+<meem>
+<noon>
+<noon_ghunna>
+<heh>
+<heh_yeh>
+<waw>
+<alef_maksura>
+<yeh_barree>
+#
+# <HEBREU>
+#
+<alef>
+<bet>
+<gimel>
+<dalet>
+<he>
+<vav>
+<zayin>
+<het>
+<tet>
+<yod>
+<kaf_fin>
+<kaf>
+<lamed>
+<mem_fin>
+<mem>
+<nun_fin>
+<nun>
+<samekh>
+<ayin>
+<pe_fin>
+<pe>
+<tsad_fin>
+<tsadi>
+<qof>
+<resh>
+<shin>
+<tav>
+#
+#GREC
+#
+<ALPHA>
+<BETA>
+<GAMMA>
+<DELTA>
+<EPSILON>
+<ZETA>
+<ETA>
+<THETA>
+<IOTA>
+<KAPPA>
+<LAMBDA>
+<MU>
+<NU>
+<XI>
+<OMICRON>
+<PI>
+<RHO>
+<SIGMA>
+<TAU>
+<UPSILON>
+<PHI>
+<CHI>
+<PSI>
+<OMEGA>
+#
+#CYRIL
+#
+<CYR-A>
+<CYR-BE>
+<CYR-VE>
+<CYR-GHE>
+<CYR-DE>
+<CYR-GZHE>
+<CYR-DJE>
+<CYR-IE>
+<UKR-IE>
+<CYR-IO>
+<CYR-ZHE>
+<CYR-ZE>
+<CYR-DZE>
+<CYR-I>
+<UKR-I>
+<UKR-YI>
+<CYR-IBRE>
+<CYR-JE>
+<CYR-KA>
+<CYR-EL>
+<CYR-LJE>
+<CYR-EM>
+<CYR-EN>
+<CYR-NJE>
+<CYR-O>
+<CYR-PE>
+<CYR-ER>
+<CYR-ES>
+<CYR-TE>
+<CYR-KJE>
+<CYR-TSHE>
+<CYR-OU>
+<CYR-OUBRE>
+<CYR-EF>
+<CYR-HA>
+<CYR-TSE>
+<CYR-TSHE>
+<CYR-DCHE>
+<CYR-SHA>
+<CYR-SHTSHA>
+<CYR-SIGDUR>
+<CYR-YEROU>
+<CYR-SIGMOUIL>
+<CYR-E>
+<CYR-YOU>
+<CYR-YA>
+
+order_start <SPECIAL>;forward;backward;forward;forward,position
+#
+# Tout caractère non précisément défini sera considéré comme caractère spécial
+# et considéré uniquement au dernier niveau.
+#
+# Any character not precisely specified will be considered as a special
+# character and considered only at the last level.
+# <U0000>......<U7FFFFFFF> IGNORE;IGNORE;IGNORE;<U0000>......<U7FFFFFFF>
+#
+# SYMB. N° GLY
+#
+<U0020> IGNORE;IGNORE;IGNORE;<U0020> # 32 <SP>
+<U005F> IGNORE;IGNORE;IGNORE;<U005F> # 33 _
+<U0332> IGNORE;IGNORE;IGNORE;<U0332> # 34 <"_>
+<U00AF> IGNORE;IGNORE;IGNORE;<U00AF> # 35 - (MACRON)
+<U00AD> IGNORE;IGNORE;IGNORE;<U00AD> # 36 <SHY>
+<U002D> IGNORE;IGNORE;IGNORE;<U002D> # 37 -
+<U002C> IGNORE;IGNORE;IGNORE;<U002C> # 38 ,
+<U003B> IGNORE;IGNORE;IGNORE;<U003B> # 39 ;
+<U003A> IGNORE;IGNORE;IGNORE;<U003A> # 40 :
+<U0021> IGNORE;IGNORE;IGNORE;<U0021> # 41 !
+<U00A1> IGNORE;IGNORE;IGNORE;<U00A1> # 42 ¡
+<U003F> IGNORE;IGNORE;IGNORE;<U003F> # 43 ?
+<U00BF> IGNORE;IGNORE;IGNORE;<U00BF> # 44 ¿
+<U002F> IGNORE;IGNORE;IGNORE;<U002F> # 45 /
+<U0338> IGNORE;IGNORE;IGNORE;<U0338> # 46 <"/>
+<U002E> IGNORE;IGNORE;IGNORE;<U002E> # 47 .
+<U00B7> IGNORE;IGNORE;IGNORE;<U00B7> # 58 ·
+<U00B8> IGNORE;IGNORE;IGNORE;<U00B8> # 59 ¸
+<U0328> IGNORE;IGNORE;IGNORE;<U0328> # 60 <";>
+<U0027> IGNORE;IGNORE;IGNORE;<U0027> # 61 '
+<U2018> IGNORE;IGNORE;IGNORE;<U2018> # 62 <'6>
+<U2019> IGNORE;IGNORE;IGNORE;<U2019> # 63 <'9>
+<U0022> IGNORE;IGNORE;IGNORE;<U0022> # 64 "
+<U201C> IGNORE;IGNORE;IGNORE;<U201C> # 65 <"6>
+<U201D> IGNORE;IGNORE;IGNORE;<U201D> # 66 <"9>
+<U00AB> IGNORE;IGNORE;IGNORE;<U00AB> # 67 «
+<U00BB> IGNORE;IGNORE;IGNORE;<U00BB> # 68 »
+<U0028> IGNORE;IGNORE;IGNORE;<U0028> # 69 (
+<U207D> IGNORE;IGNORE;IGNORE;<U207D> # 70 <(S>
+<U0029> IGNORE;IGNORE;IGNORE;<U0029> # 71 )
+<U207E> IGNORE;IGNORE;IGNORE;<U207E> # 72 <)S>
+<U005B> IGNORE;IGNORE;IGNORE;<U005B> # 73 [
+<U005D> IGNORE;IGNORE;IGNORE;<U005D> # 74 ]
+<U007B> IGNORE;IGNORE;IGNORE;<U007B> # 75 {
+<U007D> IGNORE;IGNORE;IGNORE;<U007D> # 76 }
+<U00A7> IGNORE;IGNORE;IGNORE;<U00A7> # 77 §
+<U00B6> IGNORE;IGNORE;IGNORE;<U00B6> # 78 ¶
+<U00A9> IGNORE;IGNORE;IGNORE;<U00A9> # 79 ©
+<U00AE> IGNORE;IGNORE;IGNORE;<U00AE> # 80 ®
+<U2122> IGNORE;IGNORE;IGNORE;<U2122> # 81 <TM>
+<U0040> IGNORE;IGNORE;IGNORE;<U0040> # 82 @
+<U00A4> IGNORE;IGNORE;IGNORE;<U00A4> # 83 ¤
+<U00A2> IGNORE;IGNORE;IGNORE;<U00A2> # 84 ¢
+<U0024> IGNORE;IGNORE;IGNORE;<U0024> # 85 $
+<U00A3> IGNORE;IGNORE;IGNORE;<U00A3> # 86 £
+<U00A5> IGNORE;IGNORE;IGNORE;<U00A5> # 87 ¥
+<U002A> IGNORE;IGNORE;IGNORE;<U002A> # 88 *
+<U005C> IGNORE;IGNORE;IGNORE;<U005C> # 89 \
+<U0026> IGNORE;IGNORE;IGNORE;<U0026> # 90 &
+<U0023> IGNORE;IGNORE;IGNORE;<U0023> # 91 #
+<U0025> IGNORE;IGNORE;IGNORE;<U0025> # 92 %
+<U207B> IGNORE;IGNORE;IGNORE;<U207B> # 93 <-S>
+<U002B> IGNORE;IGNORE;IGNORE;<U002B> # 94 +
+<U207A> IGNORE;IGNORE;IGNORE;<U207A> # 95 <+S>
+<U00B1> IGNORE;IGNORE;IGNORE;<U00B1> # 96 ±
+<U00B4> IGNORE;IGNORE;IGNORE;<0> # 123 ´
+<U0060> IGNORE;IGNORE;IGNORE;<1> # 124 `
+<U0306> IGNORE;IGNORE;IGNORE;<2> # 125 <"(>
+<U005E> IGNORE;IGNORE;IGNORE;<3> # 126 ^
+<U030C> IGNORE;IGNORE;IGNORE;<4> # 127 <"<>
+<U030A> IGNORE;IGNORE;IGNORE;<5> # 128 <"0>
+<U00A8> IGNORE;IGNORE;IGNORE;<6> # 129 ¨
+<U030B> IGNORE;IGNORE;IGNORE;<7> # 130 <"">
+<U007E> IGNORE;IGNORE;IGNORE;<8> # 131 ~
+<U0307> IGNORE;IGNORE;IGNORE;<9> # 132 <".>
+<U00F7> IGNORE;IGNORE;IGNORE;<a> # 133 ÷
+<U00D7> IGNORE;IGNORE;IGNORE;<b> # 134 ×
+<U2260> IGNORE;IGNORE;IGNORE;<c> # 135 <!=>
+<U003C> IGNORE;IGNORE;IGNORE;<d> # 136 <
+<U2264> IGNORE;IGNORE;IGNORE;<e> # 137 <=<>
+<U003D> IGNORE;IGNORE;IGNORE;<f> # 138 =
+<U2265> IGNORE;IGNORE;IGNORE;<g> # 139 </>=>
+<U003E> IGNORE;IGNORE;IGNORE;<h> # 140 >
+<U00AC> IGNORE;IGNORE;IGNORE;<i> # 141 ¬
+<U007C> IGNORE;IGNORE;IGNORE;<j> # 142 |
+<U00A6> IGNORE;IGNORE;IGNORE;<k> # 143 ¦
+<U00B0> IGNORE;IGNORE;IGNORE;<l> # 144 °
+<U00B5> IGNORE;IGNORE;IGNORE;<m> # 145 µ
+<U2126> IGNORE;IGNORE;IGNORE;<n> # 146 <Om>
+<U220E> IGNORE;IGNORE;IGNORE;<o> # 147 <FP>
+<U250C> IGNORE;IGNORE;IGNORE;<p> # 148 <_V/>>
+<U252C> IGNORE;IGNORE;IGNORE;<q> # 149 <_V->
+<U2510> IGNORE;IGNORE;IGNORE;<r> # 150 <_V<w>
+<U251C> IGNORE;IGNORE;IGNORE;<s> # 151 <_!/>>
+<U253C> IGNORE;IGNORE;IGNORE;<t> # 152 <_!->
+<U2524> IGNORE;IGNORE;IGNORE;<u> # 153 <_!<>
+<U2514> IGNORE;IGNORE;IGNORE;<v> # 154 <_A/>>
+<U2534> IGNORE;IGNORE;IGNORE;<w> # 155 <_-A>
+<U2518> IGNORE;IGNORE;IGNORE;<x> # 156 <_A<>
+<U2502> IGNORE;IGNORE;IGNORE;<y> # 157 <_!>
+<U2500> IGNORE;IGNORE;IGNORE;<z> # 158 <_-> #
+<U2501> IGNORE;IGNORE;IGNORE;<U2501> # 159 <_=>
+<U2190> IGNORE;IGNORE;IGNORE;<U2190> # 160 <<->
+<U2192> IGNORE;IGNORE;IGNORE;<U2192> # 161 <-/>>
+<U20D1> IGNORE;IGNORE;IGNORE;<U20D1> # 162 <"7>
+<U2191> IGNORE;IGNORE;IGNORE;<U2191> # 163 <-!>
+<U2193> IGNORE;IGNORE;IGNORE;<U2193> # 164 <-v>
+<U266A> IGNORE;IGNORE;IGNORE;<U266A> # 165 <_d!>
+<U2571> IGNORE;IGNORE;IGNORE;<U2571> # 166 <_/>//>
+<U2572> IGNORE;IGNORE;IGNORE;<U2572> # 167 <_<\>
+<U25E2> IGNORE;IGNORE;IGNORE;<U25E2> # 168 <_./>//>
+<U25E3> IGNORE;IGNORE;IGNORE;<U25E3> # 169 <_.<\>
+#
+# <ARABINT>/<ARABFOR>
+#
+<U060C> IGNORE;IGNORE;IGNORE;<U060C>
+<U061B> IGNORE;IGNORE;IGNORE;<U061B>
+<U061F> IGNORE;IGNORE;IGNORE;<U061F>
+<U0640> IGNORE;IGNORE;IGNORE;<U0640>
+<U066A> IGNORE;IGNORE;IGNORE;<U066A>
+<U066B> IGNORE;IGNORE;IGNORE;<U066B>
+<U066C> IGNORE;IGNORE;IGNORE;<U066C>
+<U066D> IGNORE;IGNORE;IGNORE;<U066D>
+<U064B> IGNORE;IGNORE;IGNORE;<U064B> #<fathatan_no>
+<UFE70> IGNORE;IGNORE;IGNORE;<UFE70> #<fathatan_is>
+<UFE71> IGNORE;IGNORE;IGNORE;<UFE71> #<fathatan_me>
+<U064C> IGNORE;IGNORE;IGNORE;<U064C> #<dammatan_no>
+<UFE72> IGNORE;IGNORE;IGNORE;<UFE72> #<dammatan_is>
+<U064D> IGNORE;IGNORE;IGNORE;<U064D> #<kasratan_no>
+<UFE74> IGNORE;IGNORE;IGNORE;<UFE74> #<kasratan_is>
+<U064E> IGNORE;IGNORE;IGNORE;<U064E> #<fatha_no>
+<UFE76> IGNORE;IGNORE;IGNORE;<UFE76> #<fatha_is>
+<UFE77> IGNORE;IGNORE;IGNORE;<UFE77> #<fatha_me>
+<U064F> IGNORE;IGNORE;IGNORE;<U064F> #<damma_no>
+<UFE78> IGNORE;IGNORE;IGNORE;<UFE78> #<damma_is>
+<UFE79> IGNORE;IGNORE;IGNORE;<UFE79> #<damma_me>
+<U0650> IGNORE;IGNORE;IGNORE;<U0650> #<kasra_no>
+<UFE7A> IGNORE;IGNORE;IGNORE;<UFE7A> #<kasra_is>
+<UFE7B> IGNORE;IGNORE;IGNORE;<UFE7B> #<kasra_me>
+<U0651> IGNORE;IGNORE;IGNORE;<U0651> #<shadda_no>
+<UFE7C> IGNORE;IGNORE;IGNORE;<UFE7C> #<shadda_is>
+<UFE7D> IGNORE;IGNORE;IGNORE;<UFE7D> #<shadda_me>
+<U0652> IGNORE;IGNORE;IGNORE;<U0652> #<sukun_no>
+<UFE7E> IGNORE;IGNORE;IGNORE;<UFE7E> #<sukun_is>
+<UFE7F> IGNORE;IGNORE;IGNORE;<UFE7F> #<sukun_me>
+#
+# <HEBREU>
+#
+<U05B0> IGNORE;IGNORE;IGNORE;<U05B0> #point_sheva
+<U05B1> IGNORE;IGNORE;IGNORE;<U05B1> #point_hataf_segol
+<U05B2> IGNORE;IGNORE;IGNORE;<U05B2> #point_hataf_patah
+<U05B3> IGNORE;IGNORE;IGNORE;<U05B3> #point_hataf_qamats
+<U05B4> IGNORE;IGNORE;IGNORE;<U05B4> #point_hiriq
+<U05B5> IGNORE;IGNORE;IGNORE;<U05B5> #point_tsere
+<U05B6> IGNORE;IGNORE;IGNORE;<U05B6> #point_segol
+<U05B7> IGNORE;IGNORE;IGNORE;<U05B7> #point_patah
+<U05B8> IGNORE;IGNORE;IGNORE;<U05B8> #point_qamats
+<U05B9> IGNORE;IGNORE;IGNORE;<U05B9> #point_holam
+<U05BB> IGNORE;IGNORE;IGNORE;<U05BB> #point_qubuts
+<U05BC> IGNORE;IGNORE;IGNORE;<U05BC> #point_dagesh
+<U05BD> IGNORE;IGNORE;IGNORE;<U05BD> #point_meteg
+<U05BE> IGNORE;IGNORE;IGNORE;<U05BE> #maqaf
+<U05BF> IGNORE;IGNORE;IGNORE;<U05BF> #point_rafe
+<U05C0> IGNORE;IGNORE;IGNORE;<U05C0> #paseq
+<U05C1> IGNORE;IGNORE;IGNORE;<U05C1> #point_shin_dot
+<U05C2> IGNORE;IGNORE;IGNORE;<U05C2> #point_sin_dot
+<U05C3> IGNORE;IGNORE;IGNORE;<U05C3> #sof pasuq
+
+order_start <LATIN>;forward;backward;forward;forward,position
+#
+<U00A0> <U0020>;<BAS>;<MIN>;IGNORE # 170 <NBSP>
+#
+<U0030> <0>;<BAS>;<MIN>;IGNORE # 171 0
+<U0031> <1>;<BAS>;<MIN>;IGNORE # 172 1
+<U0032> <2>;<BAS>;<MIN>;IGNORE # 173 2
+<U0033> <3>;<BAS>;<MIN>;IGNORE # 174 3
+<U0034> <4>;<BAS>;<MIN>;IGNORE # 175 4
+<U0035> <5>;<BAS>;<MIN>;IGNORE # 176 5
+<U0036> <6>;<BAS>;<MIN>;IGNORE # 177 6
+<U0037> <7>;<BAS>;<MIN>;IGNORE # 178 7
+<U0038> <8>;<BAS>;<MIN>;IGNORE # 179 8
+<U0039> <9>;<BAS>;<MIN>;IGNORE # 180 9
+#
+<U215B> <0>;<GRA>;<MIN>;IGNORE # 181 <18>
+<U00BC> <0>;<BRE>;<MIN>;IGNORE # 182 ¼
+<U215C> <0>;<CIR>;<MIN>;IGNORE # 183 <38>
+<U215D> <0>;<RNE>;<MIN>;IGNORE # 184 <58>
+<U215E> <0>;<DAC>;<MIN>;IGNORE # 185 <78>
+<U00BD> <0>;<CAR>;<MIN>;IGNORE # 186 ½
+<U00BE> <0>;<REU>;<MIN>;IGNORE # 187 ¾
+<U2070> <0>;<BAS>;<EMI>;IGNORE # 188 <0S>
+<U00B9> <1>;<BAS>;<EMI>;IGNORE # 189 ¹
+<U00B2> <2>;<BAS>;<EMI>;IGNORE # 190 ²
+<U00B3> <3>;<BAS>;<EMI>;IGNORE # 191 ³
+<U2074> <4>;<BAS>;<EMI>;IGNORE # 192 <4S>
+<U2075> <5>;<BAS>;<EMI>;IGNORE # 193 <5S>
+<U2076> <6>;<BAS>;<EMI>;IGNORE # 194 <6S>
+<U2077> <7>;<BAS>;<EMI>;IGNORE # 195 <7S>
+<U2078> <8>;<BAS>;<EMI>;IGNORE # 196 <8S>
+<U2079> <9>;<BAS>;<EMI>;IGNORE # 197 <9S>
+#
+<U0061> <a>;<BAS>;<MIN>;IGNORE # 198 a
+<U00AA> <a>;<PCL>;<EMI>;IGNORE # 199 ª
+<U00E1> <a>;<ACA>;<MIN>;IGNORE # 200 á
+<U00E0> <a>;<GRA>;<MIN>;IGNORE # 201 à
+<U00E2> <a>;<CIR>;<MIN>;IGNORE # 202 â
+<U00E3> <a>;<TIL>;<MIN>;IGNORE # 203 ã
+<U00E4> <a>;<REU>;<MIN>;IGNORE # 204 ä
+<U00E5> <a>;<RNE>;<MIN>;IGNORE # 205 å
+<U0103> <a>;<BRE>;<MIN>;IGNORE # 206 <a(>
+<U0105> <a>;<OGO>;<MIN>;IGNORE # 207 <a;>
+<U0101> <a>;<MAC>;<MIN>;IGNORE # 208 <a->
+<U00E6> <a><e>;<LIG><LIG>;<MIN><MIN>;IGNORE # 209 æ
+<U0062> <b>;<BAS>;<MIN>;IGNORE # 210 b
+<U0063> <c>;<BAS>;<MIN>;IGNORE # 211 c
+<U00E7> <c>;<CDI>;<MIN>;IGNORE # 212 ç
+<U0107> <c>;<ACA>;<MIN>;IGNORE # 213 <c'>
+<U0109> <c>;<CIR>;<MIN>;IGNORE # 214 <c/>>
+<U010D> <c>;<CAR>;<MIN>;IGNORE # 215 <c<>
+<U010B> <c>;<PCT>;<MIN>;IGNORE # 216 <c.>
+<U0064> <d>;<BAS>;<MIN>;IGNORE # 217 d
+<U00F0> <d>;<PCL>;<MIN>;IGNORE # 218 ð
+<U010F> <d>;<CAR>;<MIN>;IGNORE # 219 <d<>
+<U0111> <d>;<OBL>;<MIN>;IGNORE # 220 <d//>
+<U0065> <e>;<BAS>;<MIN>;IGNORE # 221 e
+<U00E9> <e>;<ACA>;<MIN>;IGNORE # 222 é
+<U00E8> <e>;<GRA>;<MIN>;IGNORE # 223 è
+<U00EA> <e>;<CIR>;<MIN>;IGNORE # 224 ê
+<U00EB> <e>;<REU>;<MIN>;IGNORE # 225 ë
+<U011B> <e>;<CAR>;<MIN>;IGNORE # 226 <e<>
+<U0117> <e>;<PCT>;<MIN>;IGNORE # 227 <e.>
+<U0119> <e>;<OGO>;<MIN>;IGNORE # 228 <e;>
+<U0113> <e>;<MAC>;<MIN>;IGNORE # 229 <e->
+<U0066> <f>;<BAS>;<MIN>;IGNORE # 230 f
+<U0067> <g>;<BAS>;<MIN>;IGNORE # 231 g
+<U011F> <g>;<BRE>;<MIN>;IGNORE # 232 <g(>
+<U011D> <g>;<CIR>;<MIN>;IGNORE # 233 <g/>>
+<U0121> <g>;<PCT>;<MIN>;IGNORE # 234 <g.>
+<U0123> <g>;<CDI>;<MIN>;IGNORE # 235 <g,>
+<U0068> <h>;<BAS>;<MIN>;IGNORE # 236 h
+<U0125> <h>;<CIR>;<MIN>;IGNORE # 237 <h/>>
+<U0127> <h>;<OBL>;<MIN>;IGNORE # 238 <h//>
+<U0069> <i>;<BAS>;<MIN>;IGNORE # 239 i
+<U00ED> <i>;<ACA>;<MIN>;IGNORE # 240 í
+<U00EC> <i>;<GRA>;<MIN>;IGNORE # 241 ì
+<U00EE> <i>;<CIR>;<MIN>;IGNORE # 242 î
+<U00EF> <i>;<REU>;<MIN>;IGNORE # 243 ï
+<U0131> <i>;<PCL>;<MIN>;IGNORE # 244 <i.>
+<U0129> <i>;<TIL>;<MIN>;IGNORE # 245 <i?>
+<U012F> <i>;<OGO>;<MIN>;IGNORE # 246 <i;>
+<U012B> <i>;<MAC>;<MIN>;IGNORE # 247 <i->
+<U0133> <i><j>;<LIG><LIG>;<MIN><MIN>;IGNORE # 248 <ij>
+<U006A> <j>;<BAS>;<MIN>;IGNORE # 249 j
+<U0135> <j>;<CIR>;<MIN>;IGNORE # 250 <j/>>
+<U006B> <k>;<BAS>;<MIN>;IGNORE # 251 k
+<U0138> <k>;<PCL>;<MIN>;IGNORE # 252 <kk>
+<U0137> <k>;<CDI>;<MIN>;IGNORE # 253 <k,>
+<U006C> <l>;<BAS>;<MIN>;IGNORE # 254 l
+<U013A> <l>;<ACA>;<MIN>;IGNORE # 255 <l'>
+<U013E> <l>;<CAR>;<MIN>;IGNORE # 256 <l<>
+<U0142> <l>;<OBL>;<MIN>;IGNORE # 257 <l//>
+<U013C> <l>;<CDI>;<MIN>;IGNORE # 258 <l,>
+<U0140> <l>;<PCT>;<MIN>;IGNORE # 259 <l.>
+<U006D> <m>;<BAS>;<MIN>;IGNORE # 260 m
+<U006E> <n>;<BAS>;<MIN>;IGNORE # 261 n
+<U00F1> <n>;<TIL>;<MIN>;IGNORE # 262 ñ
+<U0149> <n>;<PCL>;<MIN>;IGNORE # 263 <'n>
+<U0144> <n>;<ACA>;<MIN>;IGNORE # 264 <n'>
+<U0148> <n>;<CAR>;<MIN>;IGNORE # 265 <n<>
+<U0146> <n>;<CDI>;<MIN>;IGNORE # 266 <n,>
+<U014B> <n><g>;<LIG><LIG>;<MIN><MIN>;IGNORE # 267 <ng>
+<U006F> <o>;<BAS>;<MIN>;IGNORE # 268 o
+<U00BA> <o>;<PCL>;<EMI>;IGNORE # 269 º
+<U00F3> <o>;<ACA>;<MIN>;IGNORE # 270 ó
+<U00F2> <o>;<GRA>;<MIN>;IGNORE # 271 ò
+<U00F4> <o>;<CIR>;<MIN>;IGNORE # 272 ô
+<U00F5> <o>;<TIL>;<MIN>;IGNORE # 273 õ
+<U00F6> <o>;<REU>;<MIN>;IGNORE # 274 ö
+<U00F8> <o>;<OBL>;<MIN>;IGNORE # 275 ø
+<U0151> <o>;<DAC>;<MIN>;IGNORE # 276 <o">
+<U014D> <o>;<MAC>;<MIN>;IGNORE # 277 <o->
+<U0153> <o><e>;<LIG><LIG>;<MIN><MIN>;IGNORE # 278 <oe>
+<U0070> <p>;<BAS>;<MIN>;IGNORE # 279 p
+<U0071> <q>;<BAS>;<MIN>;IGNORE # 280 q
+<U0072> <r>;<BAS>;<MIN>;IGNORE # 281 r
+<U0155> <r>;<ACA>;<MIN>;IGNORE # 282 <r'>
+<U0159> <r>;<CAR>;<MIN>;IGNORE # 283 <r<>
+<U0157> <r>;<CDI>;<MIN>;IGNORE # 284 <r,>
+<U0073> <s>;<BAS>;<MIN>;IGNORE # 285 s
+<U015B> <s>;<ACA>;<MIN>;IGNORE # 286 <s'>
+<U015D> <s>;<CIR>;<MIN>;IGNORE # 287 <s/>>
+<U0161> <s>;<CAR>;<MIN>;IGNORE # 288 <s<>
+<U015F> <s>;<CDI>;<MIN>;IGNORE # 289 <s,>
+<U00DF> <s><s>;<LIG><LIG>;<MIN><MIN>;IGNORE # 290 ß
+<U0074> <t>;<BAS>;<MIN>;IGNORE # 291 t
+<U0165> <t>;<CAR>;<MIN>;IGNORE # 292 <t<>
+<U0167> <t>;<OBL>;<MIN>;IGNORE # 293 <t//>
+<U0163> <t>;<CDI>;<MIN>;IGNORE # 294 <t,>
+<U0075> <u>;<BAS>;<MIN>;IGNORE # 296 u
+<U00FA> <u>;<ACA>;<MIN>;IGNORE # 297 ú
+<U00F9> <u>;<GRA>;<MIN>;IGNORE # 298 ù
+<U00FB> <u>;<CIR>;<MIN>;IGNORE # 299 û
+<U00FC> <u>;<REU>;<MIN>;IGNORE # 300 ü
+<U016D> <u>;<BRE>;<MIN>;IGNORE # 301 <u(>
+<U016F> <u>;<RNE>;<MIN>;IGNORE # 302 <u0>
+<U0171> <u>;<DAC>;<MIN>;IGNORE # 303 <u">
+<U0169> <u>;<TIL>;<MIN>;IGNORE # 304 <u?>
+<U0173> <u>;<OGO>;<MIN>;IGNORE # 305 <u;>
+<U016B> <u>;<MAC>;<MIN>;IGNORE # 306 <u->
+<U0076> <v>;<BAS>;<MIN>;IGNORE # 307 v
+<U0077> <w>;<BAS>;<MIN>;IGNORE # 308 w
+<U0175> <w>;<CIR>;<MIN>;IGNORE # 309 <w/>>
+<U0078> <x>;<BAS>;<MIN>;IGNORE # 310 x
+<U0079> <y>;<BAS>;<MIN>;IGNORE # 311 y
+<U00FD> <y>;<ACA>;<MIN>;IGNORE # 312 ý
+<U00FF> <y>;<REU>;<MIN>;IGNORE # 313 ÿ
+<U0177> <y>;<CIR>;<MIN>;IGNORE # 314 <y/>>
+<U007A> <z>;<BAS>;<MIN>;IGNORE # 315 z
+<U017A> <z>;<ACA>;<MIN>;IGNORE # 316 <z'>
+<U017E> <z>;<CAR>;<MIN>;IGNORE # 317 <z<>
+<U017C> <z>;<PCT>;<MIN>;IGNORE # 318 <z.>
+<U00FE> <th>;<BAS>;<MIN>;IGNORE # 318b þ #
+<U0041> <a>;<BAS>;<CAP>;IGNORE # 319 A
+<U00C1> <a>;<ACA>;<CAP>;IGNORE # 320 Á
+<U00C0> <a>;<GRA>;<CAP>;IGNORE # 321 À
+<U00C2> <a>;<CIR>;<CAP>;IGNORE # 322 Â
+<U00C3> <a>;<TIL>;<CAP>;IGNORE # 323 Ã
+<U00C4> <a>;<REU>;<CAP>;IGNORE # 324 Ä
+<U00C5> <a>;<RNE>;<CAP>;IGNORE # 325 Å
+<U0102> <a>;<BRE>;<CAP>;IGNORE # 326 <A(>
+<U0104> <a>;<OGO>;<CAP>;IGNORE # 327 <A;>
+<U0100> <a>;<MAC>;<CAP>;IGNORE # 328 <A->
+<U00C6> <a><e>;<LIG><LIG>;<CAP><CAP>;IGNORE # 329 Æ
+<U0042> <b>;<BAS>;<CAP>;IGNORE # 330 B
+<U0043> <c>;<BAS>;<CAP>;IGNORE # 331 C
+<U00C7> <c>;<CDI>;<CAP>;IGNORE # 332 Ç
+<U0106> <c>;<ACA>;<CAP>;IGNORE # 333 <C'>
+<U0108> <c>;<CIR>;<CAP>;IGNORE # 334 <C/>>
+<U010C> <c>;<CAR>;<CAP>;IGNORE # 335 <C<>
+<U010A> <c>;<PCT>;<CAP>;IGNORE # 336 <C.>
+<U0044> <d>;<BAS>;<CAP>;IGNORE # 337 D
+<U00D0> <d>;<PCL>;<CAP>;IGNORE # 338 Ð
+<U010E> <d>;<CAR>;<CAP>;IGNORE # 339 <D<>
+<U0110> <d>;<OBL>;<CAP>;IGNORE # 340 <D//>
+<U0045> <e>;<BAS>;<CAP>;IGNORE # 341 E
+<U00C9> <e>;<ACA>;<CAP>;IGNORE # 342 É
+<U00C8> <e>;<GRA>;<CAP>;IGNORE # 343 È
+<U00CA> <e>;<CIR>;<CAP>;IGNORE # 344 Ê
+<U00CB> <e>;<REU>;<CAP>;IGNORE # 345 Ë
+<U011A> <e>;<CAR>;<CAP>;IGNORE # 346 <E<>
+<U0116> <e>;<PCT>;<CAP>;IGNORE # 347 <E.>
+<U0118> <e>;<OGO>;<CAP>;IGNORE # 348 <E;>
+<U0112> <e>;<MAC>;<CAP>;IGNORE # 349 <E->
+<U0046> <f>;<BAS>;<CAP>;IGNORE # 350 F
+<U0047> <g>;<BAS>;<CAP>;IGNORE # 351 G
+<U011E> <g>;<BRE>;<CAP>;IGNORE # 352 <G(>
+<U011C> <g>;<CIR>;<CAP>;IGNORE # 353 <G/>>
+<U0120> <g>;<PCT>;<CAP>;IGNORE # 354 <G.>
+<U0122> <g>;<CDI>;<CAP>;IGNORE # 355 <G,>
+<U0048> <h>;<BAS>;<CAP>;IGNORE # 356 H
+<U0124> <h>;<CIR>;<CAP>;IGNORE # 357 <H/>>
+<U0126> <h>;<OBL>;<CAP>;IGNORE # 358 <H//>
+<U0049> <i>;<BAS>;<CAP>;IGNORE # 359 I
+<U00CD> <i>;<ACA>;<CAP>;IGNORE # 360 Í
+<U00CC> <i>;<GRA>;<CAP>;IGNORE # 361 Ì
+<U00CE> <i>;<CIR>;<CAP>;IGNORE # 362 Î
+<U00CF> <i>;<REU>;<CAP>;IGNORE # 363 Ï
+<U0130> <i>;<PCL>;<CAP>;IGNORE # 364 <I.>
+<U0128> <i>;<TIL>;<CAP>;IGNORE # 365 <I?>
+<U012E> <i>;<OGO>;<CAP>;IGNORE # 366 <I;>
+<U012A> <i>;<MAC>;<CAP>;IGNORE # 367 <I->
+<U0132> <i><j>;<LIG><LIG>;<CAP><CAP>;IGNORE # 368 <IJ>
+<U004A> <j>;<BAS>;<CAP>;IGNORE # 369 J
+<U0134> <j>;<CIR>;<CAP>;IGNORE # 370 <J/>>
+<U004B> <k>;<BAS>;<CAP>;IGNORE # 371 K
+<U0136> <k>;<CDI>;<CAP>;IGNORE # 372 <K,>
+<U004C> <l>;<BAS>;<CAP>;IGNORE # 373 L
+<U0139> <l>;<ACA>;<CAP>;IGNORE # 374 <L'>
+<U013D> <l>;<CAR>;<CAP>;IGNORE # 375 <L<>
+<U0141> <l>;<OBL>;<CAP>;IGNORE # 376 <L//>
+<U013B> <l>;<CDI>;<CAP>;IGNORE # 377 <L,>
+<U013F> <l>;<PCT>;<CAP>;IGNORE # 378 <L.>
+<U004D> <m>;<BAS>;<CAP>;IGNORE # 379 M
+<U004E> <n>;<BAS>;<CAP>;IGNORE # 380 N
+<U00D1> <n>;<TIL>;<CAP>;IGNORE # 381 Ñ
+<U0143> <n>;<ACA>;<CAP>;IGNORE # 382 <N'>
+<U0147> <n>;<CAR>;<CAP>;IGNORE # 383 <N<>
+<U0145> <n>;<CDI>;<CAP>;IGNORE # 384 <N,>
+<U014A> <n><g>;<LIG><LIG>;<CAP><CAP>;IGNORE # 385 <NG>
+<U004F> <o>;<BAS>;<CAP>;IGNORE # 386 O
+<U00D3> <o>;<ACA>;<CAP>;IGNORE # 387 Ó
+<U00D2> <o>;<GRA>;<CAP>;IGNORE # 388 Ò
+<U00D4> <o>;<CIR>;<CAP>;IGNORE # 389 Ô
+<U00D5> <o>;<TIL>;<CAP>;IGNORE # 390 Õ
+<U00D6> <o>;<REU>;<CAP>;IGNORE # 391 Ö
+<U00D8> <o>;<OBL>;<CAP>;IGNORE # 392 Ø
+<U0150> <o>;<DAC>;<CAP>;IGNORE # 393 <O">
+<U014C> <o>;<MAC>;<CAP>;IGNORE # 394 <O->
+<U0152> <o><e>;<LIG><LIG>;<CAP><CAP>;IGNORE # 395 <OE>
+<U0050> <p>;<BAS>;<CAP>;IGNORE # 396 P
+<U0051> <q>;<BAS>;<CAP>;IGNORE # 397 Q
+<U0052> <r>;<BAS>;<CAP>;IGNORE # 398 R
+<U0154> <r>;<ACA>;<CAP>;IGNORE # 399 <R'>
+<U0158> <r>;<CAR>;<CAP>;IGNORE # 400 <R<>
+<U0156> <r>;<CDI>;<CAP>;IGNORE # 401 <R,>
+<U0053> <s>;<BAS>;<CAP>;IGNORE # 402 S
+<U015A> <s>;<ACA>;<CAP>;IGNORE # 403 <S'>
+<U015C> <s>;<CIR>;<CAP>;IGNORE # 404 <S/>>
+<U0160> <s>;<CAR>;<CAP>;IGNORE # 405 <S<>
+<U015E> <s>;<CDI>;<CAP>;IGNORE # 406 <S,>
+<U0054> <t>;<BAS>;<CAP>;IGNORE # 407 T
+<U0164> <t>;<CAR>;<CAP>;IGNORE # 408 <T<>
+<U0166> <t>;<OBL>;<CAP>;IGNORE # 409 <T//>
+<U0162> <t>;<CDI>;<CAP>;IGNORE # 410 <T,>
+<U0055> <u>;<BAS>;<CAP>;IGNORE # 412 U
+<U00DA> <u>;<ACA>;<CAP>;IGNORE # 413 Ú
+<U00D9> <u>;<GRA>;<CAP>;IGNORE # 414 Ù
+<U00DB> <u>;<CIR>;<CAP>;IGNORE # 415 Û
+<U00DC> <u>;<REU>;<CAP>;IGNORE # 416 Ü
+<U016C> <u>;<BRE>;<CAP>;IGNORE # 417 <U(>
+<U016E> <u>;<RNE>;<CAP>;IGNORE # 418 <U0>
+<U0170> <u>;<DAC>;<CAP>;IGNORE # 419 <U">
+<U0168> <u>;<TIL>;<CAP>;IGNORE # 420 <U?>
+<U0172> <u>;<OGO>;<CAP>;IGNORE # 421 <U;>
+<U016A> <u>;<MAC>;<CAP>;IGNORE # 422 <U->
+<U0056> <v>;<BAS>;<CAP>;IGNORE # 423 V
+<U0057> <w>;<BAS>;<CAP>;IGNORE # 424 W
+<U0174> <w>;<CIR>;<CAP>;IGNORE # 425 <W/>>
+<U0058> <x>;<BAS>;<CAP>;IGNORE # 426 X
+<U0059> <y>;<BAS>;<CAP>;IGNORE # 427 Y
+<U00DD> <y>;<ACA>;<CAP>;IGNORE # 428 Ý
+<U0176> <y>;<CIR>;<CAP>;IGNORE # 429 <Y/>>
+<U0178> <y>;<REU>;<CAP>;IGNORE # 430 <Y:>
+<U005A> <z>;<BAS>;<CAP>;IGNORE # 431 Z
+<U0179> <z>;<ACA>;<CAP>;IGNORE # 432 <Z'>
+<U017D> <z>;<CAR>;<CAP>;IGNORE # 433 <Z<>
+<U017B> <z>;<PCT>;<CAP>;IGNORE # 434 <Z.>
+<U00DE> <th>;<BAS>;<CAP>;IGNORE # 411 Þ
+
+order_start <ARABINT>;forward;forward;forward;forward,position
+<U0660> <0>;<BAS>;<MIN>;IGNORE
+<U06F0> <0>;<PCL>;<MIN>;IGNORE
+<U0661> <1>;<BAS>;<MIN>;IGNORE
+<U06F1> <1>;<PCL>;<MIN>;IGNORE
+<U0662> <2>;<BAS>;<MIN>;IGNORE
+<U06F2> <2>;<PCL>;<MIN>;IGNORE
+<U0663> <3>;<BAS>;<MIN>;IGNORE
+<U06F3> <3>;<PCL>;<MIN>;IGNORE
+<U0664> <4>;<BAS>;<MIN>;IGNORE
+<U06F4> <4>;<PCL>;<MIN>;IGNORE
+<U0665> <5>;<BAS>;<MIN>;IGNORE
+<U06F5> <5>;<PCL>;<MIN>;IGNORE
+<U0666> <6>;<BAS>;<MIN>;IGNORE
+<U06F6> <6>;<PCL>;<MIN>;IGNORE
+<U0667> <7>;<BAS>;<MIN>;IGNORE
+<U06F7> <7>;<PCL>;<MIN>;IGNORE
+<U0668> <8>;<BAS>;<MIN>;IGNORE
+<U06F8> <8>;<PCL>;<MIN>;IGNORE
+<U0669> <9>;<BAS>;<MIN>;IGNORE
+<U06F9> <9>;<PCL>;<MIN>;IGNORE
+<U0621> <hamza>;<BAS>;<MIN>;IGNORE
+<U0622> <alef>;<AMA>;<MIN>;IGNORE
+<U0623> <alef>;<AHA>;<MIN>;IGNORE
+<U0625> <alef>;<AHS>;<MIN>;IGNORE
+<U0627> <alef>;<BAS>;<MIN>;IGNORE
+<U0628> <beh>;<BAS>;<MIN>;IGNORE
+<U067E> <peh>;<BAS>;<MIN>;IGNORE
+<U0629> <teh_marbuta>;<BAS>;<MIN>;IGNORE
+<U062A> <teh>;<BAS>;<MIN>;IGNORE
+<U0679> <tteh>;<BAS>;<MIN>;IGNORE
+<U062B> <theh>;<BAS>;<MIN>;IGNORE
+<U062C> <jeem>;<BAS>;<MIN>;IGNORE
+<U0686> <tcheh>;<BAS>;<MIN>;IGNORE
+<U062D> <hah>;<BAS>;<MIN>;IGNORE
+<U062E> <khah>;<BAS>;<MIN>;IGNORE
+<U062F> <dal>;<BAS>;<MIN>;IGNORE
+<U0688> <ddal>;<BAS>;<MIN>;IGNORE
+<U0630> <thal>;<BAS>;<MIN>;IGNORE
+<U0631> <reh>;<BAS>;<MIN>;IGNORE
+<U0691> <rreh>;<BAS>;<MIN>;IGNORE
+<U0632> <zain>;<BAS>;<MIN>;IGNORE
+<U0698> <jeh>;<BAS>;<MIN>;IGNORE
+<U0633> <seen>;<BAS>;<MIN>;IGNORE
+<U0634> <sheen>;<BAS>;<MIN>;IGNORE
+<U0635> <sad>;<BAS>;<MIN>;IGNORE
+<U0636> <dad>;<BAS>;<MIN>;IGNORE
+<U0637> <tah>;<BAS>;<MIN>;IGNORE
+<U0638> <zah>;<BAS>;<MIN>;IGNORE
+<U0639> <ain>;<BAS>;<MIN>;IGNORE
+<U063A> <ghain>;<BAS>;<MIN>;IGNORE
+<U0641> <feh>;<BAS>;<MIN>;IGNORE
+<U0642> <qaf>;<BAS>;<MIN>;IGNORE
+<U0643> <kaf>;<BAS>;<MIN>;IGNORE
+<U06A9> <keheh>;<BAS>;<MIN>;IGNORE
+<U06AF> <gaf>;<BAS>;<MIN>;IGNORE
+<U0644> <lam>;<BAS>;<MIN>;IGNORE
+<U0645> <meem>;<BAS>;<MIN>;IGNORE
+<U0646> <noon>;<BAS>;<MIN>;IGNORE
+<U06BA> <noon_ghunna>;<BAS>;<MIN>;IGNORE
+<U0647> <heh>;<BAS>;<MIN>;IGNORE
+<U06C0> <heh_yeh>;<BAS>;<MIN>;IGNORE
+<U0624> <waw>;<AHW>;<MIN>;IGNORE
+<U0648> <waw>;<BAS>;<MIN>;IGNORE
+<U0649> <alef_maksura>;<BAS>;<MIN>;IGNORE
+<U0626> <alef_maksura><hamza>;<BAS><BAS>;<MIN><MIN>;IGNORE
+<U064A> <alef_maksura>;<AYE>;<MIN>;IGNORE
+<U06D3> <yeh_barree>;<YBA>;<MIN>;IGNORE
+<U06D2> <yeh_barree>;<BAS>;<MIN>;IGNORE
+
+order_start <ARABFOR>;backward;backward;backward;forward,position
+<UFE80> <hamza>;<BAS>;<AIS>;IGNORE
+<UFE81> <alef>;<AMA>;<AIS>;IGNORE
+<UFE82> <alef>;<AMA>;<AFI>;IGNORE
+<UFE83> <alef>;<AHA>;<AIS>;IGNORE
+<UFE84> <alef>;<AHA>;<AFI>;IGNORE
+<UFE87> <alef>;<AHS>;<AIS>;IGNORE
+<UFE88> <alef>;<AHS>;<AFI>;IGNORE
+<UFE8D> <alef>;<BAS>;<AIS>;IGNORE
+<UFE8E> <alef>;<BAS>;<AFI>;IGNORE
+<UFE8F> <beh>;<BAS>;<AIS>;IGNORE
+<UFE90> <beh>;<BAS>;<AFI>;IGNORE
+<UFE91> <beh>;<BAS>;<AII>;IGNORE
+<UFE92> <beh>;<BAS>;<AME>;IGNORE
+<UFB56> <peh>;<BAS>;<AIS>;IGNORE
+<UFB57> <peh>;<BAS>;<AFI>;IGNORE
+<UFB58> <peh>;<BAS>;<AII>;IGNORE
+<UFB59> <peh>;<BAS>;<AME>;IGNORE
+<UFE93> <teh_marbuta>;<BAS>;<AIS>;IGNORE
+<UFE94> <teh_marbuta>;<BAS>;<AFI>;IGNORE
+<UFE95> <teh>;<BAS>;<AIS>;IGNORE
+<UFE96> <teh>;<BAS>;<AFI>;IGNORE
+<UFE97> <teh>;<BAS>;<AII>;IGNORE
+<UFE98> <teh>;<BAS>;<AME>;IGNORE
+<UFB66> <tteh>;<BAS>;<AIS>;IGNORE
+<UFB67> <tteh>;<BAS>;<AFI>;IGNORE
+<UFB68> <tteh>;<BAS>;<AII>;IGNORE
+<UFB69> <tteh>;<BAS>;<AME>;IGNORE
+<UFE99> <theh>;<BAS>;<AIS>;IGNORE
+<UFE9A> <theh>;<BAS>;<AFI>;IGNORE
+<UFE9B> <theh>;<BAS>;<AII>;IGNORE
+<UFE9C> <theh>;<BAS>;<AME>;IGNORE
+<UFE9D> <jeem>;<BAS>;<AIS>;IGNORE
+<UFE9E> <jeem>;<BAS>;<AFI>;IGNORE
+<UFE9F> <jeem>;<BAS>;<AII>;IGNORE
+<UFEA0> <jeem>;<BAS>;<AME>;IGNORE
+<UFB7A> <tcheh>;<BAS>;<AIS>;IGNORE
+<UFB7B> <tcheh>;<BAS>;<AFI>;IGNORE
+<UFB7C> <tcheh>;<BAS>;<AII>;IGNORE
+<UFB7D> <tcheh>;<BAS>;<AME>;IGNORE
+<UFEA1> <hah>;<BAS>;<AIS>;IGNORE
+<UFEA2> <hah>;<BAS>;<AFI>;IGNORE
+<UFEA3> <hah>;<BAS>;<AII>;IGNORE
+<UFEA4> <hah>;<BAS>;<AME>;IGNORE
+<UFEA5> <khah>;<BAS>;<AIS>;IGNORE
+<UFEA6> <khah>;<BAS>;<AFI>;IGNORE
+<UFEA7> <khah>;<BAS>;<AII>;IGNORE
+<UFEA8> <khah>;<BAS>;<AME>;IGNORE
+<UFEA9> <dal>;<BAS>;<AIS>;IGNORE
+<UFEAA> <dal>;<BAS>;<AFI>;IGNORE
+<UFB88> <ddal>;<BAS>;<AIS>;IGNORE
+<UFB89> <ddal>;<BAS>;<AFI>;IGNORE
+<UFEAB> <thal>;<BAS>;<AIS>;IGNORE
+<UFEAC> <thal>;<BAS>;<AFI>;IGNORE
+<UFEAD> <reh>;<BAS>;<AIS>;IGNORE
+<UFEAE> <reh>;<BAS>;<AFI>;IGNORE
+<UFB8C> <rreh>;<BAS>;<AIS>;IGNORE
+<UFB8D> <rreh>;<BAS>;<AFI>;IGNORE
+<UFEAF> <zain>;<BAS>;<AIS>;IGNORE
+<UFEB0> <zain>;<BAS>;<AFI>;IGNORE
+<UFB8A> <jeh>;<BAS>;<AIS>;IGNORE
+<UFB8B> <jeh>;<BAS>;<AFI>;IGNORE
+<UFEB1> <seen>;<BAS>;<AIS>;IGNORE
+<UFEB2> <seen>;<BAS>;<AFI>;IGNORE
+<UFEB3> <seen>;<BAS>;<AII>;IGNORE
+<UFEB4> <seen>;<BAS>;<AME>;IGNORE
+<UFEB5> <sheen>;<BAS>;<AIS>;IGNORE
+<UFEB6> <sheen>;<BAS>;<AFI>;IGNORE
+<UFEB7> <sheen>;<BAS>;<AII>;IGNORE
+<UFEB8> <sheen>;<BAS>;<AME>;IGNORE
+<UFEB9> <sad>;<BAS>;<AIS>;IGNORE
+<UFEBA> <sad>;<BAS>;<AFI>;IGNORE
+<UFEBB> <sad>;<BAS>;<AII>;IGNORE
+<UFEBC> <sad>;<BAS>;<AME>;IGNORE
+<UFEBD> <dad>;<BAS>;<AIS>;IGNORE
+<UFEBE> <dad>;<BAS>;<AFI>;IGNORE
+<UFEBF> <dad>;<BAS>;<AII>;IGNORE
+<UFEC0> <dad>;<BAS>;<AME>;IGNORE
+<UFEC1> <tah>;<BAS>;<AIS>;IGNORE
+<UFEC2> <tah>;<BAS>;<AFI>;IGNORE
+<UFEC3> <tah>;<BAS>;<AII>;IGNORE
+<UFEC4> <tah>;<BAS>;<AME>;IGNORE
+<UFEC5> <zah>;<BAS>;<AIS>;IGNORE
+<UFEC6> <zah>;<BAS>;<AFI>;IGNORE
+<UFEC7> <zah>;<BAS>;<AII>;IGNORE
+<UFEC8> <zah>;<BAS>;<AME>;IGNORE
+<UFEC9> <ain>;<BAS>;<AIS>;IGNORE
+<UFECA> <ain>;<BAS>;<AFI>;IGNORE
+<UFECB> <ain>;<BAS>;<AII>;IGNORE
+<UFECC> <ain>;<BAS>;<AME>;IGNORE
+<UFECD> <ghain>;<BAS>;<AIS>;IGNORE
+<UFECE> <ghain>;<BAS>;<AFI>;IGNORE
+<UFECF> <ghain>;<BAS>;<AII>;IGNORE
+<UFED0> <ghain>;<BAS>;<AME>;IGNORE
+<UFED1> <feh>;<BAS>;<AIS>;IGNORE
+<UFED2> <feh>;<BAS>;<AFI>;IGNORE
+<UFED3> <feh>;<BAS>;<AII>;IGNORE
+<UFED4> <feh>;<BAS>;<AME>;IGNORE
+<UFED5> <qaf>;<BAS>;<AIS>;IGNORE
+<UFED6> <qaf>;<BAS>;<AFI>;IGNORE
+<UFED7> <qaf>;<BAS>;<AII>;IGNORE
+<UFED8> <qaf>;<BAS>;<AME>;IGNORE
+<UFED9> <kaf>;<BAS>;<AIS>;IGNORE
+<UFEDA> <kaf>;<BAS>;<AFI>;IGNORE
+<UFEDB> <kaf>;<BAS>;<AII>;IGNORE
+<UFEDC> <kaf>;<BAS>;<AME>;IGNORE
+<UFB8E> <keheh>;<BAS>;<AIS>;IGNORE
+<UFB8F> <keheh>;<BAS>;<AFI>;IGNORE
+<UFB90> <keheh>;<BAS>;<AII>;IGNORE
+<UFB91> <keheh>;<BAS>;<AME>;IGNORE
+<UFB92> <gaf>;<BAS>;<AIS>;IGNORE
+<UFB93> <gaf>;<BAS>;<AFI>;IGNORE
+<UFB94> <gaf>;<BAS>;<AII>;IGNORE
+<UFB95> <gaf>;<BAS>;<AME>;IGNORE
+<UFEDD> <lam>;<BAS>;<AIS>;IGNORE
+<UFEDE> <lam>;<BAS>;<AFI>;IGNORE
+<UFEDF> <lam>;<BAS>;<AII>;IGNORE
+<UFEE0> <lam>;<BAS>;<AME>;IGNORE
+<UFEE1> <meem>;<BAS>;<AIS>;IGNORE
+<UFEE2> <meem>;<BAS>;<AFI>;IGNORE
+<UFEE3> <meem>;<BAS>;<AII>;IGNORE
+<UFEE4> <meem>;<BAS>;<AME>;IGNORE
+<UFEE5> <noon>;<BAS>;<AIS>;IGNORE
+<UFEE6> <noon>;<BAS>;<AFI>;IGNORE
+<UFEE7> <noon>;<BAS>;<AII>;IGNORE
+<UFEE8> <noon>;<BAS>;<AME>;IGNORE
+<UFB9E> <noon_ghunna>;<BAS>;<AIS>;IGNORE
+<UFB9F> <noon_ghunna>;<BAS>;<AFI>;IGNORE
+<UFEE9> <heh>;<BAS>;<AIS>;IGNORE
+<UFEEA> <heh>;<BAS>;<AFI>;IGNORE
+<UFEEB> <heh>;<BAS>;<AII>;IGNORE
+<UFEEC> <heh>;<BAS>;<AME>;IGNORE
+<UFBA4> <heh_yeh>;<BAS>;<AIS>;IGNORE
+<UFBA5> <heh_yeh>;<BAS>;<AFI>;IGNORE
+<UFE85> <waw>;<AHW>;<AIS>;IGNORE
+<UFE86> <waw>;<AHW>;<AFI>;IGNORE
+<UFEED> <waw>;<BAS>;<AIS>;IGNORE
+<UFEEE> <waw>;<BAS>;<AFI>;IGNORE
+<UFEEF> <alef_maksura>;<BAS>;<AIS>;IGNORE
+<UFEF0> <alef_maksura>;<BAS>;<AFI>;IGNORE
+<UFE89> <alef_maksura><hamza>;<BAS><BAS>;<AIS><AIS>;IGNORE
+<UFE8A> <alef_maksura><hamza>;<BAS><BAS>;<AFI><AIS>;IGNORE
+<UFE8B> <alef_maksura><hamza>;<BAS><BAS>;<AII><AIS>;IGNORE
+<UFE8C> <alef_maksura><hamza>;<BAS><BAS>;<AME><AIS>;IGNORE
+<UFEF1> <alef_maksura>;<AYE>;<AIS>;IGNORE
+<UFEF2> <alef_maksura>;<AYE>;<AFI>;IGNORE
+<UFEF3> <alef_maksura>;<AYE>;<AII>;IGNORE
+<UFEF4> <alef_maksura>;<AYE>;<AME>;IGNORE
+<UFBB0> <yeh_barree>;<YBA>;<AIS>;IGNORE
+<UFBB1> <yeh_barree>;<YBA>;<AFI>;IGNORE
+<UFBAE> <yeh_barree>;<BAS>;<AIS>;IGNORE
+<UFBAF> <yeh_barree>;<BAS>;<AFI>;IGNORE
+<UFEF5> <lam><alef>;<BAS><AMA>;<AIS><AFI>;IGNORE
+<UFEF6> <lam><alef>;<BAS><AMA>;<AFI><AFI>;IGNORE
+<UFEF7> <lam><alef>;<BAS><AHA>;<AIS><AFI>;IGNORE
+<UFEF8> <lam><alef>;<BAS><AHA>;<AFI><AFI>;IGNORE
+<UFEF9> <lam><alef>;<BAS><AHS>;<AIS><AFI>;IGNORE
+<UFEFA> <lam><alef>;<BAS><AHS>;<AFI><AFI>;IGNORE
+<UFEFB> <lam><alef>;<BAS><BAS>;<AIS><AFI>;IGNORE
+<UFEFC> <lam><alef>;<BAS><BAS>;<AFI><AFI>;IGNORE
+
+order_start <HEBREU>;forward;forward;forward;forward,position
+<U05D0> <alef>;<BAS>;IGNORE;IGNORE
+<U05D1> <bet>;<BAS>;IGNORE;IGNORE
+<U05D2> <gimel>;<BAS>;IGNORE;IGNORE
+<U05D3> <dalet>;<BAS>;IGNORE;IGNORE
+<U05D4> <he>;<BAS>;IGNORE;IGNORE
+<U05D5> <vav>;<BAS>;IGNORE;IGNORE
+<U05D6> <zayin>;<BAS>;IGNORE;IGNORE
+<U05D7> <het>;<BAS>;IGNORE;IGNORE
+<U05D8> <tet>;<BAS>;IGNORE;IGNORE
+<U05D9> <yod>;<BAS>;IGNORE;IGNORE
+<U05DA> <kaf_fin>;<BAS>;IGNORE;IGNORE
+<U05DB> <kaf>;<BAS>;IGNORE;IGNORE
+<U05DC> <lamed>;<BAS>;IGNORE;IGNORE
+<U05DD> <mem_fin>;<BAS>;IGNORE;IGNORE
+<U05DE> <mem>;<BAS>;IGNORE;IGNORE
+<U05DF> <nun_fin>;<BAS>;IGNORE;IGNORE
+<U05E0> <nun>;<BAS>;IGNORE;IGNORE
+<U05E1> <samekh>;<BAS>;IGNORE;IGNORE
+<U05E2> <ayin>;<BAS>;IGNORE;IGNORE
+<U05E3> <pe_fin>;<BAS>;IGNORE;IGNORE
+<U05E4> <pe>;<BAS>;IGNORE;IGNORE
+<U05E5> <tsadi_fin>;<BAS>;IGNORE;IGNORE
+<U05E6> <tsadi>;<BAS>;IGNORE;IGNORE
+<U05E7> <qof>;<BAS>;IGNORE;IGNORE
+<U05E8> <resh>;<BAS>;IGNORE;IGNORE
+<U05E9> <shin>;<BAS>;IGNORE;IGNORE
+<U05EA> <tav>;<BAS>;IGNORE;IGNORE
+
+order_start <GREC>;forward;backward;forward
+<U0391> <ALPHA>;<BAS>;<CAP>;IGNORE
+<U03B1> <ALPHA>;<BAS>;<AMI>;IGNORE
+<U0386> <ALPHA>;<TNS>;<CAP>;IGNORE
+<U03AC> <ALPHA>;<TNS>;<AMI>;IGNORE
+<U0392> <BETA>;<BAS>;<CAP>;IGNORE
+<U03B2> <BETA>;<BAS>;<AMI>;IGNORE
+<U03D0> <BETA>;<PCL>;<AMI>;IGNORE
+<U0393> <GAMMA>;<BAS>;<CAP>;IGNORE
+<U03B3> <GAMMA>;<BAS>;<AMI>;IGNORE
+<U03DC> <GAMMA>;<PCL>;<CAP>;IGNORE # digamma copte
+<U0394> <DELTA>;<BAS>;<CAP>;IGNORE
+<U03B4> <DELTA>;<BAS>;<AMI>;IGNORE
+<U03EA> <DELTA>;<PCL>;<CAP>;IGNORE # GANGIA COPTE
+<U03EB> <DELTA>;<PCL>;<AMI>;IGNORE # gangia copte
+<U0395> <EPSILON>;<BAS>;<CAP>;IGNORE
+<U03B5> <EPSILON>;<BAS>;<AMI>;IGNORE
+<U0388> <EPSILON>;<TNS>;<CAP>;IGNORE
+<U03AD> <EPSILON>;<TNS>;<AMI>;IGNORE
+<U0396> <ZETA>;<BAS>;<CAP>;IGNORE
+<U03B6> <ZETA>;<BAS>;<AMI>;IGNORE
+<U03E8> <ZETA>;<PCL>;<CAP>;IGNORE # HORI COPTE
+<U03E9> <ZETA>;<PCL>;<AMI>;IGNORE # hori copte
+<U0397> <ETA>;<BAS>;<CAP>;IGNORE
+<U03B7> <ETA>;<BAS>;<AMI>;IGNORE
+<U0389> <ETA>;<TNS>;<CAP>;IGNORE
+<U03AE> <ETA>;<TNS>;<AMI>;IGNORE
+<U0398> <THETA>;<BAS>;<CAP>;IGNORE
+<U03B8> <THETA>;<BAS>;<AMI>;IGNORE
+<U03D1> <THETA>;<PCL>;<AMI>;IGNORE
+<U0399> <IOTA>;<BAS>;<CAP>;IGNORE
+<U03B9> <IOTA>;<BAS>;<AMI>;IGNORE
+<U038A> <IOTA>;<TNS>;<CAP>;IGNORE
+<U03AF> <IOTA>;<TNS>;<AMI>;IGNORE
+<U03AA> <IOTA>;<DLT>;<CAP>;IGNORE
+<U03CA> <IOTA>;<DLT>;<AMI>;IGNORE
+<U0390> <IOTA>;<DTT>;<AMI>;IGNORE
+<U03F3> <IOTA>;<OGO>;<AMI>;IGNORE # yot
+<U039A> <KAPPA>;<BAS>;<CAP>;IGNORE
+<U03BA> <KAPPA>;<BAS>;<AMI>;IGNORE
+<U03DE> <KAPPA>;<PCL>;<CAP>;IGNORE # koppa copte
+<U03F0> <KAPPA>;<PCL>;<AMI>;IGNORE
+<U03E6> <KAPPA>;<LIG>;<CAP>;IGNORE # KHEI COPTE
+<U03E7> <KAPPA>;<LIG>;<AMI>;IGNORE # khei copte
+<U039B> <LAMBDA>;<BAS>;<CAP>;IGNORE
+<U03BB> <LAMBDA>;<BAS>;<AMI>;IGNORE
+<U039C> <MU>;<BAS>;<CAP>;IGNORE
+<U03BC> <MU>;<BAS>;<AMI>;IGNORE
+<U039D> <NU>;<BAS>;<CAP>;IGNORE
+<U03BD> <NU>;<BAS>;<AMI>;IGNORE
+<U039E> <XI>;<BAS>;<CAP>;IGNORE
+<U03BE> <XI>;<BAS>;<AMI>;IGNORE
+<U039F> <OMICRON>;<BAS>;<CAP>;IGNORE
+<U03BF> <OMICRON>;<BAS>;<AMI>;IGNORE
+<U038C> <OMICRON>;<TNS>;<CAP>;IGNORE
+<U03CC> <OMICRON>;<TNS>;<AMI>;IGNORE
+<U03A0> <PI>;<BAS>;<CAP>;IGNORE
+<U03C0> <PI>;<BAS>;<AMI>;IGNORE
+<U03D6> <PI>;<PCL>;<AMI>;IGNORE
+<U03A1> <RHO>;<BAS>;<CAP>;IGNORE
+<U03C1> <RHO>;<BAS>;<AMI>;IGNORE
+<U03F1> <RHO>;<PCL>;<AMI>;IGNORE
+<U03A3> <SIGMA>;<BAS>;<CAP>;IGNORE
+<U03C3> <SIGMA>;<BAS>;<AMI>;IGNORE
+<U03C2> <SIGMA>;<PCL>;<AMI>;IGNORE
+<U03DA> <SIGMA>;<PCL>;<CAP>;IGNORE # STIGMA ARCH.
+<U03EC> <SIGMA>;<LIG>;<CAP>;IGNORE # SHIMA COPTE
+<U03ED> <SIGMA>;<LIG>;<AMI>;IGNORE # shima copte
+<U03F2> <SIGMA>;<OGO>;<AMI>;IGNORE
+<U03A4> <TAU>;<BAS>;<CAP>;IGNORE
+<U03C4> <TAU>;<BAS>;<AMI>;IGNORE
+<U03EE> <TAU>;<PCL>;<CAP>;IGNORE # DEI COPTE
+<U03EF> <TAU>;<PCL>;<AMI>;IGNORE # dei copte
+<U03A5> <UPSILON>;<BAS>;<CAP>;IGNORE
+<U03C5> <UPSILON>;<BAS>;<AMI>;IGNORE
+<U038E> <UPSILON>;<TNS>;<CAP>;IGNORE
+<U03CD> <UPSILON>;<TNS>;<AMI>;IGNORE
+<U03AB> <UPSILON>;<DLT>;<CAP>;IGNORE
+<U03CB> <UPSILON>;<DLT>;<AMI>;IGNORE
+<U03B0> <UPSILON>;<DTT>;<AMI>;IGNORE
+<U03D4> <UPSILON>;<DTT>;<CAP>;IGNORE
+<U03D2> <UPSILON>;<OGO>;<CAP>;IGNORE
+<U03D3> <UPSILON>;<MAC>;<CAP>;IGNORE
+<U03A6> <PHI>;<BAS>;<CAP>;IGNORE
+<U03C6> <PHI>;<BAS>;<AMI>;IGNORE
+<U03D5> <PHI>;<PCL>;<AMI>;IGNORE
+<U03E4> <PHI>;<LIG>;<CAP>;IGNORE # FEI COPTE
+<U03E5> <PHI>;<LIG>;<AMI>;IGNORE # fei copte
+<U03A7> <KHI>;<BAS>;<CAP>;IGNORE
+<U03C7> <KHI>;<BAS>;<AMI>;IGNORE
+<U03E0> <KHI>;<PCL>;<CAP>;IGNORE # sampi copte
+<U03A8> <PSI>;<BAS>;<CAP>;IGNORE
+<U03C8> <PSI>;<BAS>;<AMI>;IGNORE
+<U03E2> <PSI>;<PCL>;<CAP>;IGNORE # SHEI COPTE
+<U03E3> <PSI>;<PCL>;<AMI>;IGNORE # shei copte
+<U03A9> <OMEGA>;<BAS>;<CAP>;IGNORE
+<U03C9> <OMEGA>;<BAS>;<AMI>;IGNORE
+<U038F> <OMEGA>;<TNS>;<CAP>;IGNORE
+<U03CE> <OMEGA>;<TNS>;<AMI>;IGNORE
+
+order_start <CYRIL>;forward;forward;forward;forward,position
+<U0430> <CYR-A>;<BAS>;<MIN>;IGNORE
+<U0410> <CYR-A>;<BAS>;<CAP>;IGNORE
+<U0431> <CYR-BE>;<BAS>;<MIN>;IGNORE
+<U0411> <CYR-BE>;<BAS>;<CAP>;IGNORE
+<U0432> <CYR-VE>;<BAS>;<MIN>;IGNORE
+<U0412> <CYR-VE>;<BAS>;<CAP>;IGNORE
+<U0433> <CYR-GHE>;<BAS>;<MIN>;IGNORE
+<U0413> <CYR-GHE>;<BAS>;<CAP>;IGNORE
+<U0434> <CYR-DE>;<BAS>;<MIN>;IGNORE
+<U0414> <CYR-DE>;<BAS>;<CAP>;IGNORE
+<U0453> <CYR-GZHE>;<BAS>;<MIN>;IGNORE
+<U0403> <CYR-GZHE>;<BAS>;<CAP>;IGNORE
+<U0452> <CYR-DJE>;<BAS>;<MIN>;IGNORE
+<U0402> <CYR-DJE>;<BAS>;<CAP>;IGNORE
+<U0435> <CYR-IE>;<BAS>;<MIN>;IGNORE
+<U0415> <CYR-IE>;<BAS>;<CAP>;IGNORE
+<U0454> <UKR-IE>;<BAS>;<MIN>;IGNORE
+<U0404> <UKR-IE>;<BAS>;<CAP>;IGNORE
+<U0451> <CYR-IO>;<BAS>;<MIN>;IGNORE
+<U0401> <CYR-IO>;<BAS>;<CAP>;IGNORE
+<U0436> <CYR-ZHE>;<BAS>;<MIN>;IGNORE
+<U0416> <CYR-ZHE>;<BAS>;<CAP>;IGNORE
+<U0437> <CYR-ZE>;<BAS>;<MIN>;IGNORE
+<U0417> <CYR-ZE>;<BAS>;<CAP>;IGNORE
+<U0455> <CYR-DZE>;<BAS>;<MIN>;IGNORE
+<U0405> <CYR-DZE>;<BAS>;<CAP>;IGNORE
+<U0438> <CYR-I>;<BAS>;<MIN>;IGNORE
+<U0418> <CYR-I>;<BAS>;<CAP>;IGNORE
+<U0456> <UKR-I>;<BAS>;<MIN>;IGNORE
+<U0406> <UKR-I>;<BAS>;<CAP>;IGNORE
+<U0457> <UKR-YI>;<BAS>;<MIN>;IGNORE
+<U0407> <UKR-YI>;<BAS>;<CAP>;IGNORE
+<U0439> <CYR-IBRE>;<BAS>;<MIN>;IGNORE
+<U0419> <CYR-IBRE>;<BAS>;<CAP>;IGNORE
+<U0458> <CYR-JE>;<BAS>;<MIN>;IGNORE
+<U0408> <CYR-JE>;<BAS>;<CAP>;IGNORE
+<U043A> <CYR-KA>;<BAS>;<MIN>;IGNORE
+<U041A> <CYR-KA>;<BAS>;<CAP>;IGNORE
+<U043B> <CYR-EL>;<BAS>;<MIN>;IGNORE
+<U041B> <CYR-EL>;<BAS>;<CAP>;IGNORE
+<U0459> <CYR-LJE>;<BAS>;<MIN>;IGNORE
+<U0409> <CYR-LJE>;<BAS>;<CAP>;IGNORE
+<U043C> <CYR-EM>;<BAS>;<MIN>;IGNORE
+<U041C> <CYR-EM>;<BAS>;<CAP>;IGNORE
+<U043D> <CYR-EN>;<BAS>;<MIN>;IGNORE
+<U041D> <CYR-EN>;<BAS>;<CAP>;IGNORE
+<U045A> <CYR-NJE>;<BAS>;<MIN>;IGNORE
+<U040A> <CYR-NJE>;<BAS>;<CAP>;IGNORE
+<U043E> <CYR-O>;<BAS>;<MIN>;IGNORE
+<U041E> <CYR-O>;<BAS>;<CAP>;IGNORE
+<U043F> <CYR-PE>;<BAS>;<MIN>;IGNORE
+<U041F> <CYR-PE>;<BAS>;<CAP>;IGNORE
+<U0440> <CYR-ER>;<BAS>;<MIN>;IGNORE
+<U0420> <CYR-ER>;<BAS>;<CAP>;IGNORE
+<U0441> <CYR-ES>;<BAS>;<MIN>;IGNORE
+<U0421> <CYR-ES>;<BAS>;<CAP>;IGNORE
+<U0442> <CYR-TE>;<BAS>;<MIN>;IGNORE
+<U0422> <CYR-TE>;<BAS>;<CAP>;IGNORE
+<U045C> <CYR-KJE>;<BAS>;<MIN>;IGNORE
+<U040C> <CYR-KJE>;<BAS>;<CAP>;IGNORE
+<U045B> <CYR-TSHE>;<BAS>;<MIN>;IGNORE
+<U040B> <CYR-TSHE>;<BAS>;<CAP>;IGNORE
+<U0443> <CYR-OU>;<BAS>;<MIN>;IGNORE
+<U0423> <CYR-OU>;<BAS>;<CAP>;IGNORE
+<U045E> <CYR-OUBRE>;<BAS>;<MIN>;IGNORE
+<U040E> <CYR-OUBRE>;<BAS>;<CAP>;IGNORE
+<U0444> <CYR-EF>;<BAS>;<MIN>;IGNORE
+<U0424> <CYR-EF>;<BAS>;<CAP>;IGNORE
+<U0445> <CYR-HA>;<BAS>;<MIN>;IGNORE
+<U0425> <CYR-HA>;<BAS>;<CAP>;IGNORE
+<U0446> <CYR-TSE>;<BAS>;<MIN>;IGNORE
+<U0426> <CYR-TSE>;<BAS>;<CAP>;IGNORE
+<U0447> <CYR-TSHE>;<BAS>;<MIN>;IGNORE
+<U0427> <CYR-TSHE>;<BAS>;<CAP>;IGNORE
+<U045F> <CYR-DCHE>;<BAS>;<MIN>;IGNORE
+<U040F> <CYR-DCHE>;<BAS>;<CAP>;IGNORE
+<U0448> <CYR-SHA>;<BAS>;<MIN>;IGNORE
+<U0428> <CYR-SHA>;<BAS>;<CAP>;IGNORE
+<U0449> <CYR-SHTSHA>;<BAS>;<MIN>;IGNORE
+<U0429> <CYR-SHTSHA>;<BAS>;<CAP>;IGNORE
+<U044A> <CYR-SIGDUR>;<BAS>;<MIN>;IGNORE
+<U042A> <CYR-SIGDUR>;<BAS>;<CAP>;IGNORE
+<U044B> <CYR-YEROU>;<BAS>;<MIN>;IGNORE
+<U042B> <CYR-YEROU>;<BAS>;<CAP>;IGNORE
+<U044C> <CYR-SIGMOUIL>;<BAS>;<MIN>;IGNORE
+<U042C> <CYR-SIGMOUIL>;<BAS>;<CAP>;IGNORE
+<U044D> <CYR-E>;<BAS>;<MIN>;IGNORE
+<U042D> <CYR-E>;<BAS>;<CAP>;IGNORE
+<U044E> <CYR-YOU>;<BAS>;<MIN>;IGNORE
+<U042E> <CYR-YOU>;<BAS>;<CAP>;IGNORE
+<U044F> <CYR-YA>;<BAS>;<MIN>;IGNORE
+<U042F> <CYR-YA>;<BAS>;<CAP>;IGNORE
+
+order_start <HAN>;forward;forward;forward;forward,position
+<U4E00>......<U9FA5> <U4E00>......<U9FA5>;IGNORE;IGNORE;IGNORE
+#
+order_end
+#
+END LC_COLLATE
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 18abf5da27..c4b11080fe 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -48,6 +48,15 @@
# include <wctype.h>
#endif
+/* We need some of the locale data (the collation sequence information)
+ but there is no interface to get this information in general. Therefore
+ we support a correct implementation only in glibc. */
+#ifdef _LIBC
+# include "../locale/localeinfo.h"
+
+# define CONCAT(a,b) __CONCAT(a,b)
+#endif
+
/* Comment out all this code if we are using the GNU C Library, and are not
actually compiling the library itself. This code is part of the GNU C
Library, but also included in many other GNU distributions. Compiling
@@ -192,6 +201,7 @@ __wcschrnul (s, c)
# define STRCHR(S, C) strchr (S, C)
# define STRCHRNUL(S, C) __strchrnul (S, C)
# define STRCOLL(S1, S2) strcoll (S1, S2)
+# define SUFFIX MB
# include "fnmatch_loop.c"
@@ -209,7 +219,10 @@ __wcschrnul (s, c)
# define BTOWC(C) (C)
# define STRCHR(S, C) wcschr (S, C)
# define STRCHRNUL(S, C) __wcschrnul (S, C)
-# define STRCOLL(S1, S2) wcscoll (S1, S2)
+# define STRCOLL(S1, S2) wcscoll (S1, S2)
+# define SUFFIX WC
+# define WIDE_CHAR_VERSION 1
+
# undef IS_CHAR_CLASS
# ifdef _LIBC
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 5f6c05710e..831bd0631a 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags)
{
register const CHAR *p = pattern, *n = string;
register UCHAR c;
+#ifdef _LIBC
+ const UCHAR *collseq = (const UCHAR *)
+ _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
+# ifdef WIDE_CHAR_VERSION
+ const wint_t *names = (const wint_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
+ size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
+ size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
+# endif
+#endif
while ((c = *p++) != L('\0'))
{
@@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags)
/* Leave room for the null. */
CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
size_t c1 = 0;
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
wctype_t wt;
-# endif
+#endif
const CHAR *startp = p;
for (;;)
@@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags)
}
str[c1] = L('\0');
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
wt = IS_CHAR_CLASS (str);
if (wt == 0)
/* Invalid character class name. */
@@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags)
if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
goto matched;
-# else
+#else
if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
@@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags)
|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
goto matched;
-# endif
+#endif
}
else if (c == L('\0'))
/* [ (unterminated) loses. */
@@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags)
if (c == L('-') && *p != L(']'))
{
- /* It is a range. */
- CHAR lo[2];
- CHAR fc[2];
+#if _LIBC
+ /* We have to find the collation sequence
+ value for C. Collation sequence is nothing
+ we can regularly access. The sequence
+ value is defined by the order in which the
+ definitions of the collation values for the
+ various characters appear in the source
+ file. A strange concept, nowhere
+ documented. */
+ int32_t fseqidx;
+ int32_t lseqidx;
UCHAR cend = *p++;
+# ifdef WIDE_CHAR_VERSION
+ size_t cnt;
+# endif
+
if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
cend = *p++;
if (cend == L('\0'))
return FNM_NOMATCH;
- lo[0] = cold;
- lo[1] = L('\0');
- fc[0] = fn;
- fc[1] = L('\0');
- if (STRCOLL (lo, fc) <= 0)
+# ifdef WIDE_CHAR_VERSION
+ /* Search in the `names' array for the characters. */
+ fseqidx = fn % size;
+ cnt = 0;
+ while (names[fseqidx] != fn)
{
- CHAR hi[2];
- hi[0] = FOLD (cend);
- hi[1] = L('\0');
- if (STRCOLL (fc, hi) <= 0)
+ if (++cnt == layers)
+ /* XXX We don't know anything about
+ the character we are supposed to
+ match. This means we are failing. */
+ goto range_not_matched;
+
+ fseqidx += size;
+ }
+ lseqidx = cold % size;
+ cnt = 0;
+ while (names[lseqidx] != cold)
+ {
+ if (++cnt == layers)
+ {
+ lseqidx = -1;
+ break;
+ }
+ lseqidx += size;
+ }
+# else
+ fseqidx = fn;
+ lseqidx = cold;
+# endif
+
+ /* XXX It is not entirely clear to me how to handle
+ characters which are not mentioned in the
+ collation specification. */
+ if (
+# ifdef WIDE_CHAR_VERSION
+ lseqidx == -1 ||
+# endif
+ collseq[lseqidx] <= collseq[fseqidx])
+ {
+ /* We have to look at the upper bound. */
+ int32_t hseqidx;
+
+ cend = FOLD (cend);
+# ifdef WIDE_CHAR_VERSION
+ hseqidx = cend % size;
+ cnt = 0;
+ while (names[hseqidx] != cend)
+ {
+ if (++cnt == layers)
+ {
+ /* Hum, no information about the upper
+ bound. The matching succeeds if the
+ lower bound is matched exactly. */
+ if (lseqidx == -1 || cold != fn)
+ goto range_not_matched;
+
+ goto matched;
+ }
+ hseqidx += size;
+ }
+# else
+ hseqidx = cend;
+# endif
+
+ if (
+# ifdef WIDE_CHAR_VERSION
+ (lseqidx == -1
+ && collseq[fseqidx] == collseq[hseqidx]) ||
+# endif
+ collseq[fseqidx] <= collseq[hseqidx])
goto matched;
}
+# ifdef WIDE_CHAR_VERSION
+ range_not_matched:
+# endif
+#else
+ /* We use a boring value comparison of the character
+ values. This is better than comparing using
+ `strcoll' since the latter would have surprising
+ and sometimes fatal consequences. */
+ UCHAR cend = *p++;
+
+ if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
+ cend = *p++;
+ if (cend == L('\0'))
+ return FNM_NOMATCH;
+
+ /* It is a range. */
+ if (cold <= fn && fn <= FOLD (cend))
+ goto matched;
+#endif
c = *p++;
}
@@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags)
#undef STRCOLL
#undef L
#undef BTOWC
+#undef SUFFIX