aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
committer Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
commit acb5ee2e561276d64c6e26ef4b82f59a4db5ae90 (patch)
tree 1f7ebfcaf8bf2874ae5cdb6348205dccfd9499c2
parent b7cbee1cb029f6471aa069552a69f04a3d1b4d70 (diff)
download glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.xz
glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.zip
Update.
2000-05-24 Ulrich Drepper <drepper@redhat.com> * locale/programs/ld-collate.c (struct element_t): Add mbseqorder and wcseqorder members. (struct locale_collate_t): Likewise. (collate_finish): Assign collation sequence value to each character. Create tables for output. (collate_output): Write out tables with collation sequence information. * locale/C-collate.c: Provide C locale data for collation sequence table. * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before include fnmatch_loop.c. * posix/fnmatch_loop.c: Don't use strcoll while determining whether character is matched by range expression. Use collation sequence table. Outside glibc fall back on simple character value comparison.
-rw-r--r-- ChangeLog 20
-rw-r--r-- locale/C-collate.c 82
-rw-r--r-- locale/categories.def 2
-rw-r--r-- locale/langinfo.h 2
-rw-r--r-- localedata/ChangeLog 4
-rw-r--r-- localedata/locales/iso14651_t1 1406
-rw-r--r-- posix/fnmatch.c 15
-rw-r--r-- posix/fnmatch_loop.c 135
8 files changed, 1644 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 9440cba153..5ce40be794 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2000-05-24 Ulrich Drepper <drepper@redhat.com>
+
+ * locale/programs/ld-collate.c (struct element_t): Add mbseqorder
+ and wcseqorder members.
+ (struct locale_collate_t): Likewise.
+ (collate_finish): Assign collation sequence value to each character.
+ Create tables for output.
+ (collate_output): Write out tables with collation sequence information.
+ * locale/C-collate.c: Provide C locale data for collation sequence
+ table.
+ * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and
+ _NL_COLLATE_COLLSEQWC.
+ * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and
+ _NL_COLLATE_COLLSEQWC.
+ * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before
+ include fnmatch_loop.c.
+ * posix/fnmatch_loop.c: Don't use strcoll while determining whether
+ character is matched by range expression. Use collation sequence
+ table. Outside glibc fall back on simple character value comparison.
+
2000-05-24 Andreas Jaeger <aj@suse.de>
* sysdeps/mips/elf/start.S (ENTRY_POINT): Align stack for double
diff --git a/locale/C-collate.c b/locale/C-collate.c
index 679ed30871..0ad0efe271 100644
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
@@ -20,12 +20,84 @@
#include <endian.h>
#include "localeinfo.h"
+static const char collseqmb[] = /* C-locale collation sequence table for single bytes: entry i holds byte value i (identity map, 256 entries). */
+{
+ '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+ '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+ '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+ '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+ '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+ '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+ '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+ '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+ '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
+ '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
+ '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
+ '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+ '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
+ '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
+ '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
+ '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
+ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', /* NOTE(review): from here on values are negative where plain char is signed; presumably read back as unsigned char -- confirm against fnmatch_loop.c. */
+ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
+ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
+ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
+ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
+ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
+ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
+ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
+ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
+ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
+ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
+ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
+ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
+ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
+ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
+ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
+};
+
+static const uint32_t collseqwc[] = /* Wide-character C-locale collation sequence table: entry i holds the value i for code points 0x00-0xff (identity map, 256 entries). */
+{
+ L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07',
+ L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f',
+ L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17',
+ L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f',
+ L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27',
+ L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f',
+ L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37',
+ L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f',
+ L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47',
+ L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f',
+ L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57',
+ L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f',
+ L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67',
+ L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f',
+ L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77',
+ L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f',
+ L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87',
+ L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f',
+ L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97',
+ L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f',
+ L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7',
+ L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf',
+ L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7',
+ L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf',
+ L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7',
+ L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf',
+ L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7',
+ L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf',
+ L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7',
+ L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef',
+ L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7',
+ L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff'
+};
+
const struct locale_data _nl_C_LC_COLLATE =
{
_nl_C_name,
NULL, 0, 0, /* no file mapped */
UNDELETABLE,
- 16,
+ 18,
{
{ word: 0 },
{ string: NULL },
@@ -40,8 +112,10 @@ const struct locale_data _nl_C_LC_COLLATE =
{ string: NULL },
{ string: NULL },
{ string: NULL },
- { word: 0 },
{ string: NULL },
- { string: NULL }
+ { string: NULL },
+ { string: NULL },
+ { string: collseqmb },
+ { wstr: collseqwc }
}
};
diff --git a/locale/categories.def b/locale/categories.def
index b02c1cac58..19e06879fd 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -58,6 +58,8 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word)
DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string)
DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, string)
), NO_POSTLOAD)
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 7d1183434a..b5ccac6b88 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -248,6 +248,8 @@ enum
_NL_COLLATE_SYMB_HASH_SIZEMB,
_NL_COLLATE_SYMB_TABLEMB,
_NL_COLLATE_SYMB_EXTRAMB,
+ _NL_COLLATE_COLLSEQMB,
+ _NL_COLLATE_COLLSEQWC,
_NL_NUM_LC_COLLATE,
/* LC_CTYPE category: character classification.
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index e59ba8317a..3f40616dd3 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,7 @@
+2000-05-24 Ulrich Drepper <drepper@redhat.com>
+
+ * locales/iso14651_t1: New file.
+
2000-05-15 Andreas Jaeger <aj@suse.de>
* tst-fmon.data: Change testcase following fixes for
diff --git a/localedata/locales/iso14651_t1 b/localedata/locales/iso14651_t1
new file mode 100644
index 0000000000..0402a1f510
--- /dev/null
+++ b/localedata/locales/iso14651_t1
@@ -0,0 +1,1406 @@
+LC_COLLATE
+
+COLL_WEIGHT_MAX=4
+
+# Déclaration des systèmes d'écriture / Declaration of scripts
+script <SPECIAL>
+script <LATIN>
+script <ARABINT>
+script <ARABFOR>
+script <HEBREU>
+script <GREC>
+script <CYRIL>
+script <HAN>
+
+# Déclaration des symboles internes / Declaration of internal symbols
+#
+# SYMB N° Expl.
+#
+collating-symbol <RES-1>
+#
+# <ARABINT>/<ARABFOR>
+#
+#
+collating-symbol <ANO> # 2 normal --> voir/see <MIN>
+collating-symbol <AIS> # 3 isol.
+collating-symbol <AFI> # 4 final
+collating-symbol <AII> # 5 initial
+collating-symbol <AME> # 6 medial/médian
+#
+collating-symbol <MIN> # 7 minuscule/minuscule (bas de casse/lower case)
+collating-symbol <IMI> # 8 inférieur min./subscript min. (indice/index)
+collating-symbol <EMI> # 9 supér. min./superscript min. (exposant/exponent)
+collating-symbol <CAP> # 10 capitale/capital (haut de casse/upper case)
+collating-symbol <AMI> # 8 minuscule grecque/Greek lower case
+collating-symbol <ICA> # 11 inférieur en capitale/subscript capital
+collating-symbol <ECA> # 12 supérieur en capitale/superscript capital
+#
+# <ARABINT>/<ARABFOR>
+#
+collating-symbol <AMA> # 13 accent madda
+collating-symbol <AHA> # 14 accent hamza
+collating-symbol <AHW> # 14-1 accent hamza/waw
+collating-symbol <AHS> # 14-2 accent hamza under / hamza souscrit
+collating-symbol <AYE> # 14-3 accent under yeh / accent souscrit du ya'
+collating-symbol <YBA> # 14-4 accent hamza/yeh barree
+#
+collating-symbol <BAS> # 15 de base/basic (non accentué/non-accented)
+#
+collating-symbol <PCL> # 16 particulier/peculiar
+collating-symbol <LIG> # 17 ligature/ligature
+collating-symbol <ACA> # 18 accent aigu/acute accent
+collating-symbol <GRA> # 20 accent grave/grave accent
+collating-symbol <BRE> # 21 brève/breve
+collating-symbol <CIR> # 22 accent circonflexe/circumflex accent
+collating-symbol <CAR> # 23 caron/caron
+collating-symbol <RNE> # 24 rond supérieur/ring above
+collating-symbol <REU> # 25 tréma/diaeresis (ou/or umlaut)
+collating-symbol <DAC> # 26 double ac. aigu/double acute ac.
+collating-symbol <TIL> # 27 tilde/tilde
+collating-symbol <PCT> # 28 point/dot
+collating-symbol <OBL> # 29 barre oblique/oblique
+collating-symbol <CDI> # 30 cédille/cedilla
+collating-symbol <OGO> # 31 ogonek/ogonek
+collating-symbol <MAC> # 32 macron/macron
+#
+# GREC
+#
+collating-symbol <TNS> # accent aigu/tonos/acute accent
+collating-symbol <DLT> # tréma/dialytica/diaeresis
+collating-symbol <DTT> # dialytika tonos
+#
+collating-symbol <0>
+collating-symbol <1>
+collating-symbol <2>
+collating-symbol <3>
+collating-symbol <4>
+collating-symbol <5>
+collating-symbol <6>
+collating-symbol <7>
+collating-symbol <8>
+collating-symbol <9>
+#
+collating-symbol <a>
+collating-symbol <b>
+collating-symbol <c>
+collating-symbol <d>
+collating-symbol <e>
+collating-symbol <f>
+collating-symbol <g>
+collating-symbol <h>
+collating-symbol <i>
+collating-symbol <j>
+collating-symbol <k>
+collating-symbol <l>
+collating-symbol <m>
+collating-symbol <n>
+collating-symbol <o>
+collating-symbol <p>
+collating-symbol <q>
+collating-symbol <r>
+collating-symbol <s>
+collating-symbol <t>
+collating-symbol <u>
+collating-symbol <v>
+collating-symbol <w>
+collating-symbol <x>
+collating-symbol <y>
+collating-symbol <z>
+#
+# <ARABINT>/<ARABFOR>
+#
+collating-symbol <hamza>
+collating-symbol <alef>
+collating-symbol <beh>
+collating-symbol <peh>
+collating-symbol <teh_marbuta>
+collating-symbol <teh>
+collating-symbol <tteh>
+collating-symbol <theh>
+collating-symbol <jeem>
+collating-symbol <tcheh>
+collating-symbol <hah>
+collating-symbol <khah>
+collating-symbol <dal>
+collating-symbol <ddal>
+collating-symbol <thal>
+collating-symbol <reh>
+collating-symbol <rreh>
+collating-symbol <zain>
+collating-symbol <jeh>
+collating-symbol <seen>
+collating-symbol <sheen>
+collating-symbol <sad>
+collating-symbol <dad>
+collating-symbol <tah>
+collating-symbol <zah>
+collating-symbol <ain>
+collating-symbol <ghain>
+collating-symbol <feh>
+collating-symbol <qaf>
+collating-symbol <kaf>
+collating-symbol <keheh>
+collating-symbol <gaf>
+collating-symbol <lam>
+collating-symbol <meem>
+collating-symbol <noon>
+collating-symbol <noon_ghunna>
+collating-symbol <heh>
+collating-symbol <heh_yeh>
+collating-symbol <waw>
+collating-symbol <alef_maksura>
+collating-symbol <yeh_barree>
+#
+# <HEBREU>
+#
+collating-symbol <alef>
+collating-symbol <bet>
+collating-symbol <gimel>
+collating-symbol <dalet>
+collating-symbol <he>
+collating-symbol <vav>
+collating-symbol <zayin>
+collating-symbol <het>
+collating-symbol <tet>
+collating-symbol <yod>
+collating-symbol <kaf_fin>
+collating-symbol <kaf>
+collating-symbol <lamed>
+collating-symbol <mem_fin>
+collating-symbol <mem>
+collating-symbol <nun_fin>
+collating-symbol <nun>
+collating-symbol <samekh>
+collating-symbol <ayin>
+collating-symbol <pe_fin>
+collating-symbol <pe>
+collating-symbol <tsad_fin>
+collating-symbol <tsadi>
+collating-symbol <qof>
+collating-symbol <resh>
+collating-symbol <shin>
+collating-symbol <tav>
+#
+# GREC
+#
+collating-symbol <ALPHA>
+collating-symbol <BETA>
+collating-symbol <GAMMA>
+collating-symbol <DELTA>
+collating-symbol <EPSILON>
+collating-symbol <ZETA>
+collating-symbol <ETA>
+collating-symbol <THETA>
+collating-symbol <IOTA>
+collating-symbol <KAPPA>
+collating-symbol <LAMBDA>
+collating-symbol <MU>
+collating-symbol <NU>
+collating-symbol <XI>
+collating-symbol <OMICRON>
+collating-symbol <PI>
+collating-symbol <RHO>
+collating-symbol <SIGMA>
+collating-symbol <TAU>
+collating-symbol <UPSILON>
+collating-symbol <PHI>
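+collating-symbol <KHI> # NOTE(review): the order list further down names this symbol <CHI>; the two spellings should agree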
+collating-symbol <PSI>
+collating-symbol <OMEGA>
+#
+# CYRIL
+#
+collating-symbol <CYR-A>
+collating-symbol <CYR-BE>
+collating-symbol <CYR-VE>
+collating-symbol <CYR-GHE>
+collating-symbol <CYR-DE>
+collating-symbol <CYR-GZHE>
+collating-symbol <CYR-DJE>
+collating-symbol <CYR-IE>
+collating-symbol <UKR-IE>
+collating-symbol <CYR-IO>
+collating-symbol <CYR-ZHE>
+collating-symbol <CYR-ZE>
+collating-symbol <CYR-DZE>
+collating-symbol <CYR-I>
+collating-symbol <UKR-I>
+collating-symbol <UKR-YI>
+collating-symbol <CYR-IBRE>
+collating-symbol <CYR-JE>
+collating-symbol <CYR-KA>
+collating-symbol <CYR-EL>
+collating-symbol <CYR-LJE>
+collating-symbol <CYR-EM>
+collating-symbol <CYR-EN>
+collating-symbol <CYR-NJE>
+collating-symbol <CYR-O>
+collating-symbol <CYR-PE>
+collating-symbol <CYR-ER>
+collating-symbol <CYR-ES>
+collating-symbol <CYR-TE>
+collating-symbol <CYR-KJE>
+collating-symbol <CYR-TSHE>
+collating-symbol <CYR-OU>
+collating-symbol <CYR-OUBRE>
+collating-symbol <CYR-EF>
+collating-symbol <CYR-HA>
+collating-symbol <CYR-TSE>
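+collating-symbol <CYR-TSHE> # NOTE(review): duplicate of the <CYR-TSHE> declared after <CYR-KJE>; a distinct name (che) was probably intended here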
+collating-symbol <CYR-DCHE>
+collating-symbol <CYR-SHA>
+collating-symbol <CYR-SHTSHA>
+collating-symbol <CYR-SIGDUR>
+collating-symbol <CYR-YEROU>
+collating-symbol <CYR-SIGMOUIL>
+collating-symbol <CYR-E>
+collating-symbol <CYR-YOU>
+collating-symbol <CYR-YA>
+
+# Ordre des symboles internes / Order of internal symbols
+#
+# SYMB. N°
+#
+<RES-1>
+<MIN>
+# forme de base (bas de casse, arabe intrinsèque,
+# hébreu intrinsèque, etc.
+# basic form (lower case, intrinsic Arabic
+# intrinsic Hebrew and so on)
+#
+# <ARABINT>/<ARABFOR>
+#
+#
+<ANO> # voir
+<MIN>
+<AIS> # isol.
+# 3
+<AFI> # final
+# 4
+<AII> # initial
+# 5
+<AME> # medial/médian
+# 6
+#
+<IMI> # 7
+<EMI> # 8
+<CAP> # 9
+<ICA> # 10
+<ECA> # 11
+<AMI>
+#alternate lower case/
+# 12
+#
+#minuscules spéciales après majuscules
+# <ARABINT>/<ARABFOR>
+#
+<AMA> # accent madda #13
+<AHA> # accent hamza #14
+<AHW> # accent hamza/waw #14 1
+<AHS> # accent hamza under / hamza souscrit #14 2
+<AYE> # accent under yeh / accent souscrit du ya' #14 3
+<YBA> # accent hamza/yeh barree #14 4
+#
+<BAS> # 15
+#
+<PCL> # 16
+<LIG> # 17
+<ACA> # 18
+<GRA> # 19
+<BRE> # 20
+<CIR> # 21
+<CAR> # 22
+<RNE> # 23
+<REU> # 24
+<DAC> # 25
+<TIL> # 26
+<PCT> # 27
+<OBL> # 28
+<CDI> # 29
+<OGO> # 30
+<MAC> # 31
+#
+# GREC
+#
+<TNS> # accent aigu/tonos/acute accent
+<DLT> # tréma/dialytica/diaeresis
+<DTT> # dialytika tonos
+#
+<0> # 48
+<1> # 49
+<2> # 50
+<3> # 51
+<4> # 52
+<5> # 53
+<6> # 54
+<7> # 55
+<8> # 56
+<9> # 57
+#
+<a> # 97
+<b> # 98
+<c> # 99
+<d> # 100
+<e> # 101
+<f> # 102
+<g> # 103
+<h> # 104
+<i> # 105
+<j> # 106
+<k> # 107
+<l> # 108
+<m> # 109
+<n> # 110
+<o> # 111
+<p> # 112
+<q> # 113
+<r> # 114
+<s> # 115
+<t> # 116
+<u> # 117
+<v> # 118
+<w> # 119
+<x> # 120
+<y> # 121
+<z> # 122
+<th> # 122b
+#
+# <ARABINT>/<ARABFOR>
+#
+<hamza>
+<alef>
+<beh>
+<peh>
+<teh_marbuta>
+<teh>
+<tteh>
+<theh>
+<jeem>
+<tcheh>
+<hah>
+<khah>
+<dal>
+<ddal>
+<thal>
+<reh>
+<rreh>
+<zain>
+<jeh>
+<seen>
+<sheen>
+<sad>
+<dad>
+<tah>
+<zah>
+<ain>
+<ghain>
+<feh>
+<qaf>
+<kaf>
+<keheh>
+<gaf>
+<lam>
+<meem>
+<noon>
+<noon_ghunna>
+<heh>
+<heh_yeh>
+<waw>
+<alef_maksura>
+<yeh_barree>
+#
+# <HEBREU>
+#
+<alef>
+<bet>
+<gimel>
+<dalet>
+<he>
+<vav>
+<zayin>
+<het>
+<tet>
+<yod>
+<kaf_fin>
+<kaf>
+<lamed>
+<mem_fin>
+<mem>
+<nun_fin>
+<nun>
+<samekh>
+<ayin>
+<pe_fin>
+<pe>
+<tsad_fin>
+<tsadi>
+<qof>
+<resh>
+<shin>
+<tav>
+#
+#GREC
+#
+<ALPHA>
+<BETA>
+<GAMMA>
+<DELTA>
+<EPSILON>
+<ZETA>
+<ETA>
+<THETA>
+<IOTA>
+<KAPPA>
+<LAMBDA>
+<MU>
+<NU>
+<XI>
+<OMICRON>
+<PI>
+<RHO>
+<SIGMA>
+<TAU>
+<UPSILON>
+<PHI>
+<CHI>
+<PSI>
+<OMEGA>
+#
+#CYRIL
+#
+<CYR-A>
+<CYR-BE>
+<CYR-VE>
+<CYR-GHE>
+<CYR-DE>
+<CYR-GZHE>
+<CYR-DJE>
+<CYR-IE>
+<UKR-IE>
+<CYR-IO>
+<CYR-ZHE>
+<CYR-ZE>
+<CYR-DZE>
+<CYR-I>
+<UKR-I>
+<UKR-YI>
+<CYR-IBRE>
+<CYR-JE>
+<CYR-KA>
+<CYR-EL>
+<C