aboutsummaryrefslogtreecommitdiff
path: root/REORG.TODO/locale/programs
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/locale/programs')
-rw-r--r--REORG.TODO/locale/programs/3level.h328
-rw-r--r--REORG.TODO/locale/programs/charmap-dir.c309
-rw-r--r--REORG.TODO/locale/programs/charmap-dir.h46
-rw-r--r--REORG.TODO/locale/programs/charmap-kw.gperf42
-rw-r--r--REORG.TODO/locale/programs/charmap-kw.h195
-rw-r--r--REORG.TODO/locale/programs/charmap.c1104
-rw-r--r--REORG.TODO/locale/programs/charmap.h84
-rw-r--r--REORG.TODO/locale/programs/config.h35
-rw-r--r--REORG.TODO/locale/programs/ld-address.c545
-rw-r--r--REORG.TODO/locale/programs/ld-collate.c3978
-rw-r--r--REORG.TODO/locale/programs/ld-ctype.c4030
-rw-r--r--REORG.TODO/locale/programs/ld-identification.c416
-rw-r--r--REORG.TODO/locale/programs/ld-measurement.c233
-rw-r--r--REORG.TODO/locale/programs/ld-messages.c315
-rw-r--r--REORG.TODO/locale/programs/ld-monetary.c757
-rw-r--r--REORG.TODO/locale/programs/ld-name.c281
-rw-r--r--REORG.TODO/locale/programs/ld-numeric.c343
-rw-r--r--REORG.TODO/locale/programs/ld-paper.c231
-rw-r--r--REORG.TODO/locale/programs/ld-telephone.c295
-rw-r--r--REORG.TODO/locale/programs/ld-time.c964
-rw-r--r--REORG.TODO/locale/programs/linereader.c886
-rw-r--r--REORG.TODO/locale/programs/linereader.h146
-rw-r--r--REORG.TODO/locale/programs/locale-spec.c131
-rw-r--r--REORG.TODO/locale/programs/locale.c989
-rw-r--r--REORG.TODO/locale/programs/localedef.c626
-rw-r--r--REORG.TODO/locale/programs/localedef.h177
-rw-r--r--REORG.TODO/locale/programs/locarchive.c1757
-rw-r--r--REORG.TODO/locale/programs/locfile-kw.gperf201
-rw-r--r--REORG.TODO/locale/programs/locfile-kw.h621
-rw-r--r--REORG.TODO/locale/programs/locfile-token.h258
-rw-r--r--REORG.TODO/locale/programs/locfile.c1001
-rw-r--r--REORG.TODO/locale/programs/locfile.h279
-rw-r--r--REORG.TODO/locale/programs/repertoire.c524
-rw-r--r--REORG.TODO/locale/programs/repertoire.h64
-rw-r--r--REORG.TODO/locale/programs/simple-hash.c291
-rw-r--r--REORG.TODO/locale/programs/simple-hash.h53
-rw-r--r--REORG.TODO/locale/programs/xmalloc.c106
-rw-r--r--REORG.TODO/locale/programs/xstrdup.c36
38 files changed, 22677 insertions, 0 deletions
diff --git a/REORG.TODO/locale/programs/3level.h b/REORG.TODO/locale/programs/3level.h
new file mode 100644
index 0000000000..15e192dc49
--- /dev/null
+++ b/REORG.TODO/locale/programs/3level.h
@@ -0,0 +1,328 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+
+/* Construction of sparse 3-level tables.
+ See wchar-lookup.h or coll-lookup.h for their structure and the
+ meaning of p and q.
+
+ Before including this file, set
+ TABLE to the name of the structure to be defined
+ ELEMENT to the type of every entry
+ DEFAULT to the default value for empty entries
+ ITERATE if you want the TABLE_iterate function to be defined
+ NO_ADD_LOCALE if you don't want the add_locale_TABLE function
+ to be defined
+
+ This will define
+
+ struct TABLE;
+ void TABLE_init (struct TABLE *t);
+ ELEMENT TABLE_get (struct TABLE *t, uint32_t wc);
+ void TABLE_add (struct TABLE *t, uint32_t wc, ELEMENT value);
+ void TABLE_iterate (struct TABLE *t,
+ void (*fn) (uint32_t wc, ELEMENT value));
+ void add_locale_TABLE (struct locale_file *file, struct TABLE *t);
+*/
+
+#define CONCAT(a,b) CONCAT1(a,b)
+#define CONCAT1(a,b) a##b
+
+struct TABLE
+{
+ /* Parameters: the low p bits of a character index into a level3 block, the next q bits index into a level2 block. */
+ unsigned int p;
+ unsigned int q;
+ /* Working representation. For each level: capacity, amount in use, and the array itself. */
+ size_t level1_alloc; /* Capacity of level1, in entries. */
+ size_t level1_size; /* Number of level1 entries in use. */
+ uint32_t *level1; /* Indexed by wc >> (q + p); holds a level2 block number or EMPTY. */
+ size_t level2_alloc; /* Capacity of level2, in blocks of (1 << q) entries. */
+ size_t level2_size; /* Number of level2 blocks in use. */
+ uint32_t *level2; /* Each entry holds a level3 block number or EMPTY. */
+ size_t level3_alloc; /* Capacity of level3, in blocks of (1 << p) entries. */
+ size_t level3_size; /* Number of level3 blocks in use. */
+ ELEMENT *level3; /* The stored values; slots never written hold DEFAULT. */
+ /* Size of compressed representation. */
+ size_t result_size;
+};
+
+/* Initialize T as an empty table: every level gets a NULL array with zero capacity and zero size. Assumes t->p and t->q have already been set; result_size is not touched. */
+static inline void
+CONCAT(TABLE,_init) (struct TABLE *t)
+{
+ t->level1 = NULL;
+ t->level1_alloc = t->level1_size = 0;
+ t->level2 = NULL;
+ t->level2_alloc = t->level2_size = 0;
+ t->level3 = NULL;
+ t->level3_alloc = t->level3_size = 0;
+}
+
+/* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
+ whether 'int' is 16 bit, 32 bit, or 64 bit. */
+#define EMPTY ((uint32_t) ~0)
+
+/* Retrieve an entry. Returns the value stored for WC in T, or DEFAULT when level1 is too short or an EMPTY marker is met on the way down. */
+static inline ELEMENT
+__attribute ((always_inline))
+CONCAT(TABLE,_get) (struct TABLE *t, uint32_t wc)
+{
+ uint32_t index1 = wc >> (t->q + t->p); /* Bits above the low q + p select the level1 slot. */
+ if (index1 < t->level1_size)
+ {
+ uint32_t lookup1 = t->level1[index1]; /* Level2 block number, or EMPTY. */
+ if (lookup1 != EMPTY)
+ {
+ uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1))
+ + (lookup1 << t->q); /* Middle q bits, offset to the start of block lookup1. */
+ uint32_t lookup2 = t->level2[index2]; /* Level3 block number, or EMPTY. */
+ if (lookup2 != EMPTY)
+ {
+ uint32_t index3 = (wc & ((1 << t->p) - 1))
+ + (lookup2 << t->p); /* Low p bits, offset to the start of block lookup2. */
+ ELEMENT lookup3 = t->level3[index3];
+
+ return lookup3;
+ }
+ }
+ }
+ return DEFAULT;
+}
+
+/* Add one entry: make T map WC to VALUE, growing level1 and allocating new level2/level3 blocks (via xrealloc) as needed. */
+static void
+CONCAT(TABLE,_add) (struct TABLE *t, uint32_t wc, ELEMENT value)
+{
+ uint32_t index1 = wc >> (t->q + t->p);
+ uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); /* Position inside a level2 block; made absolute further down. */
+ uint32_t index3 = wc & ((1 << t->p) - 1); /* Position inside a level3 block; made absolute further down. */
+ size_t i, i1, i2;
+
+ if (value == CONCAT(TABLE,_get) (t, wc)) /* Already mapped to VALUE; this also covers storing DEFAULT into an unpopulated slot, so no block is allocated for it. */
+ return;
+
+ if (index1 >= t->level1_size)
+ {
+ if (index1 >= t->level1_alloc)
+ {
+ size_t alloc = 2 * t->level1_alloc; /* Double the capacity ... */
+ if (alloc <= index1)
+ alloc = index1 + 1; /* ... but make it at least large enough to hold index1. */
+ t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
+ alloc * sizeof (uint32_t));
+ t->level1_alloc = alloc;
+ }
+ while (index1 >= t->level1_size) /* Mark every newly exposed level1 slot as EMPTY. */
+ t->level1[t->level1_size++] = EMPTY;
+ }
+
+ if (t->level1[index1] == EMPTY) /* No level2 block for this range yet: append one. */
+ {
+ if (t->level2_size == t->level2_alloc)
+ {
+ size_t alloc = 2 * t->level2_alloc + 1; /* The + 1 lets the capacity grow from zero. */
+ t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
+ (alloc << t->q) * sizeof (uint32_t));
+ t->level2_alloc = alloc;
+ }
+ i1 = t->level2_size << t->q;
+ i2 = (t->level2_size + 1) << t->q;
+ for (i = i1; i < i2; i++) /* Fill the new block with EMPTY markers. */
+ t->level2[i] = EMPTY;
+ t->level1[index1] = t->level2_size++; /* Record the new block's number in level1. */
+ }
+
+ index2 += t->level1[index1] << t->q; /* Now an absolute index into level2. */
+
+ if (t->level2[index2] == EMPTY) /* No level3 block for this range yet: append one. */
+ {
+ if (t->level3_size == t->level3_alloc)
+ {
+ size_t alloc = 2 * t->level3_alloc + 1; /* The + 1 lets the capacity grow from zero. */
+ t->level3 = (ELEMENT *) xrealloc ((char *) t->level3,
+ (alloc << t->p) * sizeof (ELEMENT));
+ t->level3_alloc = alloc;
+ }
+ i1 = t->level3_size << t->p;
+ i2 = (t->level3_size + 1) << t->p;
+ for (i = i1; i < i2; i++) /* Fill the new block with DEFAULT values. */
+ t->level3[i] = DEFAULT;
+ t->level2[index2] = t->level3_size++; /* Record the new block's number in level2. */
+ }
+
+ index3 += t->level2[index2] << t->p; /* Now an absolute index into level3. */
+
+ t->level3[index3] = value;
+}
+
+#ifdef ITERATE
+/* Apply a function to all entries in the table: FN is called as fn (wc, value) for every populated slot whose value differs from DEFAULT, in increasing order of wc. */
+static void
+CONCAT(TABLE,_iterate) (struct TABLE *t,
+ void (*fn) (uint32_t wc, ELEMENT value))
+{
+ uint32_t index1;
+ for (index1 = 0; index1 < t->level1_size; index1++)
+ {
+ uint32_t lookup1 = t->level1[index1];
+ if (lookup1 != EMPTY) /* Skip ranges that have no level2 block. */
+ {
+ uint32_t lookup1_shifted = lookup1 << t->q; /* Start of that level2 block. */
+ uint32_t index2;
+ for (index2 = 0; index2 < (1 << t->q); index2++)
+ {
+ uint32_t lookup2 = t->level2[index2 + lookup1_shifted];
+ if (lookup2 != EMPTY) /* Skip ranges that have no level3 block. */
+ {
+ uint32_t lookup2_shifted = lookup2 << t->p; /* Start of that level3 block. */
+ uint32_t index3;
+ for (index3 = 0; index3 < (1 << t->p); index3++)
+ {
+ ELEMENT lookup3 = t->level3[index3 + lookup2_shifted];
+ if (lookup3 != DEFAULT) /* Slots still holding DEFAULT are not reported. */
+ fn ((((index1 << t->q) + index2) << t->p) + index3, /* Reassemble wc from the three indices. */
+ lookup3);
+ }
+ }
+ }
+ }
+ }
+}
+#endif
+
+#ifndef NO_ADD_LOCALE
+/* Finalize and shrink: merge identical level3 and level2 blocks, write the compressed table to FILE, set t->result_size, and free the working arrays of T. */
+static void
+CONCAT(add_locale_,TABLE) (struct locale_file *file, struct TABLE *t)
+{
+ size_t i, j, k;
+ uint32_t reorder3[t->level3_size]; /* Old level3 block number -> new number. NOTE(review): variable-length array on the stack, sized by the table contents. */
+ uint32_t reorder2[t->level2_size]; /* Old level2 block number -> new number. NOTE(review): same stack-size concern. */
+ uint32_t level2_offset, level3_offset, last_offset;
+
+ /* Uniquify level3 blocks. */
+ k = 0; /* Number of distinct blocks kept so far; they occupy blocks 0 .. k-1. */
+ for (j = 0; j < t->level3_size; j++)
+ {
+ for (i = 0; i < k; i++) /* Linear search of the kept blocks for one identical to block j. */
+ if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (ELEMENT)) == 0)
+ break;
+ /* Relocate block j to block i. */
+ reorder3[j] = i;
+ if (i == k) /* No duplicate found: block j becomes kept block k. */
+ {
+ if (i != j)
+ memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
+ (1 << t->p) * sizeof (ELEMENT));
+ k++;
+ }
+ }
+ t->level3_size = k;
+
+ for (i = 0; i < (t->level2_size << t->q); i++) /* Rewrite level2 to use the new level3 block numbers. */
+ if (t->level2[i] != EMPTY)
+ t->level2[i] = reorder3[t->level2[i]];
+
+ /* Uniquify level2 blocks. */
+ k = 0; /* Same scheme as for level3 above. */
+ for (j = 0; j < t->level2_size; j++)
+ {
+ for (i = 0; i < k; i++)
+ if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t)) == 0)
+ break;
+ /* Relocate block j to block i. */
+ reorder2[j] = i;
+ if (i == k)
+ {
+ if (i != j)
+ memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
+ (1 << t->q) * sizeof (uint32_t));
+ k++;
+ }
+ }
+ t->level2_size = k;
+
+ for (i = 0; i < t->level1_size; i++) /* Rewrite level1 to use the new level2 block numbers. */
+ if (t->level1[i] != EMPTY)
+ t->level1[i] = reorder2[t->level1[i]];
+
+ /* Create and fill the resulting compressed representation. Layout: 5 header words, then level1, level2 and level3. */
+ last_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t)
+ + (t->level3_size << t->p) * sizeof (ELEMENT);
+ t->result_size = LOCFILE_ALIGN_UP (last_offset);
+
+ level2_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t); /* Byte offset at which the level2 data begins. */
+ level3_offset =
+ 5 * sizeof (uint32_t)
+ + t->level1_size * sizeof (uint32_t)
+ + (t->level2_size << t->q) * sizeof (uint32_t); /* Byte offset at which the level3 data begins. */
+
+ start_locale_structure (file);
+ add_locale_uint32 (file, t->q + t->p); /* Header word 1: shift that yields the level1 index. */
+ add_locale_uint32 (file, t->level1_size); /* Header word 2: number of level1 entries. */
+ add_locale_uint32 (file, t->p); /* Header word 3: shift that yields the level2 index. */
+ add_locale_uint32 (file, (1 << t->q) - 1); /* Header word 4: mask for the level2 index. */
+ add_locale_uint32 (file, (1 << t->p) - 1); /* Header word 5: mask for the level3 index. */
+
+ for (i = 0; i < t->level1_size; i++) /* Level1 is written as byte offsets of level2 blocks, with 0 standing for EMPTY. */
+ add_locale_uint32
+ (file,
+ t->level1[i] == EMPTY
+ ? 0
+ : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
+
+ for (i = 0; i < (t->level2_size << t->q); i++) /* Level2 is written as byte offsets of level3 blocks, with 0 standing for EMPTY. */
+ add_locale_uint32
+ (file,
+ t->level2[i] == EMPTY
+ ? 0
+ : (t->level2[i] << t->p) * sizeof (ELEMENT) + level3_offset);
+
+ if (sizeof (ELEMENT) == 1) /* Only 1-byte and uint32_t-sized ELEMENT types can be written; anything else aborts. */
+ add_locale_raw_data (file, t->level3, t->level3_size << t->p);
+ else if (sizeof (ELEMENT) == sizeof (uint32_t))
+ add_locale_uint32_array (file, (uint32_t *) t->level3,
+ t->level3_size << t->p);
+ else
+ abort ();
+ align_locale_data (file, LOCFILE_ALIGN);
+ end_locale_structure (file);
+
+ if (t->level1_alloc > 0) /* NOTE(review): the pointers and sizes in *t are not reset after these frees, so T needs a fresh init before any reuse. */
+ free (t->level1);
+ if (t->level2_alloc > 0)
+ free (t->level2);
+ if (t->level3_alloc > 0)
+ free (t->level3);
+}
+#endif
+
+#undef EMPTY
+#undef TABLE
+#undef ELEMENT
+#undef DEFAULT
+#undef ITERATE
+#undef NO_ADD_LOCALE
diff --git a/REORG.TODO/locale/programs/charmap-dir.c b/REORG.TODO/locale/programs/charmap-dir.c
new file mode 100644
index 0000000000..e55ab86e28
--- /dev/null
+++ b/REORG.TODO/locale/programs/charmap-dir.c
@@ -0,0 +1,309 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include <dirent.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <spawn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "localedef.h"
+#include "charmap-dir.h"
+
+/* The data type of a charmap directory being traversed. Created by charmap_opendir and released by charmap_closedir. */
+struct charmap_dir
+{
+ DIR *dir; /* Open directory stream being read. */
+ /* The directory pathname, ending in a slash. Heap-allocated copy owned by this structure. */
+ char *directory;
+ size_t directory_len; /* strlen of directory, including the trailing slash. */
+ /* Scratch area used for returning pathnames. NULL until charmap_readdir first needs it. */
+ char *pathname;
+ size_t pathname_size; /* Allocated size of pathname in bytes. */
+};
+
+/* Starts a charmap directory traversal of DIRECTORY.
+ Returns a CHARMAP_DIR, or NULL if the directory doesn't exist. NOTE(review): the failure path calls error with status 1 first; if that terminates the program, NULL is never actually returned -- confirm. */
+CHARMAP_DIR *
+charmap_opendir (const char *directory)
+{
+ struct charmap_dir *cdir;
+ DIR *dir;
+ size_t len;
+ int add_slash;
+
+ dir = opendir (directory);
+ if (dir == NULL)
+ {
+ WITH_CUR_LOCALE (error (1, errno, gettext ("\
+cannot read character map directory `%s'"), directory));
+ return NULL;
+ }
+
+ cdir = (struct charmap_dir *) xmalloc (sizeof (struct charmap_dir));
+ cdir->dir = dir;
+
+ len = strlen (directory);
+ add_slash = (len == 0 || directory[len - 1] != '/'); /* 1 if a '/' must be appended (also for an empty name), else 0. */
+ cdir->directory = (char *) xmalloc (len + add_slash + 1); /* Room for the name, the optional slash and the NUL. */
+ memcpy (cdir->directory, directory, len);
+ if (add_slash)
+ cdir->directory[len] = '/';
+ cdir->directory[len + add_slash] = '\0';
+ cdir->directory_len = len + add_slash;
+
+ cdir->pathname = NULL; /* The scratch buffer is allocated lazily by charmap_readdir. */
+ cdir->pathname_size = 0;
+
+ return cdir;
+}
+
+/* Reads the next directory entry that is a regular file, skipping everything else.
+ Returns its charmap name, or NULL if past the last entry or upon error.
+ The storage returned may be overwritten by a later charmap_readdir
+ call on the same CHARMAP_DIR. It points into cdir->pathname, just past the directory prefix. */
+const char *
+charmap_readdir (CHARMAP_DIR *cdir)
+{
+ for (;;)
+ {
+ struct dirent64 *dirent;
+ size_t len;
+ size_t size;
+ char *filename;
+ mode_t mode;
+
+ dirent = readdir64 (cdir->dir);
+ if (dirent == NULL) /* End of directory or read error; the two are not distinguished here. */
+ return NULL;
+ if (strcmp (dirent->d_name, ".") == 0)
+ continue;
+ if (strcmp (dirent->d_name, "..") == 0)
+ continue;
+
+ len = strlen (dirent->d_name);
+
+ size = cdir->directory_len + len + 1; /* Directory prefix + entry name + NUL. */
+ if (size > cdir->pathname_size)
+ {
+ free (cdir->pathname); /* The old contents are not needed, so free and xmalloc rather than realloc. */
+ if (size < 2 * cdir->pathname_size) /* Grow to at least twice the previous size. */
+ size = 2 * cdir->pathname_size;
+ cdir->pathname = (char *) xmalloc (size);
+ cdir->pathname_size = size;
+ }
+
+ stpcpy (stpcpy (cdir->pathname, cdir->directory), dirent->d_name); /* Build "<directory>/<entry>" in the scratch buffer. */
+ filename = cdir->pathname + cdir->directory_len; /* Points at the entry name inside the scratch buffer. */
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_LNK) /* The type is known and not a symlink: no stat call is needed. */
+ mode = DTTOIF (dirent->d_type);
+ else
+#endif
+ {
+ struct stat64 statbuf;
+
+ if (stat64 (cdir->pathname, &statbuf) < 0) /* Entries that cannot be stat'ed are silently skipped. */
+ continue;
+
+ mode = statbuf.st_mode;
+ }
+
+ if (!S_ISREG (mode))
+ continue;
+
+ /* For compressed charmaps, the canonical charmap name does not
+ include the extension. The suffix is cut off in place; the length tests keep a name consisting only of the suffix untouched. */
+ if (len > 3 && memcmp (&filename[len - 3], ".gz", 3) == 0)
+ filename[len - 3] = '\0';
+ else if (len > 4 && memcmp (&filename[len - 4], ".bz2", 4) == 0)
+ filename[len - 4] = '\0';
+
+ return filename;
+ }
+}
+
+/* Finishes a charmap directory traversal, and frees the resources
+ attached to the CHARMAP_DIR. Returns the result of closedir on the underlying stream. */
+int
+charmap_closedir (CHARMAP_DIR *cdir)
+{
+ DIR *dir = cdir->dir; /* Saved first, because cdir itself is freed below. */
+
+ free (cdir->directory);
+ free (cdir->pathname); /* May still be NULL if charmap_readdir never allocated it. */
+ free (cdir);
+ return closedir (dir);
+}
+
+/* Creates a subprocess decompressing the given pathname, and returns
+ a stream reading its output (the decompressed data). COMPRESSOR is the program to run, looked up via posix_spawnp and invoked as "COMPRESSOR -d -c". Returns NULL if PATHNAME cannot be opened, is not a regular file, or the pipe or spawn setup fails. */
+static
+FILE *
+fopen_uncompressed (const char *pathname, const char *compressor)
+{
+ int pfd;
+
+ pfd = open (pathname, O_RDONLY);
+ if (pfd >= 0)
+ {
+ struct stat64 statbuf;
+ int fd[2]; /* fd[0] is read from by this process, fd[1] is written to by the child. */
+
+ if (fstat64 (pfd, &statbuf) >= 0
+ && S_ISREG (statbuf.st_mode)
+ && pipe (fd) >= 0)
+ {
+ char *argv[4]
+ = { (char *) compressor, (char *) "-d", (char *) "-c", NULL };
+ posix_spawn_file_actions_t actions;
+
+ if (posix_spawn_file_actions_init (&actions) == 0)
+ {
+ if (posix_spawn_file_actions_adddup2 (&actions, /* In the child: the pipe's write end becomes stdout ... */
+ fd[1], STDOUT_FILENO) == 0
+ && posix_spawn_file_actions_addclose (&actions, fd[1]) == 0
+ && posix_spawn_file_actions_addclose (&actions, fd[0]) == 0
+ && posix_spawn_file_actions_adddup2 (&actions, /* ... and the compressed file becomes stdin. */
+ pfd, STDIN_FILENO) == 0
+ && posix_spawn_file_actions_addclose (&actions, pfd) == 0
+ && posix_spawnp (NULL, compressor, &actions, NULL, /* NOTE(review): the pid argument is NULL, so nothing here can wait for the child later. */
+ argv, environ) == 0)
+ {
+ posix_spawn_file_actions_destroy (&actions);
+ close (fd[1]); /* The parent keeps only the read end of the pipe. */
+ close (pfd);
+ return fdopen (fd[0], "r"); /* NOTE(review): if fdopen fails, fd[0] is not closed and leaks. */
+ }
+ posix_spawn_file_actions_destroy (&actions);
+ }
+ close (fd[1]); /* Failure after pipe succeeded: release both pipe ends. */
+ close (fd[0]);
+ }
+ close (pfd);
+ }
+ return NULL;
+}
+
+/* Opens a charmap for reading, given its name (not an alias name). Tries DIRECTORY/NAME as a plain file first, then NAME.gz through gzip, then NAME.bz2 through bzip2; returns NULL if all three fail. */
+FILE *
+charmap_open (const char *directory, const char *name)
+{
+ size_t dlen = strlen (directory);
+ int add_slash = (dlen == 0 || directory[dlen - 1] != '/');
+ size_t nlen = strlen (name);
+ char *pathname;
+ char *p;
+ FILE *stream;
+
+ pathname = alloca (dlen + add_slash + nlen + 5); /* The 5 extra bytes hold the longest suffix ".bz2" plus the NUL. NOTE(review): stack allocation sized by the caller's strings. */
+ p = stpcpy (pathname, directory);
+ if (add_slash)
+ *p++ = '/';
+ p = stpcpy (p, name); /* p now points at the terminating NUL, where a suffix can be written. */
+
+ stream = fopen (pathname, "rm");
+ if (stream != NULL)
+ return stream;
+
+ memcpy (p, ".gz", 4); /* Copies the suffix together with its NUL. */
+ stream = fopen_uncompressed (pathname, "gzip");
+ if (stream != NULL)
+ return stream;
+
+ memcpy (p, ".bz2", 5); /* Overwrites ".gz"; again includes the NUL. */
+ stream = fopen_uncompressed (pathname, "bzip2");
+ if (stream != NULL)
+ return stream;
+
+ return NULL;
+}
+
+/* An empty alias list. Avoids the need to return NULL from
+ charmap_aliases. */
+static char *empty[1];
+
+/* Returns a NULL terminated list of alias names of a charmap: the <code_set_name> value and every "% alias" value found before the CHARMAP keyword. Returns the shared `empty' list if the charmap cannot be opened or has no aliases. Release the result with charmap_free_aliases. */
+char **
+charmap_aliases (const char *directory, const char *name)
+{
+ FILE *stream;
+ char **aliases;
+ size_t naliases;
+
+ stream = charmap_open (directory, name);
+ if (stream == NULL)
+ return empty;
+
+ aliases = NULL;
+ naliases = 0;
+
+ while (!feof (stream) && !ferror (stream)) /* Also stop on a read error: feof alone never becomes true then, and the loop would spin forever. */
+ {
+ char *alias = NULL;
+ char junk[BUFSIZ];
+
+ if (fscanf (stream, " <code_set_name> %ms", &alias) == 1 /* %ms makes fscanf allocate the string; it is freed in charmap_free_aliases. */
+ || fscanf (stream, "%% alias %ms", &alias) == 1)
+ {
+ aliases = (char **) xrealloc (aliases,
+ (naliases + 2) * sizeof (char *)); /* + 2: one slot for this alias, one for the final NULL. */
+ aliases[naliases++] = alias;
+ }
+
+ /* Read the rest of the line. */
+ if (fgets (junk, sizeof junk, stream) != NULL)
+ {
+ if (strstr (junk, "CHARMAP") != NULL)
+ /* We cannot expect more aliases from now on. */
+ break;
+
+ while (strchr (junk, '\n') == NULL /* Line longer than the buffer: keep reading until its newline or EOF. */
+ && fgets (junk, sizeof junk, stream) != NULL)
+ continue;
+ }
+ }
+
+ fclose (stream);
+
+ if (naliases == 0) /* aliases is still NULL here, so nothing leaks. */
+ return empty;
+
+ aliases[naliases] = NULL;
+ return aliases;
+}
+
+/* Frees an alias list returned by charmap_aliases: every string and then the array itself. The shared static `empty' list is recognized and left alone. */
+void
+charmap_free_aliases (char **aliases)
+{
+ if (aliases != empty) /* `empty' is a static object and must not be passed to free. */
+ {
+ char **p;
+
+ for (p = aliases; *p; p++) /* Walk up to the terminating NULL entry. */
+ free (*p);
+
+ free (aliases);
+ }
+}
diff --git a/REORG.TODO/locale/programs/charmap-dir.h b/REORG.TODO/locale/programs/charmap-dir.h
new file mode 100644
index 0000000000..c27d7fe614
--- /dev/null
+++ b/REORG.TODO/locale/programs/charmap-dir.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _CHARMAP_DIR_H
+#define _CHARMAP_DIR_H 1
+
+/* The data type of a charmap directory being traversed. */
+typedef struct charmap_dir CHARMAP_DIR;
+
+/* Starts a charmap directory traversal.
+ Returns a CHARMAP_DIR, or NULL if the directory doesn't exist. */
+extern CHARMAP_DIR *charmap_opendir (const char *directory);
+
+/* Reads the next directory entry.
+ Returns its charmap name, or NULL if past the last entry or upon error.
+ The storage returned may be overwritten by a later charmap_readdir
+ call on the same CHARMAP_DIR. */
+extern const char *charmap_readdir (CHARMAP_DIR *dir);
+
+/* Finishes a charmap directory traversal, and frees the resources
+ attached to the CHARMAP_DIR. */
+extern int charmap_closedir (CHARMAP_DIR *dir);
+
+/* Returns a NULL terminated list of alias names of a charmap. */
+extern char **charmap_aliases (const char *directory, const char *name);
+
+/* Frees an alias list returned by charmap_aliases. */
+extern void charmap_free_aliases (char **aliases);
+
+/* Opens a charmap for reading, given its name (not an alias name). */
+extern FILE *charmap_open (const char *directory, const char *name);
+
+#endif /* _CHARMAP_DIR_H */
diff --git a/REORG.TODO/locale/programs/charmap-kw.gperf b/REORG.TODO/locale/programs/charmap-kw.gperf
new file mode 100644
index 0000000000..0ebdfeb26e
--- /dev/null
+++ b/REORG.TODO/locale/programs/charmap-kw.gperf
@@ -0,0 +1,42 @@
+%{
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper, <drepper@gnu.org>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+%}
+struct keyword_t ;
+%%
+code_set_name, tok_code_set_name, 1
+mb_cur_max, tok_mb_cur_max, 1
+mb_cur_min, tok_mb_cur_min, 1
+escape_char, tok_escape_char, 1
+comment_char, tok_comment_char, 1
+g0esc, tok_g0esc, 1
+g1esc, tok_g1esc, 1
+g2esc, tok_g2esc, 1
+g3esc, tok_g3esc, 1
+escseq, tok_escseq, 1
+addset, tok_addset, 1
+include, tok_include, 1
+CHARMAP, tok_charmap, 0
+END, tok_end, 0
+WIDTH, tok_width, 0
+WIDTH_VARIABLE, tok_width_variable, 0
+WIDTH_DEFAULT, tok_width_default, 0
diff --git a/REORG.TODO/locale/programs/charmap-kw.h b/REORG.TODO/locale/programs/charmap-kw.h
new file mode 100644
index 0000000000..9e2969c4a1
--- /dev/null
+++ b/REORG.TODO/locale/programs/charmap-kw.h
@@ -0,0 +1,195 @@
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -acCgopt -k'1,2,5,9,$' -L ANSI-C -N charmap_hash charmap-kw.gperf */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+#line 1 "charmap-kw.gperf"
+
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper, <drepper@gnu.org>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+#line 24 "charmap-kw.gperf"
+struct keyword_t ;
+
+#define TOTAL_KEYWORDS 17
+#define MIN_WORD_LENGTH 3
+#define MAX_WORD_LENGTH 14
+#define MIN_HASH_VALUE 3
+#define MAX_HASH_VALUE 35
+/* maximum key range = 33, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register unsigned int len)
+{
+ static const unsigned char asso_values[] =
+ {
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 25, 20,
+ 15, 10, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 5, 0, 0,
+ 5, 36, 0, 0, 36, 36, 36, 5, 0, 36,
+ 0, 36, 0, 36, 0, 36, 36, 0, 36, 36,
+ 36, 36, 36, 36, 36, 0, 36, 5, 0, 0,
+ 5, 0, 36, 5, 0, 0, 36, 36, 36, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 36, 36,
+ 0, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36
+ };
+ register int hval = len;
+
+ switch (hval)
+ {
+ default:
+ hval += asso_values[(unsigned char)str[8]];
+ /*FALLTHROUGH*/
+ case 8:
+ case 7:
+ case 6:
+ case 5:
+ hval += asso_values[(unsigned char)str[4]];
+ /*FALLTHROUGH*/
+ case 4:
+ case 3:
+ case 2:
+ hval += asso_values[(unsigned char)str[1]];
+ /*FALLTHROUGH*/
+ case 1:
+ hval += asso_values[(unsigned char)str[0]];
+ break;
+ }
+ return hval + asso_values[(unsigned char)str[len - 1]];
+}
+
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const struct keyword_t *
+charmap_hash (register const char *str, register unsigned int len)
+{
+ static const struct keyword_t wordlist[] =
+ {
+ {""}, {""}, {""},
+#line 39 "charmap-kw.gperf"
+ {"END", tok_end, 0},
+ {""},
+#line 40 "charmap-kw.gperf"
+ {"WIDTH", tok_width, 0},
+#line 35 "charmap-kw.gperf"
+ {"escseq", tok_escseq, 1},
+#line 37 "charmap-kw.gperf"
+ {"include", tok_include, 1},
+ {""}, {""},
+#line 28 "charmap-kw.gperf"
+ {"mb_cur_min", tok_mb_cur_min, 1},
+#line 29 "charmap-kw.gperf"
+ {"escape_char", tok_escape_char, 1},
+#line 30 "charmap-kw.gperf"
+ {"comment_char", tok_comment_char, 1},
+#line 26 "charmap-kw.gperf"
+ {"code_set_name", tok_code_set_name, 1},
+#line 41 "charmap-kw.gperf"
+ {"WIDTH_VARIABLE", tok_width_variable, 0},
+#line 27 "charmap-kw.gperf"
+ {"mb_cur_max", tok_mb_cur_max, 1},
+#line 36 "charmap-kw.gperf"
+ {"addset", tok_addset, 1},
+#line 38 "charmap-kw.gperf"
+ {"CHARMAP", tok_charmap, 0},
+#line 42 "charmap-kw.gperf"
+ {"WIDTH_DEFAULT", tok_width_default, 0},
+ {""},
+#line 34 "charmap-kw.gperf"
+ {"g3esc", tok_g3esc, 1},
+ {""}, {""}, {""}, {""},
+#line 33 "charmap-kw.gperf"
+ {"g2esc", tok_g2esc, 1},
+ {""}, {""}, {""}, {""},
+#line 32 "charmap-kw.gperf"
+ {"g1esc", tok_g1esc, 1},
+ {""}, {""}, {""}, {""},
+#line 31 "charmap-kw.gperf"
+ {"g0esc", tok_g0esc, 1}
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register const char *s = wordlist[key].name;
+
+ if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/REORG.TODO/locale/programs/charmap.c b/REORG.TODO/locale/programs/charmap.c
new file mode 100644
index 0000000000..129aefffc1
--- /dev/null
+++ b/REORG.TODO/locale/programs/charmap.c
@@ -0,0 +1,1104 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
+#include <stdint.h>
+
+#include "localedef.h"
+#include "linereader.h"
+#include "charmap.h"
+#include "charmap-dir.h"
+
+#include <assert.h>
+
+
+/* Define the lookup function. */
+#include "charmap-kw.h"
+
+
+/* Prototypes for local functions. */
+/* Parses an already opened charmap file and closes the reader. */
+static struct charmap_t *parse_charmap (struct linereader *cmfile,
+ int verbose, int be_quiet);
+/* Records one WIDTH rule for the character FROM or the range FROM...TO. */
+static void new_width (struct linereader *cmfile, struct charmap_t *result,
+ const char *from, const char *to,
+ unsigned long int width);
+/* Enters one character, or a whole range when TO is non-NULL, into the
+ name and byte-sequence hash tables of CM. */
+static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
+ size_t nbytes, unsigned char *bytes,
+ const char *from, const char *to,
+ int decimal_ellipsis, int step);
+
+
+/* Set to true by charmap_read when the loaded charmap fails the check
+ that every member of the basic character set maps to its own code. */
+bool enc_not_ascii_compatible;
+
+
+/* NOTE(review): only defined when NEED_NULL_POINTER is set and not
+ referenced by the code in this file -- confirm it is still needed. */
+#ifdef NEED_NULL_POINTER
+static const char *null_pointer;
+#endif
+
+/* Open the charmap NAME located in DIRECTORY and wrap the stream in a
+   line reader which uses HF for keyword lookup.  Returns NULL when
+   charmap_open cannot provide a stream.  The reader is created with
+   the path "DIRECTORY/NAME"; the slash is only inserted when DIRECTORY
+   is empty or does not already end in one.  */
+static struct linereader *
+cmlr_open (const char *directory, const char *name, kw_hash_fct_t hf)
+{
+  FILE *stream = charmap_open (directory, name);
+
+  if (stream == NULL)
+    return NULL;
+
+  const size_t dir_len = strlen (directory);
+  const int need_sep = dir_len == 0 || directory[dir_len - 1] != '/';
+  const size_t name_len = strlen (name);
+
+  /* Assemble the complete path name in stack storage.  */
+  char *full_path = alloca (dir_len + need_sep + name_len + 1);
+  char *tail = full_path;
+
+  memcpy (tail, directory, dir_len);
+  tail += dir_len;
+  if (need_sep)
+    *tail++ = '/';
+  /* Copies the terminating NUL byte as well.  */
+  memcpy (tail, name, name_len + 1);
+
+  return lr_create (stream, full_path, hf);
+}
+
+/* Locate, open and parse the character map FILENAME.
+
+   When FILENAME is not NULL the lookup order is:
+     1. FILENAME exactly as given;
+     2. if it contains no '/', every element of I18NPATH, first with
+        "/charmaps" appended and then without, followed by CHARMAP_PATH;
+     3. if nothing was loaded and the name contains no '/', any file in
+        CHARMAP_PATH which lists FILENAME (compared case-insensitively)
+        among its aliases.
+   If there is still no result, DEFAULT_CHARMAP is read from
+   CHARMAP_PATH; failing that is fatal (error status 4).
+
+   VERBOSE and BE_QUIET are handed to the parser.  A nonzero
+   ERROR_NOT_FOUND requests a diagnostic when steps 1 and 2 produce no
+   charmap.  A nonzero USE_DEFAULT enables the ASCII compatibility
+   test, which prints a warning and sets enc_not_ascii_compatible when
+   the charmap fails it.
+
+   Returns the parsed charmap; NULL is only possible when nothing was
+   found and DEFAULT_CHARMAP is NULL.  */
+struct charmap_t *
+charmap_read (const char *filename, int verbose, int error_not_found,
+              int be_quiet, int use_default)
+{
+  struct charmap_t *result = NULL;
+
+  if (filename != NULL)
+    {
+      struct linereader *cmfile;
+
+      /* First try the name as found in the parameter.  */
+      cmfile = lr_open (filename, charmap_hash);
+      if (cmfile == NULL)
+        {
+          /* Not successful.  So start looking through the directories
+             in the I18NPATH if this is a simple name.  */
+          if (strchr (filename, '/') == NULL)
+            {
+              char *i18npath = getenv ("I18NPATH");
+              if (i18npath != NULL && *i18npath != '\0')
+                {
+                  const size_t pathlen = strlen (i18npath);
+                  char i18npathbuf[pathlen + 1];
+                  char path[pathlen + sizeof ("/charmaps")];
+                  char *next;
+                  /* strsep modifies its argument, so work on a copy of
+                     the environment string.  */
+                  i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1);
+
+                  while (cmfile == NULL
+                         && (next = strsep (&i18npath, ":")) != NULL)
+                    {
+                      stpcpy (stpcpy (path, next), "/charmaps");
+                      cmfile = cmlr_open (path, filename, charmap_hash);
+
+                      if (cmfile == NULL)
+                        /* Try without the "/charmaps" part.  */
+                        cmfile = cmlr_open (next, filename, charmap_hash);
+                    }
+                }
+
+              if (cmfile == NULL)
+                /* Try the default directory.  */
+                cmfile = cmlr_open (CHARMAP_PATH, filename, charmap_hash);
+            }
+        }
+
+      if (cmfile != NULL)
+        result = parse_charmap (cmfile, verbose, be_quiet);
+
+      if (result == NULL && error_not_found)
+        WITH_CUR_LOCALE (error (0, errno, _("\
+character map file `%s' not found"), filename));
+    }
+
+  if (result == NULL && filename != NULL && strchr (filename, '/') == NULL)
+    {
+      /* OK, one more try.  We also accept the names given to the
+         character sets in the files.  Sometimes they differ from the
+         file name.  */
+      CHARMAP_DIR *dir;
+
+      dir = charmap_opendir (CHARMAP_PATH);
+      if (dir != NULL)
+        {
+          const char *dirent;
+
+          while ((dirent = charmap_readdir (dir)) != NULL)
+            {
+              char **aliases;
+              char **p;
+              int found;
+
+              aliases = charmap_aliases (CHARMAP_PATH, dirent);
+              found = 0;
+              for (p = aliases; *p; p++)
+                if (strcasecmp (*p, filename) == 0)
+                  {
+                    found = 1;
+                    break;
+                  }
+              charmap_free_aliases (aliases);
+
+              if (found)
+                {
+                  struct linereader *cmfile;
+
+                  cmfile = cmlr_open (CHARMAP_PATH, dirent, charmap_hash);
+                  if (cmfile != NULL)
+                    result = parse_charmap (cmfile, verbose, be_quiet);
+
+                  /* Only the first matching file is considered.  */
+                  break;
+                }
+            }
+
+          charmap_closedir (dir);
+        }
+    }
+
+  if (result == NULL && DEFAULT_CHARMAP != NULL)
+    {
+      struct linereader *cmfile;
+
+      cmfile = cmlr_open (CHARMAP_PATH, DEFAULT_CHARMAP, charmap_hash);
+      if (cmfile != NULL)
+        result = parse_charmap (cmfile, verbose, be_quiet);
+
+      if (result == NULL)
+        WITH_CUR_LOCALE (error (4, errno, _("\
+default character map file `%s' not found"), DEFAULT_CHARMAP));
+    }
+
+  if (result != NULL && result->code_set_name == NULL)
+    /* The input file does not specify a code set name.  This
+       shouldn't happen but we should cope with it.  FILENAME is NULL
+       when the caller asked for the default charmap only; basename
+       must not be called with a null pointer, so use the name of the
+       default charmap in that case.  */
+    result->code_set_name = (filename != NULL
+                             ? basename (filename) : DEFAULT_CHARMAP);
+
+  /* Test of ASCII compatibility of locale encoding.
+
+     Verify that the encoding to be used in a locale is ASCII compatible,
+     at least for the graphic characters, excluding the control characters,
+     '$' and '@'.  This constraint comes from an ISO C 99 restriction.
+
+     ISO C 99 section 7.17.(2) (about wchar_t):
+       the null character shall have the code value zero and each member of
+       the basic character set shall have a code value equal to its value
+       when used as the lone character in an integer character constant.
+     ISO C 99 section 5.2.1.(3):
+       Both the basic source and basic execution character sets shall have
+       the following members: the 26 uppercase letters of the Latin alphabet
+         A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+       the 26 lowercase letters of the Latin alphabet
+         a b c d e f g h i j k l m n o p q r s t u v w x y z
+       the 10 decimal digits
+         0 1 2 3 4 5 6 7 8 9
+       the following 29 graphic characters
+         ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
+       the space character, and control characters representing horizontal
+       tab, vertical tab, and form feed.
+
+     Therefore, for all members of the "basic character set", the 'char' code
+     must have the same value as the 'wchar_t' code, which in glibc is the
+     same as the Unicode code, which for all of the enumerated characters
+     is identical to the ASCII code.  */
+  if (result != NULL && use_default)
+    {
+      static const char basic_charset[] =
+        {
+          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+          'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+          'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+          '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
+          '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
+          '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
+        };
+      int failed = 0;
+      const char *p = basic_charset;
+
+      /* The terminating '\0' element is itself looked up before the
+         loop condition ends the iteration.  */
+      do
+        {
+          struct charseq *seq = charmap_find_symbol (result, p, 1);
+
+          if (seq == NULL || seq->ucs4 != (uint32_t) *p)
+            failed = 1;
+        }
+      while (*p++ != '\0');
+
+      if (failed)
+        {
+          WITH_CUR_LOCALE (fprintf (stderr, _("\
+character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
+                                    result->code_set_name));
+          enc_not_ascii_compatible = true;
+        }
+    }
+
+  return result;
+}
+
+
+/* Parse the charmap description available through CMFILE and return a
+   newly allocated charmap.  VERBOSE is handed to lr_token and also
+   enables the duplicate-definition diagnostic for <mb_cur_max> and
+   <mb_cur_min>; a nonzero BE_QUIET suppresses the mb_cur_min/mb_cur_max
+   ordering message and the premature-end-of-file message.
+
+   The parser is a state machine:
+     1        prolog (declarations before `CHARMAP')
+     2 - 5    CHARMAP body: name, optional ellipsis + end name, encoding
+     90       the word following `END'
+     91       between sections, waiting for a WIDTH* keyword
+     92       argument of WIDTH_DEFAULT
+     93 - 96  WIDTH body: name, optional ellipsis + end name, number
+     98 - 100 WIDTH_VARIABLE body
+
+   CMFILE is closed before returning.  NULL is returned only when the
+   hash tables cannot be initialized.  */
+static struct charmap_t *
+parse_charmap (struct linereader *cmfile, int verbose, int be_quiet)
+{
+  struct charmap_t *result;
+  int state;
+  enum token_t expected_tok = tok_error;
+  const char *expected_str = NULL;
+  char *from_name = NULL;
+  char *to_name = NULL;
+  enum token_t ellipsis = 0;
+  int step = 1;
+
+  /* We don't want symbolic names in string to be translated.  */
+  cmfile->translate_strings = 0;
+
+  /* Allocate room for result.  */
+  result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t));
+  memset (result, '\0', sizeof (struct charmap_t));
+  /* The default DEFAULT_WIDTH is 1.  */
+  result->width_default = 1;
+
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+  obstack_init (&result->mem_pool);
+
+  if (init_hash (&result->char_table, 256)
+      || init_hash (&result->byte_table, 256))
+    {
+      /* Release everything acquired so far.  The callers do not touch
+         CMFILE again, so it has to be closed here just like on the
+         regular exit path.  */
+      obstack_free (&result->mem_pool, NULL);
+      free (result);
+      lr_close (cmfile);
+      return NULL;
+    }
+
+  /* We use a state machine to describe the charmap description file
+     format.  */
+  state = 1;
+  while (1)
+    {
+      /* What's on?  */
+      struct token *now = lr_token (cmfile, NULL, NULL, NULL, verbose);
+      enum token_t nowtok = now->tok;
+      struct token *arg;
+
+      if (nowtok == tok_eof)
+        break;
+
+      switch (state)
+        {
+        case 1:
+          /* The beginning.  We expect the special declarations, EOL or
+             `CHARMAP'.  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_charmap)
+            {
+              from_name = NULL;
+              to_name = NULL;
+
+              /* We have to set up the real work.  Fill in some
+                 default values.  */
+              if (result->mb_cur_max == 0)
+                result->mb_cur_max = 1;
+              if (result->mb_cur_min == 0)
+                result->mb_cur_min = result->mb_cur_max;
+              if (result->mb_cur_min > result->mb_cur_max)
+                {
+                  if (!be_quiet)
+                    WITH_CUR_LOCALE (error (0, 0, _("\
+%s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
+                                            cmfile->fname));
+
+                  result->mb_cur_min = result->mb_cur_max;
+                }
+
+              lr_ignore_rest (cmfile, 1);
+
+              state = 2;
+              continue;
+            }
+
+          if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
+              && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
+              && nowtok != tok_comment_char && nowtok != tok_g0esc
+              && nowtok != tok_g1esc && nowtok != tok_g2esc
+              && nowtok != tok_g3esc && nowtok != tok_repertoiremap
+              && nowtok != tok_include)
+            {
+              lr_error (cmfile, _("syntax error in prolog: %s"),
+                        _("invalid definition"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* We know that we need an argument.  */
+          arg = lr_token (cmfile, NULL, NULL, NULL, verbose);
+
+          switch (nowtok)
+            {
+            case tok_code_set_name:
+            case tok_repertoiremap:
+              if (arg->tok != tok_ident && arg->tok != tok_string)
+                {
+                badarg:
+                  lr_error (cmfile, _("syntax error in prolog: %s"),
+                            _("bad argument"));
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_code_set_name)
+                result->code_set_name = obstack_copy0 (&result->mem_pool,
+                                                       arg->val.str.startmb,
+                                                       arg->val.str.lenmb);
+              else
+                result->repertoiremap = obstack_copy0 (&result->mem_pool,
+                                                       arg->val.str.startmb,
+                                                       arg->val.str.lenmb);
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+
+            case tok_mb_cur_max:
+            case tok_mb_cur_min:
+              if (arg->tok != tok_number)
+                goto badarg;
+
+              /* Each of the two values may be given only once.  The
+                 second alternative has to test <mb_cur_min>; testing
+                 <mb_cur_max> twice would never report a duplicate
+                 <mb_cur_min>.  */
+              if (verbose
+                  && ((nowtok == tok_mb_cur_max
+                       && result->mb_cur_max != 0)
+                      || (nowtok == tok_mb_cur_min
+                          && result->mb_cur_min != 0)))
+                lr_error (cmfile, _("duplicate definition of <%s>"),
+                          nowtok == tok_mb_cur_min
+                          ? "mb_cur_min" : "mb_cur_max");
+
+              if (arg->val.num < 1)
+                {
+                  lr_error (cmfile,
+                            _("value for <%s> must be 1 or greater"),
+                            nowtok == tok_mb_cur_min
+                            ? "mb_cur_min" : "mb_cur_max");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+              if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
+                   && (int) arg->val.num < result->mb_cur_min)
+                  || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
+                      && (int) arg->val.num > result->mb_cur_max))
+                {
+                  lr_error (cmfile, _("\
+value of <%s> must be greater or equal than the value of <%s>"),
+                            "mb_cur_max", "mb_cur_min");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_mb_cur_max)
+                result->mb_cur_max = arg->val.num;
+              else
+                result->mb_cur_min = arg->val.num;
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+
+            case tok_escape_char:
+            case tok_comment_char:
+              if (arg->tok != tok_ident)
+                goto badarg;
+
+              if (arg->val.str.lenmb != 1)
+                {
+                  lr_error (cmfile, _("\
+argument to <%s> must be a single character"),
+                            nowtok == tok_escape_char ? "escape_char"
+                            : "comment_char");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_escape_char)
+                cmfile->escape_char = *arg->val.str.startmb;
+              else
+                cmfile->comment_char = *arg->val.str.startmb;
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+
+            case tok_g0esc:
+            case tok_g1esc:
+            case tok_g2esc:
+            case tok_g3esc:
+            case tok_escseq:
+              lr_ignore_rest (cmfile, 0); /* XXX */
+              continue;
+
+            case tok_include:
+              lr_error (cmfile, _("\
+character sets with locking states are not supported"));
+              exit (4);
+
+            default:
+              /* Cannot happen.  */
+              assert (! "Should not happen");
+            }
+          break;
+
+        case 2:
+          /* We have seen `CHARMAP' and now are in the body.  Each line
+             must have the format "%s %s %s\n" or "%s...%s %s %s\n".  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              expected_tok = tok_charmap;
+              expected_str = "CHARMAP";
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "CHARMAP", _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* If the previous line was not completely correct free the
+             used memory.  */
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          if (nowtok == tok_bsymbol)
+            from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                                now->val.str.startmb,
+                                                now->val.str.lenmb);
+          else
+            {
+              obstack_printf (&result->mem_pool, "U%08X",
+                              cmfile->token.val.ucs4);
+              obstack_1grow (&result->mem_pool, '\0');
+              from_name = (char *) obstack_finish (&result->mem_pool);
+            }
+          to_name = NULL;
+
+          state = 3;
+          continue;
+
+        case 3:
+          /* We have two possibilities: We can see an ellipsis or an
+             encoding value.  */
+          if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
+              || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2
+              || nowtok == tok_ellipsis2_2)
+            {
+              if (nowtok == tok_ellipsis4_2)
+                {
+                  step = 2;
+                  nowtok = tok_ellipsis4;
+                }
+              else if (nowtok == tok_ellipsis2_2)
+                {
+                  step = 2;
+                  nowtok = tok_ellipsis2;
+                }
+              /* Remember the kind of ellipsis only after the step-2
+                 variants have been mapped to their plain counterparts.
+                 The test against tok_ellipsis2 in state 5 otherwise
+                 treats a tok_ellipsis2_2 range as a decimal one.  */
+              ellipsis = nowtok;
+              state = 4;
+              continue;
+            }
+          /* FALLTHROUGH */
+
+        case 5:
+          if (nowtok != tok_charcode)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "CHARMAP", _("invalid encoding given"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              state = 2;
+              continue;
+            }
+
+          if (now->val.charcode.nbytes < result->mb_cur_min)
+            lr_error (cmfile, _("too few bytes in character encoding"));
+          else if (now->val.charcode.nbytes > result->mb_cur_max)
+            lr_error (cmfile, _("too many bytes in character encoding"));
+          else
+            charmap_new_char (cmfile, result, now->val.charcode.nbytes,
+                              now->val.charcode.bytes, from_name, to_name,
+                              ellipsis != tok_ellipsis2, step);
+
+          /* Ignore trailing comment silently.  */
+          lr_ignore_rest (cmfile, 0);
+
+          from_name = NULL;
+          to_name = NULL;
+          ellipsis = tok_none;
+          step = 1;
+
+          state = 2;
+          continue;
+
+        case 4:
+          if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "CHARMAP",
+                        _("no symbolic name given for end of range"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* Copy the to-name in a safe place.  */
+          if (nowtok == tok_bsymbol)
+            to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              cmfile->token.val.str.startmb,
+                                              cmfile->token.val.str.lenmb);
+          else
+            {
+              obstack_printf (&result->mem_pool, "U%08X",
+                              cmfile->token.val.ucs4);
+              obstack_1grow (&result->mem_pool, '\0');
+              to_name = (char *) obstack_finish (&result->mem_pool);
+            }
+
+          state = 5;
+          continue;
+
+        case 90:
+          if (nowtok != expected_tok)
+            lr_error (cmfile, _("\
+%1$s: definition does not end with `END %1$s'"), expected_str);
+
+          lr_ignore_rest (cmfile, nowtok == expected_tok);
+          state = 91;
+          continue;
+
+        case 91:
+          /* Waiting for WIDTH...  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_width_default)
+            {
+              state = 92;
+              continue;
+            }
+
+          if (nowtok == tok_width)
+            {
+              lr_ignore_rest (cmfile, 1);
+              state = 93;
+              continue;
+            }
+
+          if (nowtok == tok_width_variable)
+            {
+              lr_ignore_rest (cmfile, 1);
+              state = 98;
+              continue;
+            }
+
+          lr_error (cmfile, _("\
+only WIDTH definitions are allowed to follow the CHARMAP definition"));
+
+          lr_ignore_rest (cmfile, 0);
+          continue;
+
+        case 92:
+          if (nowtok != tok_number)
+            lr_error (cmfile, _("value for %s must be an integer"),
+                      "WIDTH_DEFAULT");
+          else
+            result->width_default = now->val.num;
+
+          lr_ignore_rest (cmfile, nowtok == tok_number);
+
+          state = 91;
+          continue;
+
+        case 93:
+          /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
+             "%s...%s %d\n".  */
+          if (nowtok == tok_eol)
+            /* ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              expected_tok = tok_width;
+              expected_str = "WIDTH";
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH", _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          if (nowtok == tok_bsymbol)
+            from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                                now->val.str.startmb,
+                                                now->val.str.lenmb);
+          else
+            {
+              obstack_printf (&result->mem_pool, "U%08X",
+                              cmfile->token.val.ucs4);
+              obstack_1grow (&result->mem_pool, '\0');
+              from_name = (char *) obstack_finish (&result->mem_pool);
+            }
+
+          to_name = NULL;
+
+          state = 94;
+          continue;
+
+        case 94:
+          if (nowtok == tok_ellipsis3)
+            {
+              state = 95;
+              continue;
+            }
+          /* FALLTHROUGH */
+
+        case 96:
+          if (nowtok != tok_number)
+            lr_error (cmfile, _("value for %s must be an integer"),
+                      "WIDTH");
+          else
+            {
+              /* Store width for chars.  */
+              new_width (cmfile, result, from_name, to_name, now->val.num);
+
+              from_name = NULL;
+              to_name = NULL;
+            }
+
+          lr_ignore_rest (cmfile, nowtok == tok_number);
+
+          state = 93;
+          continue;
+
+        case 95:
+          if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH", _("no symbolic name given for end of range"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              state = 93;
+              continue;
+            }
+
+          if (nowtok == tok_bsymbol)
+            to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.startmb,
+                                              now->val.str.lenmb);
+          else
+            {
+              obstack_printf (&result->mem_pool, "U%08X",
+                              cmfile->token.val.ucs4);
+              obstack_1grow (&result->mem_pool, '\0');
+              to_name = (char *) obstack_finish (&result->mem_pool);
+            }
+
+          state = 96;
+          continue;
+
+        case 98:
+          /* We now expect `END WIDTH_VARIABLE' or lines of the format
+             "%s\n" or "%s...%s\n".  */
+          if (nowtok == tok_eol)
+            /* ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              expected_tok = tok_width_variable;
+              expected_str = "WIDTH_VARIABLE";
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH_VARIABLE", _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              continue;
+            }
+
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          if (nowtok == tok_bsymbol)
+            from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                                now->val.str.startmb,
+                                                now->val.str.lenmb);
+          else
+            {
+              obstack_printf (&result->mem_pool, "U%08X",
+                              cmfile->token.val.ucs4);
+              obstack_1grow (&result->mem_pool, '\0');
+              from_name = (char *) obstack_finish (&result->mem_pool);
+            }
+          to_name = NULL;
+
+          state = 99;
+          continue;
+
+        case 99:
+          /* NOTE(review): the assignment of 100 is overwritten by the
+             unconditional `state = 98' below, so state 100 is never
+             entered.  WIDTH_VARIABLE data is not stored anywhere yet,
+             which is why this is left as it is.  */
+          if (nowtok == tok_ellipsis3)
+            state = 100;
+
+          /* Store info.  */
+          from_name = NULL;
+
+          /* Warn */
+          state = 98;
+          continue;
+
+        case 100:
+          if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH_VARIABLE",
+                        _("no symbolic name given for end of range"));
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          if (nowtok == tok_bsymbol)
+            to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.startmb,
+                                              now->val.str.lenmb);
+          else
+            {
+              obstack_printf (&result->mem_pool, "U%08X",
+                              cmfile->token.val.ucs4);
+              obstack_1grow (&result->mem_pool, '\0');
+              to_name = (char *) obstack_finish (&result->mem_pool);
+            }
+
+          /* XXX Enter value into table.  */
+
+          lr_ignore_rest (cmfile, 1);
+
+          state = 98;
+          continue;
+
+        default:
+          WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"),
+                                  __FILE__));
+          /* NOTREACHED */
+        }
+      break;
+    }
+
+  /* State 91 is the only one in which the input may legitimately end.  */
+  if (state != 91 && !be_quiet)
+    WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
+                            cmfile->fname));
+
+  lr_close (cmfile);
+
+  return result;
+}
+
+
+/* Record that the character named FROM, or every character in the
+   range FROM...TO when TO is not NULL, has the width WIDTH.  Both names
+   must already be present in the name table of RESULT and, for a
+   range, must be encoded with the same number of bytes; otherwise an
+   error is reported through CMFILE and nothing is recorded.  The rule
+   table lives in the obstack of RESULT and grows by 32 entries at a
+   time.  */
+static void
+new_width (struct linereader *cmfile, struct charmap_t *result,
+           const char *from, const char *to, unsigned long int width)
+{
+  struct charseq *from_val;
+  struct charseq *to_val;
+
+  from_val = charmap_find_value (result, from, strlen (from));
+  if (from_val == NULL)
+    {
+      lr_error (cmfile, _("unknown character `%s'"), from);
+      return;
+    }
+
+  if (to == NULL)
+    /* A single character is stored as a range of length one.  */
+    to_val = from_val;
+  else
+    {
+      to_val = charmap_find_value (result, to, strlen (to));
+      if (to_val == NULL)
+        {
+          lr_error (cmfile, _("unknown character `%s'"), to);
+          return;
+        }
+
+      /* Make sure the number of bytes for the end points of the range
+         is correct.  */
+      if (from_val->nbytes != to_val->nbytes)
+        {
+          lr_error (cmfile, _("\
+number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"),
+                    from_val->nbytes, to_val->nbytes);
+          return;
+        }
+    }
+
+  if (result->nwidth_rules >= result->nwidth_rules_max)
+    {
+      size_t new_size = result->nwidth_rules + 32;
+      struct width_rule *new_rules =
+        (struct width_rule *) obstack_alloc (&result->mem_pool,
+                                             (new_size
+                                              * sizeof (struct width_rule)));
+
+      /* On the first call there is no old table yet.  memcpy must not
+         be given a null pointer, not even together with a length of
+         zero, so skip the copy in that case.  */
+      if (result->nwidth_rules_max > 0)
+        memcpy (new_rules, result->width_rules,
+                result->nwidth_rules_max * sizeof (struct width_rule));
+
+      result->width_rules = new_rules;
+      result->nwidth_rules_max = new_size;
+    }
+
+  result->width_rules[result->nwidth_rules].from = from_val;
+  result->width_rules[result->nwidth_rules].to = to_val;
+  result->width_rules[result->nwidth_rules].width = (unsigned int) width;
+  ++result->nwidth_rules;
+}
+
+
+/* Look up the symbolic name NAME of length LEN in the name table of
+   CM.  Returns the associated character sequence, or NULL when
+   find_entry reports a negative result.  */
+struct charseq *
+charmap_find_value (const struct charmap_t *cm, const char *name, size_t len)
+{
+  void *entry;
+
+  /* find_entry takes a non-const table pointer.  */
+  if (find_entry ((hash_table *) &cm->char_table, name, len, &entry) < 0)
+    return NULL;
+
+  return (struct charseq *) entry;
+}
+
+
+/* Derive a UCS4 value from the symbolic name NAME of length LEN.
+
+   Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where xxxx and
+   xxxxxxxx are hexadecimal numbers.  In this case we use the value of
+   xxxx or xxxxxxxx as the UCS4 value of this character and we don't
+   have to consult the repertoire map.
+
+   If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx and
+   xxxxxxxx also give the code point in UCS4 but this must be in the
+   private, i.e., unassigned, area.  This should be used for characters
+   which do not (yet) have an equivalent in ISO 10646 and Unicode.
+
+   Returns UNINITIALIZED_CHAR_VALUE when NAME does not have one of
+   these forms or the number is 0x80000000 or larger.  */
+static uint32_t
+charmap_name_to_ucs4 (const char *name, int len)
+{
+  char *endp;
+  uint32_t value;
+
+  if ((name[0] != 'U' && name[0] != 'P') || (len != 5 && len != 9))
+    return UNINITIALIZED_CHAR_VALUE;
+
+  errno = 0;
+  value = strtoul (name + 1, &endp, 16);
+  if (endp - name != len
+      || (value == ~((uint32_t) 0) && errno == ERANGE)
+      || value >= 0x80000000)
+    /* This wasn't successful.  Signal this name cannot be a
+       correct UCS value.  */
+    return UNINITIALIZED_CHAR_VALUE;
+
+  return value;
+}
+
+
+/* Enter characters into the name and byte tables of CM.
+
+   Without TO a single character named FROM and encoded by the NBYTES
+   bytes at BYTES is added.  With TO, the range FROM...TO is expanded:
+   both names must share a prefix and end in numbers with the same
+   count of digits (decimal if DECIMAL_ELLIPSIS is nonzero, hexadecimal
+   otherwise).  The numeric suffix advances by STEP for every generated
+   name while the byte sequence in BYTES, which is modified in place,
+   is incremented by one.  Problems are reported through LR.  */
+static void
+charmap_new_char (struct linereader *lr, struct charmap_t *cm,
+                  size_t nbytes, unsigned char *bytes,
+                  const char *from, const char *to,
+                  int decimal_ellipsis, int step)
+{
+  hash_table *ht = &cm->char_table;
+  hash_table *bt = &cm->byte_table;
+  struct obstack *ob = &cm->mem_pool;
+  char *from_end;
+  char *to_end;
+  const char *cp;
+  int prefix_len, len1, len2;
+  unsigned int from_nr, to_nr, cnt;
+  struct charseq *newp;
+
+  len1 = strlen (from);
+
+  if (to == NULL)
+    {
+      newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
+      newp->nbytes = nbytes;
+      memcpy (newp->bytes, bytes, nbytes);
+      newp->name = from;
+      newp->ucs4 = charmap_name_to_ucs4 (from, len1);
+
+      insert_entry (ht, from, len1, newp);
+      insert_entry (bt, newp->bytes, nbytes, newp);
+      /* Please note that it isn't a bug if a symbol is defined more
+         than once.  All later definitions are simply discarded.  */
+      return;
+    }
+
+  /* We have a range: the names must have names with equal prefixes
+     and an equal number of digits, where the second number is greater
+     or equal than the first.  */
+  len2 = strlen (to);
+
+  if (len1 != len2)
+    {
+    illegal_range:
+      lr_error (lr, _("invalid names for character range"));
+      return;
+    }
+
+  /* Walk backwards over the trailing digits.  The position has to be
+     validated before the byte is read, otherwise a name which consists
+     of digits only makes the loop read in front of the string.  The
+     <ctype.h> functions need the value of an unsigned char.  */
+  cp = &from[len1 - 1];
+  if (decimal_ellipsis)
+    while (cp >= from && isdigit ((unsigned char) *cp))
+      --cp;
+  else
+    while (cp >= from && isxdigit ((unsigned char) *cp))
+      {
+        if (!isdigit ((unsigned char) *cp) && !isupper ((unsigned char) *cp))
+          lr_error (lr, _("\
+hexadecimal range format should use only capital characters"));
+        --cp;
+      }
+
+  prefix_len = (cp - from) + 1;
+
+  /* There must be at least one digit and the prefixes must agree.  */
+  if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
+    goto illegal_range;
+
+  errno = 0;
+  from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
+  if (*from_end != '\0' || (from_nr == UINT_MAX && errno == ERANGE)
+      || ((to_nr = strtoul (&to[prefix_len], &to_end,
+                            decimal_ellipsis ? 10 : 16)) == UINT_MAX
+          && errno == ERANGE)
+      || *to_end != '\0')
+    {
+      lr_error (lr, _("<%s> and <%s> are invalid names for range"), from, to);
+      return;
+    }
+
+  if (from_nr > to_nr)
+    {
+      lr_error (lr, _("upper limit in range is smaller than lower limit"));
+      return;
+    }
+
+  for (cnt = from_nr; cnt <= to_nr; cnt += step)
+    {
+      char *name_end;
+
+      /* Generate the name: the common prefix followed by the counter,
+         zero-padded to the width of the original number.  */
+      obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
+                      prefix_len, from, len1 - prefix_len, cnt);
+      obstack_1grow (ob, '\0');
+      name_end = obstack_finish (ob);
+
+      newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
+      newp->nbytes = nbytes;
+      memcpy (newp->bytes, bytes, nbytes);
+      newp->name = name_end;
+      newp->ucs4 = charmap_name_to_ucs4 (name_end, len1);
+
+      insert_entry (ht, name_end, len1, newp);
+      insert_entry (bt, newp->bytes, nbytes, newp);
+      /* Please note we don't examine the return value since it is no error
+         if we have two definitions for a symbol.  */
+
+      /* Increment the value in the byte sequence.  A byte which wraps
+         around to zero carries into the byte in front of it.  */
+      if (++bytes[nbytes - 1] == '\0')
+        {
+          int b = nbytes - 2;
+
+          do
+            if (b < 0)
+              {
+                lr_error (lr,
+                          _("resulting bytes for range not representable."));
+                return;
+              }
+          while (++bytes[b--] == 0);
+        }
+    }
+}
+
+
+/* Look up the byte sequence BYTES of length NBYTES in the byte table
+   of CM.  Returns the character sequence registered for it, or NULL
+   when find_entry reports a negative result.  */
+struct charseq *
+charmap_find_symbol (const struct charmap_t *cm, const char *bytes,
+                     size_t nbytes)
+{
+  void *entry;
+
+  /* find_entry takes a non-const table pointer.  */
+  if (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &entry) < 0)
+    return NULL;
+
+  return (struct charseq *) entry;
+}
diff --git a/REORG.TODO/locale/programs/charmap.h b/REORG.TODO/locale/programs/charmap.h
new file mode 100644
index 0000000000..5d6b48f59c
--- /dev/null
+++ b/REORG.TODO/locale/programs/charmap.h
@@ -0,0 +1,84 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _CHARMAP_H
+#define _CHARMAP_H
+
+#include <obstack.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "repertoire.h"
+#include "simple-hash.h"
+
+
+/* One rule from the WIDTH section of a charmap: the characters FROM
+ through TO have the width WIDTH. For a rule about a single character
+ TO is the same entry as FROM. */
+struct width_rule
+{
+ struct charseq *from;
+ struct charseq *to;
+ unsigned int width;
+};
+
+
+/* The result of reading one charmap file with charmap_read. */
+struct charmap_t
+{
+ /* Argument of the code_set_name keyword; charmap_read substitutes a
+ name derived from the file name if the file does not give one. */
+ const char *code_set_name;
+ /* Argument of the repertoiremap keyword, NULL if not given. */
+ const char *repertoiremap;
+ /* Smallest and largest number of bytes accepted for the encoding of
+ one character. */
+ int mb_cur_min;
+ int mb_cur_max;
+
+ /* WIDTH rules: the table, the number of entries in use and the number
+ of entries allocated. */
+ struct width_rule *width_rules;
+ size_t nwidth_rules;
+ size_t nwidth_rules_max;
+ /* Value of WIDTH_DEFAULT; the parser starts out with 1. */
+ unsigned int width_default;
+
+ /* Storage for names, character sequences and the width rules. */
+ struct obstack mem_pool;
+ /* Character sequences keyed by symbolic name. */
+ hash_table char_table;
+ /* Character sequences keyed by their byte sequence. */
+ hash_table byte_table;
+ /* NOTE(review): not touched by the parsing code in charmap.c;
+ presumably filled by another part of the program -- confirm. */
+ hash_table ucs4_table;
+};
+
+
+/* This is the structure used for entries in the hash table. It represents
+ the sequence of bytes used for the coded character. The entry is
+ allocated together with room for NBYTES bytes directly behind it. */
+struct charseq
+{
+ /* Symbolic name under which the character was defined. */
+ const char *name;
+ /* UCS4 value; UNINITIALIZED_CHAR_VALUE when it could not be derived
+ from a name of the form Uxxxx/Uxxxxxxxx or Pxxxx/Pxxxxxxxx. */
+ uint32_t ucs4;
+ /* Number of bytes stored in BYTES. */
+ int nbytes;
+ unsigned char bytes[0];
+};
+
+
+/* True if the encoding is not ASCII compatible. */
+extern bool enc_not_ascii_compatible;
+
+
+/* Prototypes for charmap handling functions. */
+extern struct charmap_t *charmap_read (const char *filename, int verbose,
+ int error_not_found, int be_quiet,
+ int use_default);
+
+/* Return the value stored under the given key in the hashing table. */
+extern struct charseq *charmap_find_value (const struct charmap_t *charmap,
+ const char *name, size_t len);
+
+/* Return symbol for given multibyte sequence. */
+extern struct charseq *charmap_find_symbol (const struct charmap_t *charmap,
+ const char *name, size_t len);
+
+#endif /* charmap.h */
diff --git a/REORG.TODO/locale/programs/config.h b/REORG.TODO/locale/programs/config.h
new file mode 100644
index 0000000000..5b416be0d8
--- /dev/null
+++ b/REORG.TODO/locale/programs/config.h
@@ -0,0 +1,35 @@
+/* Configuration for localedef program.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _LD_CONFIG_H
+#define _LD_CONFIG_H 1
+
+/* Use the internal textdomain used for libc messages. */
+#define PACKAGE _libc_intl_domainname
+#ifndef VERSION
+/* Get libc version number. */
+#include "../../version.h"
+#endif
+
+#define DEFAULT_CHARMAP "ANSI_X3.4-1968" /* ASCII */
+
+/* This must be one higher than the last used LC_xxx category value. */
+#define __LC_LAST 13
+
+#include_next <config.h>
+#endif
diff --git a/REORG.TODO/locale/programs/ld-address.c b/REORG.TODO/locale/programs/ld-address.c
new file mode 100644
index 0000000000..2488a5ce5c
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-address.c
@@ -0,0 +1,545 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <byteswap.h>
+#include <error.h>
+#include <langinfo.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+/* Country code table; iso-3166.def is expected to invoke DEFINE_COUNTRY_CODE once per country. */
+static struct
+{
+ const char ab2[3]; /* Stringified Ab2 macro argument. */
+ const char ab3[4]; /* Stringified Ab3 macro argument. */
+ uint32_t num; /* Num macro argument; address_finish searches the table on it. */
+} iso3166[] =
+{
+#define DEFINE_COUNTRY_CODE(Name, Ab2, Ab3, Num) \
+ { #Ab2, #Ab3, Num },
+#include "iso-3166.def"
+};
+
+/* Language code table; iso-639.def is expected to invoke the three DEFINE_LANGUAGE_CODE* macros below. */
+static struct
+{
+ const char ab[3]; /* Stringified Ab argument; "" for the CODE3 and CODE2 forms. */
+ const char term[4]; /* Stringified Term argument; address_finish searches on it first. */
+ const char lib[4]; /* Stringified Lib argument; "" for the CODE2 form. */
+} iso639[] =
+{
+#define DEFINE_LANGUAGE_CODE(Name, Ab, Term, Lib) \
+ { #Ab, #Term, #Lib },
+#define DEFINE_LANGUAGE_CODE3(Name, Term, Lib) \
+ { "", #Term, #Lib },
+#define DEFINE_LANGUAGE_CODE2(Name, Term) \
+ { "", #Term, "" },
+#include "iso-639.def"
+};
+
+/* Allocated zero-filled by address_startup: a NULL string or a country_num of 0 means "not defined". */
+/* The real definition of the struct for the LC_ADDRESS locale. */
+struct locale_address_t
+{
+ const char *postal_fmt; /* Escape sequences are validated in address_finish. */
+ const char *country_name;
+ const char *country_post;
+ const char *country_ab2; /* Checked against iso3166[].ab2. */
+ const char *country_ab3; /* Checked against iso3166[].ab3. */
+ uint32_t country_num; /* The only numeric field; looked up in iso3166[].num. */
+ const char *country_car;
+ const char *country_isbn; /* Parsed from either a string or a number token. */
+ const char *lang_name;
+ const char *lang_ab; /* Checked against iso639[].ab. */
+ const char *lang_term; /* Checked against iso639[].term. */
+ const char *lang_lib; /* Defaults to lang_term when not given. */
+};
+
+/* Allocate the zero-filled LC_ADDRESS record for LOCALE (unless IGNORE_CONTENT) and configure the reader LR, if any. */
+static void
+address_startup (struct linereader *lr, struct localedef_t *locale,
+ int ignore_content)
+{
+ if (!ignore_content)
+ locale->categories[LC_ADDRESS].address =
+ (struct locale_address_t *) xcalloc (1,
+ sizeof (struct locale_address_t));
+
+ if (lr != NULL) /* address_finish calls this with a NULL reader. */
+ {
+ lr->translate_strings = 1;
+ lr->return_widestr = 0;
+ }
+}
+
+
+void
+address_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+ struct locale_address_t *address = locale->categories[LC_ADDRESS].address;
+ size_t cnt;
+ int helper;
+ int nothing = 0;
+
+ /* Now resolve copying and also handle completely missing definitions. */
+ if (address == NULL)
+ {
+ /* First see whether we were supposed to copy. If yes, find the
+ actual definition. */
+ if (locale->copy_name[LC_ADDRESS] != NULL)
+ {
+ /* Find the copying locale. This has to happen transitively since
+ the locale we are copying from might also copying another one. */
+ struct localedef_t *from = locale;
+
+ do
+ from = find_locale (LC_ADDRESS, from->copy_name[LC_ADDRESS],
+ from->repertoire_name, charmap);
+ while (from->categories[LC_ADDRESS].address == NULL
+ && from->copy_name[LC_ADDRESS] != NULL);
+
+ address = locale->categories[LC_ADDRESS].address
+ = from->categories[LC_ADDRESS].address;
+ }
+
+ /* If there is still no definition issue an warning and create an
+ empty one. */
+ if (address == NULL)
+ {
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_ADDRESS"));
+ address_startup (NULL, locale, 0);
+ address = locale->categories[LC_ADDRESS].address;
+ nothing = 1;
+ }
+ }
+
+ if (address->postal_fmt == NULL)
+ {
+ if (! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_ADDRESS", "postal_fmt"));
+ /* Use as the default value the value of the i18n locale. */
+ address->postal_fmt = "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N";
+ }
+ else
+ {
+ /* We must check whether the format string contains only the allowed
+ escape sequences. Last checked against ISO 30112 WD10 [2014]. */
+ const char *cp = address->postal_fmt;
+
+ if (*cp == '\0')
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"),
+ "LC_ADDRESS", "postal_fmt"));
+ else
+ while (*cp != '\0')
+ {
+ if (*cp == '%')
+ {
+ if (*++cp == 'R')
+ /* Romanize-flag. */
+ ++cp;
+ if (strchr ("nafdbshNtreClzTSc%", *cp) == NULL)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid escape `%%%c' sequence in field `%s'"),
+ "LC_ADDRESS", *cp, "postal_fmt"));
+ break;
+ }
+ }
+ ++cp;
+ }
+ }
+
+#define TEST_ELEM(cat) \
+ if (address->cat == NULL) \
+ { \
+ if (verbose && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+ "LC_ADDRESS", #cat)); \
+ address->cat = ""; \
+ }
+
+ TEST_ELEM (country_name);
+ /* XXX Test against list of defined codes. */
+ TEST_ELEM (country_post);
+ /* XXX Test against list of defined codes. */
+ TEST_ELEM (country_car);
+ /* XXX Test against list of defined codes. */
+ TEST_ELEM (country_isbn);
+ TEST_ELEM (lang_name);
+
+ helper = 1;
+ if (address->lang_term == NULL)
+ {
+ if (verbose && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_ADDRESS", "lang_term"));
+ address->lang_term = "";
+ cnt = sizeof (iso639) / sizeof (iso639[0]);
+ }
+ else if (address->lang_term[0] == '\0')
+ {
+ if (verbose)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"),
+ "LC_ADDRESS", "lang_term"));
+ cnt = sizeof (iso639) / sizeof (iso639[0]);
+ }
+ else
+ {
+ /* Look for this language in the table. */
+ for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt)
+ if (strcmp (address->lang_term, iso639[cnt].term) == 0)
+ break;
+ if (cnt == sizeof (iso639) / sizeof (iso639[0]))
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: terminology language code `%s' not defined"),
+ "LC_ADDRESS", address->lang_term));
+ }
+
+ if (address->lang_ab == NULL)
+ {
+ if ((cnt == sizeof (iso639) / sizeof (iso639[0])
+ || iso639[cnt].ab[0] != '\0')
+ && verbose && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_ADDRESS", "lang_ab"));
+ address->lang_ab = "";
+ }
+ else if (address->lang_ab[0] == '\0')
+ {
+ if ((cnt == sizeof (iso639) / sizeof (iso639[0])
+ || iso639[cnt].ab[0] != '\0')
+ && verbose)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"),
+ "LC_ADDRESS", "lang_ab"));
+ }
+ else if (cnt < sizeof (iso639) / sizeof (iso639[0])
+ && iso639[cnt].ab[0] == '\0')
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be defined"),
+ "LC_ADDRESS", "lang_ab"));
+
+ address->lang_ab = "";
+ }
+ else
+ {
+ if (cnt == sizeof (iso639) / sizeof (iso639[0]))
+ {
+ helper = 2;
+ for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt)
+ if (strcmp (address->lang_ab, iso639[cnt].ab) == 0)
+ break;
+ if (cnt == sizeof (iso639) / sizeof (iso639[0]))
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: language abbreviation `%s' not defined"),
+ "LC_ADDRESS", address->lang_ab));
+ }
+ else
+ if (strcmp (iso639[cnt].ab, address->lang_ab) != 0
+ && iso639[cnt].ab[0] != '\0')
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: `%s' value does not match `%s' value"),
+ "LC_ADDRESS", "lang_ab", "lang_term"));
+ }
+
+ if (address->lang_lib == NULL)
+ /* This is no error. */
+ address->lang_lib = address->lang_term;
+ else if (address->lang_lib[0] == '\0')
+ {
+ if (verbose)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"),
+ "LC_ADDRESS", "lang_lib"));
+ }
+ else
+ {
+ if (cnt == sizeof (iso639) / sizeof (iso639[0]))
+ {
+ for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt)
+ if (strcmp (address->lang_lib, iso639[cnt].lib) == 0)
+ break;
+ if (cnt == sizeof (iso639) / sizeof (iso639[0]))
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: language abbreviation `%s' not defined"),
+ "LC_ADDRESS", address->lang_lib));
+ }
+ else
+ if (strcmp (iso639[cnt].ab, address->lang_ab) != 0)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: `%s' value does not match `%s' value"), "LC_ADDRESS", "lang_lib",
+ helper == 1 ? "lang_term" : "lang_ab"));
+ }
+
+ if (address->country_num == 0)
+ {
+ if (verbose && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_ADDRESS", "country_num"));
+ cnt = sizeof (iso3166) / sizeof (iso3166[0]);
+ }
+ else
+ {
+ for (cnt = 0; cnt < sizeof (iso3166) / sizeof (iso3166[0]); ++cnt)
+ if (address->country_num == iso3166[cnt].num)
+ break;
+
+ if (cnt == sizeof (iso3166) / sizeof (iso3166[0]))
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: numeric country code `%d' not valid"),
+ "LC_ADDRESS", address->country_num));
+ }
+
+ if (address->country_ab2 == NULL)
+ {
+ if (verbose && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_ADDRESS", "country_ab2"));
+ address->country_ab2 = " ";
+ }
+ else if (cnt != sizeof (iso3166) / sizeof (iso3166[0])
+ && strcmp (address->country_ab2, iso3166[cnt].ab2) != 0)
+ WITH_CUR_LOCALE (error (0, 0,
+ _("%s: `%s' value does not match `%s' value"),
+ "LC_ADDRESS", "country_ab2", "country_num"));
+
+ if (address->country_ab3 == NULL)
+ {
+ if (verbose && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_ADDRESS", "country_ab3"));
+ address->country_ab3 = " ";
+ }
+ else if (cnt != sizeof (iso3166) / sizeof (iso3166[0])
+ && strcmp (address->country_ab3, iso3166[cnt].ab3) != 0)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: `%s' value does not match `%s' value"),
+ "LC_ADDRESS", "country_ab3", "country_num"));
+}
+
+/* Write the LC_ADDRESS file below OUTPUT_PATH: the fields in the order added here, then the charmap's code set name. */
+void
+address_output (struct localedef_t *locale, const struct charmap_t *charmap,
+ const char *output_path)
+{
+ struct locale_address_t *address = locale->categories[LC_ADDRESS].address;
+ struct locale_file file;
+
+ init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS));
+ add_locale_string (&file, address->postal_fmt);
+ add_locale_string (&file, address->country_name);
+ add_locale_string (&file, address->country_post);
+ add_locale_string (&file, address->country_ab2);
+ add_locale_string (&file, address->country_ab3);
+ add_locale_string (&file, address->country_car);
+ add_locale_uint32 (&file, address->country_num); /* The only non-string item. */
+ add_locale_string (&file, address->country_isbn);
+ add_locale_string (&file, address->lang_name);
+ add_locale_string (&file, address->lang_ab);
+ add_locale_string (&file, address->lang_term);
+ add_locale_string (&file, address->lang_lib);
+ add_locale_string (&file, charmap->code_set_name);
+ write_locale_data (output_path, LC_ADDRESS, "LC_ADDRESS", &file);
+}
+
+/* Reads tokens from LDFILE until the `END' line; field values are stored in RESULT unless IGNORE_CONTENT is set. */
+/* The parser for the LC_ADDRESS section of the locale definition. */
+void
+address_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct locale_address_t *address;
+ struct token *now;
+ struct token *arg;
+ enum token_t nowtok;
+
+ /* The rest of the line containing `LC_ADDRESS' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+
+ do
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_address,
+ LC_ADDRESS, "LC_ADDRESS", ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. */
+ address_startup (ldfile, result, ignore_content);
+ address = result->categories[LC_ADDRESS].address;
+
+ while (1)
+ {
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+#define STR_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_string) \
+ goto err_label; \
+ if (address->cat != NULL) \
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \
+ else if (!ignore_content && arg->val.str.startmb == NULL) \
+ { \
+ lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_ADDRESS", #cat); \
+ address->cat = ""; \
+ } \
+ else if (!ignore_content) \
+ address->cat = arg->val.str.startmb; \
+ break
+
+ STR_ELEM (postal_fmt);
+ STR_ELEM (country_name);
+ STR_ELEM (country_post);
+ STR_ELEM (country_ab2);
+ STR_ELEM (country_ab3);
+ STR_ELEM (country_car);
+ STR_ELEM (lang_name);
+ STR_ELEM (lang_ab);
+ STR_ELEM (lang_term);
+ STR_ELEM (lang_lib);
+
+#define INT_STR_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_string && arg->tok != tok_number) \
+ goto err_label; \
+ if (address->cat != NULL) \
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \
+ else if (!ignore_content && arg->tok == tok_string \
+ && arg->val.str.startmb == NULL) \
+ { \
+ lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_ADDRESS", #cat); \
+ address->cat = ""; \
+ } \
+ else if (!ignore_content) \
+ { \
+ if (arg->tok == tok_string) \
+ address->cat = arg->val.str.startmb; \
+ else \
+ { \
+ char *numbuf = (char *) xmalloc (21); \
+ snprintf (numbuf, 21, "%ld", arg->val.num); \
+ address->cat = numbuf; \
+ } \
+ } \
+ break
+
+ INT_STR_ELEM (country_isbn); /* A number token is stored as its decimal string. */
+
+#define INT_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_number) \
+ goto err_label; \
+ else if (address->cat != 0) \
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \
+ else if (!ignore_content) \
+ address->cat = arg->val.num; \
+ break
+
+ INT_ELEM (country_num); /* 0 doubles as "not yet seen", so a literal 0 is not detected as a duplicate. */
+
+ case tok_end:
+ /* Next we assume `LC_ADDRESS'. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break;
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"),
+ "LC_ADDRESS");
+ else if (arg->tok != tok_lc_address)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_ADDRESS");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_address);
+ return;
+
+ default:
+ err_label: /* Also the target of the gotos inside the *_ELEM macros. */
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_ADDRESS");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_ADDRESS");
+}
diff --git a/REORG.TODO/locale/programs/ld-collate.c b/REORG.TODO/locale/programs/ld-collate.c
new file mode 100644
index 0000000000..cec848cb7c
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-collate.c
@@ -0,0 +1,3978 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <error.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <stdint.h>
+#include <sys/param.h>
+
+#include "localedef.h"
+#include "charmap.h"
+#include "localeinfo.h"
+#include "linereader.h"
+#include "locfile.h"
+#include "elem-hash.h"
+
+/* Uncomment the following line in the production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+static inline void
+__attribute ((always_inline))
+obstack_int32_grow (struct obstack *obstack, int32_t data)
+{ /* Append DATA, passed through maybe_swap_uint32, to the object growing in OBSTACK. */
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
+ data = maybe_swap_uint32 (data);
+ if (sizeof (int32_t) == sizeof (int)) /* Compile-time constant: picks the int-sized primitive when it fits. */
+ obstack_int_grow (obstack, data);
+ else
+ obstack_grow (obstack, &data, sizeof (int32_t));
+}
+
+static inline void
+__attribute ((always_inline))
+obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
+{ /* Like obstack_int32_grow, but uses obstack_int_grow_fast when int32_t and int have the same size. */
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
+ data = maybe_swap_uint32 (data);
+ if (sizeof (int32_t) == sizeof (int))
+ obstack_int_grow_fast (obstack, data);
+ else
+ obstack_grow (obstack, &data, sizeof (int32_t)); /* No "fast" variant is used on this path. */
+}
+
+/* Forward declaration. */
+struct element_t;
+
+/* Data type for list of sections; each node is on two singly linked lists. */
+struct section_list
+{
+ /* Successor in the known_sections list. */
+ struct section_list *def_next;
+ /* Successor in the sections list. */
+ struct section_list *next;
+ /* Name of the section. */
+ const char *name;
+ /* First element of this section. */
+ struct element_t *first;
+ /* Last element of this section. */
+ struct element_t *last;
+ /* These are the rules for this section (array stored by read_directions). */
+ enum coll_sort_rule *rules;
+ /* Index of the rule set in the appropriate section of the output file. */
+ int ruleidx;
+};
+
+struct element_t;
+
+struct element_list_t
+{
+ /* Number of elements in W. */
+ int cnt;
+
+ struct element_t **w; /* The elements; insert_weights stores a NULL entry for an ignored weight. */
+};
+
+/* Data type for collating element. */
+struct element_t
+{
+ const char *name; /* Copy of the symbolic name, or NULL. */
+
+ const char *mbs; /* Multibyte sequence, or NULL. */
+ size_t nmbs; /* Length of MBS in bytes. */
+ const uint32_t *wcs; /* Zero-terminated wide character sequence, or NULL. */
+ size_t nwcs; /* Number of characters in WCS (1 for <U0000>). */
+ int *mborder;
+ int wcorder;
+
+ /* The following is a bit mask whose bits are set if this element is
+ used in the appropriate level. Interesting for the singlebyte
+ weight computation.
+
+ XXX The type here restricts the number of levels to 32. It could
+ be changed if necessary but I doubt this is necessary. */
+ unsigned int used_in_level;
+
+ struct element_list_t *weights; /* Array of nrules entries, allocated by insert_weights. */
+
+ /* Nonzero if this is a real character definition. */
+ int is_character;
+
+ /* Order of the character in the sequence. This information will
+ be used in range expressions. */
+ int mbseqorder;
+ int wcseqorder;
+
+ /* Where does the definition come from. */
+ const char *file;
+ size_t line;
+
+ /* Which section does this belong to. */
+ struct section_list *section;
+
+ /* Predecessor and successor in the order list. */
+ struct element_t *last;
+ struct element_t *next;
+
+ /* Next element in multibyte output list. */
+ struct element_t *mbnext;
+ struct element_t *mblast;
+
+ /* Next element in wide character output list. */
+ struct element_t *wcnext;
+ struct element_t *wclast;
+};
+
+/* Special element value. */
+#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
+#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
+#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
+
+/* Data type for collating symbol. */
+struct symbol_t
+{
+ const char *name; /* Copy of the symbol name made by new_symbol. */
+
+ /* Point to place in the order list; NULL until find_element creates the element. */
+ struct element_t *order;
+
+ /* Where does the definition come from. */
+ const char *file;
+ size_t line;
+};
+
+/* Sparse table of struct element_t *. */
+#define TABLE wchead_table
+#define ELEMENT struct element_t *
+#define DEFAULT NULL
+#define ITERATE
+#define NO_ADD_LOCALE
+#include "3level.h"
+
+/* Sparse table of int32_t. */
+#define TABLE collidx_table
+#define ELEMENT int32_t
+#define DEFAULT 0
+#include "3level.h"
+
+/* Sparse table of uint32_t. */
+#define TABLE collseq_table
+#define ELEMENT uint32_t
+#define DEFAULT ~((uint32_t) 0)
+#include "3level.h"
+
+
+/* Simple name list for the preprocessor. */
+struct name_list
+{
+ struct name_list *next;
+ char str[0]; /* Zero-length trailing array (GNU extension); presumably allocated together with the name. */
+};
+
+
+/* The real definition of the struct for the LC_COLLATE locale. */
+struct locale_collate_t
+{
+ int col_weight_max;
+ int cur_weight_max;
+
+ /* List of known scripts. */
+ struct section_list *known_sections;
+ /* List of used sections. */
+ struct section_list *sections;
+ /* Current section using definition. */
+ struct section_list *current_section;
+ /* There always can be an unnamed section. */
+ struct section_list unnamed_section;
+ /* Flag whether the unnamed section has been defined. */
+ bool unnamed_section_defined;
+ /* To make handling of errors easier we have another section. */
+ struct section_list error_section;
+ /* Sometimes we are defining the values for collating symbols before
+ the first actual section. */
+ struct section_list symbol_section;
+
+ /* Start of the order list. */
+ struct element_t *start;
+
+ /* The undefined element. */
+ struct element_t undefined;
+
+ /* This is the cursor for `reorder_after' insertions. */
+ struct element_t *cursor;
+
+ /* This value is used when handling ellipsis. */
+ struct element_t ellipsis_weight;
+
+ /* Known collating elements. */
+ hash_table elem_table;
+
+ /* Known collating symbols. */
+ hash_table sym_table;
+
+ /* Known collation sequences. */
+ hash_table seq_table;
+
+ struct obstack mempool; /* Elements, symbols and section nodes are allocated from here. */
+
+ /* The LC_COLLATE category is a bit special as it is sometimes possible
+ that the definitions from more than one input file contains information.
+ Therefore we keep all relevant input in a list. */
+ struct locale_collate_t *next;
+
+ /* Arrays with heads of the list for each of the leading bytes in
+ the multibyte sequences. */
+ struct element_t *mbheads[256];
+
+ /* Sparse table with the list heads for the wide character
+ sequences (presumably the counterpart of MBHEADS). */
+ struct wchead_table wcheads;
+
+ /* The arrays with the collation sequence order. */
+ unsigned char mbseqorder[256];
+ struct collseq_table wcseqorder;
+
+ /* State of the preprocessor. */
+ enum
+ {
+ else_none = 0,
+ else_ignore,
+ else_seen
+ }
+ else_action;
+};
+
+
+/* We have a few global variables which are used for reading all
+ LC_COLLATE category descriptions in all files. */
+static uint32_t nrules;
+
+/* List of defined preprocessor symbols. */
+static struct name_list *defined;
+
+/* Store the UTF-8 style encoding of VAL in BUF (1 to 6 bytes, no NUL appended) and return the byte count. */
+/* We need UTF-8 encoding of numbers. */
+static inline int
+__attribute ((always_inline))
+utf8_encode (char *buf, int val)
+{
+ int retval;
+
+ if (val < 0x80) /* Also taken for negative VAL, which is stored as a single byte. */
+ {
+ *buf++ = (char) val;
+ retval = 1;
+ }
+ else
+ {
+ int step;
+
+ for (step = 2; step < 6; ++step) /* Find the shortest length whose payload bits can hold VAL. */
+ if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
+ break;
+ retval = step;
+
+ *buf = (unsigned char) (~0xff >> step); /* Lead byte marker: STEP one-bits followed by a zero bit. */
+ --step;
+ do
+ {
+ buf[step] = 0x80 | (val & 0x3f); /* Continuation bytes, filled from the last one backwards. */
+ val >>= 6;
+ }
+ while (--step > 0);
+ *buf |= val; /* The remaining high bits go into the lead byte. */
+ }
+
+ return retval;
+}
+
+/* Allocate a section node named STRING on COLLATE's obstack with NEXT as its successor in the sections list. */
+static struct section_list *
+make_seclist_elem (struct locale_collate_t *collate, const char *string,
+ struct section_list *next)
+{
+ struct section_list *newp;
+
+ newp = (struct section_list *) obstack_alloc (&collate->mempool,
+ sizeof (*newp));
+ newp->next = next;
+ newp->name = string; /* The pointer is stored, not a copy. */
+ newp->first = NULL;
+ newp->last = NULL;
+ /* def_next, rules and ruleidx are not initialized here. */
+ return newp;
+}
+
+/* Allocate an element on COLLATE's obstack with copies of MBS, WCS and NAME (each may be NULL); it is not linked anywhere. */
+static struct element_t *
+new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
+ const uint32_t *wcs, const char *name, size_t namelen,
+ int is_character)
+{
+ struct element_t *newp;
+
+ newp = (struct element_t *) obstack_alloc (&collate->mempool,
+ sizeof (*newp));
+ newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
+ name, namelen);
+ if (mbs != NULL)
+ {
+ newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
+ newp->nmbs = mbslen;
+ }
+ else
+ {
+ newp->mbs = NULL;
+ newp->nmbs = 0;
+ }
+ if (wcs != NULL)
+ {
+ size_t nwcs = wcslen ((wchar_t *) wcs); /* The cast assumes wchar_t and uint32_t have the same layout. */
+ uint32_t zero = 0;
+ /* Handle <U0000> as a single character. */
+ if (nwcs == 0)
+ nwcs = 1;
+ obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
+ obstack_grow (&collate->mempool, &zero, sizeof (uint32_t)); /* Explicit terminator. */
+ newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
+ newp->nwcs = nwcs;
+ }
+ else
+ {
+ newp->wcs = NULL;
+ newp->nwcs = 0;
+ }
+ newp->mborder = NULL;
+ newp->wcorder = 0;
+ newp->used_in_level = 0;
+ newp->is_character = is_character;
+
+ /* Will be assigned later. XXX */
+ newp->mbseqorder = 0;
+ newp->wcseqorder = 0;
+
+ /* Will be allocated later. */
+ newp->weights = NULL;
+
+ newp->file = NULL;
+ newp->line = 0;
+
+ newp->section = collate->current_section;
+
+ newp->last = NULL;
+ newp->next = NULL;
+
+ newp->mbnext = NULL;
+ newp->mblast = NULL;
+
+ newp->wcnext = NULL;
+ newp->wclast = NULL;
+
+ return newp;
+}
+
+/* Allocate a collating symbol on COLLATE's obstack holding a NUL-terminated copy of the LEN bytes at NAME. */
+static struct symbol_t *
+new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
+{
+ struct symbol_t *newp;
+
+ newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
+
+ newp->name = obstack_copy0 (&collate->mempool, name, len);
+ newp->order = NULL; /* No place in the order list yet. */
+
+ newp->file = NULL;
+ newp->line = 0;
+
+ return newp;
+}
+
+/* Returns 1 after reporting the first table that already contains SYMBOL, 0 if none does. */
+/* Test whether this name is already defined somewhere. */
+static int
+check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
+ const struct charmap_t *charmap,
+ struct repertoire_t *repertoire, const char *symbol,
+ size_t symbol_len)
+{
+ void *ignore = NULL; /* Receives the found value; never used. */
+
+ if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
+ {
+ lr_error (ldfile, _("`%.*s' already defined in charmap"),
+ (int) symbol_len, symbol);
+ return 1;
+ }
+
+ if (repertoire != NULL /* The repertoire is optional. */
+ && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
+ == 0))
+ {
+ lr_error (ldfile, _("`%.*s' already defined in repertoire"),
+ (int) symbol_len, symbol);
+ return 1;
+ }
+
+ if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
+ {
+ lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
+ (int) symbol_len, symbol);
+ return 1;
+ }
+
+ if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
+ {
+ lr_error (ldfile, _("`%.*s' already defined as collating element"),
+ (int) symbol_len, symbol);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* The first call fixes the global NRULES; later calls must supply the same number of rules. */
+/* Read the direction specification and store the rule array in the current section. */
+static void
+read_directions (struct linereader *ldfile, struct token *arg,
+ const struct charmap_t *charmap,
+ struct repertoire_t *repertoire, struct localedef_t *result)
+{
+ int cnt = 0; /* Index of the rule (level) being read. */
+ int max = nrules ?: 10; /* Capacity of RULES; grows only while NRULES is unknown. */
+ enum coll_sort_rule *rules = xcalloc (max, sizeof (*rules)); /* xcalloc: the array is used without a NULL check. */
+ int warned = 0;
+ struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
+
+ while (1)
+ {
+ int valid = 0;
+
+ if (arg->tok == tok_forward)
+ {
+ if (rules[cnt] & sort_backward)
+ {
+ if (! warned)
+ {
+ lr_error (ldfile, _("\
+%s: `forward' and `backward' are mutually excluding each other"),
+ "LC_COLLATE");
+ warned = 1;
+ }
+ }
+ else if (rules[cnt] & sort_forward)
+ {
+ if (! warned)
+ {
+ lr_error (ldfile, _("\
+%s: `%s' mentioned more than once in definition of weight %d"),
+ "LC_COLLATE", "forward", cnt + 1);
+ }
+ }
+ else
+ rules[cnt] |= sort_forward;
+
+ valid = 1;
+ }
+ else if (arg->tok == tok_backward)
+ {
+ if (rules[cnt] & sort_forward)
+ {
+ if (! warned)
+ {
+ lr_error (ldfile, _("\
+%s: `forward' and `backward' are mutually excluding each other"),
+ "LC_COLLATE");
+ warned = 1;
+ }
+ }
+ else if (rules[cnt] & sort_backward)
+ {
+ if (! warned)
+ {
+ lr_error (ldfile, _("\
+%s: `%s' mentioned more than once in definition of weight %d"),
+ "LC_COLLATE", "backward", cnt + 1);
+ }
+ }
+ else
+ rules[cnt] |= sort_backward;
+
+ valid = 1;
+ }
+ else if (arg->tok == tok_position)
+ {
+ if (rules[cnt] & sort_position)
+ {
+ if (! warned)
+ {
+ lr_error (ldfile, _("\
+%s: `%s' mentioned more than once in definition of weight %d"),
+ "LC_COLLATE", "position", cnt + 1);
+ }
+ }
+ else
+ rules[cnt] |= sort_position;
+
+ valid = 1;
+ }
+
+ if (valid)
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+
+ if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
+ || arg->tok == tok_semicolon)
+ {
+ if (! valid && ! warned)
+ {
+ lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+ warned = 1;
+ }
+
+ /* See whether we have to increment the counter. */
+ if (arg->tok != tok_comma && rules[cnt] != 0)
+ {
+ /* Add the default `forward' if we have seen only `position'. */
+ if (rules[cnt] == sort_position)
+ rules[cnt] = sort_position | sort_forward;
+
+ ++cnt;
+ }
+
+ if (arg->tok == tok_eof || arg->tok == tok_eol)
+ /* End of line or file, so we exit the loop. */
+ break;
+
+ if (nrules == 0)
+ {
+ /* See whether we have enough room in the array. */
+ if (cnt == max)
+ {
+ max += 10;
+ rules = (enum coll_sort_rule *) xrealloc (rules,
+ max
+ * sizeof (*rules));
+ memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
+ }
+ }
+ else
+ {
+ if (cnt == nrules)
+ {
+ /* There must not be any more rule. */
+ if (! warned)
+ {
+ lr_error (ldfile, _("\
+%s: too many rules; first entry only had %d"),
+ "LC_COLLATE", nrules);
+ warned = 1;
+ }
+
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (! warned)
+ {
+ lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+ warned = 1;
+ }
+ }
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ }
+
+ if (nrules == 0)
+ {
+ /* Now we know how many rules we have. */
+ nrules = cnt;
+ rules = (enum coll_sort_rule *) xrealloc (rules,
+ nrules * sizeof (*rules));
+ }
+ else
+ {
+ if (cnt < nrules)
+ {
+ /* Not enough rules in this specification. */
+ if (! warned)
+ lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
+
+ do
+ rules[cnt] = sort_forward; /* Missing levels default to `forward'. */
+ while (++cnt < nrules);
+ }
+ }
+
+ collate->current_section->rules = rules; /* The section keeps the array; it is not freed here. */
+}
+
+/* Look up the LEN bytes at STR as sequence, collating symbol or collating element, creating the element if unknown. */
+static struct element_t *
+find_element (struct linereader *ldfile, struct locale_collate_t *collate,
+ const char *str, size_t len)
+{
+ void *result = NULL;
+
+ /* Search for the entries among the collation sequences already defined. */
+ if (find_entry (&collate->seq_table, str, len, &result) != 0)
+ {
+ /* Nope, not defined yet. So we see whether it is a
+ collation symbol. */
+ void *ptr;
+
+ if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
+ {
+ /* It's a collation symbol. */
+ struct symbol_t *sym = (struct symbol_t *) ptr;
+ result = sym->order;
+
+ if (result == NULL) /* First use: give the symbol an unnamed, non-character element. */
+ result = sym->order = new_element (collate, NULL, 0, NULL,
+ NULL, 0, 0);
+ }
+ else if (find_entry (&collate->elem_table, str, len, &result) != 0)
+ {
+ /* It's also no collation element. So it is a character
+ element defined later. */
+ result = new_element (collate, NULL, 0, NULL, str, len, 1);
+ /* Insert it into the sequence table. */
+ insert_entry (&collate->seq_table, str, len, result);
+ }
+ }
+
+ return (struct element_t *) result;
+}
+
+/* Remove the element at COLLATE's cursor from the order list and move the cursor to its predecessor. */
+static void
+unlink_element (struct locale_collate_t *collate)
+{
+ if (collate->cursor == collate->start)
+ {
+ assert (collate->cursor->next == NULL); /* Only a single-element list is expected here. */
+ assert (collate->cursor->last == NULL);
+ collate->cursor = NULL; /* collate->start itself is left unchanged. */
+ }
+ else
+ {
+ if (collate->cursor->next != NULL)
+ collate->cursor->next->last = collate->cursor->last;
+ if (collate->cursor->last != NULL)
+ collate->cursor->last->next = collate->cursor->next;
+ collate->cursor = collate->cursor->last;
+ }
+}
+
+
+/* Link ELEM into the order list directly behind COLLATE->cursor and
+   parse the weight specification following it on the current line.
+
+   ELEM records the current file name and line number, becomes the new
+   cursor, is assigned to the current section and gets a zeroed array of
+   NRULES weight lists allocated from the collate obstack.  Then tokens
+   are read up to the end of the line.  For each level the weight can be
+   IGNORE (stored as a NULL pointer), a single symbol or UCS4 value, a
+   string made up of symbols, or, if ELLIPSIS is not tok_none, an
+   ellipsis token (stored as ELEMENT_ELLIPSIS2 placeholder).  Levels for
+   which the line provides no weight use ELEM itself, or the placeholder
+   if ELLIPSIS is set.  Problems are reported through lr_error and the
+   remainder of the line is skipped.  */
+static void
+insert_weights (struct linereader *ldfile, struct element_t *elem,
+                const struct charmap_t *charmap,
+                struct repertoire_t *repertoire, struct localedef_t *result,
+                enum token_t ellipsis)
+{
+  int weight_cnt;
+  struct token *arg;
+  struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
+
+  /* Initialize all the fields.  */
+  elem->file = ldfile->fname;
+  elem->line = ldfile->lineno;
+
+  /* Splice ELEM into the doubly linked list behind the cursor.  */
+  elem->last = collate->cursor;
+  elem->next = collate->cursor ? collate->cursor->next : NULL;
+  if (collate->cursor != NULL && collate->cursor->next != NULL)
+    collate->cursor->next->last = elem;
+  if (collate->cursor != NULL)
+    collate->cursor->next = elem;
+  if (collate->start == NULL)
+    {
+      assert (collate->cursor == NULL);
+      collate->start = elem;
+    }
+
+  elem->section = collate->current_section;
+
+  /* Keep the first/last pointers of the current section up to date.  */
+  if (collate->current_section->first == NULL)
+    collate->current_section->first = elem;
+  if (collate->current_section->last == collate->cursor)
+    collate->current_section->last = elem;
+
+  collate->cursor = elem;
+
+  elem->weights = (struct element_list_t *)
+    obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
+  memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
+
+  weight_cnt = 0;
+
+  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+  do
+    {
+      if (arg->tok == tok_eof || arg->tok == tok_eol)
+        break;
+
+      if (arg->tok == tok_ignore)
+        {
+          /* The weight for this level has to be ignored.  We use the
+             null pointer to indicate this.  */
+          elem->weights[weight_cnt].w = (struct element_t **)
+            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+          elem->weights[weight_cnt].w[0] = NULL;
+          elem->weights[weight_cnt].cnt = 1;
+        }
+      else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
+        {
+          char ucs4str[10];
+          struct element_t *val;
+          char *symstr;
+          size_t symlen;
+
+          if (arg->tok == tok_bsymbol)
+            {
+              symstr = arg->val.str.startmb;
+              symlen = arg->val.str.lenmb;
+            }
+          else
+            {
+              /* UCS4 values are looked up under the unified name
+                 "U" followed by eight hexadecimal digits.  */
+              snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
+              symstr = ucs4str;
+              symlen = 9;
+            }
+
+          val = find_element (ldfile, collate, symstr, symlen);
+          if (val == NULL)
+            break;
+
+          elem->weights[weight_cnt].w = (struct element_t **)
+            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+          elem->weights[weight_cnt].w[0] = val;
+          elem->weights[weight_cnt].cnt = 1;
+        }
+      else if (arg->tok == tok_string)
+        {
+          /* Split the string up in the individual characters and put
+             the element definitions in the list.  */
+          const char *cp = arg->val.str.startmb;
+          int cnt = 0;
+          struct element_t *charelem;
+          struct element_t **weights = NULL;
+          int max = 0;
+
+          if (*cp == '\0')
+            {
+              lr_error (ldfile, _("%s: empty weight string not allowed"),
+                        "LC_COLLATE");
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          do
+            {
+              if (*cp == '<')
+                {
+                  /* Ahh, it's a bsymbol or an UCS4 value.  If it's
+                     the latter we have to unify the name.  */
+                  const char *startp = ++cp;
+                  size_t len;
+
+                  while (*cp != '>')
+                    {
+                      if (*cp == ldfile->escape_char)
+                        ++cp;
+                      if (*cp == '\0')
+                        /* It's a syntax error.  */
+                        goto syntax;
+
+                      ++cp;
+                    }
+
+                  /* The <ctype.h> functions require a value
+                     representable as unsigned char (or EOF); plain
+                     char may be negative, so convert explicitly.  */
+                  if (cp - startp == 5 && startp[0] == 'U'
+                      && isxdigit ((unsigned char) startp[1])
+                      && isxdigit ((unsigned char) startp[2])
+                      && isxdigit ((unsigned char) startp[3])
+                      && isxdigit ((unsigned char) startp[4]))
+                    {
+                      unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
+                      char *newstr;
+
+                      newstr = (char *) xmalloc (10);
+                      snprintf (newstr, 10, "U%08X", ucs4);
+                      startp = newstr;
+
+                      len = 9;
+                    }
+                  else
+                    len = cp - startp;
+
+                  charelem = find_element (ldfile, collate, startp, len);
+                  ++cp;
+                }
+              else
+                {
+                  /* People really shouldn't use characters directly in
+                     the string.  Especially since it's not really clear
+                     what this means.  We interpret all characters in the
+                     string as if that would be bsymbols.  Otherwise we
+                     would have to match back to bsymbols somehow and this
+                     is normally not what people normally expect.  */
+                  charelem = find_element (ldfile, collate, cp++, 1);
+                }
+
+              if (charelem == NULL)
+                {
+                  /* We ignore the rest of the line.  */
+                  lr_ignore_rest (ldfile, 0);
+                  break;
+                }
+
+              /* Add the pointer.  */
+              if (cnt >= max)
+                {
+                  struct element_t **newp;
+                  max += 10;
+                  newp = (struct element_t **)
+                    alloca (max * sizeof (struct element_t *));
+                  /* On the first round WEIGHTS is still NULL; memcpy
+                     must not be handed a null pointer even for a
+                     zero-sized copy.  */
+                  if (cnt > 0)
+                    memcpy (newp, weights, cnt * sizeof (struct element_t *));
+                  weights = newp;
+                }
+              weights[cnt++] = charelem;
+            }
+          while (*cp != '\0');
+
+          /* Now store the information.  */
+          elem->weights[weight_cnt].w = (struct element_t **)
+            obstack_alloc (&collate->mempool,
+                           cnt * sizeof (struct element_t *));
+          if (cnt > 0)
+            memcpy (elem->weights[weight_cnt].w, weights,
+                    cnt * sizeof (struct element_t *));
+          elem->weights[weight_cnt].cnt = cnt;
+
+          /* We don't need the string anymore.  */
+          free (arg->val.str.startmb);
+        }
+      else if (ellipsis != tok_none
+               && (arg->tok == tok_ellipsis2
+                   || arg->tok == tok_ellipsis3
+                   || arg->tok == tok_ellipsis4))
+        {
+          /* It must be the same ellipsis as used in the initial column.  */
+          if (arg->tok != ellipsis)
+            lr_error (ldfile, _("\
+%s: weights must use the same ellipsis symbol as the name"),
+                      "LC_COLLATE");
+
+          /* The weight for this level will depend on the element
+             iterating over the range.  Put a placeholder.  */
+          elem->weights[weight_cnt].w = (struct element_t **)
+            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+          elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
+          elem->weights[weight_cnt].cnt = 1;
+        }
+      else
+        {
+        syntax:
+          /* It's a syntax error.  */
+          lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+          lr_ignore_rest (ldfile, 0);
+          break;
+        }
+
+      arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+      /* This better should be the end of the line or a semicolon.  */
+      if (arg->tok == tok_semicolon)
+        /* OK, ignore this and read the next token.  */
+        arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+      else if (arg->tok != tok_eof && arg->tok != tok_eol)
+        {
+          /* It's a syntax error.  */
+          lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+          lr_ignore_rest (ldfile, 0);
+          break;
+        }
+    }
+  while (++weight_cnt < nrules);
+
+  if (weight_cnt < nrules)
+    {
+      /* This means the rest of the line uses the current element as
+         the weight.  */
+      do
+        {
+          elem->weights[weight_cnt].w = (struct element_t **)
+            obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+          if (ellipsis == tok_none)
+            elem->weights[weight_cnt].w[0] = elem;
+          else
+            elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
+          elem->weights[weight_cnt].cnt = 1;
+        }
+      while (++weight_cnt < nrules);
+    }
+  else
+    {
+      if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
+        {
+          /* Too many rule values.  */
+          lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
+          lr_ignore_rest (ldfile, 0);
+        }
+      else
+        lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
+    }
+}
+
+
+/* Find or create the element named by SYMSTR (SYMLEN bytes) and append
+   it, together with the weights given on the rest of the line, to the
+   collation order.
+
+   The name is looked up in CHARMAP and, where necessary, REPERTOIRE.
+   If it denotes a character the element lives in the sequence table;
+   otherwise the collating element and collating symbol tables are
+   consulted and, if neither knows the name, a dummy entry is created.
+   Returns 1 (after reporting an error and skipping the line) if the
+   element already has a place in the order, 0 otherwise.  */
+static int
+insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
+              const struct charmap_t *charmap, struct repertoire_t *repertoire,
+              struct localedef_t *result)
+{
+  struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
+  struct element_t *elem = NULL;
+  uint32_t wc;
+
+  /* Try to find the character in the charmap.  */
+  struct charseq *seq = charmap_find_value (charmap, symstr, symlen);
+
+  /* Determine the wide character, caching it in the charmap entry.  */
+  if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+    wc = seq->ucs4;
+  else
+    {
+      wc = repertoire_find_value (repertoire, symstr, symlen);
+      if (seq != NULL)
+        seq->ucs4 = wc;
+    }
+
+  if (seq != NULL || wc != ILLEGAL_CHAR_VALUE)
+    {
+      /* The symbol stands for a character.  */
+      void *found = NULL;
+
+      if (find_entry (&collate->seq_table, symstr, symlen, &found) == 0)
+        {
+          elem = found;
+
+          /* The character may have been used before its definition.
+             Fill in the encodings which were unknown back then.  */
+          if (elem->mbs == NULL && seq != NULL)
+            {
+              elem->mbs = obstack_copy0 (&collate->mempool,
+                                         seq->bytes, seq->nbytes);
+              elem->nmbs = seq->nbytes;
+            }
+
+          if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
+            {
+              uint32_t wcs[2] = { wc, 0 };
+
+              elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
+              elem->nwcs = 1;
+            }
+        }
+      else
+        {
+          uint32_t wcs[2] = { wc, 0 };
+
+          /* Not seen so far: allocate an entry ...  */
+          elem = new_element (collate,
+                              seq != NULL ? (char *) seq->bytes : NULL,
+                              seq != NULL ? seq->nbytes : 0,
+                              wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
+                              symstr, symlen, 1);
+
+          /* ... and register it.  */
+          if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
+            /* This cannot happen.  */
+            assert (! "Internal error");
+        }
+    }
+  else
+    {
+      /* No character.  Search the collating elements and symbols.  */
+      void *found = NULL;
+
+      if (find_entry (&collate->elem_table, symstr, symlen, &found) == 0)
+        elem = found;
+      else
+        {
+          void *symptr;
+          struct symbol_t *sym = NULL;
+
+          /* Not a collating element either.  It is a collating symbol
+             or a character the character set does not support; for the
+             latter a dummy entry is created.  */
+          if (find_entry (&collate->sym_table, symstr, symlen, &symptr) == 0)
+            {
+              sym = (struct symbol_t *) symptr;
+              elem = sym->order;
+            }
+
+          if (elem == NULL)
+            {
+              elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
+
+              if (sym != NULL)
+                sym->order = elem;
+              else
+                /* Enter a fake element in the sequence table.  There is
+                   no multibyte or wide character associated with it.  */
+                insert_entry (&collate->seq_table, symstr, symlen, elem);
+            }
+        }
+    }
+
+  /* Only an element which is not yet part of the list can be added.  */
+  if (elem->next == NULL && elem != collate->cursor)
+    {
+      insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
+      return 0;
+    }
+
+  lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
+            (int) symlen, symstr, elem->file, elem->line);
+  lr_ignore_rest (ldfile, 0);
+  return 1;
+}
+
+
+/* Expand an ellipsis in the collation order.
+
+   On entry the placeholder element for the ellipsis is the cursor; it
+   is unlinked again.  If SYMSTR is not NULL it names the symbol which
+   ends the range and which is inserted (with its weights) via
+   insert_value.  Afterwards the elements between the element preceding
+   the ellipsis and the end element are generated:
+
+   - for tok_ellipsis3 by counting through the multibyte sequences and
+     looking each one up in CHARMAP;
+   - otherwise by counting through symbolic names with a common prefix
+     and a hexadecimal (tok_ellipsis2) or decimal numeric suffix.
+
+   The weights of the generated elements come from
+   COLLATE->ellipsis_weight; an ELEMENT_ELLIPSIS2 placeholder in there
+   is replaced by the generated element itself.  Errors are reported
+   through lr_error.  */
+static void
+handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
+                 enum token_t ellipsis, const struct charmap_t *charmap,
+                 struct repertoire_t *repertoire,
+                 struct localedef_t *result)
+{
+  struct element_t *startp;
+  struct element_t *endp;
+  struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
+
+  /* Unlink the entry added for the ellipsis.  */
+  unlink_element (collate);
+  startp = collate->cursor;
+
+  /* Process and add the end-entry.  */
+  if (symstr != NULL
+      && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
+    /* Something went wrong with inserting the to-value.  This means
+       we cannot process the ellipsis.  */
+    return;
+
+  /* Reset the cursor.  */
+  collate->cursor = startp;
+
+  /* Now we have to handle many different situations:
+     - we have to distinguish between the three different ellipsis forms
+     - the is the ellipsis at the beginning, in the middle, or at the end.
+
+     The cursor is NULL for an ellipsis at the very beginning, so do
+     not dereference it blindly.  */
+  endp = startp != NULL ? startp->next : NULL;
+  assert (symstr == NULL || endp != NULL);
+
+  /* XXX The following is probably very wrong since also collating symbols
+     can appear in ranges.  But do we want/can refine the test for that?  */
+#if 0
+  /* Both, the start and the end symbol, must stand for characters.  */
+  if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
+      || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
+    {
+      lr_error (ldfile, _("\
+%s: the start and the end symbol of a range must stand for characters"),
+                "LC_COLLATE");
+      return;
+    }
+#endif
+
+  if (ellipsis == tok_ellipsis3)
+    {
+      /* One requirement we make here: the length of the byte
+         sequences for the first and end character must be the same.
+         This is mainly to prevent unwanted effects and this is often
+         not what is wanted.  Either end can be missing, which the
+         tests below account for; so must the length computation.  */
+      size_t len = 0;
+      if (startp != NULL && startp->mbs != NULL)
+        len = startp->nmbs;
+      else if (endp != NULL && endp->mbs != NULL)
+        len = endp->nmbs;
+      char mbcnt[len + 1];
+      char mbend[len + 1];
+
+      /* Well, this should be caught somewhere else already.  Just to
+         make sure.  */
+      assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
+      assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
+
+      if (startp != NULL && endp != NULL
+          && startp->mbs != NULL && endp->mbs != NULL
+          && startp->nmbs != endp->nmbs)
+        {
+          lr_error (ldfile, _("\
+%s: byte sequences of first and last character must have the same length"),
+                    "LC_COLLATE");
+          return;
+        }
+
+      /* Determine whether we have to generate multibyte sequences.  */
+      if ((startp == NULL || startp->mbs != NULL)
+          && (endp == NULL || endp->mbs != NULL))
+        {
+          int cnt;
+          int ret;
+
+          /* Prepare the beginning byte sequence.  This is either from the
+             beginning byte sequence or it is all nulls if it was an
+             initial ellipsis.  */
+          if (startp == NULL || startp->mbs == NULL)
+            memset (mbcnt, '\0', len);
+          else
+            {
+              memcpy (mbcnt, startp->mbs, len);
+
+              /* And increment it so that the value is the first one we will
+                 try to insert.  */
+              for (cnt = len - 1; cnt >= 0; --cnt)
+                if (++mbcnt[cnt] != '\0')
+                  break;
+            }
+          mbcnt[len] = '\0';
+
+          /* And the end sequence.  */
+          if (endp == NULL || endp->mbs == NULL)
+            memset (mbend, '\0', len);
+          else
+            memcpy (mbend, endp->mbs, len);
+          mbend[len] = '\0';
+
+          /* Test whether we have a correct range.  */
+          ret = memcmp (mbcnt, mbend, len);
+          if (ret >= 0)
+            {
+              if (ret > 0)
+                lr_error (ldfile, _("%s: byte sequence of first character of \
+range is not lower than that of the last character"), "LC_COLLATE");
+              return;
+            }
+
+          /* Generate the byte sequences data.  */
+          while (1)
+            {
+              struct charseq *seq;
+
+              /* Quite a bit of work ahead.  We have to find the character
+                 definition for the byte sequence and then determine the
+                 wide character belonging to it.  */
+              seq = charmap_find_symbol (charmap, mbcnt, len);
+              if (seq != NULL)
+                {
+                  struct element_t *elem;
+                  size_t namelen;
+
+                  /* I don't think this can ever happen.  */
+                  assert (seq->name != NULL);
+                  namelen = strlen (seq->name);
+
+                  if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+                    seq->ucs4 = repertoire_find_value (repertoire, seq->name,
+                                                       namelen);
+
+                  /* Now we are ready to insert the new value in the
+                     sequence.  Find out whether the element is
+                     already known.  */
+                  void *ptr;
+                  if (find_entry (&collate->seq_table, seq->name, namelen,
+                                  &ptr) != 0)
+                    {
+                      uint32_t wcs[2] = { seq->ucs4, 0 };
+
+                      /* We have to allocate an entry.  */
+                      elem = new_element (collate, mbcnt, len,
+                                          seq->ucs4 == ILLEGAL_CHAR_VALUE
+                                          ? NULL : wcs, seq->name,
+                                          namelen, 1);
+
+                      /* And add it to the table.  */
+                      if (insert_entry (&collate->seq_table, seq->name,
+                                        namelen, elem) != 0)
+                        /* This cannot happen.  */
+                        assert (! "Internal error");
+                    }
+                  else
+                    /* Copy the result.  */
+                    elem = ptr;
+
+                  /* Test whether this element is not already in the list.
+                     This is the same test insert_value uses: the element
+                     has a successor or it is the cursor itself.  */
+                  if (elem->next != NULL || elem == collate->cursor)
+                    {
+                      lr_error (ldfile, _("\
+order for `%.*s' already defined at %s:%Zu"),
+                                (int) namelen, seq->name,
+                                elem->file, elem->line);
+                      goto increment;
+                    }
+
+                  /* Enqueue the new element.  */
+                  elem->last = collate->cursor;
+                  if (collate->cursor == NULL)
+                    elem->next = NULL;
+                  else
+                    {
+                      elem->next = collate->cursor->next;
+                      elem->last->next = elem;
+                      if (elem->next != NULL)
+                        elem->next->last = elem;
+                    }
+                  if (collate->start == NULL)
+                    {
+                      assert (collate->cursor == NULL);
+                      collate->start = elem;
+                    }
+                  collate->cursor = elem;
+
+                  /* Add the weight value.  We take them from the
+                     `ellipsis_weights' member of `collate'.  */
+                  elem->weights = (struct element_list_t *)
+                    obstack_alloc (&collate->mempool,
+                                   nrules * sizeof (struct element_list_t));
+                  for (cnt = 0; cnt < nrules; ++cnt)
+                    if (collate->ellipsis_weight.weights[cnt].cnt == 1
+                        && (collate->ellipsis_weight.weights[cnt].w[0]
+                            == ELEMENT_ELLIPSIS2))
+                      {
+                        elem->weights[cnt].w = (struct element_t **)
+                          obstack_alloc (&collate->mempool,
+                                         sizeof (struct element_t *));
+                        elem->weights[cnt].w[0] = elem;
+                        elem->weights[cnt].cnt = 1;
+                      }
+                    else
+                      {
+                        /* Simply use the weight from `ellipsis_weight'.  */
+                        elem->weights[cnt].w =
+                          collate->ellipsis_weight.weights[cnt].w;
+                        elem->weights[cnt].cnt =
+                          collate->ellipsis_weight.weights[cnt].cnt;
+                      }
+                }
+
+              /* Increment for the next round.  */
+            increment:
+              for (cnt = len - 1; cnt >= 0; --cnt)
+                if (++mbcnt[cnt] != '\0')
+                  break;
+
+              /* Find out whether this was all.  */
+              if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
+                /* Yep, that's all.  */
+                break;
+            }
+        }
+    }
+  else
+    {
+      /* For symbolic range we naturally must have a beginning and an
+         end specified by the user.  */
+      if (startp == NULL)
+        lr_error (ldfile, _("\
+%s: symbolic range ellipsis must not directly follow `order_start'"),
+                  "LC_COLLATE");
+      else if (endp == NULL)
+        lr_error (ldfile, _("\
+%s: symbolic range ellipsis must not be directly followed by `order_end'"),
+                  "LC_COLLATE");
+      else
+        {
+          /* Determine the range.  To do so we have to determine the
+             common prefix of the both names and then the numeric
+             values of both ends.  */
+          size_t lenfrom = strlen (startp->name);
+          size_t lento = strlen (endp->name);
+          char buf[lento + 1];
+          int preflen = 0;
+          long int from;
+          long int to;
+          char *cp;
+          int base = ellipsis == tok_ellipsis2 ? 16 : 10;
+
+          if (lenfrom != lento)
+            {
+            invalid_range:
+              lr_error (ldfile, _("\
+`%s' and `%.*s' are not valid names for symbolic range"),
+                        startp->name, (int) lento, endp->name);
+              return;
+            }
+
+          while (startp->name[preflen] == endp->name[preflen])
+            if (startp->name[preflen] == '\0')
+              /* Nothing to be done.  The start and end point are identical
+                 and while inserting the end point we have already given
+                 the user an error message.  */
+              return;
+            else
+              ++preflen;
+
+          /* strtol signals overflow by setting errno to ERANGE (the
+             returned value is then LONG_MAX or LONG_MIN), so errno is
+             the thing to test.  */
+          errno = 0;
+          from = strtol (startp->name + preflen, &cp, base);
+          if (errno == ERANGE || *cp != '\0')
+            goto invalid_range;
+
+          errno = 0;
+          to = strtol (endp->name + preflen, &cp, base);
+          if (errno == ERANGE || *cp != '\0')
+            goto invalid_range;
+
+          /* Copy the prefix.  */
+          memcpy (buf, startp->name, preflen);
+
+          /* Loop over all values.  */
+          for (++from; from < to; ++from)
+            {
+              struct element_t *elem = NULL;
+              struct charseq *seq;
+              uint32_t wc;
+              int cnt;
+
+              /* Generate the name.  */
+              sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
+                       (int) (lenfrom - preflen), from);
+
+              /* Look whether this name is already defined.  The generated
+                 name has the length of the start name; SYMLEN describes
+                 the end symbol passed by the caller, if any.  */
+              void *ptr;
+              if (find_entry (&collate->seq_table, buf, lenfrom, &ptr) == 0)
+                {
+                  /* Copy back the result.  */
+                  elem = ptr;
+
+                  /* Same test as in insert_value.  */
+                  if (elem->next != NULL || elem == collate->cursor)
+                    {
+                      lr_error (ldfile, _("\
+%s: order for `%.*s' already defined at %s:%Zu"),
+                                "LC_COLLATE", (int) lenfrom, buf,
+                                elem->file, elem->line);
+                      continue;
+                    }
+
+                  if (elem->name == NULL)
+                    {
+                      lr_error (ldfile, _("%s: `%s' must be a character"),
+                                "LC_COLLATE", buf);
+                      continue;
+                    }
+                }
+
+              if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
+                {
+                  /* Search for a character of this name.  */
+                  seq = charmap_find_value (charmap, buf, lenfrom);
+                  if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+                    {
+                      wc = repertoire_find_value (repertoire, buf, lenfrom);
+
+                      if (seq != NULL)
+                        seq->ucs4 = wc;
+                    }
+                  else
+                    wc = seq->ucs4;
+
+                  if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
+                    /* We don't know anything about a character with this
+                       name.  XXX Should we warn?  */
+                    continue;
+
+                  if (elem == NULL)
+                    {
+                      uint32_t wcs[2] = { wc, 0 };
+
+                      /* We have to allocate an entry.  */
+                      elem = new_element (collate,
+                                          seq != NULL
+                                          ? (char *) seq->bytes : NULL,
+                                          seq != NULL ? seq->nbytes : 0,
+                                          wc == ILLEGAL_CHAR_VALUE
+                                          ? NULL : wcs, buf, lenfrom, 1);
+                    }
+                  else
+                    {
+                      /* Update the element.  */
+                      if (seq != NULL)
+                        {
+                          elem->mbs = obstack_copy0 (&collate->mempool,
+                                                     seq->bytes, seq->nbytes);
+                          elem->nmbs = seq->nbytes;
+                        }
+
+                      if (wc != ILLEGAL_CHAR_VALUE)
+                        {
+                          uint32_t zero = 0;
+
+                          obstack_grow (&collate->mempool,
+                                        &wc, sizeof (uint32_t));
+                          obstack_grow (&collate->mempool,
+                                        &zero, sizeof (uint32_t));
+                          elem->wcs = obstack_finish (&collate->mempool);
+                          elem->nwcs = 1;
+                        }
+                    }
+
+                  elem->file = ldfile->fname;
+                  elem->line = ldfile->lineno;
+                  elem->section = collate->current_section;
+                }
+
+              /* Enqueue the new element.  */
+              elem->last = collate->cursor;
+              elem->next = collate->cursor->next;
+              elem->last->next = elem;
+              if (elem->next != NULL)
+                elem->next->last = elem;
+              collate->cursor = elem;
+
+              /* Now add the weights.  They come from the `ellipsis_weights'
+                 member of `collate'.  */
+              elem->weights = (struct element_list_t *)
+                obstack_alloc (&collate->mempool,
+                               nrules * sizeof (struct element_list_t));
+              for (cnt = 0; cnt < nrules; ++cnt)
+                if (collate->ellipsis_weight.weights[cnt].cnt == 1
+                    && (collate->ellipsis_weight.weights[cnt].w[0]
+                        == ELEMENT_ELLIPSIS2))
+                  {
+                    elem->weights[cnt].w = (struct element_t **)
+                      obstack_alloc (&collate->mempool,
+                                     sizeof (struct element_t *));
+                    elem->weights[cnt].w[0] = elem;
+                    elem->weights[cnt].cnt = 1;
+                  }
+                else
+                  {
+                    /* Simply use the weight from `ellipsis_weight'.  */
+                    elem->weights[cnt].w =
+                      collate->ellipsis_weight.weights[cnt].w;
+                    elem->weights[cnt].cnt =
+                      collate->ellipsis_weight.weights[cnt].cnt;
+                  }
+            }
+        }
+    }
+}
+
+
+/* Prepare LOCALE and the reader LDFILE for parsing an LC_COLLATE
+   section.  Unless IGNORE_CONTENT is set or the category data exists
+   already, the data is either shared with COPY_LOCALE (if that is not
+   NULL) or allocated and initialized afresh.  In every case string
+   translation and wide string results are switched off in LDFILE.  */
+static void
+collate_startup (struct linereader *ldfile, struct localedef_t *locale,
+                 struct localedef_t *copy_locale, int ignore_content)
+{
+  int need_data = (!ignore_content
+                   && locale->categories[LC_COLLATE].collate == NULL);
+
+  if (need_data && copy_locale != NULL)
+    /* Reuse the copy_locale's data structures.  */
+    locale->categories[LC_COLLATE].collate =
+      copy_locale->categories[LC_COLLATE].collate;
+  else if (need_data)
+    {
+      struct locale_collate_t *fresh =
+        (struct locale_collate_t *)
+        xcalloc (1, sizeof (struct locale_collate_t));
+
+      locale->categories[LC_COLLATE].collate = fresh;
+
+      /* Set up the three lookup tables and the memory pool.  */
+      init_hash (&fresh->elem_table, 100);
+      init_hash (&fresh->sym_table, 100);
+      init_hash (&fresh->seq_table, 500);
+      obstack_init (&fresh->mempool);
+
+      fresh->col_weight_max = -1;
+    }
+
+  ldfile->translate_strings = 0;
+  ldfile->return_widestr = 0;
+}
+
+
+/* Post-process the LC_COLLATE data of LOCALE once parsing is complete.
+
+ If there is no collation data a message is printed (unless be_quiet
+ is set) and nothing else happens. Otherwise, in this order:
+ - the use of `position' is checked for consistency across sections,
+ - undefined weight symbols are reported and replaced by the UNDEFINED
+ entry, and for every element the levels it is used in are recorded,
+ - the per-level multibyte orders, the wide character order and the
+ collation sequence numbers are assigned and the `mbheads' lists
+ are built,
+ - unset `mbheads' slots (1..255) are pointed to UNDEFINED,
+ - the wide character head table and sequence order table are built,
+ - UNDEFINED gets its order values if it was not defined explicitly,
+ - sections with identical rules get the same rule index.
+
+ CHARMAP is not referenced in this function. */
+void
+collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+ /* Now is the time when we can assign the individual collation
+ values for all the symbols. We have possibly different values
+ for the wide- and the multibyte-character symbols. This is done
+ since it might make a difference in the encoding if there is in
+ some cases no multibyte-character but there are wide-characters.
+ (The other way around it is not important since the encoded
+ collation value in the wide-character case is 32 bits wide and
+ therefore requires no encoding).
+
+ The lowest collation value assigned is 2. Zero is reserved for
+ the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
+ functions and 1 is used to separate the individual passes for the
+ different rules.
+
+ We also have to construct a list with all the bytes/words which
+ can come first in a sequence, followed by all the elements which
+ also start with this byte/word. The order is reverse which has
+ among others the important effect that longer strings are located
+ first in the list. This is required for the output data since
+ the algorithm used in `strcoll' etc depends on this.
+
+ The multibyte case is easy. We simply sort into an array with
+ 256 elements. */
+ struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+ int mbact[nrules];
+ int wcact;
+ int mbseqact;
+ int wcseqact;
+ struct element_t *runp;
+ int i;
+ int need_undefined = 0;
+ struct section_list *sect;
+ int ruleidx;
+ int nr_wide_elems = 0;
+
+ if (collate == NULL)
+ {
+ /* No data, no check. */
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
+ "LC_COLLATE"));
+ return;
+ }
+
+ /* If this assertion is hit change the type in `element_t'. */
+ assert (nrules <= sizeof (runp->used_in_level) * 8);
+
+ /* Make sure that the `position' rule is used either in all sections
+ or in none. Note that the `break' only leaves the inner loop over
+ the sections, the outer loop continues with the next level. */
+ for (i = 0; i < nrules; ++i)
+ for (sect = collate->sections; sect != NULL; sect = sect->next)
+ if (sect != collate->current_section
+ && sect->rules != NULL
+ && ((sect->rules[i] & sort_position)
+ != (collate->current_section->rules[i] & sort_position)))
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: `position' must be used for a specific level in all sections or none"),
+ "LC_COLLATE"));
+ break;
+ }
+
+ /* Find out which elements are used at which level. At the same
+ time we find out whether we have any undefined symbols. Only
+ elements with a multibyte sequence are examined here. */
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL)
+ {
+ for (i = 0; i < nrules; ++i)
+ {
+ int j;
+
+ for (j = 0; j < runp->weights[i].cnt; ++j)
+ /* A NULL pointer as the weight means IGNORE. */
+ if (runp->weights[i].w[j] != NULL)
+ {
+ if (runp->weights[i].w[j]->weights == NULL)
+ {
+ WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
+ runp->line,
+ _("symbol `%s' not defined"),
+ runp->weights[i].w[j]->name));
+
+ need_undefined = 1;
+ runp->weights[i].w[j] = &collate->undefined;
+ }
+ else
+ /* Set the bit for the level. */
+ runp->weights[i].w[j]->used_in_level |= 1 << i;
+ }
+ }
+ }
+
+ /* Up to the next entry. */
+ runp = runp->next;
+ }
+
+ /* Walk through the list of defined sequences and assign weights. Also
+ create the data structure which will allow generating the single byte
+ character based tables.
+
+ Since at each time only the weights for each of the rules are
+ only compared to other weights for this rule it is possible to
+ assign more compact weight values than simply counting all
+ weights in sequence. We can assign weights starting at the lowest
+ value, one counter for each rule individually and only for those
+ elements, which are actually used for this rule.
+
+ Why is this important? It is not for the wide char table. But
+ it is for the singlebyte output since here larger numbers have to
+ be encoded to make it possible to emit the value as a byte
+ string. */
+ for (i = 0; i < nrules; ++i)
+ mbact[i] = 2;
+ wcact = 2;
+ mbseqact = 0;
+ wcseqact = 0;
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ /* Determine the order. */
+ if (runp->used_in_level != 0)
+ {
+ runp->mborder = (int *) obstack_alloc (&collate->mempool,
+ nrules * sizeof (int));
+
+ for (i = 0; i < nrules; ++i)
+ if ((runp->used_in_level & (1 << i)) != 0)
+ runp->mborder[i] = mbact[i]++;
+ else
+ runp->mborder[i] = 0;
+ }
+
+ if (runp->mbs != NULL)
+ {
+ struct element_t **eptr;
+ struct element_t *lastp = NULL;
+
+ /* Find the point where to insert in the list. The list for
+ a first byte is kept sorted: longer sequences first, and
+ among sequences of equal length in descending memcmp order. */
+ eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
+ while (*eptr != NULL)
+ {
+ if ((*eptr)->nmbs < runp->nmbs)
+ break;
+
+ if ((*eptr)->nmbs == runp->nmbs)
+ {
+ int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
+
+ if (c == 0)
+ {
+ /* This should not happen. It means that we have
+ two symbols with the same byte sequence. It is
+ of course an error. */
+ WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
+ (*eptr)->line,
+ _("\
+symbol `%s' has the same encoding as"), (*eptr)->name);
+ error_at_line (0, 0, runp->file,
+ runp->line,
+ _("symbol `%s'"),
+ runp->name));
+ goto dont_insert;
+ }
+ else if (c < 0)
+ /* Insert it here. */
+ break;
+ }
+
+ /* To the next entry. */
+ lastp = *eptr;
+ eptr = &(*eptr)->mbnext;
+ }
+
+ /* Set the pointers. */
+ runp->mbnext = *eptr;
+ runp->mblast = lastp;
+ if (*eptr != NULL)
+ (*eptr)->mblast = runp;
+ *eptr = runp;
+ dont_insert:
+ ;
+ }
+
+ if (runp->used_in_level)
+ {
+ runp->wcorder = wcact++;
+
+ /* We take the opportunity to count the elements which have
+ wide characters. */
+ ++nr_wide_elems;
+ }
+
+ if (runp->is_character)
+ {
+ if (runp->nmbs == 1)
+ collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
+
+ runp->wcseqorder = wcseqact++;
+ }
+ else if (runp->mbs != NULL && runp->weights != NULL)
+ /* This is for collation elements. */
+ runp->wcseqorder = wcseqact++;
+
+ /* Up to the next entry. */
+ runp = runp->next;
+ }
+
+ /* Find out whether any of the `mbheads' entries is unset. In this
+ case we use the UNDEFINED entry. Slot 0 is not touched. */
+ for (i = 1; i < 256; ++i)
+ if (collate->mbheads[i] == NULL)
+ {
+ need_undefined = 1;
+ collate->mbheads[i] = &collate->undefined;
+ }
+
+ /* Now to the wide character case. */
+ collate->wcheads.p = 6;
+ collate->wcheads.q = 10;
+ wchead_table_init (&collate->wcheads);
+
+ collate->wcseqorder.p = 6;
+ collate->wcseqorder.q = 10;
+ collseq_table_init (&collate->wcseqorder);
+
+ /* Start adding. */
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->wcs != NULL)
+ {
+ struct element_t *e;
+ struct element_t **eptr;
+ struct element_t *lastp;
+
+ /* Insert the collation sequence value. */
+ if (runp->is_character)
+ collseq_table_add (&collate->wcseqorder, runp->wcs[0],
+ runp->wcseqorder);
+
+ /* Find the point where to insert in the list. The current
+ head is fetched into the local E; if the new element becomes
+ the head the table is updated further down. */
+ e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
+ eptr = &e;
+ lastp = NULL;
+ while (*eptr != NULL)
+ {
+ if ((*eptr)->nwcs < runp->nwcs)
+ break;
+
+ if ((*eptr)->nwcs == runp->nwcs)
+ {
+ int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
+ (wchar_t *) runp->wcs, runp->nwcs);
+
+ if (c == 0)
+ {
+ /* This should not happen. It means that we have
+ two symbols with the same wide character sequence.
+ It is of course an error. */
+ WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
+ (*eptr)->line,
+ _("\
+symbol `%s' has the same encoding as"), (*eptr)->name);
+ error_at_line (0, 0, runp->file,
+ runp->line,
+ _("symbol `%s'"),
+ runp->name));
+ goto dont_insertwc;
+ }
+ else if (c < 0)
+ /* Insert it here. */
+ break;
+ }
+
+ /* To the next entry. */
+ lastp = *eptr;
+ eptr = &(*eptr)->wcnext;
+ }
+
+ /* Set the pointers. */
+ runp->wcnext = *eptr;
+ runp->wclast = lastp;
+ if (*eptr != NULL)
+ (*eptr)->wclast = runp;
+ *eptr = runp;
+ /* The head of the list changed, store it in the table. */
+ if (eptr == &e)
+ wchead_table_add (&collate->wcheads, runp->wcs[0], e);
+ dont_insertwc:
+ ;
+ }
+
+ /* Up to the next entry. */
+ runp = runp->next;
+ }
+
+ /* Now determine whether the UNDEFINED entry is needed and if yes,
+ whether it was defined. */
+ collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
+ if (collate->undefined.file == NULL)
+ {
+ if (need_undefined)
+ {
+ /* This seems not to be enforced by recent standards. Don't
+ emit an error, simply append UNDEFINED at the end. */
+ if (0)
+ WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
+
+ /* Add UNDEFINED at the end. */
+ collate->undefined.mborder =
+ (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
+
+ for (i = 0; i < nrules; ++i)
+ collate->undefined.mborder[i] = mbact[i]++;
+ }
+
+ /* In any case we will need the definition for the wide character
+ case. But we will not complain that it is missing since the
+ specification strangely enough does not seem to account for
+ this. */
+ collate->undefined.wcorder = wcact++;
+ }
+
+ /* Finally, try to unify the rules for the sections. Whenever the rules
+ for a section are the same as those for another section give the
+ ruleset the same index. Since there are never many sections we can
+ use an O(n^2) algorithm here. */
+ sect = collate->sections;
+ while (sect != NULL && sect->rules == NULL)
+ sect = sect->next;
+
+ /* Bail out if we have no sections because of earlier errors. */
+ if (sect == NULL)
+ {
+ WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
+ _("too many errors; giving up")));
+ return;
+ }
+
+ ruleidx = 0;
+ do
+ {
+ struct section_list *osect = collate->sections;
+
+ /* Look for an earlier section with the very same rules. */
+ while (osect != sect)
+ if (osect->rules != NULL
+ && memcmp (osect->rules, sect->rules,
+ nrules * sizeof (osect->rules[0])) == 0)
+ break;
+ else
+ osect = osect->next;
+
+ if (osect == sect)
+ sect->ruleidx = ruleidx++;
+ else
+ sect->ruleidx = osect->ruleidx;
+
+ /* Next section. */
+ do
+ sect = sect->next;
+ while (sect != NULL && sect->rules == NULL);
+ }
+ while (sect != NULL);
+ /* We are currently not prepared for more than 128 rulesets. But this
+ should never really be a problem. */
+ assert (ruleidx <= 128);
+}
+
+
+/* Append the multibyte weights of ELEM for all levels to POOL.  For
+   each level one length byte is written, followed by the encoded
+   `mborder' values of the non-IGNORE weights.  The result is the
+   object size of POOL before anything was added, combined with the low
+   seven bits of the rule index of ELEM's section in bits 24 and up.
+   For the UNDEFINED entry of COLLATE nothing is written and zero is
+   returned.  */
+static int32_t
+output_weight (struct obstack *pool, struct locale_collate_t *collate,
+               struct element_t *elem)
+{
+  size_t level;
+  int32_t start;
+
+  /* The weights of UNDEFINED are already inserted.  */
+  if (elem == &collate->undefined)
+    return 0;
+
+  /* Remember where the data of this element begins.  */
+  start = obstack_object_size (pool);
+
+  for (level = 0; level < nrules; ++level)
+    {
+      char encoded[elem->weights[level].cnt * 7];
+      int used = 0;
+      int idx = 0;
+
+      /* Encode the weight values.  IGNORE entries produce nothing.  */
+      while (idx < elem->weights[level].cnt)
+        {
+          struct element_t *weight = elem->weights[level].w[idx];
+
+          if (weight != NULL)
+            used += utf8_encode (&encoded[used], weight->mborder[level]);
+          ++idx;
+        }
+
+      /* First the length, then the bytes themselves.  */
+      obstack_1grow (pool, used);
+      obstack_grow (pool, encoded, used);
+    }
+
+  return start | ((elem->section->ruleidx & 0x7f) << 24);
+}
+
+
+/* Wide character counterpart of output_weight: append the weight record
+   for ELEM to POOL and return its index.
+
+   For each of the NRULES levels the record holds one 32-bit count
+   followed by the `wcorder' values of all non-IGNORE weights of that
+   level.
+
+   The return value is the position of the record in POOL counted in
+   32-bit words, combined with the low seven bits of the ruleset index of
+   ELEM's section in bits 24..30.  For `&COLLATE->undefined' nothing is
+   written and 0 is returned.  */
+static int32_t
+output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
+                 struct element_t *elem)
+{
+  int32_t offset;
+  size_t rule;
+
+  /* UNDEFINED is always referred to by index zero.  */
+  if (elem == &collate->undefined)
+    return 0;
+
+  /* The record starts at the current fill level of the pool, expressed
+     in units of int32_t.  */
+  offset = obstack_object_size (pool) / sizeof (int32_t);
+
+  rule = 0;
+  while (rule < nrules)
+    {
+      int32_t words[elem->weights[rule].cnt];
+      int32_t filled = 0;
+      int pos;
+
+      for (pos = 0; pos < elem->weights[rule].cnt; ++pos)
+        {
+          /* A NULL entry stands for IGNORE and contributes nothing.  */
+          if (elem->weights[rule].w[pos] == NULL)
+            continue;
+
+          words[filled] = elem->weights[rule].w[pos]->wcorder;
+          ++filled;
+        }
+
+      /* Count first, then the weights.  The weights are copied in raw
+         form and afterwards handed to maybe_swap_uint32_obstack
+         (presumably to fix up the byte order of the last FILLED words;
+         confirm against its definition).  */
+      obstack_int32_grow (pool, filled);
+      obstack_grow (pool, words, filled * sizeof (int32_t));
+      maybe_swap_uint32_obstack (pool, filled);
+
+      ++rule;
+    }
+
+  return offset | ((elem->section->ruleidx & 0x7f) << 24);
+}
+
+/* State shared between collate_output and add_to_tablewc. The latter is
+ invoked through wchead_table_iterate with only a character and an
+ element as arguments, so everything else it needs is passed here.
+ collate_output fills this in before the iteration and clears it
+ afterwards. If localedef is ever threaded, this would need to be a
+ __thread variable. */
+static struct
+{
+ struct obstack *weightpool; /* Receives the records from output_weightwc. */
+ struct obstack *extrapool; /* Receives the sequence records. */
+ struct obstack *indpool; /* Receives weight indices of compressed series. */
+ struct locale_collate_t *collate; /* Handed on to output_weightwc. */
+ struct collidx_table *tablewc; /* Table the per-character entries go to. */
+} atwc;
+
+static void add_to_tablewc (uint32_t ch, struct element_t *runp);
+
+/* Return nonzero if ELEM and its successor in the `wcnext' chain can be
+   folded into one series: the successor exists, both sequences have the
+   same length, they agree in everything but the last wide character,
+   and the last character of ELEM is exactly one larger than the last
+   character of the successor.  */
+static int
+wc_series_continues (struct element_t *elem)
+{
+  struct element_t *succ = elem->wcnext;
+
+  if (succ == NULL)
+    return 0;
+  if (elem->nwcs != succ->nwcs)
+    return 0;
+  if (wmemcmp ((wchar_t *) elem->wcs, (wchar_t *) succ->wcs,
+               elem->nwcs - 1) != 0)
+    return 0;
+
+  return elem->wcs[elem->nwcs - 1] == succ->wcs[elem->nwcs - 1] + 1;
+}
+
+/* Callback for wchead_table_iterate: enter the wide character CH, whose
+   list of collation sequences starts at RUNP, into the table described
+   by `atwc'.
+
+   If CH starts exactly one sequence and that sequence is CH alone, the
+   table entry is the weight index itself.  Otherwise the entry is the
+   negated word offset of a list of records appended to the extra pool,
+   one record per sequence or per series of consecutive sequences.  */
+static void
+add_to_tablewc (uint32_t ch, struct element_t *runp)
+{
+  if (runp->wcnext == NULL && runp->nwcs == 1)
+    {
+      /* The simple case: store the weight index directly.  */
+      int32_t direct = output_weightwc (atwc.weightpool, atwc.collate,
+                                        runp);
+      collidx_table_add (atwc.tablewc, ch, direct);
+      return;
+    }
+
+  /* As for the singlebyte table, sequences are recognized and
+     compressed.  The sign marks the entry as an offset into the extra
+     pool.  */
+  collidx_table_add (atwc.tablewc, ch,
+                     -(obstack_object_size (atwc.extrapool)
+                       / sizeof (uint32_t)));
+
+  /* RUNP is known to be non-NULL here since it was dereferenced above.  */
+  for (; runp != NULL; runp = runp->wcnext)
+    {
+      int32_t weightidx;
+      int added;
+      int k;
+
+      if (!wc_series_continues (runp))
+        {
+          /* A single entry.  The record consists of the weight index,
+             the length, and the string without the first character,
+             which has already been tested for.  */
+          weightidx = output_weightwc (atwc.weightpool, atwc.collate,
+                                       runp);
+
+          assert (runp->nwcs > 0);
+          added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
+          if (sizeof (int) == sizeof (int32_t))
+            obstack_make_room (atwc.extrapool, added);
+
+          obstack_int32_grow_fast (atwc.extrapool, weightidx);
+          obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
+          for (k = 1; k < runp->nwcs; ++k)
+            obstack_int32_grow_fast (atwc.extrapool, runp->wcs[k]);
+        }
+      else
+        {
+          /* More than one consecutive entry.  */
+          struct element_t *first = runp;
+          struct element_t *cur;
+
+          /* Room for the index, the length and two sequence tails.  */
+          added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
+          if (sizeof (int32_t) == sizeof (int))
+            obstack_make_room (atwc.extrapool, added);
+
+          /* A series is marked by a negative index, which refers to the
+             indirect table.  */
+          obstack_int32_grow_fast (atwc.extrapool,
+                                   -(obstack_object_size (atwc.indpool)
+                                     / sizeof (int32_t)));
+          obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
+
+          /* Advance to the last element which belongs to the series.  */
+          do
+            runp = runp->wcnext;
+          while (wc_series_continues (runp));
+
+          /* Emit the sequence of that last element and then walk back
+             to the start of the series via `wclast'.  */
+          cur = runp;
+
+          for (k = 1; k < runp->nwcs; ++k)
+            obstack_int32_grow_fast (atwc.extrapool, cur->wcs[k]);
+
+          /* Every element walked over gets its weight index added to
+             the indirect pool.  */
+          do
+            {
+              weightidx = output_weightwc (atwc.weightpool, atwc.collate,
+                                           cur);
+              obstack_int32_grow (atwc.indpool, weightidx);
+
+              cur = cur->wclast;
+            }
+          while (cur != first);
+
+          /* The element the series started with comes last.  */
+          weightidx = output_weightwc (atwc.weightpool, atwc.collate,
+                                       cur);
+          obstack_int32_grow (atwc.indpool, weightidx);
+
+          /* And its sequence closes the record, without a length this
+             time.  */
+          for (k = 1; k < cur->nwcs; ++k)
+            obstack_int32_grow (atwc.extrapool, cur->wcs[k]);
+        }
+    }
+}
+/* Write the LC_COLLATE category of LOCALE to OUTPUT_PATH.
+
+ The fields are added to a `struct locale_file' in this order: the
+ number of rules, the ruleset table, the 8-bit lookup table followed by
+ its weight, extra and indirect pools, three empty fields, the wide
+ character lookup table followed by its weight, extra and indirect
+ pools, the size and content of the hash table of collating element
+ names together with its data pool, the multibyte and wide character
+ collation sequence tables, and the code set name of CHARMAP.
+
+ If LOCALE has no LC_COLLATE data, the number of rules is followed by
+ empty fields only (a zero word for the symbol hash size). */
+void
+collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
+ const char *output_path)
+{
+ struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+ const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
+ struct locale_file file;
+ size_t ch;
+ int32_t tablemb[256]; /* 8-bit lookup table, indexed by the first byte. */
+ struct obstack weightpool;
+ struct obstack extrapool;
+ struct obstack indirectpool;
+ struct section_list *sect;
+ struct collidx_table tablewc;
+ uint32_t elem_size; /* Number of slots in the element name hash table. */
+ uint32_t *elem_table; /* The hash table itself, two words per slot. */
+ int i;
+ struct element_t *runp;
+
+ init_locale_data (&file, nelems);
+ add_locale_uint32 (&file, nrules);
+
+ /* If we have no LC_COLLATE data emit only the number of rules as zero. */
+ if (collate == NULL)
+ {
+ size_t idx;
+ for (idx = 1; idx < nelems; idx++)
+ {
+ /* The words have to be handled specially. */
+ if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
+ add_locale_uint32 (&file, 0);
+ else
+ add_locale_empty (&file);
+ }
+ write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
+ return;
+ }
+
+ obstack_init (&weightpool);
+ obstack_init (&extrapool);
+ obstack_init (&indirectpool);
+
+ /* Since we are using the sign of an integer to mark indirection the
+ offsets in the arrays we are indirectly referring to must not be
+ zero since -0 == 0. Therefore we add a bit of dummy content. */
+ obstack_int32_grow (&extrapool, 0);
+ obstack_int32_grow (&indirectpool, 0);
+
+ /* Prepare the ruleset table. Only the first section which carries a
+ given ruleset index contributes its NRULES rule bytes; I counts the
+ rulesets written so far. */
+ for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
+ if (sect->rules != NULL && sect->ruleidx == i)
+ {
+ int j;
+
+ obstack_make_room (&weightpool, nrules);
+
+ for (j = 0; j < nrules; ++j)
+ obstack_1grow_fast (&weightpool, sect->rules[j]);
+ ++i;
+ }
+ /* And align the output. I rulesets of NRULES bytes each were written;
+ pad with NUL bytes up to the next multiple of LOCFILE_ALIGN. */
+ i = (nrules * i) % LOCFILE_ALIGN;
+ if (i > 0)
+ do
+ obstack_1grow (&weightpool, '\0');
+ while (++i < LOCFILE_ALIGN);
+
+ add_locale_raw_obstack (&file, &weightpool);
+
+ /* Generate the 8-bit table. Walk through the lists of sequences
+ starting with the same byte and add them one after the other to
+ the table. In case we have more than one sequence starting with
+ the same byte we have to use extra indirection.
+
+ First add a record for the NUL byte. This entry will never be used
+ so it does not matter. */
+ tablemb[0] = 0;
+
+ /* Now insert the `UNDEFINED' value if it is used. Since this value
+ will probably be used more than once it is good to store the
+ weights only once.
+ NOTE(review): output_weight returns 0 for &collate->undefined
+ without adding anything to the pool, so this call currently writes
+ nothing -- confirm that this is intended. */
+ if (collate->undefined.used_in_level != 0)
+ output_weight (&weightpool, collate, &collate->undefined);
+
+ for (ch = 1; ch < 256; ++ch)
+ if (collate->mbheads[ch]->mbnext == NULL
+ && collate->mbheads[ch]->nmbs <= 1)
+ {
+ tablemb[ch] = output_weight (&weightpool, collate,
+ collate->mbheads[ch]);
+ }
+ else
+ {
+ /* The entries in the list are sorted by length and then
+ alphabetically. This is the order in which we will add the
+ elements to the collation table. This allows simply walking
+ the table in sequence and stopping at the first matching
+ entry. Since the longer sequences are coming first in the
+ list they have the possibility to match first, just as it
+ has to be. In the worst case we are walking to the end of
+ the list where we put, if no singlebyte sequence is defined
+ in the locale definition, the weights for UNDEFINED.
+
+ To reduce the length of the search list we compress them a bit.
+ This happens by collecting sequences of consecutive byte
+ sequences in one entry (having a begin and an end byte sequence)
+ and add only one index into the weight table. We can find the
+ consecutive entries since they are also consecutive in the list. */
+ struct element_t *runp = collate->mbheads[ch];
+ struct element_t *lastp;
+
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
+
+ tablemb[ch] = -obstack_object_size (&extrapool); /* Negative: byte offset into the extra pool. */
+
+ do
+ {
+ /* Store the current index in the weight table. We know that
+ the current position in the `extrapool' is aligned on a
+ 32-bit address. */
+ int32_t weightidx;
+ int added;
+
+ /* Find out whether this is a single entry or we have more than
+ one consecutive entry. */
+ if (runp->mbnext != NULL
+ && runp->nmbs == runp->mbnext->nmbs
+ && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
+ && (runp->mbs[runp->nmbs - 1]
+ == runp->mbnext->mbs[runp->nmbs - 1] + 1))
+ {
+ int i;
+ struct element_t *series_startp = runp;
+ struct element_t *curp;
+
+ /* Compute how much space we will need: the index word, the
+ length byte and two byte sequences without their first
+ byte, rounded up so that the padding fits as well. */
+ added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
+ + 2 * (runp->nmbs - 1));
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
+ obstack_make_room (&extrapool, added);
+
+ /* More than one consecutive entry. We mark this by having
+ a negative index into the indirect table. */
+ obstack_int32_grow_fast (&extrapool,
+ -(obstack_object_size (&indirectpool)
+ / sizeof (int32_t)));
+
+ /* Now search first the end of the series. */
+ do
+ runp = runp->mbnext;
+ while (runp->mbnext != NULL
+ && runp->nmbs == runp->mbnext->nmbs
+ && memcmp (runp->mbs, runp->mbnext->mbs,
+ runp->nmbs - 1) == 0
+ && (runp->mbs[runp->nmbs - 1]
+ == runp->mbnext->mbs[runp->nmbs - 1] + 1));
+
+ /* Now walk backward from here to the beginning. */
+ curp = runp;
+
+ assert (runp->nmbs <= 256);
+ obstack_1grow_fast (&extrapool, curp->nmbs - 1);
+ for (i = 1; i < curp->nmbs; ++i)
+ obstack_1grow_fast (&extrapool, curp->mbs[i]);
+
+ /* Now find the end of the consecutive sequence and
+ add all the indices in the indirect pool. */
+ do
+ {
+ weightidx = output_weight (&weightpool, collate, curp);
+ obstack_int32_grow (&indirectpool, weightidx);
+
+ curp = curp->mblast;
+ }
+ while (curp != series_startp);
+
+ /* Add the final weight. */
+ weightidx = output_weight (&weightpool, collate, curp);
+ obstack_int32_grow (&indirectpool, weightidx);
+
+ /* And add the end byte sequence. Without length this
+ time. */
+ for (i = 1; i < curp->nmbs; ++i)
+ obstack_1grow_fast (&extrapool, curp->mbs[i]);
+ }
+ else
+ {
+ /* A single entry. Simply add the index and the length and
+ string (except for the first character which is already
+ tested for). */
+ int i;
+
+ /* Output the weight info. */
+ weightidx = output_weight (&weightpool, collate, runp);
+
+ added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
+ + runp->nmbs - 1);
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
+ obstack_make_room (&extrapool, added);
+
+ obstack_int32_grow_fast (&extrapool, weightidx);
+ assert (runp->nmbs <= 256);
+ obstack_1grow_fast (&extrapool, runp->nmbs - 1);
+
+ for (i = 1; i < runp->nmbs; ++i)
+ obstack_1grow_fast (&extrapool, runp->mbs[i]);
+ }
+
+ /* Add alignment bytes if necessary. The room for them was
+ already reserved by rounding ADDED up above. */
+ while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
+ obstack_1grow_fast (&extrapool, '\0');
+
+ /* Next entry. */
+ lastp = runp;
+ runp = runp->mbnext;
+ }
+ while (runp != NULL);
+
+ assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
+
+ /* If the final entry in the list is not a single character we
+ add an UNDEFINED entry here. */
+ if (lastp->nmbs != 1)
+ {
+ int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
+ obstack_make_room (&extrapool, added);
+
+ obstack_int32_grow_fast (&extrapool, 0);
+ /* XXX What rule? We just pick the first. */
+ obstack_1grow_fast (&extrapool, 0);
+ /* Length is zero. */
+ obstack_1grow_fast (&extrapool, 0);
+
+ /* Add alignment bytes if necessary. */
+ while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
+ obstack_1grow_fast (&extrapool, '\0');
+ }
+ }
+
+ /* Add padding to the tables if necessary. */
+ while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
+ obstack_1grow (&weightpool, 0);
+
+ /* Now add the four tables. */
+ add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
+ add_locale_raw_obstack (&file, &weightpool);
+ add_locale_raw_obstack (&file, &extrapool);
+ add_locale_raw_obstack (&file, &indirectpool);
+
+ /* Now the same for the wide character table. We need to store some
+ more information here. */
+ add_locale_empty (&file);
+ add_locale_empty (&file);
+ add_locale_empty (&file);
+
+ /* Since we are using the sign of an integer to mark indirection the
+ offsets in the arrays we are indirectly referring to must not be
+ zero since -0 == 0. Therefore we add a bit of dummy content. */
+ obstack_int32_grow (&extrapool, 0);
+ obstack_int32_grow (&indirectpool, 0);
+
+ /* `UNDEFINED' gets no record of its own here: output_weightwc returns
+ index 0 for it without adding anything to the pool. Any other
+ result is an internal error. */
+ if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
+ abort ();
+
+ /* Generate the table. Walk through the lists of sequences starting
+ with the same wide character and add them one after the other to
+ the table. In case we have more than one sequence starting with
+ the same byte we have to use extra indirection. */
+ tablewc.p = 6;
+ tablewc.q = 10;
+ collidx_table_init (&tablewc);
+
+ /* add_to_tablewc only receives the character and the list head from
+ the iterator; everything else is handed over through `atwc'. */
+ atwc.weightpool = &weightpool;
+ atwc.extrapool = &extrapool;
+ atwc.indpool = &indirectpool;
+ atwc.collate = collate;
+ atwc.tablewc = &tablewc;
+
+ wchead_table_iterate (&collate->wcheads, add_to_tablewc);
+
+ memset (&atwc, 0, sizeof (atwc));
+
+ /* Now add the four tables. */
+ add_locale_collidx_table (&file, &tablewc);
+ add_locale_raw_obstack (&file, &weightpool);
+ add_locale_raw_obstack (&file, &extrapool);
+ add_locale_raw_obstack (&file, &indirectpool);
+
+ /* Finally write the table with collation element names out. It is
+ a hash table with a simple function which gets the name of the
+ character as the input. One character might have many names. The
+ value associated with the name is an index into the weight table
+ where we are then interested in the first-level weight value.
+
+ To determine how large the table should be we are counting the
+ elements we have to put in. Since we are using internal chaining
+ using a secondary hash function we have to make the table a bit
+ larger to avoid extremely long search times. We can achieve
+ good results with a 40% larger table than there are entries. */
+ elem_size = 0;
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
+ /* Yep, the element really counts. */
+ ++elem_size;
+
+ runp = runp->next;
+ }
+ /* Add 40% and find the next prime number. */
+ elem_size = next_prime (elem_size * 1.4);
+
+ /* Allocate the table. Each entry consists of two words: the hash
+ value and an index in a secondary table which provides the index
+ into the weight table and the string itself (so that a match can
+ be determined). */
+ elem_table = (uint32_t *) obstack_alloc (&extrapool,
+ elem_size * 2 * sizeof (uint32_t));
+ memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
+
+ /* Now add the elements. The same condition as in the counting loop
+ above selects them. */
+ runp = collate->start;
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
+ {
+ /* Compute the hash value of the name. */
+ uint32_t namelen = strlen (runp->name);
+ uint32_t hash = elem_hash (runp->name, namelen);
+ size_t idx = hash % elem_size;
+#ifndef NDEBUG
+ size_t start_idx = idx;
+#endif
+
+ if (elem_table[idx * 2] != 0)
+ {
+ /* The spot is already taken. Try iterating using the value
+ from the secondary hashing function. A stored hash of
+ zero marks a free slot. */
+ size_t iter = hash % (elem_size - 2) + 1;
+
+ do
+ {
+ idx += iter;
+ if (idx >= elem_size)
+ idx -= elem_size;
+ assert (idx != start_idx);
+ }
+ while (elem_table[idx * 2] != 0);
+ }
+ /* This is the spot where we will insert the value. */
+ elem_table[idx * 2] = hash;
+ elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
+
+ /* The string itself including length. */
+ obstack_1grow (&extrapool, namelen);
+ obstack_grow (&extrapool, runp->name, namelen);
+
+ /* And the multibyte representation. */
+ obstack_1grow (&extrapool, runp->nmbs);
+ obstack_grow (&extrapool, runp->mbs, runp->nmbs);
+
+ /* And align again to 32 bits. At most three padding bytes are
+ needed, which is what the string literal provides when its
+ terminating NUL is counted. */
+ if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
+ obstack_grow (&extrapool, "\0\0",
+ (sizeof (int32_t)
+ - ((1 + namelen + 1 + runp->nmbs)
+ % sizeof (int32_t))));
+
+ /* Now some 32-bit values: multibyte collation sequence,
+ wide char string (including length), and wide char
+ collation sequence. */
+ obstack_int32_grow (&extrapool, runp->mbseqorder);
+
+ obstack_int32_grow (&extrapool, runp->nwcs);
+ obstack_grow (&extrapool, runp->wcs,
+ runp->nwcs * sizeof (uint32_t));
+ maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
+
+ obstack_int32_grow (&extrapool, runp->wcseqorder);
+ }
+
+ runp = runp->next;
+ }
+
+ /* Prepare to write out this data. */
+ add_locale_uint32 (&file, elem_size);
+ add_locale_uint32_array (&file, elem_table, 2 * elem_size);
+ add_locale_raw_obstack (&file, &extrapool);
+ add_locale_raw_data (&file, collate->mbseqorder, 256);
+ add_locale_collseq_table (&file, &collate->wcseqorder);
+ add_locale_string (&file, charmap->code_set_name);
+ write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
+
+ obstack_free (&weightpool, NULL);
+ obstack_free (&extrapool, NULL);
+ obstack_free (&indirectpool, NULL);
+}
+
+
+/* Skip over the inactive part of a conditional in the LC_COLLATE
+   section, one line at a time, and return the token which ended the
+   skipping.
+
+   End of file and `END' always stop the scan, and so does `endif'.  If
+   TO_ENDIF is zero, `else', `elifdef' and `elifndef' stop it as well;
+   for the latter two the rest of the line is left unread so that the
+   caller can look at the argument.  If TO_ENDIF is nonzero, a further
+   `else' is reported as an error and skipped.
+
+   Nested `ifdef'/`ifndef' are not supported: they are reported and then
+   skipped up to their matching `endif'.  */
+static enum token_t
+skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
+         const struct charmap_t *charmap, int to_endif)
+{
+  for (;;)
+    {
+      enum token_t nowtok = lr_token (ldfile, charmap, NULL, NULL, 0)->tok;
+
+      switch (nowtok)
+        {
+        case tok_eof:
+        case tok_end:
+          return nowtok;
+
+        case tok_ifdef:
+        case tok_ifndef:
+          lr_error (ldfile, _("%s: nested conditionals not supported"),
+                    "LC_COLLATE");
+          nowtok = skip_to (ldfile, collate, charmap, tok_endif);
+          if (nowtok == tok_eof || nowtok == tok_end)
+            return nowtok;
+          break;
+
+        case tok_endif:
+          lr_ignore_rest (ldfile, 1);
+          return nowtok;
+
+        case tok_else:
+          if (!to_endif)
+            {
+              lr_ignore_rest (ldfile, 1);
+              return nowtok;
+            }
+          lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
+          break;
+
+        case tok_elifdef:
+        case tok_elifndef:
+          if (!to_endif)
+            /* Do not read the rest of the line.  */
+            return nowtok;
+          break;
+
+        default:
+          break;
+        }
+
+      /* Nothing of interest; drop the remainder of this line.  */
+      lr_ignore_rest (ldfile, 0);
+    }
+}
+
+
+void
+collate_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct repertoire_t *repertoire = NULL;
+ struct locale_collate_t *collate;
+ struct token *now;
+ struct token *arg = NULL;
+ enum token_t nowtok;
+ enum token_t was_ellipsis = tok_none;
+ struct localedef_t *copy_locale = NULL;
+ /* Parsing state:
+ 0 - start
+ 1 - between `order-start' and `order-end'
+ 2 - after `order-end'
+ 3 - after `reorder-after', waiting for `reorder-end'
+ 4 - after `reorder-end'
+ 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
+ 6 - after `reorder-sections-end'
+ */
+ int state = 0;
+
+ /* Get the repertoire we have to use. */
+ if (repertoire_name != NULL)
+ repertoire = repertoire_read (repertoire_name);
+
+ /* The rest of the line containing `LC_COLLATE' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+ while (1)
+ {
+ do
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ if (nowtok != tok_define)
+ break;
+
+ if (ignore_content)
+ lr_ignore_rest (ldfile, 0);
+ else
+ {
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok != tok_ident)
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
+ else
+ {
+ /* Simply add the new symbol. */
+ struct name_list *newsym = xmalloc (sizeof (*newsym)
+ + arg->val.str.lenmb + 1);
+ memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
+ newsym->str[arg->val.str.lenmb] = '\0';
+ newsym->next = defined;
+ defined = newsym;
+
+ lr_ignore_rest (ldfile, 1);
+ }
+ }
+ }
+
+ if (nowtok == tok_copy)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (now->tok != tok_string)
+ {
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
+
+ skip_category:
+ do
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ while (now->tok != tok_eof && now->tok != tok_end);
+
+ if (now->tok != tok_eof
+ || (now = lr_token (ldfile, charmap, result, NULL, verbose),
+ now->tok == tok_eof))
+ lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
+ else if (now->tok != tok_lc_collate)
+ {
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
+ lr_ignore_rest (ldfile, 0);
+ }
+ else
+ lr_ignore_rest (ldfile, 1);
+
+ return;
+ }
+
+ if (! ignore_content)
+ {
+ /* Get the locale definition. */
+ copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
+ repertoire_name, charmap, NULL);
+ if ((copy_locale->avail & COLLATE_LOCALE) == 0)
+ {
+ /* Not yet loaded. So do it now. */
+ if (locfile_read (copy_locale, charmap) != 0)
+ goto skip_category;
+ }
+
+ if (copy_locale->categories[LC_COLLATE].collate == NULL)
+ return;
+ }
+
+ lr_ignore_rest (ldfile, 1);
+
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* Prepare the data structures. */
+ collate_startup (ldfile, result, copy_locale, ignore_content);
+ collate = result->categories[LC_COLLATE].collate;
+
+ while (1)
+ {
+ char ucs4buf[10];
+ char *symstr;
+ size_t symlen;
+
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+ case tok_copy:
+ /* Allow copying other locales. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (now->tok != tok_string)
+ goto err_label;
+
+ if (! ignore_content)
+ load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
+ charmap, result);
+
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_coll_weight_max:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0)
+ goto err_label;
+
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok != tok_number)
+ goto err_label;
+ if (collate->col_weight_max != -1)
+ lr_error (ldfile, _("%s: duplicate definition of `%s'"),
+ "LC_COLLATE", "col_weight_max");
+ else
+ collate->col_weight_max = arg->val.num;
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_section_symbol:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0)
+ goto err_label;
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_bsymbol)
+ goto err_label;
+ else if (!ignore_content)
+ {
+ /* Check whether this section is already known. */
+ struct section_list *known = collate->sections;
+ while (known != NULL)
+ {
+ if (strcmp (known->name, arg->val.str.startmb) == 0)
+ break;
+ known = known->next;
+ }
+
+ if (known != NULL)
+ {
+ lr_error (ldfile,
+ _("%s: duplicate declaration of section `%s'"),
+ "LC_COLLATE", arg->val.str.startmb);
+ free (arg->val.str.startmb);
+ }
+ else
+ collate->sections = make_seclist_elem (collate,
+ arg->val.str.startmb,
+ collate->sections);
+
+ lr_ignore_rest (ldfile, known == NULL);
+ }
+ else
+ {
+ free (arg->val.str.startmb);
+ lr_ignore_rest (ldfile, 0);
+ }
+ break;
+
+ case tok_collating_element:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0 && state != 2)
+ goto err_label;
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_bsymbol)
+ goto err_label;
+ else
+ {
+ const char *symbol = arg->val.str.startmb;
+ size_t symbol_len = arg->val.str.lenmb;
+
+ /* Next the `from' keyword. */
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_from)
+ {
+ free ((char *) symbol);
+ goto err_label;
+ }
+
+ ldfile->return_widestr = 1;
+ ldfile->translate_strings = 1;
+
+ /* Finally the string with the replacement. */
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+
+ ldfile->return_widestr = 0;
+ ldfile->translate_strings = 0;
+
+ if (arg->tok != tok_string)
+ goto err_label;
+
+ if (!ignore_content && symbol != NULL)
+ {
+ /* The name is already defined. */
+ if (check_duplicate (ldfile, collate, charmap,
+ repertoire, symbol, symbol_len))
+ goto col_elem_free;
+
+ if (arg->val.str.startmb != NULL)
+ insert_entry (&collate->elem_table, symbol, symbol_len,
+ new_element (collate,
+ arg->val.str.startmb,
+ arg->val.str.lenmb - 1,
+ arg->val.str.startwc,
+ symbol, symbol_len, 0));
+ }
+ else
+ {
+ col_elem_free:
+ free ((char *) symbol);
+ free (arg->val.str.startmb);
+ free (arg->val.str.startwc);
+ }
+ lr_ignore_rest (ldfile, 1);
+ }
+ break;
+
+ case tok_collating_symbol:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0 && state != 2)
+ goto err_label;
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_bsymbol)
+ goto err_label;
+ else
+ {
+ char *symbol = arg->val.str.startmb;
+ size_t symbol_len = arg->val.str.lenmb;
+ char *endsymbol = NULL;
+ size_t endsymbol_len = 0;
+ enum token_t ellipsis = tok_none;
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
+ {
+ ellipsis = arg->tok;
+
+ arg = lr_token (ldfile, charmap, result, repertoire,
+ verbose);
+ if (arg->tok != tok_bsymbol)
+ {
+ free (symbol);
+ goto err_label;
+ }
+
+ endsymbol = arg->val.str.startmb;
+ endsymbol_len = arg->val.str.lenmb;
+
+ lr_ignore_rest (ldfile, 1);
+ }
+ else if (arg->tok != tok_eol)
+ {
+ free (symbol);
+ goto err_label;
+ }
+
+ if (!ignore_content)
+ {
+ if (symbol == NULL
+ || (ellipsis != tok_none && endsymbol == NULL))
+ {
+ lr_error (ldfile, _("\
+%s: unknown character in collating symbol name"),
+ "LC_COLLATE");
+ goto col_sym_free;
+ }
+ else if (ellipsis == tok_none)
+ {
+ /* A single symbol, no ellipsis. */
+ if (check_duplicate (ldfile, collate, charmap,
+ repertoire, symbol, symbol_len))
+ /* The name is already defined. */
+ goto col_sym_free;
+
+ insert_entry (&collate->sym_table, symbol, symbol_len,
+ new_symbol (collate, symbol, symbol_len));
+ }
+ else if (symbol_len != endsymbol_len)
+ {
+ col_sym_inv_range:
+ lr_error (ldfile,
+ _("invalid names for character range"));
+ goto col_sym_free;
+ }
+ else
+ {
+ /* Oh my, we have to handle an ellipsis. First, as
+ usual, determine the common prefix and then
+ convert the rest into a range. */
+ size_t prefixlen;
+ unsigned long int from;
+ unsigned long int to;
+ char *endp;
+
+ for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
+ if (symbol[prefixlen] != endsymbol[prefixlen])
+ break;
+
+ /* Convert the rest into numbers. */
+ symbol[symbol_len] = '\0';
+ from = strtoul (&symbol[prefixlen], &endp,
+ ellipsis == tok_ellipsis2 ? 16 : 10);
+ if (*endp != '\0')
+ goto col_sym_inv_range;
+
+ endsymbol[symbol_len] = '\0';
+ to = strtoul (&endsymbol[prefixlen], &endp,
+ ellipsis == tok_ellipsis2 ? 16 : 10);
+ if (*endp != '\0')
+ goto col_sym_inv_range;
+
+ if (from > to)
+ goto col_sym_inv_range;
+
+ /* Now loop over all entries. */
+ while (from <= to)
+ {
+ char *symbuf;
+
+ symbuf = (char *) obstack_alloc (&collate->mempool,
+ symbol_len + 1);
+
+ /* Create the name. */
+ sprintf (symbuf,
+ ellipsis == tok_ellipsis2
+ ? "%.*s%.*lX" : "%.*s%.*lu",
+ (int) prefixlen, symbol,
+ (int) (symbol_len - prefixlen), from);
+
+ if (check_duplicate (ldfile, collate, charmap,
+ repertoire, symbuf, symbol_len))
+ /* The name is already defined. */
+ goto col_sym_free;
+
+ insert_entry (&collate->sym_table, symbuf,
+ symbol_len,
+ new_symbol (collate, symbuf,
+ symbol_len));
+
+ /* Increment the counter. */
+ ++from;
+ }
+
+ goto col_sym_free;
+ }
+ }
+ else
+ {
+ col_sym_free:
+ free (symbol);
+ free (endsymbol);
+ }
+ }
+ break;
+
+ case tok_symbol_equivalence:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0)
+ goto err_label;
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_bsymbol)
+ goto err_label;
+ else
+ {
+ const char *newname = arg->val.str.startmb;
+ size_t newname_len = arg->val.str.lenmb;
+ const char *symname;
+ size_t symname_len;
+ void *symval; /* Actually struct symbol_t* */
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_bsymbol)
+ {
+ free ((char *) newname);
+ goto err_label;
+ }
+
+ symname = arg->val.str.startmb;
+ symname_len = arg->val.str.lenmb;
+
+ if (newname == NULL)
+ {
+ lr_error (ldfile, _("\
+%s: unknown character in equivalent definition name"),
+ "LC_COLLATE");
+
+ sym_equiv_free:
+ free ((char *) newname);
+ free ((char *) symname);
+ break;
+ }
+ if (symname == NULL)
+ {
+ lr_error (ldfile, _("\
+%s: unknown character in equivalent definition value"),
+ "LC_COLLATE");
+ goto sym_equiv_free;
+ }
+
+ /* See whether the symbol name is already defined. */
+ if (find_entry (&collate->sym_table, symname, symname_len,
+ &symval) != 0)
+ {
+ lr_error (ldfile, _("\
+%s: unknown symbol `%s' in equivalent definition"),
+ "LC_COLLATE", symname);
+ goto sym_equiv_free;
+ }
+
+ if (insert_entry (&collate->sym_table,
+ newname, newname_len, symval) < 0)
+ {
+ lr_error (ldfile, _("\
+error while adding equivalent collating symbol"));
+ goto sym_equiv_free;
+ }
+
+ free ((char *) symname);
+ }
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_script:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ /* We get told about the scripts we know. */
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok != tok_bsymbol)
+ goto err_label;
+ else
+ {
+ struct section_list *runp = collate->known_sections;
+ char *name;
+
+ while (runp != NULL)
+ if (strncmp (runp->name, arg->val.str.startmb,
+ arg->val.str.lenmb) == 0
+ && runp->name[arg->val.str.lenmb] == '\0')
+ break;
+ else
+ runp = runp->def_next;
+
+ if (runp != NULL)
+ {
+ lr_error (ldfile, _("duplicate definition of script `%s'"),
+ runp->name);
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ runp = (struct section_list *) xcalloc (1, sizeof (*runp));
+ name = (char *) xmalloc (arg->val.str.lenmb + 1);
+ memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
+ name[arg->val.str.lenmb] = '\0';
+ runp->name = name;
+
+ runp->def_next = collate->known_sections;
+ collate->known_sections = runp;
+ }
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_order_start:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0 && state != 1 && state != 2)
+ goto err_label;
+ state = 1;
+
+ /* The 14652 draft does not specify whether all `order_start' lines
+ must contain the same number of sort-rules, but 14651 does. So
+ we require this here as well. */
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok == tok_bsymbol)
+ {
+ /* This better should be a section name. */
+ struct section_list *sp = collate->known_sections;
+ while (sp != NULL
+ && (sp->name == NULL
+ || strncmp (sp->name, arg->val.str.startmb,
+ arg->val.str.lenmb) != 0
+ || sp->name[arg->val.str.lenmb] != '\0'))
+ sp = sp->def_next;
+
+ if (sp == NULL)
+ {
+ lr_error (ldfile, _("\
+%s: unknown section name `%.*s'"),
+ "LC_COLLATE", (int) arg->val.str.lenmb,
+ arg->val.str.startmb);
+ /* We use the error section. */
+ collate->current_section = &collate->error_section;
+
+ if (collate->error_section.first == NULL)
+ {
+ /* Insert &collate->error_section at the end of
+ the collate->sections list. */
+ if (collate->sections == NULL)
+ collate->sections = &collate->error_section;
+ else
+ {
+ sp = collate->sections;
+ while (sp->next != NULL)
+ sp = sp->next;
+
+ sp->next = &collate->error_section;
+ }
+ collate->error_section.next = NULL;
+ }
+ }
+ else
+ {
+ /* One should not be allowed to open the same
+ section twice. */
+ if (sp->first != NULL)
+ lr_error (ldfile, _("\
+%s: multiple order definitions for section `%s'"),
+ "LC_COLLATE", sp->name);
+ else
+ {
+ /* Insert sp in the collate->sections list,
+ right after collate->current_section. */
+ if (collate->current_section != NULL)
+ {
+ sp->next = collate->current_section->next;
+ collate->current_section->next = sp;
+ }
+ else if (collate->sections == NULL)
+ /* This is the first section to be defined. */
+ collate->sections = sp;
+
+ collate->current_section = sp;
+ }
+
+ /* Next should come the end of the line or a semicolon. */
+ arg = lr_token (ldfile, charmap, result, repertoire,
+ verbose);
+ if (arg->tok == tok_eol)
+ {
+ uint32_t cnt;
+
+ /* This means we have exactly one rule: `forward'. */
+ if (nrules > 1)
+ lr_error (ldfile, _("\
+%s: invalid number of sorting rules"),
+ "LC_COLLATE");
+ else
+ nrules = 1;
+ sp->rules = obstack_alloc (&collate->mempool,
+ (sizeof (enum coll_sort_rule)
+ * nrules));
+ for (cnt = 0; cnt < nrules; ++cnt)
+ sp->rules[cnt] = sort_forward;
+
+ /* Next line. */
+ break;
+ }
+
+ /* Get the next token. */
+ arg = lr_token (ldfile, charmap, result, repertoire,
+ verbose);
+ }
+ }
+ else
+ {
+ /* There is no section symbol. Therefore we use the unnamed
+ section. */
+ collate->current_section = &collate->unnamed_section;
+
+ if (collate->unnamed_section_defined)
+ lr_error (ldfile, _("\
+%s: multiple order definitions for unnamed section"),
+ "LC_COLLATE");
+ else
+ {
+ /* Insert &collate->unnamed_section at the beginning of
+ the collate->sections list. */
+ collate->unnamed_section.next = collate->sections;
+ collate->sections = &collate->unnamed_section;
+ collate->unnamed_section_defined = true;
+ }
+ }
+
+ /* Now read the direction names. */
+ read_directions (ldfile, arg, charmap, repertoire, result);
+
+ /* From now we need the strings untranslated. */
+ ldfile->translate_strings = 0;
+ break;
+
+ case tok_order_end:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 1)
+ goto err_label;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
+ repertoire, result);
+ was_ellipsis = tok_none;
+ }
+
+ state = 2;
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_reorder_after:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+ state = 2;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, arg->val.str.startmb,
+ arg->val.str.lenmb, was_ellipsis, charmap,
+ repertoire, result);
+ was_ellipsis = tok_none;
+ }
+ }
+ else if (state == 0 && copy_locale == NULL)
+ goto err_label;
+ else if (state != 0 && state != 2 && state != 3)
+ goto err_label;
+ state = 3;
+
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
+ {
+ /* Find this symbol in the sequence table. */
+ char ucsbuf[10];
+ char *startmb;
+ size_t lenmb;
+ struct element_t *insp;
+ int no_error = 1;
+ void *ptr;
+
+ if (arg->tok == tok_bsymbol)
+ {
+ startmb = arg->val.str.startmb;
+ lenmb = arg->val.str.lenmb;
+ }
+ else
+ {
+ sprintf (ucsbuf, "U%08X", arg->val.ucs4);
+ startmb = ucsbuf;
+ lenmb = 9;
+ }
+
+ if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
+ /* Yes, the symbol exists. Simply point the cursor
+ to it. */
+ collate->cursor = (struct element_t *) ptr;
+ else
+ {
+ struct symbol_t *symbp;
+ void *ptr;
+
+ if (find_entry (&collate->sym_table, startmb, lenmb,
+ &ptr) == 0)
+ {
+ symbp = ptr;
+
+ if (symbp->order->last != NULL
+ || symbp->order->next != NULL)
+ collate->cursor = symbp->order;
+ else
+ {
+ /* This is a collating symbol but its position
+ is not yet defined. */
+ lr_error (ldfile, _("\
+%s: order for collating symbol %.*s not yet defined"),
+ "LC_COLLATE", (int) lenmb, startmb);
+ collate->cursor = NULL;
+ no_error = 0;
+ }
+ }
+ else if (find_entry (&collate->elem_table, startmb, lenmb,
+ &ptr) == 0)
+ {
+ insp = (struct element_t *) ptr;
+
+ if (insp->last != NULL || insp->next != NULL)
+ collate->cursor = insp;
+ else
+ {
+ /* This is a collating element but its position
+ is not yet defined. */
+ lr_error (ldfile, _("\
+%s: order for collating element %.*s not yet defined"),
+ "LC_COLLATE", (int) lenmb, startmb);
+ collate->cursor = NULL;
+ no_error = 0;
+ }
+ }
+ else
+ {
+ /* This is bad. The symbol after which we have to
+ insert does not exist. */
+ lr_error (ldfile, _("\
+%s: cannot reorder after %.*s: symbol not known"),
+ "LC_COLLATE", (int) lenmb, startmb);
+ collate->cursor = NULL;
+ no_error = 0;
+ }
+ }
+
+ lr_ignore_rest (ldfile, no_error);
+ }
+ else
+ /* This must not happen. */
+ goto err_label;
+ break;
+
+ case tok_reorder_end:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ break;
+
+ if (state != 3)
+ goto err_label;
+ state = 4;
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_reorder_sections_after:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+ state = 2;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
+ repertoire, result);
+ was_ellipsis = tok_none;
+ }
+ }
+ else if (state == 3)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: missing `reorder-end' keyword"), "LC_COLLATE"));
+ state = 4;
+ }
+ else if (state != 2 && state != 4)
+ goto err_label;
+ state = 5;
+
+ /* Get the name of the sections we are adding after. */
+ arg = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (arg->tok == tok_bsymbol)
+ {
+ /* Now find a section with this name. */
+ struct section_list *runp = collate->sections;
+
+ while (runp != NULL)
+ {
+ if (runp->name != NULL
+ && strlen (runp->name) == arg->val.str.lenmb
+ && memcmp (runp->name, arg->val.str.startmb,
+ arg->val.str.lenmb) == 0)
+ break;
+
+ runp = runp->next;
+ }
+
+ if (runp != NULL)
+ collate->current_section = runp;
+ else
+ {
+ /* This is bad. The section after which we have to
+ reorder does not exist. Therefore we cannot
+ process the whole rest of this reorder
+ specification. */
+ lr_error (ldfile, _("%s: section `%.*s' not known"),
+ "LC_COLLATE", (int) arg->val.str.lenmb,
+ arg->val.str.startmb);
+
+ do
+ {
+ lr_ignore_rest (ldfile, 0);
+
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ }
+ while (now->tok == tok_reorder_sections_after
+ || now->tok == tok_reorder_sections_end
+ || now->tok == tok_end);
+
+ /* Process the token we just saw. */
+ nowtok = now->tok;
+ continue;
+ }
+ }
+ else
+ /* This must not happen. */
+ goto err_label;
+ break;
+
+ case tok_reorder_sections_end:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ break;
+
+ if (state != 5)
+ goto err_label;
+ state = 6;
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_bsymbol:
+ case tok_ucs4:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 0 && state != 1 && state != 3 && state != 5)
+ goto err_label;
+
+ if ((state == 0 || state == 5) && nowtok == tok_ucs4)
+ goto err_label;
+
+ if (nowtok == tok_ucs4)
+ {
+ snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
+ symstr = ucs4buf;
+ symlen = 9;
+ }
+ else if (arg != NULL)
+ {
+ symstr = arg->val.str.startmb;
+ symlen = arg->val.str.lenmb;
+ }
+ else
+ {
+ lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
+ (int) ldfile->token.val.str.lenmb,
+ ldfile->token.val.str.startmb);
+ break;
+ }
+
+ struct element_t *seqp;
+ if (state == 0)
+ {
+ /* We are outside an `order_start' region. This means
+ we must only accept definitions of values for
+ collation symbols since these are purely abstract
+ values and don't need directions associated. */
+ void *ptr;
+
+ if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
+ {
+ seqp = ptr;
+
+ /* It's already defined. First check whether this
+ is really a collating symbol. */
+ if (seqp->is_character)
+ goto err_label;
+
+ goto move_entry;
+ }
+ else
+ {
+ void *result;
+
+ if (find_entry (&collate->sym_table, symstr, symlen,
+ &result) != 0)
+ /* No collating symbol, it's an error. */
+ goto err_label;
+
+ /* Maybe this is the first time we define a symbol
+ value and it is before the first actual section. */
+ if (collate->sections == NULL)
+ collate->sections = collate->current_section =
+ &collate->symbol_section;
+ }
+
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
+ charmap, repertoire, result);
+
+ /* Remember that we processed the ellipsis. */
+ was_ellipsis = tok_none;
+
+ /* And don't add the value a second time. */
+ break;
+ }
+ }
+ else if (state == 3)
+ {
+ /* It is possible that we already have this collation sequence.
+ In this case we move the entry. */
+ void *sym;
+ void *ptr;
+
+ /* If the symbol after which we have to insert was not found
+ ignore all entries. */
+ if (collate->cursor == NULL)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
+ {
+ seqp = (struct element_t *) ptr;
+ goto move_entry;
+ }
+
+ if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
+ && (seqp = ((struct symbol_t *) sym)->order) != NULL)
+ goto move_entry;
+
+ if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
+ && (seqp = (struct element_t *) ptr,
+ seqp->last != NULL || seqp->next != NULL
+ || (collate->start != NULL && seqp == collate->start)))
+ {
+ move_entry:
+ /* Remove the entry from the old position. */
+ if (seqp->last == NULL)
+ collate->start = seqp->next;
+ else
+ seqp->last->next = seqp->next;
+ if (seqp->next != NULL)
+ seqp->next->last = seqp->last;
+
+ /* We also have to check whether this entry is the
+ first or last of a section. */
+ if (seqp->section->first == seqp)
+ {
+ if (seqp->section->first == seqp->section->last)
+ /* This section has no content anymore. */
+ seqp->section->first = seqp->section->last = NULL;
+ else
+ seqp->section->first = seqp->next;
+ }
+ else if (seqp->section->last == seqp)
+ seqp->section->last = seqp->last;
+
+ /* Now insert it in the new place. */
+ insert_weights (ldfile, seqp, charmap, repertoire, result,
+ tok_none);
+ break;
+ }
+
+ /* Otherwise we just add a new entry. */
+ }
+ else if (state == 5)
+ {
+ /* We are reordering sections. Find the named section. */
+ struct section_list *runp = collate->sections;
+ struct section_list *prevp = NULL;
+
+ while (runp != NULL)
+ {
+ if (runp->name != NULL
+ && strlen (runp->name) == symlen
+ && memcmp (runp->name, symstr, symlen) == 0)
+ break;
+
+ prevp = runp;
+ runp = runp->next;
+ }
+
+ if (runp == NULL)
+ {
+ lr_error (ldfile, _("%s: section `%.*s' not known"),
+ "LC_COLLATE", (int) symlen, symstr);
+ lr_ignore_rest (ldfile, 0);
+ }
+ else
+ {
+ if (runp != collate->current_section)
+ {
+ /* Remove the named section from the old place and
+ insert it in the new one. */
+ prevp->next = runp->next;
+
+ runp->next = collate->current_section->next;
+ collate->current_section->next = runp;
+ collate->current_section = runp;
+ }
+
+ /* Process the rest of the line which might change
+ the collation rules. */
+ arg = lr_token (ldfile, charmap, result, repertoire,
+ verbose);
+ if (arg->tok != tok_eof && arg->tok != tok_eol)
+ read_directions (ldfile, arg, charmap, repertoire,
+ result);
+ }
+ break;
+ }
+ else if (was_ellipsis != tok_none)
+ {
+ /* Using the information in the `ellipsis_weight'
+ element and this and the last value we have to handle
+ the ellipsis now. */
+ assert (state == 1);
+
+ handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
+ repertoire, result);
+
+ /* Remember that we processed the ellipsis. */
+ was_ellipsis = tok_none;
+
+ /* And don't add the value a second time. */
+ break;
+ }
+
+ /* Now insert in the new place. */
+ insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
+ break;
+
+ case tok_undefined:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (state != 1)
+ goto err_label;
+
+ if (was_ellipsis != tok_none)
+ {
+ lr_error (ldfile,
+ _("%s: cannot have `%s' as end of ellipsis range"),
+ "LC_COLLATE", "UNDEFINED");
+
+ unlink_element (collate);
+ was_ellipsis = tok_none;
+ }
+
+ /* See whether UNDEFINED already appeared somewhere. */
+ if (collate->undefined.next != NULL
+ || &collate->undefined == collate->cursor)
+ {
+ lr_error (ldfile,
+ _("%s: order for `%.*s' already defined at %s:%Zu"),
+ "LC_COLLATE", 9, "UNDEFINED",
+ collate->undefined.file,
+ collate->undefined.line);
+ lr_ignore_rest (ldfile, 0);
+ }
+ else
+ /* Parse the weights. */
+ insert_weights (ldfile, &collate->undefined, charmap,
+ repertoire, result, tok_none);
+ break;
+
+ case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
+ case tok_ellipsis3: /* absolute ellipsis */
+ case tok_ellipsis4: /* symbolic decimal ellipsis */
+ /* This is the symbolic (decimal or hexadecimal) or absolute
+ ellipsis. */
+ if (was_ellipsis != tok_none)
+ goto err_label;
+
+ if (state != 0 && state != 1 && state != 3)
+ goto err_label;
+
+ was_ellipsis = nowtok;
+
+ insert_weights (ldfile, &collate->ellipsis_weight, charmap,
+ repertoire, result, nowtok);
+ break;
+
+ case tok_end:
+ seen_end:
+ /* Next we assume `LC_COLLATE'. */
+ if (!ignore_content)
+ {
+ if (state == 0 && copy_locale == NULL)
+ /* We must either see a copy statement or have
+ ordering values. */
+ lr_error (ldfile,
+ _("%s: empty category description not allowed"),
+ "LC_COLLATE");
+ else if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
+ repertoire, result);
+ was_ellipsis = tok_none;
+ }
+ }
+ else if (state == 3)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: missing `reorder-end' keyword"), "LC_COLLATE"));
+ else if (state == 5)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
+ }
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break;
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
+ else if (arg->tok != tok_lc_collate)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
+ return;
+
+ case tok_define:
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok != tok_ident)
+ goto err_label;
+
+ /* Simply add the new symbol. */
+ struct name_list *newsym = xmalloc (sizeof (*newsym)
+ + arg->val.str.lenmb + 1);
+ memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
+ newsym->str[arg->val.str.lenmb] = '\0';
+ newsym->next = defined;
+ defined = newsym;
+
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_undef:
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok != tok_ident)
+ goto err_label;
+
+ /* Remove _all_ occurrences of the symbol from the list. */
+ struct name_list *prevdef = NULL;
+ struct name_list *curdef = defined;
+ while (curdef != NULL)
+ if (strncmp (arg->val.str.startmb, curdef->str,
+ arg->val.str.lenmb) == 0
+ && curdef->str[arg->val.str.lenmb] == '\0')
+ {
+ if (prevdef == NULL)
+ defined = curdef->next;
+ else
+ prevdef->next = curdef->next;
+
+ struct name_list *olddef = curdef;
+ curdef = curdef->next;
+
+ free (olddef);
+ }
+ else
+ {
+ prevdef = curdef;
+ curdef = curdef->next;
+ }
+
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_ifdef:
+ case tok_ifndef:
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ found_ifdef:
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok != tok_ident)
+ goto err_label;
+ lr_ignore_rest (ldfile, 1);
+
+ if (collate->else_action == else_none)
+ {
+ curdef = defined;
+ while (curdef != NULL)
+ if (strncmp (arg->val.str.startmb, curdef->str,
+ arg->val.str.lenmb) == 0
+ && curdef->str[arg->val.str.lenmb] == '\0')
+ break;
+ else
+ curdef = curdef->next;
+
+ if ((nowtok == tok_ifdef && curdef != NULL)
+ || (nowtok == tok_ifndef && curdef == NULL))
+ {
+ /* We have to use the if-branch. */
+ collate->else_action = else_ignore;
+ }
+ else
+ {
+ /* We have to use the else-branch, if there is one. */
+ nowtok = skip_to (ldfile, collate, charmap, 0);
+ if (nowtok == tok_else)
+ collate->else_action = else_seen;
+ else if (nowtok == tok_elifdef)
+ {
+ nowtok = tok_ifdef;
+ goto found_ifdef;
+ }
+ else if (nowtok == tok_elifndef)
+ {
+ nowtok = tok_ifndef;
+ goto found_ifdef;
+ }
+ else if (nowtok == tok_eof)
+ goto seen_eof;
+ else if (nowtok == tok_end)
+ goto seen_end;
+ }
+ }
+ else
+ {
+ /* XXX Should it really become necessary to support nested
+ preprocessor handling we will push the state here. */
+ lr_error (ldfile, _("%s: nested conditionals not supported"),
+ "LC_COLLATE");
+ nowtok = skip_to (ldfile, collate, charmap, 1);
+ if (nowtok == tok_eof)
+ goto seen_eof;
+ else if (nowtok == tok_end)
+ goto seen_end;
+ }
+ break;
+
+ case tok_elifdef:
+ case tok_elifndef:
+ case tok_else:
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ lr_ignore_rest (ldfile, 1);
+
+ if (collate->else_action == else_ignore)
+ {
+ /* Ignore everything until the endif. */
+ nowtok = skip_to (ldfile, collate, charmap, 1);
+ if (nowtok == tok_eof)
+ goto seen_eof;
+ else if (nowtok == tok_end)
+ goto seen_end;
+ }
+ else
+ {
+ assert (collate->else_action == else_none);
+ lr_error (ldfile, _("\
+%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
+ nowtok == tok_else ? "else"
+ : nowtok == tok_elifdef ? "elifdef" : "elifndef");
+ }
+ break;
+
+ case tok_endif:
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ lr_ignore_rest (ldfile, 1);
+
+ if (collate->else_action != else_ignore
+ && collate->else_action != else_seen)
+ lr_error (ldfile, _("\
+%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
+
+ /* XXX If we support nested preprocessor directives we pop
+ the state here. */
+ collate->else_action = else_none;
+ break;
+
+ default:
+ err_label:
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ seen_eof:
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
+}
diff --git a/REORG.TODO/locale/programs/ld-ctype.c b/REORG.TODO/locale/programs/ld-ctype.c
new file mode 100644
index 0000000000..df266c20d6
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-ctype.c
@@ -0,0 +1,4030 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <alloca.h>
+#include <byteswap.h>
+#include <endian.h>
+#include <errno.h>
+#include <limits.h>
+#include <obstack.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include "localedef.h"
+#include "charmap.h"
+#include "localeinfo.h"
+#include "langinfo.h"
+#include "linereader.h"
+#include "locfile-token.h"
+#include "locfile.h"
+
+#include <assert.h>
+
+
+/* The bit used for representing a special class. CLASS is a class token; its bit position is its offset from tok_upper. */
+#define BITPOS(class) ((class) - tok_upper)
+#define BIT(class) (_ISbit (BITPOS (class))) /* Mask for CLASS built with _ISbit. */
+#define BITw(class) (_ISwbit (BITPOS (class))) /* Mask for CLASS built with _ISwbit. */
+
+#define ELEM(ctype, collection, idx, value) \
+ *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
+ &ctype->collection##_act idx, value) /* Lvalue: the slot find_idx returns for VALUE. IDX is pasted after each member name (e.g. a subscript, or nothing). */
+
+
+/* To be compatible with former implementations we restrict, for now,
+ the number of bits for character classes to 16. When compatibility
+ is no longer necessary, increase the number to 32. */
+#define char_class_t uint16_t /* 16-bit class mask type. */
+#define char_class32_t uint32_t /* 32-bit class mask type. */
+
+
+/* Type to describe a transliteration action. We have a from-string,
+ possibly of multiple characters, and a set of multiple-character
+ to-strings. All are 32-bit values since this is what is used in
+ the gconv functions. */
+struct translit_to_t
+{
+ uint32_t *str; /* One to-string, as 32-bit values. */
+
+ struct translit_to_t *next; /* Next to-string of the same action, or NULL. */
+};
+
+struct translit_t /* One transliteration action: a from-string and its to-strings. */
+{
+ uint32_t *from; /* The from-string, as 32-bit values. */
+
+ const char *fname; /* Presumably the file holding the definition -- confirm at the point of use. */
+ size_t lineno; /* Presumably the line of the definition in FNAME -- confirm. */
+
+ struct translit_to_t *to; /* Head of the list of to-strings. */
+
+ struct translit_t *next; /* Next action in the list. */
+};
+
+struct translit_ignore_t /* One entry of the translit_ignore list in struct locale_ctype_t. */
+{
+ uint32_t from; /* NOTE(review): FROM/TO/STEP look like a stepped range of 32-bit values -- confirm against the parser. */
+ uint32_t to;
+ uint32_t step;
+
+ const char *fname; /* Presumably the file holding the definition -- confirm. */
+ size_t lineno; /* Presumably the line of the definition in FNAME -- confirm. */
+
+ struct translit_ignore_t *next; /* Next entry in the list. */
+};
+
+
+/* Type to describe a transliteration include statement. Entries are chained through NEXT. */
+struct translit_include_t
+{
+ const char *copy_locale; /* Presumably the name of the locale to include from -- confirm. */
+ const char *copy_repertoire; /* Presumably the repertoire name used for that locale -- confirm. */
+
+ struct translit_include_t *next; /* Next include statement in the list. */
+};
+
+/* Provide a dummy pointer for the empty string: an array holding a single zero. */
+static uint32_t no_str[] = { 0 };
+
+
+/* Sparse table of uint32_t. "3level.h" is included three times below, each time parameterised by TABLE, ELEMENT and DEFAULT. */
+#define TABLE idx_table
+#define ELEMENT uint32_t
+#define DEFAULT ((uint32_t) ~0)
+#define NO_ADD_LOCALE /* NOTE(review): presumably suppresses a generated add-to-locale-file function -- confirm in 3level.h. */
+#include "3level.h" /* Provides struct idx_table and idx_table_init, both used below. */
+
+#define TABLE wcwidth_table
+#define ELEMENT uint8_t
+#define DEFAULT 0xff
+#include "3level.h" /* Provides struct wcwidth_table (the `width' member of struct locale_ctype_t). */
+
+#define TABLE wctrans_table
+#define ELEMENT int32_t
+#define DEFAULT 0
+#define wctrans_table_add wctrans_table_add_internal /* Rename the generated add function; a wrapper takes over its name below. */
+#include "3level.h"
+#undef wctrans_table_add
+/* A wctrans_table entry holds the offset MAPPED_WC - WC, not MAPPED_WC
+ itself; compute that offset and store it as the entry for WC. */
+static inline void
+wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
+{
+ const uint32_t offset = mapped_wc - wc; wctrans_table_add_internal (t, wc, offset);
+}
+
+/* Construction of sparse 3-level tables.
+ See wchar-lookup.h for their structure and the meaning of p and q. */
+
+struct wctype_table
+{
+ /* Parameters. */
+ unsigned int p;
+ unsigned int q;
+ /* Working representation. */
+ size_t level1_alloc; /* NOTE(review): each levelN_alloc/levelN_size pair presumably tracks capacity and used length of levelN -- confirm. */
+ size_t level1_size;
+ uint32_t *level1;
+ size_t level2_alloc;
+ size_t level2_size;
+ uint32_t *level2;
+ size_t level3_alloc;
+ size_t level3_size;
+ uint32_t *level3;
+ size_t result_size; /* NOTE(review): presumably the size of the finished table -- confirm where it is set. */
+};
+
+static void add_locale_wctype_table (struct locale_file *file,
+ struct wctype_table *t); /* Forward declaration of a file-local function; its definition is not in this part of the file. */
+
+/* The real definition of the struct for the LC_CTYPE locale. */
+struct locale_ctype_t
+{
+ uint32_t *charnames; /* ctype_startup allocates charnames_max entries and sets the first 256 to their own index. */
+ size_t charnames_max; /* Number of entries allocated for charnames. */
+ size_t charnames_act; /* Number of entries in use; ctype_startup starts it at 256. */
+ /* An index lookup table, to speed up find_idx. */
+ struct idx_table charnames_idx;
+
+ struct repertoire_t *repertoire; /* Set by ctype_finish from repertoire_read when a repertoire name is known. */
+
+ /* We will allow up to 8 * sizeof (uint32_t) character classes. */
+#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
+ size_t nr_charclass;
+ const char *classnames[MAX_NR_CHARCLASS];
+ uint32_t last_class_char; /* ctype_startup initialises this to ILLEGAL_CHAR_VALUE. */
+ uint32_t class256_collection[256];
+ uint32_t *class_collection; /* Per-character class words; ctype_finish treats a zero entry as nothing to check. */
+ size_t class_collection_max; /* Number of entries allocated for class_collection. */
+ size_t class_collection_act; /* Number of entries in use; ctype_startup starts it at 256. */
+ uint32_t class_done;
+ uint32_t class_offset;
+
+ struct charseq **mbdigits;
+ size_t mbdigits_act;
+ size_t mbdigits_max;
+ uint32_t *wcdigits;
+ size_t wcdigits_act;
+ size_t wcdigits_max;
+
+ struct charseq *mboutdigits[10];
+ uint32_t wcoutdigits[10];
+ size_t outdigits_act;
+
+ /* If the following number ever turns out to be too small simply
+ increase it. But I doubt it will. --drepper@gnu */
+#define MAX_NR_CHARMAP 16
+ const char *mapnames[MAX_NR_CHARMAP];
+ uint32_t *map_collection[MAX_NR_CHARMAP]; /* ctype_startup registers "toupper" then "tolower" and fills entries 0..255 of maps 0 and 1 with the identity. */
+ uint32_t map256_collection[2][256]; /* Likewise filled with the identity by ctype_startup. */
+ size_t map_collection_max[MAX_NR_CHARMAP];
+ size_t map_collection_act[MAX_NR_CHARMAP];
+ size_t map_collection_nr;
+ size_t last_map_idx; /* ctype_startup initialises this to MAX_NR_CHARMAP. */
+ int tomap_done[MAX_NR_CHARMAP];
+ uint32_t map_offset;
+
+ /* Transliteration information. */
+ struct translit_include_t *translit_include;
+ struct translit_t *translit;
+ struct translit_ignore_t *translit_ignore;
+ uint32_t ntranslit_ignore;
+
+ uint32_t *default_missing;
+ const char *default_missing_file;
+ size_t default_missing_lineno;
+
+ uint32_t to_nonascii; /* Set to 1 by ctype_startup when enc_not_ascii_compatible is set. */
+ uint32_t nonascii_case;
+
+ /* The arrays for the binary representation. */
+ char_class_t *ctype_b;
+ char_class32_t *ctype32_b;
+ uint32_t **map_b;
+ uint32_t **map32_b;
+ uint32_t **class_b;
+ struct wctype_table *class_3level;
+ struct wctrans_table *map_3level;
+ uint32_t *class_name_ptr;
+ uint32_t *map_name_ptr;
+ struct wcwidth_table width;
+ uint32_t mb_cur_max;
+ const char *codeset_name; /* ctype_finish sets this to the charmap's code_set_name, or "//UNKNOWN//" if there is none. */
+ uint32_t *translit_from_idx;
+ uint32_t *translit_from_tbl;
+ uint32_t *translit_to_idx;
+ uint32_t *translit_to_tbl;
+ uint32_t translit_idx_size;
+ size_t translit_from_tbl_size;
+ size_t translit_to_tbl_size;
+
+ struct obstack mempool; /* Initialised with obstack_init in ctype_startup. */
+};
+
+
+/* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless
+ of whether 'int' is 16 bit, 32 bit, or 64 bit. */
+#define EMPTY ((uint32_t) ~0)
+
+
+#define obstack_chunk_alloc xmalloc /* Chunk allocator that the <obstack.h> macros expand to. */
+#define obstack_chunk_free free /* Matching chunk deallocator for the obstack macros. */
+
+
+/* Prototypes for local functions. */
+static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ struct localedef_t *copy_locale,
+ int ignore_content);
+static void ctype_class_new (struct linereader *lr,
+ struct locale_ctype_t *ctype, const char *name); /* The order of the calls in ctype_startup determines the class bit positions. */
+static void ctype_map_new (struct linereader *lr,
+ struct locale_ctype_t *ctype,
+ const char *name, const struct charmap_t *charmap);
+static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
+ size_t *max, size_t *act, uint32_t idx); /* Returns the slot pointer that the ELEM macro dereferences. */
+static void set_class_defaults (struct locale_ctype_t *ctype,
+ const struct charmap_t *charmap,
+ struct repertoire_t *repertoire); /* Called by ctype_finish to set defaults for classes not specified. */
+static void allocate_arrays (struct locale_ctype_t *ctype,
+ const struct charmap_t *charmap,
+ struct repertoire_t *repertoire);
+
+
+static const char *longnames[] = /* English names of the digits 0 to 9, indexed by digit value. */
+{
+ "zero", "one", "two", "three", "four",
+ "five", "six", "seven", "eight", "nine"
+};
+static const char *uninames[] = /* The same ten digits written as "U" plus eight hex digits: 0x30 to 0x39. */
+{
+ "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
+ "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
+};
+static const unsigned char digits[] = "0123456789"; /* The ten decimal digit characters, indexed by digit value. */
+
+
+/* Set up the LC_CTYPE data of LOCALE, unless IGNORE_CONTENT is set or the
+ data already exists: build a fresh structure, or share COPY_LOCALE's. */
+static void
+ctype_startup (struct linereader *lr, struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ struct localedef_t *copy_locale, int ignore_content)
+{
+ unsigned int cnt;
+ struct locale_ctype_t *ctype;
+
+ if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
+ {
+ if (copy_locale == NULL)
+ {
+ /* Allocate the needed room; xcalloc hands back zeroed memory. */
+ ctype = (struct locale_ctype_t *) xcalloc (1, sizeof (*ctype));
+ locale->categories[LC_CTYPE].ctype = ctype;
+
+ /* We have seen no names yet. */
+ ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
+ ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
+ * sizeof (uint32_t));
+ for (cnt = 0; cnt < 256; ++cnt)
+ ctype->charnames[cnt] = cnt;
+ ctype->charnames_act = 256;
+ idx_table_init (&ctype->charnames_idx);
+
+ /* Fill character class information. */
+ ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+ /* The order of the following instructions determines the bit
+ positions! */
+ ctype_class_new (lr, ctype, "upper");
+ ctype_class_new (lr, ctype, "lower");
+ ctype_class_new (lr, ctype, "alpha");
+ ctype_class_new (lr, ctype, "digit");
+ ctype_class_new (lr, ctype, "xdigit");
+ ctype_class_new (lr, ctype, "space");
+ ctype_class_new (lr, ctype, "print");
+ ctype_class_new (lr, ctype, "graph");
+ ctype_class_new (lr, ctype, "blank");
+ ctype_class_new (lr, ctype, "cntrl");
+ ctype_class_new (lr, ctype, "punct");
+ ctype_class_new (lr, ctype, "alnum");
+
+ ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
+ ctype->class_collection = (uint32_t *) /* Elements are uint32_t, like charnames above. */
+ xcalloc (ctype->class_collection_max, sizeof (uint32_t));
+ ctype->class_collection_act = 256;
+
+ /* Fill character map information. */
+ ctype->last_map_idx = MAX_NR_CHARMAP;
+ ctype_map_new (lr, ctype, "toupper", charmap);
+ ctype_map_new (lr, ctype, "tolower", charmap);
+
+ /* Fill first 256 entries in `toXXX' arrays with the identity. */
+ for (cnt = 0; cnt < 256; ++cnt)
+ {
+ ctype->map_collection[0][cnt] = cnt;
+ ctype->map_collection[1][cnt] = cnt;
+
+ ctype->map256_collection[0][cnt] = cnt;
+ ctype->map256_collection[1][cnt] = cnt;
+ }
+
+ if (enc_not_ascii_compatible)
+ ctype->to_nonascii = 1;
+
+ obstack_init (&ctype->mempool);
+ }
+ else
+ locale->categories[LC_CTYPE].ctype = /* Share the copied locale's structure (may still be NULL). */
+ copy_locale->categories[LC_CTYPE].ctype;
+ }
+}
+
+
+/* Finish the LC_CTYPE category of LOCALE after the definition has been
+ read. The function
+ - resolves a pending `copy' request or, failing that, creates an empty
+ definition,
+ - records the codeset name taken from CHARMAP,
+ - applies the class defaults and checks all class memberships against
+ the POSIX table below,
+ - checks <SP> and the ASCII case mappings,
+ - enters every character with a known UCS4 value from the WIDTH rules
+ and from the charmap into the name array,
+ - normalizes the input and output digit tables, and
+ - sorts the translit_ignore list by its `from' value.
+ Problems are reported through error(); only an inconsistency in the
+ table itself is fatal. */
+void
+ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+ /* See POSIX.2, table 2-6 for the meaning of the following table. */
+#define NCLASS 12
+ static const struct
+ {
+ const char *name;
+ const char allow[NCLASS];
+ }
+ valid_table[NCLASS] =
+ {
+ /* The order is important. See token.h for more information.
+ M = Always, D = Default, - = Permitted, X = Mutually exclusive */
+ { "upper", "--MX-XDDXXX-" },
+ { "lower", "--MX-XDDXXX-" },
+ { "alpha", "---X-XDDXXX-" },
+ { "digit", "XXX--XDDXXX-" },
+ { "xdigit", "-----XDDXXX-" },
+ { "space", "XXXXX------X" },
+ { "print", "---------X--" },
+ { "graph", "---------X--" },
+ { "blank", "XXXXXM-----X" },
+ { "cntrl", "XXXXX-XX--XX" },
+ { "punct", "XXXXX-DD-X-X" },
+ { "alnum", "-----XDDXXX-" }
+ };
+ size_t cnt;
+ int cls1, cls2;
+ uint32_t space_value;
+ struct charseq *space_seq;
+ struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+ int warned;
+ const void *key;
+ size_t len;
+ void *vdata;
+ void *curs;
+
+ /* Now resolve copying and also handle completely missing definitions. */
+ if (ctype == NULL)
+ {
+ const char *repertoire_name;
+
+ /* First see whether we were supposed to copy. If yes, find the
+ actual definition. */
+ if (locale->copy_name[LC_CTYPE] != NULL)
+ {
+ /* Find the copying locale. This has to happen transitively since
+ the locale we are copying from might also be copying another
+ one. */
+ struct localedef_t *from = locale;
+
+ do
+ from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
+ from->repertoire_name, charmap);
+ while (from->categories[LC_CTYPE].ctype == NULL
+ && from->copy_name[LC_CTYPE] != NULL);
+
+ ctype = locale->categories[LC_CTYPE].ctype
+ = from->categories[LC_CTYPE].ctype;
+ }
+
+ /* If there is still no definition issue a warning and create an
+ empty one. */
+ if (ctype == NULL)
+ {
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_CTYPE"));
+ ctype_startup (NULL, locale, charmap, NULL, 0);
+ ctype = locale->categories[LC_CTYPE].ctype;
+ }
+
+ /* Get the repertoire we have to use. */
+ repertoire_name = locale->repertoire_name ?: repertoire_global;
+ if (repertoire_name != NULL)
+ ctype->repertoire = repertoire_read (repertoire_name);
+ }
+
+ /* We need the name of the currently used 8-bit character set to
+ make correct conversion between this 8-bit representation and the
+ ISO 10646 character set used internally for wide characters. */
+ ctype->codeset_name = charmap->code_set_name;
+ if (ctype->codeset_name == NULL)
+ {
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+No character set name specified in charmap")));
+ ctype->codeset_name = "//UNKNOWN//";
+ }
+
+ /* Set default value for classes not specified. */
+ set_class_defaults (ctype, charmap, ctype->repertoire);
+
+ /* Check according to table. First the wide character classes ... */
+ for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
+ {
+ uint32_t tmp = ctype->class_collection[cnt];
+
+ if (tmp != 0)
+ {
+ for (cls1 = 0; cls1 < NCLASS; ++cls1)
+ if ((tmp & _ISwbit (cls1)) != 0)
+ for (cls2 = 0; cls2 < NCLASS; ++cls2)
+ if (valid_table[cls1].allow[cls2] != '-')
+ {
+ int eq = (tmp & _ISwbit (cls2)) != 0;
+ switch (valid_table[cls1].allow[cls2])
+ {
+ case 'M':
+ if (!eq)
+ {
+ uint32_t value = ctype->charnames[cnt];
+
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+character L'\\u%0*x' in class `%s' must be in class `%s'"),
+ value > 0xffff ? 8 : 4,
+ value,
+ valid_table[cls1].name,
+ valid_table[cls2].name));
+ }
+ break;
+
+ case 'X':
+ if (eq)
+ {
+ uint32_t value = ctype->charnames[cnt];
+
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+character L'\\u%0*x' in class `%s' must not be in class `%s'"),
+ value > 0xffff ? 8 : 4,
+ value,
+ valid_table[cls1].name,
+ valid_table[cls2].name));
+ }
+ break;
+
+ case 'D':
+ /* Membership in CLS1 implies CLS2 by default. */
+ ctype->class_collection[cnt] |= _ISwbit (cls2);
+ break;
+
+ default:
+ WITH_CUR_LOCALE (error (5, 0, _("\
+internal error in %s, line %u"), __FUNCTION__, __LINE__));
+ }
+ }
+ }
+ }
+
+ /* ... then the same test for the single-byte classes. */
+ for (cnt = 0; cnt < 256; ++cnt)
+ {
+ uint32_t tmp = ctype->class256_collection[cnt];
+
+ if (tmp != 0)
+ {
+ for (cls1 = 0; cls1 < NCLASS; ++cls1)
+ if ((tmp & _ISbit (cls1)) != 0)
+ for (cls2 = 0; cls2 < NCLASS; ++cls2)
+ if (valid_table[cls1].allow[cls2] != '-')
+ {
+ int eq = (tmp & _ISbit (cls2)) != 0;
+ switch (valid_table[cls1].allow[cls2])
+ {
+ case 'M':
+ if (!eq)
+ {
+ char buf[17];
+
+ snprintf (buf, sizeof buf, "\\%Zo", cnt);
+
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+character '%s' in class `%s' must be in class `%s'"),
+ buf,
+ valid_table[cls1].name,
+ valid_table[cls2].name));
+ }
+ break;
+
+ case 'X':
+ if (eq)
+ {
+ char buf[17];
+
+ snprintf (buf, sizeof buf, "\\%Zo", cnt);
+
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+character '%s' in class `%s' must not be in class `%s'"),
+ buf,
+ valid_table[cls1].name,
+ valid_table[cls2].name));
+ }
+ break;
+
+ case 'D':
+ ctype->class256_collection[cnt] |= _ISbit (cls2);
+ break;
+
+ default:
+ WITH_CUR_LOCALE (error (5, 0, _("\
+internal error in %s, line %u"), __FUNCTION__, __LINE__));
+ }
+ }
+ }
+ }
+
+ /* ... and now test <SP> as a special case. CNT is set to the table
+ index of the class under test so that the message can name it. */
+ space_value = 32;
+ if (((cnt = BITPOS (tok_space),
+ (ELEM (ctype, class_collection, , space_value)
+ & BITw (tok_space)) == 0)
+ || (cnt = BITPOS (tok_blank),
+ (ELEM (ctype, class_collection, , space_value)
+ & BITw (tok_blank)) == 0)))
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
+ valid_table[cnt].name));
+ }
+ else if (((cnt = BITPOS (tok_punct),
+ (ELEM (ctype, class_collection, , space_value)
+ & BITw (tok_punct)) != 0)
+ || (cnt = BITPOS (tok_graph),
+ (ELEM (ctype, class_collection, , space_value)
+ & BITw (tok_graph))
+ != 0)))
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+<SP> character must not be in class `%s'"),
+ valid_table[cnt].name));
+ }
+ else
+ ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
+
+ /* The same for the single-byte representation of <SP>; the charmap
+ may know it under one of several names. */
+ space_seq = charmap_find_value (charmap, "SP", 2);
+ if (space_seq == NULL)
+ space_seq = charmap_find_value (charmap, "space", 5);
+ if (space_seq == NULL)
+ space_seq = charmap_find_value (charmap, "U00000020", 9);
+ if (space_seq == NULL || space_seq->nbytes != 1)
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+character <SP> not defined in character map")));
+ }
+ else if (((cnt = BITPOS (tok_space),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_space)) == 0)
+ || (cnt = BITPOS (tok_blank),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_blank)) == 0)))
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
+ valid_table[cnt].name));
+ }
+ else if (((cnt = BITPOS (tok_punct),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_punct)) != 0)
+ || (cnt = BITPOS (tok_graph),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_graph)) != 0)))
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+<SP> character must not be in class `%s'"),
+ valid_table[cnt].name));
+ }
+ else
+ ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
+
+ /* Check whether all single-byte characters map to their upper/lowercase
+ equivalent according to the ASCII rules. */
+ for (cnt = 'A'; cnt <= 'Z'; ++cnt)
+ {
+ uint32_t uppval = ctype->map256_collection[0][cnt];
+ uint32_t lowval = ctype->map256_collection[1][cnt];
+ uint32_t lowuppval = ctype->map256_collection[0][lowval];
+ uint32_t lowlowval = ctype->map256_collection[1][lowval];
+
+ if (uppval != cnt
+ || lowval != cnt + 0x20
+ || lowuppval != cnt
+ || lowlowval != cnt + 0x20)
+ ctype->nonascii_case = 1;
+ }
+ for (cnt = 0; cnt < 256; ++cnt)
+ if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
+ if (ctype->map256_collection[0][cnt] != cnt
+ || ctype->map256_collection[1][cnt] != cnt)
+ ctype->nonascii_case = 1;
+
+ /* Now that the tests are done make sure the name array contains all
+ characters which are handled in the WIDTH section of the
+ character set definition file. */
+ if (charmap->width_rules != NULL)
+ for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
+ {
+ unsigned char bytes[charmap->mb_cur_max];
+ int nbytes = charmap->width_rules[cnt].from->nbytes;
+
+ /* We have the range of character for which the width is
+ specified described using byte sequences of the multibyte
+ charset. We have to convert this to UCS4 now. And we
+ cannot simply convert the beginning and the end of the
+ sequence, we have to iterate over the byte sequence and
+ convert it for every single character. */
+ memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
+
+ while (nbytes < charmap->width_rules[cnt].to->nbytes
+ || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
+ nbytes) <= 0)
+ {
+ /* Find the UCS value for `bytes'. */
+ int inner;
+ uint32_t wch;
+ struct charseq *seq
+ = charmap_find_symbol (charmap, (char *) bytes, nbytes);
+
+ if (seq == NULL)
+ wch = ILLEGAL_CHAR_VALUE;
+ else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+ wch = seq->ucs4;
+ else
+ wch = repertoire_find_value (ctype->repertoire, seq->name,
+ strlen (seq->name));
+
+ if (wch != ILLEGAL_CHAR_VALUE)
+ /* We are only interested in the side-effects of the
+ `find_idx' call. It will add appropriate entries in
+ the name array if this is necessary. */
+ (void) find_idx (ctype, NULL, NULL, NULL, wch);
+
+ /* "Increment" the bytes sequence. */
+ inner = nbytes - 1;
+ while (inner >= 0 && bytes[inner] == 0xff)
+ --inner;
+
+ if (inner < 0)
+ {
+ /* We have to extend the byte sequence. */
+ if (nbytes >= charmap->width_rules[cnt].to->nbytes)
+ break;
+
+ bytes[0] = 1;
+ memset (&bytes[1], 0, nbytes);
+ ++nbytes;
+ }
+ else
+ {
+ ++bytes[inner];
+ while (++inner < nbytes)
+ bytes[inner] = 0;
+ }
+ }
+ }
+
+ /* Now set all the other characters of the character set to the
+ default width. */
+ curs = NULL;
+ while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
+ {
+ struct charseq *data = (struct charseq *) vdata;
+
+ if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ data->ucs4 = repertoire_find_value (ctype->repertoire,
+ data->name, len);
+
+ if (data->ucs4 != ILLEGAL_CHAR_VALUE)
+ (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
+ }
+
+ /* There must be a multiple of 10 digits. */
+ if (ctype->mbdigits_act % 10 != 0)
+ {
+ assert (ctype->mbdigits_act == ctype->wcdigits_act);
+ ctype->wcdigits_act -= ctype->mbdigits_act % 10;
+ ctype->mbdigits_act -= ctype->mbdigits_act % 10;
+ WITH_CUR_LOCALE (error (0, 0, _("\
+`digit' category has not entries in groups of ten")));
+ }
+
+ /* Check the input digits. There must be a multiple of ten available.
+ In each group it could be that one or the other character is missing.
+ In this case the whole group must be removed. */
+ cnt = 0;
+ while (cnt < ctype->mbdigits_act)
+ {
+ size_t inner;
+ for (inner = 0; inner < 10; ++inner)
+ if (ctype->mbdigits[cnt + inner] == NULL)
+ break;
+
+ if (inner == 10)
+ cnt += 10;
+ else
+ {
+ /* Remove the group. The number of entries to move is
+ determined by the multibyte table itself. */
+ memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
+ ((ctype->mbdigits_act - cnt - 10)
+ * sizeof (ctype->mbdigits[0])));
+ ctype->mbdigits_act -= 10;
+ }
+ }
+
+ /* If no input digits are given use the default. */
+ if (ctype->mbdigits_act == 0)
+ {
+ if (ctype->mbdigits_max == 0)
+ {
+ ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
+ 10 * sizeof (struct charseq *));
+ ctype->mbdigits_max = 10;
+ }
+
+ for (cnt = 0; cnt < 10; ++cnt)
+ {
+ ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
+ (char *) digits + cnt, 1);
+ if (ctype->mbdigits[cnt] == NULL)
+ {
+ ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
+ longnames[cnt],
+ strlen (longnames[cnt]));
+ if (ctype->mbdigits[cnt] == NULL)
+ {
+ /* Hum, this ain't good. */
+ WITH_CUR_LOCALE (error (0, 0, _("\
+no input digits defined and none of the standard names in the charmap")));
+
+ ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
+ sizeof (struct charseq) + 1);
+
+ /* This is better than nothing. */
+ ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
+ ctype->mbdigits[cnt]->nbytes = 1;
+ }
+ }
+ }
+
+ ctype->mbdigits_act = 10;
+ }
+
+ /* Check the wide character input digits. There must be a multiple
+ of ten available. In each group it could be that one or the other
+ character is missing. In this case the whole group must be
+ removed. */
+ cnt = 0;
+ while (cnt < ctype->wcdigits_act)
+ {
+ size_t inner;
+ for (inner = 0; inner < 10; ++inner)
+ if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
+ break;
+
+ if (inner == 10)
+ cnt += 10;
+ else
+ {
+ /* Remove the group. */
+ memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
+ ((ctype->wcdigits_act - cnt - 10)
+ * sizeof (ctype->wcdigits[0])));
+ ctype->wcdigits_act -= 10;
+ }
+ }
+
+ /* If no input digits are given use the default. */
+ if (ctype->wcdigits_act == 0)
+ {
+ if (ctype->wcdigits_max == 0)
+ {
+ ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
+ 10 * sizeof (uint32_t));
+ ctype->wcdigits_max = 10;
+ }
+
+ for (cnt = 0; cnt < 10; ++cnt)
+ ctype->wcdigits[cnt] = L'0' + cnt;
+
+ /* It is the wide character table which now holds one group; the
+ multibyte count must not be touched here. */
+ ctype->wcdigits_act = 10;
+ }
+
+ /* Check the outdigits. Missing multibyte entries are replaced by a
+ shared `?' sequence. */
+ warned = 0;
+ for (cnt = 0; cnt < 10; ++cnt)
+ if (ctype->mboutdigits[cnt] == NULL)
+ {
+ static struct charseq replace[2];
+
+ if (!warned)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+not all characters used in `outdigit' are available in the charmap")));
+ warned = 1;
+ }
+
+ replace[0].nbytes = 1;
+ replace[0].bytes[0] = '?';
+ replace[0].bytes[1] = '\0';
+ ctype->mboutdigits[cnt] = &replace[0];
+ }
+
+ warned = 0;
+ for (cnt = 0; cnt < 10; ++cnt)
+ if (ctype->wcoutdigits[cnt] == 0)
+ {
+ if (!warned)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+not all characters used in `outdigit' are available in the repertoire")));
+ warned = 1;
+ }
+
+ ctype->wcoutdigits[cnt] = L'?';
+ }
+
+ /* Sort the entries in the translit_ignore list by ascending `from'
+ value and count them. Every node is taken off the input list and
+ linked into the result in front of the first node with a larger
+ `from', so no node gets lost and entries with equal keys keep their
+ relative order. */
+ if (ctype->translit_ignore != NULL)
+ {
+ struct translit_ignore_t *sorted = NULL;
+ struct translit_ignore_t *runp = ctype->translit_ignore;
+
+ ctype->ntranslit_ignore = 0;
+
+ while (runp != NULL)
+ {
+ /* Remember the successor before RUNP is relinked. */
+ struct translit_ignore_t *nextp = runp->next;
+ struct translit_ignore_t **linkp = &sorted;
+
+ ++ctype->ntranslit_ignore;
+
+ while (*linkp != NULL && (*linkp)->from <= runp->from)
+ linkp = &(*linkp)->next;
+
+ runp->next = *linkp;
+ *linkp = runp;
+
+ runp = nextp;
+ }
+
+ ctype->translit_ignore = sorted;
+ }
+}
+
+
+/* Write the LC_CTYPE data of LOCALE to OUTPUT_PATH. The tables are
+ first built by allocate_arrays; afterwards one item is added to the
+ locale file for every index below _NL_CTYPE_EXTRA_MAP_1, followed by
+ one table per character class and one table per character map. */
+void
+ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
+ const char *output_path)
+{
+ struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+ /* The fixed items plus one entry for each class and each map. */
+ const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
+ + ctype->nr_charclass + ctype->map_collection_nr);
+ struct locale_file file;
+ uint32_t default_missing_len;
+ size_t elem, cnt;
+
+ /* Now prepare the output: Find the sizes of the table we can use. */
+ allocate_arrays (ctype, charmap, ctype->repertoire);
+
+ /* Length of the default_missing string in wide characters, zero if
+ there is none. */
+ default_missing_len = (ctype->default_missing
+ ? wcslen ((wchar_t *) ctype->default_missing)
+ : 0);
+
+ init_locale_data (&file, nelems);
+ for (elem = 0; elem < nelems; ++elem)
+ {
+ if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
+ switch (elem)
+ {
+ /* The helper macros below each expand to one complete `case'. */
+#define CTYPE_EMPTY(name) \
+ case name: \
+ add_locale_empty (&file); \
+ break
+
+ CTYPE_EMPTY(_NL_CTYPE_GAP1);
+ CTYPE_EMPTY(_NL_CTYPE_GAP2);
+ CTYPE_EMPTY(_NL_CTYPE_GAP3);
+ CTYPE_EMPTY(_NL_CTYPE_GAP4);
+ CTYPE_EMPTY(_NL_CTYPE_GAP5);
+ CTYPE_EMPTY(_NL_CTYPE_GAP6);
+
+#define CTYPE_RAW_DATA(name, base, size) \
+ case _NL_ITEM_INDEX (name): \
+ add_locale_raw_data (&file, base, size); \
+ break
+
+ CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
+ ctype->ctype_b,
+ (256 + 128) * sizeof (char_class_t));
+
+#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
+ case _NL_ITEM_INDEX (name): \
+ add_locale_uint32_array (&file, base, n_elems); \
+ break
+
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
+ CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
+ ctype->ctype32_b,
+ 256 * sizeof (char_class32_t));
+
+#define CTYPE_UINT32(name, value) \
+ case _NL_ITEM_INDEX (name): \
+ add_locale_uint32 (&file, value); \
+ break
+
+ CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
+ CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
+ CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
+
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
+ ctype->translit_from_idx,
+ ctype->translit_idx_size);
+
+ /* The table sizes are kept in bytes, the helper wants a number
+ of elements. */
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
+ ctype->translit_from_tbl,
+ ctype->translit_from_tbl_size
+ / sizeof (uint32_t));
+
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
+ ctype->translit_to_idx,
+ ctype->translit_idx_size);
+
+ CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
+ ctype->translit_to_tbl,
+ ctype->translit_to_tbl_size / sizeof (uint32_t));
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
+ /* The class name array: all names one after the other, closed
+ by an additional NUL byte. */
+ start_locale_structure (&file);
+ for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+ add_locale_string (&file, ctype->classnames[cnt]);
+ add_locale_char (&file, 0);
+ align_locale_data (&file, LOCFILE_ALIGN);
+ end_locale_structure (&file);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
+ /* The map name array, in the same layout as the class names. */
+ start_locale_structure (&file);
+ for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
+ add_locale_string (&file, ctype->mapnames[cnt]);
+ add_locale_char (&file, 0);
+ align_locale_data (&file, LOCFILE_ALIGN);
+ end_locale_structure (&file);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
+ add_locale_wcwidth_table (&file, &ctype->width);
+ break;
+
+ CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
+ add_locale_string (&file, ctype->codeset_name);
+ break;
+
+ CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
+
+ CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
+
+ /* The lengths are counted in groups of ten digits. */
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
+ add_locale_uint32 (&file, ctype->mbdigits_act / 10);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
+ add_locale_uint32 (&file, ctype->wcdigits_act / 10);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
+ /* Item N collects digit N of every group, each sequence being
+ NUL terminated. */
+ start_locale_structure (&file);
+ for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
+ cnt < ctype->mbdigits_act; cnt += 10)
+ {
+ add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
+ ctype->mbdigits[cnt]->nbytes);
+ add_locale_char (&file, 0);
+ }
+ end_locale_structure (&file);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
+ start_locale_structure (&file);
+ cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
+ add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
+ ctype->mboutdigits[cnt]->nbytes);
+ add_locale_char (&file, 0);
+ end_locale_structure (&file);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
+ /* As above: digit N of every group of wide character digits. */
+ start_locale_structure (&file);
+ for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
+ cnt < ctype->wcdigits_act; cnt += 10)
+ add_locale_uint32 (&file, ctype->wcdigits[cnt]);
+ end_locale_structure (&file);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
+ cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
+ add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
+ break;
+
+ case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
+ add_locale_uint32 (&file, default_missing_len);
+ break;
+
+ case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
+ add_locale_uint32_array (&file, ctype->default_missing,
+ default_missing_len);
+ break;
+
+ case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
+ add_locale_uint32 (&file, ctype->ntranslit_ignore);
+ break;
+
+ case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
+ /* One (from, to, step) triple for every list entry. */
+ start_locale_structure (&file);
+ {
+ struct translit_ignore_t *runp;
+ for (runp = ctype->translit_ignore; runp != NULL;
+ runp = runp->next)
+ {
+ add_locale_uint32 (&file, runp->from);
+ add_locale_uint32 (&file, runp->to);
+ add_locale_uint32 (&file, runp->step);
+ }
+ }
+ end_locale_structure (&file);
+ break;
+
+ default:
+ assert (! "unknown CTYPE element");
+ }
+ else
+ {
+ /* Handle extra maps. The class tables come first, the
+ character maps follow. */
+ size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
+ if (nr < ctype->nr_charclass)
+ {
+ start_locale_prelude (&file);
+ add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
+ end_locale_prelude (&file);
+ add_locale_wctype_table (&file, &ctype->class_3level[nr]);
+ }
+ else
+ {
+ nr -= ctype->nr_charclass;
+ assert (nr < ctype->map_collection_nr);
+ add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
+ }
+ }
+ }
+
+ write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
+}
+
+
+/* Local functions. */
+/* Register the character class NAME in CTYPE. A name which is already
+ known is reported through LR and otherwise ignored; exceeding
+ MAX_NR_CHARCLASS terminates the program. The string NAME itself is
+ stored, not a copy. */
+static void
+ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
+ const char *name)
+{
+ size_t idx = 0;
+
+ /* Refuse a second definition of the same class. */
+ while (idx < ctype->nr_charclass)
+ {
+ if (strcmp (ctype->classnames[idx], name) == 0)
+ {
+ lr_error (lr, _("character class `%s' already defined"), name);
+ return;
+ }
+ ++idx;
+ }
+
+ if (ctype->nr_charclass == MAX_NR_CHARCLASS)
+ /* Exit code 2 is prescribed in P1003.2b. */
+ WITH_CUR_LOCALE (error (2, 0, _("\
+implementation limit: no more than %Zd character classes allowed"),
+ MAX_NR_CHARCLASS));
+
+ /* Append the new name. */
+ ctype->classnames[ctype->nr_charclass] = name;
+ ++ctype->nr_charclass;
+}
+
+
+/* Register the character map NAME in CTYPE and allocate its zero-filled
+ table. The table gets the capacity of the largest map known so far
+ or, if there is none, 256 entries for a single-byte CHARMAP and 512
+ otherwise. A name which is already known is reported through LR and
+ otherwise ignored; exceeding MAX_NR_CHARMAP terminates the program. */
+static void
+ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
+ const char *name, const struct charmap_t *charmap)
+{
+ size_t widest = 0;
+ size_t slot;
+
+ /* Look for a map of the same name and, on the way, determine the
+ largest capacity in use. */
+ for (slot = 0; slot < ctype->map_collection_nr; ++slot)
+ {
+ if (strcmp (ctype->mapnames[slot], name) == 0)
+ {
+ lr_error (lr, _("character map `%s' already defined"), name);
+ return;
+ }
+
+ if (ctype->map_collection_max[slot] > widest)
+ widest = ctype->map_collection_max[slot];
+ }
+
+ if (ctype->map_collection_nr == MAX_NR_CHARMAP)
+ /* Exit code 2 is prescribed in P1003.2b. */
+ WITH_CUR_LOCALE (error (2, 0, _("\
+implementation limit: no more than %d character maps allowed"),
+ MAX_NR_CHARMAP));
+
+ /* SLOT now designates the first unused entry. */
+ ctype->mapnames[slot] = name;
+
+ if (widest != 0)
+ ctype->map_collection_max[slot] = widest;
+ else if (charmap->mb_cur_max == 1)
+ ctype->map_collection_max[slot] = 256;
+ else
+ ctype->map_collection_max[slot] = 512;
+
+ ctype->map_collection[slot] = (uint32_t *)
+ xcalloc (sizeof (uint32_t), ctype->map_collection_max[slot]);
+ ctype->map_collection_act[slot] = 256;
+
+ ++ctype->map_collection_nr;
+}
+
+
+/* Return the address of the entry for the character IDX in *TABLE.
+ Values below 256 index the table directly. All other values are
+ translated through the name array of CTYPE, which gets a new entry
+ if IDX is not yet known.
+
+ TABLE, MAX, and ACT can be NULL. With TABLE == NULL only the name
+ array is extended and NULL is returned. With MAX == NULL the table
+ is never enlarged and NULL is returned for a position at or beyond
+ *ACT. Otherwise *TABLE is enlarged as necessary, the new part being
+ zero-filled, and *MAX and *ACT are updated. */
+static uint32_t *
+find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
+ size_t *act, uint32_t idx)
+{
+ size_t slot;
+
+ if (idx < 256)
+ {
+ if (table == NULL)
+ return NULL;
+ return &(*table)[idx];
+ }
+
+ /* The charnames_idx lookup table spares us a linear search through
+ the name array. An unknown character gets the next free slot. */
+ slot = idx_table_get (&ctype->charnames_idx, idx);
+ if (slot == EMPTY)
+ slot = ctype->charnames_act;
+
+ if (slot == ctype->charnames_act)
+ {
+ /* The character is new: append it to the name array, doubling
+ the array when it is full. */
+ if (ctype->charnames_max == ctype->charnames_act)
+ {
+ ctype->charnames_max *= 2;
+ ctype->charnames = (uint32_t *)
+ xrealloc (ctype->charnames,
+ sizeof (uint32_t) * ctype->charnames_max);
+ }
+ ctype->charnames[ctype->charnames_act] = idx;
+ ++ctype->charnames_act;
+ idx_table_add (&ctype->charnames_idx, idx, slot);
+ }
+
+ /* Without a table there is nothing left to do. */
+ if (table == NULL)
+ return NULL;
+
+ /* The caller does not want to extend the table. */
+ if (max == NULL)
+ {
+ if (slot >= *act)
+ return NULL;
+ return &(*table)[slot];
+ }
+
+ if (slot < *act)
+ return &(*table)[slot];
+
+ if (slot >= *max)
+ {
+ /* Double the capacity until SLOT fits and clear the new part. */
+ const size_t previous = *max;
+
+ do
+ *max *= 2;
+ while (*max <= slot);
+
+ *table = (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
+ memset (&(*table)[previous], '\0',
+ (*max - previous) * sizeof (uint32_t));
+ }
+
+ *act = slot + 1;
+
+ return &(*table)[slot];
+}
+
+
+/* Determine the character the token NOW stands for. For a symbolic
+ name, a UCS4 value or a byte sequence the charmap entry (or NULL if
+ there is none) is stored in *SEQP, the wide character value in *WCHP,
+ and zero is returned. For a byte sequence unknown to CHARMAP *WCHP
+ is ILLEGAL_CHAR_VALUE. Any other token yields 1 and leaves *SEQP
+ and *WCHP untouched. */
+static int
+get_character (struct token *now, const struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct charseq **seqp, uint32_t *wchp)
+{
+ struct charseq *found;
+
+ if (now->tok == tok_bsymbol)
+ {
+ /* This will hopefully be the normal case. */
+ *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
+ now->val.str.lenmb);
+ *seqp = charmap_find_value (charmap, now->val.str.startmb,
+ now->val.str.lenmb);
+ return 0;
+ }
+
+ if (now->tok == tok_charcode)
+ {
+ /* We must map from the byte code to UCS4. */
+ found = charmap_find_symbol (charmap, now->val.str.startmb,
+ now->val.str.lenmb);
+ *seqp = found;
+
+ if (found == NULL)
+ {
+ *wchp = ILLEGAL_CHAR_VALUE;
+ return 0;
+ }
+
+ /* Look the UCS4 value up on first use and cache it in the entry. */
+ if (found->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ found->ucs4 = repertoire_find_value (repertoire, found->name,
+ strlen (found->name));
+ *wchp = found->ucs4;
+ return 0;
+ }
+
+ if (now->tok != tok_ucs4)
+ return 1;
+
+ {
+ /* A UCS4 value. Try the canonical <Uxxxxxxxx> name first, then the
+ sequences remembered in the repertoire. */
+ char uname[10];
+
+ snprintf (uname, sizeof (uname), "U%08X", now->val.ucs4);
+ found = charmap_find_value (charmap, uname, 9);
+
+ if (found == NULL)
+ found = repertoire_find_seq (repertoire, now->val.ucs4);
+
+ if (found != NULL)
+ {
+ /* An entry which belongs to a different value is of no use. */
+ if (found->ucs4 != now->val.ucs4)
+ found = NULL;
+ }
+ else
+ {
+ /* Compute the value in the charmap from the UCS value. */
+ const char *symbol = repertoire_find_symbol (repertoire,
+ now->val.ucs4);
+
+ if (symbol != NULL)
+ found = charmap_find_value (charmap, symbol, strlen (symbol));
+
+ if (found != NULL)
+ found->ucs4 = now->val.ucs4;
+ else if (repertoire != NULL)
+ {
+ /* Insert a negative entry. */
+ static const struct charseq negative
+ = { .ucs4 = ILLEGAL_CHAR_VALUE };
+ uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
+ sizeof (uint32_t));
+ *newp = now->val.ucs4;
+
+ insert_entry (&repertoire->seq_table, newp,
+ sizeof (uint32_t), (void *) &negative);
+ }
+ }
+
+ *seqp = found;
+ *wchp = now->val.ucs4;
+ }
+
+ return 0;
+}
+
+
+/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
+ the .(2). counterparts.
+
+ LAST_STR is the name the range starts with, the token NOW holds the
+ name it ends with. Both must have the same length and may differ only
+ in a trailing number written in base BASE. For every STEP-th number
+ after the start value up to the end value the symbolic name is built
+ and looked up; CLASS256_BIT is set for characters with a one-byte
+ representation and CLASS_BIT (if nonzero) for characters with a known
+ wide character value. With HANDLE_DIGITS == 1 the characters are
+ appended to the input digit tables, with HANDLE_DIGITS == 2 to the
+ output digit tables. If IGNORE_CONTENT is nonzero only the syntax
+ checks are performed. Errors are reported through LDFILE. */
+static void
+charclass_symbolic_ellipsis (struct linereader *ldfile,
+ struct locale_ctype_t *ctype,
+ const struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct token *now,
+ const char *last_str,
+ unsigned long int class256_bit,
+ unsigned long int class_bit, int base,
+ int ignore_content, int handle_digits, int step)
+{
+ const char *nowstr = now->val.str.startmb;
+ char tmp[now->val.str.lenmb + 1];
+ const char *cp;
+ char *endp;
+ unsigned long int from;
+ unsigned long int to;
+
+ /* We have to compute the ellipsis values using the symbolic names. */
+ assert (last_str != NULL);
+
+ if (strlen (last_str) != now->val.str.lenmb)
+ {
+ invalid_range:
+ lr_error (ldfile,
+ _("`%s' and `%.*s' are not valid names for symbolic range"),
+ last_str, (int) now->val.str.lenmb, nowstr);
+ return;
+ }
+
+ if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
+ /* Nothing to do, the names are the same. */
+ return;
+
+ /* Skip the common prefix. The names differ, so this stops before
+ the end of either of them. */
+ for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
+ ;
+
+ /* strtoul signals overflow by returning ULONG_MAX with errno set to
+ ERANGE. */
+ errno = 0;
+ from = strtoul (cp, &endp, base);
+ if ((from == ULONG_MAX && errno == ERANGE) || *endp != '\0')
+ goto invalid_range;
+
+ to = strtoul (nowstr + (cp - last_str), &endp, base);
+ if ((to == ULONG_MAX && errno == ERANGE)
+ || (endp - nowstr) != now->val.str.lenmb || from >= to)
+ goto invalid_range;
+
+ /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
+ if (!ignore_content)
+ {
+ /* The token temporarily refers to the local buffer so that
+ get_character looks up the generated name. Remember the
+ original string; it is restored before we return since TMP
+ ceases to exist then. */
+ __typeof__ (now->val.str.startmb) saved_startmb = now->val.str.startmb;
+
+ now->val.str.startmb = tmp;
+ while ((from += step) <= to)
+ {
+ struct charseq *seq;
+ uint32_t wch;
+
+ /* The numeric part is zero-padded to the width it has in the
+ original names. */
+ sprintf (tmp, (base == 10 ? "%.*s%0*lu" : "%.*s%0*lX"),
+ (int) (cp - last_str), last_str,
+ (int) (now->val.str.lenmb - (cp - last_str)),
+ from);
+
+ get_character (now, charmap, repertoire, &seq, &wch);
+
+ if (seq != NULL && seq->nbytes == 1)
+ /* Yep, we can store information about this byte sequence. */
+ ctype->class256_collection[seq->bytes[0]] |= class256_bit;
+
+ if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
+ /* We have the UCS4 position. */
+ *find_idx (ctype, &ctype->class_collection,
+ &ctype->class_collection_max,
+ &ctype->class_collection_act, wch) |= class_bit;
+
+ if (handle_digits == 1)
+ {
+ /* We must store the digit values. */
+ if (ctype->mbdigits_act == ctype->mbdigits_max)
+ {
+ ctype->mbdigits_max *= 2;
+ ctype->mbdigits = xrealloc (ctype->mbdigits,
+ (ctype->mbdigits_max
+ * sizeof (char *)));
+ ctype->wcdigits_max *= 2;
+ ctype->wcdigits = xrealloc (ctype->wcdigits,
+ (ctype->wcdigits_max
+ * sizeof (uint32_t)));
+ }
+
+ ctype->mbdigits[ctype->mbdigits_act++] = seq;
+ ctype->wcdigits[ctype->wcdigits_act++] = wch;
+ }
+ else if (handle_digits == 2)
+ {
+ /* We must store the digit values. */
+ if (ctype->outdigits_act >= 10)
+ {
+ lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+ "LC_CTYPE", "outdigit");
+ /* Give up on the rest of the range. */
+ break;
+ }
+
+ ctype->mboutdigits[ctype->outdigits_act] = seq;
+ ctype->wcoutdigits[ctype->outdigits_act] = wch;
+ ++ctype->outdigits_act;
+ }
+ }
+
+ /* Do not leave a pointer to our automatic buffer in the token. */
+ now->val.str.startmb = saved_startmb;
+ }
+}
+
+
+/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.
+
+ LAST_WCH is the value the range starts with, the token NOW holds the
+ value it ends with. Every STEP-th value after LAST_WCH up to and
+ including the end value is processed: CLASS256_BIT is set for
+ characters with a one-byte representation and CLASS_BIT (if nonzero)
+ for the wide character. With HANDLE_DIGITS == 1 the characters are
+ appended to the input digit tables, with HANDLE_DIGITS == 2 to the
+ output digit tables. If IGNORE_CONTENT is nonzero only the order of
+ the two values is checked. Errors are reported through LDFILE. */
+static void
+charclass_ucs4_ellipsis (struct linereader *ldfile,
+ struct locale_ctype_t *ctype,
+ const struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct token *now, uint32_t last_wch,
+ unsigned long int class256_bit,
+ unsigned long int class_bit, int ignore_content,
+ int handle_digits, int step)
+{
+ if (last_wch > now->val.ucs4)
+ {
+ /* Both values are printed with four digits unless one of them
+ needs more. */
+ lr_error (ldfile, _("\
+to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
+ (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
+ (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
+ return;
+ }
+
+ if (!ignore_content)
+ while ((last_wch += step) <= now->val.ucs4)
+ {
+ /* We have to find out whether there is a byte sequence corresponding
+ to this UCS4 value. */
+ struct charseq *seq;
+ char utmp[10];
+
+ /* The charmap may use the eight or the four digit form of the
+ name. */
+ snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
+ seq = charmap_find_value (charmap, utmp, 9);
+ if (seq == NULL)
+ {
+ snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
+ seq = charmap_find_value (charmap, utmp, 5);
+ }
+
+ if (seq == NULL)
+ /* Try looking in the repertoire map. */
+ seq = repertoire_find_seq (repertoire, last_wch);
+
+ /* If this is the first time we look for this sequence create a new
+ entry. */
+ if (seq == NULL)
+ {
+ /* Placeholder for values without a byte sequence. Its ucs4
+ member never equals a valid LAST_WCH, which is what the
+ tests below rely on. */
+ static const struct charseq negative
+ = { .ucs4 = ILLEGAL_CHAR_VALUE };
+
+ /* Find the symbolic name for this UCS4 value. */
+ if (repertoire != NULL)
+ {
+ const char *symbol = repertoire_find_symbol (repertoire,
+ last_wch);
+ uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
+ sizeof (uint32_t));
+ *newp = last_wch;
+
+ if (symbol != NULL)
+ /* We have a name, now search the multibyte value. */
+ seq = charmap_find_value (charmap, symbol, strlen (symbol));
+
+ if (seq == NULL)
+ /* We have to create a fake entry. */
+ seq = (struct charseq *) &negative;
+ else
+ seq->ucs4 = last_wch;
+
+ /* Remember the result, positive or negative, for the next
+ lookup of this value. */
+ insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
+ seq);
+ }
+ else
+ /* We have to create a fake entry. */
+ seq = (struct charseq *) &negative;
+ }
+
+ /* SEQ is only usable if it really describes LAST_WCH. */
+ if (seq->ucs4 == last_wch && seq->nbytes == 1)
+ /* Yep, we can store information about this byte sequence. */
+ ctype->class256_collection[(size_t) seq->bytes[0]]
+ |= class256_bit;
+
+ /* And of course we have the UCS4 position. */
+ if (class_bit != 0)
+ *find_idx (ctype, &ctype->class_collection,
+ &ctype->class_collection_max,
+ &ctype->class_collection_act, last_wch) |= class_bit;
+
+ if (handle_digits == 1)
+ {
+ /* We must store the digit values. Both tables are enlarged
+ together. */
+ if (ctype->mbdigits_act == ctype->mbdigits_max)
+ {
+ ctype->mbdigits_max *= 2;
+ ctype->mbdigits = xrealloc (ctype->mbdigits,
+ (ctype->mbdigits_max
+ * sizeof (char *)));
+ ctype->wcdigits_max *= 2;
+ ctype->wcdigits = xrealloc (ctype->wcdigits,
+ (ctype->wcdigits_max
+ * sizeof (uint32_t)));
+ }
+
+ /* A character without byte sequence is recorded as NULL. */
+ ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
+ ? seq : NULL);
+ ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
+ }
+ else if (handle_digits == 2)
+ {
+ /* We must store the digit values. */
+ if (ctype->outdigits_act >= 10)
+ {
+ lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+ "LC_CTYPE", "outdigit");
+ return;
+ }
+
+ ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
+ ? seq : NULL);
+ ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
+ ++ctype->outdigits_act;
+ }
+ }
+}
+
+
+/* Ellipsis as in `/xea/x12.../xea/x34'.
+
+   Walk the byte sequences that follow LAST_CHARCODE up to and including
+   the one held in NOW and record each of them: CLASS256_BIT is or'ed
+   into the single-byte table, CLASS_BIT into the wide character table
+   (when the charmap and REPERTOIRE yield a UCS4 value), and with
+   HANDLE_DIGITS equal to 1 or 2 an entry is appended to the digit or
+   outdigit arrays respectively.  LAST_CHARCODE (LAST_CHARCODE_LEN
+   bytes) is incremented in place; the start value itself is not
+   recorded here.  With IGNORE_CONTENT set only the two range checks
+   are performed.  */
+static void
+charclass_charcode_ellipsis (struct linereader *ldfile,
+                             struct locale_ctype_t *ctype,
+                             const struct charmap_t *charmap,
+                             struct repertoire_t *repertoire,
+                             struct token *now, char *last_charcode,
+                             uint32_t last_charcode_len,
+                             unsigned long int class256_bit,
+                             unsigned long int class_bit, int ignore_content,
+                             int handle_digits)
+{
+  /* Both ends of the range must consist of the same number of bytes.  */
+  if (now->val.charcode.nbytes != last_charcode_len)
+    {
+      lr_error (ldfile, _("\
+start and end character sequence of range must have the same length"));
+      return;
+    }
+
+  /* Then check whether the to-value is larger.  */
+  if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
+    {
+      lr_error (ldfile, _("\
+to-value character sequence is smaller than from-value sequence"));
+      return;
+    }
+
+  if (ignore_content)
+    return;
+
+  /* Test before stepping: when both ends of the range are equal there
+     is nothing left to do, and stepping first would wrap around through
+     every possible byte sequence before reaching the end value again.  */
+  while (memcmp (last_charcode, now->val.charcode.bytes,
+                 last_charcode_len) != 0)
+    {
+      struct charseq *seq;
+      uint32_t wch;
+      int i;
+
+      /* Increment the byte sequence value, carrying into the preceding
+         byte whenever one wraps to zero.  */
+      for (i = last_charcode_len - 1; i >= 0; --i)
+        if (++last_charcode[i] != 0)
+          break;
+
+      if (last_charcode_len == 1)
+        /* Of course we have the charcode value.  Index through
+           unsigned char so that bytes above 0x7f cannot turn into a
+           huge index when plain char is signed.  */
+        ctype->class256_collection[(unsigned char) last_charcode[0]]
+          |= class256_bit;
+
+      /* Find the symbolic name.  */
+      seq = charmap_find_symbol (charmap, last_charcode,
+                                 last_charcode_len);
+      if (seq != NULL)
+        {
+          if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+            seq->ucs4 = repertoire_find_value (repertoire, seq->name,
+                                               strlen (seq->name));
+          wch = seq->ucs4;
+
+          if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
+            *find_idx (ctype, &ctype->class_collection,
+                       &ctype->class_collection_max,
+                       &ctype->class_collection_act, wch) |= class_bit;
+        }
+      else
+        wch = ILLEGAL_CHAR_VALUE;
+
+      if (handle_digits == 1)
+        {
+          /* We must store the digit values.  */
+          if (ctype->mbdigits_act == ctype->mbdigits_max)
+            {
+              ctype->mbdigits_max *= 2;
+              ctype->mbdigits = xrealloc (ctype->mbdigits,
+                                          (ctype->mbdigits_max
+                                           * sizeof (char *)));
+              ctype->wcdigits_max *= 2;
+              ctype->wcdigits = xrealloc (ctype->wcdigits,
+                                          (ctype->wcdigits_max
+                                           * sizeof (uint32_t)));
+            }
+
+          /* The table gets its own entry holding just the bytes.
+             NOTE(review): only `nbytes' and the trailing bytes are
+             filled in; `name' and `ucs4' are left as xmalloc returned
+             them -- confirm no consumer reads those fields.  */
+          seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
+          memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
+          seq->nbytes = last_charcode_len;
+
+          ctype->mbdigits[ctype->mbdigits_act++] = seq;
+          ctype->wcdigits[ctype->wcdigits_act++] = wch;
+        }
+      else if (handle_digits == 2)
+        {
+          /* We must store the digit values.  */
+          if (ctype->outdigits_act >= 10)
+            {
+              lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+                        "LC_CTYPE", "outdigit");
+              return;
+            }
+
+          seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
+          memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
+          seq->nbytes = last_charcode_len;
+
+          ctype->mboutdigits[ctype->outdigits_act] = seq;
+          ctype->wcoutdigits[ctype->outdigits_act] = wch;
+          ++ctype->outdigits_act;
+        }
+    }
+}
+
+
+/* Look up a replacement for WCH in the tables of CTYPE itself.
+
+   Returns the first `to' string of a one-character transliteration
+   entry for WCH whose characters all have a `U%08X' name in CHARMAP,
+   `no_str' if WCH falls on one of the `translit_ignore' ranges, and
+   NULL otherwise.  */
+static uint32_t *
+find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
+                uint32_t wch)
+{
+  struct translit_t *trunp = ctype->translit;
+  struct translit_ignore_t *tirunp = ctype->translit_ignore;
+
+  while (trunp != NULL)
+    {
+      /* XXX We simplify things here.  The transliterations we look
+         for are only allowed to have one character.  */
+      if (trunp->from[0] == wch && trunp->from[1] == 0)
+        {
+          /* Found it.  Now look for a transliteration which can be
+             represented with the character set.  */
+          struct translit_to_t *torunp = trunp->to;
+
+          while (torunp != NULL)
+            {
+              int i;
+
+              for (i = 0; torunp->str[i] != 0; ++i)
+                {
+                  /* `U', eight hex digits and the terminating NUL.  */
+                  char utmp[10];
+
+                  snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
+                  if (charmap_find_value (charmap, utmp, 9) == NULL)
+                    /* This character cannot be represented.  */
+                    break;
+                }
+
+              /* The scan reached the terminator, so every character
+                 of this alternative is available.  */
+              if (torunp->str[i] == 0)
+                return torunp->str;
+
+              torunp = torunp->next;
+            }
+
+          /* Only the first matching entry is considered.  */
+          break;
+        }
+
+      trunp = trunp->next;
+    }
+
+  /* Check for ignored chars.  */
+  while (tirunp != NULL)
+    {
+      if (tirunp->from <= wch && tirunp->to >= wch)
+        {
+          uint32_t wi;
+
+          /* Inside the range WCH only counts if it lies on the step
+             grid starting at `from'.  */
+          for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
+            if (wi == wch)
+              return no_str;
+        }
+
+      /* Move on to the next range; without this the loop would never
+         terminate once the first range does not match.  */
+      tirunp = tirunp->next;
+    }
+
+  /* Nothing found.  */
+  return NULL;
+}
+
+
+/* Find a transliteration for WCH, first in the LC_CTYPE data of LOCALE
+   itself (only when it has transliteration entries) and then, in list
+   order, in the locales named by its `include' statements.  Yields
+   NULL when LOCALE has no LC_CTYPE data or nothing matches.  */
+uint32_t *
+find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
+               uint32_t wch)
+{
+  struct locale_ctype_t *ctype;
+  struct translit_include_t *inc;
+  uint32_t *found = NULL;
+
+  assert (locale != NULL);
+  ctype = locale->categories[LC_CTYPE].ctype;
+
+  if (ctype == NULL)
+    return NULL;
+
+  /* The locale's own table takes precedence.  */
+  if (ctype->translit != NULL)
+    found = find_translit2 (ctype, charmap, wch);
+
+  /* Otherwise recurse into the included locales until one of them
+     provides an answer.  */
+  for (inc = ctype->translit_include; inc != NULL && found == NULL;
+       inc = inc->next)
+    found = find_translit (find_locale (CTYPE_LOCALE,
+                                        inc->copy_locale,
+                                        inc->copy_repertoire,
+                                        charmap),
+                           charmap, wch);
+
+  return found;
+}
+
+
+/* Turn the token NOW into a zero-terminated UCS4 string.
+
+   The result is `no_str' for the `default_missing' token, a freshly
+   allocated one-character string for a symbolic name, a UCS4 value or a
+   character code, and the token's own wide string for a string token.
+   NULL means the value cannot be expressed (unknown character or empty
+   string).  Any other token is a syntax error: it is reported and
+   `(uint32_t *) -1l' is returned.  */
+static uint32_t *
+read_widestring (struct linereader *ldfile, struct token *now,
+                 const struct charmap_t *charmap,
+                 struct repertoire_t *repertoire)
+{
+  uint32_t *single;
+  struct charseq *seq;
+
+  switch (now->tok)
+    {
+    case tok_default_missing:
+      /* The special name "" will denote this case.  */
+      return no_str;
+
+    case tok_bsymbol:
+      /* Ask the repertoire for the value behind the name.  */
+      single = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
+      single[0] = repertoire_find_value (repertoire, now->val.str.startmb,
+                                         now->val.str.lenmb);
+      if (single[0] == ILLEGAL_CHAR_VALUE)
+        {
+          /* Without a UCS4 value there is nothing to return.  */
+          free (single);
+          return NULL;
+        }
+      single[1] = 0;
+      return single;
+
+    case tok_ucs4:
+      single = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
+      single[0] = now->val.ucs4;
+      single[1] = 0;
+      return single;
+
+    case tok_charcode:
+      /* Go from the byte sequence to the charmap entry and from there
+         to the UCS4 value.
+         NOTE(review): this reads the `str' member of the token value
+         although the token is a character code -- confirm against the
+         layout of struct token.  */
+      seq = charmap_find_symbol (charmap,
+                                 now->val.str.startmb,
+                                 now->val.str.lenmb);
+      if (seq == NULL)
+        /* The charmap does not know this byte sequence.  */
+        return NULL;
+
+      if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+        seq->ucs4 = repertoire_find_value (repertoire, seq->name,
+                                           strlen (seq->name));
+      if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
+        /* The UCS4 value stays unknown.  */
+        return NULL;
+
+      single = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
+      single[0] = seq->ucs4;
+      single[1] = 0;
+      return single;
+
+    case tok_string:
+      /* Missing and empty wide strings are both unusable.  */
+      if (now->val.str.startwc == NULL || now->val.str.startwc[0] == 0)
+        return NULL;
+      return now->val.str.startwc;
+
+    default:
+      if (now->tok != tok_eol && now->tok != tok_eof)
+        lr_ignore_rest (ldfile, 0);
+      SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
+      return (uint32_t *) -1l;
+    }
+}
+
+
+/* Parse one transliteration line.  NOW is the first token of the line
+   and names the string to replace; the replacement alternatives follow,
+   separated by semicolons, each built from one or more tokens.  At the
+   end of the line the new entry is put at the front of
+   `ctype->translit'.  Alternatives containing an element for which
+   read_widestring yields NULL are left out; a syntax error drops the
+   whole entry.  */
+static void
+read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
+                     struct token *now, const struct charmap_t *charmap,
+                     struct repertoire_t *repertoire)
+{
+  uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
+  struct translit_t *result;
+  struct translit_to_t **top;
+  struct obstack *ob = &ctype->mempool;
+  int first;
+  int ignore;
+
+  if (from_wstr == NULL || from_wstr == (uint32_t *) -1l)
+    /* There is no valid from string.  The second value is the syntax
+       error marker of read_widestring; it must never be stored as the
+       `from' pointer of an entry.  */
+    return;
+
+  result = (struct translit_t *) obstack_alloc (ob,
+                                                sizeof (struct translit_t));
+  result->from = from_wstr;
+  result->fname = ldfile->fname;
+  result->lineno = ldfile->lineno;
+  result->next = NULL;
+  result->to = NULL;
+  top = &result->to;
+  first = 1;
+  ignore = 0;
+
+  while (1)
+    {
+      uint32_t *to_wstr;
+
+      /* Next we have one or more transliterations.  They are
+         separated by semicolons.  */
+      now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
+
+      if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
+        {
+          /* One string read.  */
+          const uint32_t zero = 0;
+
+          if (!ignore)
+            {
+              /* Terminate the string collected in the obstack and hang
+                 it onto the list of alternatives.  */
+              obstack_grow (ob, &zero, 4);
+              to_wstr = obstack_finish (ob);
+
+              *top = obstack_alloc (ob, sizeof (struct translit_to_t));
+              (*top)->str = to_wstr;
+              (*top)->next = NULL;
+            }
+
+          if (now->tok == tok_eol)
+            {
+              /* The line is complete, publish the entry.  */
+              result->next = ctype->translit;
+              ctype->translit = result;
+              return;
+            }
+
+          if (!ignore)
+            top = &(*top)->next;
+          ignore = 0;
+        }
+      else
+        {
+          to_wstr = read_widestring (ldfile, now, charmap, repertoire);
+          if (to_wstr == (uint32_t *) -1l)
+            {
+              /* An error occurred.  Release the entry together with
+                 everything allocated after it.  */
+              obstack_free (ob, result);
+              return;
+            }
+
+          if (to_wstr == NULL)
+            /* NOTE(review): elements of this alternative which were
+               already grown into the obstack are not discarded here;
+               it looks like they would prefix the next alternative --
+               confirm.  */
+            ignore = 1;
+          else
+            /* This value is usable.  */
+            obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
+
+          first = 0;
+        }
+    }
+}
+
+
+/* Parse the operands of a `translit_ignore' statement and put one
+   element per operand at the front of `ctype->translit_ignore'.  An
+   operand is a symbolic name or UCS4 value, optionally followed by an
+   ellipsis and an end value; the `_2' form of the ellipsis selects a
+   step of two.  Parsing stops at the end of the line or at the first
+   syntax error.  */
+static void
+read_translit_ignore_entry (struct linereader *ldfile,
+                            struct locale_ctype_t *ctype,
+                            const struct charmap_t *charmap,
+                            struct repertoire_t *repertoire)
+{
+  /* We expect a semicolon-separated list of characters we ignore.  We are
+     only interested in the wide character definitions.  These must be
+     single characters, possibly defining a range when an ellipsis is used.  */
+  while (1)
+    {
+      struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
+                                    verbose);
+      struct translit_ignore_t *newp;
+      uint32_t from;
+
+      if (now->tok == tok_eol || now->tok == tok_eof)
+        {
+          lr_error (ldfile,
+                    _("premature end of `translit_ignore' definition"));
+          return;
+        }
+
+      if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
+        {
+          lr_error (ldfile, _("syntax error"));
+          lr_ignore_rest (ldfile, 0);
+          return;
+        }
+
+      if (now->tok == tok_ucs4)
+        from = now->val.ucs4;
+      else
+        /* Try to get the value.  */
+        from = repertoire_find_value (repertoire, now->val.str.startmb,
+                                      now->val.str.lenmb);
+
+      if (from == ILLEGAL_CHAR_VALUE)
+        {
+          lr_error (ldfile, "invalid character name");
+          newp = NULL;
+        }
+      else
+        {
+          /* Start out with a range of exactly one character.  */
+          newp = (struct translit_ignore_t *)
+            obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
+          newp->from = from;
+          newp->to = from;
+          newp->step = 1;
+
+          newp->next = ctype->translit_ignore;
+          ctype->translit_ignore = newp;
+        }
+
+      /* Now we expect either a semicolon, an ellipsis, or the end of the
+         line.  */
+      now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
+
+      if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
+        {
+          /* XXX Should we bother implementing `....'?  `...' certainly
+             will not be implemented.  */
+          uint32_t to;
+          int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
+
+          now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
+
+          if (now->tok == tok_eol || now->tok == tok_eof)
+            {
+              lr_error (ldfile,
+                        _("premature end of `translit_ignore' definition"));
+              return;
+            }
+
+          if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
+            {
+              lr_error (ldfile, _("syntax error"));
+              lr_ignore_rest (ldfile, 0);
+              return;
+            }
+
+          if (now->tok == tok_ucs4)
+            to = now->val.ucs4;
+          else
+            /* Try to get the value.  */
+            to = repertoire_find_value (repertoire, now->val.str.startmb,
+                                        now->val.str.lenmb);
+
+          if (to == ILLEGAL_CHAR_VALUE)
+            lr_error (ldfile, "invalid character name");
+          else if (newp != NULL)
+            {
+              /* Only a successfully created element can be widened.  An
+                 invalid from-value was reported above already, so do
+                 not add a range complaint about it and never touch the
+                 missing element.  */
+
+              /* Make sure the `to'-value is larger.  */
+              if (to >= from)
+                {
+                  newp->to = to;
+                  newp->step = step;
+                }
+              else
+                lr_error (ldfile, _("\
+to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
+                          (to | from) < 65536 ? 4 : 8, to,
+                          (to | from) < 65536 ? 4 : 8, from);
+            }
+
+          /* And the next token.  */
+          now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
+        }
+
+      if (now->tok == tok_eol || now->tok == tok_eof)
+        /* We are done.  */
+        return;
+
+      if (now->tok == tok_semicolon)
+        /* Next round.  */
+        continue;
+
+      /* If we come here something is wrong.  */
+      lr_error (ldfile, _("syntax error"));
+      lr_ignore_rest (ldfile, 0);
+      return;
+    }
+}
+
+
+/* The parser for the LC_CTYPE section of the locale definition.
+
+   Tokens are read from LDFILE and the outcome is stored in the LC_CTYPE
+   data of RESULT.  CHARMAP is used to resolve characters; the
+   repertoire named REPERTOIRE_NAME is loaded if the name is not NULL.
+   With IGNORE_CONTENT set the class, mapping and transliteration
+   statements are skipped instead of being stored.  The function
+   returns after the `END' line of the category; running into the end
+   of the file first is reported as an error.  */
+void
+ctype_read (struct linereader *ldfile, struct localedef_t *result,
+            const struct charmap_t *charmap, const char *repertoire_name,
+            int ignore_content)
+{
+  struct repertoire_t *repertoire = NULL;
+  struct locale_ctype_t *ctype;
+  struct token *now;
+  enum token_t nowtok;
+  size_t cnt;
+  uint32_t last_wch = 0;
+  enum token_t last_token;
+  enum token_t ellipsis_token;
+  int step;
+  char last_charcode[16];
+  size_t last_charcode_len = 0;
+  const char *last_str = NULL;
+  int mapidx;
+  struct localedef_t *copy_locale = NULL;
+
+  /* Get the repertoire we have to use.  */
+  if (repertoire_name != NULL)
+    repertoire = repertoire_read (repertoire_name);
+
+  /* The rest of the line containing `LC_CTYPE' must be free.  */
+  lr_ignore_rest (ldfile, 1);
+
+
+  do
+    {
+      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+      nowtok = now->tok;
+    }
+  while (nowtok == tok_eol);
+
+  /* If we see `copy' now we are almost done.  */
+  if (nowtok == tok_copy)
+    {
+      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+      if (now->tok != tok_string)
+        {
+          SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
+
+        skip_category:
+          do
+            now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          while (now->tok != tok_eof && now->tok != tok_end);
+
+          /* The loop above stops at the end of the file or at `END'.
+             Only the former, or an `END' which is directly followed
+             by the end of the file, is a premature end; otherwise the
+             token after `END' must name this category.  */
+          if (now->tok == tok_eof
+              || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
+                  now->tok == tok_eof))
+            lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
+          else if (now->tok != tok_lc_ctype)
+            {
+              lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
+              lr_ignore_rest (ldfile, 0);
+            }
+          else
+            lr_ignore_rest (ldfile, 1);
+
+          return;
+        }
+
+      if (! ignore_content)
+        {
+          /* Get the locale definition.  */
+          copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
+                                     repertoire_name, charmap, NULL);
+          if ((copy_locale->avail & CTYPE_LOCALE) == 0)
+            {
+              /* Not yet loaded.  So do it now.  */
+              if (locfile_read (copy_locale, charmap) != 0)
+                goto skip_category;
+            }
+
+          if (copy_locale->categories[LC_CTYPE].ctype == NULL)
+            return;
+        }
+
+      lr_ignore_rest (ldfile, 1);
+
+      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+      nowtok = now->tok;
+    }
+
+  /* Prepare the data structures.  */
+  ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
+  ctype = result->categories[LC_CTYPE].ctype;
+
+  /* Remember the repertoire we use.  */
+  if (!ignore_content)
+    ctype->repertoire = repertoire;
+
+  while (1)
+    {
+      unsigned long int class_bit = 0;
+      unsigned long int class256_bit = 0;
+      int handle_digits = 0;
+
+      /* Of course we don't proceed beyond the end of file.  */
+      if (nowtok == tok_eof)
+        break;
+
+      /* Ignore empty lines.  */
+      if (nowtok == tok_eol)
+        {
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          nowtok = now->tok;
+          continue;
+        }
+
+      switch (nowtok)
+        {
+        case tok_charclass:
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          while (now->tok == tok_ident || now->tok == tok_string)
+            {
+              ctype_class_new (ldfile, ctype, now->val.str.startmb);
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (now->tok != tok_semicolon)
+                break;
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+            }
+          if (now->tok != tok_eol)
+            SYNTAX_ERROR (_("\
+%s: syntax error in definition of new character class"), "LC_CTYPE");
+          break;
+
+        case tok_charconv:
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          while (now->tok == tok_ident || now->tok == tok_string)
+            {
+              ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (now->tok != tok_semicolon)
+                break;
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+            }
+          if (now->tok != tok_eol)
+            SYNTAX_ERROR (_("\
+%s: syntax error in definition of new character map"), "LC_CTYPE");
+          break;
+
+        case tok_class:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          /* We simply forget the `class' keyword and use the following
+             operand to determine the bit.  */
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          if (now->tok == tok_ident || now->tok == tok_string)
+            {
+              /* Must be one of the predefined class names.  */
+              for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+                if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
+                  break;
+              if (cnt >= ctype->nr_charclass)
+                {
+                  /* OK, it's a new class.  */
+                  ctype_class_new (ldfile, ctype, now->val.str.startmb);
+
+                  class_bit = _ISwbit (ctype->nr_charclass - 1);
+                }
+              else
+                {
+                  class_bit = _ISwbit (cnt);
+
+                  free (now->val.str.startmb);
+                }
+            }
+          else if (now->tok == tok_digit)
+            goto handle_tok_digit;
+          else if (now->tok < tok_upper || now->tok > tok_blank)
+            goto err_label;
+          else
+            {
+              class_bit = BITw (now->tok);
+              class256_bit = BIT (now->tok);
+            }
+
+          /* The next character must be a semicolon.  */
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          if (now->tok != tok_semicolon)
+            goto err_label;
+          goto read_charclass;
+
+        case tok_upper:
+        case tok_lower:
+        case tok_alpha:
+        case tok_alnum:
+        case tok_space:
+        case tok_cntrl:
+        case tok_punct:
+        case tok_graph:
+        case tok_print:
+        case tok_xdigit:
+        case tok_blank:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          class_bit = BITw (now->tok);
+          class256_bit = BIT (now->tok);
+          handle_digits = 0;
+        read_charclass:
+          ctype->class_done |= class_bit;
+          last_token = tok_none;
+          ellipsis_token = tok_none;
+          step = 1;
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          while (now->tok != tok_eol && now->tok != tok_eof)
+            {
+              uint32_t wch;
+              struct charseq *seq;
+
+              if (ellipsis_token == tok_none)
+                {
+                  if (get_character (now, charmap, repertoire, &seq, &wch))
+                    goto err_label;
+
+                  if (!ignore_content && seq != NULL && seq->nbytes == 1)
+                    /* Yep, we can store information about this byte
+                       sequence.  */
+                    ctype->class256_collection[seq->bytes[0]] |= class256_bit;
+
+                  if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
+                      && class_bit != 0)
+                    /* We have the UCS4 position.  */
+                    *find_idx (ctype, &ctype->class_collection,
+                               &ctype->class_collection_max,
+                               &ctype->class_collection_act, wch) |= class_bit;
+
+                  /* Remember this operand; it may turn out to be the
+                     start of an ellipsis range.  */
+                  last_token = now->tok;
+                  /* Terminate the string.  */
+                  if (last_token == tok_bsymbol)
+                    {
+                      now->val.str.startmb[now->val.str.lenmb] = '\0';
+                      last_str = now->val.str.startmb;
+                    }
+                  else
+                    last_str = NULL;
+                  last_wch = wch;
+                  memcpy (last_charcode, now->val.charcode.bytes, 16);
+                  last_charcode_len = now->val.charcode.nbytes;
+
+                  if (!ignore_content && handle_digits == 1)
+                    {
+                      /* We must store the digit values.  */
+                      if (ctype->mbdigits_act == ctype->mbdigits_max)
+                        {
+                          ctype->mbdigits_max += 10;
+                          ctype->mbdigits = xrealloc (ctype->mbdigits,
+                                                      (ctype->mbdigits_max
+                                                       * sizeof (char *)));
+                          ctype->wcdigits_max += 10;
+                          ctype->wcdigits = xrealloc (ctype->wcdigits,
+                                                      (ctype->wcdigits_max
+                                                       * sizeof (uint32_t)));
+                        }
+
+                      ctype->mbdigits[ctype->mbdigits_act++] = seq;
+                      ctype->wcdigits[ctype->wcdigits_act++] = wch;
+                    }
+                  else if (!ignore_content && handle_digits == 2)
+                    {
+                      /* We must store the digit values.  */
+                      if (ctype->outdigits_act >= 10)
+                        {
+                          lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+                                    "LC_CTYPE", "outdigit");
+                          lr_ignore_rest (ldfile, 0);
+                          break;
+                        }
+
+                      ctype->mboutdigits[ctype->outdigits_act] = seq;
+                      ctype->wcoutdigits[ctype->outdigits_act] = wch;
+                      ++ctype->outdigits_act;
+                    }
+                }
+              else
+                {
+                  /* Now it gets complicated.  We have to resolve the
+                     ellipsis problem.  First we must distinguish between
+                     the different kind of ellipsis and this must match the
+                     tokens we have seen.  */
+                  assert (last_token != tok_none);
+
+                  if (last_token != now->tok)
+                    {
+                      lr_error (ldfile, _("\
+ellipsis range must be marked by two operands of same type"));
+                      lr_ignore_rest (ldfile, 0);
+                      break;
+                    }
+
+                  if (last_token == tok_bsymbol)
+                    {
+                      if (ellipsis_token == tok_ellipsis3)
+                        lr_error (ldfile, _("with symbolic name range values \
+the absolute ellipsis `...' must not be used"));
+
+                      charclass_symbolic_ellipsis (ldfile, ctype, charmap,
+                                                   repertoire, now, last_str,
+                                                   class256_bit, class_bit,
+                                                   (ellipsis_token
+                                                    == tok_ellipsis4
+                                                    ? 10 : 16),
+                                                   ignore_content,
+                                                   handle_digits, step);
+                    }
+                  else if (last_token == tok_ucs4)
+                    {
+                      if (ellipsis_token != tok_ellipsis2)
+                        lr_error (ldfile, _("\
+with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
+
+                      charclass_ucs4_ellipsis (ldfile, ctype, charmap,
+                                               repertoire, now, last_wch,
+                                               class256_bit, class_bit,
+                                               ignore_content, handle_digits,
+                                               step);
+                    }
+                  else
+                    {
+                      assert (last_token == tok_charcode);
+
+                      if (ellipsis_token != tok_ellipsis3)
+                        lr_error (ldfile, _("\
+with character code range values one must use the absolute ellipsis `...'"));
+
+                      charclass_charcode_ellipsis (ldfile, ctype, charmap,
+                                                   repertoire, now,
+                                                   last_charcode,
+                                                   last_charcode_len,
+                                                   class256_bit, class_bit,
+                                                   ignore_content,
+                                                   handle_digits);
+                    }
+
+                  /* Now we have used the last value.  */
+                  last_token = tok_none;
+                }
+
+              /* Next we expect a semicolon or the end of the line.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (now->tok == tok_eol || now->tok == tok_eof)
+                break;
+
+              if (last_token != tok_none
+                  && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
+                {
+                  /* The `_2' variants are handled as the plain ellipsis
+                     with a step of two.  */
+                  if (now->tok == tok_ellipsis2_2)
+                    {
+                      now->tok = tok_ellipsis2;
+                      step = 2;
+                    }
+                  else if (now->tok == tok_ellipsis4_2)
+                    {
+                      now->tok = tok_ellipsis4;
+                      step = 2;
+                    }
+
+                  ellipsis_token = now->tok;
+
+                  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                  continue;
+                }
+
+              if (now->tok != tok_semicolon)
+                goto err_label;
+
+              /* And get the next character.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+
+              ellipsis_token = tok_none;
+              step = 1;
+            }
+          break;
+
+        case tok_digit:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+        handle_tok_digit:
+          class_bit = _ISwdigit;
+          class256_bit = _ISdigit;
+          handle_digits = 1;
+          goto read_charclass;
+
+        case tok_outdigit:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          if (ctype->outdigits_act != 0)
+            lr_error (ldfile, _("\
+%s: field `%s' declared more than once"),
+                      "LC_CTYPE", "outdigit");
+          class_bit = 0;
+          class256_bit = 0;
+          handle_digits = 2;
+          goto read_charclass;
+
+        case tok_toupper:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          mapidx = 0;
+          goto read_mapping;
+
+        case tok_tolower:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          mapidx = 1;
+          goto read_mapping;
+
+        case tok_map:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          /* We simply forget the `map' keyword and use the following
+             operand to determine the mapping.  */
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          if (now->tok == tok_ident || now->tok == tok_string)
+            {
+              size_t cnt;
+
+              for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
+                if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
+                  break;
+
+              if (cnt < ctype->map_collection_nr)
+                free (now->val.str.startmb);
+              else
+                /* OK, it's a new map.  */
+                ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
+
+              mapidx = cnt;
+            }
+          else if (now->tok < tok_toupper || now->tok > tok_tolower)
+            goto err_label;
+          else
+            mapidx = now->tok - tok_toupper;
+
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          /* This better should be a semicolon.  */
+          if (now->tok != tok_semicolon)
+            goto err_label;
+
+        read_mapping:
+          /* Test whether this mapping was already defined.  */
+          if (ctype->tomap_done[mapidx])
+            {
+              lr_error (ldfile, _("duplicated definition for mapping `%s'"),
+                        ctype->mapnames[mapidx]);
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+          ctype->tomap_done[mapidx] = 1;
+
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          while (now->tok != tok_eol && now->tok != tok_eof)
+            {
+              struct charseq *from_seq;
+              uint32_t from_wch;
+              struct charseq *to_seq;
+              uint32_t to_wch;
+
+              /* Every pair starts with an opening brace.  */
+              if (now->tok != tok_open_brace)
+                goto err_label;
+
+              /* Next comes the from-value.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (get_character (now, charmap, repertoire, &from_seq,
+                                 &from_wch) != 0)
+                goto err_label;
+
+              /* The next is a comma.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (now->tok != tok_comma)
+                goto err_label;
+
+              /* And the other value.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (get_character (now, charmap, repertoire, &to_seq,
+                                 &to_wch) != 0)
+                goto err_label;
+
+              /* And the last thing is the closing brace.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (now->tok != tok_close_brace)
+                goto err_label;
+
+              if (!ignore_content)
+                {
+                  /* Check whether the mapping converts from an ASCII value
+                     to a non-ASCII value.  */
+                  if (from_seq != NULL && from_seq->nbytes == 1
+                      && isascii (from_seq->bytes[0])
+                      && to_seq != NULL && (to_seq->nbytes != 1
+                                            || !isascii (to_seq->bytes[0])))
+                    ctype->to_nonascii = 1;
+
+                  if (mapidx < 2 && from_seq != NULL && to_seq != NULL
+                      && from_seq->nbytes == 1 && to_seq->nbytes == 1)
+                    /* We can use this value.  */
+                    ctype->map256_collection[mapidx][from_seq->bytes[0]]
+                      = to_seq->bytes[0];
+
+                  if (from_wch != ILLEGAL_CHAR_VALUE
+                      && to_wch != ILLEGAL_CHAR_VALUE)
+                    /* Both correct values.  */
+                    *find_idx (ctype, &ctype->map_collection[mapidx],
+                               &ctype->map_collection_max[mapidx],
+                               &ctype->map_collection_act[mapidx],
+                               from_wch) = to_wch;
+                }
+
+              /* Now comes a semicolon or the end of the line/file.  */
+              now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+              if (now->tok == tok_semicolon)
+                now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+            }
+          break;
+
+        case tok_translit_start:
+          /* Ignore the entire translit section with its peculiar syntax
+             if we don't need the input.  */
+          if (ignore_content)
+            {
+              do
+                {
+                  lr_ignore_rest (ldfile, 0);
+                  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                }
+              while (now->tok != tok_translit_end && now->tok != tok_eof);
+
+              if (now->tok == tok_eof)
+                lr_error (ldfile, _(\
+"%s: `translit_start' section does not end with `translit_end'"),
+                          "LC_CTYPE");
+
+              break;
+            }
+
+          /* The rest of the line better should be empty.  */
+          lr_ignore_rest (ldfile, 1);
+
+          /* We count here the number of allocated entries in the `translit'
+             array.  */
+          cnt = 0;
+
+          ldfile->translate_strings = 1;
+          ldfile->return_widestr = 1;
+
+          /* We proceed until we see the `translit_end' token.  */
+          while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
+                 now->tok != tok_translit_end && now->tok != tok_eof)
+            {
+              if (now->tok == tok_eol)
+                /* Ignore empty lines.  */
+                continue;
+
+              if (now->tok == tok_include)
+                {
+                  /* We have to include locale.  */
+                  const char *locale_name;
+                  const char *repertoire_name;
+                  struct translit_include_t *include_stmt, **include_ptr;
+
+                  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                  /* This should be a string or an identifier.  In any
+                     case something to name a locale.  */
+                  if (now->tok != tok_string && now->tok != tok_ident)
+                    {
+                    translit_syntax:
+                      lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
+                      lr_ignore_rest (ldfile, 0);
+                      continue;
+                    }
+                  locale_name = now->val.str.startmb;
+
+                  /* Next should be a semicolon.  */
+                  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                  if (now->tok != tok_semicolon)
+                    goto translit_syntax;
+
+                  /* Now the repertoire name.  */
+                  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                  if ((now->tok != tok_string && now->tok != tok_ident)
+                      || now->val.str.startmb == NULL)
+                    goto translit_syntax;
+                  repertoire_name = now->val.str.startmb;
+                  if (repertoire_name[0] == '\0')
+                    /* Ignore the empty string.  */
+                    repertoire_name = NULL;
+
+                  /* Save the include statement for later processing.  */
+                  include_stmt = (struct translit_include_t *)
+                    xmalloc (sizeof (struct translit_include_t));
+                  include_stmt->copy_locale = locale_name;
+                  include_stmt->copy_repertoire = repertoire_name;
+                  include_stmt->next = NULL;
+
+                  /* Append, so that the includes keep their order.  */
+                  include_ptr = &ctype->translit_include;
+                  while (*include_ptr != NULL)
+                    include_ptr = &(*include_ptr)->next;
+                  *include_ptr = include_stmt;
+
+                  /* The rest of the line must be empty.  */
+                  lr_ignore_rest (ldfile, 1);
+
+                  /* Make sure the locale is read.  */
+                  add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
+                                   1, NULL);
+                  continue;
+                }
+              else if (now->tok == tok_default_missing)
+                {
+                  uint32_t *wstr;
+
+                  while (1)
+                    {
+                      /* We expect a single character or string as the
+                         argument.  */
+                      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                      wstr = read_widestring (ldfile, now, charmap,
+                                              repertoire);
+
+                      if (wstr != NULL)
+                        {
+                          if (ctype->default_missing != NULL)
+                            {
+                              lr_error (ldfile, _("\
+%s: duplicate `default_missing' definition"), "LC_CTYPE");
+                              WITH_CUR_LOCALE (error_at_line (0, 0,
+                                                              ctype->default_missing_file,
+                                                              ctype->default_missing_lineno,
+                                                              _("\
+previous definition was here")));
+                            }
+                          else
+                            {
+                              ctype->default_missing = wstr;
+                              ctype->default_missing_file = ldfile->fname;
+                              ctype->default_missing_lineno = ldfile->lineno;
+                            }
+                          /* We can have more entries, ignore them.  */
+                          lr_ignore_rest (ldfile, 0);
+                          break;
+                        }
+                      else if (wstr == (uint32_t *) -1l)
+                        /* This was an syntax error.  */
+                        break;
+
+                      /* Maybe there is another replacement we can use.  */
+                      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+                      if (now->tok == tok_eol || now->tok == tok_eof)
+                        {
+                          /* Nothing found.  We tell the user.  */
+                          lr_error (ldfile, _("\
+%s: no representable `default_missing' definition found"), "LC_CTYPE");
+                          break;
+                        }
+                      if (now->tok != tok_semicolon)
+                        goto translit_syntax;
+                    }
+
+                  continue;
+                }
+              else if (now->tok == tok_translit_ignore)
+                {
+                  read_translit_ignore_entry (ldfile, ctype, charmap,
+                                              repertoire);
+                  continue;
+                }
+
+              read_translit_entry (ldfile, ctype, now, charmap, repertoire);
+            }
+          ldfile->return_widestr = 0;
+
+          if (now->tok == tok_eof)
+            lr_error (ldfile, _(\
+"%s: `translit_start' section does not end with `translit_end'"),
+                      "LC_CTYPE");
+
+          break;
+
+        case tok_ident:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          /* This could mean one of several things.  First test whether
+             it's a character class name.  */
+          for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+            if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
+              break;
+          if (cnt < ctype->nr_charclass)
+            {
+              class_bit = _ISwbit (cnt);
+              class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
+              free (now->val.str.startmb);
+              goto read_charclass;
+            }
+          /* Otherwise it may be the name of a mapping.  */
+          for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
+            if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
+              break;
+          if (cnt < ctype->map_collection_nr)
+            {
+              mapidx = cnt;
+              free (now->val.str.startmb);
+              goto read_mapping;
+            }
+          break;
+
+        case tok_end:
+          /* Next we assume `LC_CTYPE'.  */
+          now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+          if (now->tok == tok_eof)
+            break;
+          if (now->tok == tok_eol)
+            lr_error (ldfile, _("%s: incomplete `END' line"),
+                      "LC_CTYPE");
+          else if (now->tok != tok_lc_ctype)
+            lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
+          lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
+          return;
+
+        default:
+        err_label:
+          if (now->tok != tok_eof)
+            SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
+        }
+
+      /* Prepare for the next round.  */
+      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+      nowtok = now->tok;
+    }
+
+  /* When we come here we reached the end of the file.  */
+  lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
+}
+
+
+/* Subroutine of set_class_defaults, below.
+
+   Put every character from FROM to TO (inclusive) into the class with
+   bit position BITPOS.  For the single-byte table the character is
+   looked up in CHARMAP, first under its one-character name and then
+   under its `U%08X' name; a missing or multibyte encoding is reported
+   and leaves that table alone.  The wide character table is updated in
+   any case.  */
+static void
+set_one_default (struct locale_ctype_t *ctype,
+                 const struct charmap_t *charmap,
+                 int bitpos, int from, int to)
+{
+  char tmp[2];
+  int ch;
+  int bit = _ISbit (bitpos);
+  int bitw = _ISwbit (bitpos);
+  /* Define string.  Only the first byte changes below, the
+     terminator stays in place.  */
+  strcpy (tmp, "?");
+
+  for (ch = from; ch <= to; ++ch)
+    {
+      struct charseq *seq;
+      tmp[0] = ch;
+
+      seq = charmap_find_value (charmap, tmp, 1);
+      if (seq == NULL)
+        {
+          /* `U', eight hex digits and the terminating NUL.  The write
+             is bounded by the buffer size and the argument has the
+             unsigned type `%X' expects.  */
+          char buf[10];
+          snprintf (buf, sizeof (buf), "U%08X", (unsigned int) ch);
+          seq = charmap_find_value (charmap, buf, 9);
+        }
+      if (seq == NULL)
+        {
+          if (!be_quiet)
+            WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+                                    "LC_CTYPE", tmp));
+        }
+      else if (seq->nbytes != 1)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+                                "LC_CTYPE", tmp));
+      else
+        ctype->class256_collection[seq->bytes[0]] |= bit;
+
+      /* No need to search here, the ASCII value is also the Unicode
+         value.  */
+      ELEM (ctype, class_collection, , ch) |= bitw;
+    }
+}
+
+/* Subroutine of set_class_defaults, below.  Adds one character that is
+   known by a symbolic name to a class.  The charmap is searched for
+   NAME, then for ALT_NAME (skipped when NULL), then for UCS_NAME.  When
+   the character is found with a one-byte encoding, BIT is set for that
+   byte in the 256-entry table; otherwise a diagnostic naming DISPLAY is
+   issued.  BITW is always set for the wide character WC.  */
+static void
+set_named_default (struct locale_ctype_t *ctype,
+                   const struct charmap_t *charmap,
+                   const char *name, const char *alt_name,
+                   const char *ucs_name, const char *display,
+                   uint32_t wc, int bit, int bitw)
+{
+  struct charseq *seq;
+
+  seq = charmap_find_value (charmap, name, strlen (name));
+  if (seq == NULL && alt_name != NULL)
+    seq = charmap_find_value (charmap, alt_name, strlen (alt_name));
+  if (seq == NULL)
+    seq = charmap_find_value (charmap, ucs_name, strlen (ucs_name));
+
+  if (seq == NULL)
+    {
+      if (!be_quiet)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+                                "LC_CTYPE", display));
+    }
+  else if (seq->nbytes != 1)
+    WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+                            "LC_CTYPE", display));
+  else
+    ctype->class256_collection[seq->bytes[0]] |= bit;
+
+  /* No need to search.  */
+  ELEM (ctype, class_collection, , wc) |= bitw;
+}
+
+/* Fill in every class, mapping and the output digits that the locale
+   definition left unspecified.  CTYPE is updated in place; CHARMAP is
+   consulted for the byte encodings.  REPERTOIRE is not used here.  */
+static void
+set_class_defaults (struct locale_ctype_t *ctype,
+                    const struct charmap_t *charmap,
+                    struct repertoire_t *repertoire)
+{
+#define set_default(bitpos, from, to) \
+  set_one_default (ctype, charmap, bitpos, from, to)
+
+  /* This function defines the default values for the classes and
+     conversions according to POSIX.2 2.5.2.1.
+     It may seem that the order of these if-blocks is arbitrary but it is
+     NOT.  Don't move them unless you know what you do!  */
+
+  /* Set default values if keyword was not present.  */
+  if ((ctype->class_done & BITw (tok_upper)) == 0)
+    /* "If this keyword [upper] is not specified, the uppercase letters
+       `A' through `Z', ..., shall automatically belong to this class,
+       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
+    set_default (BITPOS (tok_upper), 'A', 'Z');
+
+  if ((ctype->class_done & BITw (tok_lower)) == 0)
+    /* "If this keyword [lower] is not specified, the lowercase letters
+       `a' through `z', ..., shall automatically belong to this class,
+       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
+    set_default (BITPOS (tok_lower), 'a', 'z');
+
+  if ((ctype->class_done & BITw (tok_alpha)) == 0)
+    {
+      /* Table 2-6 in P1003.2 says that characters in class `upper' or
+         class `lower' *must* be in class `alpha'.  */
+      unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
+      unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
+
+      for (size_t cnt = 0; cnt < 256; ++cnt)
+        if ((ctype->class256_collection[cnt] & mask) != 0)
+          ctype->class256_collection[cnt] |= BIT (tok_alpha);
+
+      for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
+        if ((ctype->class_collection[cnt] & maskw) != 0)
+          ctype->class_collection[cnt] |= BITw (tok_alpha);
+    }
+
+  if ((ctype->class_done & BITw (tok_digit)) == 0)
+    /* "If this keyword [digit] is not specified, the digits `0' through
+       `9', ..., shall automatically belong to this class, with
+       implementation-defined character values."  [P1003.2, 2.5.2.1]  */
+    set_default (BITPOS (tok_digit), '0', '9');
+
+  /* "Only characters specified for the `alpha' and `digit' keyword
+     shall be specified.  Characters specified for the keyword `alpha'
+     and `digit' are automatically included in this class."  This is
+     done unconditionally, whether or not `alnum' was given.  */
+  {
+    unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
+    unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
+
+    for (size_t cnt = 0; cnt < 256; ++cnt)
+      if ((ctype->class256_collection[cnt] & mask) != 0)
+        ctype->class256_collection[cnt] |= BIT (tok_alnum);
+
+    for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
+      if ((ctype->class_collection[cnt] & maskw) != 0)
+        ctype->class_collection[cnt] |= BITw (tok_alnum);
+  }
+
+  if ((ctype->class_done & BITw (tok_space)) == 0)
+    /* "If this keyword [space] is not specified, the characters <space>,
+       <form-feed>, <newline>, <carriage-return>, <tab>, and
+       <vertical-tab>, ..., shall automatically belong to this class,
+       with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
+    {
+      set_named_default (ctype, charmap, "space", "SP", "U00000020",
+                         "<space>", L' ',
+                         BIT (tok_space), BITw (tok_space));
+      set_named_default (ctype, charmap, "form-feed", NULL, "U0000000C",
+                         "<form-feed>", L'\f',
+                         BIT (tok_space), BITw (tok_space));
+      set_named_default (ctype, charmap, "newline", NULL, "U0000000A",
+                         "<newline>", L'\n',
+                         BIT (tok_space), BITw (tok_space));
+      set_named_default (ctype, charmap, "carriage-return", NULL,
+                         "U0000000D", "<carriage-return>", L'\r',
+                         BIT (tok_space), BITw (tok_space));
+      set_named_default (ctype, charmap, "tab", NULL, "U00000009",
+                         "<tab>", L'\t',
+                         BIT (tok_space), BITw (tok_space));
+      set_named_default (ctype, charmap, "vertical-tab", NULL, "U0000000B",
+                         "<vertical-tab>", L'\v',
+                         BIT (tok_space), BITw (tok_space));
+    }
+
+  if ((ctype->class_done & BITw (tok_xdigit)) == 0)
+    /* "If this keyword is not specified, the digits `0' to `9', the
+       uppercase letters `A' through `F', and the lowercase letters `a'
+       through `f', ..., shall automatically belong to this class, with
+       implementation defined character values."  [P1003.2, 2.5.2.1]  */
+    {
+      set_default (BITPOS (tok_xdigit), '0', '9');
+      set_default (BITPOS (tok_xdigit), 'A', 'F');
+      set_default (BITPOS (tok_xdigit), 'a', 'f');
+    }
+
+  if ((ctype->class_done & BITw (tok_blank)) == 0)
+    /* "If this keyword [blank] is unspecified, the characters <space> and
+       <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
+    {
+      set_named_default (ctype, charmap, "space", "SP", "U00000020",
+                         "<space>", L' ',
+                         BIT (tok_blank), BITw (tok_blank));
+      set_named_default (ctype, charmap, "tab", NULL, "U00000009",
+                         "<tab>", L'\t',
+                         BIT (tok_blank), BITw (tok_blank));
+    }
+
+  if ((ctype->class_done & BITw (tok_graph)) == 0)
+    /* "If this keyword [graph] is not specified, characters specified for
+       the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
+       shall belong to this character class."  [P1003.2, 2.5.2.1]  */
+    {
+      unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
+        BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
+      unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
+        BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
+        BITw (tok_punct);
+
+      for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
+        if ((ctype->class_collection[cnt] & maskw) != 0)
+          ctype->class_collection[cnt] |= BITw (tok_graph);
+
+      for (size_t cnt = 0; cnt < 256; ++cnt)
+        if ((ctype->class256_collection[cnt] & mask) != 0)
+          ctype->class256_collection[cnt] |= BIT (tok_graph);
+    }
+
+  if ((ctype->class_done & BITw (tok_print)) == 0)
+    /* "If this keyword [print] is not provided, characters specified for
+       the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
+       and the <space> character shall belong to this character class."
+       [P1003.2, 2.5.2.1]  */
+    {
+      unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
+        BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
+      unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
+        BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
+        BITw (tok_punct);
+
+      for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
+        if ((ctype->class_collection[cnt] & maskw) != 0)
+          ctype->class_collection[cnt] |= BITw (tok_print);
+
+      for (size_t cnt = 0; cnt < 256; ++cnt)
+        if ((ctype->class256_collection[cnt] & mask) != 0)
+          ctype->class256_collection[cnt] |= BIT (tok_print);
+
+      set_named_default (ctype, charmap, "space", "SP", "U00000020",
+                         "<space>", L' ',
+                         BIT (tok_print), BITw (tok_print));
+    }
+
+  if (ctype->tomap_done[0] == 0)
+    /* "If this keyword [toupper] is not specified, the lowercase letters
+       `a' through `z', and their corresponding uppercase letters `A' to
+       `Z', ..., shall automatically be included, with implementation-
+       defined character values."  [P1003.2, 2.5.2.1]  */
+    {
+      /* TMP is the `<c>' spelling used in diagnostics; &tmp[1] with
+         length 1 is the bare symbol name used for the lookup.  */
+      char tmp[4];
+      int ch;
+
+      strcpy (tmp, "<?>");
+
+      for (ch = 'a'; ch <= 'z'; ++ch)
+        {
+          struct charseq *seq_from, *seq_to;
+
+          tmp[1] = (char) ch;
+
+          seq_from = charmap_find_value (charmap, &tmp[1], 1);
+          if (seq_from == NULL)
+            {
+              char buf[10];
+              sprintf (buf, "U%08X", ch);
+              seq_from = charmap_find_value (charmap, buf, 9);
+            }
+          if (seq_from == NULL)
+            {
+              if (!be_quiet)
+                WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+                                        "LC_CTYPE", tmp));
+            }
+          else if (seq_from->nbytes != 1)
+            {
+              if (!be_quiet)
+                WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' needed as default value not representable with one byte"),
+                                        "LC_CTYPE", tmp));
+            }
+          else
+            {
+              /* This conversion is implementation defined.  */
+              tmp[1] = (char) (ch + ('A' - 'a'));
+              seq_to = charmap_find_value (charmap, &tmp[1], 1);
+              if (seq_to == NULL)
+                {
+                  char buf[10];
+                  sprintf (buf, "U%08X", ch + ('A' - 'a'));
+                  seq_to = charmap_find_value (charmap, buf, 9);
+                }
+              if (seq_to == NULL)
+                {
+                  if (!be_quiet)
+                    WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+                                            "LC_CTYPE", tmp));
+                }
+              else if (seq_to->nbytes != 1)
+                {
+                  if (!be_quiet)
+                    WITH_CUR_LOCALE (error (0, 0, _("\
+%s: character `%s' needed as default value not representable with one byte"),
+                                            "LC_CTYPE", tmp));
+                }
+              else
+                /* The index [0] is determined by the order of the
+                   `ctype_map_newP' calls in `ctype_startup'.  */
+                ctype->map256_collection[0][seq_from->bytes[0]]
+                  = seq_to->bytes[0];
+            }
+
+          /* No need to search.  */
+          ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
+        }
+    }
+
+  if (ctype->tomap_done[1] == 0)
+    /* "If this keyword [tolower] is not specified, the mapping shall be
+       the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
+    {
+      for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
+        if (ctype->map_collection[0][cnt] != 0)
+          {
+            /* Read both operands before the lookup: ELEM is handed the
+               addresses of the table pointers, so it presumably may
+               move the tables while making room.  */
+            uint32_t upper = ctype->map_collection[0][cnt];
+            uint32_t lower = ctype->charnames[cnt];
+
+            ELEM (ctype, map_collection, [1], upper) = lower;
+          }
+
+      for (size_t cnt = 0; cnt < 256; ++cnt)
+        if (ctype->map256_collection[0][cnt] != 0)
+          ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
+    }
+
+  if (ctype->outdigits_act != 10)
+    {
+      if (ctype->outdigits_act != 0)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+%s: field `%s' does not contain exactly ten entries"),
+                                "LC_CTYPE", "outdigit"));
+
+      /* Fill the missing entries; each digit is looked up by its one
+         character name, then its long name, then its `Uxxxxxxxx' name.  */
+      for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
+        {
+          ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+                                                         (char *) digits + cnt,
+                                                         1);
+
+          if (ctype->mboutdigits[cnt] == NULL)
+            ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+                                                           longnames[cnt],
+                                                           strlen (longnames[cnt]));
+
+          if (ctype->mboutdigits[cnt] == NULL)
+            ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+                                                           uninames[cnt], 9);
+
+          if (ctype->mboutdigits[cnt] == NULL)
+            {
+              /* Provide a replacement.  */
+              WITH_CUR_LOCALE (error (0, 0, _("\
+no output digits defined and none of the standard names in the charmap")));
+
+              ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
+                                                       sizeof (struct charseq)
+                                                       + 1);
+
+              /* This is better than nothing.  */
+              ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
+              ctype->mboutdigits[cnt]->nbytes = 1;
+            }
+
+          ctype->wcoutdigits[cnt] = L'0' + cnt;
+        }
+
+      ctype->outdigits_act = 10;
+    }
+
+#undef set_default
+}
+
+
+/* Initialize.  Assumes t->p and t->q have already been set.  All three
+   levels start out without storage and with zero blocks in use.  */
+static inline void
+wctype_table_init (struct wctype_table *t)
+{
+  t->level1_size = 0;
+  t->level1_alloc = 0;
+  t->level1 = NULL;
+
+  t->level2_size = 0;
+  t->level2_alloc = 0;
+  t->level2 = NULL;
+
+  t->level3_size = 0;
+  t->level3_alloc = 0;
+  t->level3 = NULL;
+}
+
+/* Retrieve an entry.  Returns 1 when the bit for WC is set in table T
+   and 0 otherwise, including when WC falls into a part of the table
+   that was never allocated.  */
+static inline int
+wctype_table_get (struct wctype_table *t, uint32_t wc)
+{
+  uint32_t top = wc >> (t->q + t->p + 5);
+  uint32_t block2, block3;
+  uint32_t pos2, pos3;
+  uint32_t word;
+
+  if (top >= t->level1_size)
+    return 0;
+
+  block2 = t->level1[top];
+  if (block2 == EMPTY)
+    return 0;
+
+  pos2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1)) + (block2 << t->q);
+  block3 = t->level2[pos2];
+  if (block3 == EMPTY)
+    return 0;
+
+  pos3 = ((wc >> 5) & ((1 << t->p) - 1)) + (block3 << t->p);
+  word = t->level3[pos3];
+
+  /* The low five bits of WC select the bit inside the 32-bit word.  */
+  return (word >> (wc & 0x1f)) & 1;
+}
+
+/* Add one entry.  Sets the bit for WC in table T, extending level 1 and
+   appending fresh level-2 and level-3 blocks where none exists yet.  */
+static void
+wctype_table_add (struct wctype_table *t, uint32_t wc)
+{
+  uint32_t slot1 = wc >> (t->q + t->p + 5);
+  uint32_t slot2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
+  uint32_t slot3 = (wc >> 5) & ((1 << t->p) - 1);
+  uint32_t bitno = wc & 0x1f;
+  size_t pos, end;
+
+  /* Make sure level 1 reaches SLOT1; new slots are marked unused.  */
+  if (slot1 >= t->level1_size)
+    {
+      if (slot1 >= t->level1_alloc)
+        {
+          size_t wanted = 2 * t->level1_alloc;
+          if (wanted <= slot1)
+            wanted = slot1 + 1;
+          t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
+                                             wanted * sizeof (uint32_t));
+          t->level1_alloc = wanted;
+        }
+      do
+        t->level1[t->level1_size++] = EMPTY;
+      while (slot1 >= t->level1_size);
+    }
+
+  /* Append a level-2 block of 1 << q unused entries if needed.  */
+  if (t->level1[slot1] == EMPTY)
+    {
+      if (t->level2_size == t->level2_alloc)
+        {
+          size_t wanted = 2 * t->level2_alloc + 1;
+          t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
+                                             (wanted << t->q) * sizeof (uint32_t));
+          t->level2_alloc = wanted;
+        }
+      end = (t->level2_size + 1) << t->q;
+      for (pos = t->level2_size << t->q; pos < end; ++pos)
+        t->level2[pos] = EMPTY;
+      t->level1[slot1] = t->level2_size++;
+    }
+
+  slot2 += t->level1[slot1] << t->q;
+
+  /* Append a level-3 block of 1 << p cleared words if needed.  */
+  if (t->level2[slot2] == EMPTY)
+    {
+      if (t->level3_size == t->level3_alloc)
+        {
+          size_t wanted = 2 * t->level3_alloc + 1;
+          t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
+                                             (wanted << t->p) * sizeof (uint32_t));
+          t->level3_alloc = wanted;
+        }
+      end = (t->level3_size + 1) << t->p;
+      for (pos = t->level3_size << t->p; pos < end; ++pos)
+        t->level3[pos] = 0;
+      t->level2[slot2] = t->level3_size++;
+    }
+
+  slot3 += t->level2[slot2] << t->p;
+
+  t->level3[slot3] |= (uint32_t)1 << bitno;
+}
+
+/* Finalize and shrink.  Merges identical level-3 and level-2 blocks of
+   table T, records the size of the serialized form in t->result_size,
+   writes the table to FILE as one locale structure (five header words,
+   level 1, level 2, level 3) and finally releases T's level arrays.  */
+static void
+add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
+{
+  size_t i, j, k;
+  /* Maps from old block number to new block number.  They live on the
+     heap rather than in variable length arrays: a table that never
+     received an entry has a block count of zero, which is not a valid
+     variable length array size, and the counts have no fixed bound.
+     One extra element keeps the request non-zero.  */
+  uint32_t *reorder3 = (uint32_t *)
+    xmalloc ((t->level3_size + 1) * sizeof (uint32_t));
+  uint32_t *reorder2 = (uint32_t *)
+    xmalloc ((t->level2_size + 1) * sizeof (uint32_t));
+  uint32_t level2_offset, level3_offset;
+
+  /* Uniquify level3 blocks.  */
+  k = 0;
+  for (j = 0; j < t->level3_size; j++)
+    {
+      for (i = 0; i < k; i++)
+        if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
+                    (1 << t->p) * sizeof (uint32_t)) == 0)
+          break;
+      /* Relocate block j to block i.  */
+      reorder3[j] = i;
+      if (i == k)
+        {
+          /* Block j is new; move it down into the next free position.  */
+          if (i != j)
+            memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
+                    (1 << t->p) * sizeof (uint32_t));
+          k++;
+        }
+    }
+  t->level3_size = k;
+
+  for (i = 0; i < (t->level2_size << t->q); i++)
+    if (t->level2[i] != EMPTY)
+      t->level2[i] = reorder3[t->level2[i]];
+
+  /* Uniquify level2 blocks.  */
+  k = 0;
+  for (j = 0; j < t->level2_size; j++)
+    {
+      for (i = 0; i < k; i++)
+        if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
+                    (1 << t->q) * sizeof (uint32_t)) == 0)
+          break;
+      /* Relocate block j to block i.  */
+      reorder2[j] = i;
+      if (i == k)
+        {
+          if (i != j)
+            memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
+                    (1 << t->q) * sizeof (uint32_t));
+          k++;
+        }
+    }
+  t->level2_size = k;
+
+  for (i = 0; i < t->level1_size; i++)
+    if (t->level1[i] != EMPTY)
+      t->level1[i] = reorder2[t->level1[i]];
+
+  t->result_size =
+    5 * sizeof (uint32_t)
+    + t->level1_size * sizeof (uint32_t)
+    + (t->level2_size << t->q) * sizeof (uint32_t)
+    + (t->level3_size << t->p) * sizeof (uint32_t);
+
+  /* Byte offsets of level 2 and level 3 from the start of the
+     structure.  */
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t->level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t->level1_size * sizeof (uint32_t)
+    + (t->level2_size << t->q) * sizeof (uint32_t);
+
+  start_locale_structure (file);
+  add_locale_uint32 (file, t->q + t->p + 5);
+  add_locale_uint32 (file, t->level1_size);
+  add_locale_uint32 (file, t->p + 5);
+  add_locale_uint32 (file, (1 << t->q) - 1);
+  add_locale_uint32 (file, (1 << t->p) - 1);
+
+  /* Block numbers are written as byte offsets; unused entries as 0.  */
+  for (i = 0; i < t->level1_size; i++)
+    add_locale_uint32
+      (file,
+       t->level1[i] == EMPTY
+       ? 0
+       : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
+
+  for (i = 0; i < (t->level2_size << t->q); i++)
+    add_locale_uint32
+      (file,
+       t->level2[i] == EMPTY
+       ? 0
+       : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
+
+  add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
+  end_locale_structure (file);
+
+  free (reorder3);
+  free (reorder2);
+
+  if (t->level1_alloc > 0)
+    free (t->level1);
+  if (t->level2_alloc > 0)
+    free (t->level2);
+  if (t->level3_alloc > 0)
+    free (t->level3);
+}
+
+/* Flattens the included transliterations into a translit list.
+   Inserts them in the list at `cursor', and returns the new cursor.
+   Every `include' entry of CTYPE is consumed; included locales are
+   flattened recursively before their own list is spliced in.  */
+static struct translit_t **
+translit_flatten (struct locale_ctype_t *ctype,
+                  const struct charmap_t *charmap,
+                  struct translit_t **cursor)
+{
+  while (ctype->translit_include != NULL)
+    {
+      const char *locale_name = ctype->translit_include->copy_locale;
+      const char *repertoire_name = ctype->translit_include->copy_repertoire;
+      struct localedef_t *included;
+      struct locale_ctype_t *src;
+
+      /* Unchain the include statement.  During the depth-first traversal
+         we don't want to visit any locale more than once.  */
+      ctype->translit_include = ctype->translit_include->next;
+
+      included = find_locale (LC_CTYPE, locale_name, repertoire_name,
+                              charmap);
+      src = included != NULL ? included->categories[LC_CTYPE].ctype : NULL;
+
+      if (src == NULL)
+        {
+          WITH_CUR_LOCALE (error (0, 0, _("\
+%s: transliteration data from locale `%s' not available"),
+                                  "LC_CTYPE", locale_name));
+          continue;
+        }
+
+      cursor = translit_flatten (src, charmap, cursor);
+      assert (src->translit_include == NULL);
+
+      if (src->translit != NULL)
+        {
+          /* Splice the whole list of SRC in front of *cursor.  */
+          struct translit_t *head = src->translit;
+          struct translit_t *tail = head;
+
+          while (tail->next != NULL)
+            tail = tail->next;
+
+          tail->next = *cursor;
+          *cursor = head;
+
+          /* Avoid any risk of circular lists.  */
+          src->translit = NULL;
+
+          /* Later insertions go behind what was just added.  */
+          cursor = &tail->next;
+        }
+
+      /* The first default_missing found wins.  */
+      if (ctype->default_missing == NULL)
+        ctype->default_missing = src->default_missing;
+    }
+
+  return cursor;
+}
+
+/* Build the final tables of CTYPE from the collections filled in while
+ the locale was read: the byte-indexed class and mapping tables, the
+ 3-level tables for wide characters, the width table, MB_CUR_MAX and
+ the sorted transliteration tables. CHARMAP supplies the character
+ set; REPERTOIRE is not referenced in this function. */
+static void
+allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
+{
+ size_t idx, nr;
+ const void *key;
+ size_t len;
+ void *vdata;
+ void *curs;
+
+ /* You wonder about this amount of memory? This is only because some
+ users do not manage to address the array with unsigned values or
+ data types with range >= 256. '\200' would result in the array
+ index -128. To help these poor people we duplicate the entries for
+ 128 up to 255 below the entry for \0. */
+ ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
+ ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
+ ctype->class_b = (uint32_t **)
+ xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
+ ctype->class_3level = (struct wctype_table *)
+ xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
+
+ /* This is the array accessed using the multibyte string elements. */
+ for (idx = 0; idx < 256; ++idx)
+ ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
+
+ /* Mirror first 127 entries. We must take care that entry -1 is not
+ mirrored because EOF == -1. */
+ for (idx = 0; idx < 127; ++idx)
+ ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
+
+ /* The 32 bit array contains all characters < 0x100. */
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ if (ctype->charnames[idx] < 0x100)
+ ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
+
+ /* One 256-bit bitmap per class, indexed by byte value. */
+ for (nr = 0; nr < ctype->nr_charclass; nr++)
+ {
+ ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
+
+ /* We only set CLASS_B for the bits in the ISO C classes, not
+ the user defined classes. The number should not change but
+ who knows. */
+#define LAST_ISO_C_BIT 11
+ if (nr <= LAST_ISO_C_BIT)
+ for (idx = 0; idx < 256; ++idx)
+ if (ctype->class256_collection[idx] & _ISbit (nr))
+ ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
+ }
+
+ /* One 3-level table per class, holding the wide characters whose
+ class word has the bit of that class set. */
+ for (nr = 0; nr < ctype->nr_charclass; nr++)
+ {
+ struct wctype_table *t;
+
+ t = &ctype->class_3level[nr];
+ t->p = 4; /* or: 5 */
+ t->q = 7; /* or: 6 */
+ wctype_table_init (t);
+
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ if (ctype->class_collection[idx] & _ISwbit (nr))
+ wctype_table_add (t, ctype->charnames[idx]);
+
+ /* NOTE(review): in the code visible here result_size is assigned
+ only by add_locale_wctype_table, and wctype_table_init does not
+ set it; confirm it holds a meaningful value at this point. */
+ if (verbose)
+ WITH_CUR_LOCALE (fprintf (stderr, _("\
+%s: table for class \"%s\": %lu bytes\n"),
+ "LC_CTYPE", ctype->classnames[nr],
+ (unsigned long int) t->result_size));
+ }
+
+ /* Room for table of mappings. */
+ ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
+ ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
+ * sizeof (uint32_t *));
+ ctype->map_3level = (struct wctrans_table *)
+ xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
+
+ /* Fill in all mappings. */
+ for (idx = 0; idx < 2; ++idx)
+ {
+ unsigned int idx2;
+
+ /* Allocate table. */
+ ctype->map_b[idx] = (uint32_t *)
+ xmalloc ((256 + 128) * sizeof (uint32_t));
+
+ /* Copy values from collection. */
+ for (idx2 = 0; idx2 < 256; ++idx2)
+ ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
+
+ /* Mirror first 127 entries. We must take care not to map entry
+ -1 because EOF == -1. */
+ for (idx2 = 0; idx2 < 127; ++idx2)
+ ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
+
+ /* EOF must map to EOF. */
+ ctype->map_b[idx][127] = EOF;
+ }
+
+ for (idx = 0; idx < ctype->map_collection_nr; ++idx)
+ {
+ unsigned int idx2;
+
+ /* Allocate table. */
+ ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
+
+ /* Copy values from collection. Default is identity mapping. */
+ for (idx2 = 0; idx2 < 256; ++idx2)
+ ctype->map32_b[idx][idx2] =
+ (ctype->map_collection[idx][idx2] != 0
+ ? ctype->map_collection[idx][idx2]
+ : idx2);
+ }
+
+ /* One 3-level table per mapping; only non-zero entries are added. */
+ for (nr = 0; nr < ctype->map_collection_nr; nr++)
+ {
+ struct wctrans_table *t;
+
+ t = &ctype->map_3level[nr];
+ t->p = 7;
+ t->q = 9;
+ wctrans_table_init (t);
+
+ for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
+ if (ctype->map_collection[nr][idx] != 0)
+ wctrans_table_add (t, ctype->charnames[idx],
+ ctype->map_collection[nr][idx]);
+
+ if (verbose)
+ WITH_CUR_LOCALE (fprintf (stderr, _("\
+%s: table for map \"%s\": %lu bytes\n"),
+ "LC_CTYPE", ctype->mapnames[nr],
+ (unsigned long int) t->result_size));
+ }
+
+ /* Extra array for class and map names. */
+ ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
+ * sizeof (uint32_t));
+ ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
+ * sizeof (uint32_t));
+
+ ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
+ ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
+
+ /* Array for width information. Because the expected widths are very
+ small (never larger than 2) we use only one single byte. This
+ saves space.
+ We put only printable characters in the table. wcwidth is specified
+ to return -1 for non-printable characters. Doing the check here
+ saves a run-time check.
+ But we put L'\0' in the table. This again saves a run-time check. */
+ {
+ struct wcwidth_table *t;
+
+ t = &ctype->width;
+ t->p = 7;
+ t->q = 9;
+ wcwidth_table_init (t);
+
+ /* First set all the printable characters of the character set to
+ the default width. */
+ curs = NULL;
+ while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
+ {
+ struct charseq *data = (struct charseq *) vdata;
+
+ /* Resolve the UCS4 value on first use and cache it in the entry. */
+ if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ data->ucs4 = repertoire_find_value (ctype->repertoire,
+ data->name, len);
+
+ if (data->ucs4 != ILLEGAL_CHAR_VALUE)
+ {
+ uint32_t *class_bits =
+ find_idx (ctype, &ctype->class_collection, NULL,
+ &ctype->class_collection_act, data->ucs4);
+
+ if (class_bits != NULL && (*class_bits & BITw (tok_print)))
+ wcwidth_table_add (t, data->ucs4, charmap->width_default);
+ }
+ }
+
+ /* Now add the explicitly specified widths. */
+ if (charmap->width_rules != NULL)
+ for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
+ {
+ unsigned char bytes[charmap->mb_cur_max];
+ int nbytes = charmap->width_rules[cnt].from->nbytes;
+
+ /* We have the range of character for which the width is
+ specified described using byte sequences of the multibyte
+ charset. We have to convert this to UCS4 now. And we
+ cannot simply convert the beginning and the end of the
+ sequence, we have to iterate over the byte sequence and
+ convert it for every single character. */
+ memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
+
+ while (nbytes < charmap->width_rules[cnt].to->nbytes
+ || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
+ nbytes) <= 0)
+ {
+ /* Find the UCS value for `bytes'. */
+ int inner;
+ uint32_t wch;
+ struct charseq *seq =
+ charmap_find_symbol (charmap, (char *) bytes, nbytes);
+
+ if (seq == NULL)
+ wch = ILLEGAL_CHAR_VALUE;
+ else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+ wch = seq->ucs4;
+ else
+ wch = repertoire_find_value (ctype->repertoire, seq->name,
+ strlen (seq->name));
+
+ if (wch != ILLEGAL_CHAR_VALUE)
+ {
+ /* Store the value. */
+ uint32_t *class_bits =
+ find_idx (ctype, &ctype->class_collection, NULL,
+ &ctype->class_collection_act, wch);
+
+ if (class_bits != NULL && (*class_bits & BITw (tok_print)))
+ wcwidth_table_add (t, wch,
+ charmap->width_rules[cnt].width);
+ }
+
+ /* "Increment" the bytes sequence. */
+ inner = nbytes - 1;
+ while (inner >= 0 && bytes[inner] == 0xff)
+ --inner;
+
+ if (inner < 0)
+ {
+ /* We have to extend the byte sequence. */
+ if (nbytes >= charmap->width_rules[cnt].to->nbytes)
+ break;
+
+ bytes[0] = 1;
+ memset (&bytes[1], 0, nbytes);
+ ++nbytes;
+ }
+ else
+ {
+ /* Bump the last byte that is not 0xff and clear all
+ bytes after it. */
+ ++bytes[inner];
+ while (++inner < nbytes)
+ bytes[inner] = 0;
+ }
+ }
+ }
+
+ /* Set the width of L'\0' to 0. */
+ wcwidth_table_add (t, 0, 0);
+
+ if (verbose)
+ WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
+ "LC_CTYPE", (unsigned long int) t->result_size));
+ }
+
+ /* Set MB_CUR_MAX. */
+ ctype->mb_cur_max = charmap->mb_cur_max;
+
+ /* Now determine the table for the transliteration information.
+
+ XXX It is not yet clear to me whether it is worth implementing a
+ complicated algorithm which uses a hash table to locate the entries.
+ For now I'll use a simple array which can be searched using binary
+ search. */
+ if (ctype->translit_include != NULL)
+ /* Traverse the locales mentioned in the `include' statements in a
+ depth-first way and fold in their transliteration information. */
+ translit_flatten (ctype, charmap, &ctype->translit);
+
+ if (ctype->translit != NULL)
+ {
+ /* First count how many entries we have. This is the upper limit
+ since some entries from the included files might be overwritten. */
+ size_t number = 0;
+ struct translit_t *runp = ctype->translit;
+ struct translit_t **sorted;
+ size_t from_len, to_len;
+
+ while (runp != NULL)
+ {
+ ++number;
+ runp = runp->next;
+ }
+
+ /* Next we allocate an array large enough and fill in the values.
+ NOTE(review): the elements of `sorted' are `struct translit_t *';
+ the sizeof operand below names a different pointer type. */
+ sorted = (struct translit_t **) alloca (number
+ * sizeof (struct translit_t **));
+ runp = ctype->translit;
+ number = 0;
+ do
+ {
+ /* Search for the place where to insert this string.
+ XXX Better use a real sorting algorithm later. */
+ size_t idx = 0;
+ int replace = 0;
+
+ while (idx < number)
+ {
+ int res = wcscmp ((const wchar_t *) sorted[idx]->from,
+ (const wchar_t *) runp->from);
+ if (res == 0)
+ {
+ replace = 1;
+ break;
+ }
+ if (res > 0)
+ break;
+ ++idx;
+ }
+
+ /* An entry with the same `from' string takes the place of the
+ one seen earlier; otherwise insert and keep the array sorted. */
+ if (replace)
+ sorted[idx] = runp;
+ else
+ {
+ memmove (&sorted[idx + 1], &sorted[idx],
+ (number - idx) * sizeof (struct translit_t *));
+ sorted[idx] = runp;
+ ++number;
+ }
+
+ runp = runp->next;
+ }
+ while (runp != NULL);
+
+ /* The next step is putting all the possible transliteration
+ strings in one memory block so that we can write it out.
+ We need several different blocks:
+ - index to the from-string array
+ - from-string array
+ - index to the to-string array
+ - to-string array.
+ */
+ from_len = to_len = 0;
+ for (size_t cnt = 0; cnt < number; ++cnt)
+ {
+ struct translit_to_t *srunp;
+ from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
+ srunp = sorted[cnt]->to;
+ while (srunp != NULL)
+ {
+ to_len += wcslen ((const wchar_t *) srunp->str) + 1;
+ srunp = srunp->next;
+ }
+ /* Plus one for the extra NUL character marking the end of
+ the list for the current entry. */
+ ++to_len;
+ }
+
+ /* We can allocate the arrays for the results. */
+ ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
+ ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
+ ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
+ ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
+
+ /* Second pass: copy the strings and record where each one starts. */
+ from_len = 0;
+ to_len = 0;
+ for (size_t cnt = 0; cnt < number; ++cnt)
+ {
+ size_t len;
+ struct translit_to_t *srunp;
+
+ ctype->translit_from_idx[cnt] = from_len;
+ ctype->translit_to_idx[cnt] = to_len;
+
+ len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
+ wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
+ (const wchar_t *) sorted[cnt]->from, len);
+ from_len += len;
+
+ /* Same value as stored above; to_len has not changed since. */
+ ctype->translit_to_idx[cnt] = to_len;
+ srunp = sorted[cnt]->to;
+ while (srunp != NULL)
+ {
+ len = wcslen ((const wchar_t *) srunp->str) + 1;
+ wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
+ (const wchar_t *) srunp->str, len);
+ to_len += len;
+ srunp = srunp->next;
+ }
+ ctype->translit_to_tbl[to_len++] = L'\0';
+ }
+
+ /* Store the information about the length. */
+ ctype->translit_idx_size = number;
+ ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
+ ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
+ }
+ else
+ {
+ /* No transliteration data at all.
+ NOTE(review): translit_to_idx is not assigned on this path. */
+ ctype->translit_from_idx = no_str;
+ ctype->translit_from_tbl = no_str;
+ ctype->translit_to_tbl = no_str;
+ ctype->translit_idx_size = 0;
+ ctype->translit_from_tbl_size = 0;
+ ctype->translit_to_tbl_size = 0;
+ }
+}
diff --git a/REORG.TODO/locale/programs/ld-identification.c b/REORG.TODO/locale/programs/ld-identification.c
new file mode 100644
index 0000000000..3e3ea649d7
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-identification.c
@@ -0,0 +1,416 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <langinfo.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_IDENTIFICATION locale.
+ The record is allocated zero-filled by identification_startup, so every
+ member starts out as NULL. identification_read stores the string
+ operand of the keyword with the same name, and identification_finish
+ replaces every member that is still NULL by "". */
+struct locale_identification_t
+{
+ const char *title;
+ const char *source;
+ const char *address;
+ const char *contact;
+ const char *email;
+ const char *tel;
+ const char *fax;
+ const char *language;
+ const char *territory;
+ const char *audience;
+ const char *application;
+ const char *abbreviation;
+ const char *revision;
+ const char *date;
+ const char *category[__LC_LAST]; /* First operand of each `category' line, indexed by LC_xxx value; the LC_ALL slot is preset to "" and never parsed. */
+};
+
+
+/* Printable name of every locale category, indexed by its LC_xxx value.
+   Used for the diagnostics about `category' lines.  The initializers
+   are designated, so their textual order is irrelevant.  */
+static const char *category_name[__LC_LAST] =
+{
+  [LC_ADDRESS] = "LC_ADDRESS",
+  [LC_ALL] = "LC_ALL",
+  [LC_COLLATE] = "LC_COLLATE",
+  [LC_CTYPE] = "LC_CTYPE",
+  [LC_IDENTIFICATION] = "LC_IDENTIFICATION",
+  [LC_MEASUREMENT] = "LC_MEASUREMENT",
+  [LC_MESSAGES] = "LC_MESSAGES",
+  [LC_MONETARY] = "LC_MONETARY",
+  [LC_NAME] = "LC_NAME",
+  [LC_NUMERIC] = "LC_NUMERIC",
+  [LC_PAPER] = "LC_PAPER",
+  [LC_TELEPHONE] = "LC_TELEPHONE",
+  [LC_TIME] = "LC_TIME"
+};
+
+
+/* Get ready to process an LC_IDENTIFICATION section.  Unless
+   IGNORE_CONTENT is nonzero, LOCALE receives a fresh zero-filled
+   identification record whose LC_ALL category slot is preset to "".
+   If a line reader LR is given, its translate_strings flag is set and
+   its return_widestr flag is cleared.  */
+static void
+identification_startup (struct linereader *lr, struct localedef_t *locale,
+                        int ignore_content)
+{
+  if (!ignore_content)
+    {
+      struct locale_identification_t *fresh
+        = (struct locale_identification_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_IDENTIFICATION].identification = fresh;
+      /* LC_ALL is never parsed, so give it a value right away.  */
+      fresh->category[LC_ALL] = "";
+    }
+
+  if (lr == NULL)
+    return;
+
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+/* Complete the LC_IDENTIFICATION data of LOCALE after parsing.
+
+   If the category has no record of its own, a `copy' directive is
+   followed transitively to the locale that has one.  If there is still
+   no record, a diagnostic is issued (unless be_quiet) and an empty one
+   is created.  Afterwards every unset string field and every unset
+   per-category standard is replaced by "", with a diagnostic in verbose
+   mode, and every standard that was given is checked against the list
+   of known standards.  */
+void
+identification_finish (struct localedef_t *locale,
+                       const struct charmap_t *charmap)
+{
+  /* Only list the standards we care about.  This is based on the
+     ISO 30112 WD10 [2014] standard which supersedes all previous
+     revisions of the ISO 14652 standard.  */
+  static const char * const standards[] =
+    {
+      "posix:1993",
+      "i18n:2004",
+      "i18n:2012",
+    };
+  const size_t nstandards = sizeof (standards) / sizeof (standards[0]);
+  struct locale_identification_t *identification
+    = locale->categories[LC_IDENTIFICATION].identification;
+  int nothing = 0;
+  size_t num;
+
+  /* Resolve copying: walk the chain of `copy' directives until a locale
+     with an actual definition (or without a further copy) is found.  */
+  if (identification == NULL
+      && locale->copy_name[LC_IDENTIFICATION] != NULL)
+    {
+      struct localedef_t *from = locale;
+
+      do
+        from = find_locale (LC_IDENTIFICATION,
+                            from->copy_name[LC_IDENTIFICATION],
+                            from->repertoire_name, charmap);
+      while (from->categories[LC_IDENTIFICATION].identification == NULL
+             && from->copy_name[LC_IDENTIFICATION] != NULL);
+
+      identification = locale->categories[LC_IDENTIFICATION].identification
+        = from->categories[LC_IDENTIFICATION].identification;
+    }
+
+  /* Completely missing definition: warn and start from an empty record.  */
+  if (identification == NULL)
+    {
+      if (! be_quiet)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_IDENTIFICATION"));
+      identification_startup (NULL, locale, 0);
+      identification
+        = locale->categories[LC_IDENTIFICATION].identification;
+      nothing = 1;
+    }
+
+  /* The plain string fields, in the order they are checked, together
+     with the name used in the diagnostic.  */
+  const struct
+  {
+    const char **slot;
+    const char *name;
+  } fields[] =
+    {
+      { &identification->title, "title" },
+      { &identification->source, "source" },
+      { &identification->address, "address" },
+      { &identification->contact, "contact" },
+      { &identification->email, "email" },
+      { &identification->tel, "tel" },
+      { &identification->fax, "fax" },
+      { &identification->language, "language" },
+      { &identification->territory, "territory" },
+      { &identification->audience, "audience" },
+      { &identification->application, "application" },
+      { &identification->abbreviation, "abbreviation" },
+      { &identification->revision, "revision" },
+      { &identification->date, "date" },
+    };
+
+  for (num = 0; num < sizeof (fields) / sizeof (fields[0]); ++num)
+    if (*fields[num].slot == NULL)
+      {
+        if (verbose && ! nothing)
+          WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                                  "LC_IDENTIFICATION", fields[num].name));
+        *fields[num].slot = "";
+      }
+
+  for (num = 0; num < __LC_LAST; ++num)
+    {
+      const char *standard;
+      size_t i;
+
+      /* We don't accept/parse this category, so skip it early.  */
+      if (num == LC_ALL)
+        continue;
+
+      standard = identification->category[num];
+      if (standard == NULL)
+        {
+          if (verbose && ! nothing)
+            WITH_CUR_LOCALE (error (0, 0, _("\
+%s: no identification for category `%s'"),
+                                    "LC_IDENTIFICATION", category_name[num]));
+          identification->category[num] = "";
+          continue;
+        }
+
+      /* Look the given standard up in the list of known ones.  */
+      for (i = 0; i < nstandards; ++i)
+        if (strcmp (standard, standards[i]) == 0)
+          break;
+
+      if (i == nstandards)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+%s: unknown standard `%s' for category `%s'"),
+                                "LC_IDENTIFICATION",
+                                standard,
+                                category_name[num]));
+    }
+}
+
+
+/* Write the LC_IDENTIFICATION category of LOCALE below OUTPUT_PATH.
+   The fourteen plain string fields are emitted first, then a structure
+   holding the per-category standards (every category except LC_ALL, in
+   index order), and finally the code set name of CHARMAP.  */
+void
+identification_output (struct localedef_t *locale,
+                       const struct charmap_t *charmap,
+                       const char *output_path)
+{
+  struct locale_identification_t *ident
+    = locale->categories[LC_IDENTIFICATION].identification;
+  /* The plain fields in the order in which they are written.  */
+  const char *const scalars[] =
+    {
+      ident->title,
+      ident->source,
+      ident->address,
+      ident->contact,
+      ident->email,
+      ident->tel,
+      ident->fax,
+      ident->language,
+      ident->territory,
+      ident->audience,
+      ident->application,
+      ident->abbreviation,
+      ident->revision,
+      ident->date,
+    };
+  struct locale_file file;
+  size_t idx;
+
+  init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION));
+
+  for (idx = 0; idx < sizeof (scalars) / sizeof (scalars[0]); ++idx)
+    add_locale_string (&file, scalars[idx]);
+
+  start_locale_structure (&file);
+  for (idx = 0; idx < __LC_LAST; ++idx)
+    {
+      if (idx == LC_ALL)
+        continue;
+      add_locale_string (&file, ident->category[idx]);
+    }
+  end_locale_structure (&file);
+
+  add_locale_string (&file, charmap->code_set_name);
+  write_locale_data (output_path, LC_IDENTIFICATION, "LC_IDENTIFICATION",
+                     &file);
+}
+
+
+/* The parser for the LC_IDENTIFICATION section of the locale definition.
+ Tokens are read from LDFILE until `END LC_IDENTIFICATION' is seen and
+ the string operands are stored in the identification record of RESULT.
+ A leading `copy' statement is handed over to handle_copy. If
+ IGNORE_CONTENT is nonzero the operands are skipped instead of stored.
+ Reaching the end of the file before `END' is reported as an error. */
+void
+identification_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct locale_identification_t *identification;
+ struct token *now;
+ struct token *arg;
+ struct token *cattok;
+ int category;
+ enum token_t nowtok;
+
+ /* The rest of the line containing `LC_IDENTIFICATION' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+ do /* Skip the empty lines in front of the first statement. */
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result,
+ tok_lc_identification, LC_IDENTIFICATION,
+ "LC_IDENTIFICATION", ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. With IGNORE_CONTENT set the startup
+ function allocates nothing, which is why every case below bails out
+ before it touches IDENTIFICATION. */
+ identification_startup (ldfile, result, ignore_content);
+ identification = result->categories[LC_IDENTIFICATION].identification;
+
+ while (1)
+ {
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+#define STR_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_string) \
+ goto err_label; \
+ if (identification->cat != NULL) \
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_IDENTIFICATION", #cat); \
+ else if (!ignore_content && arg->val.str.startmb == NULL) \
+ { \
+ lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_IDENTIFICATION", #cat); \
+ identification->cat = ""; \
+ } \
+ else if (!ignore_content) \
+ identification->cat = arg->val.str.startmb; \
+ break
+
+ STR_ELEM (title);
+ STR_ELEM (source);
+ STR_ELEM (address);
+ STR_ELEM (contact);
+ STR_ELEM (email);
+ STR_ELEM (tel);
+ STR_ELEM (fax);
+ STR_ELEM (language);
+ STR_ELEM (territory);
+ STR_ELEM (audience);
+ STR_ELEM (application);
+ STR_ELEM (abbreviation);
+ STR_ELEM (revision);
+ STR_ELEM (date);
+
+ case tok_category:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ /* We expect two operands. The first one is the string that gets
+ stored for the category; its token is looked at again further
+ down, after two more lr_token calls.
+ NOTE(review): this presumably relies on lr_token leaving the
+ string value of the earlier token untouched -- confirm against
+ the line reader. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok != tok_string && arg->tok != tok_ident)
+ goto err_label;
+ /* Next is a semicolon. */
+ cattok = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (cattok->tok != tok_semicolon)
+ goto err_label;
+ /* Now a LC_xxx identifier. */
+ cattok = lr_token (ldfile, charmap, result, NULL, verbose);
+ switch (cattok->tok)
+ {
+#define CATEGORY(lname, uname) \
+ case tok_lc_##lname: \
+ category = LC_##uname; \
+ break
+
+ CATEGORY (identification, IDENTIFICATION);
+ CATEGORY (ctype, CTYPE);
+ CATEGORY (collate, COLLATE);
+ CATEGORY (time, TIME);
+ CATEGORY (numeric, NUMERIC);
+ CATEGORY (monetary, MONETARY);
+ CATEGORY (messages, MESSAGES);
+ CATEGORY (paper, PAPER);
+ CATEGORY (name, NAME);
+ CATEGORY (address, ADDRESS);
+ CATEGORY (telephone, TELEPHONE);
+ CATEGORY (measurement, MEASUREMENT);
+
+ default:
+ goto err_label;
+ }
+ if (identification->category[category] != NULL)
+ {
+ lr_error (ldfile, _("\
+%s: duplicate category version definition"), "LC_IDENTIFICATION");
+ free (arg->val.str.startmb);
+ }
+ else
+ identification->category[category] = arg->val.str.startmb;
+ break;
+
+ case tok_end:
+ /* Next we assume `LC_IDENTIFICATION'. On end of file we only
+ leave the switch; the loop then terminates at its next
+ iteration and the premature-end-of-file error is reported. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break;
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"),
+ "LC_IDENTIFICATION");
+ else if (arg->tok != tok_lc_identification)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_IDENTIFICATION");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_identification);
+ return;
+
+ default:
+ err_label: /* Also the target of the operand checks above. */
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_IDENTIFICATION");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_IDENTIFICATION");
+}
diff --git a/REORG.TODO/locale/programs/ld-measurement.c b/REORG.TODO/locale/programs/ld-measurement.c
new file mode 100644
index 0000000000..92c849ebfb
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-measurement.c
@@ -0,0 +1,233 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <langinfo.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_MEASUREMENT locale.
+ The record is allocated zero-filled, and both measurement_read and
+ measurement_finish treat the value 0 as "field not defined yet". */
+struct locale_measurement_t
+{
+ unsigned char measurement; /* measurement_finish defaults this to 1 and complains about values above 3. */
+};
+
+
+/* Get ready to process an LC_MEASUREMENT section.  Unless
+   IGNORE_CONTENT is nonzero, LOCALE receives a fresh zero-filled
+   measurement record.  If a line reader LR is given, its
+   translate_strings flag is set and its return_widestr flag is
+   cleared.  */
+static void
+measurement_startup (struct linereader *lr, struct localedef_t *locale,
+                     int ignore_content)
+{
+  if (!ignore_content)
+    {
+      struct locale_measurement_t *fresh
+        = (struct locale_measurement_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_MEASUREMENT].measurement = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+/* Complete the LC_MEASUREMENT data of LOCALE after parsing.
+
+   If the category has no record of its own, a `copy' directive is
+   followed transitively to the locale that has one.  If there is still
+   no record, a diagnostic is issued (unless be_quiet) and an empty one
+   is created.  An undefined `measurement' field is then set to 1, and a
+   defined value greater than 3 is reported as invalid.  */
+void
+measurement_finish (struct localedef_t *locale,
+                    const struct charmap_t *charmap)
+{
+  struct locale_measurement_t *data
+    = locale->categories[LC_MEASUREMENT].measurement;
+  int nothing = 0;
+
+  /* Resolve copying: walk the chain of `copy' directives until a locale
+     with an actual definition (or without a further copy) is found.  */
+  if (data == NULL && locale->copy_name[LC_MEASUREMENT] != NULL)
+    {
+      struct localedef_t *from = locale;
+
+      do
+        from = find_locale (LC_MEASUREMENT,
+                            from->copy_name[LC_MEASUREMENT],
+                            from->repertoire_name, charmap);
+      while (from->categories[LC_MEASUREMENT].measurement == NULL
+             && from->copy_name[LC_MEASUREMENT] != NULL);
+
+      data = locale->categories[LC_MEASUREMENT].measurement
+        = from->categories[LC_MEASUREMENT].measurement;
+    }
+
+  /* Completely missing definition: warn and start from an empty record.  */
+  if (data == NULL)
+    {
+      if (! be_quiet)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_MEASUREMENT"));
+      measurement_startup (NULL, locale, 0);
+      data = locale->categories[LC_MEASUREMENT].measurement;
+      nothing = 1;
+    }
+
+  if (data->measurement == 0)
+    {
+      if (! nothing)
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                                "LC_MEASUREMENT", "measurement"));
+      /* Use as the default value the value of the i18n locale.  */
+      data->measurement = 1;
+      return;
+    }
+
+  if (data->measurement > 3)
+    WITH_CUR_LOCALE (error (0, 0, _("%s: invalid value for field `%s'"),
+                            "LC_MEASUREMENT", "measurement"));
+}
+
+
+/* Write the LC_MEASUREMENT category of LOCALE below OUTPUT_PATH: the
+   measurement value as a single character item, followed by the code
+   set name of CHARMAP.  */
+void
+measurement_output (struct localedef_t *locale,
+                    const struct charmap_t *charmap, const char *output_path)
+{
+  struct locale_file file;
+  const unsigned char unit_system
+    = locale->categories[LC_MEASUREMENT].measurement->measurement;
+
+  init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT));
+
+  add_locale_char (&file, unit_system);
+  add_locale_string (&file, charmap->code_set_name);
+
+  write_locale_data (output_path, LC_MEASUREMENT, "LC_MEASUREMENT", &file);
+}
+
+
+/* The parser for the LC_MEASUREMENT section of the locale definition.
+ Tokens are read from LDFILE until `END LC_MEASUREMENT' is seen and the
+ numeric operand of `measurement' is stored in the measurement record of
+ RESULT. A leading `copy' statement is handed over to handle_copy. If
+ IGNORE_CONTENT is nonzero the operand is skipped instead of stored.
+ Reaching the end of the file before `END' is reported as an error. */
+void
+measurement_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct locale_measurement_t *measurement;
+ struct token *now;
+ struct token *arg;
+ enum token_t nowtok;
+
+ /* The rest of the line containing `LC_MEASUREMENT' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+ do /* Skip the empty lines in front of the first statement. */
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result,
+ tok_lc_measurement, LC_MEASUREMENT, "LC_MEASUREMENT",
+ ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. With IGNORE_CONTENT set the startup
+ function allocates nothing, which is why the case below bails out
+ before it touches MEASUREMENT. */
+ measurement_startup (ldfile, result, ignore_content);
+ measurement = result->categories[LC_MEASUREMENT].measurement;
+
+ while (1)
+ {
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+#define INT_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_number) \
+ goto err_label; \
+ else if (measurement->cat != 0) \
+ lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+ "LC_MEASUREMENT", #cat); \
+ else if (!ignore_content) \
+ measurement->cat = arg->val.num; \
+ break
+
+ INT_ELEM (measurement);
+
+ case tok_end:
+ /* Next we assume `LC_MEASUREMENT'. On end of file we only leave
+ the switch; the loop then terminates at its next iteration and
+ the premature-end-of-file error is reported. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break;
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"),
+ "LC_MEASUREMENT");
+ else if (arg->tok != tok_lc_measurement)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_MEASUREMENT");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_measurement);
+ return;
+
+ default:
+ err_label: /* Also the target of the operand check above. */
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_MEASUREMENT");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"),
+ "LC_MEASUREMENT");
+}
diff --git a/REORG.TODO/locale/programs/ld-messages.c b/REORG.TODO/locale/programs/ld-messages.c
new file mode 100644
index 0000000000..bc86ec0ccf
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-messages.c
@@ -0,0 +1,315 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <langinfo.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "linereader.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_MESSAGES locale.
+ The record is allocated zero-filled, so every member starts out as
+ NULL until messages_read stores the string operand of the keyword with
+ the same name. messages_finish fills in whatever is still missing. */
+struct locale_messages_t
+{
+ const char *yesexpr; /* Compiled with regcomp as a check in messages_finish; "^[yY]" if undefined. */
+ const char *noexpr; /* Compiled with regcomp as a check in messages_finish; "^[nN]" if undefined. */
+ const char *yesstr; /* Optional; "" if undefined. */
+ const char *nostr; /* Optional; "" if undefined. */
+};
+
+
+/* Get ready to process an LC_MESSAGES section.  Unless IGNORE_CONTENT
+   is nonzero, LOCALE receives a fresh zero-filled messages record.  If
+   a line reader LR is given, its translate_strings flag is set and its
+   return_widestr flag is cleared.  */
+static void
+messages_startup (struct linereader *lr, struct localedef_t *locale,
+                  int ignore_content)
+{
+  if (!ignore_content)
+    {
+      struct locale_messages_t *fresh
+        = (struct locale_messages_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_MESSAGES].messages = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+/* Check one of the two mandatory regular-expression fields of
+   LC_MESSAGES.  EXPR is the value taken from the locale record (NULL if
+   the field was never set), NAME is the field name used in diagnostics
+   and FALLBACK is the pattern used in place of an unset field.  NOTHING
+   is nonzero if the whole category was missing, in which case the
+   "undefined" diagnostic is suppressed.  Returns the value the field
+   has to have afterwards: FALLBACK for an unset field, EXPR otherwise.  */
+static const char *
+messages_check_expr (const char *expr, const char *name,
+                     const char *fallback, int nothing)
+{
+  regex_t re;
+  int result;
+
+  if (expr == NULL)
+    {
+      if (! be_quiet && ! nothing)
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' undefined"),
+                                "LC_MESSAGES", name));
+      return fallback;
+    }
+
+  if (expr[0] == '\0')
+    {
+      if (!be_quiet)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value for field `%s' must not be an empty string"),
+                                "LC_MESSAGES", name));
+      return expr;
+    }
+
+  /* Test whether it is a correct regular expression.  */
+  result = regcomp (&re, expr, REG_EXTENDED);
+  if (result == 0)
+    /* The compiled pattern was only needed for the check.  It must be
+       released exactly when compilation succeeded; after a failed
+       regcomp there is nothing to free.  */
+    regfree (&re);
+  else if (!be_quiet)
+    {
+      char errbuf[BUFSIZ];
+
+      (void) regerror (result, &re, errbuf, BUFSIZ);
+      WITH_CUR_LOCALE (error (0, 0, _("\
+%s: no correct regular expression for field `%s': %s"),
+                              "LC_MESSAGES", name, errbuf));
+    }
+
+  return expr;
+}
+
+
+/* Complete the LC_MESSAGES data of LOCALE after parsing.
+
+   If the category has no record of its own, a `copy' directive is
+   followed transitively to the locale that has one.  If there is still
+   no record, a diagnostic is issued (unless be_quiet) and an empty one
+   is created.  Afterwards the optional fields yesstr and nostr default
+   to "", and yesexpr and noexpr are defaulted resp. verified to be
+   non-empty, valid extended regular expressions.  */
+void
+messages_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+  struct locale_messages_t *messages
+    = locale->categories[LC_MESSAGES].messages;
+  int nothing = 0;
+
+  /* Now resolve copying and also handle completely missing definitions.  */
+  if (messages == NULL)
+    {
+      /* First see whether we were supposed to copy.  If yes, find the
+         actual definition.  */
+      if (locale->copy_name[LC_MESSAGES] != NULL)
+        {
+          /* Find the copying locale.  This has to happen transitively
+             since the locale we are copying from might itself be copying
+             another one.  */
+          struct localedef_t *from = locale;
+
+          do
+            from = find_locale (LC_MESSAGES, from->copy_name[LC_MESSAGES],
+                                from->repertoire_name, charmap);
+          while (from->categories[LC_MESSAGES].messages == NULL
+                 && from->copy_name[LC_MESSAGES] != NULL);
+
+          messages = locale->categories[LC_MESSAGES].messages
+            = from->categories[LC_MESSAGES].messages;
+        }
+
+      /* If there is still no definition issue a warning and create an
+         empty one.  */
+      if (messages == NULL)
+        {
+          if (! be_quiet)
+            WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_MESSAGES"));
+          messages_startup (NULL, locale, 0);
+          messages = locale->categories[LC_MESSAGES].messages;
+          nothing = 1;
+        }
+    }
+
+  /* The fields YESSTR and NOSTR are optional.  */
+  if (messages->yesstr == NULL)
+    messages->yesstr = "";
+  if (messages->nostr == NULL)
+    messages->nostr = "";
+
+  /* YESEXPR and NOEXPR are mandatory and must be valid patterns.  */
+  messages->yesexpr = messages_check_expr (messages->yesexpr, "yesexpr",
+                                           "^[yY]", nothing);
+  messages->noexpr = messages_check_expr (messages->noexpr, "noexpr",
+                                          "^[nN]", nothing);
+}
+
+
+/* Write the LC_MESSAGES category of LOCALE below OUTPUT_PATH: yesexpr,
+   noexpr, yesstr and nostr in this order, followed by the code set
+   name of CHARMAP.  */
+void
+messages_output (struct localedef_t *locale, const struct charmap_t *charmap,
+                 const char *output_path)
+{
+  struct locale_messages_t *msgs = locale->categories[LC_MESSAGES].messages;
+  /* The items in the order in which they are written.  */
+  const char *const items[] =
+    {
+      msgs->yesexpr,
+      msgs->noexpr,
+      msgs->yesstr,
+      msgs->nostr,
+      charmap->code_set_name,
+    };
+  struct locale_file file;
+  size_t idx;
+
+  init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES));
+
+  for (idx = 0; idx < sizeof (items) / sizeof (items[0]); ++idx)
+    add_locale_string (&file, items[idx]);
+
+  write_locale_data (output_path, LC_MESSAGES, "LC_MESSAGES", &file);
+}
+
+
+/* The parser for the LC_MESSAGES section of the locale definition.
+ Tokens are read from LDFILE until `END LC_MESSAGES' is seen and the
+ string operands of yesexpr, noexpr, yesstr and nostr are stored in the
+ messages record of RESULT. A leading `copy' statement is handed over
+ to handle_copy. If IGNORE_CONTENT is nonzero the operands are skipped
+ instead of stored. Reaching the end of the file before `END' is
+ reported as an error. */
+void
+messages_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct repertoire_t *repertoire = NULL;
+ struct locale_messages_t *messages;
+ struct token *now;
+ enum token_t nowtok;
+
+ /* Get the repertoire we have to use. It is only passed to lr_token
+ when the string operand of a field is read. */
+ if (repertoire_name != NULL)
+ repertoire = repertoire_read (repertoire_name);
+
+ /* The rest of the line containing `LC_MESSAGES' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+
+ do /* Skip the empty lines in front of the first statement. */
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_messages,
+ LC_MESSAGES, "LC_MESSAGES", ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. With IGNORE_CONTENT set the startup
+ function allocates nothing, which is why every case below bails out
+ before it touches MESSAGES. */
+ messages_startup (ldfile, result, ignore_content);
+ messages = result->categories[LC_MESSAGES].messages;
+
+ while (1)
+ {
+ struct token *arg;
+
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+#define STR_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ if (messages->cat != NULL) \
+ { \
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_MESSAGES", #cat); \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+ if (now->tok != tok_string) \
+ goto syntax_error; \
+ else if (!ignore_content && now->val.str.startmb == NULL) \
+ { \
+ lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_MESSAGES", #cat); \
+ messages->cat = ""; \
+ } \
+ else if (!ignore_content) \
+ messages->cat = now->val.str.startmb; \
+ break
+
+ STR_ELEM (yesexpr);
+ STR_ELEM (noexpr);
+ STR_ELEM (yesstr);
+ STR_ELEM (nostr);
+
+ case tok_end:
+ /* Next we assume `LC_MESSAGES'. On end of file we only leave the
+ switch; the loop then terminates at its next iteration and the
+ premature-end-of-file error is reported. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break;
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MESSAGES");
+ else if (arg->tok != tok_lc_messages)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_MESSAGES");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_messages);
+ return;
+
+ default:
+ syntax_error: /* Also the target of the operand check above. */
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_MESSAGES");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_MESSAGES");
+}
diff --git a/REORG.TODO/locale/programs/ld-monetary.c b/REORG.TODO/locale/programs/ld-monetary.c
new file mode 100644
index 0000000000..cd50541603
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-monetary.c
@@ -0,0 +1,757 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <byteswap.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "linereader.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_MONETARY locale.
+ monetary_startup clears the whole struct and then presets every
+ signed char field to -2, the marker which monetary_read and
+ monetary_finish test for to recognize a field without a value. */
+struct locale_monetary_t
+{
+ const char *int_curr_symbol; /* monetary_finish checks it against ISO 4217. */
+ const char *currency_symbol;
+ const char *mon_decimal_point;
+ const char *mon_thousands_sep;
+ uint32_t mon_decimal_point_wc; /* First wide character of the parsed string, if the reader supplied one. */
+ uint32_t mon_thousands_sep_wc; /* Likewise for mon_thousands_sep. */
+ char *mon_grouping; /* Bytes built by monetary_read: '\177' stands for -1, '\377' for 0. */
+ size_t mon_grouping_len; /* 0 as long as mon_grouping is undefined. */
+ const char *positive_sign;
+ const char *negative_sign;
+ signed char int_frac_digits;
+ signed char frac_digits;
+ signed char p_cs_precedes;
+ signed char p_sep_by_space;
+ signed char n_cs_precedes;
+ signed char n_sep_by_space;
+ signed char p_sign_posn;
+ signed char n_sign_posn;
+ signed char int_p_cs_precedes; /* The int_* fields default to their non-int counterparts in monetary_finish. */
+ signed char int_p_sep_by_space;
+ signed char int_n_cs_precedes;
+ signed char int_n_sep_by_space;
+ signed char int_p_sign_posn;
+ signed char int_n_sign_posn;
+ const char *duo_int_curr_symbol; /* The duo_* fields are optional; monetary_finish copies the non-duo values when they are undefined. */
+ const char *duo_currency_symbol;
+ signed char duo_int_frac_digits;
+ signed char duo_frac_digits;
+ signed char duo_p_cs_precedes;
+ signed char duo_p_sep_by_space;
+ signed char duo_n_cs_precedes;
+ signed char duo_n_sep_by_space;
+ signed char duo_p_sign_posn;
+ signed char duo_n_sign_posn;
+ signed char duo_int_p_cs_precedes;
+ signed char duo_int_p_sep_by_space;
+ signed char duo_int_n_cs_precedes;
+ signed char duo_int_n_sep_by_space;
+ signed char duo_int_p_sign_posn;
+ signed char duo_int_n_sign_posn;
+ uint32_t uno_valid_from; /* 0 means unset; monetary_finish then stores 10101. */
+ uint32_t uno_valid_to; /* 0 means unset; monetary_finish then stores 99991231. */
+ uint32_t duo_valid_from; /* 0 means unset; monetary_finish then stores 10101. */
+ uint32_t duo_valid_to; /* 0 means unset; monetary_finish then stores 99991231. */
+ uint32_t conversion_rate[2]; /* A zero first element means unset; monetary_finish then stores 1 in both. */
+ char *crncystr; /* Built by monetary_finish: '-' or '+' followed by currency_symbol. */
+};
+
+
+/* The content of the field int_curr_symbol has to be taken from
+ ISO-4217. We test for correct values. monetary_finish looks the
+ names up with bsearch and a strcmp based comparison, so the entries
+ provided by iso-4217.def have to be sorted in strcmp order. */
+#define DEFINE_INT_CURR(str) str,
+static const char *const valid_int_curr[] =
+ {
+# include "../iso-4217.def"
+ };
+#define NR_VALID_INT_CURR ((sizeof (valid_int_curr) \
+ / sizeof (valid_int_curr[0])))
+#undef DEFINE_INT_CURR
+
+
+/* Prototype of the local comparison function which monetary_finish hands to bsearch. */
+static int curr_strcmp (const char *s1, const char **s2);
+
+
+/* Get ready to parse an LC_MONETARY section.  Unless IGNORE_CONTENT is
+   set, allocate a cleared locale_monetary_t, mark all numeric fields as
+   "no value yet" (-2) and attach it to LOCALE.  If LR is not NULL, make
+   the line reader translate strings and return no wide strings.  */
+static void
+monetary_startup (struct linereader *lr, struct localedef_t *locale,
+                  int ignore_content)
+{
+  if (!ignore_content)
+    {
+      struct locale_monetary_t *mon = xmalloc (sizeof *mon);
+
+      memset (mon, '\0', sizeof *mon);
+
+      mon->mon_grouping = NULL;
+      mon->mon_grouping_len = 0;
+
+      /* The POSIX fields.  */
+      mon->int_frac_digits = mon->frac_digits = -2;
+      mon->p_cs_precedes = mon->p_sep_by_space = -2;
+      mon->n_cs_precedes = mon->n_sep_by_space = -2;
+      mon->p_sign_posn = mon->n_sign_posn = -2;
+
+      /* Their international variants.  */
+      mon->int_p_cs_precedes = mon->int_p_sep_by_space = -2;
+      mon->int_n_cs_precedes = mon->int_n_sep_by_space = -2;
+      mon->int_p_sign_posn = mon->int_n_sign_posn = -2;
+
+      /* The fields for the second currency.  */
+      mon->duo_int_frac_digits = mon->duo_frac_digits = -2;
+      mon->duo_p_cs_precedes = mon->duo_p_sep_by_space = -2;
+      mon->duo_n_cs_precedes = mon->duo_n_sep_by_space = -2;
+      mon->duo_p_sign_posn = mon->duo_n_sign_posn = -2;
+      mon->duo_int_p_cs_precedes = mon->duo_int_p_sep_by_space = -2;
+      mon->duo_int_n_cs_precedes = mon->duo_int_n_sep_by_space = -2;
+      mon->duo_int_p_sign_posn = mon->duo_int_n_sign_posn = -2;
+
+      locale->categories[LC_MONETARY].monetary = mon;
+    }
+
+  if (lr != NULL)
+    {
+      lr->translate_strings = 1;
+      lr->return_widestr = 0;
+    }
+}
+
+/* Complete the LC_MONETARY data of LOCALE once parsing is done: resolve
+ a pending `copy' through find_locale, create an empty definition when
+ there is none at all, fill in defaults for every undefined field,
+ report missing or out-of-range values through error (unless be_quiet
+ is set) and finally build the crncystr string. */
+void
+monetary_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+ struct locale_monetary_t *monetary
+ = locale->categories[LC_MONETARY].monetary;
+ int nothing = 0; /* Set to 1 when the whole category is missing; suppresses the per-field messages. */
+
+ /* Now resolve copying and also handle completely missing definitions. */
+ if (monetary == NULL)
+ {
+ /* First see whether we were supposed to copy. If yes, find the
+ actual definition. */
+ if (locale->copy_name[LC_MONETARY] != NULL)
+ {
+ /* Find the locale to copy from. This has to happen transitively
+ since the locale we are copying from might itself be copying
+ another one. */
+ struct localedef_t *from = locale;
+
+ do
+ from = find_locale (LC_MONETARY, from->copy_name[LC_MONETARY],
+ from->repertoire_name, charmap);
+ while (from->categories[LC_MONETARY].monetary == NULL
+ && from->copy_name[LC_MONETARY] != NULL);
+
+ monetary = locale->categories[LC_MONETARY].monetary
+ = from->categories[LC_MONETARY].monetary;
+ }
+
+ /* If there is still no definition issue a warning and create an
+ empty one. */
+ if (monetary == NULL)
+ {
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_MONETARY"));
+ monetary_startup (NULL, locale, 0);
+ monetary = locale->categories[LC_MONETARY].monetary;
+ nothing = 1;
+ }
+ }
+
+#define TEST_ELEM(cat, initval) \
+ if (monetary->cat == NULL) \
+ { \
+ if (! be_quiet && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+ "LC_MONETARY", #cat)); \
+ monetary->cat = initval; \
+ }
+
+ TEST_ELEM (int_curr_symbol, "");
+ TEST_ELEM (currency_symbol, "");
+ TEST_ELEM (mon_decimal_point, ".");
+ TEST_ELEM (mon_thousands_sep, "");
+ TEST_ELEM (positive_sign, "");
+ TEST_ELEM (negative_sign, "");
+
+ /* The international currency symbol must come from ISO 4217. */
+ if (monetary->int_curr_symbol != NULL)
+ {
+ if (strlen (monetary->int_curr_symbol) != 4)
+ {
+ if (! be_quiet && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value of field `int_curr_symbol' has wrong length"),
+ "LC_MONETARY"));
+ }
+ else
+ { /* Check the first three characters against ISO 4217 */
+ char symbol[4];
+ strncpy (symbol, monetary->int_curr_symbol, 3);
+ symbol[3] = '\0';
+ if (bsearch (symbol, valid_int_curr, NR_VALID_INT_CURR,
+ sizeof (const char *),
+ (comparison_fn_t) curr_strcmp) == NULL
+ && !be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value of field `int_curr_symbol' does \
+not correspond to a valid name in ISO 4217"),
+ "LC_MONETARY"));
+ }
+ }
+
+ /* The decimal point must not be empty. This is not said explicitly
+ in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be
+ != "". */
+ if (monetary->mon_decimal_point == NULL)
+ {
+ if (! be_quiet && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_MONETARY", "mon_decimal_point"));
+ monetary->mon_decimal_point = ".";
+ }
+ else if (monetary->mon_decimal_point[0] == '\0' && ! be_quiet && ! nothing)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value for field `%s' must not be an empty string"),
+ "LC_MONETARY", "mon_decimal_point"));
+ }
+ if (monetary->mon_decimal_point_wc == L'\0')
+ monetary->mon_decimal_point_wc = L'.';
+
+ if (monetary->mon_grouping_len == 0)
+ {
+ if (! be_quiet && ! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_MONETARY", "mon_grouping"));
+
+ monetary->mon_grouping = (char *) "\177";
+ monetary->mon_grouping_len = 1;
+ }
+
+#undef TEST_ELEM
+#define TEST_ELEM(cat, min, max, initval) \
+ if (monetary->cat == -2) \
+ { \
+ if (! be_quiet && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+ "LC_MONETARY", #cat)); \
+ monetary->cat = initval; \
+ } \
+ else if ((monetary->cat < min || monetary->cat > max) \
+ && min < max \
+ && !be_quiet && !nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value for field `%s' must be in range %d...%d"), \
+ "LC_MONETARY", #cat, min, max))
+
+ TEST_ELEM (int_frac_digits, 1, 0, -1); /* min > max: the range test is switched off. */
+ TEST_ELEM (frac_digits, 1, 0, -1); /* Likewise. */
+ TEST_ELEM (p_cs_precedes, -1, 1, -1);
+ TEST_ELEM (p_sep_by_space, -1, 2, -1);
+ TEST_ELEM (n_cs_precedes, -1, 1, -1);
+ TEST_ELEM (n_sep_by_space, -1, 2, -1);
+ TEST_ELEM (p_sign_posn, -1, 4, -1);
+ TEST_ELEM (n_sign_posn, -1, 4, -1);
+
+ /* The non-POSIX.2 extensions are optional. */
+ if (monetary->duo_int_curr_symbol == NULL)
+ monetary->duo_int_curr_symbol = monetary->int_curr_symbol;
+ if (monetary->duo_currency_symbol == NULL)
+ monetary->duo_currency_symbol = monetary->currency_symbol;
+
+ if (monetary->duo_int_frac_digits == -2)
+ monetary->duo_int_frac_digits = monetary->int_frac_digits;
+ if (monetary->duo_frac_digits == -2)
+ monetary->duo_frac_digits = monetary->frac_digits;
+
+#undef TEST_ELEM
+#define TEST_ELEM(cat, alt, min, max) \
+ if (monetary->cat == -2) \
+ monetary->cat = monetary->alt; \
+ else if ((monetary->cat < min || monetary->cat > max) && !be_quiet \
+ && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value for field `%s' must be in range %d...%d"), \
+ "LC_MONETARY", #cat, min, max))
+
+ TEST_ELEM (int_p_cs_precedes, p_cs_precedes, -1, 1);
+ TEST_ELEM (int_p_sep_by_space, p_sep_by_space, -1, 2);
+ TEST_ELEM (int_n_cs_precedes, n_cs_precedes, -1, 1);
+ TEST_ELEM (int_n_sep_by_space, n_sep_by_space, -1, 2);
+ TEST_ELEM (int_p_sign_posn, p_sign_posn, -1, 4);
+ TEST_ELEM (int_n_sign_posn, n_sign_posn, -1, 4);
+
+ TEST_ELEM (duo_p_cs_precedes, p_cs_precedes, -1, 1);
+ TEST_ELEM (duo_p_sep_by_space, p_sep_by_space, -1, 2);
+ TEST_ELEM (duo_n_cs_precedes, n_cs_precedes, -1, 1);
+ TEST_ELEM (duo_n_sep_by_space, n_sep_by_space, -1, 2);
+ TEST_ELEM (duo_int_p_cs_precedes, int_p_cs_precedes, -1, 1);
+ TEST_ELEM (duo_int_p_sep_by_space, int_p_sep_by_space, -1, 2);
+ TEST_ELEM (duo_int_n_cs_precedes, int_n_cs_precedes, -1, 1);
+ TEST_ELEM (duo_int_n_sep_by_space, int_n_sep_by_space, -1, 2);
+ TEST_ELEM (duo_p_sign_posn, p_sign_posn, -1, 4);
+ TEST_ELEM (duo_n_sign_posn, n_sign_posn, -1, 4);
+ TEST_ELEM (duo_int_p_sign_posn, int_p_sign_posn, -1, 4);
+ TEST_ELEM (duo_int_n_sign_posn, int_n_sign_posn, -1, 4);
+
+ if (monetary->uno_valid_from == 0)
+ monetary->uno_valid_from = 10101;
+ if (monetary->uno_valid_to == 0)
+ monetary->uno_valid_to = 99991231;
+ if (monetary->duo_valid_from == 0)
+ monetary->duo_valid_from = 10101;
+ if (monetary->duo_valid_to == 0)
+ monetary->duo_valid_to = 99991231;
+
+ if (monetary->conversion_rate[0] == 0)
+ {
+ monetary->conversion_rate[0] = 1;
+ monetary->conversion_rate[1] = 1;
+ }
+
+ /* Create the crncystr entry: `-' if p_cs_precedes is nonzero, `+'
+ otherwise, followed by the currency symbol. */
+ monetary->crncystr = (char *) xmalloc (strlen (monetary->currency_symbol)
+ + 2);
+ monetary->crncystr[0] = monetary->p_cs_precedes ? '-' : '+';
+ strcpy (&monetary->crncystr[1], monetary->currency_symbol);
+}
+
+
+/* Write the LC_MONETARY category of LOCALE below OUTPUT_PATH.  The
+   fields are appended in the fixed order below; the name of the code
+   set of CHARMAP comes last.  */
+void
+monetary_output (struct localedef_t *locale, const struct charmap_t *charmap,
+                 const char *output_path)
+{
+  struct locale_monetary_t *mon = locale->categories[LC_MONETARY].monetary;
+  struct locale_file out;
+
+  init_locale_data (&out, _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY));
+
+  /* Symbols, separators and signs.  */
+  add_locale_string (&out, mon->int_curr_symbol);
+  add_locale_string (&out, mon->currency_symbol);
+  add_locale_string (&out, mon->mon_decimal_point);
+  add_locale_string (&out, mon->mon_thousands_sep);
+  add_locale_raw_data (&out, mon->mon_grouping, mon->mon_grouping_len);
+  add_locale_string (&out, mon->positive_sign);
+  add_locale_string (&out, mon->negative_sign);
+
+  /* The POSIX formatting parameters.  */
+  add_locale_char (&out, mon->int_frac_digits);
+  add_locale_char (&out, mon->frac_digits);
+  add_locale_char (&out, mon->p_cs_precedes);
+  add_locale_char (&out, mon->p_sep_by_space);
+  add_locale_char (&out, mon->n_cs_precedes);
+  add_locale_char (&out, mon->n_sep_by_space);
+  add_locale_char (&out, mon->p_sign_posn);
+  add_locale_char (&out, mon->n_sign_posn);
+  add_locale_string (&out, mon->crncystr);
+
+  /* The international formatting parameters.  */
+  add_locale_char (&out, mon->int_p_cs_precedes);
+  add_locale_char (&out, mon->int_p_sep_by_space);
+  add_locale_char (&out, mon->int_n_cs_precedes);
+  add_locale_char (&out, mon->int_n_sep_by_space);
+  add_locale_char (&out, mon->int_p_sign_posn);
+  add_locale_char (&out, mon->int_n_sign_posn);
+
+  /* Everything about the second currency.  */
+  add_locale_string (&out, mon->duo_int_curr_symbol);
+  add_locale_string (&out, mon->duo_currency_symbol);
+  add_locale_char (&out, mon->duo_int_frac_digits);
+  add_locale_char (&out, mon->duo_frac_digits);
+  add_locale_char (&out, mon->duo_p_cs_precedes);
+  add_locale_char (&out, mon->duo_p_sep_by_space);
+  add_locale_char (&out, mon->duo_n_cs_precedes);
+  add_locale_char (&out, mon->duo_n_sep_by_space);
+  add_locale_char (&out, mon->duo_int_p_cs_precedes);
+  add_locale_char (&out, mon->duo_int_p_sep_by_space);
+  add_locale_char (&out, mon->duo_int_n_cs_precedes);
+  add_locale_char (&out, mon->duo_int_n_sep_by_space);
+  add_locale_char (&out, mon->duo_p_sign_posn);
+  add_locale_char (&out, mon->duo_n_sign_posn);
+  add_locale_char (&out, mon->duo_int_p_sign_posn);
+  add_locale_char (&out, mon->duo_int_n_sign_posn);
+
+  /* Validity dates and the conversion rate.  */
+  add_locale_uint32 (&out, mon->uno_valid_from);
+  add_locale_uint32 (&out, mon->uno_valid_to);
+  add_locale_uint32 (&out, mon->duo_valid_from);
+  add_locale_uint32 (&out, mon->duo_valid_to);
+  add_locale_uint32_array (&out, mon->conversion_rate, 2);
+
+  /* Wide character separators and the code set name.  */
+  add_locale_uint32 (&out, mon->mon_decimal_point_wc);
+  add_locale_uint32 (&out, mon->mon_thousands_sep_wc);
+  add_locale_string (&out, charmap->code_set_name);
+
+  write_locale_data (output_path, LC_MONETARY, "LC_MONETARY", &out);
+}
+
+
+/* Comparison function for the bsearch over valid_int_curr: S1 is the
+   name looked for, S2 points to the table entry to compare it with.  */
+static int
+curr_strcmp (const char *s1, const char **s2)
+{
+  const char *entry = *s2;
+
+  return strcmp (s1, entry);
+}
+
+
+/* The parser for the LC_MONETARY section of the locale definition.
+
+   Tokens are read from LDFILE until `END LC_MONETARY' or the end of the
+   file is seen and the values are stored in the locale_monetary_t
+   attached to RESULT.  A leading `copy' statement is handed over to
+   handle_copy.  If IGNORE_CONTENT is nonzero the statements are only
+   skipped; nothing is stored.  Problems are reported through lr_error
+   and reading goes on with the following tokens.  */
+void
+monetary_read (struct linereader *ldfile, struct localedef_t *result,
+               const struct charmap_t *charmap, const char *repertoire_name,
+               int ignore_content)
+{
+  struct repertoire_t *repertoire = NULL;
+  struct locale_monetary_t *monetary;
+  struct token *now;
+  enum token_t nowtok;
+
+  /* Get the repertoire we have to use.  */
+  if (repertoire_name != NULL)
+    repertoire = repertoire_read (repertoire_name);
+
+  /* The rest of the line containing `LC_MONETARY' must be free.  */
+  lr_ignore_rest (ldfile, 1);
+
+  do
+    {
+      now = lr_token (ldfile, charmap, result, NULL, verbose);
+      nowtok = now->tok;
+    }
+  while (nowtok == tok_eol);
+
+  /* If we see `copy' now we are almost done.  */
+  if (nowtok == tok_copy)
+    {
+      handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_monetary,
+                   LC_MONETARY, "LC_MONETARY", ignore_content);
+      return;
+    }
+
+  /* Prepare the data structures.  */
+  monetary_startup (ldfile, result, ignore_content);
+  monetary = result->categories[LC_MONETARY].monetary;
+
+  while (1)
+    {
+      /* Of course we don't proceed beyond the end of file.  */
+      if (nowtok == tok_eof)
+        break;
+
+      /* Ignore empty lines.  */
+      if (nowtok == tok_eol)
+        {
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          nowtok = now->tok;
+          continue;
+        }
+
+      switch (nowtok)
+        {
+#define STR_ELEM(cat) \
+        case tok_##cat: \
+          /* Ignore the rest of the line if we don't need the input of \
+             this line.  */ \
+          if (ignore_content) \
+            { \
+              lr_ignore_rest (ldfile, 0); \
+              break; \
+            } \
+ \
+          now = lr_token (ldfile, charmap, result, NULL, verbose); \
+          if (now->tok != tok_string) \
+            goto err_label; \
+          else if (monetary->cat != NULL) \
+            lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+                      "LC_MONETARY", #cat); \
+          else if (!ignore_content && now->val.str.startmb == NULL) \
+            { \
+              lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \
+              monetary->cat = ""; \
+            } \
+          else if (!ignore_content) \
+            monetary->cat = now->val.str.startmb; \
+          lr_ignore_rest (ldfile, 1); \
+          break
+
+          STR_ELEM (int_curr_symbol);
+          STR_ELEM (currency_symbol);
+          STR_ELEM (positive_sign);
+          STR_ELEM (negative_sign);
+          STR_ELEM (duo_int_curr_symbol);
+          STR_ELEM (duo_currency_symbol);
+
+#define STR_ELEM_WC(cat) \
+        case tok_##cat: \
+          /* Ignore the rest of the line if we don't need the input of \
+             this line.  */ \
+          if (ignore_content) \
+            { \
+              lr_ignore_rest (ldfile, 0); \
+              break; \
+            } \
+ \
+          ldfile->return_widestr = 1; \
+          now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+          if (now->tok != tok_string) \
+            { \
+              /* Do not leave the wide string mode switched on when \
+                 bailing out.  */ \
+              ldfile->return_widestr = 0; \
+              goto err_label; \
+            } \
+          if (monetary->cat != NULL) \
+            lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_MONETARY", #cat); \
+          else if (!ignore_content && now->val.str.startmb == NULL) \
+            { \
+              lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \
+              monetary->cat = ""; \
+              monetary->cat##_wc = L'\0'; \
+            } \
+          else if (now->val.str.startwc != NULL && now->val.str.lenwc > 2) \
+            { \
+              lr_error (ldfile, _("\
+%s: value for field `%s' must be a single character"), "LC_MONETARY", #cat); \
+            } \
+          else if (!ignore_content) \
+            { \
+              monetary->cat = now->val.str.startmb; \
+ \
+              if (now->val.str.startwc != NULL) \
+                monetary->cat##_wc = *now->val.str.startwc; \
+            } \
+          ldfile->return_widestr = 0; \
+          break
+
+          STR_ELEM_WC (mon_decimal_point);
+          STR_ELEM_WC (mon_thousands_sep);
+
+#define INT_ELEM(cat) \
+        case tok_##cat: \
+          /* Ignore the rest of the line if we don't need the input of \
+             this line.  */ \
+          if (ignore_content) \
+            { \
+              lr_ignore_rest (ldfile, 0); \
+              break; \
+            } \
+ \
+          now = lr_token (ldfile, charmap, result, NULL, verbose); \
+          if (now->tok != tok_minus1 && now->tok != tok_number) \
+            goto err_label; \
+          else if (monetary->cat != -2) \
+            lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+                      "LC_MONETARY", #cat); \
+          else if (!ignore_content) \
+            monetary->cat = now->tok == tok_minus1 ? -1 : now->val.num; \
+          break
+
+          INT_ELEM (int_frac_digits);
+          INT_ELEM (frac_digits);
+          INT_ELEM (p_cs_precedes);
+          INT_ELEM (p_sep_by_space);
+          INT_ELEM (n_cs_precedes);
+          INT_ELEM (n_sep_by_space);
+          INT_ELEM (p_sign_posn);
+          INT_ELEM (n_sign_posn);
+          INT_ELEM (int_p_cs_precedes);
+          INT_ELEM (int_p_sep_by_space);
+          INT_ELEM (int_n_cs_precedes);
+          INT_ELEM (int_n_sep_by_space);
+          INT_ELEM (int_p_sign_posn);
+          INT_ELEM (int_n_sign_posn);
+          INT_ELEM (duo_int_frac_digits);
+          INT_ELEM (duo_frac_digits);
+          INT_ELEM (duo_p_cs_precedes);
+          INT_ELEM (duo_p_sep_by_space);
+          INT_ELEM (duo_n_cs_precedes);
+          INT_ELEM (duo_n_sep_by_space);
+          INT_ELEM (duo_p_sign_posn);
+          INT_ELEM (duo_n_sign_posn);
+          INT_ELEM (duo_int_p_cs_precedes);
+          INT_ELEM (duo_int_p_sep_by_space);
+          INT_ELEM (duo_int_n_cs_precedes);
+          INT_ELEM (duo_int_n_sep_by_space);
+          INT_ELEM (duo_int_p_sign_posn);
+          INT_ELEM (duo_int_n_sign_posn);
+
+          /* The validity dates are uint32_t fields which monetary_startup
+             leaves at zero and for which monetary_finish treats zero as
+             "not set".  The `!= -2' test of INT_ELEM is always true for
+             them, so every definition would be rejected as a duplicate.
+             They therefore get their own macro with 0 as the marker.  */
+#define UINT_ELEM(cat) \
+        case tok_##cat: \
+          /* Ignore the rest of the line if we don't need the input of \
+             this line.  */ \
+          if (ignore_content) \
+            { \
+              lr_ignore_rest (ldfile, 0); \
+              break; \
+            } \
+ \
+          now = lr_token (ldfile, charmap, result, NULL, verbose); \
+          if (now->tok != tok_minus1 && now->tok != tok_number) \
+            goto err_label; \
+          else if (monetary->cat != 0) \
+            lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+                      "LC_MONETARY", #cat); \
+          else if (!ignore_content) \
+            monetary->cat = now->tok == tok_minus1 ? -1 : now->val.num; \
+          break
+
+          UINT_ELEM (uno_valid_from);
+          UINT_ELEM (uno_valid_to);
+          UINT_ELEM (duo_valid_from);
+          UINT_ELEM (duo_valid_to);
+
+        case tok_mon_grouping:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok != tok_minus1 && now->tok != tok_number)
+            goto err_label;
+          else
+            {
+              size_t act = 0;
+              size_t max = 10;
+              char *grouping = ignore_content ? NULL : xmalloc (max);
+
+              do
+                {
+                  /* Keep room for this entry and the final NUL.  */
+                  if (act + 1 >= max)
+                    {
+                      max *= 2;
+                      grouping = xrealloc (grouping, max);
+                    }
+
+                  if (act > 0 && grouping[act - 1] == '\177')
+                    {
+                      lr_error (ldfile, _("\
+%s: `-1' must be last entry in `%s' field"),
+                                "LC_MONETARY", "mon_grouping");
+                      lr_ignore_rest (ldfile, 0);
+                      break;
+                    }
+
+                  if (now->tok == tok_minus1)
+                    {
+                      if (!ignore_content)
+                        grouping[act++] = '\177';
+                    }
+                  else if (now->val.num == 0)
+                    {
+                      /* A value of 0 disables grouping from here on but
+                         we must not store a NUL character since this
+                         terminates the string.  Use something different
+                         which must not be used otherwise.  */
+                      if (!ignore_content)
+                        grouping[act++] = '\377';
+                    }
+                  else if (now->val.num > 126)
+                    lr_error (ldfile, _("\
+%s: values for field `%s' must be smaller than 127"),
+                              "LC_MONETARY", "mon_grouping");
+                  else if (!ignore_content)
+                    grouping[act++] = now->val.num;
+
+                  /* Next must be semicolon.  */
+                  now = lr_token (ldfile, charmap, result, NULL, verbose);
+                  if (now->tok != tok_semicolon)
+                    break;
+
+                  now = lr_token (ldfile, charmap, result, NULL, verbose);
+                }
+              while (now->tok == tok_minus1 || now->tok == tok_number);
+
+              if (now->tok != tok_eol)
+                goto err_label;
+
+              if (!ignore_content)
+                {
+                  /* A single -1 means no grouping.  */
+                  if (act == 1 && grouping[0] == '\177')
+                    act--;
+                  grouping[act++] = '\0';
+
+                  monetary->mon_grouping = xrealloc (grouping, act);
+                  monetary->mon_grouping_len = act;
+                }
+            }
+          break;
+
+        case tok_conversion_rate:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok != tok_number)
+            goto err_label;
+          if (now->val.num == 0)
+            {
+            invalid_conversion_rate:
+              lr_error (ldfile, _("conversion rate value cannot be zero"));
+              if (!ignore_content)
+                {
+                  monetary->conversion_rate[0] = 1;
+                  monetary->conversion_rate[1] = 1;
+                }
+              break;
+            }
+          if (!ignore_content)
+            monetary->conversion_rate[0] = now->val.num;
+          /* Next must be a semicolon.  */
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok != tok_semicolon)
+            goto err_label;
+          /* And another number.  */
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok != tok_number)
+            goto err_label;
+          if (now->val.num == 0)
+            goto invalid_conversion_rate;
+          if (!ignore_content)
+            monetary->conversion_rate[1] = now->val.num;
+          /* The rest of the line must be empty.  */
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_end:
+          /* Next we assume `LC_MONETARY'.  */
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok == tok_eof)
+            break;
+          if (now->tok == tok_eol)
+            lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MONETARY");
+          else if (now->tok != tok_lc_monetary)
+            lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_MONETARY");
+          lr_ignore_rest (ldfile, now->tok == tok_lc_monetary);
+          return;
+
+        default:
+        err_label:
+          SYNTAX_ERROR (_("%s: syntax error"), "LC_MONETARY");
+        }
+
+      /* Prepare for the next round.  */
+      now = lr_token (ldfile, charmap, result, NULL, verbose);
+      nowtok = now->tok;
+    }
+
+  /* When we come here we reached the end of the file.  */
+  lr_error (ldfile, _("%s: premature end of file"), "LC_MONETARY");
+}
diff --git a/REORG.TODO/locale/programs/ld-name.c b/REORG.TODO/locale/programs/ld-name.c
new file mode 100644
index 0000000000..ee50ae7322
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-name.c
@@ -0,0 +1,281 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <langinfo.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_NAME locale. name_startup
+ allocates it zero-filled, so a NULL member means the field has not
+ been defined. */
+struct locale_name_t
+{
+ const char *name_fmt; /* name_finish validates its `%' escapes and supplies a default when undefined. */
+ const char *name_gen; /* This and the following fields default to "" in name_finish. */
+ const char *name_mr;
+ const char *name_mrs;
+ const char *name_miss;
+ const char *name_ms;
+};
+
+
+/* Get ready to parse an LC_NAME section.  Unless IGNORE_CONTENT is set a
+   zero-filled locale_name_t is attached to LOCALE.  If LR is not NULL the
+   line reader is set up to translate strings and to return no wide
+   strings.  */
+static void
+name_startup (struct linereader *lr, struct localedef_t *locale,
+              int ignore_content)
+{
+  if (!ignore_content)
+    {
+      struct locale_name_t *fresh = xcalloc (1, sizeof *fresh);
+
+      locale->categories[LC_NAME].name = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+/* Complete the LC_NAME data of LOCALE once parsing is done: resolve a
+   pending `copy' through find_locale, create an empty definition when
+   there is none at all, check the escape sequences used in name_fmt and
+   give every undefined field its default value.  Problems are reported
+   through error.  */
+void
+name_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+  struct locale_name_t *name = locale->categories[LC_NAME].name;
+  /* Set when the whole category is missing; the per-field messages are
+     suppressed in that case.  */
+  int nothing = 0;
+
+  /* Now resolve copying and also handle completely missing definitions.  */
+  if (name == NULL)
+    {
+      /* First see whether we were supposed to copy.  If yes, find the
+         actual definition.  */
+      if (locale->copy_name[LC_NAME] != NULL)
+        {
+          /* Find the locale to copy from.  This has to happen
+             transitively since the locale we are copying from might
+             itself be copying another one.  */
+          struct localedef_t *from = locale;
+
+          do
+            from = find_locale (LC_NAME, from->copy_name[LC_NAME],
+                                from->repertoire_name, charmap);
+          while (from->categories[LC_NAME].name == NULL
+                 && from->copy_name[LC_NAME] != NULL);
+
+          name = locale->categories[LC_NAME].name
+            = from->categories[LC_NAME].name;
+        }
+
+      /* If there is still no definition issue a warning and create an
+         empty one.  */
+      if (name == NULL)
+        {
+          if (! be_quiet)
+            WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_NAME"));
+          name_startup (NULL, locale, 0);
+          name = locale->categories[LC_NAME].name;
+          nothing = 1;
+        }
+    }
+
+  if (name->name_fmt == NULL)
+    {
+      if (! nothing)
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                                "LC_NAME", "name_fmt"));
+      /* Use as the default value the value of the i18n locale.  */
+      name->name_fmt = "%p%t%g%t%m%t%f";
+    }
+  else
+    {
+      /* We must check whether the format string contains only the
+         allowed escape sequences.  */
+      const char *cp = name->name_fmt;
+
+      if (*cp == '\0')
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"),
+                                "LC_NAME", "name_fmt"));
+      else
+        while (*cp != '\0')
+          {
+            if (*cp == '%')
+              {
+                if (*++cp == 'R')
+                  /* Romanize-flag.  */
+                  ++cp;
+                /* strchr also matches the terminating NUL.  A format
+                   which ends right after `%' or `%R' must therefore be
+                   rejected explicitly; otherwise the increment below
+                   would move CP beyond the end of the string.  */
+                if (*cp == '\0' || strchr ("dfFgGlomMpsSt", *cp) == NULL)
+                  {
+                    WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid escape sequence in field `%s'"), "LC_NAME", "name_fmt"));
+                    break;
+                  }
+              }
+            ++cp;
+          }
+    }
+
+#define TEST_ELEM(cat) \
+  if (name->cat == NULL) \
+    { \
+      if (verbose && ! nothing) \
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+                                "LC_NAME", #cat)); \
+      name->cat = ""; \
+    }
+
+  TEST_ELEM (name_gen);
+  TEST_ELEM (name_mr);
+  TEST_ELEM (name_mrs);
+  TEST_ELEM (name_miss);
+  TEST_ELEM (name_ms);
+}
+
+
+/* Write the LC_NAME category of LOCALE below OUTPUT_PATH: the six name
+   fields in declaration order followed by the name of the code set of
+   CHARMAP.  */
+void
+name_output (struct localedef_t *locale, const struct charmap_t *charmap,
+             const char *output_path)
+{
+  struct locale_name_t *data = locale->categories[LC_NAME].name;
+  struct locale_file out;
+
+  init_locale_data (&out, _NL_ITEM_INDEX (_NL_NUM_LC_NAME));
+
+  add_locale_string (&out, data->name_fmt);
+  add_locale_string (&out, data->name_gen);
+  add_locale_string (&out, data->name_mr);
+  add_locale_string (&out, data->name_mrs);
+  add_locale_string (&out, data->name_miss);
+  add_locale_string (&out, data->name_ms);
+  add_locale_string (&out, charmap->code_set_name);
+
+  write_locale_data (output_path, LC_NAME, "LC_NAME", &out);
+}
+
+
+/* Parse the LC_NAME section of a locale definition.  Tokens are taken
+   from LDFILE until `END LC_NAME' or the end of the file; the strings
+   found are stored in the locale_name_t attached to RESULT.  A leading
+   `copy' is handed to handle_copy.  With IGNORE_CONTENT set the
+   statements are skipped without storing anything.  */
+void
+name_read (struct linereader *ldfile, struct localedef_t *result,
+           const struct charmap_t *charmap, const char *repertoire_name,
+           int ignore_content)
+{
+  struct locale_name_t *name;
+  struct token *arg;
+  enum token_t nowtok;
+
+  /* Nothing else may follow `LC_NAME' on its line.  */
+  lr_ignore_rest (ldfile, 1);
+
+  /* Skip the empty lines in front of the first statement.  */
+  do
+    nowtok = lr_token (ldfile, charmap, result, NULL, verbose)->tok;
+  while (nowtok == tok_eol);
+
+  /* A `copy' statement replaces the whole body.  */
+  if (nowtok == tok_copy)
+    {
+      handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_name,
+                   LC_NAME, "LC_NAME", ignore_content);
+      return;
+    }
+
+  /* Set up the data structures.  */
+  name_startup (ldfile, result, ignore_content);
+  name = result->categories[LC_NAME].name;
+
+  /* One statement per round; the loop header fetches the token which
+     starts the next one.  */
+  for (; nowtok != tok_eof;
+       nowtok = lr_token (ldfile, charmap, result, NULL, verbose)->tok)
+    {
+      /* Empty lines are of no interest.  */
+      if (nowtok == tok_eol)
+        continue;
+
+      switch (nowtok)
+        {
+          /* Past the first test IGNORE_CONTENT is known to be zero.  */
+#define STR_ELEM(cat) \
+        case tok_##cat: \
+          if (ignore_content) \
+            { \
+              /* The input of this line is not needed.  */ \
+              lr_ignore_rest (ldfile, 0); \
+              break; \
+            } \
+ \
+          arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+          if (arg->tok != tok_string) \
+            goto err_label; \
+          if (name->cat != NULL) \
+            lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+                      "LC_NAME", #cat); \
+          else if (arg->val.str.startmb != NULL) \
+            name->cat = arg->val.str.startmb; \
+          else \
+            { \
+              lr_error (ldfile, _("%s: unknown character in field `%s'"), \
+                        "LC_NAME", #cat); \
+              name->cat = ""; \
+            } \
+          break
+
+          STR_ELEM (name_fmt);
+          STR_ELEM (name_gen);
+          STR_ELEM (name_mr);
+          STR_ELEM (name_mrs);
+          STR_ELEM (name_miss);
+          STR_ELEM (name_ms);
+
+        case tok_end:
+          /* `LC_NAME' is expected to follow.  */
+          arg = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (arg->tok == tok_eof)
+            break;
+          if (arg->tok == tok_lc_name)
+            lr_ignore_rest (ldfile, 1);
+          else
+            {
+              if (arg->tok == tok_eol)
+                lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NAME");
+              else
+                lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_NAME");
+              lr_ignore_rest (ldfile, 0);
+            }
+          return;
+
+        default:
+        err_label:
+          SYNTAX_ERROR (_("%s: syntax error"), "LC_NAME");
+        }
+    }
+
+  /* Getting here means the file ended inside the section.  */
+  lr_error (ldfile, _("%s: premature end of file"), "LC_NAME");
+}
diff --git a/REORG.TODO/locale/programs/ld-numeric.c b/REORG.TODO/locale/programs/ld-numeric.c
new file mode 100644
index 0000000000..a81ff04f93
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-numeric.c
@@ -0,0 +1,343 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <langinfo.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "linereader.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_NUMERIC locale.
+   numeric_read fills it in, numeric_finish supplies defaults for a
+   missing decimal point, and numeric_output writes the fields to the
+   locale file in the order decimal_point, thousands_sep, grouping,
+   decimal_point_wc, thousands_sep_wc. */
+struct locale_numeric_t
+{
+ const char *decimal_point; /* Multibyte string; NULL until defined. */
+ const char *thousands_sep; /* Multibyte string; NULL until defined. */
+ char *grouping; /* Built by numeric_read: -1 is stored as octal 177, 0 as octal 377. */
+ size_t grouping_len; /* Bytes in GROUPING including the trailing NUL; 0 if undefined. */
+ uint32_t decimal_point_wc; /* First wide character of decimal_point; 0 if unset. */
+ uint32_t thousands_sep_wc; /* First wide character of thousands_sep; 0 if unset. */
+};
+
+
+/* Prepare LOCALE and, if given, the line reader LR for the LC_NUMERIC
+   category.  Unless IGNORE_CONTENT is nonzero a zero-filled
+   locale_numeric_t is attached to LOCALE.  LR may be NULL, in which
+   case only the allocation step is performed.  */
+static void
+numeric_startup (struct linereader *lr, struct localedef_t *locale,
+                 int ignore_content)
+{
+  if (ignore_content == 0)
+    {
+      struct locale_numeric_t *fresh
+        = (struct locale_numeric_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_NUMERIC].numeric = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  /* Reader settings used while parsing this category.  */
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+/* Complete the LC_NUMERIC data of LOCALE once parsing is done.
+
+   If the category has no data of its own, a `copy' directive is
+   resolved by following the chain of copied locales; if that yields
+   nothing either, a warning is printed (unless be_quiet) and an empty
+   record is created.  Afterwards a missing decimal point defaults to
+   "." / L'.', and missing or empty fields are reported unless be_quiet
+   is set or the whole category was absent.  */
+void
+numeric_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+  struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric;
+  /* Nonzero when the category had to be created from scratch.  */
+  int category_missing = 0;
+  /* Nonzero when per-field diagnostics should be printed.  */
+  int complain;
+
+  if (numeric == NULL && locale->copy_name[LC_NUMERIC] != NULL)
+    {
+      /* The category is copied from another locale.  That locale may in
+         turn copy from a third one, so walk the chain until a locale
+         with real data, or one without a further copy, is reached.  */
+      struct localedef_t *from = locale;
+
+      do
+        from = find_locale (LC_NUMERIC, from->copy_name[LC_NUMERIC],
+                            from->repertoire_name, charmap);
+      while (from->categories[LC_NUMERIC].numeric == NULL
+             && from->copy_name[LC_NUMERIC] != NULL);
+
+      numeric = from->categories[LC_NUMERIC].numeric;
+      locale->categories[LC_NUMERIC].numeric = numeric;
+    }
+
+  if (numeric == NULL)
+    {
+      /* Still nothing: warn and continue with an empty definition.  */
+      if (! be_quiet)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_NUMERIC"));
+      numeric_startup (NULL, locale, 0);
+      numeric = locale->categories[LC_NUMERIC].numeric;
+      category_missing = 1;
+    }
+
+  complain = ! be_quiet && ! category_missing;
+
+  /* The decimal point must not be empty.  This is not said explicitly
+     in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be
+     != "".  */
+  if (numeric->decimal_point == NULL)
+    {
+      if (complain)
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                                "LC_NUMERIC", "decimal_point"));
+      numeric->decimal_point = ".";
+    }
+  else if (complain && numeric->decimal_point[0] == '\0')
+    {
+      WITH_CUR_LOCALE (error (0, 0, _("\
+%s: value for field `%s' must not be an empty string"),
+                              "LC_NUMERIC", "decimal_point"));
+    }
+
+  if (numeric->decimal_point_wc == L'\0')
+    numeric->decimal_point_wc = L'.';
+
+  if (complain && numeric->grouping_len == 0)
+    WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                            "LC_NUMERIC", "grouping"));
+}
+
+
+/* Write the LC_NUMERIC category of LOCALE below OUTPUT_PATH.  The
+   record consists of decimal_point, thousands_sep, the raw grouping
+   bytes, the two wide-character values and finally the code set name
+   of CHARMAP, in that order.  Unset strings are written as "".  */
+void
+numeric_output (struct localedef_t *locale, const struct charmap_t *charmap,
+                const char *output_path)
+{
+  struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric;
+  const char *decimal_point
+    = numeric->decimal_point != NULL ? numeric->decimal_point : "";
+  const char *thousands_sep
+    = numeric->thousands_sep != NULL ? numeric->thousands_sep : "";
+  struct locale_file out;
+
+  init_locale_data (&out, _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC));
+
+  add_locale_string (&out, decimal_point);
+  add_locale_string (&out, thousands_sep);
+  add_locale_raw_data (&out, numeric->grouping, numeric->grouping_len);
+  add_locale_uint32 (&out, numeric->decimal_point_wc);
+  add_locale_uint32 (&out, numeric->thousands_sep_wc);
+  add_locale_string (&out, charmap->code_set_name);
+
+  write_locale_data (output_path, LC_NUMERIC, "LC_NUMERIC", &out);
+}
+
+
+/* The parser for the LC_NUMERIC section of the locale definition.
+
+   Reads tokens from LDFILE until `END LC_NUMERIC' and stores the values
+   of `decimal_point', `thousands_sep' and `grouping' in
+   RESULT->categories[LC_NUMERIC].numeric.  A leading `copy' keyword is
+   handed to handle_copy instead.  When IGNORE_CONTENT is nonzero the
+   field lines are skipped without storing anything.  REPERTOIRE_NAME,
+   if not NULL, names the repertoire passed to lr_token while reading
+   the string fields.  Malformed input is reported through lr_error or
+   SYNTAX_ERROR; reaching end of file before `END' is an error too.  */
+void
+numeric_read (struct linereader *ldfile, struct localedef_t *result,
+              const struct charmap_t *charmap, const char *repertoire_name,
+              int ignore_content)
+{
+  struct repertoire_t *repertoire = NULL;
+  struct locale_numeric_t *numeric;
+  struct token *now;
+  enum token_t nowtok;
+
+  /* Get the repertoire we have to use.  */
+  if (repertoire_name != NULL)
+    repertoire = repertoire_read (repertoire_name);
+
+  /* The rest of the line containing `LC_NUMERIC' must be free.  */
+  lr_ignore_rest (ldfile, 1);
+
+
+  do
+    {
+      now = lr_token (ldfile, charmap, result, NULL, verbose);
+      nowtok = now->tok;
+    }
+  while (nowtok == tok_eol);
+
+  /* If we see `copy' now we are almost done.  */
+  if (nowtok == tok_copy)
+    {
+      handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_numeric,
+                   LC_NUMERIC, "LC_NUMERIC", ignore_content);
+      return;
+    }
+
+  /* Prepare the data structures.  */
+  numeric_startup (ldfile, result, ignore_content);
+  numeric = result->categories[LC_NUMERIC].numeric;
+
+  while (1)
+    {
+      /* Of course we don't proceed beyond the end of file.  */
+      if (nowtok == tok_eof)
+        break;
+
+      /* Ignore empty lines.  */
+      if (nowtok == tok_eol)
+        {
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          nowtok = now->tok;
+          continue;
+        }
+
+      switch (nowtok)
+        {
+#define STR_ELEM(cat) \
+        case tok_##cat: \
+          /* Ignore the rest of the line if we don't need the input of \
+             this line.  */ \
+          if (ignore_content) \
+            { \
+              lr_ignore_rest (ldfile, 0); \
+              break; \
+            } \
+ \
+          ldfile->return_widestr = 1; \
+          now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+          if (now->tok != tok_string) \
+            goto err_label; \
+          if (numeric->cat != NULL) \
+            lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_NUMERIC", #cat); \
+          else if (!ignore_content && now->val.str.startmb == NULL) \
+            { \
+              lr_error (ldfile, _("\
+%s: unknown character in field `%s'"), "LC_NUMERIC", #cat); \
+              numeric->cat = ""; \
+              numeric->cat##_wc = L'\0'; \
+            } \
+          else if (now->val.str.startwc != NULL && now->val.str.lenwc > 2) \
+            { \
+              lr_error (ldfile, _("\
+%s: value for field `%s' must be a single character"), "LC_NUMERIC", #cat); \
+            } \
+          else if (!ignore_content) \
+            { \
+              numeric->cat = now->val.str.startmb; \
+ \
+              if (now->val.str.startwc != NULL) \
+                numeric->cat##_wc = *now->val.str.startwc; \
+            } \
+          ldfile->return_widestr = 0; \
+          break
+
+          STR_ELEM (decimal_point);
+          STR_ELEM (thousands_sep);
+
+        case tok_grouping:
+          /* Ignore the rest of the line if we don't need the input of
+             this line.  */
+          if (ignore_content)
+            {
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok != tok_minus1 && now->tok != tok_number)
+            goto err_label;
+          else
+            {
+              size_t act = 0;
+              size_t max = 10;
+              char *grouping = xmalloc (max);
+
+              do
+                {
+                  /* Keep room for this entry plus the final NUL.  */
+                  if (act + 1 >= max)
+                    {
+                      max *= 2;
+                      grouping = xrealloc (grouping, max);
+                    }
+
+                  /* NOTE(review): after this error the check below still
+                     sees a non-EOL token and reports a second, generic
+                     syntax error -- confirm this is intended.  */
+                  if (act > 0 && grouping[act - 1] == '\177')
+                    {
+                      lr_error (ldfile, _("\
+%s: `-1' must be last entry in `%s' field"), "LC_NUMERIC", "grouping");
+                      lr_ignore_rest (ldfile, 0);
+                      break;
+                    }
+
+                  if (now->tok == tok_minus1)
+                    grouping[act++] = '\177';
+                  else if (now->val.num == 0)
+                    {
+                      /* A value of 0 disables grouping from here on but
+                         we must not store a NUL character since this
+                         terminates the string.  Use something different
+                         which must not be used otherwise.  */
+                      grouping[act++] = '\377';
+                    }
+                  else if (now->val.num > 126)
+                    lr_error (ldfile, _("\
+%s: values for field `%s' must be smaller than 127"),
+                              "LC_NUMERIC", "grouping");
+                  else
+                    grouping[act++] = now->val.num;
+
+                  /* Next must be semicolon.  */
+                  now = lr_token (ldfile, charmap, result, NULL, verbose);
+                  if (now->tok != tok_semicolon)
+                    break;
+
+                  now = lr_token (ldfile, charmap, result, NULL, verbose);
+                }
+              while (now->tok == tok_minus1 || now->tok == tok_number);
+
+              if (now->tok != tok_eol)
+                {
+                  /* The buffer has not been handed over to NUMERIC yet,
+                     so release it before reporting the syntax error.  */
+                  free (grouping);
+                  goto err_label;
+                }
+
+              /* A single -1 means no grouping.  */
+              if (act == 1 && grouping[0] == '\177')
+                act--;
+              grouping[act++] = '\0';
+
+              /* Drop the value of an earlier `grouping' line, if any; the
+                 last definition wins.  free (NULL) is a no-op.  */
+              free (numeric->grouping);
+              numeric->grouping = xrealloc (grouping, act);
+              numeric->grouping_len = act;
+            }
+          break;
+
+        case tok_end:
+          /* Next we assume `LC_NUMERIC'.  */
+          now = lr_token (ldfile, charmap, result, NULL, verbose);
+          if (now->tok == tok_eof)
+            break;
+          if (now->tok == tok_eol)
+            lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NUMERIC");
+          else if (now->tok != tok_lc_numeric)
+            lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_NUMERIC");
+          lr_ignore_rest (ldfile, now->tok == tok_lc_numeric);
+          return;
+
+        default:
+        err_label:
+          SYNTAX_ERROR (_("%s: syntax error"), "LC_NUMERIC");
+        }
+
+      /* Prepare for the next round.  */
+      now = lr_token (ldfile, charmap, result, NULL, verbose);
+      nowtok = now->tok;
+    }
+
+  /* When we come here we reached the end of the file.  */
+  lr_error (ldfile, _("%s: premature end of file"), "LC_NUMERIC");
+}
diff --git a/REORG.TODO/locale/programs/ld-paper.c b/REORG.TODO/locale/programs/ld-paper.c
new file mode 100644
index 0000000000..df7ce12036
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-paper.c
@@ -0,0 +1,231 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <langinfo.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_PAPER locale.
+   A value of 0 means the field has not been defined; paper_finish
+   then substitutes 297 for the height and 210 for the width. */
+struct locale_paper_t
+{
+ uint32_t height; /* Value of the height keyword. */
+ uint32_t width; /* Value of the width keyword. */
+};
+
+
+/* Prepare LOCALE and, if given, the line reader LR for the LC_PAPER
+   category.  Unless IGNORE_CONTENT is nonzero a zero-filled
+   locale_paper_t is attached to LOCALE.  LR may be NULL, in which case
+   only the allocation step is performed.  */
+static void
+paper_startup (struct linereader *lr, struct localedef_t *locale,
+               int ignore_content)
+{
+  if (ignore_content == 0)
+    {
+      struct locale_paper_t *fresh
+        = (struct locale_paper_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_PAPER].paper = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  /* Reader settings used while parsing this category.  */
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+/* Complete the LC_PAPER data of LOCALE once parsing is done.
+
+   If the category has no data of its own, a `copy' directive is
+   resolved by following the chain of copied locales; if that yields
+   nothing either, a warning is printed (unless be_quiet) and an empty
+   record is created.  A height or width of 0 is then replaced by the
+   default 297 resp. 210, with a diagnostic unless the whole category
+   was absent.  */
+void
+paper_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+  struct locale_paper_t *paper = locale->categories[LC_PAPER].paper;
+  /* Nonzero when the category had to be created from scratch.  */
+  int category_missing = 0;
+
+  if (paper == NULL && locale->copy_name[LC_PAPER] != NULL)
+    {
+      /* The category is copied from another locale.  That locale may in
+         turn copy from a third one, so walk the chain until a locale
+         with real data, or one without a further copy, is reached.  */
+      struct localedef_t *from = locale;
+
+      do
+        from = find_locale (LC_PAPER, from->copy_name[LC_PAPER],
+                            from->repertoire_name, charmap);
+      while (from->categories[LC_PAPER].paper == NULL
+             && from->copy_name[LC_PAPER] != NULL);
+
+      paper = from->categories[LC_PAPER].paper;
+      locale->categories[LC_PAPER].paper = paper;
+    }
+
+  if (paper == NULL)
+    {
+      /* Still nothing: warn and continue with an empty definition.  */
+      if (! be_quiet)
+        WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_PAPER"));
+      paper_startup (NULL, locale, 0);
+      paper = locale->categories[LC_PAPER].paper;
+      category_missing = 1;
+    }
+
+  /* Use as default values the values from the i18n locale.  */
+  if (paper->height == 0)
+    {
+      if (category_missing == 0)
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                                "LC_PAPER", "height"));
+      paper->height = 297;
+    }
+
+  if (paper->width == 0)
+    {
+      if (category_missing == 0)
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+                                "LC_PAPER", "width"));
+      paper->width = 210;
+    }
+}
+
+
+/* Write the LC_PAPER category of LOCALE below OUTPUT_PATH.  The record
+   consists of the height, the width and the code set name of CHARMAP,
+   in that order.  */
+void
+paper_output (struct localedef_t *locale, const struct charmap_t *charmap,
+              const char *output_path)
+{
+  struct locale_paper_t *dims = locale->categories[LC_PAPER].paper;
+  struct locale_file out;
+
+  init_locale_data (&out, _NL_ITEM_INDEX (_NL_NUM_LC_PAPER));
+
+  add_locale_uint32 (&out, dims->height);
+  add_locale_uint32 (&out, dims->width);
+  add_locale_string (&out, charmap->code_set_name);
+
+  write_locale_data (output_path, LC_PAPER, "LC_PAPER", &out);
+}
+
+
+/* The parser for the LC_PAPER section of the locale definition.
+   Reads tokens from LDFILE up to `END LC_PAPER'. A leading `copy'
+   keyword is handed to handle_copy. Otherwise the numbers following
+   `height' and `width' are stored in RESULT->categories[LC_PAPER].paper,
+   unless IGNORE_CONTENT is nonzero, in which case those lines are
+   skipped. A field is treated as already declared when its stored
+   value is nonzero. Malformed input is reported through lr_error or
+   SYNTAX_ERROR; end of file before `END' is reported as an error. */
+void
+paper_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct locale_paper_t *paper;
+ struct token *now;
+ struct token *arg;
+ enum token_t nowtok;
+
+ /* The rest of the line containing `LC_PAPER' must be empty. */
+ lr_ignore_rest (ldfile, 1);
+
+ do
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_paper,
+ LC_PAPER, "LC_PAPER", ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. */
+ paper_startup (ldfile, result, ignore_content);
+ paper = result->categories[LC_PAPER].paper;
+
+ while (1)
+ {
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+#define INT_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_number) \
+ goto err_label; \
+ else if (paper->cat != 0) \
+ lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+ "LC_PAPER", #cat); \
+ else if (!ignore_content) \
+ paper->cat = arg->val.num; \
+ break
+
+ INT_ELEM (height);
+ INT_ELEM (width);
+
+ case tok_end:
+ /* Next we assume `LC_PAPER'. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break; /* Leaves the switch only; the loop then ends on end of file. */
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"), "LC_PAPER");
+ else if (arg->tok != tok_lc_paper)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_PAPER");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_paper);
+ return;
+
+ default:
+ err_label: /* Also the target of the goto inside INT_ELEM. */
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_PAPER");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_PAPER");
+}
diff --git a/REORG.TODO/locale/programs/ld-telephone.c b/REORG.TODO/locale/programs/ld-telephone.c
new file mode 100644
index 0000000000..b62280aeec
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-telephone.c
@@ -0,0 +1,295 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <langinfo.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_TELEPHONE locale.
+   Each member is NULL until telephone_read stores the string given for
+   the keyword of the same name; telephone_finish replaces members that
+   are still NULL with defaults. telephone_output writes the four
+   strings in declaration order. */
+struct locale_telephone_t
+{
+ const char *tel_int_fmt; /* Format string; checked for valid escapes by telephone_finish. */
+ const char *tel_dom_fmt; /* Format string; checked for valid escapes by telephone_finish. */
+ const char *int_select;
+ const char *int_prefix;
+};
+
+
+/* Prepare LOCALE and, if given, the line reader LR for the
+   LC_TELEPHONE category.  Unless IGNORE_CONTENT is nonzero a
+   zero-filled locale_telephone_t is attached to LOCALE.  LR may be
+   NULL, in which case only the allocation step is performed.  */
+static void
+telephone_startup (struct linereader *lr, struct localedef_t *locale,
+                   int ignore_content)
+{
+  if (ignore_content == 0)
+    {
+      struct locale_telephone_t *fresh
+        = (struct locale_telephone_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_TELEPHONE].telephone = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  /* Reader settings used while parsing this category.  */
+  lr->translate_strings = 1;
+  lr->return_widestr = 0;
+}
+
+
+void
+telephone_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+ struct locale_telephone_t *telephone =
+ locale->categories[LC_TELEPHONE].telephone;
+ int nothing = 0;
+
+ /* Now resolve copying and also handle completely missing definitions. */
+ if (telephone == NULL)
+ {
+ /* First see whether we were supposed to copy. If yes, find the
+ actual definition. */
+ if (locale->copy_name[LC_TELEPHONE] != NULL)
+ {
+ /* Find the copying locale. This has to happen transitively since
+ the locale we are copying from might also copying another one. */
+ struct localedef_t *from = locale;
+
+ do
+ from = find_locale (LC_TELEPHONE, from->copy_name[LC_TELEPHONE],
+ from->repertoire_name, charmap);
+ while (from->categories[LC_TELEPHONE].telephone == NULL
+ && from->copy_name[LC_TELEPHONE] != NULL);
+
+ telephone = locale->categories[LC_TELEPHONE].telephone
+ = from->categories[LC_TELEPHONE].telephone;
+ }
+
+ /* If there is still no definition issue an warning and create an
+ empty one. */
+ if (telephone == NULL)
+ {
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_TELEPHONE"));
+ telephone_startup (NULL, locale, 0);
+ telephone = locale->categories[LC_TELEPHONE].telephone;
+ nothing = 1;
+ }
+ }
+
+ if (telephone->tel_int_fmt == NULL)
+ {
+ if (! nothing)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
+ "LC_TELEPHONE", "tel_int_fmt"));
+ /* Use as the default value the value of the i18n locale. */
+ telephone->tel_int_fmt = "+%c %a%t%l";
+ }
+ else
+ {
+ /* We must check whether the format string contains only the
+ allowed escape sequences. */
+ const char *cp = telephone->tel_int_fmt;
+
+ if (*cp == '\0')
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"),
+ "LC_TELEPHONE", "tel_int_fmt"));
+ else
+ while (*cp != '\0')
+ {
+ if (*cp == '%')
+ {
+ if (strchr ("aAcCelt", *++cp) == NULL)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid escape sequence in field `%s'"), "LC_TELEPHONE", "tel_int_fmt"));
+ break;
+ }
+ }
+ ++cp;
+ }
+ }
+
+ if (telephone->tel_dom_fmt == NULL)
+ telephone->tel_dom_fmt = "";
+ else if (telephone->tel_dom_fmt[0] != '\0')
+ {
+ /* We must check whether the format string contains only the
+ allowed escape sequences. */
+ const char *cp = telephone->tel_dom_fmt;
+
+ while (*cp != '\0')
+ {
+ if (*cp == '%')
+ {
+ if (strchr ("aAcCelt", *++cp) == NULL)
+ {
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid escape sequence in field `%s'"), "LC_TELEPHONE", "tel_dom_fmt"));
+ break;
+ }
+ }
+ ++cp;
+ }
+ }
+
+#define TEST_ELEM(cat) \
+ if (telephone->cat == NULL) \
+ { \
+ if (verbose && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+ "LC_TELEPHONE", #cat)); \
+ telephone->cat = ""; \
+ }
+
+ TEST_ELEM (int_select);
+ TEST_ELEM (int_prefix);
+}
+
+
+/* Write the LC_TELEPHONE category of LOCALE below OUTPUT_PATH.  The
+   record consists of tel_int_fmt, tel_dom_fmt, int_select, int_prefix
+   and the code set name of CHARMAP, in that order.  */
+void
+telephone_output (struct localedef_t *locale, const struct charmap_t *charmap,
+                  const char *output_path)
+{
+  struct locale_telephone_t *tel = locale->categories[LC_TELEPHONE].telephone;
+  struct locale_file out;
+
+  init_locale_data (&out, _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE));
+
+  add_locale_string (&out, tel->tel_int_fmt);
+  add_locale_string (&out, tel->tel_dom_fmt);
+  add_locale_string (&out, tel->int_select);
+  add_locale_string (&out, tel->int_prefix);
+  add_locale_string (&out, charmap->code_set_name);
+
+  write_locale_data (output_path, LC_TELEPHONE, "LC_TELEPHONE", &out);
+}
+
+
+/* The parser for the LC_TELEPHONE section of the locale definition.
+   Reads tokens from LDFILE up to `END LC_TELEPHONE'. A leading `copy'
+   keyword is handed to handle_copy. Otherwise the strings following
+   `tel_int_fmt', `tel_dom_fmt', `int_select' and `int_prefix' are
+   stored in RESULT->categories[LC_TELEPHONE].telephone, unless
+   IGNORE_CONTENT is nonzero, in which case those lines are skipped.
+   Malformed input is reported through lr_error or SYNTAX_ERROR; end of
+   file before `END' is reported as an error. */
+void
+telephone_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct locale_telephone_t *telephone;
+ struct token *now;
+ struct token *arg;
+ enum token_t nowtok;
+
+ /* The rest of the line containing `LC_TELEPHONE' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+ do
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_telephone,
+ LC_TELEPHONE, "LC_TELEPHONE", ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. */
+ telephone_startup (ldfile, result, ignore_content);
+ telephone = result->categories[LC_TELEPHONE].telephone;
+
+ while (1)
+ {
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+#define STR_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ arg = lr_token (ldfile, charmap, result, NULL, verbose); \
+ if (arg->tok != tok_string) \
+ goto err_label; \
+ if (telephone->cat != NULL) \
+ lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+ "LC_TELEPHONE", #cat); \
+ else if (!ignore_content && arg->val.str.startmb == NULL) \
+ { \
+ lr_error (ldfile, _("%s: unknown character in field `%s'"), \
+ "LC_TELEPHONE", #cat); \
+ telephone->cat = ""; \
+ } \
+ else if (!ignore_content) \
+ telephone->cat = arg->val.str.startmb; \
+ break
+
+ STR_ELEM (tel_int_fmt);
+ STR_ELEM (tel_dom_fmt);
+ STR_ELEM (int_select);
+ STR_ELEM (int_prefix);
+
+ case tok_end:
+ /* Next we assume `LC_TELEPHONE'. */
+ arg = lr_token (ldfile, charmap, result, NULL, verbose);
+ if (arg->tok == tok_eof)
+ break; /* Leaves the switch only; the loop then ends on end of file. */
+ if (arg->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TELEPHONE");
+ else if (arg->tok != tok_lc_telephone)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_TELEPHONE");
+ lr_ignore_rest (ldfile, arg->tok == tok_lc_telephone);
+ return;
+
+ default:
+ err_label: /* Also the target of the goto inside STR_ELEM. */
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_TELEPHONE");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, NULL, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_TELEPHONE");
+}
diff --git a/REORG.TODO/locale/programs/ld-time.c b/REORG.TODO/locale/programs/ld-time.c
new file mode 100644
index 0000000000..32e9c41e35
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-time.c
@@ -0,0 +1,964 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <byteswap.h>
+#include <langinfo.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "linereader.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* Entry describing an entry of the era specification. time_finish
+   allocates one of these per string of the `era' field and fills it
+   by parsing that string. */
+struct era_data
+{
+ int32_t direction; /* The plus or minus sign character from the start of the era string. */
+ int32_t offset; /* Offset year, parsed with strtol. */
+ int32_t start_date[3]; /* Year minus 1900, month minus 1, day; all 0x80000000 for -* and 0x7fffffff for +*. */
+ int32_t stop_date[3]; /* Presumably the same layout as start_date -- TODO confirm. */
+ const char *name;
+ const char *format;
+ uint32_t *wname;
+ uint32_t *wformat;
+};
+
+
+/* The real definition of the struct for the LC_TIME locale.
+   For the arrays that have a matching *_defined flag, time_finish
+   installs built-in default strings when the flag is zero. Members
+   with a `w' prefix have element type uint32_t and appear to be the
+   wide character counterparts of the multibyte members of the same
+   name (TODO confirm against time_read). */
+struct locale_time_t
+{
+ const char *abday[7];
+ const uint32_t *wabday[7];
+ int abday_defined;
+ const char *day[7];
+ const uint32_t *wday[7];
+ int day_defined;
+ const char *abmon[12];
+ const uint32_t *wabmon[12];
+ int abmon_defined;
+ const char *mon[12];
+ const uint32_t *wmon[12];
+ int mon_defined;
+ const char *am_pm[2];
+ const uint32_t *wam_pm[2];
+ int am_pm_defined;
+ const char *d_t_fmt;
+ const uint32_t *wd_t_fmt;
+ const char *d_fmt;
+ const uint32_t *wd_fmt;
+ const char *t_fmt;
+ const uint32_t *wt_fmt;
+ const char *t_fmt_ampm; /* Optional; time_finish derives a default when NULL. */
+ const uint32_t *wt_fmt_ampm;
+ const char **era; /* num_era strings; parsed by time_finish into era_entries. */
+ const uint32_t **wera;
+ uint32_t num_era; /* Number of elements in ERA. */
+ const char *era_year;
+ const uint32_t *wera_year;
+ const char *era_d_t_fmt;
+ const uint32_t *wera_d_t_fmt;
+ const char *era_t_fmt;
+ const uint32_t *wera_t_fmt;
+ const char *era_d_fmt;
+ const uint32_t *wera_d_fmt;
+ const char *alt_digits[100];
+ const uint32_t *walt_digits[100];
+ const char *date_fmt;
+ const uint32_t *wdate_fmt;
+ int alt_digits_defined;
+ unsigned char week_ndays;
+ uint32_t week_1stday;
+ unsigned char week_1stweek;
+ unsigned char first_weekday;
+ unsigned char first_workday;
+ unsigned char cal_direction;
+ const char *timezone;
+ const uint32_t *wtimezone;
+
+ struct era_data *era_entries; /* Allocated by time_finish with num_era elements. */
+};
+
+
+/* This constant is used to represent an empty wide character string:
+   a single element holding the terminating zero. */
+static const uint32_t empty_wstr[1] = { 0 };
+
+
+/* Prepare LOCALE and, if given, the line reader LR for the LC_TIME
+   category.  Unless IGNORE_CONTENT is nonzero a zero-filled
+   locale_time_t is attached to LOCALE.  LR may be NULL, in which case
+   only the allocation step is performed.  Unlike the other categories
+   in this directory, wide-string results are requested from the
+   reader.  */
+static void
+time_startup (struct linereader *lr, struct localedef_t *locale,
+              int ignore_content)
+{
+  if (ignore_content == 0)
+    {
+      struct locale_time_t *fresh
+        = (struct locale_time_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_TIME].time = fresh;
+    }
+
+  if (lr == NULL)
+    return;
+
+  /* Reader settings used while parsing this category.  */
+  lr->translate_strings = 1;
+  lr->return_widestr = 1;
+}
+
+
+void
+time_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+{
+ struct locale_time_t *time = locale->categories[LC_TIME].time;
+ int nothing = 0;
+
+ /* Now resolve copying and also handle completely missing definitions. */
+ if (time == NULL)
+ {
+ /* First see whether we were supposed to copy. If yes, find the
+ actual definition. */
+ if (locale->copy_name[LC_TIME] != NULL)
+ {
+ /* Find the copying locale. This has to happen transitively since
+ the locale we are copying from might also copying another one. */
+ struct localedef_t *from = locale;
+
+ do
+ from = find_locale (LC_TIME, from->copy_name[LC_TIME],
+ from->repertoire_name, charmap);
+ while (from->categories[LC_TIME].time == NULL
+ && from->copy_name[LC_TIME] != NULL);
+
+ time = locale->categories[LC_TIME].time
+ = from->categories[LC_TIME].time;
+ }
+
+ /* If there is still no definition issue an warning and create an
+ empty one. */
+ if (time == NULL)
+ {
+ if (! be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+No definition for %s category found"), "LC_TIME"));
+ time_startup (NULL, locale, 0);
+ time = locale->categories[LC_TIME].time;
+ nothing = 1;
+ }
+ }
+
+#define noparen(arg1, argn...) arg1, ##argn
+#define TESTARR_ELEM(cat, val) \
+ if (!time->cat##_defined) \
+ { \
+ const char *initval[] = { noparen val }; \
+ unsigned int i; \
+ \
+ if (! be_quiet && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+ "LC_TIME", #cat)); \
+ \
+ for (i = 0; i < sizeof (initval) / sizeof (initval[0]); ++i) \
+ time->cat[i] = initval[i]; \
+ }
+
+ TESTARR_ELEM (abday, ( "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" ));
+ TESTARR_ELEM (day, ( "Sunday", "Monday", "Tuesday", "Wednesday",
+ "Thursday", "Friday", "Saturday" ));
+ TESTARR_ELEM (abmon, ( "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" ));
+ TESTARR_ELEM (mon, ( "January", "February", "March", "April",
+ "May", "June", "July", "August",
+ "September", "October", "November", "December" ));
+ TESTARR_ELEM (am_pm, ( "AM", "PM" ));
+
+#define TEST_ELEM(cat, initval) \
+ if (time->cat == NULL) \
+ { \
+ if (! be_quiet && ! nothing) \
+ WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
+ "LC_TIME", #cat)); \
+ \
+ time->cat = initval; \
+ }
+
+ TEST_ELEM (d_t_fmt, "%a %b %e %H:%M:%S %Y");
+ TEST_ELEM (d_fmt, "%m/%d/%y");
+ TEST_ELEM (t_fmt, "%H:%M:%S");
+
+ /* According to C.Y.Alexis Cheng <alexis@vnet.ibm.com> the T_FMT_AMPM
+ field is optional. */
+ if (time->t_fmt_ampm == NULL)
+ {
+ if (time->am_pm[0][0] == '\0' && time->am_pm[1][0] == '\0')
+ {
+ /* No AM/PM strings defined, use the 24h format as default. */
+ time->t_fmt_ampm = time->t_fmt;
+ time->wt_fmt_ampm = time->wt_fmt;
+ }
+ else
+ {
+ time->t_fmt_ampm = "%I:%M:%S %p";
+ time->wt_fmt_ampm = (const uint32_t *) L"%I:%M:%S %p";
+ }
+ }
+
+ /* Now process the era entries. */
+ if (time->num_era != 0)
+ {
+ const int days_per_month[12] = { 31, 29, 31, 30, 31, 30,
+ 31, 31, 30, 31 ,30, 31 };
+ size_t idx;
+ wchar_t *wstr;
+
+ time->era_entries =
+ (struct era_data *) xmalloc (time->num_era
+ * sizeof (struct era_data));
+
+ for (idx = 0; idx < time->num_era; ++idx)
+ {
+ size_t era_len = strlen (time->era[idx]);
+ char *str = xmalloc ((era_len + 1 + 3) & ~3);
+ char *endp;
+
+ memcpy (str, time->era[idx], era_len + 1);
+
+ /* First character must be + or - for the direction. */
+ if (*str != '+' && *str != '-')
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: direction flag in string %Zd in `era' field is not '+' nor '-'"),
+ "LC_TIME", idx + 1));
+ /* Default arbitrarily to '+'. */
+ time->era_entries[idx].direction = '+';
+ }
+ else
+ time->era_entries[idx].direction = *str;
+ if (*++str != ':')
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: direction flag in string %Zd in `era' field is not a single character"),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else
+ ++str;
+
+ /* Now the offset year. */
+ time->era_entries[idx].offset = strtol (str, &endp, 10);
+ if (endp == str)
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid number for offset in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else if (*endp != ':')
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: garbage at end of offset value in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else
+ str = endp + 1;
+
+ /* Next is the starting date in ISO format. */
+ if (strncmp (str, "-*", 2) == 0)
+ {
+ time->era_entries[idx].start_date[0] =
+ time->era_entries[idx].start_date[1] =
+ time->era_entries[idx].start_date[2] = 0x80000000;
+ if (str[2] != ':')
+ goto garbage_start_date;
+ str += 3;
+ }
+ else if (strncmp (str, "+*", 2) == 0)
+ {
+ time->era_entries[idx].start_date[0] =
+ time->era_entries[idx].start_date[1] =
+ time->era_entries[idx].start_date[2] = 0x7fffffff;
+ if (str[2] != ':')
+ goto garbage_start_date;
+ str += 3;
+ }
+ else
+ {
+ time->era_entries[idx].start_date[0] = strtol (str, &endp, 10);
+ if (endp == str || *endp != '/')
+ goto invalid_start_date;
+ else
+ str = endp + 1;
+ time->era_entries[idx].start_date[0] -= 1900;
+ /* year -1 represent 1 B.C. (not -1 A.D.) */
+ if (time->era_entries[idx].start_date[0] < -1900)
+ ++time->era_entries[idx].start_date[0];
+
+ time->era_entries[idx].start_date[1] = strtol (str, &endp, 10);
+ if (endp == str || *endp != '/')
+ goto invalid_start_date;
+ else
+ str = endp + 1;
+ time->era_entries[idx].start_date[1] -= 1;
+
+ time->era_entries[idx].start_date[2] = strtol (str, &endp, 10);
+ if (endp == str)
+ {
+ invalid_start_date:
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid starting date in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else if (*endp != ':')
+ {
+ garbage_start_date:
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: garbage at end of starting date in string %Zd in `era' field "),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else
+ {
+ str = endp + 1;
+
+ /* Check for valid value. */
+ if ((time->era_entries[idx].start_date[1] < 0
+ || time->era_entries[idx].start_date[1] >= 12
+ || time->era_entries[idx].start_date[2] < 0
+ || (time->era_entries[idx].start_date[2]
+ > days_per_month[time->era_entries[idx].start_date[1]])
+ || (time->era_entries[idx].start_date[1] == 2
+ && time->era_entries[idx].start_date[2] == 29
+ && !__isleap (time->era_entries[idx].start_date[0])))
+ && !be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: starting date is invalid in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ }
+ }
+
+ /* Next is the stopping date in ISO format. */
+ if (strncmp (str, "-*", 2) == 0)
+ {
+ time->era_entries[idx].stop_date[0] =
+ time->era_entries[idx].stop_date[1] =
+ time->era_entries[idx].stop_date[2] = 0x80000000;
+ if (str[2] != ':')
+ goto garbage_stop_date;
+ str += 3;
+ }
+ else if (strncmp (str, "+*", 2) == 0)
+ {
+ time->era_entries[idx].stop_date[0] =
+ time->era_entries[idx].stop_date[1] =
+ time->era_entries[idx].stop_date[2] = 0x7fffffff;
+ if (str[2] != ':')
+ goto garbage_stop_date;
+ str += 3;
+ }
+ else
+ {
+ time->era_entries[idx].stop_date[0] = strtol (str, &endp, 10);
+ if (endp == str || *endp != '/')
+ goto invalid_stop_date;
+ else
+ str = endp + 1;
+ time->era_entries[idx].stop_date[0] -= 1900;
+ /* year -1 represent 1 B.C. (not -1 A.D.) */
+ if (time->era_entries[idx].stop_date[0] < -1900)
+ ++time->era_entries[idx].stop_date[0];
+
+ time->era_entries[idx].stop_date[1] = strtol (str, &endp, 10);
+ if (endp == str || *endp != '/')
+ goto invalid_stop_date;
+ else
+ str = endp + 1;
+ time->era_entries[idx].stop_date[1] -= 1;
+
+ time->era_entries[idx].stop_date[2] = strtol (str, &endp, 10);
+ if (endp == str)
+ {
+ invalid_stop_date:
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid stopping date in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else if (*endp != ':')
+ {
+ garbage_stop_date:
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: garbage at end of stopping date in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ (void) strsep (&str, ":");
+ }
+ else
+ {
+ str = endp + 1;
+
+ /* Check for valid value. */
+ if ((time->era_entries[idx].stop_date[1] < 0
+ || time->era_entries[idx].stop_date[1] >= 12
+ || time->era_entries[idx].stop_date[2] < 0
+ || (time->era_entries[idx].stop_date[2]
+ > days_per_month[time->era_entries[idx].stop_date[1]])
+ || (time->era_entries[idx].stop_date[1] == 2
+ && time->era_entries[idx].stop_date[2] == 29
+ && !__isleap (time->era_entries[idx].stop_date[0])))
+ && !be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: invalid stopping date in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ }
+ }
+
+ if (str == NULL || *str == '\0')
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: missing era name in string %Zd in `era' field"), "LC_TIME", idx + 1));
+ time->era_entries[idx].name =
+ time->era_entries[idx].format = "";
+ }
+ else
+ {
+ time->era_entries[idx].name = strsep (&str, ":");
+
+ if (str == NULL || *str == '\0')
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: missing era format in string %Zd in `era' field"),
+ "LC_TIME", idx + 1));
+ time->era_entries[idx].name =
+ time->era_entries[idx].format = "";
+ }
+ else
+ time->era_entries[idx].format = str;
+ }
+
+ /* Now generate the wide character name and format. */
+ wstr = wcschr ((wchar_t *) time->wera[idx], L':');/* end direction */
+ wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end offset */
+ wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end start */
+ wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end end */
+ if (wstr != NULL)
+ {
+ time->era_entries[idx].wname = (uint32_t *) wstr + 1;
+ wstr = wcschr (wstr + 1, L':'); /* end name */
+ if (wstr != NULL)
+ {
+ *wstr = L'\0';
+ time->era_entries[idx].wformat = (uint32_t *) wstr + 1;
+ }
+ else
+ time->era_entries[idx].wname =
+ time->era_entries[idx].wformat = (uint32_t *) L"";
+ }
+ else
+ time->era_entries[idx].wname =
+ time->era_entries[idx].wformat = (uint32_t *) L"";
+ }
+ }
+
+ /* Set up defaults based on ISO 30112 WD10 [2014]. */
+ if (time->week_ndays == 0)
+ time->week_ndays = 7;
+
+ if (time->week_1stday == 0)
+ time->week_1stday = 19971130;
+
+ if (time->week_1stweek == 0)
+ time->week_1stweek = 7;
+
+ if (time->week_1stweek > time->week_ndays)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: third operand for value of field `%s' must not be larger than %d"),
+ "LC_TIME", "week", 7));
+
+ if (time->first_weekday == '\0')
+ /* The definition does not specify this so the default is used. */
+ time->first_weekday = 1;
+ else if (time->first_weekday > time->week_ndays)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: values for field `%s' must not be larger than %d"),
+ "LC_TIME", "first_weekday", 7));
+
+ if (time->first_workday == '\0')
+ /* The definition does not specify this so the default is used. */
+ time->first_workday = 2;
+ else if (time->first_workday > time->week_ndays)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: values for field `%s' must not be larger than %d"),
+ "LC_TIME", "first_workday", 7));
+
+ if (time->cal_direction == '\0')
+ /* The definition does not specify this so the default is used. */
+ time->cal_direction = 1;
+ else if (time->cal_direction > 3)
+ WITH_CUR_LOCALE (error (0, 0, _("\
+%s: values for field `%s' must not be larger than %d"),
+ "LC_TIME", "cal_direction", 3));
+
+ /* XXX We don't perform any tests on the timezone value since this is
+ simply useless, stupid $&$!@... */
+ if (time->timezone == NULL)
+ time->timezone = "";
+
+ if (time->date_fmt == NULL)
+ time->date_fmt = "%a %b %e %H:%M:%S %Z %Y";
+ if (time->wdate_fmt == NULL)
+ time->wdate_fmt = (const uint32_t *) L"%a %b %e %H:%M:%S %Z %Y";
+}
+
+
+/* Write the LC_TIME category of LOCALE below OUTPUT_PATH.
+
+ All items are appended to a `struct locale_file' in one fixed sequence
+ and the result is handed to write_locale_data. Most optional pointers
+ are passed through the GNU `?:' operator so that a NULL field is emitted
+ as "" (or empty_wstr for the wide variants). CHARMAP is only used for
+ its code set name, which is the last item written. */
+void
+time_output (struct localedef_t *locale, const struct charmap_t *charmap,
+ const char *output_path)
+{
+ struct locale_time_t *time = locale->categories[LC_TIME].time;
+ struct locale_file file;
+ size_t num, n;
+
+ init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_TIME));
+
+ /* The ab'days. */
+ for (n = 0; n < 7; ++n)
+ add_locale_string (&file, time->abday[n] ?: "");
+
+ /* The days. */
+ for (n = 0; n < 7; ++n)
+ add_locale_string (&file, time->day[n] ?: "");
+
+ /* The ab'mons. */
+ for (n = 0; n < 12; ++n)
+ add_locale_string (&file, time->abmon[n] ?: "");
+
+ /* The mons. */
+ for (n = 0; n < 12; ++n)
+ add_locale_string (&file, time->mon[n] ?: "");
+
+ /* AM/PM. Passed without a NULL fallback.
+ NOTE(review): presumably both entries are always set before this
+ function runs -- confirm against time_finish. */
+ for (n = 0; n < 2; ++n)
+ add_locale_string (&file, time->am_pm[n]);
+
+ add_locale_string (&file, time->d_t_fmt ?: "");
+ add_locale_string (&file, time->d_fmt ?: "");
+ add_locale_string (&file, time->t_fmt ?: "");
+ add_locale_string (&file, time->t_fmt_ampm ?: "");
+
+ /* The raw era strings, grouped into one structure. */
+ start_locale_structure (&file);
+ for (num = 0; num < time->num_era; ++num)
+ add_locale_string (&file, time->era[num]);
+ end_locale_structure (&file);
+
+ add_locale_string (&file, time->era_year ?: "");
+ add_locale_string (&file, time->era_d_fmt ?: "");
+
+ /* Always 100 alternative digit slots; unset ones become "". */
+ start_locale_structure (&file);
+ for (num = 0; num < 100; ++num)
+ add_locale_string (&file, time->alt_digits[num] ?: "");
+ end_locale_structure (&file);
+
+ add_locale_string (&file, time->era_d_t_fmt ?: "");
+ add_locale_string (&file, time->era_t_fmt ?: "");
+ add_locale_uint32 (&file, time->num_era);
+
+ /* The parsed era entries: eight 32-bit values followed by the narrow
+ and wide name/format strings for each era. */
+ start_locale_structure (&file);
+ for (num = 0; num < time->num_era; ++num)
+ {
+ add_locale_uint32 (&file, time->era_entries[num].direction);
+ add_locale_uint32 (&file, time->era_entries[num].offset);
+ add_locale_uint32 (&file, time->era_entries[num].start_date[0]);
+ add_locale_uint32 (&file, time->era_entries[num].start_date[1]);
+ add_locale_uint32 (&file, time->era_entries[num].start_date[2]);
+ add_locale_uint32 (&file, time->era_entries[num].stop_date[0]);
+ add_locale_uint32 (&file, time->era_entries[num].stop_date[1]);
+ add_locale_uint32 (&file, time->era_entries[num].stop_date[2]);
+ add_locale_string (&file, time->era_entries[num].name);
+ add_locale_string (&file, time->era_entries[num].format);
+ add_locale_wstring (&file, time->era_entries[num].wname);
+ add_locale_wstring (&file, time->era_entries[num].wformat);
+ }
+ end_locale_structure (&file);
+
+ /* The wide character ab'days. */
+ for (n = 0; n < 7; ++n)
+ add_locale_wstring (&file, time->wabday[n] ?: empty_wstr);
+
+ /* The wide character days. */
+ for (n = 0; n < 7; ++n)
+ add_locale_wstring (&file, time->wday[n] ?: empty_wstr);
+
+ /* The wide character ab'mons. */
+ for (n = 0; n < 12; ++n)
+ add_locale_wstring (&file, time->wabmon[n] ?: empty_wstr);
+
+ /* The wide character mons. */
+ for (n = 0; n < 12; ++n)
+ add_locale_wstring (&file, time->wmon[n] ?: empty_wstr);
+
+ /* Wide character AM/PM. */
+ for (n = 0; n < 2; ++n)
+ add_locale_wstring (&file, time->wam_pm[n] ?: empty_wstr);
+
+ add_locale_wstring (&file, time->wd_t_fmt ?: empty_wstr);
+ add_locale_wstring (&file, time->wd_fmt ?: empty_wstr);
+ add_locale_wstring (&file, time->wt_fmt ?: empty_wstr);
+ add_locale_wstring (&file, time->wt_fmt_ampm ?: empty_wstr);
+ add_locale_wstring (&file, time->wera_year ?: empty_wstr);
+ add_locale_wstring (&file, time->wera_d_fmt ?: empty_wstr);
+
+ /* Wide character alternative digits, again exactly 100 slots. */
+ start_locale_structure (&file);
+ for (num = 0; num < 100; ++num)
+ add_locale_wstring (&file, time->walt_digits[num] ?: empty_wstr);
+ end_locale_structure (&file);
+
+ add_locale_wstring (&file, time->wera_d_t_fmt ?: empty_wstr);
+ add_locale_wstring (&file, time->wera_t_fmt ?: empty_wstr);
+ /* Week and calendar parameters. */
+ add_locale_char (&file, time->week_ndays);
+ add_locale_uint32 (&file, time->week_1stday);
+ add_locale_char (&file, time->week_1stweek);
+ add_locale_char (&file, time->first_weekday);
+ add_locale_char (&file, time->first_workday);
+ add_locale_char (&file, time->cal_direction);
+ add_locale_string (&file, time->timezone);
+ add_locale_string (&file, time->date_fmt);
+ add_locale_wstring (&file, time->wdate_fmt);
+ add_locale_string (&file, charmap->code_set_name);
+ write_locale_data (output_path, LC_TIME, "LC_TIME", &file);
+}
+
+
+/* The parser for the LC_TIME section of the locale definition.
+
+ LDFILE is the reader positioned right after the `LC_TIME' keyword and
+ RESULT is the locale that receives the parsed data. CHARMAP is passed
+ on to lr_token. If REPERTOIRE_NAME is not NULL the repertoire is loaded
+ with repertoire_read and passed on as well. When IGNORE_CONTENT is
+ nonzero each field line is skipped with lr_ignore_rest instead of being
+ stored.
+
+ The function returns after a `copy' directive was handled or after the
+ `END' line was consumed. Reaching the end of the file first is
+ reported as "premature end of file". */
+void
+time_read (struct linereader *ldfile, struct localedef_t *result,
+ const struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
+{
+ struct repertoire_t *repertoire = NULL;
+ struct locale_time_t *time;
+ struct token *now;
+ enum token_t nowtok;
+ size_t cnt;
+
+ /* Get the repertoire we have to use. */
+ if (repertoire_name != NULL)
+ repertoire = repertoire_read (repertoire_name);
+
+ /* The rest of the line containing `LC_TIME' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+
+ /* Skip leading empty lines to find the first real token. */
+ do
+ {
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ nowtok = now->tok;
+ }
+ while (nowtok == tok_eol);
+
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
+ {
+ handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_time,
+ LC_TIME, "LC_TIME", ignore_content);
+ return;
+ }
+
+ /* Prepare the data structures. */
+ time_startup (ldfile, result, ignore_content);
+ time = result->categories[LC_TIME].time;
+
+ while (1)
+ {
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ nowtok = now->tok;
+ continue;
+ }
+
+ switch (nowtok)
+ {
+ /* STRARR_ELEM generates the case for a field holding a
+ semicolon-separated list of between MIN and MAX strings. Slots
+ without a value, and values containing unknown characters, are
+ set to "" / empty_wstr. The case also sets the field's
+ `_defined' flag. */
+#define STRARR_ELEM(cat, min, max) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ for (cnt = 0; cnt < max; ++cnt) \
+ { \
+ now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+ if (now->tok == tok_eol) \
+ { \
+ if (cnt < min) \
+ lr_error (ldfile, _("%s: too few values for field `%s'"), \
+ "LC_TIME", #cat); \
+ if (!ignore_content) \
+ do \
+ { \
+ time->cat[cnt] = ""; \
+ time->w##cat[cnt] = empty_wstr; \
+ } \
+ while (++cnt < max); \
+ break; \
+ } \
+ else if (now->tok != tok_string) \
+ goto err_label; \
+ else if (!ignore_content && (now->val.str.startmb == NULL \
+ || now->val.str.startwc == NULL)) \
+ { \
+ lr_error (ldfile, _("%s: unknown character in field `%s'"), \
+ "LC_TIME", #cat); \
+ time->cat[cnt] = ""; \
+ time->w##cat[cnt] = empty_wstr; \
+ } \
+ else if (!ignore_content) \
+ { \
+ time->cat[cnt] = now->val.str.startmb; \
+ time->w##cat[cnt] = now->val.str.startwc; \
+ } \
+ \
+ /* Match the semicolon. */ \
+ now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+ if (now->tok != tok_semicolon && now->tok != tok_eol) \
+ break; \
+ } \
+ if (now->tok != tok_eol) \
+ { \
+ while (!ignore_content && cnt < min) \
+ { \
+ time->cat[cnt] = ""; \
+ time->w##cat[cnt++] = empty_wstr; \
+ } \
+ \
+ if (now->tok == tok_semicolon) \
+ { \
+ now = lr_token (ldfile, charmap, result, repertoire, \
+ verbose); \
+ if (now->tok == tok_eol) \
+ lr_error (ldfile, _("extra trailing semicolon")); \
+ else if (now->tok == tok_string) \
+ { \
+ lr_error (ldfile, _("\
+%s: too many values for field `%s'"), \
+ "LC_TIME", #cat); \
+ lr_ignore_rest (ldfile, 0); \
+ } \
+ else \
+ goto err_label; \
+ } \
+ else \
+ goto err_label; \
+ } \
+ time->cat##_defined = 1; \
+ break
+
+ STRARR_ELEM (abday, 7, 7);
+ STRARR_ELEM (day, 7, 7);
+ STRARR_ELEM (abmon, 12, 12);
+ STRARR_ELEM (mon, 12, 12);
+ STRARR_ELEM (am_pm, 2, 2);
+ STRARR_ELEM (alt_digits, 0, 100);
+
+ /* `era' takes an unbounded, semicolon-separated list of strings;
+ the narrow and wide arrays grow by one slot per entry. */
+ case tok_era:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+ do
+ {
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_string)
+ goto err_label;
+ if (!ignore_content && (now->val.str.startmb == NULL
+ || now->val.str.startwc == NULL))
+ {
+ lr_error (ldfile, _("%s: unknown character in field `%s'"),
+ "LC_TIME", "era");
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+ if (!ignore_content)
+ {
+ time->era = xrealloc (time->era,
+ (time->num_era + 1) * sizeof (char *));
+ time->era[time->num_era] = now->val.str.startmb;
+
+ /* NOTE(review): the element size is spelled sizeof (char *)
+ although this array receives the wide string pointers;
+ this relies on both pointer types having the same size. */
+ time->wera = xrealloc (time->wera,
+ (time->num_era + 1)
+ * sizeof (char *));
+ time->wera[time->num_era++] = now->val.str.startwc;
+ }
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_eol && now->tok != tok_semicolon)
+ goto err_label;
+ }
+ while (now->tok == tok_semicolon);
+ break;
+
+ /* STR_ELEM generates the case for a field holding one string.
+ A non-NULL field counts as already declared and is reported. */
+#define STR_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+ if (now->tok != tok_string) \
+ goto err_label; \
+ else if (time->cat != NULL) \
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"), "LC_TIME", #cat); \
+ else if (!ignore_content && (now->val.str.startmb == NULL \
+ || now->val.str.startwc == NULL)) \
+ { \
+ lr_error (ldfile, _("%s: unknown character in field `%s'"), \
+ "LC_TIME", #cat); \
+ time->cat = ""; \
+ time->w##cat = empty_wstr; \
+ } \
+ else if (!ignore_content) \
+ { \
+ time->cat = now->val.str.startmb; \
+ time->w##cat = now->val.str.startwc; \
+ } \
+ break
+
+ STR_ELEM (d_t_fmt);
+ STR_ELEM (d_fmt);
+ STR_ELEM (t_fmt);
+ STR_ELEM (t_fmt_ampm);
+ STR_ELEM (era_year);
+ STR_ELEM (era_d_t_fmt);
+ STR_ELEM (era_d_fmt);
+ STR_ELEM (era_t_fmt);
+ STR_ELEM (timezone);
+ STR_ELEM (date_fmt);
+
+ /* INT_ELEM generates the case for a field holding one number.
+ A nonzero field counts as already declared, so an explicit
+ value of 0 cannot be told apart from "not set". */
+#define INT_ELEM(cat) \
+ case tok_##cat: \
+ /* Ignore the rest of the line if we don't need the input of \
+ this line. */ \
+ if (ignore_content) \
+ { \
+ lr_ignore_rest (ldfile, 0); \
+ break; \
+ } \
+ \
+ now = lr_token (ldfile, charmap, result, repertoire, verbose); \
+ if (now->tok != tok_number) \
+ goto err_label; \
+ else if (time->cat != 0) \
+ lr_error (ldfile, _("%s: field `%s' declared more than once"), \
+ "LC_TIME", #cat); \
+ else if (!ignore_content) \
+ time->cat = now->val.num; \
+ break
+
+ INT_ELEM (first_weekday);
+ INT_ELEM (first_workday);
+ INT_ELEM (cal_direction);
+
+ /* `week' takes exactly three numbers separated by semicolons:
+ week_ndays; week_1stday; week_1stweek. */
+ case tok_week:
+ /* Ignore the rest of the line if we don't need the input of
+ this line. */
+ if (ignore_content)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_number)
+ goto err_label;
+ time->week_ndays = now->val.num;
+
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_semicolon)
+ goto err_label;
+
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_number)
+ goto err_label;
+ time->week_1stday = now->val.num;
+
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_semicolon)
+ goto err_label;
+
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ if (now->tok != tok_number)
+ goto err_label;
+ time->week_1stweek = now->val.num;
+
+ lr_ignore_rest (ldfile, 1);
+ break;
+
+ case tok_end:
+ /* Next we assume `LC_TIME'. */
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ /* At end of file only leave the switch; the loop then ends and
+ the premature-EOF error below is reported. */
+ if (now->tok == tok_eof)
+ break;
+ if (now->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TIME");
+ else if (now->tok != tok_lc_time)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_TIME");
+ lr_ignore_rest (ldfile, now->tok == tok_lc_time);
+ return;
+
+ default:
+ err_label:
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_TIME");
+ }
+
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, result, repertoire, verbose);
+ nowtok = now->tok;
+ }
+
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_TIME");
+}
diff --git a/REORG.TODO/locale/programs/linereader.c b/REORG.TODO/locale/programs/linereader.c
new file mode 100644
index 0000000000..52b340963a
--- /dev/null
+++ b/REORG.TODO/locale/programs/linereader.c
@@ -0,0 +1,886 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "localedef.h"
+#include "charmap.h"
+#include "error.h"
+#include "linereader.h"
+#include "locfile.h"
+
+/* Prototypes for local functions. Each one is called from lr_token
+ after the first character of the token has been consumed and returns
+ a pointer to the token stored inside the reader. */
+/* Numeric character code introduced by the escape character. */
+static struct token *get_toplvl_escape (struct linereader *lr);
+/* Symbolic name introduced by '<'. */
+static struct token *get_symname (struct linereader *lr);
+/* Keyword or plain identifier. */
+static struct token *get_ident (struct linereader *lr);
+/* String introduced by '"'. */
+static struct token *get_string (struct linereader *lr,
+ const struct charmap_t *charmap,
+ struct localedef_t *locale,
+ const struct repertoire_t *repertoire,
+ int verbose);
+
+
+/* Create a line reader for the file FNAME using the keyword hash
+   function HF.  A NULL name, "-" and "/dev/stdin" all select standard
+   input, which is then reported under the name "<stdin>".  Returns NULL
+   if the file cannot be opened or if lr_create fails.  */
+struct linereader *
+lr_open (const char *fname, kw_hash_fct_t hf)
+{
+  int want_stdin = (fname == NULL
+                    || strcmp (fname, "-") == 0
+                    || strcmp (fname, "/dev/stdin") == 0);
+  FILE *stream;
+
+  if (want_stdin)
+    return lr_create (stdin, "<stdin>", hf);
+
+  stream = fopen (fname, "rm");
+  return stream != NULL ? lr_create (stream, fname, hf) : NULL;
+}
+
+/* Create a line reader on the already opened stream FP.  FNAME is
+   duplicated and kept for diagnostics, HF is the keyword lookup function
+   used by the tokenizer.  The first line is read immediately.
+
+   On success the new reader is returned.  If the first line cannot be
+   read the stream is closed, everything allocated here is released and
+   NULL is returned with errno preserved from the failed read.  */
+struct linereader *
+lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
+{
+  struct linereader *result;
+  int n;
+
+  result = (struct linereader *) xmalloc (sizeof (*result));
+
+  result->fp = fp;
+  result->fname = xstrdup (fname);
+  result->buf = NULL;
+  result->bufsize = 0;
+  result->lineno = 1;
+  result->idx = 0;
+  result->comment_char = '#';
+  result->escape_char = '\\';
+  result->translate_strings = 1;
+  result->return_widestr = 0;
+
+  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
+  if (n < 0)
+    {
+      int save = errno;
+      fclose (result->fp);
+      /* getdelim may have allocated the line buffer even though it
+         failed, so it has to be released as well.  */
+      free (result->buf);
+      free ((char *) result->fname);
+      free (result);
+      errno = save;
+      return NULL;
+    }
+
+  /* Drop an escaped newline so that the next physical line continues
+     this logical line.  */
+  if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
+    n -= 2;
+
+  result->buf[n] = '\0';
+  result->bufact = n;
+  result->hash_fct = hf;
+
+  return result;
+}
+
+
+/* Return nonzero if the reader LR currently holds no buffered input
+   (its active buffer length is zero), zero otherwise.  The reader is
+   not modified.  */
+int
+lr_eof (struct linereader *lr)
+{
+  /* This must be a comparison: an assignment here would always yield 0
+     and would also throw away the length of the buffered line.  */
+  return lr->bufact == 0;
+}
+
+
+/* Discard the remainder of the current logical line of LR.
+
+ If VERBOSE is nonzero, trailing white space is skipped first and
+ anything else before the newline or comment character is reported as
+ "trailing garbage at end of line". Afterwards continuation lines are
+ read until the buffer ends in a newline, and the read index is moved to
+ the end of the buffer. */
+void
+lr_ignore_rest (struct linereader *lr, int verbose)
+{
+ if (verbose)
+ {
+ while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
+ && lr->buf[lr->idx] != lr->comment_char)
+ /* NOTE(review): isspace ('\0') is normally false, so this refill
+ branch looks unreachable from the loop condition -- confirm. */
+ if (lr->buf[lr->idx] == '\0')
+ {
+ if (lr_next (lr) < 0)
+ return;
+ }
+ else
+ ++lr->idx;
+
+ if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
+ && lr->buf[lr->idx] != lr->comment_char)
+ lr_error (lr, _("trailing garbage at end of line"));
+ }
+
+ /* Ignore continued line. */
+ while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
+ if (lr_next (lr) < 0)
+ break;
+
+ /* Everything still in the buffer counts as consumed. */
+ lr->idx = lr->bufact;
+}
+
+
+/* Dispose of the reader LR: close its stream and release the line
+   buffer and the reader object itself.
+   NOTE(review): lr->fname is not released here -- presumably other data
+   may still point at the file name; confirm before changing this.  */
+void
+lr_close (struct linereader *lr)
+{
+  FILE *stream = lr->fp;
+
+  free (lr->buf);
+  free (lr);
+  fclose (stream);
+}
+
+
+/* Replace the buffer of LR with the next physical line of its stream.
+   Returns 0 on success and -1 if nothing more could be read.  On success
+   the line counter is advanced, a trailing escaped newline is removed
+   and the read index is reset to the start of the buffer.  */
+int
+lr_next (struct linereader *lr)
+{
+  int len = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
+
+  if (len < 0)
+    return -1;
+
+  ++lr->lineno;
+
+  if (len > 1
+      && lr->buf[len - 1] == '\n'
+      && lr->buf[len - 2] == lr->escape_char)
+    {
+#if 0
+      /* XXX Is this correct?  */
+      /* An escaped newline character is substituted with a single <SP>.  */
+      --len;
+      lr->buf[len - 1] = ' ';
+#else
+      /* Cut off both the escape character and the newline.  */
+      len -= 2;
+#endif
+    }
+
+  lr->idx = 0;
+  lr->bufact = len;
+  lr->buf[len] = '\0';
+
+  return 0;
+}
+
+
+/* Defined in error.c. */
+/* This variable is incremented each time `error' is called. */
+extern unsigned int error_message_count;
+
+/* The calling program should define program_name and set it to the
+ name of the executing program. */
+extern char *program_name;
+
+
+/* Read the next token from LR and return a pointer to it. The token
+ is stored inside the reader, so the result is overwritten by the next
+ call.
+
+ White space is skipped and a comment yields tok_eol. End of input
+ yields tok_eof. Escape sequences, ellipses, "<...>" symbols, numbers,
+ punctuation and strings are recognized here or in the helper
+ functions; everything else is handed to get_ident. CHARMAP, LOCALE,
+ REPERTOIRE and VERBOSE are only passed on to get_string. */
+struct token *
+lr_token (struct linereader *lr, const struct charmap_t *charmap,
+ struct localedef_t *locale, const struct repertoire_t *repertoire,
+ int verbose)
+{
+ int ch;
+
+ while (1)
+ {
+ do
+ {
+ ch = lr_getc (lr);
+
+ if (ch == EOF)
+ {
+ lr->token.tok = tok_eof;
+ return &lr->token;
+ };
+
+ if (ch == '\n')
+ {
+ lr->token.tok = tok_eol;
+ return &lr->token;
+ }
+ }
+ while (isspace (ch));
+
+ if (ch != lr->comment_char)
+ break;
+
+ /* Is there a newline at the end of the buffer? */
+ if (lr->buf[lr->bufact - 1] != '\n')
+ {
+ /* No. Some people want this to mean that only the line in
+ the file not the logical, concatenated line is ignored.
+ Let's try this. */
+ lr->idx = lr->bufact;
+ continue;
+ }
+
+ /* Ignore rest of line. */
+ lr_ignore_rest (lr, 0);
+ lr->token.tok = tok_eol;
+ return &lr->token;
+ }
+
+ /* Match escape sequences. */
+ if (ch == lr->escape_char)
+ return get_toplvl_escape (lr);
+
+ /* Match ellipsis. The first '.' is already consumed, so the
+ comparisons below look at what follows it; longer forms are tried
+ before their prefixes. */
+ if (ch == '.')
+ {
+ if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
+ {
+ int cnt;
+ for (cnt = 0; cnt < 10; ++cnt)
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis4_2;
+ return &lr->token;
+ }
+ if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
+ {
+ lr_getc (lr);
+ lr_getc (lr);
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis4;
+ return &lr->token;
+ }
+ if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
+ {
+ lr_getc (lr);
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis3;
+ return &lr->token;
+ }
+ if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
+ {
+ int cnt;
+ for (cnt = 0; cnt < 6; ++cnt)
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis2_2;
+ return &lr->token;
+ }
+ if (lr->buf[lr->idx] == '.')
+ {
+ lr_getc (lr);
+ lr->token.tok = tok_ellipsis2;
+ return &lr->token;
+ }
+ }
+
+ switch (ch)
+ {
+ case '<':
+ return get_symname (lr);
+
+ case '0' ... '9':
+ /* Accumulate an unsigned decimal number. */
+ lr->token.tok = tok_number;
+ lr->token.val.num = ch - '0';
+
+ while (isdigit (ch = lr_getc (lr)))
+ {
+ lr->token.val.num *= 10;
+ lr->token.val.num += ch - '0';
+ }
+ if (isalpha (ch))
+ lr_error (lr, _("garbage at end of number"));
+ /* Push back the character that ended the number. */
+ lr_ungetn (lr, 1);
+
+ return &lr->token;
+
+ case ';':
+ lr->token.tok = tok_semicolon;
+ return &lr->token;
+
+ case ',':
+ lr->token.tok = tok_comma;
+ return &lr->token;
+
+ case '(':
+ lr->token.tok = tok_open_brace;
+ return &lr->token;
+
+ case ')':
+ lr->token.tok = tok_close_brace;
+ return &lr->token;
+
+ case '"':
+ return get_string (lr, charmap, locale, repertoire, verbose);
+
+ case '-':
+ /* "-1" is a token of its own. */
+ ch = lr_getc (lr);
+ if (ch == '1')
+ {
+ lr->token.tok = tok_minus1;
+ return &lr->token;
+ }
+ /* NOTE(review): get_ident starts from lr->buf[lr->idx - 1]. After
+ going back two characters here that may be the character in
+ front of the '-' rather than the '-' itself -- verify against
+ the definition of lr_ungetn. */
+ lr_ungetn (lr, 2);
+ break;
+ }
+
+ return get_ident (lr);
+}
+
+
+/* Parse a character code written as one or more escaped numbers. The
+ leading escape character has already been consumed by the caller.
+
+ Each number is octal by default, decimal after 'd' and hexadecimal
+ after 'x'. It needs at least two digits; a third digit is accepted
+ for the non-hexadecimal bases. Each number contributes one byte to
+ the token. On success the token is tok_charcode; on a malformed
+ number it is tok_error, covering the text up to the next white
+ space. */
+static struct token *
+get_toplvl_escape (struct linereader *lr)
+{
+ /* This is supposed to be a numeric value. We return the
+ numerical value and the number of bytes. */
+ size_t start_idx = lr->idx - 1;
+ unsigned char *bytes = lr->token.val.charcode.bytes;
+ size_t nbytes = 0;
+ int ch;
+
+ do
+ {
+ unsigned int byte = 0;
+ unsigned int base = 8;
+
+ ch = lr_getc (lr);
+
+ if (ch == 'd')
+ {
+ base = 10;
+ ch = lr_getc (lr);
+ }
+ else if (ch == 'x')
+ {
+ base = 16;
+ ch = lr_getc (lr);
+ }
+
+ /* First digit, mandatory. */
+ if ((base == 16 && !isxdigit (ch))
+ || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
+ {
+ esc_error:
+ lr->token.val.str.startmb = &lr->buf[start_idx];
+
+ /* Swallow the rest of the malformed word. */
+ while (ch != EOF && !isspace (ch))
+ ch = lr_getc (lr);
+ lr->token.val.str.lenmb = lr->idx - start_idx;
+
+ lr->token.tok = tok_error;
+ return &lr->token;
+ }
+
+ if (isdigit (ch))
+ byte = ch - '0';
+ else
+ byte = tolower (ch) - 'a' + 10;
+
+ /* Second digit, mandatory as well. */
+ ch = lr_getc (lr);
+ if ((base == 16 && !isxdigit (ch))
+ || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
+ goto esc_error;
+
+ byte *= base;
+ if (isdigit (ch))
+ byte += ch - '0';
+ else
+ byte += tolower (ch) - 'a' + 10;
+
+ /* Optional third digit for octal and decimal.
+ NOTE(review): it is only checked with isdigit, so '8' and '9'
+ are accepted here for octal too, and the value is not range
+ checked before it is stored in a byte. */
+ ch = lr_getc (lr);
+ if (base != 16 && isdigit (ch))
+ {
+ byte *= base;
+ byte += ch - '0';
+
+ ch = lr_getc (lr);
+ }
+
+ bytes[nbytes++] = byte;
+ }
+ while (ch == lr->escape_char
+ && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
+
+ if (!isspace (ch))
+ lr_error (lr, _("garbage at end of character code specification"));
+
+ /* Give back the character that ended the specification. */
+ lr_ungetn (lr, 1);
+
+ lr->token.tok = tok_charcode;
+ lr->token.val.charcode.nbytes = nbytes;
+
+ return &lr->token;
+}
+
+
+/* Append the single byte CH to the growable buffer made up of the
+   local variables `buf', `bufact' (bytes in use) and `bufmax'
+   (capacity).  The capacity is doubled when the buffer is exactly
+   full.  */
+#define ADDC(ch) \
+  do \
+    { \
+      if (bufmax == bufact) \
+        { \
+          bufmax += bufmax; \
+          buf = xrealloc (buf, bufmax); \
+        } \
+      buf[bufact] = (ch); \
+      ++bufact; \
+    } \
+  while (0)
+
+
+/* Append the L bytes starting at S to the growable buffer made up of
+   the local variables `buf', `bufact' (bytes in use) and `bufmax'
+   (capacity).  L is evaluated once.  */
+#define ADDS(s, l) \
+  do \
+    { \
+      size_t _l = (l); \
+      if (bufact + _l > bufmax) \
+        { \
+          /* Double the larger of the capacity and the request; the \
+             result always has room for bufact + _l bytes.  Only the \
+             capacity changes, the write position stays put.  */ \
+          if (bufmax < _l) \
+            bufmax = _l; \
+          bufmax *= 2; \
+          buf = xrealloc (buf, bufmax); \
+        } \
+      memcpy (&buf[bufact], s, _l); \
+      bufact += _l; \
+    } \
+  while (0)
+
+
+/* Append the wide character CH to the growable buffer made up of the
+   local variables `buf2', `buf2act' (elements in use) and `buf2max'.
+   `buf2max' is the capacity in BYTES, matching the way get_string sizes
+   and allocates the buffer; treating it as an element count would let
+   writes run past the end of the initial allocation.  */
+#define ADDWC(ch) \
+  do \
+    { \
+      if ((buf2act + 1) * sizeof (buf2[0]) > buf2max) \
+        { \
+          buf2max *= 2; \
+          buf2 = xrealloc (buf2, buf2max); \
+        } \
+      buf2[buf2act++] = (ch); \
+    } \
+  while (0)
+
+
+/* Read a symbolic name; the opening '<' has already been consumed.
+
+   The name ends at an unescaped '>'.  A newline or the end of the input
+   also ends it and is reported as "unterminated symbolic name".  The
+   result is one of:
+     - tok_ucs4 for "Uxxxx" / "Uxxxxxxxx" with hexadecimal digits,
+     - the keyword's token if the hash function knows the name as a
+       symbolic keyword,
+     - tok_bsymbol otherwise, with a newly allocated copy of the name in
+       val.str.startmb.  The terminating character stays in that buffer
+       behind the name; val.str.lenmb does not count it.  */
+static struct token *
+get_symname (struct linereader *lr)
+{
+  /* Symbol in brackets.  We must distinguish three kinds:
+     1. reserved words
+     2. ISO 10646 position values
+     3. all other.  */
+  char *buf;
+  size_t bufact = 0;
+  size_t bufmax = 56;
+  const struct keyword_t *kw;
+  int ch;
+
+  buf = (char *) xmalloc (bufmax);
+
+  do
+    {
+      ch = lr_getc (lr);
+      if (ch == lr->escape_char)
+        {
+          int c2 = lr_getc (lr);
+
+          /* The end of the input ends the name just like a newline;
+             without this the loop would never terminate.  */
+          if (c2 == EOF)
+            c2 = '\n';
+          ADDC (c2);
+
+          if (c2 == '\n')
+            ch = '\n';
+        }
+      else
+        {
+          if (ch == EOF)
+            ch = '\n';
+          ADDC (ch);
+        }
+    }
+  while (ch != '>' && ch != '\n');
+
+  if (ch == '\n')
+    lr_error (lr, _("unterminated symbolic name"));
+
+  /* Test for ISO 10646 position value.  The buffer holds the name plus
+     the terminating character, hence 6 and 10 instead of 5 and 9.  */
+  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
+    {
+      char *cp = buf + 1;
+      while (cp < &buf[bufact - 1] && isxdigit ((unsigned char) *cp))
+        ++cp;
+
+      if (cp == &buf[bufact - 1])
+        {
+          /* Yes, it is.  */
+          lr->token.tok = tok_ucs4;
+          lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
+
+          /* The token does not refer to the buffer.  */
+          free (buf);
+          return &lr->token;
+        }
+    }
+
+  /* It is a symbolic name.  Test for reserved words.  */
+  kw = lr->hash_fct (buf, bufact - 1);
+
+  if (kw != NULL && kw->symname_or_ident == 1)
+    {
+      lr->token.tok = kw->token;
+      free (buf);
+    }
+  else
+    {
+      lr->token.tok = tok_bsymbol;
+
+      buf = xrealloc (buf, bufact + 1);
+      buf[bufact] = '\0';
+
+      lr->token.val.str.startmb = buf;
+      lr->token.val.str.lenmb = bufact - 1;
+    }
+
+  return &lr->token;
+}
+
+
+/* Read an identifier whose first character was already consumed by
+   the caller and is picked up again from the reader's buffer.
+
+   The identifier ends at white space, '"', ';', '<', ',' or the end of
+   the input; the ending character is pushed back.  An escape character
+   makes the following character part of the identifier, except that a
+   newline or the end of the input after it is an error.  If the hash
+   function knows the word as a non-symbolic keyword, that keyword's
+   token is returned; otherwise the result is tok_ident with a newly
+   allocated, NUL-terminated copy in val.str.startmb.  */
+static struct token *
+get_ident (struct linereader *lr)
+{
+  size_t bufmax = 56;
+  size_t bufact = 0;
+  char *buf = xmalloc (bufmax);
+  const struct keyword_t *keyword;
+  int c;
+
+  ADDC (lr->buf[lr->idx - 1]);
+
+  for (;;)
+    {
+      c = lr_getc (lr);
+      if (isspace (c) || c == '"' || c == ';'
+          || c == '<' || c == ',' || c == EOF)
+        break;
+
+      if (c == lr->escape_char)
+        {
+          c = lr_getc (lr);
+          if (c == '\n' || c == EOF)
+            {
+              lr_error (lr, _("invalid escape sequence"));
+              break;
+            }
+        }
+
+      ADDC (c);
+    }
+
+  lr_ungetc (lr, c);
+
+  keyword = lr->hash_fct (buf, bufact);
+
+  if (keyword == NULL || keyword->symname_or_ident != 0)
+    {
+      /* Not a keyword: hand out the text itself.  */
+      lr->token.tok = tok_ident;
+
+      buf = xrealloc (buf, bufact + 1);
+      buf[bufact] = '\0';
+
+      lr->token.val.str.startmb = buf;
+      lr->token.val.str.lenmb = bufact;
+    }
+  else
+    {
+      lr->token.tok = keyword->token;
+      free (buf);
+    }
+
+  return &lr->token;
+}
+
+/* Read a string from LR up to the closing '"' (reading also stops at a
+ newline or at EOF, which is reported as an error) and store it in
+ LR->token with the type tok_string. Returns &LR->token.
+
+ When LR->translate_strings is zero the bytes are copied unchanged.
+ Otherwise every <name> is replaced by the byte sequence CHARMAP assigns
+ to it; <Uxxxx> and <Uxxxxxxxx> names are additionally looked up through
+ REPERTOIRE and, unless NO_TRANSLITERATION is defined, through the
+ transliteration data of LOCALE. A wide character copy is built as well
+ if LR->return_widestr is set. If VERBOSE is nonzero the first character
+ written literally (not as <name>) is reported once per string. If the
+ translated string turns out to be invalid both buffers are freed and
+ the string fields of the token are set to NULL/0.
+
+ ADDC, ADDWC and ADDS are defined outside this function; presumably they
+ append to buf/buf2 and grow the buffers as needed -- TODO confirm. */
+static struct token *
+get_string (struct linereader *lr, const struct charmap_t *charmap,
+ struct localedef_t *locale, const struct repertoire_t *repertoire,
+ int verbose)
+{
+ int return_widestr = lr->return_widestr;
+ char *buf;
+ wchar_t *buf2 = NULL;
+ size_t bufact;
+ size_t bufmax = 56;
+
+ /* We must return two different strings. */
+ buf = xmalloc (bufmax);
+ bufact = 0;
+
+ /* We know it'll be a string. */
+ lr->token.tok = tok_string;
+
+ /* If we need not translate the strings (i.e., expand <...> parts)
+ we can run a simple loop. */
+ if (!lr->translate_strings)
+ {
+ int ch;
+
+ buf2 = NULL;
+ while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
+ ADDC (ch);
+
+ /* Catch errors with trailing escape character. */
+ if (bufact > 0 && buf[bufact - 1] == lr->escape_char
+ && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
+ {
+ lr_error (lr, _("illegal escape sequence at end of string"));
+ --bufact;
+ }
+ else if (ch == '\n' || ch == EOF)
+ lr_error (lr, _("unterminated string"));
+
+ ADDC ('\0');
+ }
+ else
+ {
+ int illegal_string = 0;
+ size_t buf2act = 0;
+ size_t buf2max = 56 * sizeof (uint32_t);
+ int ch;
+ int warned = 0;
+
+ /* We have to provide the wide character result as well. */
+ if (return_widestr)
+ buf2 = xmalloc (buf2max);
+
+ /* Read until the end of the string (or end of the line or file). */
+ while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
+ {
+ size_t startidx;
+ uint32_t wch;
+ struct charseq *seq;
+
+ if (ch != '<')
+ {
+ /* The standards leave it up to the implementation to decide
+ what to do with characters which stand for themselves. We
+ could jump through hoops to find out the value relative to
+ the charmap and the repertoire map, but instead we leave
+ it up to the locale definition author to write a better
+ definition. We assume here that every character which
+ stands for itself is encoded using ISO 8859-1. Using the
+ escape character is allowed. */
+ if (ch == lr->escape_char)
+ {
+ ch = lr_getc (lr);
+ if (ch == '\n' || ch == EOF)
+ break;
+ }
+
+ if (verbose && !warned)
+ {
+ lr_error (lr, _("\
+non-symbolic character value should not be used"));
+ warned = 1;
+ }
+
+ ADDC (ch);
+ if (return_widestr)
+ ADDWC ((uint32_t) ch);
+
+ continue;
+ }
+
+ /* Now we have to search for the end of the symbolic name, i.e.,
+ the closing '>'. The name is collected temporarily in buf
+ starting at startidx. */
+ startidx = bufact;
+ while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
+ {
+ if (ch == lr->escape_char)
+ {
+ ch = lr_getc (lr);
+ if (ch == '\n' || ch == EOF)
+ break;
+ }
+ ADDC (ch);
+ }
+ if (ch == '\n' || ch == EOF)
+ /* Not a correct string. */
+ break;
+ if (bufact == startidx)
+ {
+ /* <> is no correct name. Ignore it and also signal an
+ error. */
+ illegal_string = 1;
+ continue;
+ }
+
+ /* It might be a Uxxxx symbol. */
+ if (buf[startidx] == 'U'
+ && (bufact - startidx == 5 || bufact - startidx == 9))
+ {
+ char *cp = buf + startidx + 1;
+ while (cp < &buf[bufact] && isxdigit (*cp))
+ ++cp;
+
+ if (cp == &buf[bufact])
+ {
+ char utmp[10]; /* 'U', eight hex digits and the NUL. */
+
+ /* Yes, it is. */
+ ADDC ('\0');
+ wch = strtoul (buf + startidx + 1, NULL, 16);
+
+ /* Now forget about the name we just added. */
+ bufact = startidx;
+
+ if (return_widestr)
+ ADDWC (wch);
+
+ /* See whether the charmap contains the Uxxxxxxxx names. */
+ snprintf (utmp, sizeof (utmp), "U%08X", wch);
+ seq = charmap_find_value (charmap, utmp, 9);
+
+ if (seq == NULL)
+ {
+ /* No, this isn't the case. Now determine from
+ the repertoire the name of the character and
+ find it in the charmap. */
+ if (repertoire != NULL)
+ {
+ const char *symbol;
+
+ symbol = repertoire_find_symbol (repertoire, wch);
+
+ if (symbol != NULL)
+ seq = charmap_find_value (charmap, symbol,
+ strlen (symbol));
+ }
+
+ if (seq == NULL)
+ {
+#ifndef NO_TRANSLITERATION
+ /* Transliterate if possible. */
+ if (locale != NULL)
+ {
+ uint32_t *translit;
+
+ if ((locale->avail & CTYPE_LOCALE) == 0)
+ {
+ /* Load the CTYPE data now. */
+ int old_needed = locale->needed;
+
+ locale->needed = 0;
+ locale = load_locale (LC_CTYPE,
+ locale->name,
+ locale->repertoire_name,
+ charmap, locale);
+ locale->needed = old_needed;
+ }
+
+ if ((locale->avail & CTYPE_LOCALE) != 0
+ && ((translit = find_translit (locale,
+ charmap, wch))
+ != NULL))
+ /* The CTYPE data contains a matching
+ transliteration. */
+ {
+ int i;
+
+ for (i = 0; translit[i] != 0; ++i)
+ {
+ char utmp[10]; /* Shadows the outer utmp. */
+
+ snprintf (utmp, sizeof (utmp), "U%08X",
+ translit[i]);
+ seq = charmap_find_value (charmap, utmp,
+ 9);
+ assert (seq != NULL);
+ ADDS (seq->bytes, seq->nbytes);
+ }
+
+ continue;
+ }
+ }
+#endif /* NO_TRANSLITERATION */
+
+ /* Not a known name. */
+ illegal_string = 1;
+ }
+ }
+
+ if (seq != NULL)
+ ADDS (seq->bytes, seq->nbytes);
+
+ continue;
+ }
+ }
+
+ /* We now have the symbolic name in buf[startidx] to
+ buf[bufact-1]. Now find out the value for this character
+ in the charmap as well as in the repertoire map (in this
+ order). */
+ seq = charmap_find_value (charmap, &buf[startidx],
+ bufact - startidx);
+
+ if (seq == NULL)
+ {
+ /* This name is not in the charmap. */
+ lr_error (lr, _("symbol `%.*s' not in charmap"),
+ (int) (bufact - startidx), &buf[startidx]);
+ illegal_string = 1;
+ }
+
+ if (return_widestr)
+ {
+ /* Now the same for the wide character representation. The
+ value found in the repertoire map is cached in seq->ucs4. */
+ if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+ wch = seq->ucs4;
+ else
+ {
+ wch = repertoire_find_value (repertoire, &buf[startidx],
+ bufact - startidx);
+ if (seq != NULL)
+ seq->ucs4 = wch;
+ }
+
+ if (wch == ILLEGAL_CHAR_VALUE)
+ {
+ /* This name is not in the repertoire map. */
+ lr_error (lr, _("symbol `%.*s' not in repertoire map"),
+ (int) (bufact - startidx), &buf[startidx]);
+ illegal_string = 1;
+ }
+ else
+ ADDWC (wch);
+ }
+
+ /* Now forget about the name we just added. */
+ bufact = startidx;
+
+ /* And copy the bytes. */
+ if (seq != NULL)
+ ADDS (seq->bytes, seq->nbytes);
+ }
+
+ if (ch == '\n' || ch == EOF)
+ {
+ lr_error (lr, _("unterminated string"));
+ illegal_string = 1;
+ }
+
+ if (illegal_string)
+ {
+ free (buf);
+ free (buf2);
+ lr->token.val.str.startmb = NULL;
+ lr->token.val.str.lenmb = 0;
+ lr->token.val.str.startwc = NULL;
+ lr->token.val.str.lenwc = 0;
+
+ return &lr->token;
+ }
+
+ ADDC ('\0');
+
+ if (return_widestr)
+ {
+ ADDWC (0);
+ lr->token.val.str.startwc = xrealloc (buf2,
+ buf2act * sizeof (uint32_t));
+ lr->token.val.str.lenwc = buf2act;
+ }
+ }
+
+ lr->token.val.str.startmb = xrealloc (buf, bufact); /* Resize to the bytes in use. */
+ lr->token.val.str.lenmb = bufact;
+
+ return &lr->token;
+}
diff --git a/REORG.TODO/locale/programs/linereader.h b/REORG.TODO/locale/programs/linereader.h
new file mode 100644
index 0000000000..3965db558c
--- /dev/null
+++ b/REORG.TODO/locale/programs/linereader.h
@@ -0,0 +1,146 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper, <drepper@gnu.org>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _LINEREADER_H
+#define _LINEREADER_H 1
+
+#include <ctype.h>
+#include <libintl.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "charmap.h"
+#include "error.h"
+#include "locfile-token.h"
+#include "repertoire.h"
+
+
+typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, unsigned int);
+struct charset_t;
+struct localedef_t;
+/* A token read from the input: its type TOK and a value in the union VAL.
+ linereader.c fills VAL.str for tok_ident and tok_string tokens. */
+struct token
+{
+ enum token_t tok;
+ union
+ {
+ struct
+ {
+ char *startmb; /* NUL terminated byte string, or NULL. */
+ size_t lenmb; /* Byte count recorded for startmb. */
+ uint32_t *startwc; /* Wide character string, or NULL. */
+ size_t lenwc; /* Element count recorded for startwc. */
+ } str;
+ unsigned long int num;
+ struct
+ {
+ /* This element is sized on the safe expectation that no single
+ character in any character set uses more than 16 bytes. */
+ unsigned char bytes[16];
+ int nbytes;
+ } charcode;
+ uint32_t ucs4;
+ } val;
+};
+
+/* State of a reader which delivers one input file as a sequence of tokens. */
+struct linereader
+{
+ FILE *fp; /* Input stream. */
+ const char *fname; /* File name, used in error messages (lr_error). */
+ char *buf; /* Buffered input; lr_getc reads from here. */
+ size_t bufsize; /* Presumably the allocated size of buf -- TODO confirm. */
+ size_t bufact; /* Number of valid bytes in buf. */
+ size_t lineno; /* Line number, used in error messages (lr_error). */
+
+ size_t idx; /* Index in buf of the next character to read. */
+
+ char comment_char;
+ char escape_char; /* Escape character honored while reading tokens. */
+
+ struct token token; /* Storage for the token handed back to callers. */
+
+ int translate_strings; /* Nonzero: get_string expands <...> symbols. */
+ int return_widestr; /* Nonzero: get_string also builds the wide string. */
+
+ kw_hash_fct_t hash_fct; /* Keyword lookup function. */
+};
+
+
+/* Functions defined in linereader.c. */
+extern struct linereader *lr_open (const char *fname, kw_hash_fct_t hf);
+extern struct linereader *lr_create (FILE *fp, const char *fname,
+ kw_hash_fct_t hf);
+extern int lr_eof (struct linereader *lr);
+extern void lr_close (struct linereader *lr);
+extern int lr_next (struct linereader *lr);
+extern struct token *lr_token (struct linereader *lr,
+ const struct charmap_t *charmap,
+ struct localedef_t *locale,
+ const struct repertoire_t *repertoire,
+ int verbose);
+extern void lr_ignore_rest (struct linereader *lr, int verbose);
+
+/* Report an error for LR: FMT and ARGS are passed to error_at_line
+ together with LR's file name and current line number. The status
+ argument is 0. */
+#define lr_error(lr, fmt, args...) \
+ WITH_CUR_LOCALE (error_at_line (0, 0, lr->fname, lr->lineno, fmt, ## args))
+
+
+/* Return the next character from LR and advance the read index, or return
+ EOF. When the index has reached the end of the buffered data, lr_next
+ is called first to fetch more, unless nothing has been buffered at all.
+ A '\32' byte is reported as EOF and is not consumed.
+ NOTE(review): buf is a plain `char' array, so where char is signed the
+ bytes >= 0x80 come back negative and 0xFF cannot be told from EOF --
+ confirm whether callers depend on this before changing it. */
+static inline int
+__attribute ((always_inline))
+lr_getc (struct linereader *lr)
+{
+ if (lr->idx == lr->bufact)
+ {
+ if (lr->bufact != 0)
+ if (lr_next (lr) < 0)
+ return EOF;
+
+ if (lr->bufact == 0)
+ return EOF;
+ }
+
+ return lr->buf[lr->idx] == '\32' ? EOF : lr->buf[lr->idx++];
+}
+
+
+/* Push CH back onto LR by stepping the read index back one position and
+   storing CH there.  Returns -1 without changing anything if the index
+   is already at the start of the buffer, otherwise 0.  When CH is EOF
+   nothing is stored and the index is left alone.  */
+static inline int
+__attribute ((always_inline))
+lr_ungetc (struct linereader *lr, int ch)
+{
+  if (lr->idx > 0)
+    {
+      if (ch != EOF)
+        {
+          lr->idx -= 1;
+          lr->buf[lr->idx] = ch;
+        }
+      return 0;
+    }
+
+  return -1;
+}
+
+
+/* Move the read index of LR back by N characters.  Returns 0 on success
+   and -1, leaving the index untouched, if fewer than N characters
+   precede the current position.  */
+static inline int
+lr_ungetn (struct linereader *lr, size_t n)
+{
+  if (n <= lr->idx)
+    {
+      lr->idx -= n;
+      return 0;
+    }
+
+  return -1;
+}
+
+
+#endif /* linereader.h */
diff --git a/REORG.TODO/locale/programs/locale-spec.c b/REORG.TODO/locale/programs/locale-spec.c
new file mode 100644
index 0000000000..4e9bf81b78
--- /dev/null
+++ b/REORG.TODO/locale/programs/locale-spec.c
@@ -0,0 +1,131 @@
+/* Handle special requests.
+ Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "localeinfo.h"
+
+
+/* We provide support for some special names. This helps debugging
+ and may be useful for advanced usage of the provided information
+ outside C.
+
+ The handlers for "collate-elements" and "collate-classes" below are
+ disabled by `#if 0', so as compiled every call reports NAME as unknown
+ through error with status 1. SHOW_CATEGORY_NAME and SHOW_KEYWORD_NAME
+ are only used by the disabled code. */
+void
+locale_special (const char *name, int show_category_name,
+ int show_keyword_name)
+{
+#if 0
+ /* "collate-elements": print collation elements of locale. */
+ if (strcmp (name, "collate-elements") == 0)
+ {
+ size_t nelem = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_ELEM_HASH_SIZE);
+
+ if (show_category_name)
+ puts ("LC_COLLATE");
+ if (show_keyword_name)
+ fputs ("collate-elements=", stdout);
+
+ if (nelem != 0)
+ {
+ int first = 1;
+ size_t cnt;
+
+ for (cnt = 0; cnt < nelem; ++cnt)
+ if (__collate_element_hash[2 * cnt] != (~((u_int32_t) 0)))
+ {
+ size_t idx = __collate_element_hash[2 * cnt];
+
+ printf ("%s<%s>", first ? "" : ";",
+ &__collate_element_strings[idx]);
+
+ /* We don't print the string. This is only confusing
+ because only the programs have to know the
+ encoding. The code is left in place because it
+ shows how to get the information. */
+ {
+ const wchar_t *wp;
+
+ idx = __collate_element_hash[2 * cnt + 1];
+ wp = &__collate_element_values[idx];
+ while (*wp != L'\0')
+ {
+ /********************************************\
+ |* XXX The element values are really wide *|
+ |* chars. But we are currently not able to *|
+ |* print these so fake here. *|
+ \********************************************/
+ int ch = wctob (*wp++);
+ if (ch != EOF)
+ putchar (ch);
+ else
+ fputs ("<??\?>", stdout);
+ }
+
+ putchar ('"');
+ }
+ first = 0;
+ }
+ }
+ putchar ('\n');
+ return;
+ }
+
+ if (strcmp (name, "collate-classes") == 0)
+ {
+ size_t nelem = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZE);
+ size_t cnt;
+ int first = 1;
+
+ if (show_category_name)
+ puts ("LC_COLLATE");
+ if (show_keyword_name)
+ fputs ("collate-classes=", stdout);
+
+ for (cnt = 0; cnt < nelem; ++cnt)
+ if (__collate_symbol_hash[2 * cnt] != 0xffffffff)
+ {
+ printf ("%s<%s>", first ? "" : ",",
+ &__collate_symbol_strings[__collate_symbol_hash[2 * cnt]]);
+#if 0
+ {
+ size_t idx = __collate_symbol_hash[2 * cnt + 1];
+ size_t cls;
+
+ putchar ('=');
+ for (cls = 0; cls < __collate_symbol_classes[idx]; ++cls)
+ printf ("%s%d", cls == 0 ? "" : ":",
+ __collate_symbol_classes[idx + 1 + cls]);
+ }
+#endif
+ first = 0;
+ }
+ putchar ('\n');
+ return;
+ }
+#endif
+
+ /* If nothing matches, fail. */
+ error (1, 0, gettext ("unknown name \"%s\""), name);
+}
diff --git a/REORG.TODO/locale/programs/locale.c b/REORG.TODO/locale/programs/locale.c
new file mode 100644
index 0000000000..941290089b
--- /dev/null
+++ b/REORG.TODO/locale/programs/locale.c
@@ -0,0 +1,989 @@
+/* Implementation of the locale program according to POSIX 9945-2.
+ Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <argp.h>
+#include <argz.h>
+#include <dirent.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <limits.h>
+#include <locale.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "localeinfo.h"
+#include "charmap-dir.h"
+#include "../locarchive.h"
+#include <programs/xmalloc.h>
+
+#define ARCHIVE_NAME COMPLOCALEDIR "/locale-archive"
+
+/* If set print the name of the category. */
+static int show_category_name;
+
+/* If set print the name of the item. */
+static int show_keyword_name;
+
+/* Print names of all available locales. */
+static int do_all;
+
+/* Print names of all available character maps. */
+static int do_charmaps = 0;
+
+/* Nonzero if verbose output is wanted. */
+static int verbose;
+
+/* Name and version of program. */
+static void print_version (FILE *stream, struct argp_state *state);
+void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
+
+/* Definitions of arguments for argp functions. */
+static const struct argp_option options[] =
+{
+ { NULL, 0, NULL, 0, N_("System information:") },
+ { "all-locales", 'a', NULL, OPTION_NO_USAGE,
+ N_("Write names of available locales") },
+ { "charmaps", 'm', NULL, OPTION_NO_USAGE,
+ N_("Write names of available charmaps") },
+ { NULL, 0, NULL, 0, N_("Modify output format:") },
+ { "category-name", 'c', NULL, 0, N_("Write names of selected categories") },
+ { "keyword-name", 'k', NULL, 0, N_("Write names of selected keywords") },
+ { "verbose", 'v', NULL, 0, N_("Print more information") },
+ { NULL, 0, NULL, 0, NULL }
+};
+
+/* Short description of program. */
+static const char doc[] = N_("Get locale-specific information.");
+
+/* Strings for arguments in help texts. */
+static const char args_doc[] = N_("NAME\n[-a|-m]");
+
+/* Prototype for option handler. */
+static error_t parse_opt (int key, char *arg, struct argp_state *state);
+
+/* Function to print some extra text in the help message. */
+static char *more_help (int key, const char *text, void *input);
+
+/* Data structure to communicate with argp functions. */
+static struct argp argp =
+{
+ options, parse_opt, args_doc, doc, NULL, more_help
+};
+
+
+/* We don't have these constants defined because we don't use them. Give
+ default values. */
+#define CTYPE_MB_CUR_MIN 0
+#define CTYPE_MB_CUR_MAX 0
+#define CTYPE_HASH_SIZE 0
+#define CTYPE_HASH_LAYERS 0
+#define CTYPE_CLASS 0
+#define CTYPE_TOUPPER_EB 0
+#define CTYPE_TOLOWER_EB 0
+#define CTYPE_TOUPPER_EL 0
+#define CTYPE_TOLOWER_EL 0
+
+/* Definition of the data structure which represents a category and its
+ items. The table `category' below holds one entry per locale category;
+ each entry points to an array of NUMBER item descriptions. */
+struct category
+{
+ int cat_id;
+ const char *name; /* Category name; also used as environment variable name. */
+ size_t number; /* Number of elements in item_desc. */
+ struct cat_item
+ {
+ int item_id; /* Passed to nl_langinfo by print_item. */
+ const char *name; /* Keyword printed when -k is given. */
+ enum { std, opt } status;
+ enum value_type value_type; /* Selects how print_item formats the value. */
+ int min;
+ int max; /* For stringarray items print_item prints this many elements. */
+ } *item_desc;
+};
+
+/* Simple helper macro. */
+#define NELEMS(arr) ((sizeof (arr)) / (sizeof (arr[0])))
+
+/* For some tricky stuff. */
+#define NO_PAREN(Item, More...) Item, ## More
+
+/* We have all categories defined in `categories.def'. Now construct
+ the description and data structure used for all categories. */
+#define DEFINE_ELEMENT(Item, More...) { Item, ## More },
+#define DEFINE_CATEGORY(category, name, items, postload) \
+ static struct cat_item category##_desc[] = \
+ { \
+ NO_PAREN items \
+ };
+
+#include "categories.def"
+#undef DEFINE_CATEGORY
+
+static struct category category[] =
+ {
+#define DEFINE_CATEGORY(category, name, items, postload) \
+ [category] = { _NL_NUM_##category, name, NELEMS (category##_desc), \
+ category##_desc },
+#include "categories.def"
+#undef DEFINE_CATEGORY
+ };
+#define NCATEGORIES NELEMS (category)
+
+
+/* Automatically set variable. */
+extern const char *__progname;
+
+/* helper function for extended name handling. */
+extern void locale_special (const char *name, int show_category_name,
+ int show_keyword_name);
+
+/* Prototypes for local functions. */
+static void print_LC_IDENTIFICATION (void *mapped, size_t size);
+static void print_LC_CTYPE (void *mapped, size_t size);
+static void write_locales (void);
+static int nameentcmp (const void *a, const void *b);
+static int write_archive_locales (void **all_datap, char *linebuf);
+static void write_charmaps (void);
+static void show_locale_vars (void);
+static void show_info (const char *name);
+
+/* Entry point of the locale program. After parsing the options it either
+ lists all locales (-a), lists all charmaps (-m), prints the locale
+ environment variables (no NAME argument and neither -c nor -k given),
+ or calls show_info for every NAME argument. Every path visible here
+ ends in exit (EXIT_SUCCESS); failures of setlocale are only reported. */
+int
+main (int argc, char *argv[])
+{
+ int remaining;
+
+ /* Set initial values for global variables. */
+ show_category_name = 0;
+ show_keyword_name = 0;
+
+ /* Set locale. Do not set LC_ALL because the other categories must
+ not be affected (according to POSIX.2). */
+ if (setlocale (LC_CTYPE, "") == NULL)
+ error (0, errno, gettext ("Cannot set LC_CTYPE to default locale"));
+ if (setlocale (LC_MESSAGES, "") == NULL)
+ error (0, errno, gettext ("Cannot set LC_MESSAGES to default locale"));
+
+ /* Initialize the message catalog. */
+ textdomain (PACKAGE);
+
+ /* Parse and process arguments. */
+ argp_parse (&argp, argc, argv, 0, &remaining, NULL);
+
+ /* `-a' requests the names of all available locales. LC_COLLATE is set
+ first because the names are ordered with strcoll. */
+ if (do_all != 0)
+ {
+ if (setlocale (LC_COLLATE, "") == NULL)
+ error (0, errno,
+ gettext ("Cannot set LC_COLLATE to default locale"));
+ write_locales ();
+ exit (EXIT_SUCCESS);
+ }
+
+ /* `-m' requests the names of all available charmaps. The names can be
+ used for the -f argument to localedef(1). */
+ if (do_charmaps != 0)
+ {
+ write_charmaps ();
+ exit (EXIT_SUCCESS);
+ }
+
+ /* Specific information about the current locale is requested.
+ Change to this locale now. */
+ if (setlocale (LC_ALL, "") == NULL)
+ error (0, errno, gettext ("Cannot set LC_ALL to default locale"));
+
+ /* If no real argument is given we have to print the contents of the
+ current locale definition variables. These are LANG and the LC_*. */
+ if (remaining == argc && show_keyword_name == 0 && show_category_name == 0)
+ {
+ show_locale_vars ();
+ exit (EXIT_SUCCESS);
+ }
+
+ /* Process all given names. */
+ while (remaining < argc)
+ show_info (argv[remaining++]);
+
+ exit (EXIT_SUCCESS);
+}
+
+
+/* Option callback for argp: record the flag selected by KEY in the
+   matching file-scope variable.  Returns 0 for a recognized option and
+   ARGP_ERR_UNKNOWN for everything else.  ARG and STATE are unused.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  if (key == 'a')
+    do_all = 1;
+  else if (key == 'c')
+    show_category_name = 1;
+  else if (key == 'm')
+    do_charmaps = 1;
+  else if (key == 'k')
+    show_keyword_name = 1;
+  else if (key == 'v')
+    verbose = 1;
+  else
+    return ARGP_ERR_UNKNOWN;
+
+  return 0;
+}
+
+
+/* Supply extra text for the argp help output.  For ARGP_KEY_HELP_EXTRA
+   return a string allocated by asprintf with the bug reporting
+   instructions (NULL if asprintf fails); for every other KEY hand TEXT
+   back unchanged.  INPUT is unused.  */
+static char *
+more_help (int key, const char *text, void *input)
+{
+  char *extra = NULL;
+
+  if (key != ARGP_KEY_HELP_EXTRA)
+    return (char *) text;
+
+  /* We print some extra information.  */
+  int written = asprintf (&extra, gettext ("\
+For bug reporting instructions, please see:\n\
+%s.\n"), REPORT_BUGS_TO);
+
+  return written < 0 ? NULL : extra;
+}
+
+
+/* Write the program name and version, the copyright notice and the
+   author line to STREAM.  STATE is unused.  */
+static void
+print_version (FILE *stream, struct argp_state *state)
+{
+  const char *const copyright_year = "2017";
+  const char *const author = "Ulrich Drepper";
+
+  fprintf (stream, "locale %s%s\n", PKGVERSION, VERSION);
+
+  fprintf (stream, gettext ("\
+Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions.  There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"), copyright_year);
+
+  fprintf (stream, gettext ("Written by %s.\n"), author);
+}
+
+
+/* Action function for twalk: NODEP points to a tree slot holding a
+   string.  Print that string, followed by a newline, when the node is
+   visited as a leaf or in postorder; ignore all other visits.  LEVEL is
+   unused.  */
+static void
+print_names (const void *nodep, VISIT value, int level)
+{
+  if (value != postorder && value != leaf)
+    return;
+
+  const char *const *slot = nodep;
+  puts (*slot);
+}
+
+/* Selection function for scandir: return nonzero iff DIRENT names a
+ directory other than "." and "..". The entry type is taken from d_type
+ where available; otherwise, and for symbolic links and unknown types,
+ COMPLOCALEDIR/<name> is examined with stat64. */
+static int
+select_dirs (const struct dirent *dirent)
+{
+ int result = 0;
+
+ if (strcmp (dirent->d_name, ".") != 0 && strcmp (dirent->d_name, "..") != 0)
+ {
+ mode_t mode = 0; /* Stays 0 (not a directory) if stat64 fails. */
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_LNK)
+ mode = DTTOIF (dirent->d_type);
+ else
+#endif
+ {
+ struct stat64 st;
+ char buf[sizeof (COMPLOCALEDIR)
+ + strlen (dirent->d_name) + 1]; /* Directory, '/', name and NUL. */
+
+ stpcpy (stpcpy (stpcpy (buf, COMPLOCALEDIR), "/"),
+ dirent->d_name);
+
+ if (stat64 (buf, &st) == 0)
+ mode = st.st_mode;
+ }
+
+ result = S_ISDIR (mode);
+ }
+
+ return result;
+}
+
+/* Print the non-empty identification strings of the LC_IDENTIFICATION
+ data of SIZE bytes at MAPPED, one "name | value" line each. Nothing is
+ printed unless the magic number matches and the string index table
+ with NSTRINGS entries fits into SIZE bytes.
+ NOTE(review): the offsets read from strindex, and the indices used to
+ select them, are not themselves checked against SIZE and NSTRINGS --
+ confirm that the input can be trusted. */
+static void
+print_LC_IDENTIFICATION (void *mapped, size_t size)
+{
+ /* Read the information from the file. */
+ struct
+ {
+ unsigned int magic;
+ unsigned int nstrings;
+ unsigned int strindex[0];
+ } *filedata = mapped;
+
+ if (filedata->magic == LIMAGIC (LC_IDENTIFICATION)
+ && (sizeof *filedata
+ + (filedata->nstrings
+ * sizeof (unsigned int))
+ <= size))
+ {
+ const char *str;
+
+#define HANDLE(idx, name) \
+ str = ((char *) mapped \
+ + filedata->strindex[_NL_ITEM_INDEX (_NL_IDENTIFICATION_##idx)]); \
+ if (*str != '\0') \
+ printf ("%9s | %s\n", name, str)
+ HANDLE (TITLE, "title");
+ HANDLE (SOURCE, "source");
+ HANDLE (ADDRESS, "address");
+ HANDLE (CONTACT, "contact");
+ HANDLE (EMAIL, "email");
+ HANDLE (TEL, "telephone");
+ HANDLE (FAX, "fax");
+ HANDLE (LANGUAGE, "language");
+ HANDLE (TERRITORY, "territory");
+ HANDLE (AUDIENCE, "audience");
+ HANDLE (APPLICATION, "application");
+ HANDLE (ABBREVIATION, "abbreviation");
+ HANDLE (REVISION, "revision");
+ HANDLE (DATE, "date");
+ }
+}
+
+/* Print the codeset name stored in the LC_CTYPE data of SIZE bytes at
+ MAPPED, if it is not empty. Nothing is printed unless the magic number
+ matches and the string index table with NSTRINGS entries fits into
+ SIZE bytes.
+ NOTE(review): the offset read from strindex is not checked against
+ SIZE -- confirm that the input can be trusted. */
+static void
+print_LC_CTYPE (void *mapped, size_t size)
+{
+ struct
+ {
+ unsigned int magic;
+ unsigned int nstrings;
+ unsigned int strindex[0];
+ } *filedata = mapped;
+
+ if (filedata->magic == LIMAGIC (LC_CTYPE)
+ && (sizeof *filedata
+ + (filedata->nstrings
+ * sizeof (unsigned int))
+ <= size))
+ {
+ const char *str;
+
+ str = ((char *) mapped
+ + filedata->strindex[_NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME)]);
+ if (*str != '\0')
+ printf (" codeset | %s\n", str);
+ }
+}
+
+
+/* Write the names of all available locales to stdout. We have several
+ sources of the information: the locale archive, the contents of the
+ locale directory and the locale.alias files. To avoid duplicates and
+ to print the result in a reasonable order we put all entries in a
+ search tree and print them afterwards. In verbose mode the details of
+ every locale are printed as soon as it is found and the tree is only
+ used to detect duplicates. */
+static void
+write_locales (void)
+{
+ char linebuf[80];
+ void *all_data = NULL;
+ struct dirent **dirents;
+ int ndirents;
+ int cnt;
+ char *alias_path;
+ size_t alias_path_len;
+ char *entry;
+ int first_locale = 1;
+
+#define PUT(name) tsearch (name, &all_data, \
+ (int (*) (const void *, const void *)) strcoll)
+#define GET(name) tfind (name, &all_data, \
+ (int (*) (const void *, const void *)) strcoll)
+
+ /* `POSIX' locale is always available (POSIX.2 4.34.3). */
+ PUT ("POSIX");
+ /* And so is the "C" locale. */
+ PUT ("C");
+
+ /* Prepare the separator line used in the verbose output. */
+ memset (linebuf, '-', sizeof (linebuf) - 1);
+ linebuf[sizeof (linebuf) - 1] = '\0';
+
+ /* First scan the locale archive. */
+ if (write_archive_locales (&all_data, linebuf))
+ first_locale = 0;
+
+ /* Now we can look for all files in the directory. */
+ ndirents = scandir (COMPLOCALEDIR, &dirents, select_dirs,
+ alphasort);
+ for (cnt = 0; cnt < ndirents; ++cnt)
+ {
+ /* Test whether at least the LC_CTYPE data is there. Some
+ directories only contain translations. */
+ char buf[sizeof (COMPLOCALEDIR)
+ + strlen (dirents[cnt]->d_name)
+ + sizeof "/LC_IDENTIFICATION"];
+ char *enddir;
+ struct stat64 st;
+
+ stpcpy (enddir = stpcpy (stpcpy (stpcpy (buf,
+ COMPLOCALEDIR),
+ "/"),
+ dirents[cnt]->d_name),
+ "/LC_IDENTIFICATION");
+
+ if (stat64 (buf, &st) == 0 && S_ISREG (st.st_mode))
+ {
+ if (verbose && GET (dirents[cnt]->d_name) == NULL)
+ {
+ /* Provide some nice output of all kinds of
+ information. */
+ int fd;
+
+ if (! first_locale)
+ putchar_unlocked ('\n');
+ first_locale = 0;
+
+ printf ("locale: %-15.15s directory: %.*s\n%s\n",
+ dirents[cnt]->d_name, (int) (enddir - buf), buf,
+ linebuf);
+
+ fd = open64 (buf, O_RDONLY);
+ if (fd != -1)
+ {
+ void *mapped = mmap64 (NULL, st.st_size, PROT_READ,
+ MAP_SHARED, fd, 0);
+ if (mapped != MAP_FAILED)
+ {
+ print_LC_IDENTIFICATION (mapped, st.st_size);
+
+ munmap (mapped, st.st_size);
+ }
+
+ close (fd);
+
+ /* Now try to get the charset information. */
+ strcpy (enddir, "/LC_CTYPE");
+ fd = open64 (buf, O_RDONLY);
+ if (fd != -1 && fstat64 (fd, &st) >= 0
+ && ((mapped = mmap64 (NULL, st.st_size, PROT_READ,
+ MAP_SHARED, fd, 0))
+ != MAP_FAILED))
+ {
+ print_LC_CTYPE (mapped, st.st_size);
+
+ munmap (mapped, st.st_size);
+ }
+
+ if (fd != -1)
+ close (fd);
+ }
+ }
+
+ /* If the verbose format is not selected we simply
+ collect the names. */
+ PUT (xstrdup (dirents[cnt]->d_name));
+ }
+ }
+ if (ndirents > 0)
+ free (dirents); /* NOTE(review): the individual entries are not freed. */
+
+ /* Now read the locale.alias files. */
+ if (argz_create_sep (LOCALE_ALIAS_PATH, ':', &alias_path, &alias_path_len))
+ error (1, errno, gettext ("while preparing output"));
+
+ entry = NULL;
+ while ((entry = argz_next (alias_path, alias_path_len, entry)))
+ {
+ static const char aliasfile[] = "/locale.alias";
+ FILE *fp;
+ char full_name[strlen (entry) + sizeof aliasfile];
+
+ stpcpy (stpcpy (full_name, entry), aliasfile);
+ fp = fopen (full_name, "rm");
+ if (fp == NULL)
+ /* Ignore non-existing files. */
+ continue;
+
+ /* No threads present. */
+ __fsetlocking (fp, FSETLOCKING_BYCALLER);
+
+ while (! feof_unlocked (fp))
+ {
+ /* It is a reasonable approach to use a fixed buffer here
+ because
+ a) we are only interested in the first two fields
+ b) these fields must be usable as file names and so must
+ not be that long */
+ char buf[BUFSIZ];
+ char *alias;
+ char *value;
+ char *cp;
+
+ if (fgets_unlocked (buf, BUFSIZ, fp) == NULL)
+ /* EOF reached. */
+ break;
+
+ cp = buf;
+ /* Ignore leading white space. */
+ while (isspace (cp[0]) && cp[0] != '\n')
+ ++cp;
+
+ /* A leading '#' signals a comment line. */
+ if (cp[0] != '\0' && cp[0] != '#' && cp[0] != '\n')
+ {
+ alias = cp++;
+ while (cp[0] != '\0' && !isspace (cp[0]))
+ ++cp;
+ /* Terminate alias name. */
+ if (cp[0] != '\0')
+ *cp++ = '\0';
+
+ /* Now look for the beginning of the value. */
+ while (isspace (cp[0]))
+ ++cp;
+
+ if (cp[0] != '\0')
+ {
+ value = cp++;
+ while (cp[0] != '\0' && !isspace (cp[0]))
+ ++cp;
+ /* Terminate value. */
+ if (cp[0] == '\n')
+ {
+ /* The '\n' is replaced by the terminator of the
+ value, so a new '\n' is stored right behind it
+ to keep the end-of-line test below working. */
+ *cp++ = '\0';
+ *cp = '\n';
+ }
+ else if (cp[0] != '\0')
+ *cp++ = '\0';
+
+ /* Add the alias, but only if the locale it refers to
+ is known. */
+ if (! verbose && GET (value) != NULL)
+ PUT (xstrdup (alias));
+ }
+ }
+
+ /* Possibly not the whole line fits into the buffer.
+ Ignore the rest of the line. */
+ while (strchr (cp, '\n') == NULL)
+ {
+ cp = buf;
+ if (fgets_unlocked (buf, BUFSIZ, fp) == NULL)
+ /* Make sure the inner loop will be left. The outer
+ loop will exit at the `feof' test. */
+ *cp = '\n';
+ }
+ }
+
+ fclose (fp);
+ }
+
+ if (! verbose)
+ {
+ twalk (all_data, print_names);
+ }
+}
+
+/* Name of a locale in the archive together with the offset of its locale
+ record; write_archive_locales sorts an array of these by name. */
+struct nameent
+{
+ char *name; /* Points into the mapped archive. */
+ uint32_t locrec_offset; /* Offset of the locale record from the archive start. */
+};
+
+
+/* Comparison function for qsort: order two struct nameent objects by
+   their names according to strcoll.  */
+static int
+nameentcmp (const void *a, const void *b)
+{
+  const struct nameent *lhs = a;
+  const struct nameent *rhs = b;
+
+  return strcoll (lhs->name, rhs->name);
+}
+
+
+/* List the locales stored in the locale archive ARCHIVE_NAME.
+
+   In non-verbose mode every locale name found in the archive is added
+   to the search tree *ALL_DATAP (through PUT), to be printed by the
+   caller.  In verbose mode the names are sorted with strcoll and printed
+   right away, each followed by LINEBUF and the LC_IDENTIFICATION and
+   LC_CTYPE details; the names are added to the tree as well.
+
+   Returns the number of locales found, or 0 if the archive cannot be
+   opened or mapped or if its header fails the sanity checks.
+
+   NOTE(review): name_offset and locrec_offset of the individual entries
+   are still used without validation.  */
+static int
+write_archive_locales (void **all_datap, char *linebuf)
+{
+  struct stat64 st;
+  /* The PUT macro refers to a variable named all_data; it is written
+     back to *ALL_DATAP before returning.  */
+  void *all_data = *all_datap;
+  size_t len = 0;
+  struct locarhead *head;
+  struct namehashent *namehashtab;
+  char *addr = MAP_FAILED;
+  int fd, ret = 0;
+  uint32_t cnt;
+
+  fd = open64 (ARCHIVE_NAME, O_RDONLY);
+  if (fd < 0)
+    return 0;
+
+  if (fstat64 (fd, &st) < 0 || st.st_size < sizeof (*head))
+    goto error_out;
+
+  len = st.st_size;
+  addr = mmap64 (NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+  if (addr == MAP_FAILED)
+    goto error_out;
+
+  head = (struct locarhead *) addr;
+  /* namehash_size is a number of table entries (it bounds the index
+     into namehashtab below), so it must be converted to bytes before it
+     is compared with the file size.  64-bit arithmetic keeps the sum
+     from wrapping around.  */
+  if (head->namehash_used > head->namehash_size
+      || ((uint64_t) head->namehash_offset
+          + (uint64_t) head->namehash_size * sizeof (struct namehashent)
+          > len)
+      || head->string_offset + head->string_size > len
+      || head->locrectab_offset + head->locrectab_size > len
+      || head->sumhash_offset + head->sumhash_size > len)
+    goto error_out;
+
+  namehashtab = (struct namehashent *) (addr + head->namehash_offset);
+  if (! verbose)
+    {
+      for (cnt = 0; cnt < head->namehash_size; ++cnt)
+        if (namehashtab[cnt].locrec_offset != 0)
+          {
+            PUT (xstrdup (addr + namehashtab[cnt].name_offset));
+            ++ret;
+          }
+    }
+  else
+    {
+      struct nameent *names;
+      uint32_t used;
+
+      names = xmalloc (head->namehash_used * sizeof (struct nameent));
+
+      /* The array has room for namehash_used entries only.  Stop there
+         even if a damaged archive has more occupied slots.  */
+      for (cnt = used = 0;
+           cnt < head->namehash_size && used < head->namehash_used;
+           ++cnt)
+        if (namehashtab[cnt].locrec_offset != 0)
+          {
+            names[used].name = addr + namehashtab[cnt].name_offset;
+            names[used++].locrec_offset = namehashtab[cnt].locrec_offset;
+          }
+
+      /* Sort the names.  */
+      qsort (names, used, sizeof (struct nameent), nameentcmp);
+
+      for (cnt = 0; cnt < used; ++cnt)
+        {
+          struct locrecent *locrec;
+
+          PUT (xstrdup (names[cnt].name));
+
+          if (cnt)
+            putchar_unlocked ('\n');
+
+          printf ("locale: %-15.15s archive: " ARCHIVE_NAME "\n%s\n",
+                  names[cnt].name, linebuf);
+
+          locrec = (struct locrecent *) (addr + names[cnt].locrec_offset);
+
+          print_LC_IDENTIFICATION (addr
+                                   + locrec->record[LC_IDENTIFICATION].offset,
+                                   locrec->record[LC_IDENTIFICATION].len);
+
+          print_LC_CTYPE (addr + locrec->record[LC_CTYPE].offset,
+                          locrec->record[LC_CTYPE].len);
+        }
+
+      ret = used;
+
+      /* The entries only point into the mapping; the array itself is
+         ours to release.  */
+      free (names);
+    }
+
+error_out:
+  if (addr != MAP_FAILED)
+    munmap (addr, len);
+  close (fd);
+  *all_datap = all_data;
+  return ret;
+}
+
+
+/* Write the names of all available character maps to stdout. For every
+ entry delivered by charmap_readdir for CHARMAP_PATH the entry name and
+ the first name returned by charmap_aliases are collected in a search
+ tree ordered by strcoll, which is printed at the end. Nothing is
+ printed if the directory cannot be opened. */
+static void
+write_charmaps (void)
+{
+ void *all_data = NULL; /* Tree root used by the PUT macro. */
+ CHARMAP_DIR *dir;
+ const char *dirent;
+
+ /* Look for all files in the charmap directory. */
+ dir = charmap_opendir (CHARMAP_PATH);
+ if (dir == NULL)
+ return;
+
+ while ((dirent = charmap_readdir (dir)) != NULL)
+ {
+ char **aliases;
+ char **p;
+
+ PUT (xstrdup (dirent));
+
+ aliases = charmap_aliases (CHARMAP_PATH, dirent);
+
+#if 0
+ /* Add the code_set_name and the aliases. */
+ for (p = aliases; *p; p++)
+ PUT (xstrdup (*p));
+#else
+ /* Add the code_set_name only. Most aliases are obsolete. */
+ p = aliases;
+ if (*p)
+ PUT (xstrdup (*p));
+#endif
+
+ charmap_free_aliases (aliases);
+ }
+
+ charmap_closedir (dir);
+
+ twalk (all_data, print_names);
+}
+
+/* Print the line NAME=VAL to stdout with VAL quoted for the shell.  If
+   DQUOTE is true VAL is wrapped in double quotes and only the
+   characters which are special inside double quotes get a backslash;
+   otherwise every shell metacharacter, quote and white space character
+   gets one.  */
+static void
+print_assignment (const char *name, const char *val, bool dquote)
+{
+  const char *const specials
+    = dquote ? "$`\"\\" : "~|&;<>()$`\\\"' \t\n";
+
+  printf ("%s=", name);
+  if (dquote)
+    putchar ('"');
+
+  for (const char *cur = val; *cur != '\0'; )
+    {
+      /* Copy the run of ordinary characters in one go.  */
+      size_t plain = strcspn (cur, specials);
+      printf ("%.*s", (int) plain, cur);
+      cur += plain;
+
+      /* Whatever stopped the run, other than the end, is escaped.  */
+      if (*cur != '\0')
+        {
+          putchar ('\\');
+          putchar (*cur);
+          ++cur;
+        }
+    }
+
+  if (dquote)
+    putchar ('"');
+  putchar ('\n');
+}
+
+/* Show the environment variables that determine the locale: LANG
+   first, then every category except LC_ALL, and LC_ALL last.  */
+static void
+show_locale_vars (void)
+{
+  const char *lang = getenv ("LANG") ?: "";
+  const char *lcall = getenv ("LC_ALL") ?: "";
+  const char *implied;
+
+  /* Value shown for a category that is not taken from its own
+     variable: LC_ALL wins, then LANG, then the POSIX default.  */
+  if (lcall[0] != '\0')
+    implied = lcall;
+  else if (lang[0] != '\0')
+    implied = lang;
+  else
+    implied = "POSIX";
+
+  /* LANG has to be the first value.  */
+  print_assignment ("LANG", lang, false);
+
+  /* Now all categories in an unspecified order.  */
+  for (size_t idx = 0; idx < NCATEGORIES; ++idx)
+    {
+      if (idx == LC_ALL)
+        continue;
+
+      const char *name = category[idx].name;
+      const char *own_value = getenv (name);
+
+      if (lcall[0] == '\0' && own_value != NULL)
+        /* The category's own variable is set and is not overridden.  */
+        print_assignment (name, own_value, false);
+      else
+        /* Implied values are printed in double quotes.  */
+        print_assignment (name, implied, true);
+    }
+
+  /* The last is the LC_ALL value.  */
+  print_assignment ("LC_ALL", lcall, false);
+}
+
+
+/* Subroutine of show_info, below.  Print the value of one locale item
+   in the form selected by its value type.  When show_keyword_name is
+   set the output starts with "NAME=" and string values are put in
+   double quotes.  */
+static void
+print_item (struct cat_item *item)
+{
+  switch (item->value_type)
+    {
+    case string:
+      {
+        const char *text = nl_langinfo (item->item_id) ? : "";
+
+        if (show_keyword_name)
+          printf ("%s=\"", item->name);
+        fputs (text, stdout);
+        if (show_keyword_name)
+          putchar ('"');
+        putchar ('\n');
+      }
+      break;
+
+    case stringarray:
+      {
+        const char *elem;
+        int idx = 0;
+
+        if (show_keyword_name)
+          printf ("%s=\"", item->name);
+
+        /* Every element but the final one is followed by ';'.  */
+        while (idx < item->max - 1)
+          {
+            elem = nl_langinfo (item->item_id + idx);
+            if (elem != NULL)
+              fputs (elem, stdout);
+            putchar (';');
+            ++idx;
+          }
+
+        /* The final element has no separator after it.  */
+        elem = nl_langinfo (item->item_id + idx);
+        if (elem != NULL)
+          fputs (elem, stdout);
+
+        if (show_keyword_name)
+          putchar ('"');
+        putchar ('\n');
+      }
+      break;
+
+    case stringlist:
+      {
+        /* Each element is quoted on its own when keywords are shown.  */
+        const char *quote = show_keyword_name ? "\"" : "";
+        const char *elem = nl_langinfo (item->item_id) ? : "";
+
+        if (show_keyword_name)
+          printf ("%s=", item->name);
+
+        /* The elements are stored back to back, each NUL-terminated;
+           an empty string ends the list.  */
+        for (int idx = 0; idx < item->max && *elem != '\0'; ++idx)
+          {
+            printf ("%s%s%s%s", idx == 0 ? "" : ";", quote, elem, quote);
+            elem += strlen (elem) + 1;
+          }
+        putchar ('\n');
+      }
+      break;
+
+    case byte:
+      {
+        const char *valp = nl_langinfo (item->item_id);
+
+        if (show_keyword_name)
+          printf ("%s=", item->name);
+
+        if (valp != NULL)
+          {
+            /* The byte '\377' is shown as -1.  */
+            int shown = *valp == '\377' ? -1 : *valp;
+            printf ("%d", shown);
+          }
+        putchar ('\n');
+      }
+      break;
+
+    case bytearray:
+      {
+        const char *bytes = nl_langinfo (item->item_id);
+        int left = bytes ? strlen (bytes) : 0;
+
+        if (show_keyword_name)
+          printf ("%s=", item->name);
+
+        /* All bytes but the last, each followed by ';'.  The byte
+           '\177' is shown as -1.  */
+        for (; left > 1; --left, ++bytes)
+          printf ("%d;", *bytes == '\177' ? -1 : *bytes);
+
+        /* The last byte, or -1 if there is no data at all.  */
+        printf ("%d\n", left == 0 || *bytes == '\177' ? -1 : *bytes);
+      }
+      break;
+
+    case word:
+      {
+        /* The value is read back through the pointer that nl_langinfo
+           returns rather than through what it points to.  */
+        union { unsigned int word; char *string; } raw;
+
+        raw.string = nl_langinfo (item->item_id);
+        if (show_keyword_name)
+          printf ("%s=", item->name);
+
+        printf ("%d\n", raw.word);
+      }
+      break;
+
+    case wordarray:
+      {
+        /* Here the returned pointer is used as an array of words.  */
+        union { unsigned int *wordarray; char *string; } raw;
+
+        raw.string = nl_langinfo (item->item_id);
+        if (show_keyword_name)
+          printf ("%s=", item->name);
+
+        for (int idx = 0; idx < item->max; ++idx)
+          printf ("%s%d", idx == 0 ? "" : ";", raw.wordarray[idx]);
+        putchar ('\n');
+      }
+      break;
+
+    case wstring:
+    case wstringarray:
+    case wstringlist:
+      /* We don't print wide character information since the same
+         information is available in a multibyte string.  */
+    default:
+      break;
+    }
+}
+
+/* Show the information requested for NAME.  NAME may be the name of
+   a whole category, in which case all its items are printed, or the
+   name of a single item.  Anything else is passed on to
+   locale_special.  */
+static void
+show_info (const char *name)
+{
+  for (size_t cat_no = 0; cat_no < NCATEGORIES; ++cat_no)
+    {
+      if (cat_no == LC_ALL)
+        continue;
+
+      if (strcmp (name, category[cat_no].name) == 0)
+        {
+          /* Print the whole category.  */
+          size_t item_no = 0;
+
+          if (show_category_name != 0)
+            puts (category[cat_no].name);
+
+          while (item_no < category[cat_no].number)
+            {
+              print_item (&category[cat_no].item_desc[item_no]);
+              ++item_no;
+            }
+
+          return;
+        }
+
+      /* Otherwise look for an item of that name in this category.  */
+      for (size_t item_no = 0; item_no < category[cat_no].number; ++item_no)
+        {
+          if (strcmp (name, category[cat_no].item_desc[item_no].name) != 0)
+            continue;
+
+          if (show_category_name != 0)
+            puts (category[cat_no].name);
+
+          print_item (&category[cat_no].item_desc[item_no]);
+          return;
+        }
+    }
+
+  /* The name is not a standard one.
+     For testing and perhaps advanced use allow some more symbols.  */
+  locale_special (name, show_category_name, show_keyword_name);
+}
diff --git a/REORG.TODO/locale/programs/localedef.c b/REORG.TODO/locale/programs/localedef.c
new file mode 100644
index 0000000000..6acc1342c7
--- /dev/null
+++ b/REORG.TODO/locale/programs/localedef.c
@@ -0,0 +1,626 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <argp.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <error.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "localedef.h"
+#include "charmap.h"
+#include "locfile.h"
+
+/* Undefine the following line in the production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+
+/* List of copied locales. Reset to NULL at the start of main. */
+struct copy_def_list_t *copy_list;
+
+/* If nonzero, conform strictly to POSIX. Set in main when the
+ environment variable POSIXLY_CORRECT exists, and by --posix. */
+int posix_conformance;
+
+/* If not zero give a lot more messages. Set by -v/--verbose; main
+ also turns it on whenever posix_conformance is set. */
+int verbose;
+
+/* If not zero suppress warnings and information messages (--quiet). */
+int be_quiet;
+
+/* If not zero force output even if warnings were issued (-c/--force). */
+static int force_output;
+
+/* Prefix for output files (--prefix). NULL when not given. */
+const char *output_prefix;
+
+/* Name of the character map file (-f/--charmap). */
+static const char *charmap_file;
+
+/* Name of the locale definition file (-i/--inputfile). When NULL,
+ main reads "/dev/stdin" instead. */
+static const char *input_file;
+
+/* Name of the repertoire map file (-u/--repertoire-map). */
+const char *repertoire_global;
+
+/* Name of the locale.alias file (-A/--alias-file). */
+const char *alias_file;
+
+/* List of all locales. main makes its own `global' entry the head;
+ add_to_readlist appends further entries at the end. */
+static struct localedef_t *locales;
+
+/* If true don't add locale data to archive. Set by --no-archive and
+ by construct_output_path when the output name contains a '/'. */
+bool no_archive;
+
+/* If true add named locales to archive (--add-to-archive). */
+static bool add_to_archive;
+
+/* If true delete named locales from archive (--delete-from-archive). */
+static bool delete_from_archive;
+
+/* If true replace archive content when adding (--replace). */
+static bool replace_archive;
+
+/* If true list archive content (--list-archive). */
+static bool list_archive;
+
+/* Maximum number of retries when opening the locale archive. */
+int max_locarchive_open_retry = 10;
+
+
+/* Name and version of program. argp calls the hook to print the
+ version information; print_version is defined further down. */
+static void print_version (FILE *stream, struct argp_state *state);
+void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
+
+/* Keys for the options that are listed in the option table without a
+ short option character; parse_opt dispatches on them. The value 303
+ is not assigned. */
+#define OPT_POSIX 301
+#define OPT_QUIET 302
+#define OPT_PREFIX 304
+#define OPT_NO_ARCHIVE 305
+#define OPT_ADD_TO_ARCHIVE 306
+#define OPT_REPLACE 307
+#define OPT_DELETE_FROM_ARCHIVE 308
+#define OPT_LIST_ARCHIVE 309
+#define OPT_LITTLE_ENDIAN 400
+#define OPT_BIG_ENDIAN 401
+
+/* Definitions of arguments for argp functions. Each entry gives the
+ long option name, the key passed to parse_opt, the name of the
+ option's argument (NULL if it takes none), flags, and the help text.
+ The entries with a NULL name and key 0 carry only a heading text. */
+static const struct argp_option options[] =
+{
+ { NULL, 0, NULL, 0, N_("Input Files:") },
+ { "charmap", 'f', N_("FILE"), 0,
+ N_("Symbolic character names defined in FILE") },
+ { "inputfile", 'i', N_("FILE"), 0,
+ N_("Source definitions are found in FILE") },
+ { "repertoire-map", 'u', N_("FILE"), 0,
+ N_("FILE contains mapping from symbolic names to UCS4 values") },
+
+ { NULL, 0, NULL, 0, N_("Output control:") },
+ { "force", 'c', NULL, 0,
+ N_("Create output even if warning messages were issued") },
+ { "prefix", OPT_PREFIX, N_("PATH"), 0, N_("Optional output file prefix") },
+ { "posix", OPT_POSIX, NULL, 0, N_("Strictly conform to POSIX") },
+ { "quiet", OPT_QUIET, NULL, 0,
+ N_("Suppress warnings and information messages") },
+ { "verbose", 'v', NULL, 0, N_("Print more messages") },
+ { NULL, 0, NULL, 0, N_("Archive control:") },
+ { "no-archive", OPT_NO_ARCHIVE, NULL, 0,
+ N_("Don't add new data to archive") },
+ { "add-to-archive", OPT_ADD_TO_ARCHIVE, NULL, 0,
+ N_("Add locales named by parameters to archive") },
+ { "replace", OPT_REPLACE, NULL, 0, N_("Replace existing archive content") },
+ { "delete-from-archive", OPT_DELETE_FROM_ARCHIVE, NULL, 0,
+ N_("Remove locales named by parameters from archive") },
+ { "list-archive", OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") },
+ { "alias-file", 'A', N_("FILE"), 0,
+ N_("locale.alias file to consult when making archive")},
+ { "little-endian", OPT_LITTLE_ENDIAN, NULL, 0,
+ N_("Generate little-endian output") },
+ { "big-endian", OPT_BIG_ENDIAN, NULL, 0,
+ N_("Generate big-endian output") },
+ /* End of table marker. */
+ { NULL, 0, NULL, 0, NULL }
+};
+
+/* Short description of program. */
+static const char doc[] = N_("Compile locale specification");
+
+/* Strings for arguments in help texts. One line for each of the
+ three ways the program can be called. */
+static const char args_doc[] = N_("\
+NAME\n\
+[--add-to-archive|--delete-from-archive] FILE...\n\
+--list-archive [FILE]");
+
+/* Prototype for option handler. */
+static error_t parse_opt (int key, char *arg, struct argp_state *state);
+
+/* Function to print some extra text in the help message. */
+static char *more_help (int key, const char *text, void *input);
+
+/* Data structure to communicate with argp functions: option table,
+ option handler, argument synopsis, program description, no child
+ parsers (NULL), and the help text filter. */
+static struct argp argp =
+{
+ options, parse_opt, args_doc, doc, NULL, more_help
+};
+
+
+/* Prototypes for local functions. */
+static void error_print (void);
+static const char *construct_output_path (char *path);
+static const char *normalize_codeset (const char *codeset, size_t name_len);
+
+
+/* Entry point of localedef.  Parses the command line, handles the
+   archive maintenance modes, and otherwise compiles one locale
+   definition into the output files.
+
+   Exit status: 4 for usage errors and for files that cannot be read
+   or written, 3 if the system does not support locale definitions,
+   otherwise 1 if any message was counted in error_message_count and
+   0 if not.  The archive modes return the result of the respective
+   archive function.  */
+int
+main (int argc, char *argv[])
+{
+  const char *output_path;
+  int cannot_write_why;
+  struct charmap_t *charmap;
+  struct localedef_t global;
+  int remaining;
+
+  /* Set initial values for global variables.  */
+  copy_list = NULL;
+  posix_conformance = getenv ("POSIXLY_CORRECT") != NULL;
+  error_print_progname = error_print;
+
+  /* Set locale.  Do not set LC_ALL because the other categories must
+     not be affected (according to POSIX.2).  */
+  setlocale (LC_MESSAGES, "");
+  setlocale (LC_CTYPE, "");
+
+  /* Initialize the message catalog.  */
+  textdomain (_libc_intl_domainname);
+
+  /* Parse and process arguments.  */
+  argp_err_exit_status = 4;
+  argp_parse (&argp, argc, argv, 0, &remaining, NULL);
+
+  /* Handle a few special cases.  The archive file name is optional
+     for --list-archive; pass NULL if no non-option argument is left.
+     show_archive_content does not return.  */
+  if (list_archive)
+    show_archive_content (remaining < argc ? argv[remaining] : NULL, verbose);
+  if (add_to_archive)
+    return add_locales_to_archive (argc - remaining, &argv[remaining],
+                                   replace_archive);
+  if (delete_from_archive)
+    return delete_locales_from_archive (argc - remaining, &argv[remaining]);
+
+  /* POSIX.2 requires to be verbose about missing characters in the
+     character map.  */
+  verbose |= posix_conformance;
+
+  if (argc - remaining != 1)
+    {
+      /* We need exactly one non-option parameter.  */
+      argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
+                 program_invocation_short_name);
+      exit (4);
+    }
+
+  /* The parameter describes the output path of the constructed files.
+     If the described files cannot be written return a NULL pointer.  */
+  output_path = construct_output_path (argv[remaining]);
+  if (output_path == NULL && ! no_archive)
+    error (4, errno, _("cannot create directory for output files"));
+  /* Remember why writing will fail; it is reported only after the
+     input has been checked.  */
+  cannot_write_why = errno;
+
+  /* Now that the parameters are processed we have to reset the local
+     ctype locale.  (P1003.2 4.35.5.2)  */
+  setlocale (LC_CTYPE, "POSIX");
+
+  /* Look whether the system really allows locale definitions.  POSIX
+     defines error code 3 for this situation so I think it must be
+     a fatal error (see P1003.2 4.35.8).  */
+  if (sysconf (_SC_2_LOCALEDEF) < 0)
+    WITH_CUR_LOCALE (error (3, 0, _("\
+FATAL: system does not define `_POSIX2_LOCALEDEF'")));
+
+  /* Process charmap file.  */
+  charmap = charmap_read (charmap_file, verbose, 1, be_quiet, 1);
+
+  /* Add the first entry in the locale list.  */
+  memset (&global, '\0', sizeof (struct localedef_t));
+  global.name = input_file ?: "/dev/stdin";
+  global.needed = ALL_LOCALES;
+  locales = &global;
+
+  /* Now read the locale file.  Report the name that was actually
+     used: input_file is NULL when standard input is read and must not
+     be passed to the %s conversion.  */
+  if (locfile_read (&global, charmap) != 0)
+    WITH_CUR_LOCALE (error (4, errno, _("\
+cannot open locale definition file `%s'"), global.name));
+
+  /* Perhaps we saw some `copy' instructions.  */
+  while (1)
+    {
+      struct localedef_t *runp = locales;
+
+      /* Skip the entries for which everything needed is available.  */
+      while (runp != NULL && (runp->needed & runp->avail) == runp->needed)
+        runp = runp->next;
+
+      if (runp == NULL)
+        /* Everything read.  */
+        break;
+
+      if (locfile_read (runp, charmap) != 0)
+        WITH_CUR_LOCALE (error (4, errno, _("\
+cannot open locale definition file `%s'"), runp->name));
+    }
+
+  /* Check the categories we processed in source form.  */
+  check_all_categories (locales, charmap);
+
+  /* We are now able to write the data files.  If warning were given we
+     do it only if it is explicitly requested (--force).  */
+  if (error_message_count == 0 || force_output != 0)
+    {
+      if (cannot_write_why != 0)
+        WITH_CUR_LOCALE (error (4, cannot_write_why, _("\
+cannot write output files to `%s'"), output_path ? : argv[remaining]));
+      else
+        write_all_categories (locales, charmap, argv[remaining], output_path);
+    }
+  else
+    WITH_CUR_LOCALE (error (4, 0, _("\
+no output file produced because warnings were issued")));
+
+  /* This exit status is prescribed by POSIX.2 4.35.7.  */
+  exit (error_message_count != 0);
+}
+
+
+/* Option handler for argp.  Records the option identified by KEY,
+   with its argument ARG where it has one, in the corresponding
+   program variable.  Returns 0 if the key was handled and
+   ARGP_ERR_UNKNOWN for every other key.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  switch (key)
+    {
+      /* Input files.  */
+    case 'f':
+      charmap_file = arg;
+      break;
+    case 'i':
+      input_file = arg;
+      break;
+    case 'u':
+      repertoire_global = arg;
+      break;
+
+      /* Output control.  */
+    case 'c':
+      force_output = 1;
+      break;
+    case OPT_PREFIX:
+      output_prefix = arg;
+      break;
+    case OPT_POSIX:
+      posix_conformance = 1;
+      break;
+    case OPT_QUIET:
+      be_quiet = 1;
+      break;
+    case 'v':
+      verbose = 1;
+      break;
+
+      /* Archive control.  */
+    case OPT_NO_ARCHIVE:
+      no_archive = true;
+      break;
+    case OPT_ADD_TO_ARCHIVE:
+      add_to_archive = true;
+      break;
+    case OPT_REPLACE:
+      replace_archive = true;
+      break;
+    case OPT_DELETE_FROM_ARCHIVE:
+      delete_from_archive = true;
+      break;
+    case OPT_LIST_ARCHIVE:
+      list_archive = true;
+      break;
+    case 'A':
+      alias_file = arg;
+      break;
+    case OPT_LITTLE_ENDIAN:
+      set_big_endian (false);
+      break;
+    case OPT_BIG_ENDIAN:
+      set_big_endian (true);
+      break;
+
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+
+  return 0;
+}
+
+
+/* Help text filter for argp.  For ARGP_KEY_HELP_EXTRA it returns a
+   newly allocated text that names the system directories and tells
+   where to report bugs, or NULL if memory is exhausted.  For every
+   other KEY the given TEXT is returned unchanged.  INPUT is not
+   used.  */
+static char *
+more_help (int key, const char *text, void *input)
+{
+  char *bug_note;
+  char *extra;
+  int rc;
+
+  if (key != ARGP_KEY_HELP_EXTRA)
+    return (char *) text;
+
+  /* We print some extra information.  */
+  if (asprintf (&bug_note, gettext ("\
+For bug reporting instructions, please see:\n\
+%s.\n"), REPORT_BUGS_TO) < 0)
+    return NULL;
+
+  rc = asprintf (&extra, gettext ("\
+System's directory for character maps : %s\n\
+ repertoire maps: %s\n\
+ locale path : %s\n\
+%s"),
+                 CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, bug_note);
+
+  /* The note has been copied into EXTRA (or the call failed); it is
+     not needed any longer in either case.  */
+  free (bug_note);
+
+  return rc < 0 ? NULL : extra;
+}
+
+/* Print the version information to STREAM: program name and version,
+   the copyright notice and the author.  STATE is not used.  */
+static void
+print_version (FILE *stream, struct argp_state *state)
+{
+  /* Translated templates for the second and third part.  */
+  const char *copyright_fmt = gettext ("\
+Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+");
+  const char *author_fmt = gettext ("Written by %s.\n");
+
+  fprintf (stream, "localedef %s%s\n", PKGVERSION, VERSION);
+  fprintf (stream, copyright_fmt, "2017");
+  fprintf (stream, author_fmt, "Ulrich Drepper");
+}
+
+
+/* The address of this function will be assigned to the hook in the error
+ functions (main stores it in error_print_progname). The body is
+ intentionally empty, so the hook itself prints nothing. */
+static void
+error_print (void)
+{
+}
+
+
+/* The parameter to localedef describes the output path.  If it
+   contains a '/' character it is used as given, as the path of the
+   output directory, and adding to the archive is switched off.
+   Otherwise it names the locale this definition is for and the
+   directory is placed below COMPLOCALEDIR (after output_prefix, if
+   set), with the codeset part of the name normalized.
+
+   Returns a newly allocated directory name with a trailing '/'.
+   Returns NULL, with errno set, if the name cannot be allocated or
+   if the directory is missing and cannot be created.  If the
+   directory exists but is not writable the name is returned and
+   errno tells why; errno is zero if no problem was found.  The
+   directory is examined only if no_archive is set.  */
+static const char *
+construct_output_path (char *path)
+{
+  const char *normal = NULL;
+  char *result;
+  char *endp;
+
+  if (strchr (path, '/') == NULL)
+    {
+      /* This is a system path.  First examine whether the locale name
+         contains a reference to the codeset.  This should be
+         normalized.  */
+      char *startp;
+
+      startp = path;
+      /* We must be prepared for finding a CEN name or a location of
+         the introducing `.' where it is not possible anymore.  */
+      while (*startp != '\0' && *startp != '@' && *startp != '.')
+        ++startp;
+      if (*startp == '.')
+        {
+          /* We found a codeset specification.  Now find the end.  */
+          endp = ++startp;
+          while (*endp != '\0' && *endp != '@')
+            ++endp;
+
+          if (endp > startp)
+            normal = normalize_codeset (startp, endp - startp);
+        }
+      else
+        /* This is to keep gcc quiet.  */
+        endp = NULL;
+
+      /* We put an additional '\0' at the end of the string because at
+         the end of the function we need another byte for the trailing
+         '/'.  */
+      ssize_t n;
+      if (normal == NULL)
+        n = asprintf (&result, "%s%s/%s%c", output_prefix ?: "",
+                      COMPLOCALEDIR, path, '\0');
+      else
+        {
+          /* Name up to and including the '.', then the normalized
+             codeset, then the rest starting at the '@' (if any).  */
+          n = asprintf (&result, "%s%s/%.*s%s%s%c",
+                        output_prefix ?: "", COMPLOCALEDIR,
+                        (int) (startp - path), path, normal, endp, '\0');
+
+          /* The normalized name was allocated for us and has been
+             copied by now.  Release it without disturbing errno.  */
+          int saved_errno = errno;
+          free ((char *) normal);
+          errno = saved_errno;
+        }
+
+      if (n < 0)
+        return NULL;
+
+      /* N counts the extra '\0' as well, so this is the address of
+         the first terminator, where the '/' will go.  */
+      endp = result + n - 1;
+    }
+  else
+    {
+      /* This is a user path.  Please note the additional byte in the
+         memory allocation.  */
+      size_t len = strlen (path) + 1;
+      result = xmalloc (len + 1);
+      endp = mempcpy (result, path, len) - 1;
+
+      /* If the user specified an output path we cannot add the output
+         to the archive.  */
+      no_archive = true;
+    }
+
+  errno = 0;
+
+  if (no_archive && euidaccess (result, W_OK) == -1)
+    /* Perhaps the directory does not exist now.  Try to create it.  */
+    if (errno == ENOENT)
+      {
+        errno = 0;
+        if (mkdir (result, 0777) < 0)
+          {
+            /* Do not leak the name; the caller needs errno from
+               mkdir.  */
+            int saved_errno = errno;
+            free (result);
+            errno = saved_errno;
+            return NULL;
+          }
+      }
+
+  *endp++ = '/';
+  *endp = '\0';
+
+  return result;
+}
+
+
+/* Normalize codeset name.  There is no standard for the codeset
+   names.  Normalization allows the user to use any of the common
+   names.
+
+   Only the first NAME_LEN bytes of CODESET are looked at.  Letters
+   are converted to lower case, digits are kept, and every other
+   character is dropped.  If the name contains no letter at all the
+   result is prefixed with "iso".  Returns a newly allocated string
+   which the caller has to free, or NULL if the allocation fails.  */
+static const char *
+normalize_codeset (const char *codeset, size_t name_len)
+{
+  size_t len = 0;
+  int only_digit = 1;
+  char *retval;
+  char *wp;
+  size_t cnt;
+
+  /* First pass: count the characters that are kept.  The <ctype.h>
+     functions are only defined for values of unsigned char (and EOF),
+     so plain char must be converted first.  */
+  for (cnt = 0; cnt < name_len; ++cnt)
+    {
+      unsigned char ch = (unsigned char) codeset[cnt];
+
+      if (isalnum (ch))
+        {
+          ++len;
+
+          if (isalpha (ch))
+            only_digit = 0;
+        }
+    }
+
+  retval = malloc ((only_digit ? 3 : 0) + len + 1);
+  if (retval == NULL)
+    return NULL;
+
+  wp = retval;
+  if (only_digit)
+    {
+      memcpy (wp, "iso", 3);
+      wp += 3;
+    }
+
+  /* Second pass: copy the characters that are kept.  */
+  for (cnt = 0; cnt < name_len; ++cnt)
+    {
+      unsigned char ch = (unsigned char) codeset[cnt];
+
+      if (isalpha (ch))
+        *wp++ = (char) tolower (ch);
+      else if (isdigit (ch))
+        *wp++ = (char) ch;
+    }
+
+  *wp = '\0';
+
+  return retval;
+}
+
+
+/* Mark CATEGORY of the locale NAME as needed and return the entry of
+   that locale in the list of all locales.  A locale that is not yet
+   in the list is appended; this is only expected if GENERATE is 1.
+   If COPY_LOCALE is not NULL its data for CATEGORY is taken over.  */
+struct localedef_t *
+add_to_readlist (int category, const char *name, const char *repertoire_name,
+                 int generate, struct localedef_t *copy_locale)
+{
+  const int mask = 1 << category;
+  struct localedef_t *entry;
+
+  /* Look for an existing entry with this name.  */
+  for (entry = locales; entry != NULL; entry = entry->next)
+    if (strcmp (name, entry->name) == 0)
+      break;
+
+  if (entry == NULL)
+    {
+      /* Add a new entry at the end.  */
+      struct localedef_t **tailp = &locales;
+
+      assert (generate == 1);
+
+      entry = xcalloc (1, sizeof (struct localedef_t));
+      entry->name = name;
+      entry->repertoire_name = repertoire_name;
+
+      while (*tailp != NULL)
+        tailp = &(*tailp)->next;
+      *tailp = entry;
+    }
+
+  /* The category is wanted again although an earlier request for it
+     has not been satisfied yet.  */
+  if (generate && (entry->needed & mask) != 0 && (entry->avail & mask) == 0)
+    WITH_CUR_LOCALE (error (5, 0, _("\
+circular dependencies between locale definitions")));
+
+  if (copy_locale != NULL)
+    {
+      if (entry->categories[category].generic == NULL)
+        entry->categories[category].generic
+          = copy_locale->categories[category].generic;
+      else
+        WITH_CUR_LOCALE (error (5, 0, _("\
+cannot add already read locale `%s' a second time"), name));
+    }
+
+  entry->needed |= mask;
+
+  return entry;
+}
+
+
+/* Return the entry for the locale NAME, reading its definition file
+   if CATEGORY is not yet available.  The locale must already be in
+   the list.  */
+struct localedef_t *
+find_locale (int category, const char *name, const char *repertoire_name,
+             const struct charmap_t *charmap)
+{
+  /* Find the locale, but do not generate it since this would be a bug.  */
+  struct localedef_t *found
+    = add_to_readlist (category, name, repertoire_name, 0, NULL);
+
+  assert (found != NULL);
+
+  /* Nothing to read if the category has been loaded before.  */
+  if ((found->avail & (1 << category)) != 0)
+    return found;
+
+  if (locfile_read (found, charmap) != 0)
+    WITH_CUR_LOCALE (error (4, errno, _("\
+cannot open locale definition file `%s'"), found->name));
+
+  return found;
+}
+
+
+/* Return the entry for the locale NAME, adding it to the list if
+   necessary, and read its definition file if CATEGORY is not yet
+   available.  COPY_LOCALE is passed on to add_to_readlist.  */
+struct localedef_t *
+load_locale (int category, const char *name, const char *repertoire_name,
+             const struct charmap_t *charmap, struct localedef_t *copy_locale)
+{
+  /* Generate the locale if it does not exist.  */
+  struct localedef_t *loaded
+    = add_to_readlist (category, name, repertoire_name, 1, copy_locale);
+
+  assert (loaded != NULL);
+
+  /* Nothing to read if the category has been loaded before.  */
+  if ((loaded->avail & (1 << category)) != 0)
+    return loaded;
+
+  if (locfile_read (loaded, charmap) != 0)
+    WITH_CUR_LOCALE (error (4, errno, _("\
+cannot open locale definition file `%s'"), loaded->name));
+
+  return loaded;
+}
diff --git a/REORG.TODO/locale/programs/localedef.h b/REORG.TODO/locale/programs/localedef.h
new file mode 100644
index 0000000000..74a2eba74a
--- /dev/null
+++ b/REORG.TODO/locale/programs/localedef.h
@@ -0,0 +1,177 @@
+/* General definitions for localedef(1).
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _LOCALEDEF_H
+#define _LOCALEDEF_H 1
+
+/* Get the basic locale definitions. */
+#include <errno.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "repertoire.h"
+#include "../locarchive.h"
+
+
+/* We need a bitmask for the locales.  Each category owns the bit
+   whose number is the value of its LC_* constant.  */
+enum
+{
+  CTYPE_LOCALE = 1 << LC_CTYPE,
+  NUMERIC_LOCALE = 1 << LC_NUMERIC,
+  TIME_LOCALE = 1 << LC_TIME,
+  COLLATE_LOCALE = 1 << LC_COLLATE,
+  MONETARY_LOCALE = 1 << LC_MONETARY,
+  MESSAGES_LOCALE = 1 << LC_MESSAGES,
+  PAPER_LOCALE = 1 << LC_PAPER,
+  NAME_LOCALE = 1 << LC_NAME,
+  ADDRESS_LOCALE = 1 << LC_ADDRESS,
+  TELEPHONE_LOCALE = 1 << LC_TELEPHONE,
+  MEASUREMENT_LOCALE = 1 << LC_MEASUREMENT,
+  IDENTIFICATION_LOCALE = 1 << LC_IDENTIFICATION,
+
+  /* All of the above together.  */
+  ALL_LOCALES = (CTYPE_LOCALE
+                 | NUMERIC_LOCALE
+                 | TIME_LOCALE
+                 | COLLATE_LOCALE
+                 | MONETARY_LOCALE
+                 | MESSAGES_LOCALE
+                 | PAPER_LOCALE
+                 | NAME_LOCALE
+                 | ADDRESS_LOCALE
+                 | TELEPHONE_LOCALE
+                 | MEASUREMENT_LOCALE
+                 | IDENTIFICATION_LOCALE)
+};
+
+
+/* Opaque types for the different locales. Only pointers to them are
+ used in this header (see the union in struct localedef_t). */
+struct locale_ctype_t;
+struct locale_collate_t;
+struct locale_monetary_t;
+struct locale_numeric_t;
+struct locale_time_t;
+struct locale_messages_t;
+struct locale_paper_t;
+struct locale_name_t;
+struct locale_address_t;
+struct locale_telephone_t;
+struct locale_measurement_t;
+struct locale_identification_t;
+
+
+/* Definitions for the locale. One such record exists for every
+ locale the program deals with; the records form a singly linked
+ list. */
+struct localedef_t
+{
+ /* Next record in the list, or NULL. */
+ struct localedef_t *next;
+
+ /* Name the record is looked up by. For the first record this is the
+ name of the input file. */
+ const char *name;
+
+ /* Bitmasks with bit `1 << category' per category: the categories
+ that are wanted from this locale and those already available. */
+ int needed;
+ int avail;
+
+ /* Data of each category, indexed by category number. `generic'
+ gives access to the pointer regardless of the category. */
+ union
+ {
+ void *generic;
+ struct locale_ctype_t *ctype;
+ struct locale_collate_t *collate;
+ struct locale_monetary_t *monetary;
+ struct locale_numeric_t *numeric;
+ struct locale_time_t *time;
+ struct locale_messages_t *messages;
+ struct locale_paper_t *paper;
+ struct locale_name_t *name;
+ struct locale_address_t *address;
+ struct locale_telephone_t *telephone;
+ struct locale_measurement_t *measurement;
+ struct locale_identification_t *identification;
+ } categories[__LC_LAST];
+
+ /* Per-category size value; its users are outside this header --
+ TODO confirm what exactly is measured. */
+ size_t len[__LC_LAST];
+
+ /* Per-category name; presumably the locale the category was copied
+ from -- TODO confirm against the users. */
+ const char *copy_name[__LC_LAST];
+
+ /* Name of the repertoire map given when the record was created. */
+ const char *repertoire_name;
+};
+
+
+/* Global variables of the localedef program. */
+/* If not zero give a lot more messages. */
+extern int verbose;
+/* If not zero suppress warnings and information messages. */
+extern int be_quiet;
+/* Name of the repertoire map file. */
+extern const char *repertoire_global;
+/* Maximum number of retries when opening the locale archive. */
+extern int max_locarchive_open_retry;
+/* If true don't add locale data to archive. */
+extern bool no_archive;
+/* Name of the locale.alias file. */
+extern const char *alias_file;
+
+
+/* Prototypes for a few program-wide used functions. */
+#include <programs/xmalloc.h>
+
+
+/* Wrapper to switch LC_CTYPE back to the locale specified in the
+ environment for output. STMT is executed with LC_CTYPE selected
+ from the environment and with errno as it was on entry; afterwards
+ the previous LC_CTYPE locale is selected again. The macro expands
+ to a single statement and declares the local names saved_errno and
+ cur_locale_, so STMT should not use these names itself.
+ NOTE(review): cur_locale_ is the pointer returned by setlocale and
+ is used after another setlocale call; ISO C allows that string to be
+ overwritten by the later call -- confirm this is safe for the
+ locales in use. */
+#define WITH_CUR_LOCALE(stmt) \
+ do { \
+ int saved_errno = errno; \
+ const char *cur_locale_ = setlocale (LC_CTYPE, NULL); \
+ setlocale (LC_CTYPE, ""); \
+ errno = saved_errno; \
+ stmt; \
+ setlocale (LC_CTYPE, cur_locale_); \
+ } while (0)
+
+
+/* Mark CATEGORY of the locale NAME as to be read and return the
+   record of that locale.  Unless the locale is already known
+   GENERATE must be 1; a new record is created then.  If COPY_LOCALE
+   is not NULL its data for CATEGORY is taken over.  */
+extern struct localedef_t *add_to_readlist (int category, const char *name,
+                                            const char *repertoire_name,
+                                            int generate,
+                                            struct localedef_t *copy_locale);
+
+/* Find the information for the locale NAME, which must already be
+   known.  Its definition is read if CATEGORY is not yet available.  */
+extern struct localedef_t *find_locale (int category, const char *name,
+                                        const char *repertoire_name,
+                                        const struct charmap_t *charmap);
+
+/* Load (if necessary) the information for the locale NAME.  Unlike
+   find_locale this creates the record if the locale is not known.  */
+extern struct localedef_t *load_locale (int category, const char *name,
+                                        const char *repertoire_name,
+                                        const struct charmap_t *charmap,
+                                        struct localedef_t *copy_locale);
+
+
+/* Open the locale archive. */
+extern void open_archive (struct locarhandle *ah, bool readonly);
+
+/* Close the locale archive. */
+extern void close_archive (struct locarhandle *ah);
+
+/* Add given locale data to the archive. */
+extern int add_locale_to_archive (struct locarhandle *ah, const char *name,
+ locale_data_t data, bool replace);
+
+/* Add content of named directories to locale archive. The result is
+ used by main as the exit status of the program. */
+extern int add_locales_to_archive (size_t nlist, char *list[], bool replace);
+
+/* Remove named locales from archive. The result is used by main as
+ the exit status of the program. */
+extern int delete_locales_from_archive (size_t nlist, char *list[]);
+
+/* List content of locale archive. If FNAME is non-null use that as
+ the locale archive to list, otherwise the default. This function
+ does not return. */
+extern void show_archive_content (const char *fname,
+ int verbose) __attribute__ ((noreturn));
+
+#endif /* localedef.h */
diff --git a/REORG.TODO/locale/programs/locarchive.c b/REORG.TODO/locale/programs/locarchive.c
new file mode 100644
index 0000000000..f67b7b8d99
--- /dev/null
+++ b/REORG.TODO/locale/programs/locarchive.c
@@ -0,0 +1,1757 @@
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libintl.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+
+#include <libc-mmap.h>
+#include <libc-pointer-arith.h>
+#include "../../crypt/md5.h"
+#include "../localeinfo.h"
+#include "../locarchive.h"
+#include "localedef.h"
+#include "locfile.h"
+
+/* Define the hash function. We define the function as static inline.
+ We must change the name so as not to conflict with simple-hash.h. */
+#define compute_hashval static archive_hashval
+#define hashval_t uint32_t
+#include "hashval.h"
+#undef compute_hashval
+
+extern const char *output_prefix;
+
+#define ARCHIVE_NAME COMPLOCALEDIR "/locale-archive"
+
+/* Category names indexed by category number; one entry is generated
+ for each DEFINE_CATEGORY line in categories.def. */
+static const char *locnames[] =
+ {
+#define DEFINE_CATEGORY(category, category_name, items, a) \
+ [category] = category_name,
+#include "categories.def"
+#undef DEFINE_CATEGORY
+ };
+
+
+/* Initial capacities of the tables that follow the header in a newly
+ created archive. */
+#define INITIAL_NUM_NAMES 900
+#define INITIAL_SIZE_STRINGS 7500
+#define INITIAL_NUM_LOCREC 420
+#define INITIAL_NUM_SUMS 2000
+
+
+/* Get and set values (possibly endian-swapped) in structures mapped
+ from or written directly to locale archives. GET yields FIELD passed
+ through maybe_swap_uint32; SET stores VALUE into FIELD after passing it
+ through maybe_swap_uint32; INC adds INCREMENT by combining GET and SET,
+ so its FIELD argument is evaluated twice. */
+#define GET(FIELD) maybe_swap_uint32 (FIELD)
+#define SET(FIELD, VALUE) ((FIELD) = maybe_swap_uint32 (VALUE))
+#define INC(FIELD, INCREMENT) SET (FIELD, GET (FIELD) + (INCREMENT))
+
+
+/* Size of the reserved address space area.  The expansion is
+   parenthesized so that it stays a single operand wherever it is used.  */
+#define RESERVE_MMAP_SIZE (512 * 1024 * 1024)
+
+/* To prepare for enlargements of the mmaped area reserve some address
+   space.  On some machines, being a file mapping rather than an anonymous
+   mapping affects the address selection.  So do this mapping from the
+   actual file, even though it's only a dummy to reserve address space.
+
+   FD is the archive file and TOTAL the number of bytes the caller is
+   about to map.  If TOTAL is below RESERVE_MMAP_SIZE and the PROT_NONE
+   reservation succeeds, the return value is the reservation's start
+   rounded up to MAP_FIXED_ALIGNMENT, *RESERVED is the number of bytes
+   left from that address to the end of the reservation, *XFLAGS is
+   MAP_FIXED, and *MMAP_BASE / *MMAP_LEN describe the raw reservation
+   (the range to hand to munmap later).  Otherwise NULL is returned,
+   *RESERVED is TOTAL, *XFLAGS is 0, *MMAP_BASE is NULL and *MMAP_LEN
+   is 0.  */
+static void *
+prepare_address_space (int fd, size_t total, size_t *reserved, int *xflags,
+                       void **mmap_base, size_t *mmap_len)
+{
+  if (total < RESERVE_MMAP_SIZE)
+    {
+      void *p = mmap64 (NULL, RESERVE_MMAP_SIZE, PROT_NONE, MAP_SHARED, fd, 0);
+      if (p != MAP_FAILED)
+        {
+          void *aligned_p = PTR_ALIGN_UP (p, MAP_FIXED_ALIGNMENT);
+          /* Bytes skipped at the front of the reservation to reach the
+             aligned address.  Computed on char pointers because
+             arithmetic on void pointers is only a GNU extension.  */
+          size_t align_adjust = (char *) aligned_p - (char *) p;
+          *mmap_base = p;
+          *mmap_len = RESERVE_MMAP_SIZE;
+          assert (align_adjust < RESERVE_MMAP_SIZE);
+          *reserved = RESERVE_MMAP_SIZE - align_adjust;
+          *xflags = MAP_FIXED;
+          return aligned_p;
+        }
+    }
+
+  /* No reservation: the caller maps exactly TOTAL bytes wherever the
+     kernel chooses.  */
+  *reserved = total;
+  *xflags = 0;
+  *mmap_base = NULL;
+  *mmap_len = 0;
+  return NULL;
+}
+
+
+/* Create a new, empty locale archive named ARCHIVEFNAME and describe it
+   in *AH.  The archive is built in a mkstemp temporary file next to
+   ARCHIVEFNAME: a header with empty name-hash, string, locale-record and
+   checksum-hash tables is written, the file is sized and mapped, and the
+   temporary file is then hard-linked to ARCHIVEFNAME.  If the link fails
+   with EEXIST the temporary file is discarded and open_archive is used
+   on *AH instead.  Any other failure terminates the program through
+   error.  */
+static void
+create_archive (const char *archivefname, struct locarhandle *ah)
+{
+  int fd;
+  char fname[strlen (archivefname) + sizeof (".XXXXXX")];
+  struct locarhead head;
+  size_t total;
+
+  strcpy (stpcpy (fname, archivefname), ".XXXXXX");
+
+  /* Create a temporary file in the correct directory.  */
+  fd = mkstemp (fname);
+  if (fd == -1)
+    error (EXIT_FAILURE, errno, _("cannot create temporary file: %s"), fname);
+
+  /* Create the initial content of the archive.  Each table starts where
+     the previous one ends.  */
+  SET (head.magic, AR_MAGIC);
+  SET (head.serial, 0);
+  SET (head.namehash_offset, sizeof (struct locarhead));
+  SET (head.namehash_used, 0);
+  SET (head.namehash_size, next_prime (INITIAL_NUM_NAMES));
+
+  SET (head.string_offset,
+       (GET (head.namehash_offset)
+        + GET (head.namehash_size) * sizeof (struct namehashent)));
+  SET (head.string_used, 0);
+  SET (head.string_size, INITIAL_SIZE_STRINGS);
+
+  SET (head.locrectab_offset,
+       GET (head.string_offset) + GET (head.string_size));
+  SET (head.locrectab_used, 0);
+  SET (head.locrectab_size, INITIAL_NUM_LOCREC);
+
+  SET (head.sumhash_offset,
+       (GET (head.locrectab_offset)
+        + GET (head.locrectab_size) * sizeof (struct locrecent)));
+  SET (head.sumhash_used, 0);
+  SET (head.sumhash_size, next_prime (INITIAL_NUM_SUMS));
+
+  total = (GET (head.sumhash_offset)
+           + GET (head.sumhash_size) * sizeof (struct sumhashent));
+
+  /* Write out the header and create room for the other data structures.  */
+  if (TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head))) != sizeof (head))
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot initialize archive file"));
+    }
+
+  if (ftruncate64 (fd, total) != 0)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot resize archive file"));
+    }
+
+  size_t reserved, mmap_len;
+  int xflags;
+  void *mmap_base;
+  void *p = prepare_address_space (fd, total, &reserved, &xflags, &mmap_base,
+                                   &mmap_len);
+
+  /* Map the header and all the administration data structures.  */
+  p = mmap64 (p, total, PROT_READ | PROT_WRITE, MAP_SHARED | xflags, fd, 0);
+  if (p == MAP_FAILED)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot map archive header"));
+    }
+
+  /* Now try to rename it.  We don't use the rename function since
+     this would overwrite a file which has been created in
+     parallel.  */
+  if (link (fname, archivefname) == -1)
+    {
+      int errval = errno;
+
+      /* We cannot use the just created file.  Release its mapping too
+         (the whole reservation if there is one), so that the fallback
+         to open_archive below does not have to find room for a second
+         reservation next to a leaked one.  */
+      if (mmap_base != NULL)
+        munmap (mmap_base, mmap_len);
+      else
+        munmap (p, total);
+      close (fd);
+      unlink (fname);
+
+      if (errval == EEXIST)
+        {
+          /* There is already an archive.  Must have been a localedef run
+             which happened in parallel.  Simply open this file then.  */
+          open_archive (ah, false);
+          return;
+        }
+
+      error (EXIT_FAILURE, errval, _("failed to create new locale archive"));
+    }
+
+  /* Remove the temporary name.  */
+  unlink (fname);
+
+  /* Make the file globally readable.  */
+  if (fchmod (fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) == -1)
+    {
+      int errval = errno;
+      unlink (archivefname);
+      error (EXIT_FAILURE, errval,
+             _("cannot change mode of new locale archive"));
+    }
+
+  ah->fname = NULL;
+  ah->fd = fd;
+  ah->mmap_base = mmap_base;
+  ah->mmap_len = mmap_len;
+  ah->addr = p;
+  ah->mmaped = total;
+  ah->reserved = reserved;
+}
+
+
+/* This structure and qsort comparator function are used below to sort an
+   old archive's locrec table in order of data position in the file.
+   CNT is the index of the name hash table slot that refers to LOCREC.  */
+struct oldlocrecent
+{
+  unsigned int cnt;
+  struct locrecent *locrec;
+};
+
+/* Store in *STARTP the smallest data offset and in *ENDP the largest
+   offset + length over all categories of REC other than LC_ALL.  */
+static void
+oldlocrec_extent (const struct locrecent *rec, uint32_t *startp,
+                  uint32_t *endp)
+{
+  uint32_t start = -1, end = 0;
+  int cat;
+
+  for (cat = 0; cat < __LC_LAST; ++cat)
+    if (cat != LC_ALL)
+      {
+        uint32_t offset = GET (rec->record[cat].offset);
+        uint32_t limit = offset + GET (rec->record[cat].len);
+
+        if (offset < start)
+          start = offset;
+        if (limit > end)
+          end = limit;
+      }
+  assert (start != (uint32_t) -1);
+  assert (end != 0);
+
+  *startp = start;
+  *endp = end;
+}
+
+/* qsort comparator for struct oldlocrecent: order by the start of the
+   record's data in the file, then by its end.  The offsets are unsigned
+   32-bit values, so they are compared directly; subtracting them as int
+   could overflow and report the wrong order for large offsets.  */
+static int
+oldlocrecentcmp (const void *a, const void *b)
+{
+  struct locrecent *la = ((const struct oldlocrecent *) a)->locrec;
+  struct locrecent *lb = ((const struct oldlocrecent *) b)->locrec;
+  uint32_t start_a, end_a;
+  uint32_t start_b, end_b;
+
+  oldlocrec_extent (la, &start_a, &end_a);
+  oldlocrec_extent (lb, &start_b, &end_b);
+
+  if (start_a != start_b)
+    return start_a < start_b ? -1 : 1;
+  if (end_a != end_b)
+    return end_a < end_b ? -1 : 1;
+  return 0;
+}
+
+
+/* forward decls for below */
+static uint32_t add_locale (struct locarhandle *ah, const char *name,
+ locale_data_t data, bool replace);
+static void add_alias (struct locarhandle *ah, const char *alias,
+ bool replace, const char *oldname,
+ uint32_t *locrec_offset_p);
+
+
+/* Return true if the SIZE bytes at file offset OFFSET can be read
+   through the mapping at AH->addr.  If the range is not inside the part
+   recorded as mapped, and the file's current size still fits in
+   AH->reserved, the tail of the file is mapped in place (MAP_FIXED) and
+   AH->mmaped is updated before the range is checked again.  Returns
+   false when the range cannot be made available this way.  */
+static bool
+file_data_available_p (struct locarhandle *ah, uint32_t offset, uint32_t size)
+{
+  /* One past the last requested byte, computed in 64 bits so that
+     OFFSET + SIZE cannot wrap around.  */
+  const uint64_t end = (uint64_t) offset + size;
+
+  if (offset < ah->mmaped && end <= ah->mmaped)
+    return true;
+
+  struct stat64 st;
+  if (fstat64 (ah->fd, &st) != 0)
+    return false;
+
+  if (st.st_size > ah->reserved)
+    return false;
+
+  /* Remap from the last aligned position below the mapped part up to
+     the current end of the file.  */
+  size_t start = ALIGN_DOWN (ah->mmaped, MAP_FIXED_ALIGNMENT);
+  void *p = mmap64 ((char *) ah->addr + start, st.st_size - start,
+                    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
+                    ah->fd, start);
+  if (p == MAP_FAILED)
+    {
+      ah->mmaped = start;
+      return false;
+    }
+
+  ah->mmaped = st.st_size;
+
+  /* The file may still be too short for the request; only claim the
+     data is available if the enlarged mapping really covers it.  */
+  return offset < ah->mmaped && end <= ah->mmaped;
+}
+
+
+/* Read SIZE bytes at file offset OFFSET2 from the archive's descriptor
+   into a temporary buffer and return the memcmp result of P1 against
+   that buffer.  A read that does not deliver exactly SIZE bytes is
+   reported through error with status 4.  */
+static int
+compare_from_file (struct locarhandle *ah, void *p1, uint32_t offset2,
+                   uint32_t size)
+{
+  void *filebuf = xmalloc (size);
+  int result;
+
+  if (pread (ah->fd, filebuf, size, offset2) != size)
+    WITH_CUR_LOCALE (error (4, errno,
+                            _("cannot read data from locale archive")));
+
+  result = memcmp (p1, filebuf, size);
+  free (filebuf);
+  return result;
+}
+
+
+/* Replace the archive behind AH with a larger copy.  HEAD is the header
+   of the current archive.  The whole old file is mapped, a temporary
+   file is created next to the default archive path (output_prefix
+   followed by ARCHIVE_NAME), and a header is written whose tables are
+   sized for twice the currently used entries but never smaller than the
+   current tables.  Every name in the old name hash table that still
+   refers to a locale record is then re-added to the new file with
+   add_locale, or with add_alias when it shares its record with the
+   previously processed name.  Finally the temporary file is renamed over
+   the archive, the old archive is closed and *AH is overwritten with the
+   handle of the new one.  Any failure terminates the program through
+   error.  */
+static void
+enlarge_archive (struct locarhandle *ah, const struct locarhead *head)
+{
+  struct stat64 st;
+  int fd;
+  struct locarhead newhead;
+  size_t total;
+  unsigned int cnt, loccnt;
+  struct namehashent *oldnamehashtab;
+  struct locarhandle new_ah;
+  size_t prefix_len = output_prefix ? strlen (output_prefix) : 0;
+  char archivefname[prefix_len + sizeof (ARCHIVE_NAME)];
+  char fname[prefix_len + sizeof (ARCHIVE_NAME) + sizeof (".XXXXXX") - 1];
+
+  if (output_prefix)
+    memcpy (archivefname, output_prefix, prefix_len);
+  strcpy (archivefname + prefix_len, ARCHIVE_NAME);
+  strcpy (stpcpy (fname, archivefname), ".XXXXXX");
+
+  /* Not all of the old file has to be mapped.  Change this now since
+     we will have to access the whole content.  */
+  if (fstat64 (ah->fd, &st) != 0)
+  enomap:
+    error (EXIT_FAILURE, errno, _("cannot map locale archive file"));
+
+  if (st.st_size < ah->reserved)
+    ah->addr = mmap64 (ah->addr, st.st_size, PROT_READ | PROT_WRITE,
+                       MAP_SHARED | MAP_FIXED, ah->fd, 0);
+  else
+    {
+      /* The file no longer fits into the reserved area: drop the old
+         mapping and map the file wherever the kernel places it.  */
+      if (ah->mmap_base)
+        munmap (ah->mmap_base, ah->mmap_len);
+      else
+        munmap (ah->addr, ah->reserved);
+      ah->addr = mmap64 (NULL, st.st_size, PROT_READ | PROT_WRITE,
+                         MAP_SHARED, ah->fd, 0);
+      ah->reserved = st.st_size;
+      ah->mmap_base = NULL;
+      ah->mmap_len = 0;
+      head = ah->addr;
+    }
+  if (ah->addr == MAP_FAILED)
+    goto enomap;
+  ah->mmaped = st.st_size;
+
+  /* Create a temporary file in the correct directory.  */
+  fd = mkstemp (fname);
+  if (fd == -1)
+    error (EXIT_FAILURE, errno, _("cannot create temporary file: %s"), fname);
+
+  /* Copy the existing head information.  */
+  newhead = *head;
+
+  /* Create the new archive header.  The sizes of the various tables
+     should be double from what is currently used.  */
+  SET (newhead.namehash_size,
+       MAX (next_prime (2 * GET (newhead.namehash_used)),
+            GET (newhead.namehash_size)));
+  if (verbose)
+    printf ("name: size: %u, used: %d, new: size: %u\n",
+            GET (head->namehash_size),
+            GET (head->namehash_used), GET (newhead.namehash_size));
+
+  SET (newhead.string_offset, (GET (newhead.namehash_offset)
+                               + (GET (newhead.namehash_size)
+                                  * sizeof (struct namehashent))));
+  /* Keep the string table size aligned to 4 bytes, so that
+     all the struct { uint32_t } types following are happy.  */
+  SET (newhead.string_size, MAX ((2 * GET (newhead.string_used) + 3) & -4,
+                                 GET (newhead.string_size)));
+
+  SET (newhead.locrectab_offset,
+       GET (newhead.string_offset) + GET (newhead.string_size));
+  SET (newhead.locrectab_size, MAX (2 * GET (newhead.locrectab_used),
+                                    GET (newhead.locrectab_size)));
+
+  SET (newhead.sumhash_offset, (GET (newhead.locrectab_offset)
+                                + (GET (newhead.locrectab_size)
+                                   * sizeof (struct locrecent))));
+  SET (newhead.sumhash_size,
+       MAX (next_prime (2 * GET (newhead.sumhash_used)),
+            GET (newhead.sumhash_size)));
+
+  total = (GET (newhead.sumhash_offset)
+           + GET (newhead.sumhash_size) * sizeof (struct sumhashent));
+
+  /* The new file is empty now.  */
+  SET (newhead.namehash_used, 0);
+  SET (newhead.string_used, 0);
+  SET (newhead.locrectab_used, 0);
+  SET (newhead.sumhash_used, 0);
+
+  /* Write out the header and create room for the other data structures.  */
+  if (TEMP_FAILURE_RETRY (write (fd, &newhead, sizeof (newhead)))
+      != sizeof (newhead))
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot initialize archive file"));
+    }
+
+  if (ftruncate64 (fd, total) != 0)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot resize archive file"));
+    }
+
+  size_t reserved, mmap_len;
+  int xflags;
+  void *mmap_base;
+  void *p = prepare_address_space (fd, total, &reserved, &xflags, &mmap_base,
+                                   &mmap_len);
+
+  /* Map the header and all the administration data structures.  */
+  p = mmap64 (p, total, PROT_READ | PROT_WRITE, MAP_SHARED | xflags, fd, 0);
+  if (p == MAP_FAILED)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot map archive header"));
+    }
+
+  /* Lock the new file.  */
+  if (lockf64 (fd, F_LOCK, total) != 0)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot lock new archive"));
+    }
+
+  /* Set every field of the new handle; it is copied wholesale into *AH
+     at the end, so none of them may be left indeterminate.  The name is
+     NULL as for a freshly created archive.  */
+  new_ah.fname = NULL;
+  new_ah.mmaped = total;
+  new_ah.mmap_base = mmap_base;
+  new_ah.mmap_len = mmap_len;
+  new_ah.addr = p;
+  new_ah.fd = fd;
+  new_ah.reserved = reserved;
+
+  /* Walk through the hash name hash table to find out what data is
+     still referenced and transfer it into the new file.  */
+  oldnamehashtab = (struct namehashent *) ((char *) ah->addr
+                                           + GET (head->namehash_offset));
+
+  /* Sort the old locrec table in order of data position.  */
+  struct oldlocrecent oldlocrecarray[GET (head->namehash_size)];
+  for (cnt = 0, loccnt = 0; cnt < GET (head->namehash_size); ++cnt)
+    if (GET (oldnamehashtab[cnt].locrec_offset) != 0)
+      {
+        oldlocrecarray[loccnt].cnt = cnt;
+        oldlocrecarray[loccnt++].locrec
+          = (struct locrecent *) ((char *) ah->addr
+                                  + GET (oldnamehashtab[cnt].locrec_offset));
+      }
+  qsort (oldlocrecarray, loccnt, sizeof (struct oldlocrecent),
+         oldlocrecentcmp);
+
+  uint32_t last_locrec_offset = 0;
+  for (cnt = 0; cnt < loccnt; ++cnt)
+    {
+      /* Insert this entry in the new hash table.  */
+      locale_data_t old_data;
+      unsigned int idx;
+      struct locrecent *oldlocrec = oldlocrecarray[cnt].locrec;
+
+      for (idx = 0; idx < __LC_LAST; ++idx)
+        if (idx != LC_ALL)
+          {
+            old_data[idx].size = GET (oldlocrec->record[idx].len);
+            old_data[idx].addr
+              = ((char *) ah->addr + GET (oldlocrec->record[idx].offset));
+
+            __md5_buffer (old_data[idx].addr, old_data[idx].size,
+                          old_data[idx].sum);
+          }
+
+      /* A name that shares its record with the previously processed
+         name is added as an alias of that name instead of as a locale
+         of its own.  */
+      if (cnt > 0 && oldlocrecarray[cnt - 1].locrec == oldlocrec)
+        {
+          const char *oldname
+            = ((char *) ah->addr
+               + GET (oldnamehashtab[oldlocrecarray[cnt
+                                                    - 1].cnt].name_offset));
+
+          add_alias
+            (&new_ah,
+             ((char *) ah->addr
+              + GET (oldnamehashtab[oldlocrecarray[cnt].cnt].name_offset)),
+             0, oldname, &last_locrec_offset);
+          continue;
+        }
+
+      last_locrec_offset =
+        add_locale
+        (&new_ah,
+         ((char *) ah->addr
+          + GET (oldnamehashtab[oldlocrecarray[cnt].cnt].name_offset)),
+         old_data, 0);
+      if (last_locrec_offset == 0)
+        error (EXIT_FAILURE, 0, _("cannot extend locale archive file"));
+    }
+
+  /* Make the file globally readable.  */
+  if (fchmod (fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) == -1)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval,
+             _("cannot change mode of resized locale archive"));
+    }
+
+  /* Rename the new file.  */
+  if (rename (fname, archivefname) != 0)
+    {
+      int errval = errno;
+      unlink (fname);
+      error (EXIT_FAILURE, errval, _("cannot rename new archive"));
+    }
+
+  /* Close the old file.  */
+  close_archive (ah);
+
+  /* Add the information for the new one.  */
+  *ah = new_ah;
+}
+
+
+/* Open the locale archive and map it, filling in *AH.  If AH->fname is
+   non-NULL that file is opened, otherwise the default archive
+   (output_prefix followed by ARCHIVE_NAME).  With READONLY the file is
+   opened and mapped read-only and no lock is taken; otherwise it is
+   opened read-write and the header is locked with lockf64, retrying up
+   to max_locarchive_open_retry times after a short random sleep.  When
+   the default archive does not exist, a read-only open yields an empty
+   in-memory header with AH->fd set to -1, and a writable open creates
+   the archive through create_archive.  All other failures terminate the
+   program through error.  */
+void
+open_archive (struct locarhandle *ah, bool readonly)
+{
+  struct stat64 st;
+  struct stat64 st2;
+  int fd;
+  struct locarhead head;
+  int retry = 0;
+  size_t prefix_len = output_prefix ? strlen (output_prefix) : 0;
+  char default_fname[prefix_len + sizeof (ARCHIVE_NAME)];
+  const char *archivefname = ah->fname;
+
+  /* If ah has a non-NULL fname open that otherwise open the default.  */
+  if (archivefname == NULL)
+    {
+      archivefname = default_fname;
+      if (output_prefix)
+        memcpy (default_fname, output_prefix, prefix_len);
+      strcpy (default_fname + prefix_len, ARCHIVE_NAME);
+    }
+
+  while (1)
+    {
+      /* Open the archive.  We must have exclusive write access.  */
+      fd = open64 (archivefname, readonly ? O_RDONLY : O_RDWR);
+      if (fd == -1)
+        {
+          /* Maybe the file does not yet exist?  If we are opening
+             the default locale archive we ignore the failure and
+             list an empty archive, otherwise we print an error
+             and exit.  */
+          if (errno == ENOENT && archivefname == default_fname)
+            {
+              if (readonly)
+                {
+                  static const struct locarhead nullhead =
+                    {
+                      .namehash_used = 0,
+                      .namehash_offset = 0,
+                      .namehash_size = 0
+                    };
+
+                  ah->addr = (void *) &nullhead;
+                  ah->fd = -1;
+                }
+              else
+                create_archive (archivefname, ah);
+
+              return;
+            }
+          else
+            error (EXIT_FAILURE, errno, _("cannot open locale archive \"%s\""),
+                   archivefname);
+        }
+
+      if (fstat64 (fd, &st) < 0)
+        error (EXIT_FAILURE, errno, _("cannot stat locale archive \"%s\""),
+               archivefname);
+
+      if (!readonly && lockf64 (fd, F_LOCK, sizeof (struct locarhead)) == -1)
+        {
+          /* Keep the reason the lock failed; close and nanosleep below
+             are free to change errno before it is reported.  */
+          int lock_errno = errno;
+
+          close (fd);
+
+          if (retry++ < max_locarchive_open_retry)
+            {
+              struct timespec req;
+
+              /* Wait for a bit.  */
+              req.tv_sec = 0;
+              req.tv_nsec = 1000000 * (random () % 500 + 1);
+              (void) nanosleep (&req, NULL);
+
+              continue;
+            }
+
+          error (EXIT_FAILURE, lock_errno,
+                 _("cannot lock locale archive \"%s\""), archivefname);
+        }
+
+      /* One more check.  Maybe another process replaced the archive file
+         with a new, larger one since we opened the file.  */
+      if (stat64 (archivefname, &st2) == -1
+          || st.st_dev != st2.st_dev
+          || st.st_ino != st2.st_ino)
+        {
+          (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead));
+          close (fd);
+          continue;
+        }
+
+      /* Leave the loop.  */
+      break;
+    }
+
+  /* Read the header.  */
+  if (TEMP_FAILURE_RETRY (read (fd, &head, sizeof (head))) != sizeof (head))
+    {
+      /* Save errno before the unlock call can overwrite it.  */
+      int errval = errno;
+      (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead));
+      error (EXIT_FAILURE, errval, _("cannot read archive header"));
+    }
+
+  ah->fd = fd;
+  ah->mmaped = st.st_size;
+
+  size_t reserved, mmap_len;
+  int xflags;
+  void *mmap_base;
+  void *p = prepare_address_space (fd, st.st_size, &reserved, &xflags,
+                                   &mmap_base, &mmap_len);
+
+  /* Map the entire file.  We might need to compare the category data
+     in the file with the newly added data.  */
+  ah->addr = mmap64 (p, st.st_size, PROT_READ | (readonly ? 0 : PROT_WRITE),
+                     MAP_SHARED | xflags, fd, 0);
+  if (ah->addr == MAP_FAILED)
+    {
+      /* Save errno before the unlock call can overwrite it.  */
+      int errval = errno;
+      (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead));
+      error (EXIT_FAILURE, errval, _("cannot map archive header"));
+    }
+  ah->reserved = reserved;
+  ah->mmap_base = mmap_base;
+  ah->mmap_len = mmap_len;
+}
+
+
+/* Release the mapping and the file descriptor held by AH.  A handle
+   whose descriptor is -1 has nothing to release and is left alone.  */
+void
+close_archive (struct locarhandle *ah)
+{
+  if (ah->fd == -1)
+    return;
+
+  /* Unmap the whole reservation when there is one, otherwise just the
+     region recorded for the file mapping.  */
+  void *region = ah->mmap_base ? ah->mmap_base : ah->addr;
+  size_t region_len = ah->mmap_base ? ah->mmap_len : ah->reserved;
+
+  munmap (region, region_len);
+  close (ah->fd);
+}
+
+#include "../../intl/explodename.c"
+#include "../../intl/l10nflist.c"
+
+/* Find the name hash table slot to use for NAME (NAME_LEN bytes long)
+   in the archive AH.  The table is probed by double hashing until an
+   entry with a zero name_offset is reached.  If an entry for NAME is
+   found that still refers to a locale record and REPLACE is false, a
+   diagnostic is printed (unless be_quiet) and NULL is returned.
+   Otherwise the chosen slot gets NAME's hash value stored and is
+   returned: the first probed entry without a locale record if there was
+   one, else the slot where probing stopped.  */
+static struct namehashent *
+insert_name (struct locarhandle *ah,
+             const char *name, size_t name_len, bool replace)
+{
+  const struct locarhead *const head = ah->addr;
+  char *const base = (char *) ah->addr;
+  struct namehashent *const table
+    = (struct namehashent *) (base + GET (head->namehash_offset));
+  const uint32_t tabsize = GET (head->namehash_size);
+
+  /* Hash value of the locale name.  */
+  const uint32_t hval = archive_hashval (name, name_len);
+
+  /* -1 means that no reusable slot has been seen yet.  */
+  unsigned int first_free = -1;
+  unsigned int slot = hval % tabsize;
+  const unsigned int step = 1 + hval % (tabsize - 2);
+
+  /* If the name_offset field is zero this means this is a
+     deleted entry and therefore no entry can be found.  */
+  while (GET (table[slot].name_offset) != 0)
+    {
+      const char *stored = base + GET (table[slot].name_offset);
+      const bool same_hash = GET (table[slot].hashval) == hval;
+
+      if (same_hash && strcmp (name, stored) == 0)
+        {
+          /* Found the entry.  */
+          if (GET (table[slot].locrec_offset) != 0 && ! replace)
+            {
+              if (! be_quiet)
+                error (0, 0, _("locale '%s' already exists"), name);
+              return NULL;
+            }
+
+          break;
+        }
+
+      /* Same hash value but a different name.  */
+      if (same_hash && ! be_quiet)
+        error (0, 0, "hash collision (%u) %s, %s", hval, name, stored);
+
+      /* Remember the first place we can insert the new entry.  */
+      if (first_free == -1 && GET (table[slot].locrec_offset) == 0)
+        first_free = slot;
+
+      slot += step;
+      if (slot >= tabsize)
+        slot -= tabsize;
+    }
+
+  /* Add as early as possible.  */
+  if (first_free != -1)
+    slot = first_free;
+
+  SET (table[slot].hashval, hval); /* no-op if replacing an old entry.  */
+  return &table[slot];
+}
+
+/* Make the name ALIAS in archive AH refer to the locale record at
+   *LOCREC_OFFSET_P.  REPLACE is passed on to insert_name; when no slot
+   is available for ALIAS nothing is changed.  If a new name entry is
+   needed and the string table or the name hash table is too full, the
+   archive is enlarged first; the record of OLDNAME is then looked up
+   again, *LOCREC_OFFSET_P is updated to its new offset, and the
+   operation is retried.  When ALIAS already referred to a record, that
+   record's reference count is decremented.  */
+static void
+add_alias (struct locarhandle *ah, const char *alias, bool replace,
+           const char *oldname, uint32_t *locrec_offset_p)
+{
+  uint32_t locrec_offset = *locrec_offset_p;
+  struct locarhead *head = ah->addr;
+  const size_t name_len = strlen (alias);
+  struct namehashent *namehashent = insert_name (ah, alias, name_len,
+                                                 replace);
+  /* Without a slot there is nothing to do, and NAMEHASHENT must not be
+     dereferenced below.  */
+  if (namehashent == NULL)
+    return;
+
+  if (GET (namehashent->name_offset) == 0)
+    {
+      /* We are adding a new hash entry for this alias.
+         Determine whether we have to resize the file.  */
+      if (GET (head->string_used) + name_len + 1 > GET (head->string_size)
+          || (100 * GET (head->namehash_used)
+              > 75 * GET (head->namehash_size)))
+        {
+          /* The current archive is not large enough.  */
+          enlarge_archive (ah, head);
+
+          /* The locrecent might have moved, so we have to look up
+             the old name afresh.  */
+          namehashent = insert_name (ah, oldname, strlen (oldname), true);
+          assert (GET (namehashent->name_offset) != 0);
+          assert (GET (namehashent->locrec_offset) != 0);
+          *locrec_offset_p = GET (namehashent->locrec_offset);
+
+          /* Tail call to try the whole thing again.  */
+          add_alias (ah, alias, replace, oldname, locrec_offset_p);
+          return;
+        }
+
+      /* Add the name string.  */
+      memcpy ((char *) ah->addr + GET (head->string_offset)
+              + GET (head->string_used),
+              alias, name_len + 1);
+      SET (namehashent->name_offset,
+           GET (head->string_offset) + GET (head->string_used));
+      INC (head->string_used, name_len + 1);
+
+      INC (head->namehash_used, 1);
+    }
+
+  if (GET (namehashent->locrec_offset) != 0)
+    {
+      /* Replacing an existing entry.
+         Mark that we are no longer using the old locrecent.  */
+      struct locrecent *locrecent
+        = (struct locrecent *) ((char *) ah->addr
+                                + GET (namehashent->locrec_offset));
+      INC (locrecent->refs, -1);
+    }
+
+  /* Point this entry at the locrecent installed for the main name.  */
+  SET (namehashent->locrec_offset, locrec_offset);
+}
+
+/* Order pointers to struct locale_category_data by ascending size, with
+   NULL pointers sorted after everything else.  The sizes are compared
+   directly rather than subtracted, so a large difference cannot be
+   truncated into a result with the wrong sign.  */
+static int /* qsort comparator used below */
+cmpcategorysize (const void *a, const void *b)
+{
+  const struct locale_category_data *ca
+    = *(const struct locale_category_data *const *) a;
+  const struct locale_category_data *cb
+    = *(const struct locale_category_data *const *) b;
+
+  if (ca == NULL)
+    return 1;
+  if (cb == NULL)
+    return -1;
+  if (ca->size != cb->size)
+    return ca->size < cb->size ? -1 : 1;
+  return 0;
+}
+
+/* Check the content of the archive for duplicates.  Add the content
+   of the files if necessary.  Returns the locrec_offset.
+
+   NAME is the locale name to store in archive AH and DATA holds the
+   per-category data; DATA[LC_ALL] is overwritten here with a block that
+   concatenates the smallest categories (each padded to 16 bytes, at
+   most two 4096-byte pages in total).  Category data whose checksum,
+   length and content match data already in the archive is shared rather
+   than appended again.  If the tables are too full the archive is
+   enlarged and the operation restarted.  Returns 0 when NAME already
+   exists and REPLACE is false; write and seek failures terminate the
+   program through error.  */
+static uint32_t
+add_locale (struct locarhandle *ah,
+            const char *name, locale_data_t data, bool replace)
+{
+  /* First look for the name.  If it already exists and we are not
+     supposed to replace it don't do anything.  If it does not exist
+     we have to allocate a new locale record.  */
+  size_t name_len = strlen (name);
+  uint32_t file_offsets[__LC_LAST];
+  unsigned int num_new_offsets = 0;
+  struct sumhashent *sumhashtab;
+  uint32_t hval;
+  unsigned int cnt, idx;
+  struct locarhead *head;
+  struct namehashent *namehashent;
+  unsigned int incr;
+  struct locrecent *locrecent;
+  off64_t lastoffset;
+  char *ptr;
+  struct locale_category_data *size_order[__LC_LAST];
+  /* Page size alignment is a minor optimization for locality; use a
+     common value here rather than making the localedef output depend
+     on the page size of the system on which localedef is run.  See
+     <https://sourceware.org/glibc/wiki/Development_Todo/Master#Locale_archive_alignment>
+     for more discussion.  */
+  const size_t pagesz = 4096;
+  int small_mask;
+
+  head = ah->addr;
+  sumhashtab = (struct sumhashent *) ((char *) ah->addr
+                                      + GET (head->sumhash_offset));
+
+  memset (file_offsets, 0, sizeof (file_offsets));
+
+  size_order[LC_ALL] = NULL;
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (cnt != LC_ALL)
+      size_order[cnt] = &data[cnt];
+
+  /* Sort the array in ascending order of data size.  */
+  qsort (size_order, __LC_LAST, sizeof size_order[0], cmpcategorysize);
+
+  /* Bit N of SMALL_MASK is set when category N goes into the
+     small-categories block.  */
+  small_mask = 0;
+  data[LC_ALL].size = 0;
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (size_order[cnt] != NULL)
+      {
+        const size_t rounded_size = (size_order[cnt]->size + 15) & -16;
+        if (data[LC_ALL].size + rounded_size > 2 * pagesz)
+          {
+            /* This category makes the small-categories block
+               stop being small, so this is the end of the road.  */
+            do
+              size_order[cnt++] = NULL;
+            while (cnt < __LC_LAST);
+            break;
+          }
+        data[LC_ALL].size += rounded_size;
+        small_mask |= 1 << (size_order[cnt] - data);
+      }
+
+  /* Copy the data for all the small categories into the LC_ALL
+     pseudo-category.  */
+
+  data[LC_ALL].addr = alloca (data[LC_ALL].size);
+  memset (data[LC_ALL].addr, 0, data[LC_ALL].size);
+
+  ptr = data[LC_ALL].addr;
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (small_mask & (1 << cnt))
+      {
+        memcpy (ptr, data[cnt].addr, data[cnt].size);
+        ptr += (data[cnt].size + 15) & -16;
+      }
+  __md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum);
+
+  /* For each locale category data set determine whether the same data
+     is already somewhere in the archive.  */
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (small_mask == 0 ? cnt != LC_ALL : !(small_mask & (1 << cnt)))
+      {
+        ++num_new_offsets;
+
+        /* Compute the hash value of the checksum to determine a
+           starting point for the search in the MD5 hash value
+           table.  */
+        hval = archive_hashval (data[cnt].sum, 16);
+
+        idx = hval % GET (head->sumhash_size);
+        incr = 1 + hval % (GET (head->sumhash_size) - 2);
+
+        while (GET (sumhashtab[idx].file_offset) != 0)
+          {
+            if (memcmp (data[cnt].sum, sumhashtab[idx].sum, 16) == 0)
+              {
+                /* Check the content, there could be a collision of
+                   the hash sum.
+
+                   Unfortunately the sumhashent record does not include
+                   the size of the stored data.  So we have to search for
+                   it.  */
+                locrecent
+                  = (struct locrecent *) ((char *) ah->addr
+                                          + GET (head->locrectab_offset));
+                size_t iloc;
+                for (iloc = 0; iloc < GET (head->locrectab_used); ++iloc)
+                  if (GET (locrecent[iloc].refs) != 0
+                      && (GET (locrecent[iloc].record[cnt].offset)
+                          == GET (sumhashtab[idx].file_offset)))
+                    break;
+
+                if (iloc != GET (head->locrectab_used)
+                    && data[cnt].size == GET (locrecent[iloc].record[cnt].len)
+                    /* We have to compare the content.  Either we can
+                       have the data mmaped or we have to read from
+                       the file.  */
+                    && (file_data_available_p
+                        (ah, GET (sumhashtab[idx].file_offset),
+                         data[cnt].size)
+                        ? memcmp (data[cnt].addr,
+                                  (char *) ah->addr
+                                  + GET (sumhashtab[idx].file_offset),
+                                  data[cnt].size) == 0
+                        : compare_from_file (ah, data[cnt].addr,
+                                             GET (sumhashtab[idx].file_offset),
+                                             data[cnt].size) == 0))
+                  {
+                    /* Found it.  */
+                    file_offsets[cnt] = GET (sumhashtab[idx].file_offset);
+                    --num_new_offsets;
+                    break;
+                  }
+              }
+
+            idx += incr;
+            if (idx >= GET (head->sumhash_size))
+              idx -= GET (head->sumhash_size);
+          }
+      }
+
+  /* Find a slot for the locale name in the hash table.  */
+  namehashent = insert_name (ah, name, name_len, replace);
+  if (namehashent == NULL)	/* Already exists and !REPLACE.  */
+    return 0;
+
+  /* Determine whether we have to resize the file.  */
+  if ((100 * (GET (head->sumhash_used) + num_new_offsets)
+       > 75 * GET (head->sumhash_size))
+      || (GET (namehashent->locrec_offset) == 0
+          && (GET (head->locrectab_used) == GET (head->locrectab_size)
+              || (GET (head->string_used) + name_len + 1
+                  > GET (head->string_size))
+              || (100 * GET (head->namehash_used)
+                  > 75 * GET (head->namehash_size)))))
+    {
+      /* The current archive is not large enough.  */
+      enlarge_archive (ah, head);
+      return add_locale (ah, name, data, replace);
+    }
+
+  /* Add the locale data which is not yet in the archive.  */
+  for (cnt = 0, lastoffset = 0; cnt < __LC_LAST; ++cnt)
+    if ((small_mask == 0 ? cnt != LC_ALL : !(small_mask & (1 << cnt)))
+        && file_offsets[cnt] == 0)
+      {
+        /* The data for this section is not yet available in the
+           archive.  Append it.  */
+        off64_t lastpos;
+        uint32_t md5hval;
+
+        lastpos = lseek64 (ah->fd, 0, SEEK_END);
+        if (lastpos == (off64_t) -1)
+          error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+
+        /* If block of small categories would cross page boundary,
+           align it unless it immediately follows a large category.  */
+        if (cnt == LC_ALL && lastoffset != lastpos
+            && ((((lastpos & (pagesz - 1)) + data[cnt].size + pagesz - 1)
+                 & -pagesz)
+                > ((data[cnt].size + pagesz - 1) & -pagesz)))
+          {
+            size_t sz = pagesz - (lastpos & (pagesz - 1));
+            char *zeros = alloca (sz);
+
+            memset (zeros, 0, sz);
+            /* The comparison has to stay outside TEMP_FAILURE_RETRY:
+               the macro retries only while its argument evaluates to -1
+               with errno set to EINTR.  */
+            if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, sz)) != sz)
+              error (EXIT_FAILURE, errno,
+                     _("cannot add to locale archive"));
+
+            lastpos += sz;
+          }
+
+        /* Align all data to a 16 byte boundary.  */
+        if ((lastpos & 15) != 0)
+          {
+            static const char zeros[15] = { 0, };
+
+            if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, 16 - (lastpos & 15)))
+                != 16 - (lastpos & 15))
+              error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+
+            lastpos += 16 - (lastpos & 15);
+          }
+
+        /* Remember the position.  */
+        file_offsets[cnt] = lastpos;
+        lastoffset = lastpos + data[cnt].size;
+
+        /* Write the data.  */
+        if (TEMP_FAILURE_RETRY (write (ah->fd, data[cnt].addr, data[cnt].size))
+            != data[cnt].size)
+          error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+
+        /* Add the hash value to the hash table.  */
+        md5hval = archive_hashval (data[cnt].sum, 16);
+
+        idx = md5hval % GET (head->sumhash_size);
+        incr = 1 + md5hval % (GET (head->sumhash_size) - 2);
+
+        while (GET (sumhashtab[idx].file_offset) != 0)
+          {
+            idx += incr;
+            if (idx >= GET (head->sumhash_size))
+              idx -= GET (head->sumhash_size);
+          }
+
+        memcpy (sumhashtab[idx].sum, data[cnt].sum, 16);
+        SET (sumhashtab[idx].file_offset, file_offsets[cnt]);
+
+        INC (head->sumhash_used, 1);
+      }
+
+  /* The small categories live inside the LC_ALL block, one after the
+     other in category order, each padded to 16 bytes.  */
+  lastoffset = file_offsets[LC_ALL];
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (small_mask & (1 << cnt))
+      {
+        file_offsets[cnt] = lastoffset;
+        lastoffset += (data[cnt].size + 15) & -16;
+      }
+
+  if (GET (namehashent->name_offset) == 0)
+    {
+      /* Add the name string.  */
+      memcpy ((char *) ah->addr + GET (head->string_offset)
+              + GET (head->string_used),
+              name, name_len + 1);
+      SET (namehashent->name_offset,
+           GET (head->string_offset) + GET (head->string_used));
+      INC (head->string_used, name_len + 1);
+      INC (head->namehash_used, 1);
+    }
+
+  if (GET (namehashent->locrec_offset) == 0)
+    {
+      /* Allocate a name location record.  */
+      SET (namehashent->locrec_offset, (GET (head->locrectab_offset)
+                                        + (GET (head->locrectab_used)
+                                           * sizeof (struct locrecent))));
+      INC (head->locrectab_used, 1);
+      locrecent = (struct locrecent *) ((char *) ah->addr
+                                        + GET (namehashent->locrec_offset));
+      SET (locrecent->refs, 1);
+    }
+  else
+    {
+      /* If there are other aliases pointing to this locrecent,
+         we still need a new one.  If not, reuse the old one.  */
+
+      locrecent = (struct locrecent *) ((char *) ah->addr
+                                        + GET (namehashent->locrec_offset));
+      if (GET (locrecent->refs) > 1)
+        {
+          INC (locrecent->refs, -1);
+          SET (namehashent->locrec_offset, (GET (head->locrectab_offset)
+                                            + (GET (head->locrectab_used)
+                                               * sizeof (struct locrecent))));
+          INC (head->locrectab_used, 1);
+          locrecent
+            = (struct locrecent *) ((char *) ah->addr
+                                    + GET (namehashent->locrec_offset));
+          SET (locrecent->refs, 1);
+        }
+    }
+
+  /* Fill in the table with the locations of the locale data.  */
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    {
+      SET (locrecent->record[cnt].offset, file_offsets[cnt]);
+      SET (locrecent->record[cnt].len, data[cnt].size);
+    }
+
+  return GET (namehashent->locrec_offset);
+}
+
+
+/* Add the locale NAME with category data DATA to the archive AH and
+   register every alias that resolves to it.
+
+   The locale is stored by add_locale under NAME, or under a rebuilt
+   name using the normalized codeset when _nl_explode_name reports
+   XPG_NORM_CODESET.  If NAME has no codeset part, the codeset name is
+   read from the LC_CTYPE data and the name with that (normalized)
+   codeset inserted is added as an alias.  If ALIAS_FILE is set, each
+   line of that file whose right hand side normalizes to this locale
+   adds its left hand side as a further alias.  REPLACE is passed
+   through unchanged to add_locale and add_alias.
+
+   Returns 0 on success.  Returns -1 if NAME cannot be exploded, a name
+   string cannot be allocated, add_locale reports failure (offset 0),
+   or a right hand side in the alias file cannot be exploded.  If the
+   alias file cannot be opened the program exits via error ().  */
+int
+add_locale_to_archive (struct locarhandle *ah, const char *name,
+                       locale_data_t data, bool replace)
+{
+  char *normalized_name = NULL;
+  uint32_t locrec_offset;
+
+  /* First analyze the name to decide how to archive it.  */
+  const char *language;
+  const char *modifier;
+  const char *territory;
+  const char *codeset;
+  const char *normalized_codeset;
+  int mask = _nl_explode_name (strdupa (name),
+                               &language, &modifier, &territory,
+                               &codeset, &normalized_codeset);
+  if (mask == -1)
+    return -1;
+
+  if (mask & XPG_NORM_CODESET)
+    {
+      /* This name contains a codeset in unnormalized form.
+         We will store it in the archive with a normalized name.  */
+      if (asprintf (&normalized_name, "%s%s%s.%s%s%s",
+                    language, territory == NULL ? "" : "_", territory ?: "",
+                    normalized_codeset,
+                    modifier == NULL ? "" : "@", modifier ?: "") < 0)
+        {
+          /* After a failed asprintf the pointer must not be used, so
+             only the codeset string is released here.  */
+          free ((char *) normalized_codeset);
+          return -1;
+        }
+    }
+
+  /* This call does the main work.  */
+  locrec_offset = add_locale (ah, normalized_name ?: name, data, replace);
+  if (locrec_offset == 0)
+    {
+      free (normalized_name);
+      if (mask & XPG_NORM_CODESET)
+        free ((char *) normalized_codeset);
+      return -1;
+    }
+
+  if ((mask & XPG_CODESET) == 0)
+    {
+      /* This name lacks a codeset, so determine the locale's codeset and
+         add an alias for its name with normalized codeset appended.  */
+
+      const struct
+      {
+        unsigned int magic;
+        unsigned int nstrings;
+        unsigned int strindex[0];
+      } *filedata = data[LC_CTYPE].addr;
+      codeset = (char *) filedata
+        + maybe_swap_uint32 (filedata->strindex[_NL_ITEM_INDEX
+                                                (_NL_CTYPE_CODESET_NAME)]);
+      char *normalized_codeset_name = NULL;
+
+      normalized_codeset = _nl_normalize_codeset (codeset, strlen (codeset));
+      if (normalized_codeset == NULL)
+        {
+          /* Defensive: a NULL result cannot be formatted or compared
+             below.  NOTE(review): presumably this only happens when
+             allocation fails -- confirm against _nl_normalize_codeset.  */
+          free (normalized_name);
+          return -1;
+        }
+      /* From here on NORMALIZED_CODESET is owned (and freed) by us.  */
+      mask |= XPG_NORM_CODESET;
+
+      if (asprintf (&normalized_codeset_name, "%s%s%s.%s%s%s",
+                    language, territory == NULL ? "" : "_", territory ?: "",
+                    normalized_codeset,
+                    modifier == NULL ? "" : "@", modifier ?: "") < 0)
+        {
+          free ((char *) normalized_codeset);
+          free (normalized_name);
+          return -1;
+        }
+
+      add_alias (ah, normalized_codeset_name, replace,
+                 normalized_name ?: name, &locrec_offset);
+      free (normalized_codeset_name);
+    }
+
+  /* Now read the locale.alias files looking for lines whose
+     right hand side matches our name after normalization.  */
+  int result = 0;
+  if (alias_file != NULL)
+    {
+      FILE *fp;
+      fp = fopen (alias_file, "rm");
+      if (fp == NULL)
+        error (1, errno, _("locale alias file `%s' not found"),
+               alias_file);
+
+      /* No threads present.  */
+      __fsetlocking (fp, FSETLOCKING_BYCALLER);
+
+      while (! feof_unlocked (fp))
+        {
+          /* It is a reasonable approach to use a fix buffer here
+             because
+             a) we are only interested in the first two fields
+             b) these fields must be usable as file names and so must
+                not be that long  */
+          char buf[BUFSIZ];
+          char *alias;
+          char *value;
+          char *cp;
+
+          if (fgets_unlocked (buf, BUFSIZ, fp) == NULL)
+            /* EOF reached.  */
+            break;
+
+          cp = buf;
+          /* Ignore leading white space.  The <ctype.h> classifiers need
+             an unsigned char value, hence the casts in this loop.  */
+          while (isspace ((unsigned char) cp[0]) && cp[0] != '\n')
+            ++cp;
+
+          /* A leading '#' signals a comment line.  */
+          if (cp[0] != '\0' && cp[0] != '#' && cp[0] != '\n')
+            {
+              alias = cp++;
+              while (cp[0] != '\0' && !isspace ((unsigned char) cp[0]))
+                ++cp;
+              /* Terminate alias name.  */
+              if (cp[0] != '\0')
+                *cp++ = '\0';
+
+              /* Now look for the beginning of the value.  */
+              while (isspace ((unsigned char) cp[0]))
+                ++cp;
+
+              if (cp[0] != '\0')
+                {
+                  value = cp++;
+                  while (cp[0] != '\0' && !isspace ((unsigned char) cp[0]))
+                    ++cp;
+                  /* Terminate value.  */
+                  if (cp[0] == '\n')
+                    {
+                      /* This has to be done to make the following
+                         test for the end of line possible.  We are
+                         looking for the terminating '\n' which do not
+                         overwrite here.  */
+                      *cp++ = '\0';
+                      *cp = '\n';
+                    }
+                  else if (cp[0] != '\0')
+                    *cp++ = '\0';
+
+                  /* Does this alias refer to our locale?  We will
+                     normalize the right hand side and compare the
+                     elements of the normalized form.  */
+                  {
+                    const char *rhs_language;
+                    const char *rhs_modifier;
+                    const char *rhs_territory;
+                    const char *rhs_codeset;
+                    const char *rhs_normalized_codeset;
+                    int rhs_mask = _nl_explode_name (value,
+                                                     &rhs_language,
+                                                     &rhs_modifier,
+                                                     &rhs_territory,
+                                                     &rhs_codeset,
+                                                     &rhs_normalized_codeset);
+                    if (rhs_mask == -1)
+                      {
+                        result = -1;
+                        goto out;
+                      }
+                    if (!strcmp (language, rhs_language)
+                        && ((rhs_mask & XPG_CODESET)
+                            /* He has a codeset, it must match normalized.  */
+                            ? !strcmp ((mask & XPG_NORM_CODESET)
+                                       ? normalized_codeset : codeset,
+                                       (rhs_mask & XPG_NORM_CODESET)
+                                       ? rhs_normalized_codeset : rhs_codeset)
+                            /* He has no codeset, we must also have none.  */
+                            : (mask & XPG_CODESET) == 0)
+                        /* Codeset (or lack thereof) matches.  */
+                        && !strcmp (territory ?: "", rhs_territory ?: "")
+                        && !strcmp (modifier ?: "", rhs_modifier ?: ""))
+                      /* We have a winner.  */
+                      add_alias (ah, alias, replace,
+                                 normalized_name ?: name, &locrec_offset);
+                    if (rhs_mask & XPG_NORM_CODESET)
+                      free ((char *) rhs_normalized_codeset);
+                  }
+                }
+            }
+
+          /* Possibly not the whole line fits into the buffer.
+             Ignore the rest of the line.  */
+          while (strchr (cp, '\n') == NULL)
+            {
+              cp = buf;
+              if (fgets_unlocked (buf, BUFSIZ, fp) == NULL)
+                /* Make sure the inner loop will be left.  The outer
+                   loop will exit at the `feof' test.  */
+                *cp = '\n';
+            }
+        }
+
+    out:
+      fclose (fp);
+    }
+
+  free (normalized_name);
+
+  if (mask & XPG_NORM_CODESET)
+    free ((char *) normalized_codeset);
+
+  return result;
+}
+
+
+/* Add each locale directory named in LIST (NLIST entries) to the archive
+   that open_archive opens when no explicit file name is given.
+
+   A directory is accepted only if it holds an entry for every category
+   except LC_ALL, where an entry is either a regular file named after
+   the category or a directory containing a regular file of the same
+   name prefixed with "SYS_".  Each category file is mapped, its MD5
+   sum is computed, and the set is passed to add_locale_to_archive
+   under the base name of the directory.  REPLACE is passed through.
+   Directories that cannot be read or are incomplete are reported with
+   error () and skipped.
+
+   Returns the bitwise OR of the add_locale_to_archive results; skipped
+   directories do not influence it.  */
+int
+add_locales_to_archive (size_t nlist, char *list[], bool replace)
+{
+  struct locarhandle ah;
+  int result = 0;
+
+  /* Open the archive.  This call never returns if we cannot
+     successfully open the archive.  */
+  ah.fname = NULL;
+  open_archive (&ah, false);
+
+  while (nlist-- > 0)
+    {
+      const char *fname = *list++;
+      size_t fnamelen = strlen (fname);
+      struct stat64 st;
+      DIR *dirp;
+      struct dirent64 *d;
+      int seen;
+      locale_data_t data;
+      int cnt;
+
+      if (! be_quiet)
+        printf (_("Adding %s\n"), fname);
+
+      /* First see whether this really is a directory and whether it
+         contains all the require locale category files.  */
+      if (stat64 (fname, &st) < 0)
+        {
+          error (0, 0, _("stat of \"%s\" failed: %s: ignored"), fname,
+                 strerror (errno));
+          continue;
+        }
+      if (!S_ISDIR (st.st_mode))
+        {
+          error (0, 0, _("\"%s\" is no directory; ignored"), fname);
+          continue;
+        }
+
+      dirp = opendir (fname);
+      if (dirp == NULL)
+        {
+          error (0, 0, _("cannot open directory \"%s\": %s: ignored"),
+                 fname, strerror (errno));
+          continue;
+        }
+
+      seen = 0;
+      while ((d = readdir64 (dirp)) != NULL)
+        {
+          for (cnt = 0; cnt < __LC_LAST; ++cnt)
+            if (cnt != LC_ALL)
+              if (strcmp (d->d_name, locnames[cnt]) == 0)
+                {
+                  unsigned char d_type;
+
+                  /* We have an object of the required name.  If it's
+                     a directory we have to look at a file with the
+                     prefix "SYS_".  Otherwise we have found what we
+                     are looking for.  */
+#ifdef _DIRENT_HAVE_D_TYPE
+                  d_type = d->d_type;
+
+                  if (d_type != DT_REG)
+#endif
+                    {
+                      /* Room for FNAME "/" NAME "/SYS_" NAME and the
+                         terminating NUL.  */
+                      char fullname[fnamelen + 2 * strlen (d->d_name) + 7];
+
+#ifdef _DIRENT_HAVE_D_TYPE
+                      /* A symbolic link has to be resolved with stat64
+                         just like an entry of unknown type; otherwise a
+                         link to a regular file or to a directory would
+                         never be counted.  */
+                      if (d_type == DT_UNKNOWN || d_type == DT_LNK)
+#endif
+                        {
+                          strcpy (stpcpy (stpcpy (fullname, fname), "/"),
+                                  d->d_name);
+
+                          if (stat64 (fullname, &st) == -1)
+                            /* We cannot stat the file, ignore it.  */
+                            break;
+
+                          d_type = IFTODT (st.st_mode);
+                        }
+
+                      if (d_type == DT_DIR)
+                        {
+                          /* We have to do more tests.  The file is a
+                             directory and it therefore must contain a
+                             regular file with the same name except a
+                             "SYS_" prefix.  */
+                          char *t = stpcpy (stpcpy (fullname, fname), "/");
+                          strcpy (stpcpy (stpcpy (t, d->d_name), "/SYS_"),
+                                  d->d_name);
+
+                          if (stat64 (fullname, &st) == -1)
+                            /* There is no SYS_* file or we cannot
+                               access it.  */
+                            break;
+
+                          d_type = IFTODT (st.st_mode);
+                        }
+                    }
+
+                  /* If we found a regular file (eventually after
+                     following a symlink) we are successful.  */
+                  if (d_type == DT_REG)
+                    ++seen;
+                  break;
+                }
+        }
+
+      closedir (dirp);
+
+      if (seen != __LC_LAST - 1)
+        {
+          /* We don't have all locale category files.  Ignore the name.  */
+          error (0, 0, _("incomplete set of locale files in \"%s\""),
+                 fname);
+          continue;
+        }
+
+      /* Add the files to the archive.  To do this we first compute
+         sizes and the MD5 sums of all the files.  */
+      for (cnt = 0; cnt < __LC_LAST; ++cnt)
+        if (cnt != LC_ALL)
+          {
+            char fullname[fnamelen + 2 * strlen (locnames[cnt]) + 7];
+            int fd;
+
+            strcpy (stpcpy (stpcpy (fullname, fname), "/"), locnames[cnt]);
+            fd = open64 (fullname, O_RDONLY);
+            if (fd == -1 || fstat64 (fd, &st) == -1)
+              {
+                /* Cannot read the file.  */
+                if (fd != -1)
+                  close (fd);
+                break;
+              }
+
+            if (S_ISDIR (st.st_mode))
+              {
+                char *t;
+                close (fd);
+                t = stpcpy (stpcpy (fullname, fname), "/");
+                strcpy (stpcpy (stpcpy (t, locnames[cnt]), "/SYS_"),
+                        locnames[cnt]);
+
+                fd = open64 (fullname, O_RDONLY);
+                if (fd == -1 || fstat64 (fd, &st) == -1
+                    || !S_ISREG (st.st_mode))
+                  {
+                    if (fd != -1)
+                      close (fd);
+                    break;
+                  }
+              }
+
+            /* Map the file.  */
+            data[cnt].addr = mmap64 (NULL, st.st_size, PROT_READ, MAP_SHARED,
+                                     fd, 0);
+            if (data[cnt].addr == MAP_FAILED)
+              {
+                /* Cannot map it.  */
+                close (fd);
+                break;
+              }
+
+            data[cnt].size = st.st_size;
+            __md5_buffer (data[cnt].addr, st.st_size, data[cnt].sum);
+
+            /* We don't need the file descriptor anymore.  */
+            close (fd);
+          }
+
+      if (cnt != __LC_LAST)
+        {
+          /* The loop stopped early at category CNT; undo the mappings
+             made for the categories before it.  */
+          while (cnt-- > 0)
+            if (cnt != LC_ALL)
+              munmap (data[cnt].addr, data[cnt].size);
+
+          error (0, 0, _("cannot read all files in \"%s\": ignored"), fname);
+
+          continue;
+        }
+
+      result |= add_locale_to_archive (&ah, basename (fname), data, replace);
+
+      for (cnt = 0; cnt < __LC_LAST; ++cnt)
+        if (cnt != LC_ALL)
+          munmap (data[cnt].addr, data[cnt].size);
+    }
+
+  /* We are done.  */
+  close_archive (&ah);
+
+  return result;
+}
+
+
+/* Remove the locales named in LIST (NLIST entries) from the archive.
+
+   Each name is looked up in the name hash table using the hash value
+   for the start slot and a second value derived from it as the probe
+   step.  A matching entry is marked as removed by zeroing its locale
+   record offset; its name offset is left alone.  A name that is not
+   found is reported with error () unless BE_QUIET is set.  Always
+   returns 0.  */
+int
+delete_locales_from_archive (size_t nlist, char *list[])
+{
+  struct locarhandle ah;
+
+  /* open_archive never returns if the archive cannot be opened.  */
+  ah.fname = NULL;
+  open_archive (&ah, false);
+
+  struct locarhead *head = ah.addr;
+  struct namehashent *namehashtab
+    = (struct namehashent *) ((char *) ah.addr
+                              + GET (head->namehash_offset));
+
+  for (size_t n = 0; n < nlist; ++n)
+    {
+      const char *locname = list[n];
+      uint32_t hval = archive_hashval (locname, strlen (locname));
+      unsigned int idx = hval % GET (head->namehash_size);
+      unsigned int incr = 1 + hval % (GET (head->namehash_size) - 2);
+
+      for (;;)
+        {
+          if (GET (namehashtab[idx].name_offset) == 0)
+            {
+              /* A slot without a name ends the probe sequence, so the
+                 locale is not in the table.  */
+              if (! be_quiet)
+                error (0, 0, _("locale \"%s\" not in archive"), locname);
+              break;
+            }
+
+          if (GET (namehashtab[idx].hashval) == hval
+              && strcmp (locname,
+                         (char *) ah.addr
+                         + GET (namehashtab[idx].name_offset)) == 0)
+            {
+              /* Found it: drop the reference to the locale record.  */
+              SET (namehashtab[idx].locrec_offset, 0);
+              break;
+            }
+
+          /* Step to the next candidate slot, wrapping at the end.  */
+          idx += incr;
+          if (idx >= GET (head->namehash_size))
+            idx -= GET (head->namehash_size);
+        }
+    }
+
+  close_archive (&ah);
+
+  return 0;
+}
+
+
+struct nameent /* One name collected from the name hash table for listing.  */
+{
+ char *name; /* Name string; show_archive_content points it into the archive image.  */
+ uint32_t locrec_offset; /* Locale record offset copied from the hash table entry.  */
+};
+
+
+struct dataent /* One checksum hash table entry collected for the verbose listing.  */
+{
+ const unsigned char *sum; /* Checksum bytes of the entry; 16 of them are printed.  */
+ uint32_t file_offset; /* Offset of the data; key used by dataentcmp for sort and search.  */
+ uint32_t nlink; /* Count of listed names whose record refers to this data.  */
+};
+
+
+/* qsort comparison function: order two struct nameent objects by
+   their NAME strings as strcmp does.  */
+static int
+nameentcmp (const void *a, const void *b)
+{
+  const struct nameent *lhs = a;
+  const struct nameent *rhs = b;
+
+  return strcmp (lhs->name, rhs->name);
+}
+
+
+/* qsort/bsearch comparison function: order two struct dataent objects
+   by ascending FILE_OFFSET.  Returns -1, 0 or 1.  */
+static int
+dataentcmp (const void *a, const void *b)
+{
+  uint32_t lhs = ((const struct dataent *) a)->file_offset;
+  uint32_t rhs = ((const struct dataent *) b)->file_offset;
+
+  /* Each comparison yields 0 or 1, so the difference is the sign.  */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/* Print the content of the locale archive FNAME on standard output and
+   terminate the process with EXIT_SUCCESS.
+
+   Only names whose hash table entry has a nonzero locale record offset
+   are listed, sorted with strcmp.  Without VERBOSE each name is printed
+   on its own line.  With VERBOSE one line is printed per category
+   (except LC_ALL) of each name: the length, the offset in hex, the
+   number of listed names referring to the data, a '+' if the category
+   was found to lie inside the record's LC_ALL block, the 16 checksum
+   bytes in hex, and a path-like label built from the name.  */
+void
+show_archive_content (const char *fname, int verbose)
+{
+  struct locarhandle ah;
+  struct locarhead *head;
+  struct namehashent *namehashtab;
+  struct nameent *names;
+  size_t cnt, used;
+
+  /* Open the archive.  This call never returns if we cannot
+     successfully open the archive.  */
+  ah.fname = fname;
+  open_archive (&ah, true);
+
+  head = ah.addr;
+
+  names = (struct nameent *) xmalloc (GET (head->namehash_used)
+                                      * sizeof (struct nameent));
+
+  namehashtab = (struct namehashent *) ((char *) ah.addr
+                                        + GET (head->namehash_offset));
+  for (cnt = used = 0; cnt < GET (head->namehash_size); ++cnt)
+    if (GET (namehashtab[cnt].locrec_offset) != 0)
+      {
+        assert (used < GET (head->namehash_used));
+        /* Cast before adding: arithmetic on `void *' is a GNU
+           extension and the rest of this file uses `char *'.  */
+        names[used].name = ((char *) ah.addr
+                            + GET (namehashtab[cnt].name_offset));
+        names[used++].locrec_offset = GET (namehashtab[cnt].locrec_offset);
+      }
+
+  /* Sort the names.  */
+  qsort (names, used, sizeof (struct nameent), nameentcmp);
+
+  if (verbose)
+    {
+      struct dataent *files;
+      struct sumhashent *sumhashtab;
+      int sumused;
+
+      files = (struct dataent *) xmalloc (GET (head->sumhash_used)
+                                          * sizeof (struct dataent));
+
+      sumhashtab = (struct sumhashent *) ((char *) ah.addr
+                                          + GET (head->sumhash_offset));
+      for (cnt = sumused = 0; cnt < GET (head->sumhash_size); ++cnt)
+        if (GET (sumhashtab[cnt].file_offset) != 0)
+          {
+            assert (sumused < GET (head->sumhash_used));
+            files[sumused].sum = (const unsigned char *) sumhashtab[cnt].sum;
+            files[sumused].file_offset = GET (sumhashtab[cnt].file_offset);
+            files[sumused++].nlink = 0;
+          }
+
+      /* Sort by file locations.  */
+      qsort (files, sumused, sizeof (struct dataent), dataentcmp);
+
+      /* Compute nlink fields.  For a record with an LC_ALL block, count
+         the block itself plus every category lying (partly) outside of
+         it; otherwise count every category except LC_ALL.  */
+      for (cnt = 0; cnt < used; ++cnt)
+        {
+          struct locrecent *locrec;
+          int idx;
+
+          locrec = (struct locrecent *) ((char *) ah.addr
+                                         + names[cnt].locrec_offset);
+          for (idx = 0; idx < __LC_LAST; ++idx)
+            if (GET (locrec->record[LC_ALL].offset) != 0
+                ? (idx == LC_ALL
+                   || (GET (locrec->record[idx].offset)
+                       < GET (locrec->record[LC_ALL].offset))
+                   || ((GET (locrec->record[idx].offset)
+                        + GET (locrec->record[idx].len))
+                       > (GET (locrec->record[LC_ALL].offset)
+                          + GET (locrec->record[LC_ALL].len))))
+                : idx != LC_ALL)
+              {
+                struct dataent *data, dataent;
+
+                dataent.file_offset = GET (locrec->record[idx].offset);
+                data = (struct dataent *) bsearch (&dataent, files, sumused,
+                                                   sizeof (struct dataent),
+                                                   dataentcmp);
+                assert (data != NULL);
+                ++data->nlink;
+              }
+        }
+
+      /* Print it.  */
+      for (cnt = 0; cnt < used; ++cnt)
+        {
+          struct locrecent *locrec;
+          int idx, i;
+
+          locrec = (struct locrecent *) ((char *) ah.addr
+                                         + names[cnt].locrec_offset);
+          for (idx = 0; idx < __LC_LAST; ++idx)
+            if (idx != LC_ALL)
+              {
+                struct dataent *data, dataent;
+
+                /* A category stored inside the LC_ALL block is looked
+                   up through the offset of that block.  */
+                dataent.file_offset = GET (locrec->record[idx].offset);
+                if (GET (locrec->record[LC_ALL].offset) != 0
+                    && (dataent.file_offset
+                        >= GET (locrec->record[LC_ALL].offset))
+                    && (dataent.file_offset + GET (locrec->record[idx].len)
+                        <= (GET (locrec->record[LC_ALL].offset)
+                            + GET (locrec->record[LC_ALL].len))))
+                  dataent.file_offset = GET (locrec->record[LC_ALL].offset);
+
+                data = (struct dataent *) bsearch (&dataent, files, sumused,
+                                                   sizeof (struct dataent),
+                                                   dataentcmp);
+                /* Same invariant as in the counting loop above; DATA is
+                   dereferenced right below.  */
+                assert (data != NULL);
+                printf ("%6d %7x %3d%c ",
+                        GET (locrec->record[idx].len),
+                        GET (locrec->record[idx].offset),
+                        data->nlink,
+                        (dataent.file_offset
+                         == GET (locrec->record[LC_ALL].offset))
+                        ? '+' : ' ');
+                for (i = 0; i < 16; i += 4)
+                  printf ("%02x%02x%02x%02x",
+                          data->sum[i], data->sum[i + 1],
+                          data->sum[i + 2], data->sum[i + 3]);
+                printf (" %s/%s\n", names[cnt].name,
+                        idx == LC_MESSAGES ? "LC_MESSAGES/SYS_LC_MESSAGES"
+                        : locnames[idx]);
+              }
+        }
+    }
+  else
+    for (cnt = 0; cnt < used; ++cnt)
+      puts (names[cnt].name);
+
+  close_archive (&ah);
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/REORG.TODO/locale/programs/locfile-kw.gperf b/REORG.TODO/locale/programs/locfile-kw.gperf
new file mode 100644
index 0000000000..3605d15c8e
--- /dev/null
+++ b/REORG.TODO/locale/programs/locfile-kw.gperf
@@ -0,0 +1,201 @@
+%{
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+%}
+struct keyword_t ;
+%%
+escape_char, tok_escape_char, 0
+comment_char, tok_comment_char, 0
+repertoiremap, tok_repertoiremap, 0
+include, tok_include, 0
+LC_CTYPE, tok_lc_ctype, 0
+END, tok_end, 0
+copy, tok_copy, 0
+upper, tok_upper, 0
+lower, tok_lower, 0
+alpha, tok_alpha, 0
+digit, tok_digit, 0
+outdigit, tok_outdigit, 0
+alnum, tok_alnum, 0
+space, tok_space, 0
+cntrl, tok_cntrl, 0
+punct, tok_punct, 0
+graph, tok_graph, 0
+print, tok_print, 0
+xdigit, tok_xdigit, 0
+blank, tok_blank, 0
+charclass, tok_charclass, 0
+class, tok_class, 0
+charconv, tok_charconv, 0
+toupper, tok_toupper, 0
+tolower, tok_tolower, 0
+map, tok_map, 0
+translit_start, tok_translit_start, 0
+translit_end, tok_translit_end, 0
+translit_ignore, tok_translit_ignore, 0
+default_missing, tok_default_missing, 0
+LC_COLLATE, tok_lc_collate, 0
+coll_weight_max, tok_coll_weight_max, 0
+section-symbol, tok_section_symbol, 0
+collating-element, tok_collating_element, 0
+collating-symbol, tok_collating_symbol, 0
+symbol-equivalence, tok_symbol_equivalence, 0
+script, tok_script, 0
+order_start, tok_order_start, 0
+order_end, tok_order_end, 0
+from, tok_from, 0
+forward, tok_forward, 0
+backward, tok_backward, 0
+position, tok_position, 0
+UNDEFINED, tok_undefined, 0
+IGNORE, tok_ignore, 0
+reorder-after, tok_reorder_after, 0
+reorder-end, tok_reorder_end, 0
+reorder-sections-after, tok_reorder_sections_after, 0
+reorder-sections-end, tok_reorder_sections_end, 0
+define, tok_define, 0
+undef, tok_undef, 0
+ifdef, tok_ifdef, 0
+else, tok_else, 0
+elifdef, tok_elifdef, 0
+elifndef, tok_elifndef, 0
+endif, tok_endif, 0
+LC_MONETARY, tok_lc_monetary, 0
+int_curr_symbol, tok_int_curr_symbol, 0
+currency_symbol, tok_currency_symbol, 0
+mon_decimal_point, tok_mon_decimal_point, 0
+mon_thousands_sep, tok_mon_thousands_sep, 0
+mon_grouping, tok_mon_grouping, 0
+positive_sign, tok_positive_sign, 0
+negative_sign, tok_negative_sign, 0
+int_frac_digits, tok_int_frac_digits, 0
+frac_digits, tok_frac_digits, 0
+p_cs_precedes, tok_p_cs_precedes, 0
+p_sep_by_space, tok_p_sep_by_space, 0
+n_cs_precedes, tok_n_cs_precedes, 0
+n_sep_by_space, tok_n_sep_by_space, 0
+p_sign_posn, tok_p_sign_posn, 0
+n_sign_posn, tok_n_sign_posn, 0
+int_p_cs_precedes, tok_int_p_cs_precedes, 0
+int_p_sep_by_space, tok_int_p_sep_by_space, 0
+int_n_cs_precedes, tok_int_n_cs_precedes, 0
+int_n_sep_by_space, tok_int_n_sep_by_space, 0
+int_p_sign_posn, tok_int_p_sign_posn, 0
+int_n_sign_posn, tok_int_n_sign_posn, 0
+duo_int_curr_symbol, tok_duo_int_curr_symbol, 0
+duo_currency_symbol, tok_duo_currency_symbol, 0
+duo_int_frac_digits, tok_duo_int_frac_digits, 0
+duo_frac_digits, tok_duo_frac_digits, 0
+duo_p_cs_precedes, tok_duo_p_cs_precedes, 0
+duo_p_sep_by_space, tok_duo_p_sep_by_space, 0
+duo_n_cs_precedes, tok_duo_n_cs_precedes, 0
+duo_n_sep_by_space, tok_duo_n_sep_by_space, 0
+duo_int_p_cs_precedes, tok_duo_int_p_cs_precedes, 0
+duo_int_p_sep_by_space, tok_duo_int_p_sep_by_space, 0
+duo_int_n_cs_precedes, tok_duo_int_n_cs_precedes, 0
+duo_int_n_sep_by_space, tok_duo_int_n_sep_by_space, 0
+duo_p_sign_posn, tok_duo_p_sign_posn, 0
+duo_n_sign_posn, tok_duo_n_sign_posn, 0
+duo_int_p_sign_posn, tok_duo_int_p_sign_posn, 0
+duo_int_n_sign_posn, tok_duo_int_n_sign_posn, 0
+uno_valid_from, tok_uno_valid_from, 0
+uno_valid_to, tok_uno_valid_to, 0
+duo_valid_from, tok_duo_valid_from, 0
+duo_valid_to, tok_duo_valid_to, 0
+conversion_rate, tok_conversion_rate, 0
+LC_NUMERIC, tok_lc_numeric, 0
+decimal_point, tok_decimal_point, 0
+thousands_sep, tok_thousands_sep, 0
+grouping, tok_grouping, 0
+LC_TIME, tok_lc_time, 0
+abday, tok_abday, 0
+day, tok_day, 0
+week, tok_week, 0
+abmon, tok_abmon, 0
+mon, tok_mon, 0
+d_t_fmt, tok_d_t_fmt, 0
+d_fmt, tok_d_fmt, 0
+t_fmt, tok_t_fmt, 0
+am_pm, tok_am_pm, 0
+t_fmt_ampm, tok_t_fmt_ampm, 0
+era, tok_era, 0
+era_year, tok_era_year, 0
+era_d_fmt, tok_era_d_fmt, 0
+era_d_t_fmt, tok_era_d_t_fmt, 0
+era_t_fmt, tok_era_t_fmt, 0
+alt_digits, tok_alt_digits, 0
+first_weekday, tok_first_weekday, 0
+first_workday, tok_first_workday, 0
+cal_direction, tok_cal_direction, 0
+timezone, tok_timezone, 0
+date_fmt, tok_date_fmt, 0
+LC_MESSAGES, tok_lc_messages, 0
+yesexpr, tok_yesexpr, 0
+noexpr, tok_noexpr, 0
+yesstr, tok_yesstr, 0
+nostr, tok_nostr, 0
+LC_PAPER, tok_lc_paper, 0
+height, tok_height, 0
+width, tok_width, 0
+LC_NAME, tok_lc_name, 0
+name_fmt, tok_name_fmt, 0
+name_gen, tok_name_gen, 0
+name_mr, tok_name_mr, 0
+name_mrs, tok_name_mrs, 0
+name_miss, tok_name_miss, 0
+name_ms, tok_name_ms, 0
+LC_ADDRESS, tok_lc_address, 0
+postal_fmt, tok_postal_fmt, 0
+country_name, tok_country_name, 0
+country_post, tok_country_post, 0
+country_ab2, tok_country_ab2, 0
+country_ab3, tok_country_ab3, 0
+country_num, tok_country_num, 0
+country_car, tok_country_car, 0
+country_isbn, tok_country_isbn, 0
+lang_name, tok_lang_name, 0
+lang_ab, tok_lang_ab, 0
+lang_term, tok_lang_term, 0
+lang_lib, tok_lang_lib, 0
+LC_TELEPHONE, tok_lc_telephone, 0
+tel_int_fmt, tok_tel_int_fmt, 0
+tel_dom_fmt, tok_tel_dom_fmt, 0
+int_select, tok_int_select, 0
+int_prefix, tok_int_prefix, 0
+LC_MEASUREMENT, tok_lc_measurement, 0
+measurement, tok_measurement, 0
+LC_IDENTIFICATION, tok_lc_identification, 0
+title, tok_title, 0
+source, tok_source, 0
+address, tok_address, 0
+contact, tok_contact, 0
+email, tok_email, 0
+tel, tok_tel, 0
+fax, tok_fax, 0
+language, tok_language, 0
+territory, tok_territory, 0
+audience, tok_audience, 0
+application, tok_application, 0
+abbreviation, tok_abbreviation, 0
+revision, tok_revision, 0
+date, tok_date, 0
+category, tok_category, 0
diff --git a/REORG.TODO/locale/programs/locfile-kw.h b/REORG.TODO/locale/programs/locfile-kw.h
new file mode 100644
index 0000000000..1cdca1941b
--- /dev/null
+++ b/REORG.TODO/locale/programs/locfile-kw.h
@@ -0,0 +1,621 @@
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -acCgopt -k'1,2,5,9,$' -L ANSI-C -N locfile_hash locfile-kw.gperf */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+#line 1 "locfile-kw.gperf"
+
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+#line 24 "locfile-kw.gperf"
+struct keyword_t ;
+
+#define TOTAL_KEYWORDS 176
+#define MIN_WORD_LENGTH 3
+#define MAX_WORD_LENGTH 22
+#define MIN_HASH_VALUE 3
+#define MAX_HASH_VALUE 630
+/* maximum key range = 628, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register unsigned int len) /* Produced by gperf; regenerate from locfile-kw.gperf instead of editing.
+ Returns LEN plus the asso_values weights of selected bytes of STR:
+ str[0] and str[len - 1] always, str[1] for LEN of 2 or more, str[4]
+ for LEN of 5 or more, and str[8] whenever the default label is taken.
+ NOTE(review): LEN == 0 also lands on the default label and would
+ read str[8] and str[len - 1]; callers presumably range-check LEN
+ first -- confirm in locfile_hash. */
+{
+ static const unsigned short asso_values[] = /* Indexed by byte value; 631 is MAX_HASH_VALUE + 1 and pushes the sum above MAX_HASH_VALUE. */
+ {
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 5, 0, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 5, 631, 0, 0, 0,
+ 0, 0, 10, 0, 631, 631, 0, 631, 0, 5,
+ 631, 631, 0, 0, 0, 10, 631, 631, 631, 0,
+ 631, 631, 631, 631, 631, 0, 631, 145, 80, 25,
+ 15, 0, 180, 105, 10, 35, 631, 50, 80, 160,
+ 5, 130, 40, 45, 5, 0, 10, 35, 40, 35,
+ 5, 10, 0, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+ 631, 631, 631, 631, 631, 631
+ };
+ register int hval = len; /* The length itself is part of the hash value. */
+
+ switch (hval)
+ {
+ default: /* Any length without its own case label, i.e. 9 and up (and 0). */
+ hval += asso_values[(unsigned char)str[8]];
+ /*FALLTHROUGH*/
+ case 8:
+ case 7:
+ case 6:
+ case 5:
+ hval += asso_values[(unsigned char)str[4]];
+ /*FALLTHROUGH*/
+ case 4:
+ case 3:
+ case 2:
+ hval += asso_values[(unsigned char)str[1]];
+ /*FALLTHROUGH*/
+ case 1:
+ hval += asso_values[(unsigned char)str[0]];
+ break;
+ }
+ return hval + asso_values[(unsigned char)str[len - 1]]; /* The last byte always contributes. */
+}
+
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const struct keyword_t *
+locfile_hash (register const char *str, register unsigned int len)
+{
+ static const struct keyword_t wordlist[] =
+ {
+ {""}, {""}, {""},
+#line 31 "locfile-kw.gperf"
+ {"END", tok_end, 0},
+ {""}, {""},
+#line 70 "locfile-kw.gperf"
+ {"IGNORE", tok_ignore, 0},
+#line 129 "locfile-kw.gperf"
+ {"LC_TIME", tok_lc_time, 0},
+#line 30 "locfile-kw.gperf"
+ {"LC_CTYPE", tok_lc_ctype, 0},
+ {""},
+#line 166 "locfile-kw.gperf"
+ {"LC_ADDRESS", tok_lc_address, 0},
+#line 151 "locfile-kw.gperf"
+ {"LC_MESSAGES", tok_lc_messages, 0},
+#line 159 "locfile-kw.gperf"
+ {"LC_NAME", tok_lc_name, 0},
+#line 156 "locfile-kw.gperf"
+ {"LC_PAPER", tok_lc_paper, 0},
+#line 184 "locfile-kw.gperf"
+ {"LC_MEASUREMENT", tok_lc_measurement, 0},
+#line 56 "locfile-kw.gperf"
+ {"LC_COLLATE", tok_lc_collate, 0},
+ {""},
+#line 186 "locfile-kw.gperf"
+ {"LC_IDENTIFICATION", tok_lc_identification, 0},
+#line 199 "locfile-kw.gperf"
+ {"revision", tok_revision, 0},
+#line 69 "locfile-kw.gperf"
+ {"UNDEFINED", tok_undefined, 0},
+#line 125 "locfile-kw.gperf"
+ {"LC_NUMERIC", tok_lc_numeric, 0},
+#line 82 "locfile-kw.gperf"
+ {"LC_MONETARY", tok_lc_monetary, 0},
+#line 179 "locfile-kw.gperf"
+ {"LC_TELEPHONE", tok_lc_telephone, 0},
+ {""}, {""}, {""},
+#line 75 "locfile-kw.gperf"
+ {"define", tok_define, 0},
+#line 152 "locfile-kw.gperf"
+ {"yesexpr", tok_yesexpr, 0},
+#line 141 "locfile-kw.gperf"
+ {"era_year", tok_era_year, 0},
+ {""},
+#line 54 "locfile-kw.gperf"
+ {"translit_ignore", tok_translit_ignore, 0},
+#line 154 "locfile-kw.gperf"
+ {"yesstr", tok_yesstr, 0},
+ {""},
+#line 89 "locfile-kw.gperf"
+ {"negative_sign", tok_negative_sign, 0},
+ {""},
+#line 137 "locfile-kw.gperf"
+ {"t_fmt", tok_t_fmt, 0},
+#line 157 "locfile-kw.gperf"
+ {"height", tok_height, 0},
+ {""}, {""},
+#line 52 "locfile-kw.gperf"
+ {"translit_start", tok_translit_start, 0},
+#line 136 "locfile-kw.gperf"
+ {"d_fmt", tok_d_fmt, 0},
+ {""},
+#line 53 "locfile-kw.gperf"
+ {"translit_end", tok_translit_end, 0},
+#line 94 "locfile-kw.gperf"
+ {"n_cs_precedes", tok_n_cs_precedes, 0},
+#line 144 "locfile-kw.gperf"
+ {"era_t_fmt", tok_era_t_fmt, 0},
+#line 39 "locfile-kw.gperf"
+ {"space", tok_space, 0},
+#line 72 "locfile-kw.gperf"
+ {"reorder-end", tok_reorder_end, 0},
+#line 73 "locfile-kw.gperf"
+ {"reorder-sections-after", tok_reorder_sections_after, 0},
+ {""},
+#line 142 "locfile-kw.gperf"
+ {"era_d_fmt", tok_era_d_fmt, 0},
+#line 187 "locfile-kw.gperf"
+ {"title", tok_title, 0},
+ {""}, {""},
+#line 149 "locfile-kw.gperf"
+ {"timezone", tok_timezone, 0},
+ {""},
+#line 74 "locfile-kw.gperf"
+ {"reorder-sections-end", tok_reorder_sections_end, 0},
+ {""}, {""}, {""},
+#line 95 "locfile-kw.gperf"
+ {"n_sep_by_space", tok_n_sep_by_space, 0},
+ {""}, {""},
+#line 100 "locfile-kw.gperf"
+ {"int_n_cs_precedes", tok_int_n_cs_precedes, 0},
+ {""}, {""}, {""},
+#line 26 "locfile-kw.gperf"
+ {"escape_char", tok_escape_char, 0},
+ {""},
+#line 28 "locfile-kw.gperf"
+ {"repertoiremap", tok_repertoiremap, 0},
+#line 46 "locfile-kw.gperf"
+ {"charclass", tok_charclass, 0},
+#line 43 "locfile-kw.gperf"
+ {"print", tok_print, 0},
+#line 44 "locfile-kw.gperf"
+ {"xdigit", tok_xdigit, 0},
+#line 110 "locfile-kw.gperf"
+ {"duo_n_cs_precedes", tok_duo_n_cs_precedes, 0},
+#line 127 "locfile-kw.gperf"
+ {"thousands_sep", tok_thousands_sep, 0},
+#line 195 "locfile-kw.gperf"
+ {"territory", tok_territory, 0},
+#line 36 "locfile-kw.gperf"
+ {"digit", tok_digit, 0},
+ {""}, {""},
+#line 92 "locfile-kw.gperf"
+ {"p_cs_precedes", tok_p_cs_precedes, 0},
+ {""}, {""},
+#line 62 "locfile-kw.gperf"
+ {"script", tok_script, 0},
+#line 29 "locfile-kw.gperf"
+ {"include", tok_include, 0},
+ {""},
+#line 78 "locfile-kw.gperf"
+ {"else", tok_else, 0},
+#line 182 "locfile-kw.gperf"
+ {"int_select", tok_int_select, 0},
+ {""}, {""}, {""},
+#line 132 "locfile-kw.gperf"
+ {"week", tok_week, 0},
+#line 33 "locfile-kw.gperf"
+ {"upper", tok_upper, 0},
+ {""}, {""},
+#line 192 "locfile-kw.gperf"
+ {"tel", tok_tel, 0},
+#line 93 "locfile-kw.gperf"
+ {"p_sep_by_space", tok_p_sep_by_space, 0},
+#line 158 "locfile-kw.gperf"
+ {"width", tok_width, 0},
+ {""},
+#line 98 "locfile-kw.gperf"
+ {"int_p_cs_precedes", tok_int_p_cs_precedes, 0},
+ {""}, {""},
+#line 41 "locfile-kw.gperf"
+ {"punct", tok_punct, 0},
+ {""}, {""},
+#line 101 "locfile-kw.gperf"
+ {"int_n_sep_by_space", tok_int_n_sep_by_space, 0},
+ {""}, {""}, {""},
+#line 108 "locfile-kw.gperf"
+ {"duo_p_cs_precedes", tok_duo_p_cs_precedes, 0},
+#line 48 "locfile-kw.gperf"
+ {"charconv", tok_charconv, 0},
+ {""},
+#line 47 "locfile-kw.gperf"
+ {"class", tok_class, 0},
+#line 114 "locfile-kw.gperf"
+ {"duo_int_n_cs_precedes", tok_duo_int_n_cs_precedes, 0},
+#line 115 "locfile-kw.gperf"
+ {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0},
+#line 111 "locfile-kw.gperf"
+ {"duo_n_sep_by_space", tok_duo_n_sep_by_space, 0},
+#line 119 "locfile-kw.gperf"
+ {"duo_int_n_sign_posn", tok_duo_int_n_sign_posn, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""},
+#line 58 "locfile-kw.gperf"
+ {"section-symbol", tok_section_symbol, 0},
+#line 183 "locfile-kw.gperf"
+ {"int_prefix", tok_int_prefix, 0},
+ {""}, {""}, {""}, {""},
+#line 42 "locfile-kw.gperf"
+ {"graph", tok_graph, 0},
+ {""}, {""},
+#line 99 "locfile-kw.gperf"
+ {"int_p_sep_by_space", tok_int_p_sep_by_space, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 112 "locfile-kw.gperf"
+ {"duo_int_p_cs_precedes", tok_duo_int_p_cs_precedes, 0},
+#line 113 "locfile-kw.gperf"
+ {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0},
+#line 109 "locfile-kw.gperf"
+ {"duo_p_sep_by_space", tok_duo_p_sep_by_space, 0},
+#line 118 "locfile-kw.gperf"
+ {"duo_int_p_sign_posn", tok_duo_int_p_sign_posn, 0},
+#line 155 "locfile-kw.gperf"
+ {"nostr", tok_nostr, 0},
+ {""}, {""},
+#line 140 "locfile-kw.gperf"
+ {"era", tok_era, 0},
+ {""},
+#line 84 "locfile-kw.gperf"
+ {"currency_symbol", tok_currency_symbol, 0},
+ {""},
+#line 165 "locfile-kw.gperf"
+ {"name_ms", tok_name_ms, 0},
+#line 163 "locfile-kw.gperf"
+ {"name_mrs", tok_name_mrs, 0},
+#line 164 "locfile-kw.gperf"
+ {"name_miss", tok_name_miss, 0},
+#line 83 "locfile-kw.gperf"
+ {"int_curr_symbol", tok_int_curr_symbol, 0},
+#line 188 "locfile-kw.gperf"
+ {"source", tok_source, 0},
+#line 162 "locfile-kw.gperf"
+ {"name_mr", tok_name_mr, 0},
+#line 161 "locfile-kw.gperf"
+ {"name_gen", tok_name_gen, 0},
+#line 200 "locfile-kw.gperf"
+ {"date", tok_date, 0},
+ {""}, {""},
+#line 189 "locfile-kw.gperf"
+ {"address", tok_address, 0},
+#line 160 "locfile-kw.gperf"
+ {"name_fmt", tok_name_fmt, 0},
+#line 32 "locfile-kw.gperf"
+ {"copy", tok_copy, 0},
+#line 103 "locfile-kw.gperf"
+ {"int_n_sign_posn", tok_int_n_sign_posn, 0},
+ {""}, {""},
+#line 131 "locfile-kw.gperf"
+ {"day", tok_day, 0},
+#line 105 "locfile-kw.gperf"
+ {"duo_currency_symbol", tok_duo_currency_symbol, 0},
+ {""}, {""}, {""},
+#line 150 "locfile-kw.gperf"
+ {"date_fmt", tok_date_fmt, 0},
+#line 64 "locfile-kw.gperf"
+ {"order_end", tok_order_end, 0},
+#line 117 "locfile-kw.gperf"
+ {"duo_n_sign_posn", tok_duo_n_sign_posn, 0},
+ {""},
+#line 168 "locfile-kw.gperf"
+ {"country_name", tok_country_name, 0},
+#line 71 "locfile-kw.gperf"
+ {"reorder-after", tok_reorder_after, 0},
+ {""}, {""},
+#line 153 "locfile-kw.gperf"
+ {"noexpr", tok_noexpr, 0},
+#line 50 "locfile-kw.gperf"
+ {"tolower", tok_tolower, 0},
+#line 196 "locfile-kw.gperf"
+ {"audience", tok_audience, 0},
+ {""}, {""}, {""},
+#line 49 "locfile-kw.gperf"
+ {"toupper", tok_toupper, 0},
+#line 68 "locfile-kw.gperf"
+ {"position", tok_position, 0},
+ {""},
+#line 40 "locfile-kw.gperf"
+ {"cntrl", tok_cntrl, 0},
+ {""},
+#line 27 "locfile-kw.gperf"
+ {"comment_char", tok_comment_char, 0},
+#line 88 "locfile-kw.gperf"
+ {"positive_sign", tok_positive_sign, 0},
+ {""}, {""}, {""}, {""},
+#line 61 "locfile-kw.gperf"
+ {"symbol-equivalence", tok_symbol_equivalence, 0},
+ {""},
+#line 102 "locfile-kw.gperf"
+ {"int_p_sign_posn", tok_int_p_sign_posn, 0},
+#line 173 "locfile-kw.gperf"
+ {"country_car", tok_country_car, 0},
+ {""}, {""},
+#line 104 "locfile-kw.gperf"
+ {"duo_int_curr_symbol", tok_duo_int_curr_symbol, 0},
+ {""}, {""},
+#line 135 "locfile-kw.gperf"
+ {"d_t_fmt", tok_d_t_fmt, 0},
+ {""}, {""},
+#line 116 "locfile-kw.gperf"
+ {"duo_p_sign_posn", tok_duo_p_sign_posn, 0},
+#line 185 "locfile-kw.gperf"
+ {"measurement", tok_measurement, 0},
+#line 174 "locfile-kw.gperf"
+ {"country_isbn", tok_country_isbn, 0},
+#line 37 "locfile-kw.gperf"
+ {"outdigit", tok_outdigit, 0},
+ {""}, {""},
+#line 143 "locfile-kw.gperf"
+ {"era_d_t_fmt", tok_era_d_t_fmt, 0},
+ {""}, {""}, {""},
+#line 34 "locfile-kw.gperf"
+ {"lower", tok_lower, 0},
+#line 181 "locfile-kw.gperf"
+ {"tel_dom_fmt", tok_tel_dom_fmt, 0},
+#line 169 "locfile-kw.gperf"
+ {"country_post", tok_country_post, 0},
+#line 148 "locfile-kw.gperf"
+ {"cal_direction", tok_cal_direction, 0},
+ {""},
+#line 139 "locfile-kw.gperf"
+ {"t_fmt_ampm", tok_t_fmt_ampm, 0},
+#line 91 "locfile-kw.gperf"
+ {"frac_digits", tok_frac_digits, 0},
+ {""}, {""},
+#line 175 "locfile-kw.gperf"
+ {"lang_name", tok_lang_name, 0},
+#line 90 "locfile-kw.gperf"
+ {"int_frac_digits", tok_int_frac_digits, 0},
+ {""},
+#line 121 "locfile-kw.gperf"
+ {"uno_valid_to", tok_uno_valid_to, 0},
+#line 126 "locfile-kw.gperf"
+ {"decimal_point", tok_decimal_point, 0},
+ {""},
+#line 133 "locfile-kw.gperf"
+ {"abmon", tok_abmon, 0},
+ {""}, {""}, {""}, {""},
+#line 107 "locfile-kw.gperf"
+ {"duo_frac_digits", tok_duo_frac_digits, 0},
+#line 180 "locfile-kw.gperf"
+ {"tel_int_fmt", tok_tel_int_fmt, 0},
+#line 123 "locfile-kw.gperf"
+ {"duo_valid_to", tok_duo_valid_to, 0},
+#line 146 "locfile-kw.gperf"
+ {"first_weekday", tok_first_weekday, 0},
+ {""},
+#line 130 "locfile-kw.gperf"
+ {"abday", tok_abday, 0},
+ {""},
+#line 198 "locfile-kw.gperf"
+ {"abbreviation", tok_abbreviation, 0},
+#line 147 "locfile-kw.gperf"
+ {"first_workday", tok_first_workday, 0},
+ {""}, {""},
+#line 97 "locfile-kw.gperf"
+ {"n_sign_posn", tok_n_sign_posn, 0},
+ {""}, {""}, {""},
+#line 145 "locfile-kw.gperf"
+ {"alt_digits", tok_alt_digits, 0},
+ {""}, {""},
+#line 128 "locfile-kw.gperf"
+ {"grouping", tok_grouping, 0},
+ {""},
+#line 45 "locfile-kw.gperf"
+ {"blank", tok_blank, 0},
+ {""}, {""},
+#line 194 "locfile-kw.gperf"
+ {"language", tok_language, 0},
+#line 120 "locfile-kw.gperf"
+ {"uno_valid_from", tok_uno_valid_from, 0},
+ {""},
+#line 197 "locfile-kw.gperf"
+ {"application", tok_application, 0},
+ {""},
+#line 80 "locfile-kw.gperf"
+ {"elifndef", tok_elifndef, 0},
+ {""}, {""}, {""}, {""}, {""},
+#line 122 "locfile-kw.gperf"
+ {"duo_valid_from", tok_duo_valid_from, 0},
+#line 57 "locfile-kw.gperf"
+ {"coll_weight_max", tok_coll_weight_max, 0},
+ {""},
+#line 79 "locfile-kw.gperf"
+ {"elifdef", tok_elifdef, 0},
+#line 67 "locfile-kw.gperf"
+ {"backward", tok_backward, 0},
+#line 106 "locfile-kw.gperf"
+ {"duo_int_frac_digits", tok_duo_int_frac_digits, 0},
+ {""}, {""}, {""}, {""}, {""}, {""},
+#line 96 "locfile-kw.gperf"
+ {"p_sign_posn", tok_p_sign_posn, 0},
+ {""},
+#line 201 "locfile-kw.gperf"
+ {"category", tok_category, 0},
+ {""}, {""}, {""}, {""},
+#line 134 "locfile-kw.gperf"
+ {"mon", tok_mon, 0},
+ {""},
+#line 124 "locfile-kw.gperf"
+ {"conversion_rate", tok_conversion_rate, 0},
+ {""}, {""}, {""}, {""}, {""},
+#line 63 "locfile-kw.gperf"
+ {"order_start", tok_order_start, 0},
+ {""}, {""}, {""}, {""}, {""},
+#line 176 "locfile-kw.gperf"
+ {"lang_ab", tok_lang_ab, 0},
+#line 178 "locfile-kw.gperf"
+ {"lang_lib", tok_lang_lib, 0},
+ {""}, {""}, {""},
+#line 190 "locfile-kw.gperf"
+ {"contact", tok_contact, 0},
+ {""}, {""}, {""},
+#line 171 "locfile-kw.gperf"
+ {"country_ab3", tok_country_ab3, 0},
+ {""}, {""}, {""},
+#line 191 "locfile-kw.gperf"
+ {"email", tok_email, 0},
+#line 170 "locfile-kw.gperf"
+ {"country_ab2", tok_country_ab2, 0},
+ {""}, {""}, {""},
+#line 55 "locfile-kw.gperf"
+ {"default_missing", tok_default_missing, 0},
+ {""}, {""},
+#line 193 "locfile-kw.gperf"
+ {"fax", tok_fax, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 172 "locfile-kw.gperf"
+ {"country_num", tok_country_num, 0},
+ {""}, {""}, {""}, {""}, {""}, {""},
+#line 51 "locfile-kw.gperf"
+ {"map", tok_map, 0},
+#line 65 "locfile-kw.gperf"
+ {"from", tok_from, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 86 "locfile-kw.gperf"
+ {"mon_thousands_sep", tok_mon_thousands_sep, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""},
+#line 81 "locfile-kw.gperf"
+ {"endif", tok_endif, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 76 "locfile-kw.gperf"
+ {"undef", tok_undef, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 59 "locfile-kw.gperf"
+ {"collating-element", tok_collating_element, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 66 "locfile-kw.gperf"
+ {"forward", tok_forward, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""},
+#line 85 "locfile-kw.gperf"
+ {"mon_decimal_point", tok_mon_decimal_point, 0},
+ {""}, {""},
+#line 167 "locfile-kw.gperf"
+ {"postal_fmt", tok_postal_fmt, 0},
+ {""}, {""}, {""}, {""}, {""},
+#line 60 "locfile-kw.gperf"
+ {"collating-symbol", tok_collating_symbol, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 35 "locfile-kw.gperf"
+ {"alpha", tok_alpha, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""},
+#line 38 "locfile-kw.gperf"
+ {"alnum", tok_alnum, 0},
+ {""},
+#line 87 "locfile-kw.gperf"
+ {"mon_grouping", tok_mon_grouping, 0},
+ {""},
+#line 177 "locfile-kw.gperf"
+ {"lang_term", tok_lang_term, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 77 "locfile-kw.gperf"
+ {"ifdef", tok_ifdef, 0},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+ {""}, {""}, {""}, {""},
+#line 138 "locfile-kw.gperf"
+ {"am_pm", tok_am_pm, 0}
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register const char *s = wordlist[key].name;
+
+ if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/REORG.TODO/locale/programs/locfile-token.h b/REORG.TODO/locale/programs/locfile-token.h
new file mode 100644
index 0000000000..0c32f2c70b
--- /dev/null
+++ b/REORG.TODO/locale/programs/locfile-token.h
@@ -0,0 +1,258 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _TOKEN_H
+#define _TOKEN_H
+/* Token codes of the locale source reader.  The value is found in the
+   `tok' member of the tokens handed out by lr_token and is the second
+   member of each entry in the gperf-generated keyword tables.
+   tok_none is explicitly zero; every other enumerator is numbered
+   implicitly, so inserting an entry renumbers all that follow it.  */
+enum token_t
+{
+ tok_none = 0,
+
+ tok_eof,
+ tok_eol,
+ tok_bsymbol,
+ tok_ident,
+ tok_ellipsis2,
+ tok_ellipsis3,
+ tok_ellipsis4,
+ tok_ellipsis2_2,
+ tok_ellipsis4_2,
+ tok_semicolon,
+ tok_comma,
+ tok_open_brace,
+ tok_close_brace,
+ tok_charcode,
+ tok_ucs4,
+ tok_number,
+ tok_minus1,
+ tok_string,
+ tok_include,
+
+ tok_escape_char,
+ tok_comment_char,
+ tok_charmap,
+ tok_end,
+ tok_g0esc,
+ tok_g1esc,
+ tok_g2esc,
+ tok_g3esc,
+ tok_escseq,
+ tok_addset,
+
+ tok_charids,
+
+ tok_code_set_name,
+ tok_mb_cur_max,
+ tok_mb_cur_min,
+ tok_charconv,
+ tok_width,
+ tok_width_variable,
+ tok_width_default,
+ tok_repertoiremap,
+
+ tok_lc_ctype,
+ tok_copy,
+ /* Keep the following entries up to the next comment in this order!
+    NOTE(review): presumably the LC_CTYPE code relies on these twelve
+    class tokens being consecutive starting at tok_upper -- confirm
+    there before touching this run.  */
+ tok_upper,
+ tok_lower,
+ tok_alpha,
+ tok_digit,
+ tok_xdigit,
+ tok_space,
+ tok_print,
+ tok_graph,
+ tok_blank,
+ tok_cntrl,
+ tok_punct,
+ tok_alnum,
+ /* OK, shuffling allowed again. */
+ tok_outdigit,
+ tok_charclass,
+ tok_class,
+ tok_toupper,
+ tok_tolower,
+ tok_map,
+ tok_translit_start,
+ tok_translit_end,
+ tok_translit_ignore,
+ tok_default_missing,
+ tok_lc_collate,
+ tok_coll_weight_max,
+ tok_section_symbol,
+ tok_collating_element,
+ tok_collating_symbol,
+ tok_symbol_equivalence,
+ tok_script,
+ tok_order_start,
+ tok_order_end,
+ tok_from,
+ tok_forward,
+ tok_backward,
+ tok_position,
+ tok_undefined,
+ tok_ignore,
+ tok_reorder_after,
+ tok_reorder_end,
+ tok_reorder_sections_after,
+ tok_reorder_sections_end,
+ tok_define,
+ tok_undef,
+ tok_ifdef,
+ tok_ifndef,
+ tok_else,
+ tok_elifdef,
+ tok_elifndef,
+ tok_endif,
+ tok_lc_monetary,
+ tok_int_curr_symbol,
+ tok_currency_symbol,
+ tok_mon_decimal_point,
+ tok_mon_thousands_sep,
+ tok_mon_grouping,
+ tok_positive_sign,
+ tok_negative_sign,
+ tok_int_frac_digits,
+ tok_frac_digits,
+ tok_p_cs_precedes,
+ tok_p_sep_by_space,
+ tok_n_cs_precedes,
+ tok_n_sep_by_space,
+ tok_p_sign_posn,
+ tok_n_sign_posn,
+ tok_int_p_cs_precedes,
+ tok_int_p_sep_by_space,
+ tok_int_n_cs_precedes,
+ tok_int_n_sep_by_space,
+ tok_int_p_sign_posn,
+ tok_int_n_sign_posn,
+ tok_duo_int_curr_symbol,
+ tok_duo_currency_symbol,
+ tok_duo_int_frac_digits,
+ tok_duo_frac_digits,
+ tok_duo_p_cs_precedes,
+ tok_duo_p_sep_by_space,
+ tok_duo_n_cs_precedes,
+ tok_duo_n_sep_by_space,
+ tok_duo_int_p_cs_precedes,
+ tok_duo_int_p_sep_by_space,
+ tok_duo_int_n_cs_precedes,
+ tok_duo_int_n_sep_by_space,
+ tok_duo_p_sign_posn,
+ tok_duo_n_sign_posn,
+ tok_duo_int_p_sign_posn,
+ tok_duo_int_n_sign_posn,
+ tok_uno_valid_from,
+ tok_uno_valid_to,
+ tok_duo_valid_from,
+ tok_duo_valid_to,
+ tok_conversion_rate,
+ tok_lc_numeric,
+ tok_decimal_point,
+ tok_thousands_sep,
+ tok_grouping,
+ tok_lc_time,
+ tok_abday,
+ tok_day,
+ tok_abmon,
+ tok_mon,
+ tok_d_t_fmt,
+ tok_d_fmt,
+ tok_t_fmt,
+ tok_am_pm,
+ tok_t_fmt_ampm,
+ tok_era,
+ tok_era_year,
+ tok_era_d_fmt,
+ tok_era_d_t_fmt,
+ tok_era_t_fmt,
+ tok_alt_digits,
+ tok_week,
+ tok_first_weekday,
+ tok_first_workday,
+ tok_cal_direction,
+ tok_timezone,
+ tok_date_fmt,
+ tok_lc_messages,
+ tok_yesexpr,
+ tok_noexpr,
+ tok_yesstr,
+ tok_nostr,
+ tok_lc_paper,
+ tok_height,
+ tok_lc_name,
+ tok_name_fmt,
+ tok_name_gen,
+ tok_name_mr,
+ tok_name_mrs,
+ tok_name_miss,
+ tok_name_ms,
+ tok_lc_address,
+ tok_postal_fmt,
+ tok_country_name,
+ tok_country_post,
+ tok_country_ab2,
+ tok_country_ab3,
+ tok_country_num,
+ tok_country_car,
+ tok_country_isbn,
+ tok_lang_name,
+ tok_lang_ab,
+ tok_lang_term,
+ tok_lang_lib,
+ tok_lc_telephone,
+ tok_tel_int_fmt,
+ tok_tel_dom_fmt,
+ tok_int_select,
+ tok_int_prefix,
+ tok_lc_measurement,
+ tok_measurement,
+ tok_lc_identification,
+ tok_title,
+ tok_source,
+ tok_address,
+ tok_contact,
+ tok_email,
+ tok_tel,
+ tok_fax,
+ tok_language,
+ tok_territory,
+ tok_audience,
+ tok_application,
+ tok_abbreviation,
+ tok_revision,
+ tok_date,
+ tok_category,
+
+ tok_error
+};
+
+/* One keyword table entry: the keyword's spelling and the token code it
+   stands for, plus extra per-keyword attributes.
+   NOTE(review): presumably this is the element type of the
+   gperf-generated word lists; their declaration is not visible from
+   here -- confirm in the *-kw.h files.  */
+struct keyword_t
+{
+ const char *name;
+ enum token_t token;
+ int symname_or_ident;
+
+ /* Only for locdef file.  The locfile keyword entries visible in
+    locfile-kw.h give no initializers for these members, so there they
+    are zero.  */
+ int locale;
+ enum token_t base;
+ enum token_t group;
+ enum token_t list;
+};
+
+
+#endif /* token.h */
diff --git a/REORG.TODO/locale/programs/locfile.c b/REORG.TODO/locale/programs/locfile.c
new file mode 100644
index 0000000000..0990ef11be
--- /dev/null
+++ b/REORG.TODO/locale/programs/locfile.c
@@ -0,0 +1,1001 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <assert.h>
+#include <wchar.h>
+
+#include "../../crypt/md5.h"
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+#include "simple-hash.h"
+
+#include "locfile-kw.h"
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+/* Temporary storage of the locale data before writing it to the archive.
+   write_all_categories passes this object to add_locale_to_archive.  */
+static locale_data_t to_archive;
+
+
+/* Read the locale definition source named RESULT->name and hand each
+   category section it contains to the matching *_read routine, which
+   stores what it parses in RESULT.  CHARMAP is passed through to the
+   tokenizer and the category readers.
+
+   If the file cannot be opened under its own name and the name does not
+   start with a slash, it is looked up as DIR/locales/NAME and then
+   DIR/NAME for every DIR in the colon-separated I18NPATH environment
+   variable, and finally as LOCSRCDIR/NAME.
+
+   Only categories that are in RESULT->needed but not yet in
+   RESULT->avail are of interest; for all others the category reader is
+   called with its last argument nonzero (presumably "ignore content").
+   Categories that have no section in the file are marked available in
+   RESULT->avail on return.
+
+   Returns 0 after the whole file has been read and 1 if no source file
+   could be opened.  */
+int
+locfile_read (struct localedef_t *result, const struct charmap_t *charmap)
+{
+  const char *filename = result->name;
+  const char *repertoire_name = result->repertoire_name;
+  int locale_mask = result->needed & ~result->avail;
+  struct linereader *ldfile;
+  int not_here = ALL_LOCALES;
+
+  /* If no repertoire name was specified use the global one.  */
+  if (repertoire_name == NULL)
+    repertoire_name = repertoire_global;
+
+  /* Open the locale definition file.  */
+  ldfile = lr_open (filename, locfile_hash);
+  if (ldfile == NULL)
+    {
+      if (filename != NULL && filename[0] != '/')
+        {
+          char *i18npath = getenv ("I18NPATH");
+          if (i18npath != NULL && *i18npath != '\0')
+            {
+              const size_t pathlen = strlen (i18npath);
+              char i18npathbuf[pathlen + 1];
+              char path[strlen (filename) + 1 + pathlen
+                        + sizeof ("/locales/") - 1];
+              char *next;
+
+              /* strsep modifies its input, so work on a private copy
+                 of the environment string.  */
+              i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1);
+
+              while (ldfile == NULL
+                     && (next = strsep (&i18npath, ":")) != NULL)
+                {
+                  stpcpy (stpcpy (stpcpy (path, next), "/locales/"), filename);
+
+                  ldfile = lr_open (path, locfile_hash);
+
+                  if (ldfile == NULL)
+                    {
+                      stpcpy (stpcpy (stpcpy (path, next), "/"), filename);
+
+                      ldfile = lr_open (path, locfile_hash);
+                    }
+                }
+            }
+
+          /* Test in the default directory.  */
+          if (ldfile == NULL)
+            {
+              char path[strlen (filename) + 1 + sizeof (LOCSRCDIR)];
+
+              stpcpy (stpcpy (stpcpy (path, LOCSRCDIR), "/"), filename);
+              ldfile = lr_open (path, locfile_hash);
+            }
+        }
+
+      if (ldfile == NULL)
+        return 1;
+    }
+
+  /* Parse locale definition file and store result in RESULT.  */
+  while (1)
+    {
+      struct token *now = lr_token (ldfile, charmap, NULL, NULL, verbose);
+      enum token_t nowtok = now->tok;
+      struct token *arg;
+
+      if (nowtok == tok_eof)
+        break;
+
+      if (nowtok == tok_eol)
+        /* Ignore empty lines.  */
+        continue;
+
+      switch (nowtok)
+        {
+        case tok_escape_char:
+        case tok_comment_char:
+          /* We need an argument.  */
+          arg = lr_token (ldfile, charmap, NULL, NULL, verbose);
+
+          if (arg->tok != tok_ident)
+            {
+              SYNTAX_ERROR (_("bad argument"));
+              continue;
+            }
+
+          if (arg->val.str.lenmb != 1)
+            {
+              lr_error (ldfile, _("\
+argument to `%s' must be a single character"),
+                        nowtok == tok_escape_char
+                        ? "escape_char" : "comment_char");
+
+              lr_ignore_rest (ldfile, 0);
+              continue;
+            }
+
+          if (nowtok == tok_escape_char)
+            ldfile->escape_char = *arg->val.str.startmb;
+          else
+            ldfile->comment_char = *arg->val.str.startmb;
+          break;
+
+        case tok_repertoiremap:
+          /* We need an argument.  */
+          arg = lr_token (ldfile, charmap, NULL, NULL, verbose);
+
+          if (arg->tok != tok_ident)
+            {
+              SYNTAX_ERROR (_("bad argument"));
+              continue;
+            }
+
+          /* A name given by the caller or globally takes precedence
+             over the one in the file.  */
+          if (repertoire_name == NULL)
+            {
+              char *newp = alloca (arg->val.str.lenmb + 1);
+
+              *((char *) mempcpy (newp, arg->val.str.startmb,
+                                  arg->val.str.lenmb)) = '\0';
+              repertoire_name = newp;
+            }
+          break;
+
+          /* For every category section below: clear (rather than toggle)
+             the NOT_HERE bit, so that a category whose section occurs
+             more than once is still recorded as present in the file.  */
+
+        case tok_lc_ctype:
+          ctype_read (ldfile, result, charmap, repertoire_name,
+                      (locale_mask & CTYPE_LOCALE) == 0);
+          result->avail |= locale_mask & CTYPE_LOCALE;
+          not_here &= ~CTYPE_LOCALE;
+          continue;
+
+        case tok_lc_collate:
+          collate_read (ldfile, result, charmap, repertoire_name,
+                        (locale_mask & COLLATE_LOCALE) == 0);
+          result->avail |= locale_mask & COLLATE_LOCALE;
+          not_here &= ~COLLATE_LOCALE;
+          continue;
+
+        case tok_lc_monetary:
+          monetary_read (ldfile, result, charmap, repertoire_name,
+                         (locale_mask & MONETARY_LOCALE) == 0);
+          result->avail |= locale_mask & MONETARY_LOCALE;
+          not_here &= ~MONETARY_LOCALE;
+          continue;
+
+        case tok_lc_numeric:
+          numeric_read (ldfile, result, charmap, repertoire_name,
+                        (locale_mask & NUMERIC_LOCALE) == 0);
+          result->avail |= locale_mask & NUMERIC_LOCALE;
+          not_here &= ~NUMERIC_LOCALE;
+          continue;
+
+        case tok_lc_time:
+          time_read (ldfile, result, charmap, repertoire_name,
+                     (locale_mask & TIME_LOCALE) == 0);
+          result->avail |= locale_mask & TIME_LOCALE;
+          not_here &= ~TIME_LOCALE;
+          continue;
+
+        case tok_lc_messages:
+          messages_read (ldfile, result, charmap, repertoire_name,
+                         (locale_mask & MESSAGES_LOCALE) == 0);
+          result->avail |= locale_mask & MESSAGES_LOCALE;
+          not_here &= ~MESSAGES_LOCALE;
+          continue;
+
+        case tok_lc_paper:
+          paper_read (ldfile, result, charmap, repertoire_name,
+                      (locale_mask & PAPER_LOCALE) == 0);
+          result->avail |= locale_mask & PAPER_LOCALE;
+          not_here &= ~PAPER_LOCALE;
+          continue;
+
+        case tok_lc_name:
+          name_read (ldfile, result, charmap, repertoire_name,
+                     (locale_mask & NAME_LOCALE) == 0);
+          result->avail |= locale_mask & NAME_LOCALE;
+          not_here &= ~NAME_LOCALE;
+          continue;
+
+        case tok_lc_address:
+          address_read (ldfile, result, charmap, repertoire_name,
+                        (locale_mask & ADDRESS_LOCALE) == 0);
+          result->avail |= locale_mask & ADDRESS_LOCALE;
+          not_here &= ~ADDRESS_LOCALE;
+          continue;
+
+        case tok_lc_telephone:
+          telephone_read (ldfile, result, charmap, repertoire_name,
+                          (locale_mask & TELEPHONE_LOCALE) == 0);
+          result->avail |= locale_mask & TELEPHONE_LOCALE;
+          not_here &= ~TELEPHONE_LOCALE;
+          continue;
+
+        case tok_lc_measurement:
+          measurement_read (ldfile, result, charmap, repertoire_name,
+                            (locale_mask & MEASUREMENT_LOCALE) == 0);
+          result->avail |= locale_mask & MEASUREMENT_LOCALE;
+          not_here &= ~MEASUREMENT_LOCALE;
+          continue;
+
+        case tok_lc_identification:
+          identification_read (ldfile, result, charmap, repertoire_name,
+                               (locale_mask & IDENTIFICATION_LOCALE) == 0);
+          result->avail |= locale_mask & IDENTIFICATION_LOCALE;
+          not_here &= ~IDENTIFICATION_LOCALE;
+          continue;
+
+        default:
+          SYNTAX_ERROR (_("\
+syntax error: not inside a locale definition section"));
+          continue;
+        }
+
+      /* The rest of the line must be empty.  */
+      lr_ignore_rest (ldfile, 1);
+    }
+
+  /* We read all of the file.  */
+  lr_close (ldfile);
+
+  /* Mark the categories which are not contained in the file.  We assume
+     them to be available and the default data will be used.  */
+  result->avail |= not_here;
+
+  return 0;
+}
+
+
+/* Semantic checking of locale specifications.  check_funcs is indexed by
+   LC_* category number and holds the routine check_all_categories calls
+   for that category; slots without an initializer are null pointers and
+   are skipped there.  */
+
+static void (*const check_funcs[]) (struct localedef_t *,
+ const struct charmap_t *) =
+{
+ [LC_CTYPE] = ctype_finish,
+ [LC_COLLATE] = collate_finish,
+ [LC_MESSAGES] = messages_finish,
+ [LC_MONETARY] = monetary_finish,
+ [LC_NUMERIC] = numeric_finish,
+ [LC_TIME] = time_finish,
+ [LC_PAPER] = paper_finish,
+ [LC_NAME] = name_finish,
+ [LC_ADDRESS] = address_finish,
+ [LC_TELEPHONE] = telephone_finish,
+ [LC_MEASUREMENT] = measurement_finish,
+ [LC_IDENTIFICATION] = identification_finish
+};
+
+/* Call every per-category check routine registered in check_funcs with
+   DEFINITIONS and CHARMAP, in ascending table order.  Table slots that
+   hold a null pointer are skipped.  */
+void
+check_all_categories (struct localedef_t *definitions,
+                      const struct charmap_t *charmap)
+{
+  const size_t n_funcs = sizeof (check_funcs) / sizeof (check_funcs[0]);
+
+  for (size_t idx = 0; idx < n_funcs; ++idx)
+    {
+      if (check_funcs[idx] == NULL)
+        continue;
+
+      check_funcs[idx] (definitions, charmap);
+    }
+}
+
+
+/* Writing the locale data files. All files use the same output_path.
+   write_funcs is indexed by LC_* category number and holds the output
+   routine write_all_categories calls for that category; slots without
+   an initializer are null pointers and are skipped there.  */
+
+static void (*const write_funcs[]) (struct localedef_t *,
+ const struct charmap_t *, const char *) =
+{
+ [LC_CTYPE] = ctype_output,
+ [LC_COLLATE] = collate_output,
+ [LC_MESSAGES] = messages_output,
+ [LC_MONETARY] = monetary_output,
+ [LC_NUMERIC] = numeric_output,
+ [LC_TIME] = time_output,
+ [LC_PAPER] = paper_output,
+ [LC_NAME] = name_output,
+ [LC_ADDRESS] = address_output,
+ [LC_TELEPHONE] = telephone_output,
+ [LC_MEASUREMENT] = measurement_output,
+ [LC_IDENTIFICATION] = identification_output
+};
+
+
+/* Call every per-category output routine registered in write_funcs with
+   DEFINITIONS, CHARMAP and OUTPUT_PATH.  Afterwards, unless no_archive
+   is set, open the locale archive, add the collected data to it under
+   the name LOCNAME and close it again.  A failure to add the data
+   terminates the program with EXIT_FAILURE.  */
+void
+write_all_categories (struct localedef_t *definitions,
+                      const struct charmap_t *charmap, const char *locname,
+                      const char *output_path)
+{
+  const size_t n_funcs = sizeof (write_funcs) / sizeof (write_funcs[0]);
+  struct locarhandle ah;
+
+  for (size_t idx = 0; idx < n_funcs; ++idx)
+    {
+      if (write_funcs[idx] == NULL)
+        continue;
+
+      write_funcs[idx] (definitions, charmap, output_path);
+    }
+
+  /* Nothing more to do when the archive is not wanted.  */
+  if (no_archive)
+    return;
+
+  /* The data has to be added to the archive.  Open it first; this call
+     never returns if we cannot successfully open the archive.  */
+  ah.fname = NULL;
+  open_archive (&ah, false);
+
+  if (add_locale_to_archive (&ah, locname, to_archive, true) != 0)
+    error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+
+  /* We are done.  */
+  close_archive (&ah);
+}
+
+
+/* Return a NULL terminated list of the directories next to output_path
+   that have the same owner, group, permissions and device as output_path.
+
+   Every list element is a malloc'ed path of the form PARENT/NAME/ (with
+   trailing slash); the list itself is malloc'ed as well, and the caller
+   owns both.  Only "." and ".." are excluded from the scan, so the entry
+   for output_path itself is part of the result when it matches.
+
+   Returns NULL if output_path has no parent component, if it is not a
+   directory, if the parent directory cannot be opened, or if no entry
+   matches.  */
+static const char **
+siblings_uncached (const char *output_path)
+{
+  size_t len;
+  char *base, *p;
+  struct stat64 output_stat;
+  DIR *dirp;
+  int nelems;
+  const char **elems;
+
+  /* Remove trailing slashes and trailing pathname component.  BASE is a
+     scratch copy without terminator; it gets terminated below where the
+     parent directory name ends.  */
+  len = strlen (output_path);
+  base = (char *) alloca (len);
+  memcpy (base, output_path, len);
+  p = base + len;
+  while (p > base && p[-1] == '/')
+    p--;
+  if (p == base)
+    return NULL;
+  do
+    p--;
+  while (p > base && p[-1] != '/');
+  if (p == base)
+    return NULL;
+  *--p = '\0';
+  len = p - base;
+
+  /* Get the properties of output_path.  */
+  if (lstat64 (output_path, &output_stat) < 0
+      || !S_ISDIR (output_stat.st_mode))
+    return NULL;
+
+  /* Iterate through the directories in base directory.  BASE is the
+     empty string when the parent is the root directory (e.g. "/foo");
+     opendir would reject that, so open "/" explicitly.  The paths built
+     below are still correct because they always insert a slash.  */
+  dirp = opendir (len == 0 ? "/" : base);
+  if (dirp == NULL)
+    return NULL;
+  nelems = 0;
+  elems = NULL;
+  for (;;)
+    {
+      struct dirent64 *other_dentry;
+      const char *other_name;
+      char *other_path;
+      struct stat64 other_stat;
+
+      other_dentry = readdir64 (dirp);
+      if (other_dentry == NULL)
+        break;
+
+      other_name = other_dentry->d_name;
+      if (strcmp (other_name, ".") == 0 || strcmp (other_name, "..") == 0)
+        continue;
+
+      /* Room for BASE, '/', the name, a trailing '/' and the NUL.  */
+      other_path = (char *) xmalloc (len + 1 + strlen (other_name) + 2);
+      memcpy (other_path, base, len);
+      other_path[len] = '/';
+      strcpy (other_path + len + 1, other_name);
+
+      if (lstat64 (other_path, &other_stat) >= 0
+          && S_ISDIR (other_stat.st_mode)
+          && other_stat.st_uid == output_stat.st_uid
+          && other_stat.st_gid == output_stat.st_gid
+          && other_stat.st_mode == output_stat.st_mode
+          && other_stat.st_dev == output_stat.st_dev)
+        {
+          /* Found a subdirectory.  Add a trailing slash and store it.
+             The array always has room for one more element plus the
+             terminating NULL.  */
+          p = other_path + len + 1 + strlen (other_name);
+          *p++ = '/';
+          *p = '\0';
+          elems = (const char **) xrealloc ((char *) elems,
+                                            (nelems + 2) * sizeof *elems);
+          elems[nelems++] = other_path;
+        }
+      else
+        free (other_path);
+    }
+  closedir (dirp);
+
+  if (elems != NULL)
+    elems[nelems] = NULL;
+  return elems;
+}
+
+
+/* Cached front end of siblings_uncached: return the NULL terminated list
+   of directories next to output_path that share its owner, group,
+   permissions and device.  The most recent result is remembered and
+   handed out again as long as the very same OUTPUT_PATH pointer is
+   passed; a different pointer frees the old list and recomputes.  The
+   returned list stays owned by this function.  */
+static const char **
+siblings (const char *output_path)
+{
+  static const char *last_output_path;
+  static const char **last_result;
+
+  /* Same pointer as last time: the cached answer is still the one.  */
+  if (output_path == last_output_path)
+    return last_result;
+
+  /* Drop the stale list, element by element, then the array.  */
+  if (last_result != NULL)
+    {
+      for (size_t idx = 0; last_result[idx] != NULL; ++idx)
+        free ((char *) last_result[idx]);
+      free (last_result);
+    }
+
+  last_output_path = output_path;
+  last_result = siblings_uncached (output_path);
+  return last_result;
+}
+
+
+/* Read up to NBYTE bytes from FD into BUFAREA, calling read again after
+   short reads and after interruptions (EINTR).  Returns the number of
+   bytes stored, which is less than NBYTE only if end of file was hit,
+   or the negative result of read if it failed for any other reason.  */
+static ssize_t
+full_read (int fd, void *bufarea, size_t nbyte)
+{
+  char *const start = (char *) bufarea;
+  char *cursor = start;
+  size_t remaining = nbyte;
+
+  while (remaining > 0)
+    {
+      ssize_t got = read (fd, cursor, remaining);
+
+      if (got < 0)
+        {
+          /* Only an interrupted call is retried.  */
+          if (errno != EINTR)
+            return got;
+          continue;
+        }
+
+      if (got == 0)
+        /* End of file.  */
+        break;
+
+      cursor += got;
+      remaining -= got;
+    }
+
+  return cursor - start;
+}
+
+
+/* Compare the first SIZE bytes of the files FILENAME1 and FILENAME2,
+   reading them in pieces of at most BLOCKSIZE bytes.  Returns 0 if the
+   contents are equal, 1 if they differ, and -1 if a file cannot be
+   opened or delivers fewer bytes than requested.  */
+static int
+compare_files (const char *filename1, const char *filename2, size_t size,
+               size_t blocksize)
+{
+  int fd1 = open (filename1, O_RDONLY);
+  if (fd1 < 0)
+    return -1;
+
+  int fd2 = open (filename2, O_RDONLY);
+  if (fd2 < 0)
+    {
+      close (fd1);
+      return -1;
+    }
+
+  /* One allocation serves both files: first half for file 1, second
+     half for file 2.  */
+  char *block1 = (char *) xmalloc (2 * blocksize);
+  char *block2 = block1 + blocksize;
+  int result = 0;
+
+  for (size_t left = size; left > 0 && result == 0; )
+    {
+      size_t chunk = left < blocksize ? left : blocksize;
+
+      if (full_read (fd1, block1, chunk) < (ssize_t) chunk
+          || full_read (fd2, block2, chunk) < (ssize_t) chunk)
+        result = -1;
+      else if (memcmp (block1, block2, chunk) != 0)
+        result = 1;
+      else
+        left -= chunk;
+    }
+
+  free (block1);
+  close (fd2);
+  close (fd1);
+  return result;
+}
+
+/* True if the locale files use the opposite endianness to the
+   machine running localedef.
+   NOTE(review): presumably this is what the maybe_swap_uint32 and
+   maybe_swap_uint32_obstack helpers consult; they are defined
+   elsewhere -- confirm there.  */
+bool swap_endianness_p;
+
+/* Note in FILE's offset table that the next byte appended to FILE's
+   obstack starts a new element.  This happens on every call made
+   outside a start_locale_structure/end_locale_structure or
+   start_locale_prelude/end_locale_prelude block, and on the first call
+   made inside a start_locale_structure/end_locale_structure block;
+   later calls inside such a block do nothing.
+
+   The stored offset is the obstack position plus N_ELEMENTS + 2 32-bit
+   words (presumably the header and offset table that precede the data
+   in the output file -- confirm in the writer).  */
+static void
+record_offset (struct locale_file *file)
+{
+  /* Stage 2: the current structure already has its offset.  */
+  if (file->structure_stage >= 2)
+    return;
+
+  assert (file->next_element < file->n_elements);
+
+  size_t leading_bytes = (file->n_elements + 2) * sizeof (uint32_t);
+
+  file->offsets[file->next_element++]
+    = obstack_object_size (&file->data) + leading_bytes;
+
+  /* First piece of data of a structure: later pieces must not start
+     another element.  */
+  if (file->structure_stage == 1)
+    file->structure_stage = 2;
+}
+
+/* Prepare FILE for building a new output file that will consist of
+   N_ELEMENTS elements: no element recorded yet, no structure open, a
+   freshly allocated offset table with one 32-bit slot per element, and
+   an empty obstack for the element data.  */
+void
+init_locale_data (struct locale_file *file, size_t n_elements)
+{
+  /* Bookkeeping.  */
+  file->n_elements = n_elements;
+  file->next_element = 0;
+  file->structure_stage = 0;
+
+  /* Storage.  */
+  file->offsets = xmalloc (n_elements * sizeof (uint32_t));
+  obstack_init (&file->data);
+}
+
+/* Pad FILE's obstack object with zero bytes until its size is a multiple
+   of BOUNDARY.  The mask arithmetic assumes BOUNDARY is a power of
+   two.  */
+void
+align_locale_data (struct locale_file *file, size_t boundary)
+{
+  /* Number of bytes missing up to the next multiple of BOUNDARY.  */
+  size_t padding = -obstack_object_size (&file->data) & (boundary - 1);
+  char *pad_end;
+
+  /* Reserve the bytes, then clear them: obstack_blank leaves the new
+     space uninitialized.  */
+  obstack_blank (&file->data, padding);
+  pad_end = obstack_next_free (&file->data);
+  memset (pad_end - padding, 0, padding);
+}
+
+/* Record that FILE's next element contains no data.  Only the element's
+   offset is recorded (via record_offset); nothing is appended to FILE's
+   obstack here.  */
+void
+add_locale_empty (struct locale_file *file)
+{
+ record_offset (file);
+}
+
+/* Record that FILE's next element consists of SIZE bytes starting at DATA.
+   The bytes are copied to FILE's obstack as they are: this function
+   neither inserts alignment padding nor swaps byte order.  */
+void
+add_locale_raw_data (struct locale_file *file, const void *data, size_t size)
+{
+ record_offset (file);
+ obstack_grow (&file->data, data, size);
+}
+
+/* Finish the object currently being built on OBSTACK and copy its bytes
+   to FILE as the data of FILE's next element.  OBSTACK must be a
+   different obstack than FILE's own.  */
+void
+add_locale_raw_obstack (struct locale_file *file, struct obstack *obstack)
+{
+  /* The size has to be read before the object is finished.  */
+  const size_t nbytes = obstack_object_size (obstack);
+  void *bytes;
+
+  record_offset (file);
+  bytes = obstack_finish (obstack);
+  obstack_grow (&file->data, bytes, nbytes);
+}
+
+/* Append STRING, including its terminating NUL byte, to FILE as the
+   next element.  */
+void
+add_locale_string (struct locale_file *file, const char *string)
+{
+  /* The terminator is part of the stored element.  */
+  size_t n_bytes = strlen (string) + 1;
+
+  record_offset (file);
+  obstack_grow (&file->data, string, n_bytes);
+}
+
+/* Append the wide string STRING, including its terminating zero
+   character, to FILE as the next element.  The characters are stored
+   through add_locale_uint32_array.  */
+void
+add_locale_wstring (struct locale_file *file, const uint32_t *string)
+{
+  const wchar_t *wide = (const wchar_t *) string;
+  size_t n_chars = wcslen (wide) + 1;
+
+  add_locale_uint32_array (file, string, n_chars);
+}
+
+/* Append the 32-bit integer VALUE to FILE as the next element.  The
+   data is first aligned to LOCFILE_ALIGN and VALUE is stored in the
+   byte order selected for the output.  */
+void
+add_locale_uint32 (struct locale_file *file, uint32_t value)
+{
+  uint32_t stored;
+
+  /* Align before recording the offset so that the offset points at
+     the integer rather than at the padding.  */
+  align_locale_data (file, LOCFILE_ALIGN);
+  record_offset (file);
+
+  stored = maybe_swap_uint32 (value);
+  obstack_grow (&file->data, &stored, sizeof (stored));
+}
+
+/* Append the N_ELEMS 32-bit integers starting at DATA to FILE as the
+   next element.  The data is first aligned to LOCFILE_ALIGN and the
+   integers are stored in the byte order selected for the output.  */
+void
+add_locale_uint32_array (struct locale_file *file,
+			 const uint32_t *data, size_t n_elems)
+{
+  size_t n_bytes = n_elems * sizeof (uint32_t);
+
+  align_locale_data (file, LOCFILE_ALIGN);
+  record_offset (file);
+
+  /* Copy the integers unchanged, then fix up the byte order of the
+     copy that now sits at the end of the obstack object.  */
+  obstack_grow (&file->data, data, n_bytes);
+  maybe_swap_uint32_obstack (&file->data, n_elems);
+}
+
+/* Append the single byte VALUE to FILE as the next element.  */
+void
+add_locale_char (struct locale_file *file, char value)
+{
+  struct obstack *out = &file->data;
+
+  record_offset (file);
+  obstack_1grow (out, value);
+}
+
+/* Start building an element that contains several different pieces of data.
+   Subsequent calls to add_locale_* will add data to the same element up
+   till the next call to end_locale_structure.  The element's alignment
+   is dictated by the first piece of data added to it.
+
+   Must not be called while another structure or a prelude is open.  */
+void
+start_locale_structure (struct locale_file *file)
+{
+ /* Structures and preludes do not nest. */
+ assert (file->structure_stage == 0);
+ /* Stage 1: structure opened, its offset has not been recorded yet. */
+ file->structure_stage = 1;
+}
+
+/* Finish a structure element that was started by start_locale_structure.
+   Empty structures are OK and behave like add_locale_empty.  */
+void
+end_locale_structure (struct locale_file *file)
+{
+ /* If nothing was added since start_locale_structure (stage 1), this
+    records the offset of the empty element and moves to stage 2;
+    in stage 2 it does nothing. */
+ record_offset (file);
+ assert (file->structure_stage == 2);
+ /* Back to recording one offset per add_locale_* call. */
+ file->structure_stage = 0;
+}
+
+/* Start building data that goes before the next element's recorded offset.
+   Subsequent calls to add_locale_* will add data to the file without
+   treating any of it as the start of a new element.  Calling
+   end_locale_prelude switches back to the usual behavior.
+
+   Must not be called while a structure or another prelude is open.  */
+void
+start_locale_prelude (struct locale_file *file)
+{
+ assert (file->structure_stage == 0);
+ /* Stage 3: record_offset does nothing until end_locale_prelude. */
+ file->structure_stage = 3;
+}
+
+/* End a block started by start_locale_prelude.  The next add_locale_*
+   call records an element offset again.  */
+void
+end_locale_prelude (struct locale_file *file)
+{
+ /* Only valid while a prelude is open. */
+ assert (file->structure_stage == 3);
+ file->structure_stage = 0;
+}
+
+/* Write the category file described by FILE.  CATIDX is the category's
+   index (used for the file magic and for the to_archive slot) and
+   CATEGORY is its name, e.g. "LC_MESSAGES".  All elements announced to
+   init_locale_data must have been added.
+
+   Unless NO_ARCHIVE is set, no file is created: the image (header,
+   offset table, data) is assembled in memory, stored in
+   to_archive[CATIDX] together with its MD5 sum, and the function
+   returns.
+
+   Otherwise the image is written to OUTPUT_PATH followed by CATEGORY,
+   or to "<OUTPUT_PATH><CATEGORY>/SYS_<CATEGORY>" when the plain name has
+   to be a directory.  Afterwards the new file is compared with the
+   file of the same name below each path returned by siblings
+   (OUTPUT_PATH) and, if an identical one is found, replaced by a hard
+   link to it.
+
+   The header and FILE->offsets are byte-swapped in place when
+   swap_endianness_p is set, so FILE->offsets no longer holds
+   host-order values afterwards.  */
+void
+write_locale_data (const char *output_path, int catidx, const char *category,
+ struct locale_file *file)
+{
+ size_t cnt, step, maxiov;
+ int fd;
+ char *fname;
+ const char **other_paths;
+ uint32_t header[2];
+ size_t n_elem;
+ struct iovec vec[3];
+
+ assert (file->n_elements == file->next_element);
+ /* The image has three parts: a two-word header (magic and element
+    count), the table of element offsets, and the element data. */
+ header[0] = LIMAGIC (catidx);
+ header[1] = file->n_elements;
+ vec[0].iov_len = sizeof (header);
+ vec[0].iov_base = header;
+ vec[1].iov_len = sizeof (uint32_t) * file->n_elements;
+ vec[1].iov_base = file->offsets;
+ vec[2].iov_len = obstack_object_size (&file->data);
+ vec[2].iov_base = obstack_finish (&file->data);
+ /* The element data was already stored in output byte order when it
+    was added; header and offsets are converted here. */
+ maybe_swap_uint32_array (vec[0].iov_base, 2);
+ maybe_swap_uint32_array (vec[1].iov_base, file->n_elements);
+ n_elem = 3;
+ if (! no_archive)
+ {
+ /* The data will be added to the archive. For now we simply
+ generate the image which will be written. First determine
+ the size. */
+ /* Note: this CNT shadows the size_t CNT of the function scope. */
+ int cnt;
+ void *endp;
+
+ to_archive[catidx].size = 0;
+ for (cnt = 0; cnt < n_elem; ++cnt)
+ to_archive[catidx].size += vec[cnt].iov_len;
+
+ /* Allocate the memory for it. */
+ to_archive[catidx].addr = xmalloc (to_archive[catidx].size);
+
+ /* Fill it in. */
+ for (cnt = 0, endp = to_archive[catidx].addr; cnt < n_elem; ++cnt)
+ endp = mempcpy (endp, vec[cnt].iov_base, vec[cnt].iov_len);
+
+ /* Compute the MD5 sum for the data. */
+ __md5_buffer (to_archive[catidx].addr, to_archive[catidx].size,
+ to_archive[catidx].sum);
+
+ return;
+ }
+
+ /* Room for the longest name built below,
+    "<output_path><category>/SYS_<category>", plus the terminator. */
+ fname = xmalloc (strlen (output_path) + 2 * strlen (category) + 7);
+
+ /* Normally we write to the directory pointed to by the OUTPUT_PATH.
+ But for LC_MESSAGES we have to take care for the translation
+ data. This means we need to have a directory LC_MESSAGES in
+ which we place the file under the name SYS_LC_MESSAGES. */
+ sprintf (fname, "%s%s", output_path, category);
+ /* FD == -2 means "no decision yet: create FNAME as a plain file".
+    FD == -1 with ERRNO == EISDIR selects the SYS_<category> name in
+    the error handling further down. */
+ fd = -2;
+ if (strcmp (category, "LC_MESSAGES") == 0)
+ {
+ struct stat64 st;
+
+ if (stat64 (fname, &st) < 0)
+ {
+ /* Nothing there yet: try to create the directory. If that
+    fails FD stays -2 and a plain file is attempted. */
+ if (mkdir (fname, 0777) >= 0)
+ {
+ fd = -1;
+ errno = EISDIR;
+ }
+ }
+ else if (!S_ISREG (st.st_mode))
+ {
+ fd = -1;
+ errno = EISDIR;
+ }
+ }
+
+ /* Create the locale file with nlinks == 1; this avoids crashing processes
+ which currently use the locale and damaging files belonging to other
+ locales as well. */
+ if (fd == -2)
+ {
+ unlink (fname);
+ fd = creat (fname, 0666);
+ }
+
+ if (fd == -1)
+ {
+ int save_err = errno;
+
+ if (errno == EISDIR)
+ {
+ /* FNAME is a directory: place the file inside it. */
+ sprintf (fname, "%1$s%2$s/SYS_%2$s", output_path, category);
+ unlink (fname);
+ fd = creat (fname, 0666);
+ if (fd == -1)
+ save_err = errno;
+ }
+
+ if (fd == -1)
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, save_err, _("\
+cannot open output file `%s' for category `%s'"), fname, category));
+ free (fname);
+ return;
+ }
+ }
+
+#ifdef UIO_MAXIOV
+ maxiov = UIO_MAXIOV;
+#else
+ maxiov = sysconf (_SC_UIO_MAXIOV);
+#endif
+
+ /* Write the data using writev. But we must take care for the
+ limitation of the implementation. */
+ /* NOTE(review): only a negative return value is treated as failure;
+    a short write (fewer bytes written than requested) is neither
+    detected nor retried. */
+ for (cnt = 0; cnt < n_elem; cnt += step)
+ {
+ step = n_elem - cnt;
+ if (maxiov > 0)
+ step = MIN (maxiov, step);
+
+ if (writev (fd, &vec[cnt], step) < 0)
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, errno, _("\
+failure while writing data for category `%s'"), category));
+ /* NOTE(review): execution continues with the link-sharing step
+    below even though the file is incomplete. */
+ break;
+ }
+ }
+
+ close (fd);
+
+ /* Compare the file with the locale data files for the same category in
+ other locales, and see if we can reuse it, to save disk space. */
+ other_paths = siblings (output_path);
+ if (other_paths != NULL)
+ {
+ struct stat64 fname_stat;
+
+ if (lstat64 (fname, &fname_stat) >= 0
+ && S_ISREG (fname_stat.st_mode))
+ {
+ /* The part of FNAME after OUTPUT_PATH; appended to each sibling
+    path to name the candidate file. */
+ const char *fname_tail = fname + strlen (output_path);
+ const char **other_p;
+ int seen_count;
+ ino_t *seen_inodes;
+
+ /* Size SEEN_INODES for the worst case of one entry per sibling. */
+ seen_count = 0;
+ for (other_p = other_paths; *other_p; other_p++)
+ seen_count++;
+ seen_inodes = (ino_t *) xmalloc (seen_count * sizeof (ino_t));
+ seen_count = 0;
+
+ for (other_p = other_paths; *other_p; other_p++)
+ {
+ const char *other_path = *other_p;
+ size_t other_path_len = strlen (other_path);
+ char *other_fname;
+ struct stat64 other_fname_stat;
+
+ other_fname =
+ (char *) xmalloc (other_path_len + strlen (fname_tail) + 1);
+ memcpy (other_fname, other_path, other_path_len);
+ strcpy (other_fname + other_path_len, fname_tail);
+
+ if (lstat64 (other_fname, &other_fname_stat) >= 0
+ && S_ISREG (other_fname_stat.st_mode)
+ /* Consider only files on the same device.
+ Otherwise hard linking won't work anyway. */
+ && other_fname_stat.st_dev == fname_stat.st_dev
+ /* Consider only files with the same permissions.
+ Otherwise there are security risks. */
+ && other_fname_stat.st_uid == fname_stat.st_uid
+ && other_fname_stat.st_gid == fname_stat.st_gid
+ && other_fname_stat.st_mode == fname_stat.st_mode
+ /* Don't compare fname with itself. */
+ && other_fname_stat.st_ino != fname_stat.st_ino
+ /* Files must have the same size, otherwise they
+ cannot be the same. */
+ && other_fname_stat.st_size == fname_stat.st_size)
+ {
+ /* Skip this file if we have already read it (under a
+ different name). */
+ int i;
+
+ for (i = seen_count - 1; i >= 0; i--)
+ if (seen_inodes[i] == other_fname_stat.st_ino)
+ break;
+ if (i < 0)
+ {
+ /* Now compare fname and other_fname for real. */
+ blksize_t blocksize;
+
+#ifdef _STATBUF_ST_BLKSIZE
+ blocksize = MAX (fname_stat.st_blksize,
+ other_fname_stat.st_blksize);
+ if (blocksize > 8 * 1024)
+ blocksize = 8 * 1024;
+#else
+ blocksize = 8 * 1024;
+#endif
+
+ if (compare_files (fname, other_fname,
+ fname_stat.st_size, blocksize) == 0)
+ {
+ /* Found! other_fname is identical to fname. */
+ /* Link other_fname to fname. But use a temporary
+ file, in case hard links don't work on the
+ particular filesystem. */
+ char * tmp_fname =
+ (char *) xmalloc (strlen (fname) + 4 + 1);
+
+ strcpy (stpcpy (tmp_fname, fname), ".tmp");
+
+ if (link (other_fname, tmp_fname) >= 0)
+ {
+ /* NOTE(review): if the rename fails, FNAME has
+    already been removed and TMP_FNAME is left
+    behind. */
+ unlink (fname);
+ if (rename (tmp_fname, fname) < 0)
+ {
+ if (!be_quiet)
+ WITH_CUR_LOCALE (error (0, errno, _("\
+cannot create output file `%s' for category `%s'"), fname, category));
+ }
+ free (tmp_fname);
+ free (other_fname);
+ /* One shared copy is enough; stop searching. */
+ break;
+ }
+ free (tmp_fname);
+ }
+
+ /* Don't compare with this file a second time. */
+ seen_inodes[seen_count++] = other_fname_stat.st_ino;
+ }
+ }
+ free (other_fname);
+ }
+ free (seen_inodes);
+ }
+ }
+
+ free (fname);
+}
+
+
+/* General handling of `copy'.
+
+   Parse the argument of a `copy' keyword read from LDFILE.  Unless
+   IGNORE_CONTENT is nonzero, the named locale is put on the read list
+   for category LOCALE and remembered in RESULT->copy_name.  The rest
+   of the category must then consist only of `END xxx', where TOKEN is
+   the keyword token expected after END and LOCALE_NAME the category
+   name used in diagnostics.  */
+void
+handle_copy (struct linereader *ldfile, const struct charmap_t *charmap,
+	     const char *repertoire_name, struct localedef_t *result,
+	     enum token_t token, int locale, const char *locale_name,
+	     int ignore_content)
+{
+  struct token *now;
+  int complained = 0;
+
+  /* The argument: the name of the locale to copy from.  */
+  now = lr_token (ldfile, charmap, result, NULL, verbose);
+  if (now->tok != tok_string)
+    lr_error (ldfile, _("expecting string argument for `copy'"));
+  else if (!ignore_content)
+    {
+      if (now->val.str.startmb != NULL)
+	{
+	  (void) add_to_readlist (locale, now->val.str.startmb,
+				  repertoire_name, 1, NULL);
+	  result->copy_name[locale] = now->val.str.startmb;
+	}
+      else
+	lr_error (ldfile, _("\
+locale name should consist only of portable characters"));
+    }
+
+  lr_ignore_rest (ldfile, now->tok == tok_string);
+
+  /* The rest of the line must be empty and the next keyword must be
+     `END xxx'.  Skip anything else, complaining only once.  */
+  for (;;)
+    {
+      now = lr_token (ldfile, charmap, result, NULL, verbose);
+      if (now->tok == tok_end || now->tok == tok_eof)
+	break;
+
+      if (!complained)
+	{
+	  lr_error (ldfile, _("\
+no other keyword shall be specified when `copy' is used"));
+	  complained = 1;
+	}
+
+      lr_ignore_rest (ldfile, 0);
+    }
+
+  if (now->tok == tok_eof)
+    {
+      /* When we come here we reached the end of the file.  */
+      lr_error (ldfile, _("%s: premature end of file"), locale_name);
+      return;
+    }
+
+  /* Handle `END xxx'.  */
+  now = lr_token (ldfile, charmap, result, NULL, verbose);
+
+  if (now->tok != token)
+    lr_error (ldfile, _("\
+`%1$s' definition does not end with `END %1$s'"), locale_name);
+
+  lr_ignore_rest (ldfile, now->tok == token);
+}
diff --git a/REORG.TODO/locale/programs/locfile.h b/REORG.TODO/locale/programs/locfile.h
new file mode 100644
index 0000000000..3407e13c13
--- /dev/null
+++ b/REORG.TODO/locale/programs/locfile.h
@@ -0,0 +1,279 @@
+/* Copyright (C) 1996-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _LOCFILE_H
+#define _LOCFILE_H 1
+
+#include <byteswap.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include "obstack.h"
+#include "linereader.h"
+#include "localedef.h"
+
+/* Structure for storing the contents of a category file while it is
+   being built with the add_locale_* functions. */
+struct locale_file
+{
+ /* Total number of elements the file will hold, and the index of the
+    next element whose offset is still to be recorded. */
+ size_t n_elements, next_element;
+ /* Array of N_ELEMENTS file offsets, one per element. */
+ uint32_t *offsets;
+ /* The element data, accumulated as one growing obstack object. */
+ struct obstack data;
+ /* 0: outside any block; 1: structure started, offset not yet
+    recorded; 2: structure with recorded offset; 3: inside a prelude. */
+ int structure_stage;
+};
+
+
+/* Macros used in the parser. */
+
+/* Report a syntax error with the format STRING and optional ARGS via
+   lr_error, then skip the rest of the current input line.  Expects a
+   variable named `ldfile' to be in scope where the macro is used. */
+#define SYNTAX_ERROR(string, args...) \
+ do \
+ { \
+ lr_error (ldfile, string, ## args); \
+ lr_ignore_rest (ldfile, 0); \
+ } \
+ while (0)
+
+
+/* General handling of `copy'. */
+extern void handle_copy (struct linereader *ldfile,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ struct localedef_t *result, enum token_t token,
+ int locale, const char *locale_name,
+ int ignore_content);
+
+/* Found in locfile.c. */
+extern int locfile_read (struct localedef_t *result,
+ const struct charmap_t *charmap);
+
+/* Check validity of all the locale data. */
+extern void check_all_categories (struct localedef_t *definitions,
+ const struct charmap_t *charmap);
+
+/* Write out all locale categories. */
+extern void write_all_categories (struct localedef_t *definitions,
+ const struct charmap_t *charmap,
+ const char *locname,
+ const char *output_path);
+
+extern bool swap_endianness_p;
+
+/* Select the byte order of the output files: big-endian if BIG_ENDIAN
+   is true, little-endian otherwise.  Swapping is enabled exactly when
+   the requested order differs from that of the machine running
+   localedef.  */
+static inline void
+set_big_endian (bool big_endian)
+{
+  bool host_is_big_endian = (__BYTE_ORDER == __BIG_ENDIAN);
+
+  swap_endianness_p = (big_endian != host_is_big_endian);
+}
+
+/* Return VALUE in the form in which it has to be stored so that it
+   has the correct byte order in the output files: byte-swapped if
+   swap_endianness_p is set, unchanged otherwise.  */
+static uint32_t
+__attribute__ ((unused))
+maybe_swap_uint32 (uint32_t value)
+{
+  if (swap_endianness_p)
+    return bswap_32 (value);
+
+  return value;
+}
+
+/* Convert the N uint32_t values starting at ARRAY, in place, to the
+   byte order of the output files.  Does nothing unless
+   swap_endianness_p is set.  */
+static inline void
+maybe_swap_uint32_array (uint32_t *array, size_t n)
+{
+  size_t i;
+
+  if (!swap_endianness_p)
+    return;
+
+  for (i = 0; i < n; ++i)
+    array[i] = bswap_32 (array[i]);
+}
+
+/* Like maybe_swap_uint32_array, but operate on the N uint32_t values
+   that form the end of the object currently being built on OBSTACK.  */
+static inline void
+maybe_swap_uint32_obstack (struct obstack *obstack, size_t n)
+{
+  uint32_t *object_end = (uint32_t *) obstack_next_free (obstack);
+
+  maybe_swap_uint32_array (object_end - n, n);
+}
+
+/* Build up the contents of a category file and write it out. */
+extern void init_locale_data (struct locale_file *file, size_t n_elements);
+extern void align_locale_data (struct locale_file *file, size_t boundary);
+extern void add_locale_empty (struct locale_file *file);
+extern void add_locale_raw_data (struct locale_file *file, const void *data,
+ size_t size);
+extern void add_locale_raw_obstack (struct locale_file *file,
+ struct obstack *obstack);
+extern void add_locale_string (struct locale_file *file, const char *string);
+extern void add_locale_wstring (struct locale_file *file,
+ const uint32_t *string);
+extern void add_locale_uint32 (struct locale_file *file, uint32_t value);
+extern void add_locale_uint32_array (struct locale_file *file,
+ const uint32_t *data, size_t n_elems);
+extern void add_locale_char (struct locale_file *file, char value);
+extern void start_locale_structure (struct locale_file *file);
+extern void end_locale_structure (struct locale_file *file);
+extern void start_locale_prelude (struct locale_file *file);
+extern void end_locale_prelude (struct locale_file *file);
+extern void write_locale_data (const char *output_path, int catidx,
+ const char *category, struct locale_file *file);
+
+
+/* Entrypoints for the parsers of the individual categories. */
+
+/* Handle LC_CTYPE category. */
+extern void ctype_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void ctype_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void ctype_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+extern uint32_t *find_translit (struct localedef_t *locale,
+ const struct charmap_t *charmap, uint32_t wch);
+
+/* Handle LC_COLLATE category. */
+extern void collate_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void collate_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void collate_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_MONETARY category. */
+extern void monetary_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void monetary_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void monetary_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_NUMERIC category. */
+extern void numeric_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void numeric_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void numeric_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_MESSAGES category. */
+extern void messages_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void messages_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void messages_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_TIME category. */
+extern void time_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void time_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void time_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_PAPER category. */
+extern void paper_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void paper_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void paper_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_NAME category. */
+extern void name_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void name_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void name_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_ADDRESS category. */
+extern void address_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void address_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void address_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_TELEPHONE category. */
+extern void telephone_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void telephone_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void telephone_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_MEASUREMENT category. */
+extern void measurement_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void measurement_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void measurement_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+/* Handle LC_IDENTIFICATION category. */
+extern void identification_read (struct linereader *ldfile,
+ struct localedef_t *result,
+ const struct charmap_t *charmap,
+ const char *repertoire_name,
+ int ignore_content);
+extern void identification_finish (struct localedef_t *locale,
+ const struct charmap_t *charmap);
+extern void identification_output (struct localedef_t *locale,
+ const struct charmap_t *charmap,
+ const char *output_path);
+
+#endif /* locfile.h */
diff --git a/REORG.TODO/locale/programs/repertoire.c b/REORG.TODO/locale/programs/repertoire.c
new file mode 100644
index 0000000000..61f2c055e7
--- /dev/null
+++ b/REORG.TODO/locale/programs/repertoire.c
@@ -0,0 +1,524 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <obstack.h>
+#include <search.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "localedef.h"
+#include "linereader.h"
+#include "charmap.h"
+#include "repertoire.h"
+#include "simple-hash.h"
+
+
+/* Simple keyword hashing for the repertoiremap. */
+static const struct keyword_t *repertoiremap_hash (const char *str,
+ unsigned int len);
+static void repertoire_new_char (struct linereader *lr, hash_table *ht,
+ hash_table *rt, struct obstack *ob,
+ uint32_t value, const char *from,
+ const char *to, int decimal_ellipsis);
+static int repertoire_compare (const void *p1, const void *p2);
+
+/* Already known repertoire maps. */
+static void *known;
+
+/* List of repertoire maps which are not available and which have been
+ reported to not be. */
+static void *unavailable;
+
+
+/* Read the repertoire map FILENAME and return its in-memory
+   representation, or NULL if the file cannot be opened or the hash
+   tables cannot be initialized.
+
+   Maps that were read before are cached in KNOWN and returned
+   directly.  FILENAME is first opened as given.  If that fails and it
+   contains no slash, each directory DIR listed in the I18NPATH
+   environment variable is tried as DIR/repertoiremaps/FILENAME and
+   then DIR/FILENAME, and finally REPERTOIREMAP_PATH/FILENAME is tried.
+
+   Syntax errors in the file are reported through lr_error and the
+   offending line is skipped; the map built from the remaining lines
+   is still returned. */
+struct repertoire_t *
+repertoire_read (const char *filename)
+{
+ struct linereader *repfile;
+ struct repertoire_t *result;
+ struct repertoire_t **resultp;
+ struct repertoire_t search;
+ int state;
+ char *from_name = NULL;
+ char *to_name = NULL;
+ enum token_t ellipsis = tok_none;
+
+ /* Only the name is used by repertoire_compare, so a partially
+    initialized search key is sufficient. */
+ search.name = filename;
+ resultp = tfind (&search, &known, &repertoire_compare);
+ if (resultp != NULL)
+ return *resultp;
+
+ /* Determine path. */
+ repfile = lr_open (filename, repertoiremap_hash);
+ if (repfile == NULL)
+ {
+ if (strchr (filename, '/') == NULL)
+ {
+ char *i18npath = getenv ("I18NPATH");
+ if (i18npath != NULL && *i18npath != '\0')
+ {
+ const size_t pathlen = strlen (i18npath);
+ /* strsep modifies its input, so work on a private copy. */
+ char i18npathbuf[pathlen + 1];
+ /* Large enough for any single directory of I18NPATH followed
+    by "/repertoiremaps/" and FILENAME. */
+ char path[strlen (filename) + 1 + pathlen
+ + sizeof ("/repertoiremaps/") - 1];
+ char *next;
+ i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1);
+
+ while (repfile == NULL
+ && (next = strsep (&i18npath, ":")) != NULL)
+ {
+ stpcpy (stpcpy (stpcpy (path, next), "/repertoiremaps/"),
+ filename);
+
+ repfile = lr_open (path, repertoiremap_hash);
+
+ if (repfile == NULL)
+ {
+ stpcpy (stpcpy (stpcpy (path, next), "/"), filename);
+
+ repfile = lr_open (path, repertoiremap_hash);
+ }
+ }
+ }
+
+ if (repfile == NULL)
+ {
+ /* Look in the systems charmap directory. */
+ char *buf = xmalloc (strlen (filename) + 1
+ + sizeof (REPERTOIREMAP_PATH));
+
+ stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"),
+ filename);
+ repfile = lr_open (buf, repertoiremap_hash);
+
+ free (buf);
+ }
+ }
+
+ if (repfile == NULL)
+ return NULL;
+ }
+
+ /* We don't want symbolic names in string to be translated. */
+ repfile->translate_strings = 0;
+
+ /* Allocate room for result. */
+ result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t));
+ memset (result, '\0', sizeof (struct repertoire_t));
+
+ result->name = xstrdup (filename);
+
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+ obstack_init (&result->mem_pool);
+
+ if (init_hash (&result->char_table, 256)
+ || init_hash (&result->reverse_table, 256)
+ || init_hash (&result->seq_table, 256))
+ {
+ /* NOTE(review): only RESULT itself is released here; RESULT->name,
+    the obstack, any table already initialized and REPFILE are not. */
+ free (result);
+ return NULL;
+ }
+
+ /* We use a state machine to describe the charmap description file
+ format. */
+ /* States: 1 = prolog; 2 = start of a definition line; 3 = after the
+    symbolic name; 4 = after an ellipsis, expecting the end-of-range
+    name; 5 = after the end-of-range name, expecting the value;
+    90 = after END. */
+ state = 1;
+ while (1)
+ {
+ /* What's on? */
+ struct token *now = lr_token (repfile, NULL, NULL, NULL, verbose);
+ enum token_t nowtok = now->tok;
+ struct token *arg;
+
+ if (nowtok == tok_eof)
+ break;
+
+ switch (state)
+ {
+ case 1:
+ /* We haven't yet read any character definition. This is where
+ we accept escape_char and comment_char definitions. */
+ if (nowtok == tok_eol)
+ /* Ignore empty lines. */
+ continue;
+
+ if (nowtok == tok_escape_char || nowtok == tok_comment_char)
+ {
+ /* We know that we need an argument. */
+ arg = lr_token (repfile, NULL, NULL, NULL, verbose);
+
+ if (arg->tok != tok_ident)
+ {
+ lr_error (repfile, _("syntax error in prolog: %s"),
+ _("bad argument"));
+
+ lr_ignore_rest (repfile, 0);
+ continue;
+ }
+
+ if (arg->val.str.lenmb != 1)
+ {
+ lr_error (repfile, _("\
+argument to <%s> must be a single character"),
+ nowtok == tok_escape_char ? "escape_char"
+ : "comment_char");
+
+ lr_ignore_rest (repfile, 0);
+ continue;
+ }
+
+ if (nowtok == tok_escape_char)
+ repfile->escape_char = *arg->val.str.startmb;
+ else
+ repfile->comment_char = *arg->val.str.startmb;
+
+ lr_ignore_rest (repfile, 1);
+ continue;
+ }
+
+ if (nowtok == tok_charids)
+ {
+ lr_ignore_rest (repfile, 1);
+
+ state = 2;
+ continue;
+ }
+
+ /* Otherwise we start reading the character definitions. */
+ state = 2;
+ /* FALLTHROUGH */
+
+ case 2:
+ /* We are now in the body. Each line
+ must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
+ if (nowtok == tok_eol)
+ /* Ignore empty lines. */
+ continue;
+
+ if (nowtok == tok_end)
+ {
+ state = 90;
+ continue;
+ }
+
+ if (nowtok != tok_bsymbol)
+ {
+ lr_error (repfile,
+ _("syntax error in repertoire map definition: %s"),
+ _("no symbolic name given"));
+
+ lr_ignore_rest (repfile, 0);
+ continue;
+ }
+
+ /* If the previous line was not completely correct free the
+ used memory. */
+ if (from_name != NULL)
+ obstack_free (&result->mem_pool, from_name);
+
+ from_name = (char *) obstack_copy0 (&result->mem_pool,
+ now->val.str.startmb,
+ now->val.str.lenmb);
+ to_name = NULL;
+
+ state = 3;
+ continue;
+
+ case 3:
+ /* We have two possibilities: We can see an ellipsis or an
+ encoding value. */
+ if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
+ || nowtok == tok_ellipsis2)
+ {
+ ellipsis = nowtok;
+ state = 4;
+ continue;
+ }
+ /* FALLTHROUGH */
+
+ case 5:
+ /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where
+ the xxx mean a hexadecimal value. */
+ state = 2;
+
+ errno = 0;
+ if (nowtok != tok_ucs4)
+ {
+ lr_error (repfile,
+ _("syntax error in repertoire map definition: %s"),
+ _("no <Uxxxx> or <Uxxxxxxxx> value given"));
+
+ lr_ignore_rest (repfile, 0);
+ continue;
+ }
+
+ /* We've found a new valid definition. */
+ /* Every ellipsis except tok_ellipsis2 selects decimal numbering of
+    the range; TO_NAME is NULL for a single symbol. */
+ repertoire_new_char (repfile, &result->char_table,
+ &result->reverse_table, &result->mem_pool,
+ now->val.ucs4, from_name, to_name,
+ ellipsis != tok_ellipsis2);
+
+ /* Ignore the rest of the line. */
+ lr_ignore_rest (repfile, 0);
+
+ /* The names are now referenced by the tables; forget them here so
+    that state 2 does not free them. */
+ from_name = NULL;
+ to_name = NULL;
+
+ continue;
+
+ case 4:
+ if (nowtok != tok_bsymbol)
+ {
+ lr_error (repfile,
+ _("syntax error in repertoire map definition: %s"),
+ _("no symbolic name given for end of range"));
+
+ lr_ignore_rest (repfile, 0);
+ state = 2;
+ continue;
+ }
+
+ /* Copy the to-name in a safe place. */
+ to_name = (char *) obstack_copy0 (&result->mem_pool,
+ repfile->token.val.str.startmb,
+ repfile->token.val.str.lenmb);
+
+ state = 5;
+ continue;
+
+ case 90:
+ if (nowtok != tok_charids)
+ lr_error (repfile, _("\
+%1$s: definition does not end with `END %1$s'"), "CHARIDS");
+
+ lr_ignore_rest (repfile, nowtok == tok_charids);
+ break;
+ }
+
+ /* Only state 90 leaves the switch without `continue'; the token
+    after END has been handled, so stop reading. */
+ break;
+ }
+
+ if (state != 2 && state != 90 && !be_quiet)
+ WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
+ repfile->fname));
+
+ lr_close (repfile);
+
+ /* Remember the map so that later requests for FILENAME are served
+    from KNOWN. */
+ if (tsearch (result, &known, &repertoire_compare) == NULL)
+ /* Something went wrong. */
+ WITH_CUR_LOCALE (error (0, errno, _("cannot save new repertoire map")));
+
+ return result;
+}
+
+
+/* Report that the repertoire map NAME could not be found.  Each name
+   is reported only once; the names already reported are kept in
+   UNAVAILABLE.  */
+void
+repertoire_complain (const char *name)
+{
+  /* Nothing to do if we complained about this map before.  */
+  if (tfind (name, &unavailable, (__compar_fn_t) strcmp) != NULL)
+    return;
+
+  WITH_CUR_LOCALE (error (0, errno, _("\
+repertoire map file `%s' not found"), name));
+
+  /* Remember that we reported this map.  */
+  tsearch (name, &unavailable, (__compar_fn_t) strcmp);
+}
+
+
+/* Comparison function for the tree of known repertoire maps: order
+   the two struct repertoire_t objects P1 and P2 by their names.  */
+static int
+repertoire_compare (const void *p1, const void *p2)
+{
+  const struct repertoire_t *lhs = p1;
+  const struct repertoire_t *rhs = p2;
+
+  return strcmp (lhs->name, rhs->name);
+}
+
+
+/* Keyword lookup for repertoire map files: return the entry for the
+   keyword made up of the LEN bytes at STR, or NULL if it is not one of
+   the known keywords.  */
+static const struct keyword_t *
+repertoiremap_hash (const char *str, unsigned int len)
+{
+  static const struct keyword_t wordlist[] =
+  {
+    {"escape_char", tok_escape_char, 0},
+    {"comment_char", tok_comment_char, 0},
+    {"CHARIDS", tok_charids, 0},
+    {"END", tok_end, 0},
+  };
+  size_t i;
+
+  /* The list is tiny, so a linear scan is all that is needed.  */
+  for (i = 0; i < sizeof (wordlist) / sizeof (wordlist[0]); ++i)
+    if (strlen (wordlist[i].name) == len
+	&& memcmp (wordlist[i].name, str, len) == 0)
+      return &wordlist[i];
+
+  return NULL;
+}
+
+
+/* Enter one definition of a repertoire map into the tables.
+
+   LR is used for error reporting.  HT maps symbolic names to UCS4
+   values and RT maps UCS4 values back to symbolic names.  OB provides
+   the storage for generated names and for the keys of RT.  VALUE is
+   the UCS4 value belonging to FROM.
+
+   If TO is NULL the single symbol FROM is defined.  Otherwise FROM and
+   TO name the first and the last symbol of a range: both names must
+   have the same length and consist of the same prefix followed by a
+   number, decimal if DECIMAL_ELLIPSIS is nonzero and hexadecimal
+   otherwise, and the number in TO must not be smaller than the one in
+   FROM.  Every symbol of the range is defined, with consecutive values
+   starting at VALUE.  Invalid ranges are reported and ignored.  */
+static void
+repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt,
+		     struct obstack *ob, uint32_t value, const char *from,
+		     const char *to, int decimal_ellipsis)
+{
+  char *from_end;
+  char *to_end;
+  const char *cp;
+  char *buf;
+  int prefix_len, len1, len2;
+  unsigned long int from_nr, to_nr, cnt;
+
+  if (to == NULL)
+    {
+      insert_entry (ht, from, strlen (from),
+		    (void *) (unsigned long int) value);
+      /* Please note that it isn't a bug if a symbol is defined more
+	 than once.  All later definitions are simply discarded.  */
+
+      insert_entry (rt, obstack_copy (ob, &value, sizeof (value)),
+		    sizeof (value), (void *) from);
+
+      return;
+    }
+
+  /* We have a range: the names must have equal prefixes and an equal
+     number of digits, and the second number must be greater than or
+     equal to the first.  */
+  len1 = strlen (from);
+  len2 = strlen (to);
+
+  if (len1 != len2)
+    {
+    invalid_range:
+      lr_error (lr, _("invalid names for character range"));
+      return;
+    }
+
+  /* Find the start of the numeric suffix by scanning backwards from
+     the end of FROM.  CP points just past the character examined, and
+     the bound is tested before the character is read, so names that
+     are empty or consist only of digits are handled safely.  */
+  cp = from + len1;
+  if (decimal_ellipsis)
+    while (cp > from && isdigit ((unsigned char) cp[-1]))
+      --cp;
+  else
+    while (cp > from && isxdigit ((unsigned char) cp[-1]))
+      {
+	if (!isdigit ((unsigned char) cp[-1])
+	    && !isupper ((unsigned char) cp[-1]))
+	  lr_error (lr, _("\
+hexadecimal range format should use only capital characters"));
+	--cp;
+      }
+
+  prefix_len = cp - from;
+
+  /* There must be at least one digit and both names must share the
+     prefix.  */
+  if (cp == from + len1 || strncmp (from, to, prefix_len) != 0)
+    goto invalid_range;
+
+  errno = 0;
+  from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
+  if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
+      || ((to_nr = strtoul (&to[prefix_len], &to_end,
+			    decimal_ellipsis ? 10 : 16)) == ULONG_MAX
+	  && errno == ERANGE)
+      || *to_end != '\0')
+    {
+      lr_error (lr, _("<%s> and <%s> are invalid names for range"),
+		from, to);
+      return;
+    }
+
+  if (from_nr > to_nr)
+    {
+      lr_error (lr, _("upper limit in range is smaller than lower limit"));
+      return;
+    }
+
+  /* FROM_NR <= TO_NR holds here, so the body runs at least once.  The
+     end test is made after the body so that CNT cannot wrap around
+     when TO_NR is ULONG_MAX.  */
+  for (cnt = from_nr; ; ++cnt)
+    {
+      uint32_t this_value = value + (cnt - from_nr);
+
+      /* Build the symbolic name of this member of the range and finish
+	 the obstack object so that BUF is a stable pointer to it and
+	 the key allocated below starts a fresh object.  */
+      obstack_printf (ob, decimal_ellipsis ? "%.*s%0*lu" : "%.*s%0*lX",
+		      prefix_len, from, len1 - prefix_len, cnt);
+      obstack_1grow (ob, '\0');
+      buf = obstack_finish (ob);
+
+      insert_entry (ht, buf, len1,
+		    (void *) (unsigned long int) this_value);
+      /* Please note we don't examine the return value since it is no error
+	 if we have two definitions for a symbol.  */
+
+      /* Map the value back to its own name, not to the first name of
+	 the range.  */
+      insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)),
+		    sizeof (this_value), (void *) buf);
+
+      if (cnt == to_nr)
+	break;
+    }
+}
+
+
+/* Return the UCS4 value of the symbol given by the LEN bytes at NAME
+   in the repertoire map REP.  ILLEGAL_CHAR_VALUE is returned if REP is
+   NULL or the symbol is not found.  */
+uint32_t
+repertoire_find_value (const struct repertoire_t *rep, const char *name,
+		       size_t len)
+{
+  void *found;
+
+  if (rep == NULL
+      || find_entry ((hash_table *) &rep->char_table, name, len, &found) < 0)
+    return ILLEGAL_CHAR_VALUE;
+
+  /* The value was stored directly in the table's data pointer.  */
+  return (uint32_t) ((unsigned long int) found);
+}
+
+
+/* Return the symbolic name recorded for the UCS4 value UCS in the
+   repertoire map REP, or NULL if REP is NULL or there is no entry.  */
+const char *
+repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs)
+{
+  void *found;
+
+  if (rep == NULL
+      || find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs),
+		     &found) < 0)
+    return NULL;
+
+  return (const char *) found;
+}
+
+
+/* Return the character sequence recorded for the UCS4 value UCS in
+   the sequence table of the repertoire map REP, or NULL if REP is NULL
+   or there is no entry.  */
+struct charseq *
+repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs)
+{
+  void *found;
+
+  if (rep == NULL
+      || find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs),
+		     &found) < 0)
+    return NULL;
+
+  return (struct charseq *) found;
+}
diff --git a/REORG.TODO/locale/programs/repertoire.h b/REORG.TODO/locale/programs/repertoire.h
new file mode 100644
index 0000000000..f07ffcf1f4
--- /dev/null
+++ b/REORG.TODO/locale/programs/repertoire.h
@@ -0,0 +1,64 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _REPERTOIREMAP_H
+#define _REPERTOIREMAP_H 1
+
+#include <obstack.h>
+#include <stdint.h>
+
+#include "charmap.h"
+#include "simple-hash.h"
+
+/* In-memory form of a repertoire map, queried through the
+   repertoire_find_* functions.  */
+struct repertoire_t
+{
+ const char *name; /* NOTE(review): presumably the map's name; confirm in repertoire_read.  */
+ struct obstack mem_pool; /* NOTE(review): presumably backs the table contents; confirm.  */
+ hash_table char_table; /* Symbolic name -> UCS4 value (value kept in the data pointer).  */
+ hash_table reverse_table; /* UCS4 value (keyed by its raw bytes) -> symbolic name.  */
+ hash_table seq_table; /* UCS4 value (keyed by its raw bytes) -> struct charseq *.  */
+};
+
+
+/* We need one value to mark the error case. Let's use 0xffffffff.
+ I.e., it is placed in the last page of ISO 10646. For now only the
+ first is used and we have plenty of room. */
+#define ILLEGAL_CHAR_VALUE ((uint32_t) 0xffffffffu)
+
+/* Another value is needed to signal that a value is not yet determined. */
+#define UNINITIALIZED_CHAR_VALUE ((uint32_t) 0xfffffffeu)
+
+
+/* Prototypes for repertoire map handling functions. */
+extern struct repertoire_t *repertoire_read (const char *filename);
+
+/* Report missing repertoire map. */
+extern void repertoire_complain (const char *name);
+
+/* Return UCS4 value of character with given NAME. */
+extern uint32_t repertoire_find_value (const struct repertoire_t *repertoire,
+ const char *name, size_t len);
+
+/* Return symbol for given UCS4 value. */
+extern const char *repertoire_find_symbol (const struct repertoire_t *repertoire,
+ uint32_t ucs);
+
+/* Query the hash table to memoize mapping from UCS4 to byte sequences. */
+extern struct charseq *repertoire_find_seq (const struct repertoire_t *rep,
+ uint32_t ucs);
+
+#endif /* repertoiremap.h */
diff --git a/REORG.TODO/locale/programs/simple-hash.c b/REORG.TODO/locale/programs/simple-hash.c
new file mode 100644
index 0000000000..5e62e249a6
--- /dev/null
+++ b/REORG.TODO/locale/programs/simple-hash.c
@@ -0,0 +1,291 @@
+/* Implement simple hashing table with string based keys.
+ Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <obstack.h>
+
+#ifdef HAVE_VALUES_H
+# include <values.h>
+#endif
+
+#include "simple-hash.h"
+
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+#ifndef BITSPERBYTE
+# define BITSPERBYTE 8
+#endif
+
+#define hashval_t uint32_t
+#include "hashval.h"
+
+#include <programs/xmalloc.h>
+
+/* One slot of the array that htab->table points to.  */
+typedef struct hash_entry
+{
+ unsigned long used; /* Hash value of the stored key; tested for zero to detect a free slot.  */
+ const void *key; /* Key bytes (insert_entry stores a copy made on the table's obstack).  */
+ size_t keylen; /* Length of KEY in bytes.  */
+ void *data; /* Caller-supplied value associated with the key.  */
+ struct hash_entry *next; /* Successor in the circular list of occupied slots.  */
+}
+hash_entry;
+
+/* Prototypes for local functions. */
+static void insert_entry_2 (hash_table *htab, const void *key, size_t keylen,
+ unsigned long hval, size_t idx, void *data);
+static size_t lookup (const hash_table *htab, const void *key, size_t keylen,
+ unsigned long int hval);
+static int is_prime (unsigned long int candidate);
+
+
+/* Prepare HTAB for use with room for roughly INIT_SIZE entries.  The
+   slot count is rounded up to a prime, and one extra slot is allocated
+   because index zero is never used.  Returns 0 on success and -1 if
+   the slot array could not be obtained.  */
+int
+init_hash (hash_table *htab, unsigned long int init_size)
+{
+  unsigned long int nslots = next_prime (init_size);
+
+  htab->size = nslots;
+  htab->filled = 0;
+  htab->first = NULL;
+  htab->table = (void *) xcalloc (nslots + 1, sizeof (hash_entry));
+
+  if (htab->table != NULL)
+    {
+      obstack_init (&htab->mem_pool);
+      return 0;
+    }
+
+  return -1;
+}
+
+
+/* Release the slot array of HTAB and everything allocated on its
+   obstack.  Always returns 0.  */
+int
+delete_hash (hash_table *htab)
+{
+  void *slots = htab->table;
+
+  free (slots);
+  obstack_free (&htab->mem_pool, NULL);
+
+  return 0;
+}
+
+
+/* Add the pair (KEY, DATA) to HTAB, where KEY is KEYLEN bytes long.
+   The key bytes are copied onto the table's obstack.  Returns 0 if the
+   entry was added and -1 if the key is already present; an existing
+   value is never overwritten.  */
+int
+insert_entry (hash_table *htab, const void *key, size_t keylen, void *data)
+{
+  unsigned long int hval = compute_hashval (key, keylen);
+  size_t idx = lookup (htab, key, keylen, hval);
+  const hash_entry *slot = &((hash_entry *) htab->table)[idx];
+
+  if (slot->used)
+    /* The key is already known; keep the old value.  */
+    return -1;
+
+  /* LOOKUP handed us a free slot.  */
+  insert_entry_2 (htab, obstack_copy (&htab->mem_pool, key, keylen),
+                  keylen, hval, idx, data);
+  return 0;
+}
+
+/* Store KEY/KEYLEN/DATA with hash value HVAL in slot IDX of HTAB, link
+   the slot into the circular list of occupied slots and grow the table
+   once it is more than 75% full.  KEY is stored as is, not copied.  */
+static void
+insert_entry_2 (hash_table *htab, const void *key, size_t keylen,
+                unsigned long int hval, size_t idx, void *data)
+{
+  hash_entry *slots = (hash_entry *) htab->table;
+  hash_entry *slot = &slots[idx];
+  hash_entry *tail = (hash_entry *) htab->first;
+  unsigned long int old_size;
+  size_t pos;
+
+  slot->used = hval;
+  slot->key = key;
+  slot->keylen = keylen;
+  slot->data = data;
+
+  /* Append the slot to the circular list; HTAB->first always names the
+     entry linked most recently.  */
+  if (tail != NULL)
+    {
+      slot->next = tail->next;
+      tail->next = slot;
+    }
+  else
+    slot->next = slot;
+  htab->first = slot;
+
+  ++htab->filled;
+  if (100 * htab->filled <= 75 * htab->size)
+    return;
+
+  /* Table is filled more than 75%.  Resize the table.
+     Experiments have shown that for best performance, this threshold
+     must lie between 40% and 85%.  */
+  old_size = htab->size;
+
+  htab->size = next_prime (htab->size * 2);
+  htab->filled = 0;
+  htab->first = NULL;
+  htab->table = (void *) xcalloc (1 + htab->size, sizeof (hash_entry));
+
+  /* Re-insert every occupied slot of the old array into the new one.
+     SLOTS still refers to the old array.  */
+  for (pos = 1; pos <= old_size; ++pos)
+    {
+      const hash_entry *old = &slots[pos];
+
+      if (old->used)
+        insert_entry_2 (htab, old->key, old->keylen, old->used,
+                        lookup (htab, old->key, old->keylen, old->used),
+                        old->data);
+    }
+
+  free (slots);
+}
+
+
+/* Search HTAB for KEY, which is KEYLEN bytes long.  On success store
+   the associated data pointer in *RESULT and return 0; return -1 and
+   leave *RESULT untouched if the key is not present.  */
+int
+find_entry (const hash_table *htab, const void *key, size_t keylen,
+            void **result)
+{
+  const hash_entry *slots = (hash_entry *) htab->table;
+  size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
+
+  if (slots[idx].used != 0)
+    {
+      *result = slots[idx].data;
+      return 0;
+    }
+
+  return -1;
+}
+
+
+/* Replace the data pointer stored in HTAB for KEY (KEYLEN bytes long)
+   with NEWVAL.  Returns 0 on success and -1 if the key is not present;
+   no new entry is created in that case.  */
+int
+set_entry (hash_table *htab, const void *key, size_t keylen, void *newval)
+{
+  hash_entry *slots = (hash_entry *) htab->table;
+  size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
+
+  if (slots[idx].used != 0)
+    {
+      slots[idx].data = newval;
+      return 0;
+    }
+
+  return -1;
+}
+
+
+/* Step through the entries of HTAB.  *PTR is the iteration cursor and
+   must be NULL on the first call.  Each successful call advances the
+   cursor, stores the entry's key, key length and data in *KEY, *KEYLEN
+   and *DATA and returns 0.  Returns -1 when the table is empty or the
+   previous call already delivered the last entry.  */
+int
+iterate_table (const hash_table *htab, void **ptr, const void **key,
+               size_t *keylen, void **data)
+{
+  hash_entry *last = (hash_entry *) htab->first;
+  hash_entry *cursor = (hash_entry *) *ptr;
+
+  /* Nothing to deliver: either the table is empty or the cursor has
+     reached the final element of the circular list.  */
+  if (cursor == NULL ? last == NULL : cursor == last)
+    return -1;
+
+  /* A fresh iteration starts with the successor of the last element.  */
+  cursor = (cursor == NULL ? last : cursor)->next;
+  *ptr = (void *) cursor;
+
+  *key = cursor->key;
+  *keylen = cursor->keylen;
+  *data = cursor->data;
+  return 0;
+}
+
+
+/* References:
+ [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
+ [Knuth] The Art of Computer Programming, part3 (6.4) */
+
+/* Return the index of the slot in HTAB that holds KEY (KEYLEN bytes,
+   hash value HVAL) or, if the key is absent, the index of the free
+   slot where it would be stored.  Slot zero is never returned.  */
+static size_t
+lookup (const hash_table *htab, const void *key, size_t keylen,
+        unsigned long int hval)
+{
+  const hash_entry *slots = (hash_entry *) htab->table;
+  unsigned long int step;
+  size_t idx;
+
+  /* First hash function: simply take the modulus but prevent zero.  */
+  idx = 1 + hval % htab->size;
+
+  if (!slots[idx].used)
+    return idx;
+
+  if (slots[idx].used == hval && slots[idx].keylen == keylen
+      && memcmp (slots[idx].key, key, keylen) == 0)
+    return idx;
+
+  /* Second hash function as suggested in [Knuth].  */
+  step = 1 + hval % (htab->size - 2);
+
+  for (;;)
+    {
+      /* Walk backwards by STEP, wrapping around within 1..size.  */
+      if (idx <= step)
+        idx = htab->size + idx - step;
+      else
+        idx -= step;
+
+      /* If entry is found use it.  */
+      if (slots[idx].used == hval && slots[idx].keylen == keylen
+          && memcmp (slots[idx].key, key, keylen) == 0)
+        return idx;
+
+      /* A free slot ends the probe sequence.  */
+      if (!slots[idx].used)
+        return idx;
+    }
+}
+
+
+/* Return the first number accepted by is_prime among SEED made odd,
+   SEED made odd plus two, and so on.  */
+unsigned long int
+next_prime (unsigned long int seed)
+{
+  unsigned long int candidate;
+
+  /* Only odd numbers are tried.  */
+  for (candidate = seed | 1; !is_prime (candidate); candidate += 2)
+    continue;
+
+  return candidate;
+}
+
+
+/* Return nonzero if CANDIDATE is prime, using trial division by the odd
+   numbers from 3 upwards.  No even number and none less than 10 will
+   be passed here.  */
+static int
+is_prime (unsigned long int candidate)
+{
+  unsigned long int divisor = 3;
+  unsigned long int square = divisor * divisor;
+
+  while (square < candidate && candidate % divisor != 0)
+    {
+      /* (d + 2)^2 == d^2 + 4 * (d + 1), so the square can be updated
+         without a multiplication of the divisor with itself.  */
+      square += 4 * (divisor + 1);
+      divisor += 2;
+    }
+
+  return candidate % divisor != 0;
+}
diff --git a/REORG.TODO/locale/programs/simple-hash.h b/REORG.TODO/locale/programs/simple-hash.h
new file mode 100644
index 0000000000..92ce9508e9
--- /dev/null
+++ b/REORG.TODO/locale/programs/simple-hash.h
@@ -0,0 +1,53 @@
+/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _SIMPLE_HASH_H
+#define _SIMPLE_HASH_H
+
+#include <inttypes.h>
+#include <obstack.h>
+#include <stdint.h>
+
+/* Hash table with arbitrary byte-string keys; use only through the
+   functions declared below.  */
+typedef struct hash_table
+{
+ unsigned long int size; /* Number of usable slots; init_hash rounds it up via next_prime.  */
+ unsigned long int filled; /* Number of slots currently occupied.  */
+ void *first; /* Entry linked most recently into the circular entry list, or NULL.  */
+ void *table; /* Slot array with SIZE + 1 elements.  */
+ struct obstack mem_pool; /* Holds the key copies made by insert_entry.  */
+}
+hash_table;
+
+
+extern int init_hash (hash_table *htab, unsigned long int init_size) __THROW;
+extern int delete_hash (hash_table *htab) __THROW;
+extern int insert_entry (hash_table *htab, const void *key, size_t keylen,
+ void *data) __THROW;
+extern int find_entry (const hash_table *htab, const void *key, size_t keylen,
+ void **result) __THROW;
+extern int set_entry (hash_table *htab, const void *key, size_t keylen,
+ void *newval) __THROW;
+
+extern int iterate_table (const hash_table *htab, void **ptr,
+ const void **key, size_t *keylen, void **data)
+ __THROW;
+
+extern uint32_t compute_hashval (const void *key, size_t keylen)
+ __THROW;
+extern unsigned long int next_prime (unsigned long int seed) __THROW;
+
+#endif /* simple-hash.h */
diff --git a/REORG.TODO/locale/programs/xmalloc.c b/REORG.TODO/locale/programs/xmalloc.c
new file mode 100644
index 0000000000..92468b8c7f
--- /dev/null
+++ b/REORG.TODO/locale/programs/xmalloc.c
@@ -0,0 +1,106 @@
+/* xmalloc.c -- malloc with out of memory checking
+ Copyright (C) 1990-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define VOID void
+
+#include <sys/types.h>
+
+#if STDC_HEADERS || _LIBC
+#include <stdlib.h>
+static VOID *fixup_null_alloc (size_t n) __THROW;
+VOID *xmalloc (size_t n) __THROW;
+VOID *xcalloc (size_t n, size_t s) __THROW;
+VOID *xrealloc (VOID *p, size_t n) __THROW;
+#else
+VOID *calloc ();
+VOID *malloc ();
+VOID *realloc ();
+void free ();
+#endif
+
+#include <libintl.h>
+#include "error.h"
+
+#ifndef _
+# define _(str) gettext (str)
+#endif
+
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 4
+#endif
+
+/* Exit value when the requested amount of memory is not available.
+ The caller may set it to some other value. */
+int xmalloc_exit_failure = EXIT_FAILURE;
+
+/* Called after an allocation of N bytes returned a null pointer.  For
+   N == 0 a one-byte block is allocated instead, so that callers always
+   get a usable pointer.  In every other case, or if that allocation
+   fails too, "memory exhausted" is reported through error with
+   xmalloc_exit_failure as the exit status.  */
+static VOID *
+fixup_null_alloc (size_t n)
+{
+  VOID *p = n == 0 ? malloc ((size_t) 1) : 0;
+
+  if (p == 0)
+    error (xmalloc_exit_failure, 0, _("memory exhausted"));
+
+  return p;
+}
+
+/* Allocate N bytes of memory dynamically, with error checking. */
+
+VOID *
+xmalloc (size_t n)
+{
+ VOID *p;
+
+ p = malloc (n);
+ if (p == 0)
+ p = fixup_null_alloc (n);
+ return p;
+}
+
+/* Allocate memory for N elements of S bytes, with error checking. */
+
+VOID *
+xcalloc (size_t n, size_t s)
+{
+ VOID *p;
+
+ p = calloc (n, s);
+ if (p == 0)
+ p = fixup_null_alloc (n);
+ return p;
+}
+
+/* Change the size of an allocated block of memory P to N bytes,
+   with error checking.
+   If P is NULL, run xmalloc.  */
+
+VOID *
+xrealloc (VOID *p, size_t n)
+{
+  VOID *resized;
+
+  if (p == 0)
+    return xmalloc (n);
+
+  resized = realloc (p, n);
+  if (resized == 0)
+    resized = fixup_null_alloc (n);
+
+  return resized;
+}
diff --git a/REORG.TODO/locale/programs/xstrdup.c b/REORG.TODO/locale/programs/xstrdup.c
new file mode 100644
index 0000000000..dcd89b160f
--- /dev/null
+++ b/REORG.TODO/locale/programs/xstrdup.c
@@ -0,0 +1,36 @@
+/* xstrdup.c -- copy a string with out of memory checking
+ Copyright (C) 1990-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if defined STDC_HEADERS || defined HAVE_STRING_H || _LIBC
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+void *xmalloc (size_t n) __THROW;
+char *xstrdup (char *string) __THROW;
+
+/* Return a newly allocated copy of STRING.  The memory comes from
+   xmalloc.  */
+
+char *
+xstrdup (char *string)
+{
+  size_t size = strlen (string) + 1;
+  char *copy = xmalloc (size);
+
+  return strcpy (copy, string);
+}