aboutsummaryrefslogtreecommitdiff
path: root/locale/programs
diff options
context:
space:
mode:
Diffstat (limited to 'locale/programs')
-rw-r--r--locale/programs/charmap-kw.gperf40
-rw-r--r--locale/programs/charmap-kw.h117
-rw-r--r--locale/programs/charmap.c593
-rw-r--r--locale/programs/charset.c132
-rw-r--r--locale/programs/charset.h61
-rw-r--r--locale/programs/config.h33
-rw-r--r--locale/programs/ctypedump.c163
-rw-r--r--locale/programs/ld-collate.c1549
-rw-r--r--locale/programs/ld-ctype.c1310
-rw-r--r--locale/programs/ld-messages.c237
-rw-r--r--locale/programs/ld-monetary.c385
-rw-r--r--locale/programs/ld-numeric.c208
-rw-r--r--locale/programs/ld-time.c310
-rw-r--r--locale/programs/linereader.c579
-rw-r--r--locale/programs/linereader.h158
-rw-r--r--locale/programs/locale.c544
-rw-r--r--locale/programs/localedef.c461
-rw-r--r--locale/programs/locales.h207
-rw-r--r--locale/programs/locfile-kw.gperf99
-rw-r--r--locale/programs/locfile-kw.h211
-rw-r--r--locale/programs/locfile-token.h147
-rw-r--r--locale/programs/locfile.c979
-rw-r--r--locale/programs/locfile.h75
-rw-r--r--locale/programs/stringtrans.c146
-rw-r--r--locale/programs/stringtrans.h38
25 files changed, 8782 insertions, 0 deletions
diff --git a/locale/programs/charmap-kw.gperf b/locale/programs/charmap-kw.gperf
new file mode 100644
index 0000000000..8e00103882
--- /dev/null
+++ b/locale/programs/charmap-kw.gperf
@@ -0,0 +1,40 @@
+%{
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+%}
+struct keyword_t ;
+%%
+code_set_name, tok_code_set_name, 1
+mb_cur_max, tok_mb_cur_max, 1
+mb_cur_min, tok_mb_cur_min, 1
+escape_char, tok_escape_char, 1
+comment_char, tok_comment_char, 1
+g0esc, tok_g0esc, 1
+g1esc, tok_g1esc, 1
+g2esc, tok_g2esc, 1
+g3esc, tok_g3esc, 1
+CHARMAP, tok_charmap, 0
+END, tok_end, 0
+WIDTH, tok_width, 0
+WIDTH_VARIABLE, tok_width_variable, 0
+WIDTH_DEFAULT, tok_width_default, 0
diff --git a/locale/programs/charmap-kw.h b/locale/programs/charmap-kw.h
new file mode 100644
index 0000000000..93326d0382
--- /dev/null
+++ b/locale/programs/charmap-kw.h
@@ -0,0 +1,117 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: gperf -acCgopt -k1,2,5,$ -N charmap_hash programs/charmap-kw.gperf */
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+struct keyword_t ;
+
+#define TOTAL_KEYWORDS 14
+#define MIN_WORD_LENGTH 3
+#define MAX_WORD_LENGTH 14
+#define MIN_HASH_VALUE 3
+#define MAX_HASH_VALUE 25
+/* maximum key range = 23, duplicates = 0 */
+
+#ifdef __GNUC__
+inline
+#endif
+static unsigned int
+hash (register const char *str, register int len)
+{ /* Generated by gperf from charmap-kw.gperf (key positions 1, 2, 5
+     and the last character); regenerate rather than edit by hand.
+     Returns LEN plus the asso_values weights of the selected
+     characters of STR.  */
+ static const unsigned char asso_values[] =
+ { /* 128 entries, indexed by character code.  NOTE(review): a byte
+      outside 0..127 would index outside this table -- confirm that
+      callers only pass ASCII keywords.  */
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 14, 10,
+ 15, 4, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 0, 0, 0,
+ 26, 26, 0, 0, 26, 26, 26, 0, 0, 26,
+ 0, 26, 26, 26, 5, 26, 26, 0, 26, 26,
+ 26, 26, 26, 26, 26, 0, 26, 26, 0, 0,
+ 26, 0, 26, 0, 26, 26, 26, 26, 26, 0,
+ 15, 0, 0, 26, 0, 0, 26, 0, 26, 26,
+ 0, 26, 26, 26, 26, 26, 26, 26,
+ };
+ register int hval = len; /* The length itself is part of the hash.  */
+
+ switch (hval)
+ {
+ default: /* Any length other than 1..4 also uses the fifth character.  */
+ case 5:
+ hval += asso_values[(int) str[4]]; /* Deliberate fall through.  */
+ case 4:
+ case 3:
+ case 2:
+ hval += asso_values[(int) str[1]]; /* Deliberate fall through.  */
+ case 1:
+ hval += asso_values[(int) str[0]];
+ break;
+ }
+ return hval + asso_values[(int) str[len - 1]]; /* Last character always counts.  */
+}
+
+#ifdef __GNUC__
+inline
+#endif
+const struct keyword_t *
+charmap_hash (register const char *str, register int len)
+{ /* Generated by gperf: look up the LEN bytes at STR among the charmap
+     keywords.  Returns the matching wordlist entry, or 0 if STR is not
+     a keyword.  */
+ static const struct keyword_t wordlist[] =
+ { /* Indexed by hash value; {"",} marks an unused slot.  */
+ {"",}, {"",}, {"",},
+ {"END", tok_end, 0},
+ {"",},
+ {"WIDTH", tok_width, 0},
+ {"",},
+ {"CHARMAP", tok_charmap, 0},
+ {"",},
+ {"g3esc", tok_g3esc, 1},
+ {"mb_cur_max", tok_mb_cur_max, 1},
+ {"escape_char", tok_escape_char, 1},
+ {"comment_char", tok_comment_char, 1},
+ {"code_set_name", tok_code_set_name, 1},
+ {"WIDTH_VARIABLE", tok_width_variable, 0},
+ {"g1esc", tok_g1esc, 1},
+ {"",}, {"",},
+ {"WIDTH_DEFAULT", tok_width_default, 0},
+ {"g0esc", tok_g0esc, 1},
+ {"g2esc", tok_g2esc, 1},
+ {"",}, {"",}, {"",}, {"",},
+ {"mb_cur_min", tok_mb_cur_min, 1},
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) /* Cheap length filter; also keeps hash () from reading past short input.  */
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0) /* Stay inside wordlist.  */
+ {
+ register const char *s = wordlist[key].name;
+
+ if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) /* Only the first LEN bytes are compared.  */
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
new file mode 100644
index 0000000000..2b71821ec0
--- /dev/null
+++ b/locale/programs/charmap.c
@@ -0,0 +1,593 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <obstack.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "error.h"
+#include "linereader.h"
+#include "charset.h"
+
+
+/* Uncomment following line for production version. */
+/* define NDEBUG 1 */
+#include <assert.h>
+
+
+/* Define the lookup function. */
+#include "charmap-kw.h"
+
+
+void *xmalloc (size_t __n);
+
+/* Prototypes for local functions. */
+static struct charset_t *parse_charmap (const char *filename);
+
+
+
+/* Read the character map FILENAME.  If FILENAME is not readable as
+   given, it is looked up below CHARMAP_PATH instead.  When FILENAME is
+   NULL, or the file cannot be parsed, the default map
+   CHARMAP_PATH "/" DEFAULT_CHARMAP is read; failing to read that one
+   is reported through error () with status 4.  Returns the parsed
+   character set.  */
+struct charset_t *
+charmap_read (const char *filename)
+{
+  struct charset_t *result = NULL;
+
+  if (filename != NULL)
+    {
+      const char *pathnfile = filename;
+      char *allocated = NULL;
+
+      if (euidaccess (filename, R_OK) < 0)
+        {
+          /* sizeof counts the terminating NUL, the extra byte is for
+             the "/" separator.  */
+          allocated = xmalloc (strlen (filename) + sizeof CHARMAP_PATH + 1);
+          stpcpy (stpcpy (stpcpy (allocated, CHARMAP_PATH), "/"), filename);
+
+          pathnfile = allocated;
+        }
+
+      result = parse_charmap (pathnfile);
+
+      if (result == NULL)
+        {
+          /* Remember why parsing failed before free () gets a chance
+             to change errno, then release the path we built: nothing
+             refers to it once parsing has failed.  On success the
+             buffer is kept because the line reader was handed the
+             pointer.  */
+          int saved_errno = errno;
+
+          free (allocated);
+          error (0, saved_errno, _("character map file `%s' not found"),
+                 filename);
+        }
+    }
+
+  if (result == NULL)
+    {
+      /* Fall back to the default character map.  */
+      result = parse_charmap (CHARMAP_PATH "/" DEFAULT_CHARMAP);
+
+      if (result == NULL)
+        error (4, errno, _("default character map file `%s' not found"),
+               DEFAULT_CHARMAP);
+    }
+
+  return result;
+}
+
+
+/* Parse the character map description FILENAME.  If FILENAME cannot be
+   opened and contains no '/', it is also tried below CHARMAP_PATH.
+   Returns the newly allocated character set, or NULL if the file could
+   not be opened or the name hash table could not be set up.
+
+   The parser is a state machine driven by the tokens of the file:
+
+     1    prolog: <code_set_name>, <mb_cur_max>, ... until `CHARMAP'
+     2    CHARMAP body, start of a line (symbolic name or `END')
+     3    after the first symbolic name: ellipsis or encoding
+     4    after the ellipsis: symbolic name ending the range
+     5    after the range end: encoding
+     90   after `END': name of the section being closed
+     91   between sections: WIDTH, WIDTH_DEFAULT or WIDTH_VARIABLE
+     92   value of WIDTH_DEFAULT
+     93   WIDTH body, start of a line
+     94   after the first symbolic name: ellipsis or width
+     95   after the ellipsis: symbolic name ending the range
+     96   width value of a range
+     98   WIDTH_VARIABLE body, start of a line
+     99   after the first symbolic name: ellipsis or end of entry
+     100  after the ellipsis: symbolic name ending the range  */
+static struct charset_t *
+parse_charmap (const char *filename)
+{
+  struct linereader *cmfile;
+  struct charset_t *result;
+  int state;
+  enum token_t expected_tok = tok_error;
+  const char *expected_str = NULL;
+  char *from_name = NULL;
+  char *to_name = NULL;
+
+  /* Determine path.  */
+  cmfile = lr_open (filename, charmap_hash);
+  if (cmfile == NULL)
+    {
+      if (strchr (filename, '/') == NULL)
+        {
+          /* Look in the systems charmap directory.  */
+          char *buf = xmalloc (strlen (filename) + 1 + sizeof (CHARMAP_PATH));
+
+          stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"), filename);
+          cmfile = lr_open (buf, charmap_hash);
+
+          if (cmfile == NULL)
+            free (buf);
+        }
+
+      if (cmfile == NULL)
+        return NULL;
+    }
+
+  /* Allocate room for result.  */
+  result = (struct charset_t *) xmalloc (sizeof (struct charset_t));
+  memset (result, '\0', sizeof (struct charset_t));
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+  obstack_init (&result->mem_pool);
+
+  if (init_hash (&result->char_table, 256))
+    {
+      /* Release everything acquired so far, but keep the errno value
+         of the failure for the caller's diagnostic.  */
+      int saved_errno = errno;
+
+      lr_close (cmfile);
+      obstack_free (&result->mem_pool, NULL);
+      free (result);
+
+      errno = saved_errno;
+      return NULL;
+    }
+
+  /* We use a state machine to describe the charmap description file
+     format.  */
+  state = 1;
+  while (1)
+    {
+      /* What's on?  */
+      struct token *now = lr_token (cmfile, NULL);
+      enum token_t nowtok = now->tok;
+      struct token *arg;
+
+      if (nowtok == tok_eof)
+        break;
+
+      switch (state)
+        {
+        case 1:
+          /* The beginning.  We expect the special declarations, EOL or
+             `CHARMAP'.  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_charmap)
+            {
+              from_name = NULL;
+              to_name = NULL;
+
+              /* We have to set up the real work.  Fill in some
+                 default values.  */
+              if (result->mb_cur_max == 0)
+                result->mb_cur_max = 1;
+              if (result->mb_cur_min == 0)
+                result->mb_cur_min = result->mb_cur_max;
+              if (result->mb_cur_min > result->mb_cur_max)
+                {
+                  error (0, 0, _("\
+%s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
+                         cmfile->fname);
+
+                  result->mb_cur_min = result->mb_cur_max;
+                }
+
+              lr_ignore_rest (cmfile, 1);
+
+              state = 2;
+              continue;
+            }
+
+          if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
+              && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
+              && nowtok != tok_comment_char && nowtok != tok_g0esc
+              && nowtok != tok_g1esc && nowtok != tok_g2esc
+              && nowtok != tok_g3esc)
+            {
+              lr_error (cmfile, _("syntax error in prolog: %s"),
+                        _("illegal definition"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* We know that we need an argument.  */
+          arg = lr_token (cmfile, NULL);
+
+          switch (nowtok)
+            {
+            case tok_code_set_name:
+              if (arg->tok != tok_ident)
+                {
+                badarg:
+                  lr_error (cmfile, _("syntax error in prolog: %s"),
+                            _("bad argument"));
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              result->code_set_name = obstack_copy0 (&result->mem_pool,
+                                                     arg->val.str.start,
+                                                     arg->val.str.len);
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+
+            case tok_mb_cur_max:
+            case tok_mb_cur_min:
+              if (arg->tok != tok_number)
+                goto badarg;
+
+              if (arg->val.num < 1 || arg->val.num > 4)
+                {
+                  lr_error (cmfile,
+                            _("value for <%s> must lie between 1 and 4"),
+                            nowtok == tok_mb_cur_min ? "mb_cur_min"
+                                                     : "mb_cur_max");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+              if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
+                   && arg->val.num < result->mb_cur_min)
+                  || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
+                      && arg->val.num > result->mb_cur_max))
+                {
+                  lr_error (cmfile, _("\
+value of <mb_cur_max> must be greater than the value of <mb_cur_min>"));
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_mb_cur_max)
+                result->mb_cur_max = arg->val.num;
+              else
+                result->mb_cur_min = arg->val.num;
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+
+            case tok_escape_char:
+            case tok_comment_char:
+              if (arg->tok != tok_ident)
+                goto badarg;
+
+              if (arg->val.str.len != 1)
+                {
+                  lr_error (cmfile, _("\
+argument to <%s> must be a single character"),
+                            nowtok == tok_escape_char ? "escape_char"
+                                                      : "comment_char");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_escape_char)
+                cmfile->escape_char = *arg->val.str.start;
+              else
+                cmfile->comment_char = *arg->val.str.start;
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+
+            case tok_g0esc:
+            case tok_g1esc:
+            case tok_g2esc:
+            case tok_g3esc:
+              lr_ignore_rest (cmfile, 0); /* XXX */
+              continue;
+
+            default:
+              /* Cannot happen.  */
+              assert (! "Should not happen");
+            }
+          break;
+
+        case 2:
+          /* We have seen `CHARMAP' and now are in the body.  Each line
+             must have the format "%s %s %s\n" or "%s...%s %s %s\n".  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              expected_tok = tok_charmap;
+              expected_str = "CHARMAP";
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "CHARMAP", _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* If the previous line was not completely correct free the
+             used memory.  */
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.start,
+                                              now->val.str.len);
+          to_name = NULL;
+
+          state = 3;
+          continue;
+
+        case 3:
+          /* We have two possibilities: We can see an ellipsis or an
+             encoding value.  */
+          if (nowtok == tok_ellipsis)
+            {
+              state = 4;
+              continue;
+            }
+          /* FALLTHROUGH */
+
+        case 5:
+          if (nowtok != tok_charcode && nowtok != tok_ucs2
+              && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "CHARMAP", _("illegal encoding given"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              state = 2;
+              continue;
+            }
+
+          if (nowtok == tok_charcode)
+            /* Write char value in table.  */
+            charset_new_char (cmfile, result, now->val.charcode.nbytes,
+                              now->val.charcode.val, from_name, to_name);
+          else
+            /* Determine ISO 10646 value and write into table.  */
+            charset_new_unicode (cmfile, result, now->val.charcode.nbytes,
+                                 now->val.charcode.val, from_name, to_name);
+
+          /* Ignore trailing comment silently.  */
+          lr_ignore_rest (cmfile, 0);
+
+          from_name = NULL;
+          to_name = NULL;
+
+          state = 2;
+          continue;
+
+        case 4:
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "CHARMAP",
+                        _("no symbolic name given for end of range"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              /* The rest of the line is gone, so the next token starts
+                 a new entry; do not treat it as the end of this
+                 range.  */
+              state = 2;
+              continue;
+            }
+
+          /* Copy the name of the range end to a safe place.  */
+          to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                            now->val.str.start,
+                                            now->val.str.len);
+
+          /* Only an encoding may follow the end of the range; going
+             back to state 3 would accept a second ellipsis.  */
+          state = 5;
+          continue;
+
+        case 90:
+          if (nowtok != expected_tok)
+            lr_error (cmfile, _("\
+`%1$s' definition does not end with `END %1$s'"), expected_str);
+
+          lr_ignore_rest (cmfile, nowtok == expected_tok);
+          state = 91;
+          continue;
+
+        case 91:
+          /* Waiting for WIDTH...  */
+          if (nowtok == tok_width_default)
+            {
+              state = 92;
+              continue;
+            }
+
+          if (nowtok == tok_width)
+            {
+              lr_ignore_rest (cmfile, 1);
+              state = 93;
+              continue;
+            }
+
+          if (nowtok == tok_width_variable)
+            {
+              lr_ignore_rest (cmfile, 1);
+              state = 98;
+              continue;
+            }
+
+          lr_error (cmfile, _("\
+only WIDTH definitions are allowed to follow the CHARMAP definition"));
+
+          lr_ignore_rest (cmfile, 0);
+          continue;
+
+        case 92:
+          if (nowtok != tok_number)
+            lr_error (cmfile, _("value for %s must be an integer"),
+                      "WIDTH_DEFAULT");
+          else
+            result->width_default = now->val.num;
+
+          lr_ignore_rest (cmfile, nowtok == tok_number);
+
+          state = 91;
+          continue;
+
+        case 93:
+          /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
+             "%s...%s %d\n".  */
+          if (nowtok == tok_eol)
+            /* ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              expected_tok = tok_width;
+              expected_str = "WIDTH";
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH", _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.start,
+                                              now->val.str.len);
+          to_name = NULL;
+
+          state = 94;
+          continue;
+
+        case 94:
+          if (nowtok == tok_ellipsis)
+            {
+              /* The end of the range comes next; without leaving here
+                 the ellipsis itself would be rejected as a bad width.  */
+              state = 95;
+              continue;
+            }
+          /* FALLTHROUGH */
+
+        case 96:
+          if (nowtok != tok_number)
+            lr_error (cmfile, _("value for %s must be an integer"),
+                      "WIDTH");
+          else
+            {
+              /* XXX Store width for chars.  */
+              from_name = NULL;
+            }
+
+          lr_ignore_rest (cmfile, nowtok == tok_number);
+
+          state = 93;
+          continue;
+
+        case 95:
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH", _("no symbolic name given for end of range"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              state = 93;
+              continue;
+            }
+
+          to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                            now->val.str.start,
+                                            now->val.str.len);
+
+          /* The width value still follows on this line, so the rest of
+             the line must not be skipped here.  */
+          state = 96;
+          continue;
+
+        case 98:
+          /* We now expect `END WIDTH_VARIABLE' or lines of the format
+             "%s\n" or "%s...%s\n".  */
+          if (nowtok == tok_eol)
+            /* ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              expected_tok = tok_width_variable;
+              expected_str = "WIDTH_VARIABLE";
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile, _("syntax error in %s definition: %s"),
+                        "WIDTH_VARIABLE", _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+
+              continue;
+            }
+
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.start,
+                                              now->val.str.len);
+          to_name = NULL;
+
+          state = 99;
+          continue;
+
+        case 99:
+          if (nowtok == tok_ellipsis)
+            {
+              /* Wait for the end of the range; the assignment below
+                 must not override this transition.  */
+              state = 100;
+              continue;
+            }
+
+          /* Store info.  */
+          from_name = NULL;
+
+          /* Warn */
+          state = 98;
+          continue;
+
+        case 100:
+          if (nowtok != tok_bsymbol)
+            lr_error (cmfile, _("syntax error in %s definition: %s"),
+                      "WIDTH_VARIABLE",
+                      _("no symbolic name given for end of range"));
+          else
+            {
+              to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                                now->val.str.start,
+                                                now->val.str.len);
+              /* XXX Enter value into table.  */
+            }
+
+          lr_ignore_rest (cmfile, nowtok == tok_bsymbol);
+
+          state = 98;
+          continue;
+
+        default:
+          error (5, 0, _("%s: error in state machine"), __FILE__);
+          /* NOTREACHED */
+        }
+      break;
+    }
+
+  /* Only state 91 (between sections) is a legal place for the file to
+     end.  */
+  if (state != 91)
+    error (0, 0, _("%s: premature end of file"), cmfile->fname);
+
+  lr_close (cmfile);
+
+  return result;
+}
diff --git a/locale/programs/charset.c b/locale/programs/charset.c
new file mode 100644
index 0000000000..2e2f63bd9a
--- /dev/null
+++ b/locale/programs/charset.c
@@ -0,0 +1,132 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <alloca.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "error.h"
+#include "charset.h"
+
+
+static void
+insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
+ unsigned int value, const char *from, const char *to);
+
+
+/* Enter the character(s) named FROM (or the range FROM...TO) with the
+   encoding VALUE of BYTES bytes into CS.  Encodings shorter than
+   mb_cur_min or longer than mb_cur_max are reported through LR and not
+   entered.  */
+void
+charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes,
+                  unsigned int value, const char *from, const char *to)
+{
+  if (bytes < cs->mb_cur_min)
+    {
+      lr_error (lr, _("too few bytes in character encoding"));
+      return;
+    }
+
+  if (bytes > cs->mb_cur_max)
+    {
+      lr_error (lr, _("too many bytes in character encoding"));
+      return;
+    }
+
+  /* The length is acceptable for this character set.  */
+  insert_char (lr, cs, bytes, value, from, to);
+}
+
+
+/* Enter the character(s) named FROM (or the range FROM...TO) whose
+   value was given in <Uxxxx> notation.  Unlike charset_new_char no
+   check against mb_cur_min/mb_cur_max is made.  */
+void
+charset_new_unicode (struct linereader *lr, struct charset_t *cs, int bytes,
+                     unsigned int value, const char *from, const char *to)
+{
+  /* Handled like any other value for the time being; <Uxxxx> support
+     may be removed again.  */
+  insert_char (lr, cs, bytes, value, from, to);
+}
+
+
+/* Return the value stored in CS for the symbolic character name NAME
+   of length LEN, or ILLEGAL_CHAR_VALUE if the name is unknown.  */
+unsigned int
+charset_find_value (const struct charset_t *cs, const char *name, size_t len)
+{
+  void *data;
+
+  /* find_entry takes a non-const table pointer, hence the cast.  */
+  if (find_entry ((hash_table *) &cs->char_table, name, len, &data) < 0)
+    return ILLEGAL_CHAR_VALUE;
+
+  /* The table stores the value in the pointer itself.  Convert through
+     an integer type wide enough for a pointer before narrowing, so the
+     conversion is well defined where pointers are wider than int.  */
+  return (unsigned int) (unsigned long int) data;
+}
+
+
+/* Enter the character named FROM with value VALUE into the name table
+   of CS.  If TO is not NULL, FROM and TO describe a range: both names
+   must have the same length, the same prefix and end in the same
+   number of decimal digits, e.g. `j0101' ... `j0194'.  Every name of
+   the range is entered; the first one gets VALUE and each following
+   one the next higher value.  Problems are reported through LR.
+   BYTES is currently not used.  */
+static void
+insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
+             unsigned int value, const char *from, const char *to)
+{
+  char *buf;
+  int prefix_len, len1, len2, idx;
+  unsigned int from_nr, to_nr, cnt;
+
+  if (to == NULL)
+    {
+      /* The value is stored in the data pointer itself.  */
+      if (insert_entry (&cs->char_table, from, strlen (from),
+                        (void *) (unsigned long int) value) < 0)
+        lr_error (lr, _("duplicate character name `%s'"), from);
+
+      return;
+    }
+
+  /* We have a range: the names must have equal prefixes and an equal
+     number of digits, where the second number is greater than or equal
+     to the first.  */
+  len1 = strlen (from);
+  len2 = strlen (to);
+
+  if (len1 != len2)
+    {
+    illegal_range:
+      lr_error (lr, _("illegal names for character range"));
+      return;
+    }
+
+  /* Find where the trailing digits of FROM start.  The index is tested
+     before the character is read so that a name made up of digits only
+     does not cause a read in front of the string.  */
+  prefix_len = len1;
+  while (prefix_len > 0 && isdigit ((unsigned char) from[prefix_len - 1]))
+    --prefix_len;
+
+  /* No digits at all, or different prefixes.  */
+  if (prefix_len == len1 || strncmp (from, to, prefix_len) != 0)
+    goto illegal_range;
+
+  /* TO must end in digits where FROM does.  This also guarantees that
+     no generated number needs more room than the names provide.  */
+  for (idx = prefix_len; idx < len1; ++idx)
+    if (!isdigit ((unsigned char) to[idx]))
+      goto illegal_range;
+
+  from_nr = strtoul (&from[prefix_len], NULL, 10);
+  to_nr = strtoul (&to[prefix_len], NULL, 10);
+
+  if (from_nr > to_nr)
+    {
+      lr_error (lr, _("upper limit in range is smaller than lower limit"));
+      return;
+    }
+
+  buf = alloca (len1 + 1);
+  memcpy (buf, from, prefix_len);
+
+  for (cnt = from_nr; cnt <= to_nr; ++cnt)
+    {
+      /* Pad with zeros to the width used in FROM so that the generated
+         name is LEN1 bytes long like the names of the range limits.  */
+      sprintf (&buf[prefix_len], "%0*u", len1 - prefix_len, cnt);
+
+      /* The values of the range are consecutive, starting at VALUE.  */
+      if (insert_entry (&cs->char_table, buf, len1,
+                        (void *) (unsigned long int) (value
+                                                      + (cnt - from_nr))) < 0)
+        lr_error (lr, _("duplicate character name `%s'"), buf);
+
+      /* Do not wrap around when the range ends at the largest value.  */
+      if (cnt == to_nr)
+        break;
+    }
+}
diff --git a/locale/programs/charset.h b/locale/programs/charset.h
new file mode 100644
index 0000000000..222d468407
--- /dev/null
+++ b/locale/programs/charset.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef _CHARSET_H
+#define _CHARSET_H
+
+#include <obstack.h>
+
+#include "simple-hash.h"
+#include "linereader.h"
+
+
+struct charset_t /* Result of parsing one character map file.  */
+{
+ const char *code_set_name; /* Argument of <code_set_name>, if given.  */
+ int mb_cur_min; /* Fewest bytes per character; the parser accepts 1..4.  */
+ int mb_cur_max; /* Most bytes per character; the parser accepts 1..4.  */
+ int width_default; /* Value given with WIDTH_DEFAULT.  */
+
+ struct obstack mem_pool; /* Holds the strings copied while parsing.  */
+ hash_table char_table; /* Maps symbolic character names to values.  */
+};
+
+
+/* We need one value to mark the error case. Let's use 0xffffffff.
+ I.e., it is placed in the last page of ISO 10646. For now only the
+ first is used and we have plenty of room. */
+#define ILLEGAL_CHAR_VALUE 0xffffffffu
+
+
+/* Prototypes for charmap handling functions. */
+struct charset_t *charmap_read (const char *filename);
+
+/* Prototypes for functions to insert new characters. */
+void charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes,
+ unsigned int value, const char *from, const char *to);
+
+void charset_new_unicode (struct linereader *lr, struct charset_t *cs,
+ int bytes, unsigned int value, const char *from,
+ const char *to);
+
+unsigned int charset_find_value (const struct charset_t *__cs,
+ const char *__name, size_t __len);
+
+#endif /* charset.h */
diff --git a/locale/programs/config.h b/locale/programs/config.h
new file mode 100644
index 0000000000..64054657cb
--- /dev/null
+++ b/locale/programs/config.h
@@ -0,0 +1,33 @@
+#ifndef _LD_CONFIG_H
+#define _LD_CONFIG_H
+
+/* Use the internal textdomain used for libc messages. */
+#define PACKAGE _libc_intl_domainname
+#ifndef VERSION
+/* Get libc version number. */
+#include "../../version.h"
+#endif
+
+#define DEFAULT_CHARMAP "POSIX"
+
+#ifndef PARAMS
+# if __STDC__
+# define PARAMS(args) args
+# else
+# define PARAMS(args) ()
+# endif
+#endif
+
+
+
+#define HAVE_VPRINTF 1
+
+
+typedef int wint_t;
+typedef unsigned short int u16_t;
+
+
+
+int euidaccess (__const char *__name, int __type);
+
+#endif
diff --git a/locale/programs/ctypedump.c b/locale/programs/ctypedump.c
new file mode 100644
index 0000000000..2a6753495e
--- /dev/null
+++ b/locale/programs/ctypedump.c
@@ -0,0 +1,163 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <ctype.h>
+#include <endian.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <netinet/in.h> /* Just for htons() */
+
+/*#include "localedef.h"*/
+#include "localeinfo.h"
+
+
+/* FIXME: these values should be part of the LC_CTYPE information. */
+#define mb_cur_max 1
+#define mb_cur_min 1
+
+
+#define SWAP32(v) \
+ ((u32_t) (((((u32_t) (v)) & 0x000000ff) << 24) \
+ | ((((u32_t) (v)) & 0x0000ff00) << 8) \
+ | ((((u32_t) (v)) & 0x00ff0000) >> 8) \
+ | ((((u32_t) (v)) & 0xff000000) >> 24)))
+
+
+
+/* Write the first two bytes of VAL, in memory order, to stdout as a C
+   string literal made of octal escapes.  */
+static inline void
+print_short_in_char (unsigned short val)
+{
+  const unsigned char *const raw = (const unsigned char *) &val;
+  const unsigned char first = raw[0];
+  const unsigned char second = raw[1];
+
+  printf ("\"\\%03o\\%03o\"", first, second);
+}
+
+
+/* Write the first four bytes of VAL, in memory order, to stdout as a C
+   string literal made of octal escapes.  */
+static inline void
+print_int_in_char (unsigned int val)
+{
+  const unsigned char *const raw = (const unsigned char *) &val;
+  const unsigned char b0 = raw[0];
+  const unsigned char b1 = raw[1];
+  const unsigned char b2 = raw[2];
+  const unsigned char b3 = raw[3];
+
+  printf ("\"\\%03o\\%03o\\%03o\\%03o\"", b0, b1, b2, b3);
+}
+
+
+/* Write C source describing the LC_CTYPE tables of the running
+   program's locale to stdout: the character class table, the toupper
+   and tolower tables in both byte orders (selected by BYTE_ORDER when
+   the output is compiled), and a `struct locale_data' initializer.
+   The locale name used in the identifiers comes from LC_ALL, LC_CTYPE
+   or LANG, defaulting to "POSIX".  Every table covers the indices
+   -128 up to (1 << (8 * MB_CUR_MAX)) - 1; negative indices repeat the
+   entries of the corresponding characters 128..255.  Always returns
+   0.  */
+int
+ctype_output (void)
+{
+  int ch;
+  int result = 0;
+  const char *locname = (getenv ("LC_ALL") ?: getenv ("LC_CTYPE") ?:
+                         getenv ("LANG") ?: "POSIX");
+
+  puts ("#include <endian.h>\n");
+
+  /* The class table is written in network byte order and needs no
+     variants.  */
+  if (mb_cur_max == 1)
+    {
+      printf ("const char _nl_%s_LC_CTYPE_class[] = \n", locname);
+      for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch)
+        {
+          if (((ch + 128) % 6) == 0)
+            printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch);
+          print_short_in_char (htons (__ctype_b [ch < 0 ? 256 + ch : ch]));
+          fputc (((ch + 128) % 6) == 5 ? '\n' : ' ', stdout);
+        }
+      puts (";");
+    }
+
+  /* First variant: the byte order of the machine running this
+     program.  */
+  printf ("#if BYTE_ORDER == %s\n",
+          BYTE_ORDER == LITTLE_ENDIAN ? "LITTLE_ENDIAN" : "BIG_ENDIAN");
+
+  if (mb_cur_max == 1)
+    {
+      printf ("const char _nl_%s_LC_CTYPE_toupper[] = \n", locname);
+      for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch)
+        {
+          if (((ch + 128) % 3) == 0)
+            printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch);
+          print_int_in_char (__ctype_toupper[ch < 0 ? 256 + ch : ch]);
+          fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout);
+        }
+      puts (";");
+
+      printf ("const char _nl_%s_LC_CTYPE_tolower[] = \n", locname);
+      for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch)
+        {
+          if (((ch + 128) % 3) == 0)
+            printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch);
+          print_int_in_char (__ctype_tolower[ch < 0 ? 256 + ch : ch]);
+          fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout);
+        }
+      puts (";");
+    }
+  else
+    /* not implemented */;
+
+  /* Second variant: the same tables with the bytes of every entry
+     swapped.  */
+  printf ("#elif BYTE_ORDER == %s\n",
+          BYTE_ORDER == LITTLE_ENDIAN ? "BIG_ENDIAN" : "LITTLE_ENDIAN");
+
+  if (mb_cur_max == 1)
+    {
+      printf ("const char _nl_%s_LC_CTYPE_toupper[] = \n", locname);
+      for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch)
+        {
+          if (((ch + 128) % 3) == 0)
+            printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch);
+          print_int_in_char (SWAP32 (__ctype_toupper[ch < 0 ? 256 + ch : ch]));
+          fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout);
+        }
+      puts (";");
+
+      printf ("const char _nl_%s_LC_CTYPE_tolower[] = \n", locname);
+      for (ch = -128; ch < (1 << (8 * MB_CUR_MAX)); ++ch)
+        {
+          if (((ch + 128) % 3) == 0)
+            printf (" /* 0x%02x */ ", ch < 0 ? 256 + ch : ch);
+          print_int_in_char (SWAP32 (__ctype_tolower[ch < 0 ? 256 + ch : ch]));
+          fputc (((ch + 128) % 3) == 2 ? '\n' : ' ', stdout);
+        }
+      puts (";");
+    }
+  else
+    /* not implemented */;
+
+  puts ("#else\n#error \"BYTE_ORDER\" BYTE_ORDER \" not handled.\"\n#endif\n");
+
+  /* The generated conditionals must use `#if': `#ifdef' would only
+     test whether BYTE_ORDER is defined and so select both branches.
+     NOTE(review): the initializer refers to the `_nl_C_' tables while
+     the tables written above carry LOCNAME in their names -- confirm
+     that this is intended.  */
+  printf("const struct locale_data _nl_%s_LC_CTYPE = \n\
+{\n\
+ NULL, 0, /* no file mapped */\n\
+ 5,\n\
+ {\n\
+ _nl_C_LC_CTYPE_class,\n\
+#if BYTE_ORDER == LITTLE_ENDIAN\n\
+ NULL, NULL,\n\
+#endif\n\
+ _nl_C_LC_CTYPE_toupper,\n\
+ _nl_C_LC_CTYPE_tolower,\n\
+#if BYTE_ORDER == BIG_ENDIAN\n\
+ NULL, NULL,\n\
+#endif\n\
+ }\n\
+};\n", locname);
+
+  return result;
+}
+
+/*
+ * Local Variables:
+ * mode:c
+ * c-basic-offset:2
+ * End:
+ */
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
new file mode 100644
index 0000000000..0f3bcbca33
--- /dev/null
+++ b/locale/programs/ld-collate.c
@@ -0,0 +1,1549 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <endian.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <obstack.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wcstr.h>
+
+#include "localeinfo.h"
+#include "locales.h"
+#include "simple-hash.h"
+#include "stringtrans.h"
+
+/* Uncomment the following line in the production version. */
+/* define NDEBUG 1 */
+#include <assert.h>
+
+
+/* Yield the larger of A and B; when both compare equal the result is B.
+   Each argument may be evaluated twice, so pass no side effects. */
+#define MAX(a, b) ((b) < (a) ? (a) : (b))
+
+/* Reverse the order of the four bytes of W. W is evaluated several
+   times; the expansion assumes W is a 32-bit unsigned quantity. */
+#define SWAPU32(w) \
+ (((w) >> 24) | (((w) >> 8) & 0xff00) | (((w) & 0xff00) << 8) | ((w) << 24))
+
+
+/* Classification of the entry that starts the current line of the
+   sorting order section; kept in the `kind' member of the collate
+   state. */
+enum coll_symbol
+{
+  undefined = 0, /* The UNDEFINED keyword. */
+  ellipsis = 1, /* A `...' line. */
+  character = 2, /* A symbolic name known to the charset. */
+  element = 3, /* A declared collating element. */
+  symbol = 4 /* A declared collating symbol. */
+};
+
+
+/* Record of one weight that named a symbol which had no weight yet
+   when the line was read. collate_finish walks the list and stores
+   the final weight through `where.pos'. */
+struct patch_t
+{
+  /* Source position of the reference, for the diagnostic. */
+  const char *fname;
+  size_t lineno;
+  /* NUL-terminated copy of the referenced symbol name. */
+  const char *token;
+  /* Place to fix up: an index into the temporary weight array while
+     the line is collected, a pointer into the final data later. */
+  union
+  {
+    unsigned int *pos;
+    size_t idx;
+  } where;
+  /* Next record in the list. */
+  struct patch_t *next;
+};
+
+typedef struct patch_t patch_t;
+
+
+/* One entry of the result table: a single character or a multi
+   character collating element together with its weights. */
+struct element_t
+{
+  /* Wide character string this entry stands for, L'\0' terminated. */
+  const wchar_t *name;
+  /* Position of the entry in the sorting order; 0 while unknown. */
+  unsigned int this_weight;
+
+  /* Next entry hashed under the same first character. */
+  struct element_t *next;
+
+  /* Weight data: one count per sorting rule followed by the weights
+     themselves. NULL as long as no weights were read. */
+  unsigned int *ordering;
+  /* Number of weights stored behind the counts. */
+  size_t ordering_len;
+};
+
+typedef struct element_t element_t;
+
+
+/* The real definition of the struct for the LC_COLLATE locale. It
+ holds everything collected while the LC_COLLATE section is read and
+ is consumed by collate_finish and collate_output. */
+struct locale_collate_t
+{
+ /* Collate symbol table. Simple mapping from the symbol name to its
+ weight number (0 as long as the symbol has no position). */
+ hash_table symbols;
+
+ /* The collation elements, keyed by their symbolic name. The
+ element_t objects, their names and the patch records live on
+ the obstack ELEMENT_MEM. */
+ hash_table elements;
+ struct obstack element_mem;
+
+ /* The result table, keyed by the first wide character of the entry
+ name; entries sharing that character are chained through `next'. */
+ hash_table result;
+
+ /* Sorting rules given in order_start line: number used, number
+ allocated and the array itself. */
+ int nrules;
+ int nrules_max;
+ enum coll_sort_rule *rules;
+
+ /* Used while recognizing symbol composed of multiple tokens
+ (collating-element): name given after the keyword, remembered
+ until the `from' string has been read. */
+ const char *combine_token;
+ size_t combine_token_len;
+
+ /* How many sorting order specifications so far. Also the source
+ of the `this_weight' values. */
+ unsigned int order_cnt;
+
+ /* Was lastline ellipsis? */
+ int was_ellipsis;
+ /* Value of last entry if was character. */
+ wchar_t last_char;
+ /* Current element. */
+ element_t *current_element;
+ /* What kind of symbol is current element. */
+ enum coll_symbol kind;
+
+ /* While collecting the weights we need some temporary space:
+ WEIGHT_CNT holds one counter per rule, WEIGHT_IDX is the rule
+ currently filled, WEIGHT holds NWEIGHT values of NWEIGHT_MAX
+ allocated ones. */
+ unsigned int current_order;
+ int *weight_cnt;
+ int weight_idx;
+ unsigned int *weight;
+ int nweight;
+ int nweight_max;
+
+ /* Patch lists: records for the line being read and the list which
+ collate_finish resolves. */
+ patch_t *current_patch;
+ patch_t *all_patches;
+
+ /* Room for the UNDEFINED information. UNDEFINED_LEN is computed in
+ collate_finish. */
+ element_t undefined;
+ unsigned int undefined_len;
+};
+
+
+/* Be verbose? Defined in localedef.c. When nonzero some otherwise
+ silent problems (e.g. unknown symbols) are reported. */
+extern int verbose;
+
+
+/* Allocation helpers defined outside this file. The code in this
+ file never checks their result for NULL. */
+void *xmalloc (size_t __n);
+void *xrealloc (void *__p, size_t __n);
+
+
+/* Allocation hooks the obstack macros expand to. */
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+
+/* Create the LC_COLLATE state for LOCALE and put it into its initial
+   state: three empty hash tables, an empty rule array, no patches and
+   no UNDEFINED entry. String translation in LR is switched off
+   because this category translates its strings itself. CHARSET is
+   not used here. Terminates the program when a hash table cannot be
+   created. */
+void
+collate_startup (struct linereader *lr, struct localedef_t *locale,
+                 struct charset_t *charset)
+{
+  struct locale_collate_t *collate;
+
+  /* It is important that we always use UCS4 encoding for strings now. */
+  encoding_method = ENC_UCS4;
+
+  /* Allocate the needed room. */
+  locale->categories[LC_COLLATE].collate = collate =
+    (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
+
+  /* Start from a completely cleared object. Later code reads members
+     which are not assigned below (`undefined.ordering' is tested in
+     collate_finish, `kind' in collate_order_elem), so they must not
+     contain whatever the allocator left in the block. */
+  memset (collate, '\0', sizeof (struct locale_collate_t));
+
+  /* Allocate hash table for collating elements. */
+  if (init_hash (&collate->elements, 512))
+    error (4, 0, _("memory exhausted"));
+  collate->combine_token = NULL;
+  obstack_init (&collate->element_mem);
+
+  /* Allocate hash table for collating symbols. */
+  if (init_hash (&collate->symbols, 64))
+    error (4, 0, _("memory exhausted"));
+
+  /* Allocate hash table for result. */
+  if (init_hash (&collate->result, 512))
+    error (4, 0, _("memory exhausted"));
+
+  collate->nrules = 0;
+  collate->nrules_max = 10;
+  collate->rules
+    = (enum coll_sort_rule *) xmalloc (collate->nrules_max
+                                       * sizeof (enum coll_sort_rule));
+
+  collate->order_cnt = 1; /* The smallest weight is 2. */
+
+  collate->was_ellipsis = 0;
+  collate->last_char = L'\0'; /* 0 because leading ellipsis is allowed. */
+
+  collate->current_patch = NULL;
+  collate->all_patches = NULL;
+
+  /* This tells us no UNDEFINED entry was found until now. */
+  collate->undefined.this_weight = 0;
+  collate->undefined.next = NULL;
+  collate->undefined.ordering = NULL;
+  collate->undefined.ordering_len = 0;
+
+  lr->translate_strings = 0;
+}
+
+
+/* Finish the LC_COLLATE data of LOCALE after the whole section was
+   read: resolve all recorded forward references, provide a default
+   UNDEFINED entry when none was given, compute the length of the
+   UNDEFINED entry and drop the symbol table. */
+void
+collate_finish (struct localedef_t *locale, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  patch_t *fixup;
+  size_t rule;
+
+  /* Every patch names a symbol whose weight was unknown when it was
+     used. Look the name up the same way the reader does: charset
+     character first, then collating element, then collating symbol. */
+  fixup = collate->all_patches;
+  while (fixup != NULL)
+    {
+      size_t namelen = strlen (fixup->token);
+      unsigned int weight = 0;
+      void *found;
+      wchar_t wc;
+
+      wc = charset_find_value (charset, fixup->token, namelen);
+      if (wc == ILLEGAL_CHAR_VALUE)
+        {
+          if (find_entry (&collate->elements, fixup->token, namelen, &found)
+              >= 0)
+            weight = ((element_t *) found)->this_weight;
+          else if (find_entry (&collate->symbols, fixup->token, namelen,
+                               &found) >= 0)
+            weight = (unsigned int) found;
+        }
+      else
+        {
+          element_t *ep;
+
+          if (find_entry (&collate->result, &wc, sizeof (wchar_t),
+                          (void *) &ep) < 0)
+            ep = NULL;
+
+          /* Only the entry for the plain character counts, not the
+             collating elements starting with it. */
+          while (ep != NULL && (ep->name[0] != wc || ep->name[1] != L'\0'))
+            ep = ep->next;
+
+          if (ep != NULL)
+            weight = ep->this_weight;
+        }
+
+      if (weight != 0)
+        *fixup->where.pos = weight;
+      else
+        error_with_loc (0, 0, fixup->fname, fixup->lineno,
+                        _("no weight defined for symbol `%s'"), fixup->token);
+
+      fixup = fixup->next;
+    }
+
+  /* Without a definition for UNDEFINED all characters of the charset
+     would have to be specified. XXX We should test whether really an
+     unspecified character exists before giving the message. */
+  if (collate->undefined.ordering == NULL)
+    {
+      u32_t weight;
+
+      error (0, 0, _("no definition of `UNDEFINED'"));
+
+      collate->undefined.ordering_len = collate->nrules;
+      weight = ++collate->order_cnt;
+
+      /* First one count (always 1) per rule ... */
+      rule = 0;
+      while (rule < collate->nrules)
+        {
+          u32_t one = 1;
+          obstack_grow (&collate->element_mem, &one, sizeof (one));
+          ++rule;
+        }
+
+      /* ... then the same fresh weight for every rule. */
+      rule = 0;
+      while (rule < collate->nrules)
+        {
+          obstack_grow (&collate->element_mem, &weight, sizeof (weight));
+          ++rule;
+        }
+
+      collate->undefined.ordering = obstack_finish (&collate->element_mem);
+    }
+
+  /* Length of the UNDEFINED entry: the name (1 x wchar_t + L'\0') plus
+     per rule one count and that many weights. */
+  collate->undefined_len = 2;
+  for (rule = 0; rule < collate->nrules; ++rule)
+    collate->undefined_len += 1 + collate->undefined.ordering[rule];
+
+  /* Collating symbols are not used anymore. */
+  (void) delete_hash (&collate->symbols);
+}
+
+
+
+/* Write the LC_COLLATE data of LOCALE with write_locale_data, using
+   OUTPUT_PATH as the destination.
+
+   The function first searches the hash table size for which the
+   product of size and longest chain is smallest. It then fills a
+   table of TABLE_BEST * LEVEL_BEST entries of ENTRY_SIZE words each:
+   word 0 is the character, the following words are either the weights
+   of a simple entry or FORWARD_CHAR plus an index into the block of
+   extended entries. Characters up to 0xff are stored at the slot
+   equal to their value, all others at their value modulo TABLE_BEST.
+   Table and extended block are emitted twice, once per byte order. */
+void
+collate_output (struct localedef_t *locale, const char *output_path)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  u32_t table_size, table_best, level_best, sum_best;
+  void *last;
+  element_t *pelem;
+  wchar_t *name;
+  size_t len;
+  const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
+  struct iovec iov[2 + nelems];
+  struct locale_file data;
+  u32_t idx[nelems];
+  struct obstack non_simple;
+  size_t cnt, entry_size;
+  u32_t undefined_offset = UINT_MAX;
+  u32_t *table, *extra, *table2, *extra2;
+  size_t extra_len;
+
+  sum_best = UINT_MAX;
+  table_best = 0xffff;
+  level_best = 0xffff;
+
+  /* Compute table size. Every candidate size is tried; the loop ends
+     once the size alone is not smaller than the best product found. */
+  fputs (_("\
+Computing table size for collation information might take a while..."),
+         stderr);
+  for (table_size = 256; table_size < sum_best; ++table_size)
+    {
+      size_t hits[table_size];
+      unsigned int worst = 1;
+      size_t cnt;
+
+      last = NULL;
+
+      /* The first 256 slots are always taken by the single byte
+         characters. */
+      for (cnt = 0; cnt < 256; ++cnt)
+        hits[cnt] = 1;
+      memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
+
+      while (iterate_table (&collate->result, &last, (const void **) &name,
+                            &len, (void **) &pelem) >= 0)
+        if (pelem->ordering != NULL && pelem->name[0] > 0xff)
+          if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
+            {
+              worst = hits[(unsigned int) pelem->name[0] % table_size];
+              /* This size cannot win anymore. */
+              if (table_size * worst > sum_best)
+                break;
+            }
+
+      if (table_size * worst < sum_best)
+        {
+          sum_best = table_size * worst;
+          table_best = table_size;
+          level_best = worst;
+        }
+    }
+  assert (table_best != 0xffff || level_best != 0xffff);
+  fputs (_(" done\n"), stderr);
+
+  obstack_init (&non_simple);
+
+  data.magic = LIMAGIC (LC_COLLATE);
+  data.n = nelems;
+  iov[0].iov_base = (void *) &data;
+  iov[0].iov_len = sizeof (data);
+
+  iov[1].iov_base = (void *) idx;
+  iov[1].iov_len = sizeof (idx);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u32_t);
+
+  table = (u32_t *) alloca (collate->nrules * sizeof (u32_t));
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
+    = collate->nrules * sizeof (u32_t);
+  /* Another trick here. Describing the collation method needs only a
+     few bits (3, to be exact). But the binary file should be
+     accessible by machines with both endiannesses and so we store both
+     information in the same word. */
+  for (cnt = 0; cnt < collate->nrules; ++cnt)
+    table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u32_t);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len = sizeof (u32_t);
+
+  /* One word for the character and at least two more so that a
+     forward reference (marker plus index) always fits. */
+  entry_size = 1 + MAX (collate->nrules, 2);
+
+  table = (u32_t *) alloca (table_best * level_best * entry_size
+                            * sizeof (table[0]));
+  memset (table, '\0', table_best * level_best * entry_size
+          * sizeof (table[0]));
+
+
+  /* Macros for inserting in output table. */
+
+  /* Append EXPR as one u32_t word to the block of extended entries. */
+#define ADD_VALUE(expr) \
+  do { \
+    u32_t to_write = (u32_t) expr; \
+    obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
+  } while (0)
+
+  /* Append the element PELEM: the total length LEN, the name including
+     the terminating zero and per rule the count followed by the
+     weights. Uses the variable `wlen' of the expanding context. */
+#define ADD_ELEMENT(pelem, len) \
+  do { \
+    size_t cnt, idx; \
+ \
+    ADD_VALUE (len); \
+ \
+    wlen = wcslen (pelem->name); \
+    obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u32_t)); \
+ \
+    idx = collate->nrules; \
+    for (cnt = 0; cnt < collate->nrules; ++cnt) \
+      { \
+        size_t disp; \
+ \
+        ADD_VALUE (pelem->ordering[cnt]); \
+        for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
+          ADD_VALUE (pelem->ordering[idx++]); \
+      } \
+  } while (0)
+
+  /* Turn the table entry selected by the variables `level' and `slot'
+     of the expanding context into a forward reference and append all
+     elements of the chain starting at PELEM to the extended block.
+     When the chain has no entry for the single character a copy of
+     the UNDEFINED entry is appended for it. */
+#define ADD_FORWARD(pelem) \
+  do { \
+    /* We leave a reference in the main table and put all \
+       information in the table for the extended entries. */ \
+    element_t *runp; \
+    element_t *has_simple = NULL; \
+    size_t wlen; \
+ \
+    table[(level * table_best + slot) * entry_size + 1] \
+      = FORWARD_CHAR; \
+    table[(level * table_best + slot) * entry_size + 2] \
+      = obstack_object_size (&non_simple) / sizeof (u32_t); \
+ \
+    /* Here we have to construct the non-simple table entry. First \
+       compute the total length of this entry. */ \
+    for (runp = (pelem); runp != NULL; runp = runp->next) \
+      if (runp->ordering != NULL) \
+        { \
+          u32_t value; \
+          size_t cnt; \
+ \
+          value = 1 + wcslen (runp->name) + 1; \
+ \
+          for (cnt = 0; cnt < collate->nrules; ++cnt) \
+            /* We have to take care for entries without ordering \
+               information. While reading them they get inserted in the \
+               table and later not removed when something goes wrong with \
+               reading its weights. */ \
+            { \
+              value += 1 + runp->ordering[cnt]; \
+ \
+              if (runp->name[1] == L'\0') \
+                has_simple = runp; \
+            } \
+ \
+          ADD_ELEMENT (runp, value); \
+        } \
+ \
+    if (has_simple == NULL) \
+      { \
+        size_t idx, cnt; \
+ \
+        ADD_VALUE (collate->undefined_len + 1); \
+ \
+        /* Add the name. */ \
+        ADD_VALUE ((pelem)->name[0]); \
+        ADD_VALUE (0); \
+ \
+        idx = collate->nrules; \
+        for (cnt = 0; cnt < collate->nrules; ++cnt) \
+          { \
+            size_t disp; \
+ \
+            ADD_VALUE (collate->undefined.ordering[cnt]); \
+            for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
+              { \
+                /* An ellipsis stands for the character itself. Either \
+                   way exactly one weight is consumed per step. */ \
+                if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR) \
+                  ADD_VALUE ((pelem)->name[0]); \
+                else \
+                  ADD_VALUE (collate->undefined.ordering[idx]); \
+                ++idx; \
+              } \
+          } \
+      } \
+  } while (0)
+
+
+
+  /* Fill the table now. First we look for all the characters which
+     fit into one single byte. This speeds up the 8-bit string
+     functions. */
+  last = NULL;
+  while (iterate_table (&collate->result, &last, (const void **) &name,
+                        &len, (void **) &pelem) >= 0)
+    if (pelem->name[0] <= 0xff)
+      {
+        /* We have a single byte name. Now we must distinguish
+           between entries in simple form (i.e., only one value per
+           weight and no collation element starting with the same
+           character) and those which are not. */
+        size_t slot = ((size_t) pelem->name[0]);
+        const size_t level = 0;
+
+        table[slot * entry_size] = pelem->name[0];
+
+        if (pelem->name[1] == L'\0' && pelem->next == NULL
+            && pelem->ordering_len == collate->nrules)
+          {
+            /* Yes, we have a simple one. Lucky us. */
+            size_t cnt;
+
+            for (cnt = 0; cnt < collate->nrules; ++cnt)
+              table[slot * entry_size + 1 + cnt]
+                = pelem->ordering[collate->nrules + cnt];
+          }
+        else
+          ADD_FORWARD (pelem);
+      }
+
+  /* Now check for missing single byte entries. If one exist we fill
+     with the UNDEFINED entry. */
+  for (cnt = 0; cnt < 256; ++cnt)
+    /* The first weight is never 0 for existing entries. */
+    if (table[cnt * entry_size + 1] == 0)
+      {
+        /* We have to fill in the information from the UNDEFINED
+           entry. */
+        table[cnt * entry_size] = (u32_t) cnt;
+
+        if (collate->undefined.ordering_len == collate->nrules)
+          {
+            size_t inner;
+
+            for (inner = 0; inner < collate->nrules; ++inner)
+              if (collate->undefined.ordering[collate->nrules + inner]
+                  == ELLIPSIS_CHAR)
+                table[cnt * entry_size + 1 + inner] = cnt;
+              else
+                table[cnt * entry_size + 1 + inner]
+                  = collate->undefined.ordering[collate->nrules + inner];
+          }
+        else
+          {
+            /* The extended form of UNDEFINED is emitted once and then
+               shared by all further missing characters. */
+            if (undefined_offset != UINT_MAX)
+              {
+                table[cnt * entry_size + 1] = FORWARD_CHAR;
+                table[cnt * entry_size + 2] = undefined_offset;
+              }
+            else
+              {
+                const size_t slot = cnt;
+                const size_t level = 0;
+
+                ADD_FORWARD (&collate->undefined);
+                undefined_offset = table[cnt * entry_size + 2];
+              }
+          }
+      }
+
+  /* Now we are ready for inserting the whole rest. */
+  last = NULL;
+  while (iterate_table (&collate->result, &last, (const void **) &name,
+                        &len, (void **) &pelem) >= 0)
+    if (pelem->name[0] > 0xff)
+      {
+        /* Find the position: first level whose slot is still free. */
+        size_t slot = ((size_t) pelem->name[0]) % table_best;
+        size_t level = 0;
+
+        while (table[(level * table_best + slot) * entry_size + 1] != 0)
+          ++level;
+        assert (level < level_best);
+
+        if (pelem->name[1] == L'\0' && pelem->next == NULL
+            && pelem->ordering_len == collate->nrules)
+          {
+            /* Again a simple entry. */
+            size_t inner;
+
+            for (inner = 0; inner < collate->nrules; ++inner)
+              table[(level * table_best + slot) * entry_size + 1 + inner]
+                = pelem->ordering[collate->nrules + inner];
+          }
+        else
+          ADD_FORWARD (pelem);
+      }
+
+  /* Add the UNDEFINED entry. */
+  {
+    /* Here we have to construct the non-simple table entry.
+       NOTE(review): this offset is a byte count while the forward
+       references above store word indices -- confirm what the reader
+       of _NL_COLLATE_UNDEFINED expects. */
+    size_t idx, cnt;
+
+    undefined_offset = obstack_object_size (&non_simple);
+
+    idx = collate->nrules;
+    for (cnt = 0; cnt < collate->nrules; ++cnt)
+      {
+        size_t disp;
+
+        ADD_VALUE (collate->undefined.ordering[cnt]);
+        for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
+          ADD_VALUE (collate->undefined.ordering[idx++]);
+      }
+  }
+
+  /* Finish the extra block. */
+  extra_len = obstack_object_size (&non_simple);
+  extra = (u32_t *) obstack_finish (&non_simple);
+  assert ((extra_len % sizeof (u32_t)) == 0);
+
+  /* Now we have to build the two array for the other byte ordering. */
+  table2 = (u32_t *) alloca (table_best * level_best * entry_size
+                             * sizeof (table[0]));
+  extra2 = (u32_t *) alloca (extra_len);
+
+  for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
+    table2[cnt] = SWAPU32 (table[cnt]);
+
+  /* The source is EXTRA; EXTRA2 is fresh, uninitialized memory. */
+  for (cnt = 0; cnt < extra_len / sizeof (u32_t); ++cnt)
+    extra2[cnt] = SWAPU32 (extra[cnt]);
+
+  /* Store table addresses and lengths. The arrays built above are in
+     host byte order, the `2' variants in the opposite one. */
+#if __BYTE_ORDER == __BIG_ENDIAN
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
+#else
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
+    = table_best * level_best * entry_size * sizeof (table[0]);
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
+#endif
+
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
+  iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u32_t);
+
+  /* Update idx array: file offset at which each item starts. */
+  idx[0] = iov[0].iov_len + iov[1].iov_len;
+  for (cnt = 1; cnt < nelems; ++cnt)
+    idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
+
+  write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
+}
+
+
+/* Handle the symbol name CODE of a `collating-element' line. The
+   name must not be known as a charset character, a collating element
+   or a collating symbol. On success it is remembered in
+   `combine_token' for the following collate_element_from call; after
+   an error `combine_token' is NULL. */
+void
+collate_element_to (struct linereader *lr, struct localedef_t *locale,
+                    struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  unsigned int value;
+  void *not_used;
+
+  /* Forget a name left over from an earlier line. */
+  if (collate->combine_token != NULL)
+    {
+      free ((void *) collate->combine_token);
+      collate->combine_token = NULL;
+    }
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+  if (value != ILLEGAL_CHAR_VALUE)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+                      "`%.*s' duplicates symbolic name in charset"),
+                code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
+                  &not_used) >= 0)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+                      "`%.*s' duplicates other element definition"),
+                code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  /* The clash with collating symbols must be looked up in the symbol
+     table, not a second time in the element table. */
+  if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
+                  &not_used) >= 0)
+    {
+      lr_error (lr, _("symbol for multicharacter collating element "
+                      "`%.*s' duplicates symbol definition"),
+                code->val.str.len, code->val.str.start);
+      return;
+    }
+
+  /* The string of the token is taken over, not copied. */
+  collate->combine_token = code->val.str.start;
+  collate->combine_token_len = code->val.str.len;
+}
+
+
+/* Handle the `from' string CODE of a `collating-element' line. The
+   string is translated to wide characters, a new element for it is
+   linked into the result table under its first character and the
+   element is registered in the element table under the name stored
+   by collate_element_to. The string of CODE is freed. */
+void
+collate_element_from (struct linereader *lr, struct localedef_t *locale,
+                      struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  element_t *elemp, *runp;
+
+  /* CODE is a string. */
+  elemp = (element_t *) obstack_alloc (&collate->element_mem,
+                                       sizeof (element_t));
+
+  /* We have to translate the string. It may contain <...> character
+     names. */
+  elemp->name = (wchar_t *) translate_string (code->val.str.start, charset);
+  elemp->this_weight = 0;
+  elemp->ordering = NULL;
+  elemp->ordering_len = 0;
+
+  free (code->val.str.start);
+
+  if (elemp->name == NULL)
+    {
+      /* At least one character in the string is not defined. We simply
+         do nothing. */
+      if (verbose)
+        lr_error (lr, _("\
+`from' string in collation element declaration contains unknown character"));
+      return;
+    }
+
+  /* A collating element consists of at least two characters. */
+  if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0')
+    {
+      lr_error (lr, _("illegal collation element"));
+      return;
+    }
+
+  /* The entries in the linked lists of RESULT are kept sorted. The
+     order is important for the `strcoll' and `wcscoll' functions.
+     NOTE(review): the loop below inserts the new element in front of
+     the first entry which does not compare smaller, which looks like
+     ascending `wcscmp' order although the original comment spoke of
+     descending order -- confirm against the lookup code. */
+  if (find_entry (&collate->result, elemp->name, sizeof (wchar_t),
+                  (void *) &runp) >= 0)
+    {
+      /* We already have an entry with this key. Check whether it is
+         identical. */
+      element_t *prevp = NULL;
+      int cmpres;
+
+      do
+        {
+          cmpres = wcscmp (elemp->name, runp->name);
+          if (cmpres <= 0)
+            break;
+          prevp = runp;
+        }
+      while ((runp = runp->next) != NULL);
+
+      if (cmpres == 0)
+        lr_error (lr, _("duplicate collating element definition"));
+      else
+        {
+          elemp->next = runp;
+          if (prevp == NULL)
+            {
+              /* New head of the chain: replace the table entry. */
+              if (set_entry (&collate->result, elemp->name, sizeof (wchar_t),
+                             elemp) < 0)
+                error (EXIT_FAILURE, 0,
+                       _("\
+error while inserting collation element into hash table"));
+            }
+          else
+            prevp->next = elemp;
+        }
+    }
+  else
+    {
+      elemp->next = NULL;
+      if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp)
+          < 0)
+        error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
+    }
+
+  /* collate_element_to leaves no name behind when it rejected the
+     symbol; in this case there is nothing to register. */
+  if (collate->combine_token == NULL)
+    return;
+
+  if (insert_entry (&collate->elements, collate->combine_token,
+                    collate->combine_token_len, (void *) elemp) < 0)
+    lr_error (lr, _("cannot insert new collating symbol definition: %s"),
+              strerror (errno));
+}
+
+
+/* Handle the name CODE of a `collating-symbol' line. A name which is
+   neither a charset character nor a known collating element or symbol
+   is entered into the symbol table with the value 0, i.e., without a
+   position in the sorting order. Any clash is reported through LR
+   and the name is ignored. */
+void
+collate_symbol (struct linereader *lr, struct localedef_t *locale,
+                struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  wchar_t value;
+  void *not_used;
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+
+  if (value != ILLEGAL_CHAR_VALUE)
+    /* Name of a character in the charset. */
+    lr_error (lr, _("symbol for multicharacter collating element "
+                    "`%.*s' duplicates symbolic name in charset"),
+              code->val.str.len, code->val.str.start);
+  else if (find_entry (&collate->elements, code->val.str.start,
+                       code->val.str.len, &not_used) >= 0)
+    /* Name of a collating element. */
+    lr_error (lr, _("symbol for multicharacter collating element "
+                    "`%.*s' duplicates element definition"),
+              code->val.str.len, code->val.str.start);
+  else if (find_entry (&collate->symbols, code->val.str.start,
+                       code->val.str.len, &not_used) >= 0)
+    /* Name of an earlier collating symbol. */
+    lr_error (lr, _("symbol for multicharacter collating element "
+                    "`%.*s' duplicates other symbol definition"),
+              code->val.str.len, code->val.str.start);
+  else if (insert_entry (&collate->symbols, code->val.str.start,
+                         code->val.str.len, (void *) 0) < 0)
+    lr_error (lr, _("cannot insert new collating symbol definition: %s"),
+              strerror (errno));
+}
+
+
+/* Append SORT_RULE to the list of sorting rules of LOCALE. The array
+   is doubled whenever it is full. LR is not used. */
+void
+collate_new_order (struct linereader *lr, struct localedef_t *locale,
+                   enum coll_sort_rule sort_rule)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+
+  if (collate->nrules_max <= collate->nrules)
+    {
+      /* No room left. */
+      collate->nrules_max = 2 * collate->nrules_max;
+      collate->rules
+        = (enum coll_sort_rule *) xrealloc (collate->rules,
+                                            collate->nrules_max
+                                            * sizeof (enum coll_sort_rule));
+    }
+
+  collate->rules[collate->nrules] = sort_rule;
+  ++collate->nrules;
+}
+
+
+/* Called when the number of sorting rules is final: shrink the rule
+   array to the used size and allocate the temporary arrays in which
+   the weights of one line are collected. LR is not used. */
+void
+collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+
+  collate->rules
+    = (enum coll_sort_rule *) xrealloc (collate->rules,
+                                        collate->nrules
+                                        * sizeof (enum coll_sort_rule));
+
+  /* Allocate arrays for temporary weights: one counter per rule. */
+  collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
+
+  /* Choose arbitrary start value for table size. The array holds
+     `unsigned int' values, so size and cast follow that type. */
+  collate->nweight_max = 5 * collate->nrules;
+  collate->weight
+    = (unsigned int *) xmalloc (collate->nweight_max
+                                * sizeof (unsigned int));
+}
+
+
+/* Handle the first token CODE of a line in the sorting order section.
+
+   The token is a symbolic name (character of the charset, collating
+   element or collating symbol), the keyword UNDEFINED or an ellipsis.
+   The entry becomes the current element and, except for UNDEFINED and
+   the ellipsis, gets the next position in the sorting order. When
+   the previous line was an ellipsis, entries for all characters
+   between the last character and the one on this line are created
+   from the weights collected for the ellipsis line. Finally the
+   temporary weight counters are reset for the new line.
+
+   Returns 0 on success and -1 when the line has to be ignored. */
+int
+collate_order_elem (struct linereader *lr, struct localedef_t *locale,
+                    struct token *code, struct charset_t *charset)
+{
+  const wchar_t zero = L'\0';
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  int result = 0;
+  wchar_t value;
+  void *tmp;
+  int i;
+
+  switch (code->tok)
+    {
+    case tok_bsymbol:
+      /* We have a string to find in one of the three hashing tables. */
+      value = charset_find_value (charset, code->val.str.start,
+                                  code->val.str.len);
+      if (value != ILLEGAL_CHAR_VALUE)
+        {
+          element_t *lastp, *firstp;
+
+          collate->kind = character;
+
+          if (find_entry (&collate->result, &value, sizeof (wchar_t),
+                          (void *) &firstp) < 0)
+            firstp = lastp = NULL;
+          else
+            {
+              /* The entry for the simple character is always found at
+                 the end. */
+              lastp = firstp;
+              while (lastp->next != NULL)
+                lastp = lastp->next;
+
+              if (lastp->name[0] == value && lastp->name[1] == L'\0')
+                {
+                  lr_error (lr, _("duplicate definition for character `%.*s'"),
+                            code->val.str.len, code->val.str.start);
+                  lr_ignore_rest (lr, 0);
+                  result = -1;
+                  break;
+                }
+            }
+
+          collate->current_element
+            = (element_t *) obstack_alloc (&collate->element_mem,
+                                           sizeof (element_t));
+
+          obstack_grow (&collate->element_mem, &value, sizeof (value));
+          obstack_grow (&collate->element_mem, &zero, sizeof (zero));
+
+          collate->current_element->name =
+            (const wchar_t *) obstack_finish (&collate->element_mem);
+
+          collate->current_element->this_weight = ++collate->order_cnt;
+
+          collate->current_element->next = NULL;
+
+          /* No weights yet. collate_output recognizes entries whose
+             weights were never read by the NULL pointer. */
+          collate->current_element->ordering = NULL;
+          collate->current_element->ordering_len = 0;
+
+          if (firstp == NULL)
+            {
+              if (insert_entry (&collate->result, &value, sizeof (wchar_t),
+                                (void *) collate->current_element) < 0)
+                {
+                  lr_error (lr, _("cannot insert collation element `%.*s'"),
+                            code->val.str.len, code->val.str.start);
+                  exit (4);
+                }
+            }
+          else
+            lastp->next = collate->current_element;
+        }
+      else if (find_entry (&collate->elements, code->val.str.start,
+                           code->val.str.len, &tmp) >= 0)
+        {
+          collate->current_element = (element_t *) tmp;
+
+          /* A weight different from 0 means the element already has
+             its position. */
+          if (collate->current_element->this_weight != 0)
+            {
+              lr_error (lr, _("\
+collation element `%.*s' appears more than once: ignore line"),
+                        code->val.str.len, code->val.str.start);
+              lr_ignore_rest (lr, 0);
+              result = -1;
+              break;
+            }
+
+          collate->kind = element;
+          collate->current_element->this_weight = ++collate->order_cnt;
+        }
+      else if (find_entry (&collate->symbols, code->val.str.start,
+                           code->val.str.len, &tmp) >= 0)
+        {
+          unsigned int order = ++collate->order_cnt;
+
+          /* The value stored for a symbol is its position; 0 means
+             it has none so far. */
+          if ((unsigned int) tmp != 0)
+            {
+              lr_error (lr, _("\
+collation symbol `%.*s' appears more than once: ignore line"),
+                        code->val.str.len, code->val.str.start);
+              lr_ignore_rest (lr, 0);
+              result = -1;
+              break;
+            }
+
+          collate->kind = symbol;
+
+          if (set_entry (&collate->symbols, code->val.str.start,
+                         code->val.str.len, (void *) order) < 0)
+            {
+              lr_error (lr, _("cannot process order specification"));
+              exit (4);
+            }
+        }
+      else
+        {
+          if (verbose)
+            lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+                      code->val.str.len, code->val.str.start);
+          lr_ignore_rest (lr, 0);
+
+          result = -1;
+        }
+      break;
+
+    case tok_undefined:
+      collate->kind = undefined;
+      collate->current_element = &collate->undefined;
+      break;
+
+    case tok_ellipsis:
+      if (collate->was_ellipsis)
+        {
+          lr_error (lr, _("\
+two lines in a row containing `...' are not allowed"));
+          result = -1;
+        }
+      else if (collate->kind != character)
+        {
+          /* An ellipsis requires the previous line to be an
+             character definition. */
+          lr_error (lr, _("\
+line before ellipsis does not contain definition for character constant"));
+          lr_ignore_rest (lr, 0);
+          result = -1;
+        }
+      else
+        collate->kind = ellipsis;
+      break;
+
+    default:
+      assert (! "illegal token in `collate_order_elem'");
+    }
+
+  /* Now it's time to handle the ellipsis in the previous line. We do
+     this only when the last line contained an definition for an
+     character, the current line also defines an character, the
+     character code for the later is bigger than the former. */
+  if (collate->was_ellipsis)
+    {
+      if (collate->kind != character)
+        {
+          lr_error (lr, _("\
+line after ellipsis must contain character definition"));
+          lr_ignore_rest (lr, 0);
+          result = -1;
+        }
+      else if (collate->last_char > value)
+        {
+          lr_error (lr, _("end point of ellipsis range is bigger then start"));
+          lr_ignore_rest (lr, 0);
+          result = -1;
+        }
+      else
+        {
+          /* We can fill the arrays with the information we need. */
+          wchar_t name[2];
+          unsigned int *data;
+          size_t *ptr;
+          size_t cnt;
+
+          name[0] = collate->last_char + 1;
+          name[1] = L'\0';
+
+          /* DATA has the layout of an `ordering' array: NRULES counts
+             followed by NWEIGHT weights. */
+          data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
+                                          * sizeof (unsigned int));
+          ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
+
+          if (data == NULL || ptr == NULL)
+            error (4, 0, _("memory exhausted"));
+
+          /* Prepare data. Because the characters covered by an
+             ellipsis all have equal values we prepare the data once
+             and only change the variable number (if there are any).
+             PTR[...] will point to the entries which will have to be
+             fixed during the output loop. The weights of the first
+             rule start right behind the NRULES counts. */
+          for (cnt = 0; cnt < collate->nrules; ++cnt)
+            {
+              data[cnt] = collate->weight_cnt[cnt];
+              ptr[cnt] = (cnt == 0
+                          ? collate->nrules
+                          : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
+            }
+
+          for (cnt = 0; cnt < collate->nweight; ++cnt)
+            data[collate->nrules + cnt] = collate->weight[cnt];
+
+          /* Index 0 is a count and never needs a fix, so it serves as
+             the marker for `nothing to adjust'.
+             NOTE(review): a rule without any weight makes PTR refer to
+             the first weight of the following rule -- confirm whether
+             the reader can produce such a line. */
+          for (cnt = 0; cnt < collate->nrules; ++cnt)
+            if (data[ptr[cnt]] != ELLIPSIS_CHAR)
+              ptr[cnt] = 0;
+
+          while (name[0] <= value)
+            {
+              element_t *pelem;
+
+              pelem = (element_t *) obstack_alloc (&collate->element_mem,
+                                                   sizeof (element_t));
+              if (pelem == NULL)
+                error (4, 0, _("memory exhausted"));
+
+              pelem->name
+                = (const wchar_t *) obstack_copy (&collate->element_mem,
+                                                  name, 2 * sizeof (wchar_t));
+              pelem->this_weight = ++collate->order_cnt;
+
+              /* Copy exactly the NRULES counts plus the weights. */
+              pelem->ordering_len = collate->nweight;
+              pelem->ordering
+                = (unsigned int *) obstack_copy (&collate->element_mem, data,
+                                                 (collate->nrules
+                                                  + pelem->ordering_len)
+                                                 * sizeof (unsigned int));
+
+              /* `...' weights need to be adjusted. */
+              for (cnt = 0; cnt < collate->nrules; ++cnt)
+                if (ptr[cnt] != 0)
+                  pelem->ordering[ptr[cnt]] = pelem->this_weight;
+
+              /* Insert new entry into result table. An existing chain
+                 is continued behind the new entry.
+                 NOTE(review): this puts the single character in front
+                 of the chain while the tok_bsymbol case above appends
+                 it -- confirm which order the users of the chain
+                 rely on. */
+              if (find_entry (&collate->result, name, sizeof (wchar_t),
+                              (void *) &pelem->next) >= 0)
+                {
+                  if (set_entry (&collate->result, name, sizeof (wchar_t),
+                                 (void *) pelem) < 0)
+                    error (4, 0, _("cannot insert into result table"));
+                }
+              else
+                {
+                  pelem->next = NULL;
+                  if (insert_entry (&collate->result, name, sizeof (wchar_t),
+                                    (void *) pelem) < 0)
+                    error (4, 0, _("cannot insert into result table"));
+                }
+
+              /* Increment counter. */
+              ++name[0];
+            }
+        }
+    }
+
+  /* Reset counters for weights. */
+  collate->weight_idx = 0;
+  collate->nweight = 0;
+  for (i = 0; i < collate->nrules; ++i)
+    collate->weight_cnt[i] = 0;
+  collate->current_patch = NULL;
+
+  return result;
+}
+
+
+/* Add the symbolic name CODE as a weight of the current line.
+
+   The name is looked up as a charset character, a collating element
+   and a collating symbol, in this order. When it has a position in
+   the sorting order that value is appended to the temporary weight
+   array; otherwise a slot is reserved and a patch record is created
+   so that the value can be filled in later.
+
+   Returns 0 on success and -1 when the rest of the line is ignored
+   (unknown name, or weights given for a collating symbol). */
+int
+collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
+                        struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  unsigned int here_weight;
+  wchar_t value;
+  void *tmp;
+
+  assert (code->tok == tok_bsymbol);
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+  if (value != ILLEGAL_CHAR_VALUE)
+    {
+      element_t *runp;
+
+      if (find_entry (&collate->result, &value, sizeof (wchar_t),
+                      (void *)&runp) < 0)
+        runp = NULL;
+
+      /* Search the entry for the plain character in the chain. */
+      while (runp != NULL
+             && (runp->name[0] != value || runp->name[1] != L'\0'))
+        runp = runp->next;
+
+      here_weight = runp == NULL ? 0 : runp->this_weight;
+    }
+  else if (find_entry (&collate->elements, code->val.str.start,
+                       code->val.str.len, &tmp) >= 0)
+    {
+      element_t *runp = (element_t *) tmp;
+
+      here_weight = runp->this_weight;
+    }
+  else if (find_entry (&collate->symbols, code->val.str.start,
+                       code->val.str.len, &tmp) >= 0)
+    {
+      here_weight = (unsigned int) tmp;
+    }
+  else
+    {
+      if (verbose)
+        lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+                  code->val.str.len, code->val.str.start);
+      lr_ignore_rest (lr, 0);
+      return -1;
+    }
+
+  /* When we currently work on a collation symbol we do not expect any
+     weight. */
+  if (collate->kind == symbol)
+    {
+      lr_error (lr, _("\
+specification of sorting weight for collation symbol does not make sense"));
+      lr_ignore_rest (lr, 0);
+      return -1;
+    }
+
+  /* Add to the current collection of weights. The size handed to
+     xrealloc is a byte count, so the element size is needed. */
+  if (collate->nweight >= collate->nweight_max)
+    {
+      collate->nweight_max *= 2;
+      collate->weight
+        = (unsigned int *) xrealloc (collate->weight,
+                                     collate->nweight_max
+                                     * sizeof (unsigned int));
+    }
+
+  /* If the weight is currently not known, we remember to patch the
+     resulting tables. */
+  if (here_weight == 0)
+    {
+      patch_t *newp;
+
+      newp = (patch_t *) obstack_alloc (&collate->element_mem,
+                                        sizeof (patch_t));
+      newp->fname = lr->fname;
+      newp->lineno = lr->lineno;
+      newp->token = (const char *) obstack_copy0 (&collate->element_mem,
+                                                  code->val.str.start,
+                                                  code->val.str.len);
+      /* Remember the slot; its content stays unset for now. */
+      newp->where.idx = collate->nweight++;
+      newp->next = collate->current_patch;
+      collate->current_patch = newp;
+    }
+  else
+    collate->weight[collate->nweight++] = here_weight;
+  ++collate->weight_cnt[collate->weight_idx];
+
+  return 0;
+}
+
+
+/* Advance to the next weight level of the entry being defined.
+   Returns 0 on success.  If the current entry is a collation symbol,
+   or if there are already as many levels as rules, an error is
+   reported, the rest of the line is skipped and -1 is returned.  */
+int
+collate_next_weight (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+
+  if (collate->kind != symbol)
+    {
+      collate->weight_idx += 1;
+      if (collate->weight_idx < collate->nrules)
+        return 0;
+
+      lr_error (lr, _("too many weights"));
+    }
+  else
+    lr_error (lr, _("\
+specification of sorting weight for collation symbol does not make sense"));
+
+  /* Both failure paths drop the remainder of the input line.  */
+  lr_ignore_rest (lr, 0);
+  return -1;
+}
+
+
+/* Process a weight given as `IGNORE', `...' or a quoted string for the
+   entry that is currently being defined.
+
+   For a string every `<name>' sequence and every plain or escaped
+   character contributes one weight.  Weights that are still 0 get a
+   patch record queued so the slot can be filled in later.
+
+   Returns 0 on success, -1 if the rest of the line was discarded.  */
+int
+collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
+                       struct token *code, struct charset_t *charset)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  unsigned int value = 0;
+
+  /* The current token can be `IGNORE', `...', or a string.  */
+  switch (code->tok)
+    {
+    case tok_ignore:
+      /* This token is allowed in all situations.  */
+      value = IGNORE_CHAR;
+      break;
+
+    case tok_ellipsis:
+      /* The ellipsis is only allowed for the `...' or `UNDEFINED'
+         entry.  */
+      if (collate->kind != ellipsis && collate->kind != undefined)
+        {
+          lr_error (lr, _("\
+`...' must only be used in `...' and `UNDEFINED' entries"));
+          lr_ignore_rest (lr, 0);
+          return -1;
+        }
+      value = ELLIPSIS_CHAR;
+      break;
+
+    case tok_string:
+      /* This can become difficult.  We have to get the weights which
+         correspond to the single wide chars in the string.  But some
+         of the `chars' might not be real characters, but collation
+         elements or symbols.  And so the string decoder might have
+         signaled errors.  The string at this point is not translated.
+         I.e., all <...> sequences are still there.  */
+      {
+        char *runp = code->val.str.start;
+        void *tmp;
+
+        while (*runp != '\0')
+          {
+            char *startp = (char *) runp;
+            char *putp = (char *) runp;
+            wchar_t wch;
+
+            /* Lookup weight for char and store it.  */
+            if (*runp == '<')
+              {
+                /* Copy the name in place (starting over the `<'),
+                   dropping escape characters on the way.  */
+                while (*++runp != '\0' && *runp != '>')
+                  {
+                    if (*runp == lr->escape_char)
+                      if (*++runp == '\0')
+                        {
+                          lr_error (lr, _("unterminated weight name"));
+                          lr_ignore_rest (lr, 0);
+                          return -1;
+                        }
+                    *putp++ = *runp;
+                  }
+                if (*runp == '>')
+                  ++runp;
+
+                if (putp == startp)
+                  {
+                    lr_error (lr, _("empty weight name: line ignored"));
+                    lr_ignore_rest (lr, 0);
+                    return -1;
+                  }
+
+                wch = charset_find_value (charset, startp, putp - startp);
+                if (wch != ILLEGAL_CHAR_VALUE)
+                  {
+                    element_t *pelem;
+
+                    if (find_entry (&collate->result, &wch, sizeof (wchar_t),
+                                    (void *)&pelem) < 0)
+                      pelem = NULL;
+
+                    while (pelem != NULL
+                           && (pelem->name[0] != wch
+                               || pelem->name[1] != L'\0'))
+                      pelem = pelem->next;
+
+                    value = pelem == NULL ? 0 : pelem->this_weight;
+                  }
+                else if (find_entry (&collate->elements, startp, putp - startp,
+                                     &tmp) >= 0)
+                  {
+                    element_t *pelem = (element_t *) tmp;
+
+                    value = pelem->this_weight;
+                  }
+                else if (find_entry (&collate->symbols, startp, putp - startp,
+                                     &tmp) >= 0)
+                  {
+                    value = (unsigned int) tmp;
+                  }
+                else
+                  {
+                    /* The precision argument of `%.*s' must be an int.  */
+                    if (verbose)
+                      lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+                                (int) (putp - startp), startp);
+                    lr_ignore_rest (lr, 0);
+                    return -1;
+                  }
+              }
+            else
+              {
+                element_t *wp;
+
+                if (*runp == lr->escape_char)
+                  {
+                    static const char digits[] = "0123456789abcdef";
+                    const char *dp;
+                    int base;
+
+                    /* The <ctype.h> functions need an unsigned char
+                       value, hence the casts below.  */
+                    ++runp;
+                    if (tolower ((unsigned char) *runp) == 'x')
+                      {
+                        ++runp;
+                        base = 16;
+                      }
+                    else if (tolower ((unsigned char) *runp) == 'd')
+                      {
+                        ++runp;
+                        base = 10;
+                      }
+                    else
+                      base = 8;
+
+                    /* At least two digits are required.  A NUL byte
+                       matches the terminator of DIGITS (offset 16) and
+                       is therefore rejected for every base.  */
+                    dp = strchr (digits, tolower ((unsigned char) *runp));
+                    if (dp == NULL || (dp - digits) >= base)
+                      {
+                      illegal_char:
+                        lr_error (lr, _("\
+illegal character constant in string"));
+                        lr_ignore_rest (lr, 0);
+                        return -1;
+                      }
+                    wch = dp - digits;
+                    ++runp;
+
+                    dp = strchr (digits, tolower ((unsigned char) *runp));
+                    if (dp == NULL || (dp - digits) >= base)
+                      goto illegal_char;
+                    wch *= base;
+                    wch += dp - digits;
+                    ++runp;
+
+                    /* Octal and decimal constants may have a third
+                       digit.  */
+                    if (base != 16)
+                      {
+                        dp = strchr (digits,
+                                     tolower ((unsigned char) *runp));
+                        if (dp != NULL && (dp - digits < base))
+                          {
+                            wch *= base;
+                            wch += dp - digits;
+                            ++runp;
+                          }
+                      }
+                  }
+                else
+                  /* Go through unsigned char so that bytes >= 0x80 do
+                     not become negative where plain char is signed.  */
+                  wch = (wchar_t) (unsigned char) *runp++;
+
+                /* Lookup the weight for WCH.  */
+                if (find_entry (&collate->result, &wch, sizeof (wch),
+                                (void *)&wp) < 0)
+                  wp = NULL;
+
+                while (wp != NULL
+                       && (wp->name[0] != wch || wp->name[1] != L'\0'))
+                  wp = wp->next;
+
+                value = wp == NULL ? 0 : wp->this_weight;
+
+                /* To get the correct name for the error message.  */
+                putp = runp;
+
+                /**************************************************\
+                |* I know here is something wrong.  Characters in *|
+                |* the string which are not in the <...> form     *|
+                |* cannot be declared forward for now!!!          *|
+                \**************************************************/
+              }
+
+            /* Store in weight array.  NWEIGHT_MAX counts elements, so
+               scale it to bytes for xrealloc.  */
+            if (collate->nweight >= collate->nweight_max)
+              {
+                collate->nweight_max *= 2;
+                collate->weight
+                  = (unsigned int *) xrealloc (collate->weight,
+                                               collate->nweight_max
+                                               * sizeof (unsigned int));
+              }
+
+            if (value == 0)
+              {
+                /* Weight not known yet: reserve the slot and remember
+                   to patch it.  */
+                patch_t *newp;
+
+                newp = (patch_t *) obstack_alloc (&collate->element_mem,
+                                                  sizeof (patch_t));
+                newp->fname = lr->fname;
+                newp->lineno = lr->lineno;
+                newp->token
+                  = (const char *) obstack_copy0 (&collate->element_mem,
+                                                  startp, putp - startp);
+                newp->where.idx = collate->nweight++;
+                newp->next = collate->current_patch;
+                collate->current_patch = newp;
+              }
+            else
+              collate->weight[collate->nweight++] = value;
+            ++collate->weight_cnt[collate->weight_idx];
+          }
+      }
+      return 0;
+
+    default:
+      assert (! "should not happen");
+    }
+
+
+  /* `IGNORE' and `...' contribute exactly one weight.  */
+  if (collate->nweight >= collate->nweight_max)
+    {
+      collate->nweight_max *= 2;
+      collate->weight
+        = (unsigned int *) xrealloc (collate->weight,
+                                     collate->nweight_max
+                                     * sizeof (unsigned int));
+    }
+
+  collate->weight[collate->nweight++] = value;
+  ++collate->weight_cnt[collate->weight_idx];
+
+  return 0;
+}
+
+
+/* Finish the weight list of the entry that is currently being defined.
+
+   Nothing is stored for collation symbols, and an ellipsis line is only
+   flagged so it can be handled before the next line.  For characters,
+   elements and `UNDEFINED' the levels not given are filled with the
+   entry's own weight, the per-level counts followed by the weights are
+   copied into the element, and pending patch records are resolved to
+   addresses inside that copy.  */
+void
+collate_end_weight (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  element_t *pelem = collate->current_element;
+
+  if (collate->kind == symbol)
+    {
+      /* We don't have to do anything.  */
+      collate->was_ellipsis = 0;
+      return;
+    }
+
+  if (collate->kind == ellipsis)
+    {
+      /* Before the next line is processed the ellipsis is handled.  */
+      collate->was_ellipsis = 1;
+      return;
+    }
+
+  assert (collate->kind == character || collate->kind == element
+          || collate->kind == undefined);
+
+  /* Fill in the missing weights.  */
+  while (++collate->weight_idx < collate->nrules)
+    {
+      /* The buffer may already be full, so grow it before appending.  */
+      if (collate->nweight >= collate->nweight_max)
+        {
+          collate->nweight_max *= 2;
+          collate->weight
+            = (unsigned int *) xrealloc (collate->weight,
+                                         collate->nweight_max
+                                         * sizeof (unsigned int));
+        }
+
+      collate->weight[collate->nweight++] = pelem->this_weight;
+      ++collate->weight_cnt[collate->weight_idx];
+    }
+
+  /* Now we know how many ordering weights the current
+     character/element has.  Allocate room in the element structure
+     and copy information.  */
+  pelem->ordering_len = collate->nweight;
+
+  /* First we write an array with the number of values for each
+     weight.  */
+  obstack_grow (&collate->element_mem, collate->weight_cnt,
+                collate->nrules * sizeof (unsigned int));
+
+  /* Now the weights themselves.  */
+  obstack_grow (&collate->element_mem, collate->weight,
+                collate->nweight * sizeof (unsigned int));
+
+  /* Get result.  */
+  pelem->ordering = obstack_finish (&collate->element_mem);
+
+  /* Now we handle the "patches": turn each recorded index into the
+     address of the slot (the weights start after the NRULES counts) and
+     move the record to the global list.  */
+  while (collate->current_patch != NULL)
+    {
+      patch_t *this_patch;
+
+      this_patch = collate->current_patch;
+
+      this_patch->where.pos = &pelem->ordering[collate->nrules
+                                               + this_patch->where.idx];
+
+      collate->current_patch = this_patch->next;
+      this_patch->next = collate->all_patches;
+      collate->all_patches = this_patch;
+    }
+
+  /* Set information for next round.  */
+  collate->was_ellipsis = 0;
+  if (collate->kind != undefined)
+    collate->last_char = pelem->name[0];
+}
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
new file mode 100644
index 0000000000..c1cc8e53b8
--- /dev/null
+++ b/locale/programs/ld-ctype.c
@@ -0,0 +1,1310 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <endian.h>
+#include <limits.h>
+#include <string.h>
+
+#include "locales.h"
+#include "localeinfo.h"
+#include "langinfo.h"
+#include "locfile-token.h"
+#include "stringtrans.h"
+
+/* Uncomment the following line in the production version. */
+/* define NDEBUG 1 */
+#include <assert.h>
+
+
+void *xmalloc (size_t __n);
+void *xcalloc (size_t __n, size_t __s);
+void *xrealloc (void *__ptr, size_t __n);
+
+
+/* The bit used for representing a special class.  BITPOS is the
+   distance of the class token from `tok_upper', BIT the mask with
+   only that bit set. */
+#define BITPOS(class) ((class) - tok_upper)
+#define BIT(class) (1 << BITPOS (class))
+
+/* Expands to an lvalue: the entry for character VALUE in the table
+   CTYPE->COLLECTION, looked up (and created if necessary) by find_idx.
+   IDX is either empty or a subscript such as `[n]' that is appended to
+   the table name and to its `_max' and `_act' companions. */
+#define ELEM(ctype, collection, idx, value) \
+ *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
+ &ctype->collection##_act idx, value)
+
+/* Reverse the byte order of W.  Assumes W is an unsigned 32-bit value;
+   note that W is evaluated several times. */
+#define SWAPU32(w) \
+ (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
+
+/* Exchange the two low-order bytes of W; W is evaluated twice. */
+#define SWAPU16(w) \
+ ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))
+
+
+/* To be compatible with former implementations we for now restrict
+ the number of bits for character classes to 16. When compatibility
+ is not necessary anymore increase the number to 32. */
+#define char_class_t u16_t
+#define CHAR_CLASS_TRANS SWAPU16
+#define char_class32_t u32_t
+#define CHAR_CLASS32_TRANS SWAPU32
+
+
+/* The real definition of the struct for the LC_CTYPE locale. */
+struct locale_ctype_t
+{
+  /* Character values in the order in which table slots were assigned;
+     slot N of the class and map tables belongs to charnames[N].  */
+  unsigned int *charnames;
+  size_t charnames_max;
+  size_t charnames_act;
+
+  /* We will allow up to 8 * sizeof(u32_t) - 1 character classes.  */
+#define MAX_NR_CHARCLASS (8 * sizeof (u32_t) - 1)
+  int nr_charclass;
+  const char *classnames[MAX_NR_CHARCLASS];
+  /* Mask of the class currently being read and the last character seen
+     in it (start of a possible range).  */
+  unsigned long int current_class_mask;
+  unsigned int last_class_char;
+  /* Per-slot bit set of the classes the character belongs to.  */
+  u32_t *class_collection;
+  size_t class_collection_max;
+  size_t class_collection_act;
+  /* Bit set of the standard classes that were given explicitly.  */
+  unsigned long int class_done;
+
+  /* If the following number ever turns out to be too small simply
+     increase it.  But I doubt it will.  --drepper@gnu */
+#define MAX_NR_CHARMAP 16
+  const char *mapnames[MAX_NR_CHARMAP];
+  u32_t *map_collection[MAX_NR_CHARMAP];
+  /* These two must be size_t: their addresses are handed to find_idx,
+     which takes `size_t *' for both.  */
+  size_t map_collection_max[MAX_NR_CHARMAP];
+  size_t map_collection_act[MAX_NR_CHARMAP];
+  size_t map_collection_nr;
+  /* Map currently being read (MAX_NR_CHARMAP when none) and the pending
+     source character of a mapping pair.  */
+  size_t last_map_idx;
+  unsigned int from_map_char;
+  int toupper_done;
+  int tolower_done;
+
+  /* The arrays for the binary representation.  */
+  u32_t plane_size;
+  u32_t plane_cnt;
+  char_class_t *ctype_b;
+  char_class32_t *ctype32_b;
+  u32_t *names_el;
+  u32_t *names_eb;
+  u32_t **map_eb;
+  u32_t **map_el;
+  u32_t *class_name_ptr;
+  u32_t *map_name_ptr;
+};
+
+
+/* Prototypes for local functions. */
+static void ctype_class_newP (struct linereader *lr,
+ struct locale_ctype_t *ctype, const char *name);
+static void ctype_map_newP (struct linereader *lr,
+ struct locale_ctype_t *ctype,
+ const char *name, struct charset_t *charset);
+static u32_t *find_idx (struct locale_ctype_t *ctype, u32_t **table,
+ size_t *max, size_t *act, unsigned int idx);
+static void set_class_defaults (struct locale_ctype_t *ctype,
+ struct charset_t *charset);
+static void allocate_arrays (struct locale_ctype_t *ctype);
+
+
+/* Allocate and initialize the LC_CTYPE data of LOCALE: the character
+   name table, the twelve standard character classes and the `toupper'
+   and `tolower' maps.  The first 256 entries of both maps start out as
+   the identity mapping.  */
+void
+ctype_startup (struct linereader *lr, struct localedef_t *locale,
+               struct charset_t *charset)
+{
+  unsigned int cnt;
+  struct locale_ctype_t *ctype;
+
+  /* It is important that we always use UCS1 encoding for strings now.  */
+  encoding_method = ENC_UCS1;
+
+  /* Allocate the needed room.  */
+  locale->categories[LC_CTYPE].ctype = ctype =
+    (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
+
+  /* We have no names seen yet.  */
+  ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
+  ctype->charnames =
+    (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
+  for (cnt = 0; cnt < 256; ++cnt)
+    ctype->charnames[cnt] = cnt;
+  ctype->charnames_act = 256;
+
+  /* Fill character class information.  The structure comes from
+     xmalloc, so the "seen" flags that are later tested and or-ed into
+     must be cleared explicitly.  */
+  ctype->nr_charclass = 0;
+  ctype->current_class_mask = 0;
+  ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+  ctype->class_done = 0;
+  /* The order of the following instructions determines the bit
+     positions!  */
+  ctype_class_newP (lr, ctype, "upper");
+  ctype_class_newP (lr, ctype, "lower");
+  ctype_class_newP (lr, ctype, "alpha");
+  ctype_class_newP (lr, ctype, "digit");
+  ctype_class_newP (lr, ctype, "xdigit");
+  ctype_class_newP (lr, ctype, "space");
+  ctype_class_newP (lr, ctype, "print");
+  ctype_class_newP (lr, ctype, "graph");
+  ctype_class_newP (lr, ctype, "blank");
+  ctype_class_newP (lr, ctype, "cntrl");
+  ctype_class_newP (lr, ctype, "punct");
+  ctype_class_newP (lr, ctype, "alnum");
+
+  ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
+  ctype->class_collection = (u32_t *) xmalloc (sizeof (unsigned long int)
+                                               * ctype->class_collection_max);
+  memset (ctype->class_collection, '\0',
+          sizeof (unsigned long int) * ctype->class_collection_max);
+  ctype->class_collection_act = 256;
+
+  /* Fill character map information.  */
+  ctype->map_collection_nr = 0;
+  ctype->last_map_idx = MAX_NR_CHARMAP;
+  ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+  ctype->toupper_done = 0;
+  ctype->tolower_done = 0;
+  ctype_map_newP (lr, ctype, "toupper", charset);
+  ctype_map_newP (lr, ctype, "tolower", charset);
+
+  /* Fill first 256 entries in `toupper' and `tolower' arrays.  */
+  for (cnt = 0; cnt < 256; ++cnt)
+    {
+      ctype->map_collection[0][cnt] = cnt;
+      ctype->map_collection[1][cnt] = cnt;
+    }
+}
+
+
+/* Complete the LC_CTYPE data after the definition has been read: set
+   defaults for classes which were not specified, check every character
+   against the class relation table below (reporting violations and
+   adding the classes marked `D'), and finally verify the class
+   membership of the <SP> character.  */
+void
+ctype_finish (struct localedef_t *locale, struct charset_t *charset)
+{
+  /* See POSIX.2, table 2-6 for the meaning of the following table.  */
+#define NCLASS 12
+  static const struct
+  {
+    const char *name;
+    const char allow[NCLASS];
+  }
+  valid_table[NCLASS] =
+  {
+    /* The order is important.  See token.h for more information.
+       M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
+    { "upper",  "--MX-XDDXXX-" },
+    { "lower",  "--MX-XDDXXX-" },
+    { "alpha",  "---X-XDDXXX-" },
+    { "digit",  "XXX--XDDXXX-" },
+    { "xdigit", "-----XDDXXX-" },
+    { "space",  "XXXXX------X" },
+    { "print",  "---------X--" },
+    { "graph",  "---------X--" },
+    { "blank",  "XXXXXM-----X" },
+    { "cntrl",  "XXXXX-XX--XX" },
+    { "punct",  "XXXXX-DD-X-X" },
+    { "alnum",  "-----XDDXXX-" }
+  };
+  size_t cnt;
+  int cls1, cls2;
+  unsigned int space_value;
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+
+  /* Set default value for classes not specified.  */
+  set_class_defaults (ctype, charset);
+
+  /* Check according to table.  */
+  for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
+    {
+      unsigned long int tmp;
+
+      /* Slots without any class bit need no checking.  */
+      tmp = ctype->class_collection[cnt];
+      if (tmp == 0)
+        continue;
+
+      for (cls1 = 0; cls1 < NCLASS; ++cls1)
+        if ((tmp & (1 << cls1)) != 0)
+          for (cls2 = 0; cls2 < NCLASS; ++cls2)
+            if (valid_table[cls1].allow[cls2] != '-')
+              {
+                int eq = (tmp & (1 << cls2)) != 0;
+                switch (valid_table[cls1].allow[cls2])
+                  {
+                  case 'M':
+                    if (!eq)
+                      {
+                        /* Up to four escapes of four characters each
+                           plus the terminating NUL.  */
+                        char buf[17];
+                        char *cp = buf;
+                        unsigned int value;
+
+                        value = ctype->charnames[cnt];
+
+                        if ((value & 0xff000000) != 0)
+                          cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
+                        if ((value & 0xffff0000) != 0)
+                          cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
+                        if ((value & 0xffffff00) != 0)
+                          cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
+                        sprintf (cp, "\\%o", value & 0xff);
+
+                        /* Print the whole buffer, not just the escape CP
+                           was left pointing at.  Values from 256 on are
+                           wide characters.  */
+                        error (0, 0, _("\
+character %s'%s' in class `%s' must be in class `%s'"),
+                               value > 255 ? "L" : "", buf,
+                               valid_table[cls1].name,
+                               valid_table[cls2].name);
+                      }
+                    break;
+
+                  case 'X':
+                    if (eq)
+                      {
+                        char buf[17];
+                        char *cp = buf;
+                        unsigned int value;
+
+                        value = ctype->charnames[cnt];
+
+                        if ((value & 0xff000000) != 0)
+                          cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
+                        if ((value & 0xffff0000) != 0)
+                          cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
+                        if ((value & 0xffffff00) != 0)
+                          cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
+                        sprintf (cp, "\\%o", value & 0xff);
+
+                        error (0, 0, _("\
+character %s'%s' in class `%s' must not be in class `%s'"),
+                               value > 255 ? "L" : "", buf,
+                               valid_table[cls1].name,
+                               valid_table[cls2].name);
+                      }
+                    break;
+
+                  case 'D':
+                    ctype->class_collection[cnt] |= 1 << cls2;
+                    break;
+
+                  default:
+                    error (5, 0, _("internal error in %s, line %u"),
+                           __FUNCTION__, __LINE__);
+                  }
+              }
+    }
+
+  /* ... and now test <SP> as a special case.  CNT is set inside the
+     conditions so that the message can name the offending class.  */
+  space_value = charset_find_value (charset, "SP", 2);
+  if (space_value == ILLEGAL_CHAR_VALUE)
+    error (0, 0, _("character <SP> not defined in character map"));
+  else if ((cnt = BITPOS (tok_space),
+            (ELEM (ctype, class_collection, , space_value)
+             & BIT (tok_space)) == 0)
+           || (cnt = BITPOS (tok_blank),
+               (ELEM (ctype, class_collection, , space_value)
+                & BIT (tok_blank)) == 0))
+    error (0, 0, _("<SP> character not in class `%s'"),
+           valid_table[cnt].name);
+  else if ((cnt = BITPOS (tok_punct),
+            (ELEM (ctype, class_collection, , space_value)
+             & BIT (tok_punct)) != 0)
+           || (cnt = BITPOS (tok_graph),
+               (ELEM (ctype, class_collection, , space_value)
+                & BIT (tok_graph))
+               != 0))
+    error (0, 0, _("<SP> character must not be in class `%s'"),
+           valid_table[cnt].name);
+  else
+    ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
+}
+
+
+/* Write the LC_CTYPE category of LOCALE below OUTPUT_PATH.
+   If the category is flagged as binary in LOCALE->binary the stored
+   data is written unchanged. Otherwise the output consists of a
+   header, an index array with one offset per element, the element
+   data itself and finally the class names and the map names, each list
+   closed by an empty string. */
+void
+ctype_output (struct localedef_t *locale, const char *output_path)
+{
+ struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+ /* Fixed elements plus two (EB and EL) per map beyond `toupper' and
+ `tolower'. */
+ const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)
+ + 2 * (ctype->map_collection_nr - 2));
+ /* Header and index, the elements, and both name lists each with its
+ terminating empty string. */
+ struct iovec iov[2 + nelems + (ctype->nr_charclass + 1)
+ + (ctype->map_collection_nr + 1)];
+ struct locale_file data;
+ u32_t idx[nelems];
+ size_t elem, cnt, offset;
+
+
+ if ((locale->binary & (1 << LC_CTYPE)) != 0)
+ {
+ iov[0].iov_base = ctype;
+ iov[0].iov_len = locale->len[LC_CTYPE];
+
+ write_locale_data (output_path, "LC_CTYPE", 1, iov);
+
+ return;
+ }
+
+
+ /* Now prepare the output: Find the sizes of the table we can use. */
+ allocate_arrays (ctype);
+
+ data.magic = LIMAGIC (LC_CTYPE);
+ data.n = nelems;
+ iov[0].iov_base = (void *) &data;
+ iov[0].iov_len = sizeof (data);
+
+ iov[1].iov_base = (void *) idx;
+ iov[1].iov_len = sizeof (idx);
+
+ /* The first element follows directly after header and index. */
+ idx[0] = iov[0].iov_len + iov[1].iov_len;
+ offset = 0;
+
+ for (elem = 0; elem < nelems; ++elem)
+ {
+ if (elem < _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE))
+ switch (elem)
+ {
+#define CTYPE_DATA(name, base, len) \
+ case _NL_ITEM_INDEX (name): \
+ iov[2 + elem].iov_base = base; \
+ iov[2 + elem].iov_len = len; \
+ break
+
+ CTYPE_DATA (_NL_CTYPE_CLASS,
+ ctype->ctype_b,
+ (256 + 128) * sizeof (char_class_t));
+
+ CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
+ ctype->map_eb[0],
+ (ctype->plane_size * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+ CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
+ ctype->map_eb[1],
+ (ctype->plane_size * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+
+ CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
+ ctype->map_el[0],
+ (ctype->plane_size * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+ CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
+ ctype->map_el[1],
+ (ctype->plane_size * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+
+ CTYPE_DATA (_NL_CTYPE_CLASS32,
+ ctype->ctype32_b,
+ (ctype->plane_size * ctype->plane_cnt
+ * sizeof (char_class32_t)));
+
+ CTYPE_DATA (_NL_CTYPE_NAMES_EB,
+ ctype->names_eb,
+ ctype->plane_size * ctype->plane_cnt * sizeof (u32_t));
+ CTYPE_DATA (_NL_CTYPE_NAMES_EL,
+ ctype->names_el,
+ ctype->plane_size * ctype->plane_cnt * sizeof (u32_t));
+
+ CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
+ &ctype->plane_size, sizeof (u32_t));
+ CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
+ &ctype->plane_cnt, sizeof (u32_t));
+
+ CTYPE_DATA (_NL_CTYPE_CLASS_NAMES,
+ ctype->class_name_ptr,
+ ctype->nr_charclass * sizeof (u32_t));
+ CTYPE_DATA (_NL_CTYPE_MAP_NAMES,
+ ctype->map_name_ptr,
+ ctype->map_collection_nr * sizeof (u32_t));
+
+ CTYPE_DATA (_NL_CTYPE_WIDTH,
+ NULL, 0); /* Not yet implemented. */
+
+ default:
+ assert (! "unknown CTYPE element");
+ }
+ else
+ {
+ /* Handle extra maps. Even positions take the EB table, odd
+ positions the EL table. */
+ /* NOTE(review): NR starts at 0 here although map_eb[0]/[1] and
+ map_el[0]/[1] are already emitted by the fixed slots above;
+ presumably the extra maps are meant to be index NR + 2.
+ Confirm against allocate_arrays. */
+ size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) >> 1;
+
+ if (((elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) & 1) == 0)
+ iov[2 + elem].iov_base = ctype->map_eb[nr];
+ else
+ iov[2 + elem].iov_base = ctype->map_el[nr];
+
+ iov[2 + elem].iov_len = ((ctype->plane_size * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+ }
+
+ /* Each element starts where the previous one ends. */
+ if (elem + 1 < nelems)
+ idx[elem + 1] = idx[elem] + iov[2 + elem].iov_len;
+ }
+
+ /* File offset just behind the last indexed element; the name strings
+ are placed from here on. */
+ offset = idx[elem - 1] + iov[2 + elem - 1].iov_len;
+
+ /* The class name array. */
+ for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++elem)
+ {
+ iov[2 + elem].iov_base = (void *) ctype->classnames[cnt];
+ iov[2 + elem].iov_len = strlen (ctype->classnames[cnt]) + 1;
+
+ ctype->class_name_ptr[cnt] = offset;
+ offset += iov[2 + elem].iov_len;
+ }
+ /* An empty string closes the list. */
+ iov[2 + elem].iov_base = (void *) "";
+ iov[2 + elem].iov_len = 1;
+ ++elem;
+
+ /* The map name array. */
+ for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++elem)
+ {
+ iov[2 + elem].iov_base = (void *) ctype->mapnames[cnt];
+ iov[2 + elem].iov_len = strlen (ctype->mapnames[cnt]) + 1;
+
+ ctype->map_name_ptr[cnt] = offset;
+ offset += iov[2 + elem].iov_len;
+ }
+ iov[2 + elem].iov_base = (void *) "";
+ iov[2 + elem].iov_len = 1;
+ ++elem;
+
+ assert (elem == nelems + ctype->nr_charclass + ctype->map_collection_nr + 2);
+
+ write_locale_data (output_path, "LC_CTYPE", 2 + elem, iov);
+}
+
+
+/* Character class handling. */
+/* Register the character class whose name is the string value of CODE
+   in the LC_CTYPE data of LOCALE.  TOK and CHARSET are not used.  */
+void
+ctype_class_new (struct linereader *lr, struct localedef_t *locale,
+                 enum token_t tok, struct token *code,
+                 struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  const char *class_name = code->val.str.start;
+
+  ctype_class_newP (lr, ctype, class_name);
+}
+
+
+/* Return 1 if NAME is one of the character classes known in the
+   LC_CTYPE data of LOCALE, 0 otherwise.  LR is not used.  */
+int
+ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
+                    const char *name)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  int idx = 0;
+
+  while (idx < ctype->nr_charclass)
+    {
+      if (strcmp (name, ctype->classnames[idx]) == 0)
+        return 1;
+      ++idx;
+    }
+
+  return 0;
+}
+
+
+/* Begin the definition of a character class.  For the keyword tokens
+   the class name is implied by TOK; for `tok_ident' it is given in STR.
+   The class is marked as done and its bit becomes the current class
+   mask.  */
+void
+ctype_class_start (struct linereader *lr, struct localedef_t *locale,
+                   enum token_t tok, const char *str,
+                   struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  int pos;
+
+  /* Map the keyword tokens to the names used at registration.  */
+  switch (tok)
+    {
+    case tok_upper:  str = "upper";  break;
+    case tok_lower:  str = "lower";  break;
+    case tok_alpha:  str = "alpha";  break;
+    case tok_digit:  str = "digit";  break;
+    case tok_xdigit: str = "xdigit"; break;
+    case tok_space:  str = "space";  break;
+    case tok_print:  str = "print";  break;
+    case tok_graph:  str = "graph";  break;
+    case tok_blank:  str = "blank";  break;
+    case tok_cntrl:  str = "cntrl";  break;
+    case tok_punct:  str = "punct";  break;
+    case tok_alnum:  str = "alnum";  break;
+    case tok_ident:
+      /* The caller supplied the name.  */
+      break;
+    default:
+      assert (! "illegal token as class name: should not happen");
+    }
+
+  /* The position in the name table is the bit number of the class.  */
+  pos = 0;
+  while (pos < ctype->nr_charclass
+         && strcmp (str, ctype->classnames[pos]) != 0)
+    ++pos;
+
+  if (pos >= ctype->nr_charclass)
+    assert (! "unknown class in class definition: should not happen");
+
+  ctype->class_done |= BIT (tok);
+
+  ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+  ctype->current_class_mask = 1 << pos;
+}
+
+
+/* Add the character named by CODE to the class that is currently being
+   defined and remember it as the possible start of a range.  */
+void
+ctype_class_from (struct linereader *lr, struct localedef_t *locale,
+                  struct token *code, struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  unsigned int ch = charset_find_value (charset, code->val.str.start,
+                                        code->val.str.len);
+
+  ctype->last_class_char = ch;
+
+  /* In the LC_CTYPE category it is no error when a character is
+     not found.  This has to be ignored silently.  */
+  if (ch != ILLEGAL_CHAR_VALUE)
+    ELEM (ctype, class_collection, , ch) |= ctype->current_class_mask;
+}
+
+
+/* Finish a range in a class definition: every character value after
+   the remembered start of the range up to and including the character
+   named by CODE is added to the current class.  */
+void
+ctype_class_to (struct linereader *lr, struct localedef_t *locale,
+                struct token *code, struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  unsigned int value, cnt;
+
+  value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+
+  if (ctype->last_class_char == ILLEGAL_CHAR_VALUE
+      || value == ILLEGAL_CHAR_VALUE)
+    {
+      /* In the LC_CTYPE category it is no error when a character is
+         not found.  This has to be ignored silently; without both end
+         points there is no range to fill in.  */
+      ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+      return;
+    }
+
+  assert (value >= ctype->last_class_char);
+
+  /* The start of the range itself was entered by ctype_class_from.  */
+  for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
+    *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
+               &ctype->class_collection_act, cnt)
+      |= ctype->current_class_mask;
+
+  ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+}
+
+
+/* End the definition of a character class: forget the remembered
+   character and clear the current class mask.  */
+void
+ctype_class_end (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+
+  /* Nothing else needs to be done at this point.  */
+  ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+  ctype->current_class_mask = 0;
+}
+
+
+/* Character map handling. */
+/* Register the character map whose name is the string value of CODE
+   in the LC_CTYPE data of LOCALE.  TOK is not used.  */
+void
+ctype_map_new (struct linereader *lr, struct localedef_t *locale,
+               enum token_t tok, struct token *code,
+               struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  const char *map_name = code->val.str.start;
+
+  ctype_map_newP (lr, ctype, map_name, charset);
+}
+
+
+/* Return 1 if NAME is one of the character maps known in the LC_CTYPE
+   data of LOCALE, 0 otherwise.  LR is not used.  */
+int
+ctype_is_charmap (struct linereader *lr, struct localedef_t *locale,
+                  const char *name)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  size_t idx = 0;
+
+  while (idx < ctype->map_collection_nr)
+    {
+      if (strcmp (name, ctype->mapnames[idx]) == 0)
+        return 1;
+      ++idx;
+    }
+
+  return 0;
+}
+
+
+/* Begin the definition of a character map.  For `tok_toupper' and
+   `tok_tolower' the name is implied and the matching done flag is set;
+   for `tok_ident' the name is given in NAME.  The map found becomes the
+   current one.  */
+void
+ctype_map_start (struct linereader *lr, struct localedef_t *locale,
+                 enum token_t tok, const char *name, struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  size_t pos;
+
+  if (tok == tok_toupper)
+    {
+      ctype->toupper_done = 1;
+      name = "toupper";
+    }
+  else if (tok == tok_tolower)
+    {
+      ctype->tolower_done = 1;
+      name = "tolower";
+    }
+  else if (tok != tok_ident)
+    assert (! "unknown token in category `LC_CTYPE' should not happen");
+
+  /* Find the slot of the map with this name.  */
+  pos = 0;
+  while (pos < ctype->map_collection_nr
+         && strcmp (name, ctype->mapnames[pos]) != 0)
+    ++pos;
+
+  if (pos == ctype->map_collection_nr)
+    assert (! "unknown token in category `LC_CTYPE' should not happen");
+
+  ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+  ctype->last_map_idx = pos;
+}
+
+
+/* Remember the character named by CODE as the source of the next
+   mapping pair of the current map.  */
+void
+ctype_map_from (struct linereader *lr, struct localedef_t *locale,
+                struct token *code, struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  unsigned int source = charset_find_value (charset, code->val.str.start,
+                                            code->val.str.len);
+
+  /* In the LC_CTYPE category it is no error when a character is
+     not found.  This has to be ignored silently.  */
+  if (source != ILLEGAL_CHAR_VALUE)
+    {
+      assert (ctype->last_map_idx < ctype->map_collection_nr);
+
+      ctype->from_map_char = source;
+    }
+}
+
+
+/* Complete a mapping pair: enter the character named by CODE as the
+   image of the remembered source character in the current map.  */
+void
+ctype_map_to (struct linereader *lr, struct localedef_t *locale,
+              struct token *code, struct charset_t *charset)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+  unsigned int target = charset_find_value (charset, code->val.str.start,
+                                            code->val.str.len);
+
+  /* In the LC_CTYPE category it is no error when a character is
+     not found.  This has to be ignored silently.  */
+  if (ctype->from_map_char != ILLEGAL_CHAR_VALUE
+      && target != ILLEGAL_CHAR_VALUE)
+    ELEM (ctype, map_collection, [ctype->last_map_idx],
+          ctype->from_map_char) = target;
+
+  /* The pair is used up in either case.  */
+  ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+}
+
+
+/* End the definition of a character map: no map is current any more
+   and no source character is pending.  */
+void
+ctype_map_end (struct linereader *lr, struct localedef_t *locale)
+{
+  struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+
+  ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+  ctype->last_map_idx = MAX_NR_CHARMAP;
+}
+
+
+/* Local functions. */
+/* Append NAME to the list of character classes of CTYPE.  An already
+   known name is reported through LR and otherwise ignored.  Exceeding
+   MAX_NR_CHARCLASS classes is reported through `error' with status 2.
+   Only the pointer NAME is stored, the string is not copied.  */
+static void
+ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
+                  const char *name)
+{
+  int cnt;
+
+  for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+    if (strcmp (ctype->classnames[cnt], name) == 0)
+      break;
+
+  if (cnt < ctype->nr_charclass)
+    {
+      /* The format contains `%s', so the name has to be passed.  */
+      lr_error (lr, _("character class `%s' already defined"), name);
+      return;
+    }
+
+  if (ctype->nr_charclass == MAX_NR_CHARCLASS)
+    /* Exit code 2 is prescribed in P1003.2b.  MAX_NR_CHARCLASS is built
+       from sizeof and so has type size_t; `%d' needs an int.  */
+    error (2, 0, _("\
+implementation limit: no more than %d character classes allowed"),
+           (int) MAX_NR_CHARCLASS);
+
+  ctype->classnames[ctype->nr_charclass++] = name;
+}
+
+
+/* Append a character map called NAME to CTYPE and allocate its zeroed
+   table.  The table gets the size of the largest existing map table,
+   or 256 or 512 entries (depending on CHARSET->mb_cur_max) if there is
+   none yet.  An already known name is reported through LR and otherwise
+   ignored.  Exceeding MAX_NR_CHARMAP maps is reported through `error'
+   with status 2.  Only the pointer NAME is stored.  */
+static void
+ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
+                const char *name, struct charset_t *charset)
+{
+  size_t max_chars = 0;
+  size_t cnt;
+
+  for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
+    {
+      if (strcmp (ctype->mapnames[cnt], name) == 0)
+        break;
+
+      if (max_chars < ctype->map_collection_max[cnt])
+        max_chars = ctype->map_collection_max[cnt];
+    }
+
+  if (cnt < ctype->map_collection_nr)
+    {
+      /* The format contains `%s', so the name has to be passed.  */
+      lr_error (lr, _("character map `%s' already defined"), name);
+      return;
+    }
+
+  if (ctype->map_collection_nr == MAX_NR_CHARMAP)
+    /* Exit code 2 is prescribed in P1003.2b.  */
+    error (2, 0, _("\
+implementation limit: no more than %d character maps allowed"),
+           MAX_NR_CHARMAP);
+
+  /* CNT equals map_collection_nr here, i.e. the first free slot.  */
+  ctype->mapnames[cnt] = name;
+
+  if (max_chars == 0)
+    ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256
+                                                              : 512;
+  else
+    ctype->map_collection_max[cnt] = max_chars;
+
+  ctype->map_collection[cnt] =
+    (u32_t *) xmalloc (sizeof (u32_t) * ctype->map_collection_max[cnt]);
+  memset (ctype->map_collection[cnt], '\0',
+          sizeof (u32_t) * ctype->map_collection_max[cnt]);
+  ctype->map_collection_act[cnt] = 256;
+
+  ++ctype->map_collection_nr;
+}
+
+
+/* Return a pointer to the entry for character IDX in *TABLE.
+
+   Values below 256 index the table directly.  Any other value is
+   looked up in CTYPE->charnames, being appended there if it is new, and
+   its position in that array is the table index.  *TABLE is enlarged
+   (doubling *MAX) and the new part cleared when the index does not fit.
+   The pointer returned is only valid until the table is enlarged
+   again.  */
+static u32_t *
+find_idx (struct locale_ctype_t *ctype, u32_t **table, size_t *max,
+          size_t *act, unsigned int idx)
+{
+  size_t cnt;
+
+  if (idx < 256)
+    return &(*table)[idx];
+
+  for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
+    if (ctype->charnames[cnt] == idx)
+      break;
+
+  /* We have to distinguish two cases: the name is found or not.  */
+  if (cnt == ctype->charnames_act)
+    {
+      /* Extend the name array.  */
+      if (ctype->charnames_act == ctype->charnames_max)
+        {
+          ctype->charnames_max *= 2;
+          ctype->charnames = (unsigned int *)
+            xrealloc (ctype->charnames,
+                      sizeof (unsigned int) * ctype->charnames_max);
+        }
+      ctype->charnames[ctype->charnames_act++] = idx;
+    }
+
+  if (cnt >= *act)
+    {
+      if (cnt >= *max)
+        {
+          size_t old_max = *max;
+          do
+            *max *= 2;
+          while (*max <= cnt);
+
+          *table = (u32_t *) xrealloc (*table, *max * sizeof (u32_t));
+          memset (&(*table)[old_max], '\0', (*max - old_max) * sizeof (u32_t));
+        }
+
+      /* The entry must not be cleared here.  With `*act = cnt' the test
+         above holds again on every later lookup of the highest entry,
+         so clearing would throw away what was stored in it.  The
+         tables are zero-filled when allocated and when enlarged.
+         NOTE(review): *act ends up as the highest index used, not as a
+         count; kept unchanged because the users of the `_act' fields
+         are outside this function.  */
+      *act = cnt;
+    }
+
+  return &(*table)[cnt];
+}
+
+
+/* Supply the POSIX.2 default contents for every character class and
+   for the `toupper'/`tolower' maps which the locale source did not
+   define itself.  Characters are looked up by name in CHARSET; a name
+   missing there is reported with `error' (status 0) and skipped.  */
+static void
+set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
+{
+  /* This function defines the default values for the classes and conversions
+     according to POSIX.2 2.5.2.1.
+     It may seem that the order of these if-blocks is arbitrary but it is NOT.
+     Don't move them unless you know what you do!  */
+
+  /* Add BIT to the class word of every character whose name is one of
+     the single letters FROM...TO.  */
+  void set_default (int bit, int from, int to)
+    {
+      char tmp[2];
+      int ch;
+      /* Define string.  */
+      strcpy (tmp, "?");
+
+      for (ch = from; ch <= to; ++ch)
+        {
+          unsigned int value;
+          tmp[0] = ch;
+
+          value = charset_find_value (charset, tmp, 1);
+          if (value == ILLEGAL_CHAR_VALUE)
+            {
+              error (0, 0, _("\
+character `%s' not defined while needed as default value"),
+                     tmp);
+              continue;
+            }
+          else
+            ELEM (ctype, class_collection, , value) |= bit;
+        }
+    }
+
+  /* Add BIT to the class word of the character called NAME.  SHOWN is
+     the spelling used in the diagnostic when NAME is not defined.  */
+  void set_named_default (int bit, const char *name, const char *shown)
+    {
+      unsigned int value = charset_find_value (charset, name, strlen (name));
+
+      if (value == ILLEGAL_CHAR_VALUE)
+        error (0, 0, _("\
+character `%s' not defined while needed as default value"),
+               shown);
+      else
+        ELEM (ctype, class_collection, , value) |= bit;
+    }
+
+  /* Add BIT to every active class word which has a bit of MASK set.  */
+  void propagate (unsigned long int mask, unsigned long int bit)
+    {
+      size_t cnt;
+
+      for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
+        if ((ctype->class_collection[cnt] & mask) != 0)
+          ctype->class_collection[cnt] |= bit;
+    }
+
+  /* Set default values if keyword was not present.  */
+  if ((ctype->class_done & BIT (tok_upper)) == 0)
+    /* "If this keyword [upper] is not specified, the uppercase letters
+       `A' through `Z', ..., shall automatically belong to this class,
+       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
+    set_default (BIT (tok_upper), 'A', 'Z');
+
+  if ((ctype->class_done & BIT (tok_lower)) == 0)
+    /* "If this keyword [lower] is not specified, the lowercase letters
+       `a' through `z', ..., shall automatically belong to this class,
+       with implementation defined character values."  [P1003.2, 2.5.2.1]  */
+    set_default (BIT (tok_lower), 'a', 'z');
+
+  if ((ctype->class_done & BIT (tok_alpha)) == 0)
+    /* Table 2-6 in P1003.2 says that characters in class `upper' or
+       class `lower' *must* be in class `alpha'.  */
+    propagate (BIT (tok_upper) | BIT (tok_lower), BIT (tok_alpha));
+
+  if ((ctype->class_done & BIT (tok_digit)) == 0)
+    /* "If this keyword [digit] is not specified, the digits `0' through
+       `9', ..., shall automatically belong to this class, with
+       implementation-defined character values."  [P1003.2, 2.5.2.1]  */
+    set_default (BIT (tok_digit), '0', '9');
+
+  /* "Only characters specified for the `alpha' and `digit' keyword
+     shall be specified.  Characters specified for the keyword `alpha'
+     and `digit' are automatically included in this class."  This is
+     done unconditionally.  */
+  propagate (BIT (tok_alpha) | BIT (tok_digit), BIT (tok_alnum));
+
+  if ((ctype->class_done & BIT (tok_space)) == 0)
+    /* "If this keyword [space] is not specified, the characters <space>,
+       <form-feed>, <newline>, <carriage-return>, <tab>, and
+       <vertical-tab>, ..., shall automatically belong to this class,
+       with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
+    {
+      set_named_default (BIT (tok_space), "space", "<space>");
+      set_named_default (BIT (tok_space), "form-feed", "<form-feed>");
+      set_named_default (BIT (tok_space), "newline", "<newline>");
+      set_named_default (BIT (tok_space), "carriage-return",
+                         "<carriage-return>");
+      set_named_default (BIT (tok_space), "tab", "<tab>");
+      set_named_default (BIT (tok_space), "vertical-tab", "<vertical-tab>");
+    }
+
+  if ((ctype->class_done & BIT (tok_xdigit)) == 0)
+    /* "If this keyword is not specified, the digits `0' to `9', the
+       uppercase letters `A' through `F', and the lowercase letters `a'
+       through `f', ..., shall automatically belong to this class, with
+       implementation defined character values."  [P1003.2, 2.5.2.1]  */
+    {
+      set_default (BIT (tok_xdigit), '0', '9');
+      set_default (BIT (tok_xdigit), 'A', 'F');
+      set_default (BIT (tok_xdigit), 'a', 'f');
+    }
+
+  if ((ctype->class_done & BIT (tok_blank)) == 0)
+    /* "If this keyword [blank] is unspecified, the characters <space> and
+       <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
+    {
+      set_named_default (BIT (tok_blank), "space", "<space>");
+      set_named_default (BIT (tok_blank), "tab", "<tab>");
+    }
+
+  if ((ctype->class_done & BIT (tok_graph)) == 0)
+    /* "If this keyword [graph] is not specified, characters specified for
+       the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
+       shall belong to this character class."  [P1003.2, 2.5.2.1]  */
+    propagate (BIT (tok_upper) | BIT (tok_lower) | BIT (tok_alpha)
+               | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct),
+               BIT (tok_graph));
+
+  if ((ctype->class_done & BIT (tok_print)) == 0)
+    /* "If this keyword [print] is not provided, characters specified for
+       the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
+       and the <space> character shall belong to this character class."
+       [P1003.2, 2.5.2.1]  */
+    {
+      propagate (BIT (tok_upper) | BIT (tok_lower) | BIT (tok_alpha)
+                 | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct),
+                 BIT (tok_print));
+      set_named_default (BIT (tok_print), "space", "<space>");
+    }
+
+  if (ctype->toupper_done == 0)
+    /* "If this keyword [toupper] is not specified, the lowercase letters
+       `a' through `z', and their corresponding uppercase letters `A' to
+       `Z', ..., shall automatically be included, with implementation-
+       defined character values."  [P1003.2, 2.5.2.1]  */
+    {
+      char tmp[4];
+      int ch;
+
+      /* TMP holds the name in the form `<x>' for diagnostics; the lookup
+         only uses the single letter at position 1.  */
+      strcpy (tmp, "<?>");
+
+      for (ch = 'a'; ch <= 'z'; ++ch)
+        {
+          unsigned int value_from, value_to;
+
+          tmp[1] = (char) ch;
+
+          value_from = charset_find_value (charset, &tmp[1], 1);
+          if (value_from == ILLEGAL_CHAR_VALUE)
+            {
+              /* TMP is a string, so it has to be printed with `%s'.  */
+              error (0, 0, _("\
+character `%s' not defined while needed as default value"),
+                     tmp);
+              continue;
+            }
+
+          /* This conversion is implementation defined.  */
+          tmp[1] = (char) (ch + ('A' - 'a'));
+          value_to = charset_find_value (charset, &tmp[1], 1);
+          if (value_to == ILLEGAL_CHAR_VALUE)
+            {
+              error (0, 0, _("\
+character `%s' not defined while needed as default value"),
+                     tmp);
+              continue;
+            }
+
+          /* The index [0] is determined by the order of the
+             `ctype_map_newP' calls in `ctype_startup'.  */
+          ELEM (ctype, map_collection, [0], value_from) = value_to;
+        }
+    }
+
+  if (ctype->tolower_done == 0)
+    /* "If this keyword [tolower] is not specified, the mapping shall be
+       the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
+    {
+      size_t cnt;
+
+      for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
+        if (ctype->map_collection[0][cnt] != 0)
+          ELEM (ctype, map_collection, [1],
+                ctype->map_collection[0][cnt])
+            = ctype->charnames[cnt];
+    }
+}
+
+
+/* Build the output tables of CTYPE from the collected data: choose the
+ hash layout (plane_size, plane_cnt), fill the character name tables,
+ the 8 bit and 32 bit class tables and one table per character map,
+ each in both byte orders where applicable. The arrays for the class
+ and map name offsets are only allocated here, not filled. */
+static void
+allocate_arrays (struct locale_ctype_t *ctype)
+{
+ size_t idx;
+
+ /* First we have to decide how we organize the arrays. It is easy for
+ a one-byte character set. But multi-byte character set cannot be
+ stored flat because they might be sparsely used. So we determine an
+ optimal hashing function for the used characters.
+
+ We use a very trivial hashing function to store the sparse table.
+ CH % TABSIZE is used as an index. To solve multiple hits we have
+ N planes. This guarantees a fixed search time for a character [N
+ / 2]. In the following code we determine the minimum value for
+ TABSIZE * N, where TABSIZE >= 256. */
+ size_t min_total = UINT_MAX;
+ size_t act_size = 256;
+
+ fputs (_("\
+Computing table size for character classes might take a while..."),
+ stderr);
+
+ /* Try every table size in turn; a size which alone is not smaller than
+ the best product found so far cannot improve on it. */
+ while (act_size < min_total)
+ {
+ size_t cnt[act_size];
+ size_t act_planes = 1;
+
+ memset (cnt, '\0', sizeof cnt);
+
+ /* The first 256 cells are always taken by the one-byte characters. */
+ for (idx = 0; idx < 256; ++idx)
+ cnt[idx] = 1;
+
+ for (idx = 0; idx < ctype->charnames_act; ++idx)
+ if (ctype->charnames[idx] >= 256)
+ {
+ size_t nr = ctype->charnames[idx] % act_size;
+
+ if (++cnt[nr] > act_planes)
+ {
+ act_planes = cnt[nr];
+ /* This size is already no better than the best one; give up. */
+ if (act_size * act_planes >= min_total)
+ break;
+ }
+ }
+
+ if (act_size * act_planes < min_total)
+ {
+ min_total = act_size * act_planes;
+ ctype->plane_size = act_size;
+ ctype->plane_cnt = act_planes;
+ }
+
+ ++act_size;
+ }
+
+ fprintf (stderr, _(" done\n"));
+
+ /* NAMES_B1 is the table in the byte order of the host, NAMES_B2 the
+ one in the opposite byte order. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define NAMES_B1 ctype->names_el
+# define NAMES_B2 ctype->names_eb
+#else
+# define NAMES_B1 ctype->names_eb
+# define NAMES_B2 ctype->names_el
+#endif
+
+ ctype->names_eb = (u32_t *) xcalloc (ctype->plane_size * ctype->plane_cnt,
+ sizeof (u32_t));
+ ctype->names_el = (u32_t *) xcalloc (ctype->plane_size * ctype->plane_cnt,
+ sizeof (u32_t));
+
+ for (idx = 1; idx < 256; ++idx)
+ NAMES_B1[idx] = idx;
+
+ /* Trick: change the 0th entry's name to 1 to mark the cell occupied. */
+ NAMES_B1[0] = 1;
+
+ for (idx = 256; idx < ctype->charnames_act; ++idx)
+ {
+ size_t nr = (ctype->charnames[idx] % ctype->plane_size);
+ size_t depth = 0;
+
+ /* Find the first plane in which the cell for this hash value is
+ still empty (zero). */
+ while (NAMES_B1[nr + depth * ctype->plane_size])
+ ++depth;
+ assert (depth < ctype->plane_cnt);
+
+ NAMES_B1[nr + depth * ctype->plane_size] = ctype->charnames[idx];
+
+ /* Now for faster access remember the index in the NAMES_B array. */
+ ctype->charnames[idx] = nr + depth * ctype->plane_size;
+ }
+ /* Undo the trick from above. */
+ NAMES_B1[0] = 0;
+
+ for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
+ NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
+
+
+ /* You wonder about this amount of memory? This is only because some
+ users do not manage to address the array with unsigned values or
+ data types with range >= 256. '\200' would result in the array
+ index -128. To help these poor people we duplicate the entries for
+ 128 up to 255 below the entry for \0. */
+ ctype->ctype_b = (char_class_t *) xcalloc (256 + 128,
+ sizeof (char_class_t));
+ ctype->ctype32_b = (char_class32_t *) xcalloc (ctype->plane_size
+ * ctype->plane_cnt,
+ sizeof (char_class32_t));
+
+ /* Fill in the character class information. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define TRANS(w) CHAR_CLASS_TRANS (w)
+# define TRANS32(w) CHAR_CLASS32_TRANS (w)
+#else
+# define TRANS(w) (w)
+# define TRANS32(w) (w)
+#endif
+
+ /* The 8 bit table starts at offset 128, see the comment above. */
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ if (ctype->charnames[idx] < 256)
+ ctype->ctype_b[128 + ctype->charnames[idx]]
+ = TRANS (ctype->class_collection[idx]);
+
+ /* Mirror first 128 entries. */
+ for (idx = 0; idx < 128; ++idx)
+ ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
+
+ /* The 32 bit array contains all characters. */
+ for (idx = 0; idx < ctype->class_collection_act; ++idx)
+ ctype->ctype32_b[ctype->charnames[idx]]
+ = TRANS32 (ctype->class_collection[idx]);
+
+ /* Room for table of mappings. */
+ ctype->map_eb = (u32_t **) xmalloc (ctype->map_collection_nr
+ * sizeof (u32_t *));
+ ctype->map_el = (u32_t **) xmalloc (ctype->map_collection_nr
+ * sizeof (u32_t *));
+
+ /* Fill in all mappings. */
+ for (idx = 0; idx < ctype->map_collection_nr; ++idx)
+ {
+ unsigned int idx2;
+
+ /* Allocate table. */
+ ctype->map_eb[idx] = (u32_t *) xmalloc ((ctype->plane_size
+ * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+ ctype->map_el[idx] = (u32_t *) xmalloc ((ctype->plane_size
+ * ctype->plane_cnt + 128)
+ * sizeof (u32_t));
+
+ /* MAP_B1 is the table in the byte order of the host, MAP_B2 the one
+ in the opposite byte order. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define MAP_B1 ctype->map_el
+# define MAP_B2 ctype->map_eb
+#else
+# define MAP_B1 ctype->map_eb
+# define MAP_B2 ctype->map_el
+#endif
+
+ /* Copy default value (identity mapping). */
+ memcpy (&MAP_B1[idx][128], NAMES_B1,
+ ctype->plane_size * ctype->plane_cnt * sizeof (u32_t));
+
+ /* Copy values from collection. */
+ for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
+ if (ctype->map_collection[idx][idx2] != 0)
+ MAP_B1[idx][128 + ctype->charnames[idx2]] =
+ ctype->map_collection[idx][idx2];
+
+ /* Mirror first 128 entries. */
+ for (idx2 = 0; idx2 < 128; ++idx2)
+ MAP_B1[idx][idx2] = MAP_B1[idx][256 + idx2];
+
+
+ /* And now the other byte order. */
+ for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
+ MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
+ }
+
+ /* Extra array for class and map names. */
+ ctype->class_name_ptr = (u32_t *) xmalloc (ctype->nr_charclass
+ * sizeof (u32_t));
+ ctype->map_name_ptr = (u32_t *) xmalloc (ctype->map_collection_nr
+ * sizeof (u32_t));
+}
diff --git a/locale/programs/ld-messages.c b/locale/programs/ld-messages.c
new file mode 100644
index 0000000000..ebd5054b02
--- /dev/null
+++ b/locale/programs/ld-messages.c
@@ -0,0 +1,237 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <alloca.h>
+#include <langinfo.h>
+#include <string.h>
+#include <sys/uio.h>
+
+#ifdef HAVE_REGEX
+# include <regex.h>
+#else
+# include <rx.h>
+#endif
+
+/* Undefine following line in production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+#include "locales.h"
+#include "stringtrans.h"
+#include "localeinfo.h"
+
+
+void *xmalloc (size_t __n);
+
+
+/* The real definition of the struct for the LC_MESSAGES locale.
+ All fields are NULL until the corresponding keyword has been read. */
+struct locale_messages_t
+{
+ /* Regular expression for an affirmative answer; compiled as an
+ extended regular expression for validation in messages_finish. */
+ const char *yesexpr;
+ /* Regular expression for a negative answer; validated the same way. */
+ const char *noexpr;
+ /* Optional field; written as the empty string when undefined. */
+ const char *yesstr;
+ /* Optional field; written as the empty string when undefined. */
+ const char *nostr;
+};
+
+
+/* Prepare LOCALE for reading the LC_MESSAGES category: select the UCS1
+   string encoding and install a zero-filled category structure.  LR and
+   CHARSET are not used.  */
+void
+messages_startup (struct linereader *lr, struct localedef_t *locale,
+                  struct charset_t *charset)
+{
+  struct locale_messages_t *fresh;
+
+  /* It is important that we always use UCS1 encoding for strings now.  */
+  encoding_method = ENC_UCS1;
+
+  fresh = (struct locale_messages_t *) xmalloc (sizeof *fresh);
+  locale->categories[LC_MESSAGES].messages = fresh;
+
+  /* Every field starts out as NULL, i.e. "not defined".  */
+  memset (fresh, '\0', sizeof *fresh);
+}
+
+
+/* Check one regular expression field of LC_MESSAGES.  EXPR is the value
+   read from the locale source (NULL when undefined) and FIELD the name
+   used in diagnostics.  Problems are reported with `error' (status 0).  */
+static void
+messages_check_expr (const char *expr, const char *field)
+{
+  int result;
+  regex_t re;
+
+  if (expr == NULL)
+    {
+      error (0, 0, _("field `%s' in category `%s' undefined"),
+             field, "LC_MESSAGES");
+      return;
+    }
+
+  /* Test whether it is a correct regular expression.  */
+  result = regcomp (&re, expr, REG_EXTENDED);
+  if (result != 0)
+    {
+      char errbuf[BUFSIZ];
+
+      (void) regerror (result, &re, errbuf, sizeof errbuf);
+      error (0, 0, _("\
+no correct regular expression for field `%s' in category `%s': %s"),
+             field, "LC_MESSAGES", errbuf);
+    }
+  else
+    /* The compiled pattern was only needed for the test; release it.  */
+    regfree (&re);
+}
+
+
+/* Verify the LC_MESSAGES data of LOCALE after the category has been
+   read completely.  */
+void
+messages_finish (struct localedef_t *locale)
+{
+  struct locale_messages_t *messages
+    = locale->categories[LC_MESSAGES].messages;
+
+  /* The fields YESSTR and NOSTR are optional.  */
+  messages_check_expr (messages->yesexpr, "yesexpr");
+  messages_check_expr (messages->noexpr, "noexpr");
+}
+
+
+/* Write the LC_MESSAGES data of LOCALE below OUTPUT_PATH.  If the
+   category was taken from a binary file it is written back unchanged.
+   Otherwise the file consists of the header, the table of offsets and
+   the four NUL terminated strings; undefined fields are written as the
+   empty string.  */
+void
+messages_output (struct localedef_t *locale, const char *output_path)
+{
+  struct locale_messages_t *messages
+    = locale->categories[LC_MESSAGES].messages;
+  struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)];
+  struct locale_file data;
+  u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)];
+  const char *strings[4];
+  size_t used;
+  size_t offset;
+  size_t n;
+
+  if ((locale->binary & (1 << LC_MESSAGES)) != 0)
+    {
+      iov[0].iov_base = messages;
+      iov[0].iov_len = locale->len[LC_MESSAGES];
+
+      write_locale_data (output_path, "LC_MESSAGES", 1, iov);
+
+      return;
+    }
+
+  data.magic = LIMAGIC (LC_MESSAGES);
+  data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES);
+
+  /* The first two elements are the file header and the offset table.  */
+  iov[0].iov_base = (void *) &data;
+  iov[0].iov_len = sizeof (data);
+  iov[1].iov_base = (void *) idx;
+  iov[1].iov_len = sizeof (idx);
+  used = 2;
+
+  strings[0] = messages->yesexpr != NULL ? messages->yesexpr : "";
+  strings[1] = messages->noexpr != NULL ? messages->noexpr : "";
+  strings[2] = messages->yesstr != NULL ? messages->yesstr : "";
+  strings[3] = messages->nostr != NULL ? messages->nostr : "";
+
+  /* Each offset is the number of bytes which precede the string.  */
+  offset = iov[0].iov_len + iov[1].iov_len;
+  for (n = 0; n < 4; ++n)
+    {
+      idx[used - 2] = offset;
+      iov[used].iov_base = (void *) strings[n];
+      iov[used].iov_len = strlen (strings[n]) + 1;
+      offset += iov[used].iov_len;
+      ++used;
+    }
+
+  assert (used == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES));
+
+  write_locale_data (output_path, "LC_MESSAGES",
+                     2 + _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES), iov);
+}
+
+
+/* Store the string value CODE for the LC_MESSAGES keyword TOK in LOCALE.
+   A string which contains an unknown character (its start pointer is
+   NULL) is reported through LR and replaced by the empty string.
+   CHARSET is not used.  */
+void
+messages_add (struct linereader *lr, struct localedef_t *locale,
+              enum token_t tok, struct token *code,
+              struct charset_t *charset)
+{
+  struct locale_messages_t *messages
+    = locale->categories[LC_MESSAGES].messages;
+  const char **slot = NULL;
+  const char *field = NULL;
+
+  /* Select the field which belongs to the keyword.  */
+  if (tok == tok_yesexpr)
+    {
+      slot = &messages->yesexpr;
+      field = "yesexpr";
+    }
+  else if (tok == tok_noexpr)
+    {
+      slot = &messages->noexpr;
+      field = "noexpr";
+    }
+  else if (tok == tok_yesstr)
+    {
+      slot = &messages->yesstr;
+      field = "yesstr";
+    }
+  else if (tok == tok_nostr)
+    {
+      slot = &messages->nostr;
+      field = "nostr";
+    }
+  else
+    assert (! "unknown token in category `LC_MESSAGES': should not happen");
+
+  /* Only reached with NDEBUG defined: nothing to store.  */
+  if (slot == NULL)
+    return;
+
+  if (code->val.str.start != NULL)
+    {
+      *slot = code->val.str.start;
+      return;
+    }
+
+  lr_error (lr, _("unknown character in field `%s' of category `%s'"),
+            field, "LC_MESSAGES");
+  *slot = "";
+}
diff --git a/locale/programs/ld-monetary.c b/locale/programs/ld-monetary.c
new file mode 100644
index 0000000000..18e27866fb
--- /dev/null
+++ b/locale/programs/ld-monetary.c
@@ -0,0 +1,385 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <langinfo.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Undefine following line in production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+#include "locales.h"
+#include "localeinfo.h"
+#include "stringtrans.h"
+
+void *xmalloc (size_t __n);
+void *xrealloc (void *__ptr, size_t __n);
+
+
+/* The real definition of the struct for the LC_MONETARY locale. */
+struct locale_monetary_t
+{
+ /* String fields; NULL as long as the keyword has not been read. */
+ const char *int_curr_symbol;
+ const char *currency_symbol;
+ const char *mon_decimal_point;
+ const char *mon_thousands_sep;
+ /* Grouping values, one byte each; '\177' stands for the value -1.
+ mon_grouping_max is the allocated size and mon_grouping_act the
+ number of bytes in use. */
+ char *mon_grouping;
+ size_t mon_grouping_max;
+ size_t mon_grouping_act;
+ const char *positive_sign;
+ const char *negative_sign;
+ /* Numeric fields; monetary_startup sets them to -2 which means "not
+ yet defined". */
+ signed char int_frac_digits;
+ signed char frac_digits;
+ signed char p_cs_precedes;
+ signed char p_sep_by_space;
+ signed char n_cs_precedes;
+ signed char n_sep_by_space;
+ signed char p_sign_posn;
+ signed char n_sign_posn;
+};
+
+
+/* The content of the field int_curr_symbol has to be taken from
+ ISO-4217. We test for correct values. Each DEFINE_INT_CURR entry of
+ the included file expands to one string element of the table, which
+ is searched with bsearch in monetary_finish. */
+#define DEFINE_INT_CURR(str) str,
+static const char *const valid_int_curr[] =
+ {
+# include "../iso-4217.def"
+ };
+#define NR_VALID_INT_CURR ((sizeof (valid_int_curr) \
+ / sizeof (valid_int_curr[0])))
+#undef DEFINE_INT_CURR
+
+
+/* Prototypes for local functions. */
+static int curr_strcmp(const char *s1, const char **s2);
+
+
+/* Prepare LOCALE for reading the LC_MONETARY category: select the UCS1
+   string encoding and install a category structure in which every
+   string is NULL, the grouping buffer is empty and every numeric field
+   holds the marker -2 for "not yet defined".  LR and CHARSET are not
+   used.  */
+void
+monetary_startup (struct linereader *lr, struct localedef_t *locale,
+                  struct charset_t *charset)
+{
+  struct locale_monetary_t *fresh;
+
+  /* It is important that we always use UCS1 encoding for strings now.  */
+  encoding_method = ENC_UCS1;
+
+  fresh = (struct locale_monetary_t *) xmalloc (sizeof *fresh);
+  locale->categories[LC_MONETARY].monetary = fresh;
+
+  memset (fresh, '\0', sizeof *fresh);
+
+  /* Initial room for the grouping values; monetary_add enlarges it.  */
+  fresh->mon_grouping_max = 80;
+  fresh->mon_grouping = (char *) xmalloc (fresh->mon_grouping_max);
+  fresh->mon_grouping_act = 0;
+
+  fresh->int_frac_digits = fresh->frac_digits = -2;
+  fresh->p_cs_precedes = fresh->p_sep_by_space = -2;
+  fresh->n_cs_precedes = fresh->n_sep_by_space = -2;
+  fresh->p_sign_posn = fresh->n_sign_posn = -2;
+}
+
+
+/* Verify the LC_MONETARY data of LOCALE after the category has been
+   read completely.  Every problem is reported with `error' using status
+   0, so processing continues afterwards.  */
+void
+monetary_finish (struct localedef_t *locale)
+{
+  struct locale_monetary_t *monetary
+    = locale->categories[LC_MONETARY].monetary;
+
+  /* Report string field CAT when it was never defined.  */
+#define TEST_ELEM(cat) \
+  if (monetary->cat == NULL)						      \
+    error (0, 0, _("field `%s' in category `%s' not defined"),		      \
+	   #cat, "LC_MONETARY")
+
+  TEST_ELEM (int_curr_symbol);
+  TEST_ELEM (currency_symbol);
+  TEST_ELEM (mon_decimal_point);
+  TEST_ELEM (mon_thousands_sep);
+  TEST_ELEM (positive_sign);
+  TEST_ELEM (negative_sign);
+
+  /* The international currency symbol must come from ISO 4217.  */
+  if (monetary->int_curr_symbol != NULL)
+    {
+      if (strlen (monetary->int_curr_symbol) != 4)
+        error (0, 0, _("\
+value of field `int_curr_symbol' in category `LC_MONETARY' has wrong length"));
+      else if (bsearch (monetary->int_curr_symbol, valid_int_curr,
+                        NR_VALID_INT_CURR, sizeof (const char *),
+                        (comparison_fn_t) curr_strcmp) == NULL)
+        error (0, 0, _("\
+value of field `int_curr_symbol' in category `LC_MONETARY' does \
+not correspond to a valid name in ISO 4217"));
+    }
+
+  /* The decimal point must not be empty.  This is not said explicitly
+     in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be
+     != "".  An undefined field was already reported above and must not
+     be dereferenced here.  */
+  if (monetary->mon_decimal_point != NULL
+      && monetary->mon_decimal_point[0] == '\0')
+    {
+      error (0, 0, _("\
+value for field `%s' in category `%s' must not be the empty string"),
+             "mon_decimal_point", "LC_MONETARY");
+    }
+
+  if (monetary->mon_grouping_act == 0)
+    error (0, 0, _("field `%s' in category `%s' not defined"),
+           "mon_grouping", "LC_MONETARY");
+
+  /* Report numeric field CAT when it still holds the marker -2 or when
+     its value lies outside MIN...MAX.  */
+#undef TEST_ELEM
+#define TEST_ELEM(cat, min, max) \
+  if (monetary->cat == -2)						      \
+    error (0, 0, _("field `%s' in category `%s' not defined"),		      \
+	   #cat, "LC_MONETARY");					      \
+  else if (monetary->cat < min || monetary->cat > max)			      \
+    error (0, 0, _("\
+value for field `%s' in category `%s' must be in range %d...%d"),	      \
+	   #cat, "LC_MONETARY", min, max)
+
+  TEST_ELEM (int_frac_digits, -128, 127);	/* No range check.  */
+  TEST_ELEM (frac_digits, -128, 127);		/* No range check.  */
+  TEST_ELEM (p_cs_precedes, -1, 1);
+  TEST_ELEM (p_sep_by_space, -1, 2);
+  TEST_ELEM (n_cs_precedes, -1, 1);
+  TEST_ELEM (n_sep_by_space, -1, 2);
+  TEST_ELEM (p_sign_posn, -1, 4);
+  TEST_ELEM (n_sign_posn, -1, 4);
+}
+
+
+void
+monetary_output (struct localedef_t *locale, const char *output_path)
+{
+ struct locale_monetary_t *monetary
+ = locale->categories[LC_MONETARY].monetary;
+ struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)];
+ struct locale_file data;
+ u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_MONETARY)];
+ size_t cnt = 0;
+
+ if ((locale->binary & (1 << LC_MONETARY)) != 0)
+ {
+ iov[0].iov_base = monetary;
+ iov[0].iov_len = locale->len[LC_MONETARY];
+
+ write_locale_data (output_path, "LC_MONETARY", 1, iov);
+
+ return;
+ }
+
+ data.magic = LIMAGIC (LC_MONETARY);
+ data.n = _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY);
+ iov[cnt].iov_base = (void *) &data;
+ iov[cnt].iov_len = sizeof (data);
+ ++cnt;
+
+ iov[cnt].iov_base = (void *) idx;
+ iov[cnt].iov_len = sizeof (idx);
+ ++cnt;
+
+ idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len;
+ iov[cnt].iov_base = (void *) (monetary->int_curr_symbol ?: "");
+ iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) (monetary->currency_symbol ?: "");
+ iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) (monetary->mon_decimal_point ?: "");
+ iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) (monetary->mon_thousands_sep ?: "");
+ iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = alloca (monetary->mon_grouping_act + 1);
+ iov[cnt].iov_len = monetary->mon_grouping_act + 1;
+ memcpy (iov[cnt].iov_base, monetary->mon_grouping,
+ monetary->mon_grouping_act);
+ ((char *) iov[cnt].iov_base)[monetary->mon_grouping_act] = '\0';
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) (monetary->positive_sign ?: "");
+ iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) (monetary->negative_sign ?: "");
+ iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->int_frac_digits;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->frac_digits;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->p_cs_precedes;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->p_sep_by_space;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->n_cs_precedes;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->n_sep_by_space;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->p_sign_posn;
+ iov[cnt].iov_len = 1;
+ ++cnt;
+
+ idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+ iov[cnt].iov_base = (void *) &monetary->n_sign_posn;
+ iov[cnt].iov_len = 1;
+
+ assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY));
+
+ write_locale_data (output_path, "LC_MONETARY",
+ 2 + _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY), iov);
+}
+
+
+/* Store the value CODE for the LC_MONETARY keyword TOK in LOCALE.
+
+   String and numeric fields may be given only once; a second definition
+   is reported through LR and ignored.  A string which contains an
+   unknown character (its start pointer is NULL) is reported and
+   replaced by the empty string.  For `mon_grouping' every call appends
+   one value; `-1' is stored as '\177' and must be the last entry.
+   CHARSET is not used.  */
+void
+monetary_add (struct linereader *lr, struct localedef_t *locale,
+              enum token_t tok, struct token *code,
+              struct charset_t *charset)
+{
+  struct locale_monetary_t *monetary
+    = locale->categories[LC_MONETARY].monetary;
+
+  switch (tok)
+    {
+#define STR_ELEM(cat) \
+    case tok_##cat:							      \
+      if (monetary->cat != NULL)					      \
+	lr_error (lr, _("\
+field `%s' in category `%s' declared more than once"),			      \
+		  #cat, "LC_MONETARY");					      \
+      else if (code->val.str.start == NULL)				      \
+	{								      \
+	  lr_error (lr, _("unknown character in field `%s' of category `%s'"),\
+		    #cat, "LC_MONETARY");				      \
+	  monetary->cat = "";						      \
+	}								      \
+      else								      \
+	monetary->cat = code->val.str.start;				      \
+      break
+
+    STR_ELEM (int_curr_symbol);
+    STR_ELEM (currency_symbol);
+    STR_ELEM (mon_decimal_point);
+    STR_ELEM (mon_thousands_sep);
+    STR_ELEM (positive_sign);
+    STR_ELEM (negative_sign);
+
+      /* The value -2 marks a numeric field as not yet defined.  */
+#define INT_ELEM(cat) \
+    case tok_##cat:							      \
+      if (monetary->cat != -2)						      \
+	lr_error (lr, _("\
+field `%s' in category `%s' declared more than once"),			      \
+		  #cat, "LC_MONETARY");					      \
+      else								      \
+	monetary->cat = code->val.num;					      \
+      break
+
+    INT_ELEM (int_frac_digits);
+    INT_ELEM (frac_digits);
+    INT_ELEM (p_cs_precedes);
+    INT_ELEM (p_sep_by_space);
+    INT_ELEM (n_cs_precedes);
+    INT_ELEM (n_sep_by_space);
+    INT_ELEM (p_sign_posn);
+    INT_ELEM (n_sign_posn);
+
+    case tok_mon_grouping:
+      /* Make sure there is room for one more value.  */
+      if (monetary->mon_grouping_act == monetary->mon_grouping_max)
+        {
+          monetary->mon_grouping_max *= 2;
+          monetary->mon_grouping =
+            (char *) xrealloc (monetary->mon_grouping,
+                               monetary->mon_grouping_max);
+        }
+      /* Only look at the previous entry if there is one; for the very
+         first value the index would lie in front of the buffer.  */
+      if (monetary->mon_grouping_act > 0
+          && (monetary->mon_grouping[monetary->mon_grouping_act - 1]
+              == '\177'))
+        lr_error (lr, _("\
+`-1' must be last entry in `%s' field in `%s' category"),
+                  "mon_grouping", "LC_MONETARY");
+      else
+        {
+          if (code->tok == tok_minus1)
+            monetary->mon_grouping[monetary->mon_grouping_act++] = '\177';
+          else if (code->val.num == 0)
+            lr_error (lr, _("\
+values for field `%s' in category `%s' must not be zero"),
+                      "mon_grouping", "LC_MONETARY");
+          else if (code->val.num > 126)
+            lr_error (lr, _("\
+values for field `%s' in category `%s' must be smaller than 127"),
+                      "mon_grouping", "LC_MONETARY");
+          else
+            monetary->mon_grouping[monetary->mon_grouping_act++]
+              = code->val.num;
+        }
+      break;
+
+    default:
+      assert (! "unknown token in category `LC_MONETARY': should not happen");
+    }
+}
+
+
+/* Comparison function for searching the table of valid currency names:
+   S1 is the key string, S2 points to the table element.  The result
+   is that of strcmp for the key and the element.  */
+static int
+curr_strcmp(const char *s1, const char **s2)
+{
+  const char *element = *s2;
+
+  return strcmp (s1, element);
+}
diff --git a/locale/programs/ld-numeric.c b/locale/programs/ld-numeric.c
new file mode 100644
index 0000000000..0b5fe2afe5
--- /dev/null
+++ b/locale/programs/ld-numeric.c
@@ -0,0 +1,208 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <langinfo.h>
+#include <string.h>
+
+/* Uncomment the following line in the production version to disable the assertions. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+#include "locales.h"
+#include "localeinfo.h"
+#include "stringtrans.h"
+
+void *xmalloc (size_t __n);
+void *xrealloc (void *__ptr, size_t __n);
+
+
+/* The real definition of the struct for the LC_NUMERIC locale.
+   numeric_startup zero-fills the struct, so the two string fields are
+   NULL until numeric_add stores a value in them.  */
+struct locale_numeric_t
+{
+ const char *decimal_point;
+ const char *thousands_sep;
+ char *grouping; /* Growable array of grouping values; `-1' is stored as '\177'. */
+ size_t grouping_max; /* Number of bytes allocated for GROUPING. */
+ size_t grouping_act; /* Number of entries of GROUPING in use. */
+};
+
+
+/* Set up the working data for the LC_NUMERIC category of LOCALE: a
+   zero-filled `struct locale_numeric_t' with an empty grouping buffer of
+   80 bytes.  LR and CHARSET are not used by this function.  */
+void
+numeric_startup (struct linereader *lr, struct localedef_t *locale,
+                 struct charset_t *charset)
+{
+  struct locale_numeric_t *data;
+
+  /* It is important that we always use UCS1 encoding for strings now.  */
+  encoding_method = ENC_UCS1;
+
+  data = (struct locale_numeric_t *) xmalloc (sizeof (*data));
+  memset (data, '\0', sizeof (*data));
+
+  /* The grouping buffer starts empty; numeric_add doubles it on demand.  */
+  data->grouping_act = 0;
+  data->grouping_max = 80;
+  data->grouping = (char *) xmalloc (data->grouping_max);
+
+  locale->categories[LC_NUMERIC].numeric = data;
+}
+
+
+/* Check the collected LC_NUMERIC data of LOCALE once the category has
+   been read completely.  Every missing or invalid field is reported via
+   `error' with a zero status, i.e. without terminating the program.  */
+void
+numeric_finish (struct localedef_t *locale)
+{
+  struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric;
+
+#define TEST_ELEM(cat) \
+  if (numeric->cat == NULL)                                                  \
+    error (0, 0, _("field `%s' in category `%s' not defined"),              \
+           #cat, "LC_NUMERIC")
+
+  TEST_ELEM (decimal_point);
+  TEST_ELEM (thousands_sep);
+
+  /* The decimal point must not be empty.  This is not said explicitly
+     in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be
+     != "".  A missing field was already reported above and must not be
+     dereferenced here.  */
+  if (numeric->decimal_point != NULL && numeric->decimal_point[0] == '\0')
+    {
+      error (0, 0, _("\
+value for field `%s' in category `%s' must not be the empty string"),
+             "decimal_point", "LC_NUMERIC");
+    }
+
+  if (numeric->grouping_act == 0)
+    error (0, 0, _("field `%s' in category `%s' not defined"),
+           "grouping", "LC_NUMERIC");
+}
+
+
+/* Write the LC_NUMERIC category of LOCALE through write_locale_data,
+   using OUTPUT_PATH as the destination.
+
+   If the LC_NUMERIC bit is set in locale->binary the category data is
+   written unchanged as one block of locale->len[LC_NUMERIC] bytes.
+   Otherwise the file consists of the header DATA, the table IDX and the
+   three values decimal_point, thousands_sep and grouping.  IDX[i] holds
+   the byte offset of value number i from the start of the file.  */
+void
+numeric_output (struct localedef_t *locale, const char *output_path)
+{
+  struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric;
+  struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)];
+  struct locale_file data;
+  u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)];
+  size_t cnt = 0;
+
+  if ((locale->binary & (1 << LC_NUMERIC)) != 0)
+    {
+      iov[0].iov_base = numeric;
+      iov[0].iov_len = locale->len[LC_NUMERIC];
+
+      write_locale_data (output_path, "LC_NUMERIC", 1, iov);
+
+      return;
+    }
+
+  data.magic = LIMAGIC (LC_NUMERIC);
+  data.n = _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC);
+  iov[cnt].iov_base = (void *) &data;
+  iov[cnt].iov_len = sizeof (data);
+  ++cnt;
+
+  iov[cnt].iov_base = (void *) idx;
+  iov[cnt].iov_len = sizeof (idx);
+  ++cnt;
+
+  /* The first value follows directly after the header and the index
+     table.  There is no previous index entry to build upon here.  */
+  idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len;
+  iov[cnt].iov_base = (void *) (numeric->decimal_point ?: "");
+  iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+  ++cnt;
+
+  /* Every further value starts where the previous one ends.  */
+  idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+  iov[cnt].iov_base = (void *) (numeric->thousands_sep ?: "");
+  iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1;
+  ++cnt;
+
+  /* The grouping values are written with a terminating NUL byte.  */
+  idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len;
+  iov[cnt].iov_base = alloca (numeric->grouping_act + 1);
+  iov[cnt].iov_len = numeric->grouping_act + 1;
+  memcpy (iov[cnt].iov_base, numeric->grouping, numeric->grouping_act);
+  ((char *) iov[cnt].iov_base)[numeric->grouping_act] = '\0';
+
+  assert (cnt + 1 == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC));
+
+  write_locale_data (output_path, "LC_NUMERIC",
+                     2 + _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC), iov);
+}
+
+/* Store the value CODE read for keyword TOK in the LC_NUMERIC data of
+   LOCALE.  The string keywords decimal_point and thousands_sep may be
+   given only once; a string token whose start pointer is NULL is
+   reported as containing an unknown character and replaced by "".
+   Every grouping value is appended to the grouping buffer, `-1' is
+   stored as '\177' and values above 126 are rejected.  LR is used for
+   the error messages; CHARSET is not used by this function.  */
+void
+numeric_add (struct linereader *lr, struct localedef_t *locale,
+ enum token_t tok, struct token *code,
+ struct charset_t *charset)
+{
+ struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric;
+
+ switch (tok)
+ {
+#define STR_ELEM(cat) \
+ case tok_##cat: \
+ if (numeric->cat != NULL) \
+ lr_error (lr, _("\
+field `%s' in category `%s' declared more than once"), \
+ #cat, "LC_NUMERIC"); \
+ else if (code->val.str.start == NULL) \
+ { \
+ lr_error (lr, _("unknown character in field `%s' of category `%s'"),\
+ #cat, "LC_NUMERIC"); \
+ numeric->cat = ""; \
+ } \
+ else \
+ numeric->cat = code->val.str.start; \
+ break
+
+ STR_ELEM (decimal_point);
+ STR_ELEM (thousands_sep);
+
+ case tok_grouping:
+ if (numeric->grouping_act == numeric->grouping_max) /* Buffer is full: double it. */
+ {
+ numeric->grouping_max *= 2;
+ numeric->grouping = (char *) xrealloc (numeric->grouping,
+ numeric->grouping_max);
+ }
+ if (numeric->grouping_act > 0
+ && (numeric->grouping[numeric->grouping_act - 1] == '\177'))
+ {
+ lr_error (lr, _("\
+`-1' must be last entry in `%s' field in `%s' category"),
+ "grouping", "LC_NUMERIC");
+ --numeric->grouping_act; /* Drop the misplaced `-1'; the new value takes its slot. */
+ }
+
+ if (code->tok == tok_minus1)
+ numeric->grouping[numeric->grouping_act++] = '\177';
+ else if (code->val.num > 126)
+ lr_error (lr, _("\
+values for field `%s' in category `%s' must be smaller than 127"),
+ "grouping", "LC_NUMERIC");
+ else
+ numeric->grouping[numeric->grouping_act++] = code->val.num;
+ break;
+
+ default:
+ assert (! "unknown token in category `LC_NUMERIC': should not happen");
+ }
+}
diff --git a/locale/programs/ld-time.c b/locale/programs/ld-time.c
new file mode 100644
index 0000000000..2587faccdc
--- /dev/null
+++ b/locale/programs/ld-time.c
@@ -0,0 +1,310 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <langinfo.h>
+#include <string.h>
+
+/* Uncomment the following line in the production version to disable the assertions. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+#include "locales.h"
+#include "localeinfo.h"
+#include "stringtrans.h"
+
+
+void *xmalloc (size_t __n);
+
+
+/* The real definition of the struct for the LC_TIME locale.
+   time_startup zero-fills the struct.  For every string array the
+   matching cur_num_* member counts the entries time_add has stored so
+   far; the single string members stay NULL until they are set.  */
+struct locale_time_t
+{
+ const char *abday[7];
+ size_t cur_num_abday;
+ const char *day[7];
+ size_t cur_num_day;
+ const char *abmon[12];
+ size_t cur_num_abmon;
+ const char *mon[12];
+ size_t cur_num_mon;
+ const char *am_pm[2];
+ size_t cur_num_am_pm;
+ const char *d_t_fmt;
+ const char *d_fmt;
+ const char *t_fmt;
+ const char *t_fmt_ampm;
+ const char *era;
+ const char *era_year;
+ const char *era_d_t_fmt;
+ const char *era_t_fmt;
+ const char *era_d_fmt;
+ const char *alt_digits[100]; /* time_add accepts at most 100 entries. */
+ size_t cur_num_alt_digits;
+};
+
+
+/* Set up the working data for the LC_TIME category of LOCALE: a
+   zero-filled `struct locale_time_t'.  LR and CHARSET are not used by
+   this function.  */
+void
+time_startup (struct linereader *lr, struct localedef_t *locale,
+              struct charset_t *charset)
+{
+  struct locale_time_t *data;
+
+  /* It is important that we always use UCS1 encoding for strings now.  */
+  encoding_method = ENC_UCS1;
+
+  data = (struct locale_time_t *) xmalloc (sizeof (*data));
+  memset (data, '\0', sizeof (*data));
+
+  locale->categories[LC_TIME].time = data;
+}
+
+/* Check the collected LC_TIME data of LOCALE once the category has been
+   read completely.  The arrays abday, day, abmon, mon and am_pm must be
+   defined with exactly the given number of values, and the four format
+   strings d_t_fmt, d_fmt, t_fmt and t_fmt_ampm must be defined.  The era
+   fields and alt_digits are not checked here.  Problems are reported
+   via `error' with a zero status.  */
+void
+time_finish (struct localedef_t *locale)
+{
+ struct locale_time_t *time = locale->categories[LC_TIME].time;
+
+#define TESTARR_ELEM(cat, max) \
+ if (time->cur_num_##cat == 0) \
+ error (0, 0, _("field `%s' in category `%s' not defined"), \
+ #cat, "LC_TIME"); \
+ else if (time->cur_num_##cat != max) \
+ error (0, 0, _("field `%s' in category `%s' has not enough values"), \
+ #cat, "LC_TIME")
+
+ TESTARR_ELEM (abday, 7);
+ TESTARR_ELEM (day, 7);
+ TESTARR_ELEM (abmon, 12);
+ TESTARR_ELEM (mon, 12);
+ TESTARR_ELEM (am_pm, 2);
+
+#define TEST_ELEM(cat) \
+ if (time->cat == NULL) \
+ error (0, 0, _("field `%s' in category `%s' not defined"), \
+ #cat, "LC_TIME")
+
+ TEST_ELEM (d_t_fmt);
+ TEST_ELEM (d_fmt);
+ TEST_ELEM (t_fmt);
+ TEST_ELEM (t_fmt_ampm);
+}
+
+
+/* Write the LC_TIME category of LOCALE through write_locale_data, using
+   OUTPUT_PATH as the destination.
+
+   If the LC_TIME bit is set in locale->binary the category data is
+   written unchanged as one block of locale->len[LC_TIME] bytes.
+   Otherwise the file consists of the header DATA, the table IDX and the
+   values in this order: abday, day, abmon, mon, am_pm, d_t_fmt, d_fmt,
+   t_fmt, t_fmt_ampm, era, era_year, era_d_fmt, alt_digits, era_d_t_fmt,
+   era_t_fmt.  IDX[i] holds the byte offset of value number i from the
+   start of the file.  All alt_digits strings together form one value,
+   but each of them occupies an iovec entry of its own.  */
+void
+time_output (struct localedef_t *locale, const char *output_path)
+{
+  struct locale_time_t *time = locale->categories[LC_TIME].time;
+  struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_TIME)
+                   + time->cur_num_alt_digits];
+  struct locale_file data;
+  u32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_TIME)];
+  size_t cnt, last_idx, num;
+
+  if ((locale->binary & (1 << LC_TIME)) != 0)
+    {
+      iov[0].iov_base = time;
+      iov[0].iov_len = locale->len[LC_TIME];
+
+      write_locale_data (output_path, "LC_TIME", 1, iov);
+
+      return;
+    }
+
+  data.magic = LIMAGIC (LC_TIME);
+  data.n = _NL_ITEM_INDEX (_NL_NUM_LC_TIME);
+  iov[0].iov_base = (void *) &data;
+  iov[0].iov_len = sizeof (data);
+
+  iov[1].iov_base = (void *) idx;
+  iov[1].iov_len = sizeof (idx);
+
+  idx[0] = iov[0].iov_len + iov[1].iov_len;
+
+  /* The ab'days.  */
+  for (cnt = 0; cnt <= _NL_ITEM_INDEX (ABDAY_7); ++cnt)
+    {
+      iov[2 + cnt].iov_base =
+        (void *) (time->abday[cnt - _NL_ITEM_INDEX (ABDAY_1)] ?: "");
+      iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+      idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+    }
+
+  /* The days.  */
+  for (; cnt <= _NL_ITEM_INDEX (DAY_7); ++cnt)
+    {
+      iov[2 + cnt].iov_base =
+        (void *) (time->day[cnt - _NL_ITEM_INDEX (DAY_1)] ?: "");
+      iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+      idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+    }
+
+  /* The ab'mons.  */
+  for (; cnt <= _NL_ITEM_INDEX (ABMON_12); ++cnt)
+    {
+      iov[2 + cnt].iov_base =
+        (void *) (time->abmon[cnt - _NL_ITEM_INDEX (ABMON_1)] ?: "");
+      iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+      idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+    }
+
+  /* The mons.  */
+  for (; cnt <= _NL_ITEM_INDEX (MON_12); ++cnt)
+    {
+      iov[2 + cnt].iov_base =
+        (void *) (time->mon[cnt - _NL_ITEM_INDEX (MON_1)] ?: "");
+      iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+      idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+    }
+
+  /* AM/PM.  */
+  for (; cnt <= _NL_ITEM_INDEX (PM_STR); ++cnt)
+    {
+      iov[2 + cnt].iov_base =
+        (void *) (time->am_pm[cnt - _NL_ITEM_INDEX (AM_STR)] ?: "");
+      iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+      idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+    }
+
+  iov[2 + cnt].iov_base = (void *) (time->d_t_fmt ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  iov[2 + cnt].iov_base = (void *) (time->d_fmt ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  iov[2 + cnt].iov_base = (void *) (time->t_fmt ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  iov[2 + cnt].iov_base = (void *) (time->t_fmt_ampm ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  iov[2 + cnt].iov_base = (void *) (time->era ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  iov[2 + cnt].iov_base = (void *) (time->era_year ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  iov[2 + cnt].iov_base = (void *) (time->era_d_fmt ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  last_idx = ++cnt;
+
+  /* The alt_digits strings share one index entry.  From here on CNT
+     counts iovec entries while LAST_IDX counts values.  */
+  idx[1 + last_idx] = idx[last_idx];
+  for (num = 0; num < time->cur_num_alt_digits; ++num, ++cnt)
+    {
+      iov[2 + cnt].iov_base = (void *) (time->alt_digits[num] ?: "");
+      iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+      idx[1 + last_idx] += iov[2 + cnt].iov_len;
+    }
+  ++last_idx;
+
+  iov[2 + cnt].iov_base = (void *) (time->era_d_t_fmt ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  idx[1 + last_idx] = idx[last_idx] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  /* The last value is era_t_fmt; era_d_fmt has been written above.  */
+  iov[2 + cnt].iov_base = (void *) (time->era_t_fmt ?: "");
+  iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
+  ++cnt;
+
+  assert (cnt == (_NL_ITEM_INDEX (_NL_NUM_LC_TIME) - 1
+                  + time->cur_num_alt_digits));
+
+  write_locale_data (output_path, "LC_TIME", 2 + cnt, iov);
+}
+
+
+/* Store the value CODE read for keyword TOK in the LC_TIME data of
+   LOCALE.  Values for the array keywords are appended until the array
+   is full; any further value is reported as an error.  The single
+   string keywords may be given only once.  A string token whose start
+   pointer is NULL is reported as containing an unknown character and
+   replaced by "".  LR is used for the error messages; CHARSET is not
+   used by this function.  */
+void
+time_add (struct linereader *lr, struct localedef_t *locale,
+          enum token_t tok, struct token *code,
+          struct charset_t *charset)
+{
+  struct locale_time_t *time = locale->categories[LC_TIME].time;
+
+  switch (tok)
+    {
+      /* The format needs one conversion per argument and must not start
+         with a blank.  */
+#define STRARR_ELEM(cat, max) \
+    case tok_##cat:                                                          \
+      if (time->cur_num_##cat >= max)                                        \
+        lr_error (lr, _("\
+too many values for field `%s' in category `%s'"),                          \
+                  #cat, "LC_TIME");                                          \
+      else if (code->val.str.start == NULL)                                  \
+        {                                                                    \
+          lr_error (lr, _("unknown character in field `%s' of category `%s'"),\
+                    #cat, "LC_TIME");                                        \
+          time->cat[time->cur_num_##cat++] = "";                             \
+        }                                                                    \
+      else                                                                   \
+        time->cat[time->cur_num_##cat++]                                     \
+          = code->val.str.start;                                             \
+      break
+
+    STRARR_ELEM (abday, 7);
+    STRARR_ELEM (day, 7);
+    STRARR_ELEM (abmon, 12);
+    STRARR_ELEM (mon, 12);
+    STRARR_ELEM (am_pm, 2);
+    STRARR_ELEM (alt_digits, 100);
+
+#define STR_ELEM(cat) \
+    case tok_##cat:                                                          \
+      if (time->cat != NULL)                                                 \
+        lr_error (lr, _("\
+field `%s' in category `%s' declared more than once"),                      \
+                  #cat, "LC_TIME");                                          \
+      else if (code->val.str.start == NULL)                                  \
+        {                                                                    \
+          lr_error (lr, _("unknown character in field `%s' of category `%s'"),\
+                    #cat, "LC_TIME");                                        \
+          time->cat = "";                                                    \
+        }                                                                    \
+      else                                                                   \
+        time->cat = code->val.str.start;                                     \
+      break
+
+    STR_ELEM (d_t_fmt);
+    STR_ELEM (d_fmt);
+    STR_ELEM (t_fmt);
+    STR_ELEM (t_fmt_ampm);
+    STR_ELEM (era);
+    STR_ELEM (era_year);
+    STR_ELEM (era_d_t_fmt);
+    STR_ELEM (era_d_fmt);
+    STR_ELEM (era_t_fmt);
+
+    default:
+      assert (! "unknown token in category `LC_TIME': should not happen");
+    }
+}
diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
new file mode 100644
index 0000000000..e4a1305712
--- /dev/null
+++ b/locale/programs/linereader.c
@@ -0,0 +1,579 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "error.h"
+#include "linereader.h"
+#include "charset.h"
+#include "stringtrans.h"
+
+
+void *xmalloc (size_t __n);
+void *xrealloc (void *__p, size_t __n);
+char *xstrdup (const char *__str);
+
+
+static struct token *get_toplvl_escape (struct linereader *lr);
+static struct token *get_symname (struct linereader *lr);
+static struct token *get_ident (struct linereader *lr);
+static struct token *get_string (struct linereader *lr,
+ const struct charset_t *charset);
+
+
+/* Create a line reader for the file FNAME and read its first line.
+   A FNAME of NULL, "-" or "/dev/stdin" selects standard input.  HF is
+   stored as the keyword lookup function of the reader.
+
+   Returns the new reader, or NULL if the file cannot be opened or the
+   first line cannot be read.  In the latter case `errno' has the value
+   left by `getdelim'.  */
+struct linereader *
+lr_open (const char *fname, kw_hash_fct_t hf)
+{
+  FILE *fp;
+  struct linereader *result;
+  int n;
+
+  if (fname == NULL || strcmp (fname, "-") == 0
+      || strcmp (fname, "/dev/stdin") == 0)
+    fp = stdin;
+  else
+    {
+      fp = fopen (fname, "r");
+      if (fp == NULL)
+        return NULL;
+    }
+
+  result = (struct linereader *) xmalloc (sizeof (*result));
+
+  result->fp = fp;
+  /* FNAME may be NULL when reading standard input; give the reader a
+     printable name for its error messages instead of copying NULL.  */
+  result->fname = xstrdup (fname != NULL ? fname : "<stdin>");
+  result->buf = NULL;
+  result->bufsize = 0;
+  result->lineno = 1;
+  result->idx = 0;
+  result->comment_char = '#';
+  result->escape_char = '\\';
+  result->translate_strings = 1;
+
+  n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
+  if (n < 0)
+    {
+      int save = errno;
+      fclose (result->fp);
+      /* `getdelim' may have allocated a buffer even though it failed.  */
+      free (result->buf);
+      free ((char *) result->fname);
+      free (result);
+      errno = save;
+      return NULL;
+    }
+
+  /* Drop an escaped newline at the end of the first line.  */
+  if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
+    n -= 2;
+
+  result->buf[n] = '\0';
+  result->bufact = n;
+  result->hash_fct = hf;
+
+  return result;
+}
+
+
+/* Return nonzero if the line buffer of LR is empty, zero otherwise.
+   The reader is not modified.  */
+int
+lr_eof (struct linereader *lr)
+{
+  /* This has to be a comparison.  An assignment would always yield zero
+     and throw away the length of the current line.  */
+  return lr->bufact == 0;
+}
+
+
+/* Close the stream of LR and release its line buffer and the reader
+   itself.  The `fname' member is not released here.  */
+void
+lr_close (struct linereader *lr)
+{
+  FILE *stream = lr->fp;
+  char *line = lr->buf;
+
+  fclose (stream);
+  free (line);
+  free (lr);
+}
+
+
+/* Read the next line of the input into the buffer of LR and restart
+   scanning at its beginning.  Returns 0 on success and -1 if no further
+   line could be read.  */
+int
+lr_next (struct linereader *lr)
+{
+  int len = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
+
+  if (len < 0)
+    return -1;
+
+  lr->lineno += 1;
+
+  if (len >= 2 && lr->buf[len - 1] == '\n'
+      && lr->buf[len - 2] == lr->escape_char)
+    {
+      /* An escaped newline character is substituted with a single <SP>.  */
+      lr->buf[len - 2] = ' ';
+      len -= 1;
+    }
+
+  lr->buf[len] = '\0';
+  lr->bufact = len;
+  lr->idx = 0;
+
+  return 0;
+}
+
+
+/* Defined in error.c. */
+/* This variable is incremented each time `error' is called. */
+extern unsigned int error_message_count;
+
+/* The calling program should define program_name and set it to the
+ name of the executing program. */
+extern char *program_name;
+
+
+/* Read the next token from LR and return a pointer to it.  The token is
+   stored in LR itself and is overwritten by the next call.  CHARSET is
+   only handed on to the string reader.
+
+   A newline and a comment both yield tok_eol, the end of the input
+   yields tok_eof.  Everything that is not recognized as one of the
+   special forms is read as an identifier or keyword.  */
+struct token *
+lr_token (struct linereader *lr, const struct charset_t *charset)
+{
+  int ch;
+
+  /* Skip white space in front of the token.  A newline is a token of
+     its own.  */
+  do
+    {
+      ch = lr_getc (lr);
+
+      if (ch == '\n')
+        {
+          lr->token.tok = tok_eol;
+          return &lr->token;
+        }
+    }
+  while (isspace (ch));
+
+  if (ch == EOF)
+    {
+      lr->token.tok = tok_eof;
+      return &lr->token;
+    }
+
+  if (ch == lr->comment_char)
+    {
+      /* Ignore rest of line.  */
+      lr_ignore_rest (lr, 0);
+      lr->token.tok = tok_eol;
+      return &lr->token;
+    }
+
+  /* Match escape sequences.  */
+  if (ch == lr->escape_char)
+    return get_toplvl_escape (lr);
+
+  /* Match ellipsis.  */
+  if (ch == '.' && strncmp (&lr->buf[lr->idx], "..", 2) == 0)
+    {
+      lr_getc (lr);
+      lr_getc (lr);
+      lr->token.tok = tok_ellipsis;
+      return &lr->token;
+    }
+
+  switch (ch)
+    {
+    case '<':
+      return get_symname (lr);
+
+    case '0' ... '9':
+      lr->token.tok = tok_number;
+      lr->token.val.num = ch - '0';
+
+      while (isdigit (ch = lr_getc (lr)))
+        {
+          lr->token.val.num *= 10;
+          lr->token.val.num += ch - '0';
+        }
+      if (isalpha (ch))
+        lr_error (lr, _("garbage at end of digit"));
+      /* Reading EOF does not advance the read position, so there is
+         nothing to give back then.  Giving back anyway would deliver
+         the last digit over and over again.  */
+      if (ch != EOF)
+        lr_ungetn (lr, 1);
+
+      return &lr->token;
+
+    case ';':
+      lr->token.tok = tok_semicolon;
+      return &lr->token;
+
+    case ',':
+      lr->token.tok = tok_comma;
+      return &lr->token;
+
+    case '(':
+      lr->token.tok = tok_open_brace;
+      return &lr->token;
+
+    case ')':
+      lr->token.tok = tok_close_brace;
+      return &lr->token;
+
+    case '"':
+      return get_string (lr, charset);
+
+    case '-':
+      ch = lr_getc (lr);
+      if (ch == '1')
+        {
+          lr->token.tok = tok_minus1;
+          return &lr->token;
+        }
+      /* Give back only the character after the `-'.  get_ident takes
+         the character in front of the read position as the first one of
+         the identifier, and this has to be the `-'.  */
+      if (ch != EOF)
+        lr_ungetn (lr, 1);
+      break;
+    }
+
+  return get_ident (lr);
+}
+
+
+/* Read a character code given as a sequence of up to four escaped
+   bytes.  The escape character of the first byte has been read already.
+   Each byte is written in octal, or in decimal after `d', or in
+   hexadecimal after `x', with two digits and an optional third one for
+   the non-hexadecimal forms.
+
+   Returns a tok_charcode token with the combined value and the number
+   of bytes.  If a digit is missing a tok_error token covering the
+   offending text is returned.  */
+static struct token *
+get_toplvl_escape (struct linereader *lr)
+{
+  /* This is supposed to be a numeric value.  We return the
+     numerical value and the number of bytes.  */
+  size_t start_idx = lr->idx - 1;
+  unsigned int value = 0;
+  int nbytes = 0;
+  int ch;
+
+  do
+    {
+      unsigned int byte = 0;
+      unsigned int base = 8;
+
+      ch = lr_getc (lr);
+
+      if (ch == 'd')
+        {
+          base = 10;
+          ch = lr_getc (lr);
+        }
+      else if (ch == 'x')
+        {
+          base = 16;
+          ch = lr_getc (lr);
+        }
+
+      if ((base == 16 && !isxdigit (ch))
+          || (base != 16 && (ch < '0' || ch >= '0' + base)))
+        {
+        esc_error:
+          lr->token.val.str.start = &lr->buf[start_idx];
+
+          /* Skip the rest of the word.  The loop has to stop at the end
+             of the input as well as at white space.  */
+          while (ch != EOF && !isspace (ch))
+            ch = lr_getc (lr);
+          lr->token.val.str.len = lr->idx - start_idx;
+
+          lr->token.tok = tok_error;
+          return &lr->token;
+        }
+
+      if (isdigit (ch))
+        byte = ch - '0';
+      else
+        byte = tolower (ch) - 'a' + 10;
+
+      ch = lr_getc (lr);
+      if ((base == 16 && !isxdigit (ch))
+          || (base != 16 && (ch < '0' || ch >= '0' + base)))
+        goto esc_error;
+
+      byte *= base;
+      if (isdigit (ch))
+        byte += ch - '0';
+      else
+        byte += tolower (ch) - 'a' + 10;
+
+      ch = lr_getc (lr);
+      if (base != 16 && isdigit (ch))
+        {
+          /* The optional third digit belongs to the byte value.  */
+          byte *= base;
+          byte += ch - '0';
+
+          ch = lr_getc (lr);
+        }
+
+      value *= 256;
+      value += byte;
+
+      ++nbytes;
+    }
+  while (ch == lr->escape_char && nbytes < 4);
+
+  if (!isspace (ch))
+    lr_error (lr, _("garbage at end of character code specification"));
+
+  /* Reading EOF does not advance the read position.  */
+  if (ch != EOF)
+    lr_ungetn (lr, 1);
+
+  lr->token.tok = tok_charcode;
+  lr->token.val.charcode.val = value;
+  lr->token.val.charcode.nbytes = nbytes;
+
+  return &lr->token;
+}
+
+/* Append the character CH to the growing buffer of the calling
+   function.  The macro uses the caller's variables `buf', `bufact'
+   (bytes in use) and `bufmax' (bytes allocated) and doubles the buffer
+   when it is full.  After the macro `bufact' may be equal to `bufmax'.  */
+#define ADDC(ch) \
+ do \
+ { \
+ if (bufact == bufmax) \
+ { \
+ bufmax *= 2; \
+ buf = xrealloc (buf, bufmax); \
+ } \
+ buf[bufact++] = (ch); \
+ } \
+ while (0)
+
+
+/* Read a symbolic name; the opening `<' has been read already.  The
+   name extends to the next unescaped `>'.  Reaching the end of the line
+   or of the input first is reported as an error.
+
+   Returns tok_ucs2 or tok_ucs4 with the value for names of the form
+   `Uxxxx' or `Uxxxxxxxx', the keyword token if the name is a reserved
+   word, and otherwise tok_bsymbol.  For tok_bsymbol the token string is
+   allocated here; it still contains the terminating character, which
+   the recorded length does not count.  */
+static struct token *
+get_symname (struct linereader *lr)
+{
+  /* Symbol in brackets.  We must distinguish three kinds:
+     1. reserved words
+     2. ISO 10646 position values
+     3. all other.  */
+  char *buf;
+  size_t bufact = 0;
+  size_t bufmax = 56;
+  const struct keyword_t *kw;
+  int ch;
+
+  buf = (char *) xmalloc (bufmax);
+
+  do
+    {
+      ch = lr_getc (lr);
+      if (ch == lr->escape_char)
+        {
+          int c2 = lr_getc (lr);
+          ADDC (c2);
+
+          if (c2 == '\n')
+            ch = '\n';
+          else if (c2 == EOF)
+            ch = EOF;
+        }
+      else
+        ADDC (ch);
+    }
+  /* EOF is returned again on every call, so the loop must stop there.  */
+  while (ch != '>' && ch != '\n' && ch != EOF);
+
+  if (ch == '\n' || ch == EOF)
+    lr_error (lr, _("unterminated symbolic name"));
+
+  /* Test for ISO 10646 position value.  */
+  if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
+    {
+      char *cp = buf + 1;
+      while (cp < &buf[bufact - 1] && isxdigit ((unsigned char) *cp))
+        ++cp;
+
+      if (cp == &buf[bufact - 1])
+        {
+          /* Yes, it is.  Replace the terminating character by NUL and
+             convert the digits, which start after the `U'.  */
+          lr->token.tok = bufact == 6 ? tok_ucs2 : tok_ucs4;
+          buf[bufact - 1] = '\0';
+          lr->token.val.charcode.val = strtoul (buf + 1, NULL, 16);
+          lr->token.val.charcode.nbytes = lr->token.tok == tok_ucs2 ? 2 : 4;
+
+          /* The token carries the value only.  */
+          free (buf);
+
+          return &lr->token;
+        }
+    }
+
+  /* It is a symbolic name.  Test for reserved words.  */
+  kw = lr->hash_fct (buf, bufact - 1);
+
+  if (kw != NULL && kw->symname_or_ident == 1)
+    {
+      lr->token.tok = kw->token;
+      free (buf);
+    }
+  else
+    {
+      lr->token.tok = tok_bsymbol;
+
+      /* Resize first: the buffer may be full, in which case there is no
+         room for the NUL byte yet.  */
+      buf = xrealloc (buf, bufact + 1);
+      buf[bufact] = '\0';
+
+      lr->token.val.str.start = buf;
+      lr->token.val.str.len = bufact - 1;
+    }
+
+  return &lr->token;
+}
+
+
+/* Read an identifier.  Its first character is the one in front of the
+   current read position.  The identifier ends at white space, at one of
+   the characters `"', `;', `<' and `,', or at the end of the input.
+
+   Returns the keyword token if the identifier is a reserved word and
+   otherwise tok_ident with a newly allocated, NUL terminated string.  */
+static struct token *
+get_ident (struct linereader *lr)
+{
+  char *buf;
+  size_t bufact;
+  size_t bufmax = 56;
+  const struct keyword_t *kw;
+  int ch;
+
+  buf = xmalloc (bufmax);
+  bufact = 0;
+
+  ADDC (lr->buf[lr->idx - 1]);
+
+  /* EOF is returned again on every call, so the loop must stop there.  */
+  while ((ch = lr_getc (lr)) != EOF && !isspace (ch) && ch != '"'
+         && ch != ';' && ch != '<' && ch != ',')
+    /* XXX Handle escape sequences?  */
+    ADDC (ch);
+
+  /* Reading EOF does not advance the read position.  */
+  if (ch != EOF)
+    lr_ungetn (lr, 1);
+
+  kw = lr->hash_fct (buf, bufact);
+
+  if (kw != NULL && kw->symname_or_ident == 0)
+    {
+      lr->token.tok = kw->token;
+      free (buf);
+    }
+  else
+    {
+      lr->token.tok = tok_ident;
+
+      /* Resize first: the buffer may be full, in which case there is no
+         room for the NUL byte yet.  */
+      buf = xrealloc (buf, bufact + 1);
+      buf[bufact] = '\0';
+
+      lr->token.val.str.start = buf;
+      lr->token.val.str.len = bufact;
+    }
+
+  return &lr->token;
+}
+
+
+/* Read a string; the opening `"' has been read already.  The string
+   ends at the next unescaped `"'.  Reaching the end of the line or of
+   the input first is reported as an error.
+
+   If CHARSET is not NULL, symbolic names in `<...>' are recognized.
+   When lr->translate_strings is set each name is looked up in CHARSET
+   and replaced by its encoded value, otherwise the name is kept
+   together with its brackets.
+
+   Returns a tok_string token.  Its string is newly allocated; the start
+   pointer is NULL if a symbolic name was unknown or a value could not
+   be encoded.  */
+static struct token *
+get_string (struct linereader *lr, const struct charset_t *charset)
+{
+  int illegal_string = 0;
+  char *buf, *cp;
+  size_t bufact;
+  size_t bufmax = 56;
+  int ch;
+
+  buf = xmalloc (bufmax);
+  bufact = 0;
+
+  while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
+    if (ch != '<' || charset == NULL)
+      {
+        if (ch == lr->escape_char)
+          {
+            ch = lr_getc (lr);
+            if (ch == '\n' || ch == EOF)
+              break;
+          }
+        ADDC (ch);
+      }
+    else
+      {
+        /* We have to get the value of the symbol.  */
+        unsigned int value;
+        size_t startidx = bufact;
+
+        if (!lr->translate_strings)
+          ADDC ('<');
+
+        while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
+          {
+            if (ch == lr->escape_char)
+              {
+                ch = lr_getc (lr);
+                if (ch == '\n' || ch == EOF)
+                  break;
+              }
+            ADDC (ch);
+          }
+
+        if (ch == '\n' || ch == EOF)
+          lr_error (lr, _("unterminated string"));
+        else
+          if (!lr->translate_strings)
+            ADDC ('>');
+
+        if (lr->translate_strings)
+          {
+            value = charset_find_value (charset, &buf[startidx],
+                                        bufact - startidx);
+            if (value == ILLEGAL_CHAR_VALUE)
+              illegal_string = 1;
+            /* The encoded value takes the place of the name.  */
+            bufact = startidx;
+
+            if (bufmax - bufact < 8)
+              {
+                bufmax *= 2;
+                buf = (char *) xrealloc (buf, bufmax);
+              }
+
+            cp = &buf[bufact];
+            if (encode_char (value, &cp))
+              illegal_string = 1;
+
+            bufact = cp - buf;
+          }
+      }
+
+  /* Catch errors with trailing escape character.  */
+  if (bufact > 0 && buf[bufact - 1] == lr->escape_char
+      && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
+    {
+      lr_error (lr, _("illegal escape sequence at end of string"));
+      --bufact;
+    }
+  else if (ch == '\n' || ch == EOF)
+    lr_error (lr, _("unterminated string"));
+
+  /* Terminate string if necessary.  */
+  if (lr->translate_strings)
+    {
+      /* The buffer may be completely filled at this point.  Make room
+         for the encoded terminator the same way as for every other
+         encoded character.  */
+      if (bufmax - bufact < 8)
+        {
+          bufmax *= 2;
+          buf = (char *) xrealloc (buf, bufmax);
+        }
+
+      cp = &buf[bufact];
+      if (encode_char (0, &cp))
+        illegal_string = 1;
+
+      bufact = cp - buf;
+    }
+  else
+    ADDC ('\0');
+
+  lr->token.tok = tok_string;
+
+  if (illegal_string)
+    {
+      free (buf);
+      lr->token.val.str.start = NULL;
+      lr->token.val.str.len = 0;
+    }
+  else
+    {
+      buf = xrealloc (buf, bufact + 1);
+
+      lr->token.val.str.start = buf;
+      lr->token.val.str.len = bufact;
+    }
+
+  return &lr->token;
+}
diff --git a/locale/programs/linereader.h b/locale/programs/linereader.h
new file mode 100644
index 0000000000..b78697e87d
--- /dev/null
+++ b/locale/programs/linereader.h
@@ -0,0 +1,158 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef _LINEREADER_H
+#define _LINEREADER_H 1
+
+#include <ctype.h>
+#include <libintl.h>
+#include <stdio.h>
+
+#include "error.h"
+#include "locfile-token.h"
+
+
+typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, int);
+struct charset_t;
+
+
+struct token /* One token as returned by lr_token. */
+{
+ enum token_t tok; /* Kind of the token; selects the valid member of VAL. */
+ union
+ {
+ struct
+ {
+ char *start; /* NULL for a string that could not be translated. */
+ size_t len;
+ } str; /* Set for tok_string, tok_ident, tok_bsymbol and tok_error. */
+ unsigned long int num; /* Set for tok_number. */
+ struct
+ {
+ unsigned int val;
+ int nbytes;
+ } charcode; /* Set for tok_charcode, tok_ucs2 and tok_ucs4. */
+ } val;
+};
+
+
+struct linereader /* State of one input file that is read line by line. */
+{
+ FILE *fp; /* Stream the lines are read from. */
+ const char *fname; /* File name used in error messages. */
+ char *buf; /* Current line, allocated by getdelim. */
+ size_t bufsize; /* Allocated size of BUF. */
+ size_t bufact; /* Number of characters of the current line in BUF. */
+ size_t lineno; /* Line number used in error messages. */
+
+ size_t idx; /* Index in BUF of the next character to read. */
+
+ char comment_char; /* Initially '#'. */
+ char escape_char; /* Initially '\\'. */
+
+ struct token token; /* The token most recently read. */
+
+ int translate_strings; /* Nonzero: replace symbolic names in strings by their values. */
+
+ kw_hash_fct_t hash_fct; /* Keyword lookup function given to lr_open. */
+};
+
+
+/* Functions defined in linereader.c. */
+struct linereader *lr_open (const char *fname, kw_hash_fct_t hf);
+int lr_eof (struct linereader *lr);
+void lr_close (struct linereader *lr);
+int lr_next (struct linereader *lr);
+struct token *lr_token (struct linereader *lr,
+ const struct charset_t *charset);
+
+/* Report an error for the current line of the reader LR.  FMT and ARGS
+   are handed to error_at_line together with the file name and the line
+   number of LR.  Status and error number are both zero.  */
+#define lr_error(lr, fmt, args...) \
+ error_at_line (0, 0, lr->fname, lr->lineno, fmt, ## args)
+
+
+
+/* Return the next character of the input of LR, reading a new line when
+   the current one is used up.  Returns EOF if no further line can be
+   read or if the next character is '\32'; in both cases the read
+   position is not advanced.
+   NOTE(review): the character is returned as a plain `char' value, so a
+   byte with the high bit set may come out negative - confirm that the
+   callers can cope with this.  */
+static inline int
+lr_getc (struct linereader *lr)
+{
+  if (lr->idx == lr->bufact)
+    {
+      /* Fetch a new line unless the buffer is already empty.  */
+      if (lr->bufact != 0 && lr_next (lr) < 0)
+        return EOF;
+
+      if (lr->bufact == 0)
+        return EOF;
+    }
+
+  if (lr->buf[lr->idx] == '\32')
+    return EOF;
+
+  return lr->buf[lr->idx++];
+}
+
+
+/* Step one character back in the current line of LR and store CH at
+   that position.  Returns 0 on success and -1 if the read position is
+   at the beginning of the line.  */
+static inline int
+lr_ungetc (struct linereader *lr, int ch)
+{
+  if (lr->idx != 0)
+    {
+      lr->idx -= 1;
+      lr->buf[lr->idx] = ch;
+      return 0;
+    }
+
+  return -1;
+}
+
+
+/* Step N characters back in the current line of LR without changing
+   the buffer.  Returns 0 on success and -1, leaving the position alone,
+   if fewer than N characters of the line have been read.  */
+static inline int
+lr_ungetn (struct linereader *lr, int n)
+{
+  if (lr->idx >= n)
+    {
+      lr->idx -= n;
+      return 0;
+    }
+
+  return -1;
+}
+
+/* Skip the rest of the current line of LR including all lines it is
+   continued on.  If VERBOSE is nonzero, anything but white space or a
+   comment in front of the end of the line is reported as trailing
+   garbage.  */
+static inline void
+lr_ignore_rest (struct linereader *lr, int verbose)
+{
+ if (verbose)
+ {
+ while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
+ && lr->buf[lr->idx] != lr->comment_char)
+ if (lr->buf[lr->idx] == '\0') /* NOTE(review): looks unreachable while isspace holds - confirm. */
+ {
+ if (lr_next (lr) < 0)
+ return;
+ }
+ else
+ ++lr->idx;
+
+ if (lr->buf[lr->idx] != '\n' &&lr->buf[lr->idx] != lr->comment_char)
+ lr_error (lr, _("trailing garbage at end of line"));
+ }
+
+ /* Ignore continued line. */
+ while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n') /* lr_next leaves no newline at the end of a continued line. */
+ if (lr_next (lr) < 0)
+ break;
+
+ lr->idx = lr->bufact; /* Mark the whole buffer as read. */
+}
+
+
+#endif /* linereader.h */
diff --git a/locale/programs/locale.c b/locale/programs/locale.c
new file mode 100644
index 0000000000..4e4ff83a37
--- /dev/null
+++ b/locale/programs/locale.c
@@ -0,0 +1,544 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+#include <dirent.h>
+#include <getopt.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+/*#include "localedef.h"*/
+#include "localeinfo.h"
+
+
+/* If set dump C code describing the current locale. */
+static int do_dump;
+
+/* If set print the name of the category. */
+static int show_category_name;
+
+/* If set print the name of the item. */
+static int show_keyword_name;
+
+/* Long options. */
+static const struct option long_options[] =
+{
+ { "all-locales", no_argument, NULL, 'a' },
+ { "category-name", no_argument, &show_category_name, 1 },
+ { "charmaps", no_argument, NULL, 'm' },
+ { "dump", no_argument, &do_dump, 1 },
+ { "help", no_argument, NULL, 'h' },
+ { "keyword-name", no_argument, &show_keyword_name, 1 },
+ { "version", no_argument, NULL, 'v' },
+ { NULL, 0, NULL, 0 }
+};
+
+
+/* We don't have these constants defined because we don't use them. Give
+ default values. */
+#define CTYPE_MB_CUR_MIN 0
+#define CTYPE_MB_CUR_MAX 0
+#define CTYPE_HASH_SIZE 0
+#define CTYPE_HASH_LAYERS 0
+#define CTYPE_CLASS 0
+#define CTYPE_TOUPPER_EB 0
+#define CTYPE_TOLOWER_EB 0
+#define CTYPE_TOUPPER_EL 0
+#define CTYPE_TOLOWER_EL 0
+
+/* XXX Hack */
+/* Description of one item of a locale category, as used by the tables
+   built from `categories.def' below.  */
+struct cat_item
+{
+ /* Value passed to nl_langinfo to fetch the item.  */
+ int item_id;
+ /* Keyword name: matched against the names given on the command line
+    and printed in front of the value when -k is in effect.  */
+ const char *name;
+ /* NOTE(review): not examined anywhere in this file; presumably
+    standard vs. optional item -- confirm.  */
+ enum { std, opt } status;
+ /* Selects how show_info and dump_category format the value.  */
+ enum value_type value_type;
+ /* NOTE(review): not examined in this file -- meaning to be confirmed.  */
+ int min;
+ /* For `stringarray' items, the number of consecutive nl_langinfo
+    items that make up the array.  */
+ int max;
+};
+
+
+/* We have all categories defined in `categories.def'. Now construct
+ the description and data structure used for all categories. */
+#define DEFINE_CATEGORY(category, name, items, postload, in, check, out) \
+ static struct cat_item category##_desc[] = \
+ { \
+ NO_PAREN items \
+ };
+
+#include "locale/aux/categories.def"
+#undef DEFINE_CATEGORY
+
+static struct category category[] =
+ {
+#define DEFINE_CATEGORY(category, name, items, postload, in, check, out) \
+ { _NL_NUM_##category, name, NELEMS (category##_desc) - 1, \
+ category##_desc, NULL, NULL, NULL, out },
+#include "locale/aux/categories.def"
+#undef DEFINE_CATEGORY
+ };
+#define NCATEGORIES NELEMS (category)
+
+
+/* Prototypes for local functions. */
+static void usage (int status) __attribute__ ((noreturn));
+static void write_locales (void);
+static void write_charmaps (void);
+static void show_locale_vars (void);
+static void show_info (const char *name);
+static void dump_category (const char *name);
+
+
+/* Entry point of the `locale' program.  Parses the options, then does
+   exactly one of: print version, print help, dump C code for the named
+   categories, list locales, list charmaps, show the locale environment
+   variables, or print the values of the named categories/keywords.  */
+int
+main (int argc, char *argv[])
+{
+  int optchar;
+  int do_all = 0;
+  int do_help = 0;
+  int do_version = 0;
+  int do_charmaps = 0;
+
+  /* Set initial values for global variables.  */
+  do_dump = 0;
+  show_category_name = 0;
+  show_keyword_name = 0;
+
+  /* Set locale.  Do not set LC_ALL because the other categories must
+     not be affected (according to POSIX.2).  */
+  setlocale (LC_CTYPE, "");
+  setlocale (LC_MESSAGES, "");
+
+  /* Initialize the message catalog.  */
+  textdomain (PACKAGE);
+
+  while ((optchar = getopt_long (argc, argv, "achkmv", long_options, NULL))
+         != EOF)
+    switch (optchar)
+      {
+      case '\0':
+        /* Long option which only stored a value through its flag
+           pointer.  */
+        break;
+      case 'a':
+        do_all = 1;
+        break;
+      case 'c':
+        show_category_name = 1;
+        break;
+      case 'h':
+        do_help = 1;
+        break;
+      case 'k':
+        show_keyword_name = 1;
+        break;
+      case 'm':
+        do_charmaps = 1;
+        break;
+      case 'v':
+        do_version = 1;
+        break;
+      default:
+        /* getopt_long has already printed a diagnostic.  `optarg' is
+           not set for an unrecognized option, so it must not be
+           formatted with %s here; just point the user at --help and
+           exit with a failure status.  */
+        usage (EXIT_FAILURE);
+        break;
+      }
+
+  /* Version information is requested.  */
+  if (do_version)
+    {
+      fprintf (stderr, "GNU %s %s\n", PACKAGE, VERSION);
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* Dump C code.  */
+  if (do_dump)
+    {
+      printf ("\
+/* Generated by GNU %s %s. */\n\
+\n\
+#include \"localeinfo.h\"\n", program_invocation_name, VERSION);
+
+      while (optind < argc)
+        dump_category (argv[optind++]);
+
+      exit (EXIT_SUCCESS);
+    }
+
+  /* `-a' requests the names of all available locales.  */
+  if (do_all != 0)
+    {
+      write_locales ();
+      exit (EXIT_SUCCESS);
+    }
+
+  /* `-m' requests the names of all available charmaps.  The names can
+     be used for the -f argument to localedef(1).  */
+  if (do_charmaps != 0)
+    {
+      write_charmaps ();
+      exit (EXIT_SUCCESS);
+    }
+
+  /* If no real argument is given we have to print the contents of the
+     current locale definition variables.  These are LANG and the LC_*.  */
+  if (optind == argc && show_keyword_name == 0 && show_category_name == 0)
+    {
+      show_locale_vars ();
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Process all given names.  */
+  while (optind < argc)
+    show_info (argv[optind++]);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Display usage information and exit. */
+static void
+usage (int status)
+{
+  /* The full help text goes to stdout when help was requested; any
+     failure status only gets a short hint on stderr.  Either way the
+     program terminates with STATUS.  */
+  if (status == EXIT_SUCCESS)
+    printf (gettext ("\
+Usage: %s [OPTION]... name\n\
+Mandatory arguments to long options are mandatory for short options too.\n\
+ -h, --help display this help and exit\n\
+ -v, --version output version information and exit\n\
+\n\
+ -a, --all-locales write names of available locales\n\
+ -m, --charmaps write names of available charmaps\n\
+\n\
+ -c, --category-name write names of selected categories\n\
+ -k, --keyword-name write names of selected keywords\n\
+\n\
+ --dump dump C code describing the current locale\n\
+ (this code can be used in the C library)\n\
+"), program_invocation_name);
+  else
+    fprintf (stderr, gettext ("Try `%s --help' for more information.\n"),
+             program_invocation_name);
+
+  exit (status);
+}
+
+
+/* Write the names of all available locales to stdout. */
+static void
+write_locales (void)
+{
+  DIR *locale_dir;
+  struct dirent *entry;
+
+  /* `POSIX' locale is always available (POSIX.2 4.34.3).  */
+  puts ("POSIX");
+
+  locale_dir = opendir (LOCALE_PATH);
+  if (locale_dir == NULL)
+    {
+      error (1, errno, gettext ("cannot read locale directory `%s'"),
+             LOCALE_PATH);
+      return;
+    }
+
+  /* Every directory entry except `.' and `..' is printed as a locale
+     name.  */
+  for (entry = readdir (locale_dir); entry != NULL;
+       entry = readdir (locale_dir))
+    {
+      if (strcmp (entry->d_name, ".") == 0
+          || strcmp (entry->d_name, "..") == 0)
+        continue;
+
+      puts (entry->d_name);
+    }
+
+  closedir (locale_dir);
+}
+
+
+/* Write the names of all available character maps to stdout. */
+static void
+write_charmaps (void)
+{
+  DIR *charmap_dir;
+  struct dirent *entry;
+
+  charmap_dir = opendir (CHARMAP_PATH);
+  if (charmap_dir == NULL)
+    {
+      error (1, errno, gettext ("cannot read character map directory `%s'"),
+             CHARMAP_PATH);
+      return;
+    }
+
+  /* Every directory entry except `.' and `..' is printed as a charmap
+     name.  */
+  for (entry = readdir (charmap_dir); entry != NULL;
+       entry = readdir (charmap_dir))
+    {
+      if (strcmp (entry->d_name, ".") == 0
+          || strcmp (entry->d_name, "..") == 0)
+        continue;
+
+      puts (entry->d_name);
+    }
+
+  closedir (charmap_dir);
+}
+
+
+/* We have to show the contents of the environments determining the
+ locale. */
+static void
+show_locale_vars (void)
+{
+  size_t cat_no;
+  const char *lcall = getenv ("LC_ALL");
+  const char *lang = getenv ("LANG") ? : "POSIX";
+
+  /* LANG has to be the first value.  */
+  printf ("LANG=%s\n", lang);
+
+  /* Now all categories in an unspecified order.  A value taken
+     directly from the category's own variable is printed bare; a value
+     implied by LC_ALL or LANG is printed in double quotes.  */
+  for (cat_no = 0; cat_no < NCATEGORIES; ++cat_no)
+    {
+      const char *name = category[cat_no].name;
+      char *val = getenv (name);
+
+      if (lcall == NULL && val != NULL)
+        printf ("%s=%s\n", name, val);
+      else
+        printf ("%s=\"%s\"\n", name, lcall ? : lang);
+    }
+
+  /* The last is the LC_ALL value.  */
+  printf ("LC_ALL=%s\n", lcall ? : "");
+}
+
+
+/* Show the information requested for NAME. */
+static void
+show_info (const char *name)
+{
+  size_t cat_no;
+
+  /* Print one item on its own line.  With -k the keyword name and `='
+     come first, and string values are enclosed in double quotes.  */
+  void print_item (struct cat_item *item)
+    {
+      if (show_keyword_name != 0)
+        printf ("%s=", item->name);
+
+      switch (item->value_type)
+        {
+        case string:
+          printf ("%s%s%s", show_keyword_name ? "\"" : "",
+                  nl_langinfo (item->item_id) ? : "",
+                  show_keyword_name ? "\"" : "");
+          break;
+        case stringarray:
+          {
+            int cnt;
+            const char *val;
+
+            if (show_keyword_name)
+              putchar ('"');
+
+            /* All elements but the last are followed by `;'.  */
+            for (cnt = 0; cnt < item->max - 1; ++cnt)
+              {
+                val = nl_langinfo (item->item_id + cnt);
+                printf ("%s;", val ? : "");
+              }
+
+            val = nl_langinfo (item->item_id + cnt);
+            printf ("%s", val ? : "");
+
+            if (show_keyword_name)
+              putchar ('"');
+          }
+          break;
+        case byte:
+          {
+            const char *val = nl_langinfo (item->item_id);
+
+            /* CHAR_MAX is shown as -1.  */
+            if (val != NULL)
+              printf ("%d", *val == CHAR_MAX ? -1 : *val);
+          }
+          break;
+        case bytearray:
+          {
+            const char *val = nl_langinfo (item->item_id);
+            int cnt = val ? strlen (val) : 0;
+
+            while (cnt > 1)
+              {
+                printf ("%d;", *val == CHAR_MAX ? -1 : *val);
+                --cnt;
+                ++val;
+              }
+
+            /* Last element; an empty or missing array prints as -1.  */
+            printf ("%d", cnt == 0 || *val == CHAR_MAX ? -1 : *val);
+          }
+          break;
+        default:
+          /* Other value types are not printed.  The explicit `break'
+             is needed because a label may not directly precede the
+             closing brace.  */
+          break;
+        }
+      putchar ('\n');
+    }
+
+  for (cat_no = 0; cat_no < NCATEGORIES; ++cat_no)
+    {
+      size_t item_no;
+
+      if (category[cat_no].outfct != NULL)
+        /* Categories which need special handling of the output are
+           not written.  This is especially for LC_CTYPE and LC_COLLATE.
+           It does not make sense to have this large number of cryptic
+           characters displayed.  */
+        continue;
+
+      if (strcmp (name, category[cat_no].name) == 0)
+        /* Print the whole category.  */
+        {
+          if (show_category_name != 0)
+            puts (category[cat_no].name);
+
+          for (item_no = 0; item_no < category[cat_no].number; ++item_no)
+            print_item (&category[cat_no].item_desc[item_no]);
+
+          return;
+        }
+
+      /* Otherwise NAME may be one of the keywords of this category.  */
+      for (item_no = 0; item_no < category[cat_no].number; ++item_no)
+        if (strcmp (name, category[cat_no].item_desc[item_no].name) == 0)
+          {
+            if (show_category_name != 0)
+              puts (category[cat_no].name);
+
+            print_item (&category[cat_no].item_desc[item_no]);
+            return;
+          }
+    }
+}
+
+
+/* Write to stdout a C definition of a `struct locale_data' object
+   describing category NAME of the current locale.  Nothing is written
+   if NAME is not a known category; categories with their own output
+   function are delegated to it.  */
+static void
+dump_category (const char *name)
+{
+  char *locname;
+  size_t cat_no, item_no, nstrings;
+
+  for (cat_no = 0; cat_no < NCATEGORIES; ++cat_no)
+    if (strcmp (name, category[cat_no].name) == 0)
+      break;
+
+  if (cat_no >= NCATEGORIES)
+    return;
+
+  /* The NAME specifies a correct locale category.  */
+  if (category[cat_no].outfct != NULL)
+    {
+      category[cat_no].outfct ();
+      return;
+    }
+
+  /* Same precedence as the locale selection itself: LC_ALL, then the
+     category variable, then LANG.  */
+  locname = (getenv ("LC_ALL") ?: getenv (name) ?:
+             getenv ("LANG") ?: (char *) "POSIX");
+
+  /* Determine the number of strings in advance.  */
+  nstrings = 0;
+  for (item_no = 0; item_no < category[cat_no].number; ++item_no)
+    switch (category[cat_no].item_desc[item_no].value_type)
+      {
+      case string:
+      case byte:
+      case bytearray:
+        ++nstrings;
+        break;
+      case stringarray:
+        nstrings += category[cat_no].item_desc[item_no].max;
+        break;
+      default:
+        /* Other value types contribute no strings.  */
+        break;
+      }
+
+  printf ("\nconst struct locale_data _nl_%s_%s =\n{\n"
+          " NULL, 0, /* no file mapped */\n %Zu,\n {\n",
+          locname, name, nstrings);
+
+  for (item_no = 0; item_no < category[cat_no].number; ++item_no)
+    switch (category[cat_no].item_desc[item_no].value_type)
+      {
+      case string:
+        {
+          const char *val = nl_langinfo (
+            category[cat_no].item_desc[item_no].item_id);
+
+          if (val != NULL)
+            printf (" \"%s\",\n", val);
+          else
+            puts (" NULL,");
+        }
+        break;
+      case stringarray:
+        {
+          const char *val;
+          int cnt;
+
+          for (cnt = 0; cnt < category[cat_no].item_desc[item_no].max; ++cnt)
+            {
+              val = nl_langinfo (
+                category[cat_no].item_desc[item_no].item_id + cnt);
+
+              if (val != NULL)
+                printf (" \"%s\",\n", val);
+              else
+                puts (" NULL,");
+            }
+        }
+        break;
+      case byte:
+        {
+          const char *val = nl_langinfo (
+            category[cat_no].item_desc[item_no].item_id);
+
+          /* A zero byte is written as UCHAR_MAX, as an octal escape.  */
+          if (val != NULL)
+            printf (" \"\\%o\",\n",
+                    *(unsigned char *) val ? : UCHAR_MAX);
+          else
+            puts (" NULL,");
+        }
+        break;
+      case bytearray:
+        {
+          const char *bytes = nl_langinfo (
+            category[cat_no].item_desc[item_no].item_id);
+
+          if (bytes != NULL)
+            {
+              fputs (" \"", stdout);
+              if (*bytes != '\0')
+                do
+                  printf ("\\%o", *(unsigned char *) bytes++);
+                while (*bytes != '\0');
+              else
+                /* An empty array is written as the single byte
+                   UCHAR_MAX.  */
+                printf ("\\%o", UCHAR_MAX);
+
+              puts ("\",");
+            }
+          else
+            puts (" NULL,");
+        }
+        break;
+      default:
+        break;
+      }
+
+  puts (" }\n};");
+}
diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c
new file mode 100644
index 0000000000..a98bac4301
--- /dev/null
+++ b/locale/programs/localedef.c
@@ -0,0 +1,461 @@
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libintl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "error.h"
+#include "charset.h"
+#include "locfile.h"
+
+/* Undefine the following line in the production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+
+/* List of locale definition files which are used in `copy' instructions. */
+struct copy_def_list_t
+{
+ /* Next entry of the singly linked list headed by `copy_list'.  */
+ struct copy_def_list_t *next;
+
+ /* Name of the locale to copy from; def_to_process uses it as the
+    lookup key and main uses it to locate the locale's files.  */
+ const char *name;
+ /* Categories still to be copied from this locale.  main tests and
+    clears bit `1 << cat'; def_to_process ORs its CATEGORY argument in
+    unchanged.  NOTE(review): callers of def_to_process therefore
+    presumably pass a bit mask, not a category number -- confirm.  */
+ int mask;
+
+ /* Result of locfile_read for NAME; NULL until main first needs it.  */
+ struct localedef_t *locale;
+
+ /* Initialized to NULL/0 by def_to_process.  NOTE(review): not used
+    otherwise in the visible code; presumably one slot per category
+    for binary locale data -- confirm.  */
+ struct
+ {
+ void *data;
+ size_t len;
+ } binary[6];
+};
+
+
+/* List of copied locales. */
+struct copy_def_list_t *copy_list;
+
+/* If nonzero, conform strictly to POSIX. */
+int posix_conformance;
+
+/* Name of the running program. */
+const char *program_name;
+
+/* If not zero give a lot more messages. */
+int verbose;
+
+
+
+/* Long options. */
+static const struct option long_options[] =
+{
+ { "charmap", required_argument, NULL, 'f' },
+ { "code-set-name", required_argument, NULL, 'u' },
+ { "help", no_argument, NULL, 'h' },
+ { "force", no_argument, NULL, 'c' },
+ { "inputfile", required_argument, NULL, 'i' },
+ { "posix", no_argument, &posix_conformance, 1 },
+ { "verbose", no_argument, &verbose, 1},
+ { "version", no_argument, NULL, 'V' },
+ { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for global functions. */
+void *xmalloc (size_t __n);
+
+/* Prototypes for local functions. */
+static void usage (int status) __attribute__ ((noreturn));
+static void error_print (void);
+static const char *construct_output_path (const char *path);
+
+
+/* Entry point of `localedef'.  Parses the options, reads the charmap
+   and the locale definition, resolves pending `copy' requests (from
+   source if available, otherwise from installed binary category
+   files), checks the result and writes the output files.  Exit
+   statuses follow POSIX.2 4.35.  */
+int
+main (int argc, char *argv[])
+{
+  int optchar;
+  int do_help = 0;
+  int do_version = 0;
+  int force_output = 0;
+  const char *charmap_file = NULL;
+  const char *input_file = NULL;
+  const char *ucs_csn = NULL;
+  const char *output_path;
+  int cannot_write_why;
+  struct charset_t *charset;
+  struct localedef_t *localedef;
+  struct copy_def_list_t *act_add_locdef;
+
+  /* Set initial values for global variables.  */
+  copy_list = NULL;
+  posix_conformance = getenv ("POSIXLY_CORRECT") != NULL;
+  program_name = argv[0];
+  error_print_progname = error_print;
+  verbose = 0;
+
+  /* Set locale.  Do not set LC_ALL because the other categories must
+     not be affected (according to POSIX.2).  */
+  setlocale (LC_MESSAGES, "");
+  setlocale (LC_CTYPE, "");
+
+  /* Initialize the message catalog.  */
+#if 0
+  /* In the final version for glibc we can use the variable.  */
+  textdomain (_libc_intl_domainname);
+#else
+  textdomain ("SYS_libc");
+#endif
+
+  while ((optchar = getopt_long (argc, argv, "cf:hi:u:vV", long_options, NULL))
+         != EOF)
+    switch (optchar)
+      {
+      case '\0':		/* Long option.  */
+        break;
+
+      case 'c':
+        force_output = 1;
+        break;
+
+      case 'f':
+        charmap_file = optarg;
+        break;
+
+      case 'h':
+        do_help = 1;
+        break;
+
+      case 'i':
+        input_file = optarg;
+        break;
+
+      case 'u':
+        ucs_csn = optarg;
+        break;
+
+      case 'v':
+        verbose = 1;
+        break;
+
+      case 'V':
+        do_version = 1;
+        break;
+
+      default:
+        usage (4);	/* A value >3 is forced by POSIX.  */
+        break;
+      }
+
+  /* POSIX.2 requires to be verbose about missing characters in the
+     character map.  */
+  verbose |= posix_conformance;
+
+  /* Version information is requested.  */
+  if (do_version)
+    {
+      fprintf (stderr, "%s - GNU %s %s\n", program_name, PACKAGE, VERSION);
+      exit (0);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    /* Possible violation: POSIX.2 4.35.8 defines the return value 0 as
+       "No errors occurred and the locale(s) were successfully created."
+       But giving a value other than 0 does not make sense here.  It
+       is perhaps not that important because POSIX does not specify the
+       -h option for localedef.  */
+    usage (0);
+
+  if (argc - optind != 1)
+    /* We need exactly one non-option parameter.  */
+    usage (4);
+
+  /* The parameter describes the output path of the constructed files.
+     construct_output_path leaves the reason why the path cannot be
+     written (or 0) in `errno'; save it before anything overwrites it.  */
+  output_path = construct_output_path (argv[optind]);
+  cannot_write_why = errno;
+
+  /* Now that the parameters are processed we have to reset the local
+     ctype locale.  (P1003.2 4.35.5.2)  */
+  setlocale (LC_CTYPE, "POSIX");
+
+  /* Look whether the system really allows locale definitions.  POSIX
+     defines error code 3 for this situation so I think it must be
+     a fatal error (see P1003.2 4.35.8).  */
+  if (sysconf (_SC_2_LOCALEDEF) < 0)
+    error (3, 0, _("FATAL: system does not define `_POSIX2_LOCALEDEF'"));
+
+  /* Process charmap file.  */
+  charset = charmap_read (charmap_file);
+
+  /* Now read the locale file.  */
+  localedef = locfile_read (input_file, charset);
+  if (localedef->failed != 0)
+    error (4, errno, _("cannot open locale definition file `%s'"), input_file);
+
+  /* Perhaps we saw some `copy' instructions.  Process the given list.
+     We use a very simple algorithm: we look up the list from the
+     beginning every time.  */
+  do
+    {
+      int cat;
+
+      /* Find the first list entry with a pending category and claim
+         that category by clearing its bit.  */
+      for (act_add_locdef = copy_list; act_add_locdef != NULL;
+           act_add_locdef = act_add_locdef->next)
+        {
+          for (cat = LC_COLLATE; cat <= LC_MESSAGES; ++cat)
+            if ((act_add_locdef->mask & (1 << cat)) != 0)
+              {
+                act_add_locdef->mask &= ~(1 << cat);
+                break;
+              }
+          if (cat <= LC_MESSAGES)
+            break;
+        }
+
+      if (act_add_locdef != NULL)
+        {
+          int avail = 0;
+
+          if (act_add_locdef->locale == NULL)
+            act_add_locdef->locale = locfile_read (act_add_locdef->name,
+                                                   charset);
+
+          /* Prefer the category as parsed from the locale's source.  */
+          if (! act_add_locdef->locale->failed)
+            {
+              avail = act_add_locdef->locale->categories[cat].generic != NULL;
+              if (avail)
+                localedef->categories[cat].generic
+                  = act_add_locdef->locale->categories[cat].generic;
+            }
+
+          /* Otherwise fall back to the installed binary category file.  */
+          if (! avail)
+            {
+              const char *locale_names[] = { "LC_COLLATE", "LC_CTYPE",
+                                             "LC_MONETARY", "LC_NUMERIC",
+                                             "LC_TIME", "LC_MESSAGES" };
+              char *fname;
+              int fd;
+              struct stat st;
+
+              asprintf (&fname, LOCALE_PATH "/%s/%s", act_add_locdef->name,
+                        locale_names[cat]);
+              fd = open (fname, O_RDONLY);
+              if (fd == -1)
+                {
+                  free (fname);
+
+                  asprintf (&fname, LOCALE_PATH "/%s/%s/SYS_%s",
+                            act_add_locdef->name, locale_names[cat],
+                            locale_names[cat]);
+
+                  fd = open (fname, O_RDONLY);
+                  if (fd == -1)
+                    error (5, 0, _("\
+locale file `%s', used in `copy' statement, not found"),
+                           act_add_locdef->name);
+                }
+
+              if (fstat (fd, &st) < 0)
+                error (5, errno, _("\
+cannot `stat' locale file `%s'"),
+                       fname);
+
+              localedef->len[cat] = st.st_size;
+              localedef->categories[cat].generic
+                = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+              if (localedef->categories[cat].generic == (void *) -1)
+                {
+                  /* Mapping failed; read the file into memory
+                     instead.  */
+                  size_t left = st.st_size;
+                  void *read_ptr;
+
+                  localedef->categories[cat].generic
+                    = xmalloc (st.st_size);
+                  read_ptr = localedef->categories[cat].generic;
+
+                  while (left > 0)
+                    {
+                      long int n;
+                      n = read (fd, read_ptr, left);
+                      /* read reports failure with -1.  */
+                      if (n == -1)
+                        error (5, errno, _("cannot read locale file `%s'"),
+                               fname);
+                      /* End of file before ST_SIZE bytes arrived: the
+                         file is shorter than fstat reported.  Give up
+                         instead of looping forever.  */
+                      if (n == 0)
+                        error (5, 0, _("cannot read locale file `%s'"),
+                               fname);
+                      read_ptr += n;
+                      left -= n;
+                    }
+                }
+
+              close (fd);
+              free (fname);
+
+              localedef->binary |= 1 << cat;
+            }
+        }
+    }
+  while (act_add_locdef != NULL);
+
+  /* Check the categories we processed in source form.  */
+  check_all_categories (localedef, charset);
+
+  /* We are now able to write the data files.  If warnings were given
+     we do it only if it is explicitly requested (--force).  */
+  if (error_message_count == 0 || force_output != 0)
+    {
+      if (cannot_write_why != 0)
+        error (4, cannot_write_why, _("cannot write output files to `%s'"),
+               output_path);
+      else
+        write_all_categories (localedef, output_path);
+    }
+  else
+    error (4, 0, _("no output file produced because warning were issued"));
+
+  /* This exit status is prescribed by POSIX.2 4.35.7.  */
+  exit (error_message_count != 0);
+}
+
+
+/* Record that CATEGORY has to be copied from the locale called NAME.
+   An entry for NAME is appended to `copy_list' if there is none yet.
+   NAME itself is stored, not a copy of it.  */
+void
+def_to_process (const char *name, int category)
+{
+  struct copy_def_list_t *new, **rp;
+
+  /* Walk the list; RP stops at the matching entry or at the link where
+     a new entry has to be appended.  */
+  rp = &copy_list;
+  while (*rp != NULL && strcmp (name, (*rp)->name) != 0)
+    rp = &(*rp)->next;
+
+  new = *rp;
+  if (new == NULL)
+    {
+      size_t cnt;
+
+      new = (struct copy_def_list_t *) xmalloc (sizeof (*new));
+
+      new->next = NULL;
+      new->name = name;
+      new->mask = 0;
+      new->locale = NULL;
+
+      for (cnt = 0; cnt < 6; ++cnt)
+        {
+          new->binary[cnt].data = NULL;
+          new->binary[cnt].len = 0;
+        }
+
+      *rp = new;
+    }
+
+  if ((new->mask & category) != 0)
+    /* We already have the information.  This cannot happen.  */
+    error (5, 0, _("\
+category data requested more than once: should not happen"));
+
+  new->mask |= category;
+}
+
+
+/* Display usage information and exit. */
+static void
+usage (int status)
+{
+  /* Status 0 means help was requested: print the full text on stdout.
+     Any other status only gets a short hint on stderr.  Either way the
+     program terminates with STATUS.  */
+  if (status == 0)
+    printf (_("\
+Usage: %s [OPTION]... name\n\
+Mandatory arguments to long options are mandatory for short options too.\n\
+ -c, --force create output even if warning messages were issued\n\
+ -h, --help display this help and exit\n\
+ -f, --charmap=FILE symbolic character names defined in FILE\n\
+ -i, --inputfile=FILE source definitions are found in FILE\n\
+ -u, --code-set-name=NAME specify code set for mapping ISO 10646 elements\n\
+ -v, --verbose print more messages\n\
+ -V, --version output version information and exit\n\
+ --posix be strictly POSIX conform\n\
+\n\
+System's directory for character maps: %s\n\
+ locale files : %s\n"),
+            program_name, CHARMAP_PATH, LOCALE_PATH);
+  else
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+             program_name);
+
+  exit (status);
+}
+
+
+/* The address of this function will be assigned to the hook in the error
+ functions. */
+static void
+error_print (void)
+{
+  /* Deliberately empty: main installs this function as
+     `error_print_progname' so that nothing is printed in place of the
+     program name.  We don't want the program name in messages because
+     Emacs' compile.el does not like it.  */
+}
+
+
+/* The parameter to localedef describes the output path. If it
+ contains a '/' character it is taken as a path name (possibly
+ relative). Otherwise it names the locale this definition is for. */
+/* Build the name of the output directory for PATH and return it in
+   newly allocated memory, with a trailing '/'.  A PATH without '/' is
+   placed below LOCALE_PATH; any other PATH is used as given.  If the
+   directory does not exist an attempt is made to create it.  On return
+   `errno' is 0 if the directory is believed to be writable, otherwise
+   it holds the reason why it is not; the caller reads it.  */
+static const char *
+construct_output_path (const char *path)
+{
+  char *result;
+
+  if (strchr (path, '/') == NULL)
+    {
+      /* This is a system path.  Size the buffer from the strings
+         themselves: LOCALE_PATH, '/', PATH, the '/' appended below and
+         the terminating NUL.  (pathconf may return -1, and a buffer of
+         exactly the maximum path length would leave no room for the
+         trailing '/'.)  */
+      size_t len = strlen (LOCALE_PATH) + 1 + strlen (path) + 2;
+      result = (char *) xmalloc (len);
+
+      snprintf (result, len, "%s/%s", LOCALE_PATH, path);
+    }
+  else
+    {
+      /* This is a user path.  Leave room for the trailing '/' and the
+         terminating NUL.  */
+      result = xmalloc (strlen (path) + 2);
+      strcpy (result, path);
+    }
+
+  errno = 0;
+
+  if (euidaccess (result, W_OK) == -1 && errno == ENOENT)
+    {
+      /* Perhaps the directory does not exist now.  Try to create it.
+         If this fails mkdir leaves the reason in `errno'.  */
+      errno = 0;
+      mkdir (result, 0777);
+    }
+
+  strcat (result, "/");
+
+  return result;
+}
diff --git a/locale/programs/locales.h b/locale/programs/locales.h
new file mode 100644
index 0000000000..3c7676b765
--- /dev/null
+++ b/locale/programs/locales.h
@@ -0,0 +1,207 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef _LOCALES_H
+#define _LOCALES_H
+
+#include <ctype.h>
+
+/* Undefine following line in production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+#include "linereader.h"
+#include "locfile-token.h"
+#include "charset.h"
+#include "locfile.h"
+#include "localeinfo.h"
+
+
+/* Header of the locale data files. */
+struct locale_file
+{
+ int magic;
+ int n;
+};
+
+
+/* Handle LC_CTYPE category. */
+
+/* Return the class bit for token TOK.  The predefined classes map to
+   their _IS* constants.  Each call with `tok_string' hands out the next
+   higher bit after the previously returned one, starting above
+   _ISalnum.  */
+static inline unsigned int
+charclass_to_bit (enum token_t tok)
+{
+  static unsigned int next_bit = _ISalnum;
+
+  if (tok == tok_string)
+    {
+      next_bit <<= 1;
+      if (next_bit == 0ul)
+        /* Exit status 2 means a limitation in the implementation is
+           exceeded.  */
+        error (2, 0, _("too many character classes defined"));
+      return next_bit;
+    }
+
+  switch (tok)
+    {
+#define CLASS(name) case tok_##name: return _IS##name
+      CLASS (upper);
+      CLASS (lower);
+      CLASS (alpha);
+      CLASS (digit);
+      CLASS (alnum);
+      CLASS (space);
+      CLASS (cntrl);
+      CLASS (punct);
+      CLASS (graph);
+      CLASS (print);
+      CLASS (xdigit);
+      CLASS (blank);
+#undef CLASS
+    default:
+      /* No other token names a character class.  */
+      assert (1 == 0);
+    }
+  return 0;
+}
+
+/* Remember name of newly created charclass. */
+void ctype_startup (struct linereader *lr, struct localedef_t *locale,
+ struct charset_t *__charset);
+void ctype_finish (struct localedef_t *__locale, struct charset_t *__charset);
+
+void ctype_output (struct localedef_t *locale, const char *output_path);
+
+int ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
+ const char *__name);
+void ctype_class_new (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, struct token *__code,
+ struct charset_t *__charset);
+void ctype_class_start (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, const char *__name,
+ struct charset_t *__charset);
+void ctype_class_from (struct linereader *lr, struct localedef_t *locale,
+ struct token *__code, struct charset_t *__charset);
+void ctype_class_to (struct linereader *lr, struct localedef_t *locale,
+ struct token *__code, struct charset_t *__charset);
+void ctype_class_end (struct linereader *lr, struct localedef_t *locale);
+
+int ctype_is_charmap (struct linereader *lr, struct localedef_t *locale,
+ const char *__name);
+void ctype_map_new (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, struct token *__code,
+ struct charset_t *__charset);
+void ctype_map_start (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, const char *__name,
+ struct charset_t *__charset);
+void ctype_map_from (struct linereader *lr, struct localedef_t *locale,
+ struct token *__code, struct charset_t *__charset);
+void ctype_map_to (struct linereader *lr, struct localedef_t *locale,
+ struct token *__code, struct charset_t *__charset);
+void ctype_map_end (struct linereader *lr, struct localedef_t *locale);
+
+
+/* Handle LC_COLLATE category. */
+
+void collate_startup (struct linereader *__lr, struct localedef_t *__locale,
+ struct charset_t *__charset);
+
+void collate_finish (struct localedef_t *__locale,
+ struct charset_t *__charset);
+
+void collate_output (struct localedef_t *locale, const char *output_path);
+
+void collate_element_to (struct linereader *__lr, struct localedef_t *__locale,
+ struct token *__code, struct charset_t *__charset);
+void collate_element_from (struct linereader *__lr,
+ struct localedef_t *__locale, struct token *__code,
+ struct charset_t *__charset);
+void collate_symbol (struct linereader *__lr, struct localedef_t *__locale,
+ struct token *__code, struct charset_t *__charset);
+void collate_new_order (struct linereader *__lr, struct localedef_t *__locale,
+ enum coll_sort_rule __sort_rule);
+void collate_build_arrays (struct linereader *__lr,
+ struct localedef_t *__locale);
+int collate_order_elem (struct linereader *__lr, struct localedef_t *__locale,
+ struct token *__code, struct charset_t *__charset);
+int collate_weight_bsymbol (struct linereader *__lr,
+ struct localedef_t *__locale,
+ struct token *__code, struct charset_t *__charset);
+int collate_next_weight (struct linereader *__lr,
+ struct localedef_t *__locale);
+int collate_simple_weight (struct linereader *__lr,
+ struct localedef_t *__locale,
+ struct token *__code, struct charset_t *__charset);
+void collate_end_weight (struct linereader *__lr,
+ struct localedef_t *__locale);
+
+
+/* Handle LC_MONETARY category. */
+
+void monetary_startup (struct linereader *__lr, struct localedef_t *__locale,
+ struct charset_t *__charset);
+
+void monetary_finish (struct localedef_t *__locale);
+
+void monetary_output (struct localedef_t *locale, const char *output_path);
+
+void monetary_add (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, struct token *__code,
+ struct charset_t *__charset);
+
+
+/* Handle LC_NUMERIC category. */
+
+void numeric_startup (struct linereader *__lr, struct localedef_t *__locale,
+ struct charset_t *__charset);
+
+void numeric_finish (struct localedef_t *__locale);
+
+void numeric_output (struct localedef_t *locale, const char *output_path);
+
+void numeric_add (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, struct token *__code,
+ struct charset_t *__charset);
+
+
+/* Handle LC_TIME category. */
+
+void time_startup (struct linereader *__lr, struct localedef_t *__locale,
+ struct charset_t *__charset);
+
+void time_finish (struct localedef_t *__locale);
+
+void time_output (struct localedef_t *locale, const char *output_path);
+
+void time_add (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, struct token *__code,
+ struct charset_t *__charset);
+
+
+/* Handle LC_MESSAGES category. */
+
+void messages_startup (struct linereader *__lr, struct localedef_t *__locale,
+ struct charset_t *__charset);
+
+void messages_finish (struct localedef_t *__locale);
+
+void messages_output (struct localedef_t *locale, const char *output_path);
+
+void messages_add (struct linereader *lr, struct localedef_t *locale,
+ enum token_t __tok, struct token *__code,
+ struct charset_t *__charset);
+
+
+#endif /* locales.h */
diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
new file mode 100644
index 0000000000..85e031c777
--- /dev/null
+++ b/locale/programs/locfile-kw.gperf
@@ -0,0 +1,99 @@
+%{
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+%}
+struct keyword_t ;
+%%
+escape_char, tok_escape_char, 0
+comment_char, tok_comment_char, 0
+LC_CTYPE, tok_lc_ctype, 0
+END, tok_end, 0
+copy, tok_copy, 0
+upper, tok_upper, 0
+lower, tok_lower, 0
+alpha, tok_alpha, 0
+digit, tok_digit, 0
+alnum, tok_alnum, 0
+space, tok_space, 0
+cntrl, tok_cntrl, 0
+punct, tok_punct, 0
+graph, tok_graph, 0
+print, tok_print, 0
+xdigit, tok_xdigit, 0
+blank, tok_blank, 0
+charclass, tok_charclass, 0
+charmap, tok_charmap, 0
+toupper, tok_toupper, 0
+tolower, tok_tolower, 0
+LC_COLLATE, tok_lc_collate, 0
+collating-element, tok_collating_element, 0
+collating-symbol, tok_collating_symbol, 0
+order_start, tok_order_start, 0
+order_end, tok_order_end, 0
+from, tok_from, 0
+forward, tok_forward, 0
+backward, tok_backward, 0
+position, tok_position, 0
+UNDEFINED, tok_undefined, 0
+IGNORE, tok_ignore, 0
+LC_MONETARY, tok_lc_monetary, 0
+int_curr_symbol, tok_int_curr_symbol, 0
+currency_symbol, tok_currency_symbol, 0
+mon_decimal_point, tok_mon_decimal_point, 0
+mon_thousands_sep, tok_mon_thousands_sep, 0
+mon_grouping, tok_mon_grouping, 0
+positive_sign, tok_positive_sign, 0
+negative_sign, tok_negative_sign, 0
+int_frac_digits, tok_int_frac_digits, 0
+frac_digits, tok_frac_digits, 0
+p_cs_precedes, tok_p_cs_precedes, 0
+p_sep_by_space, tok_p_sep_by_space, 0
+n_cs_precedes, tok_n_cs_precedes, 0
+n_sep_by_space, tok_n_sep_by_space, 0
+p_sign_posn, tok_p_sign_posn, 0
+n_sign_posn, tok_n_sign_posn, 0
+LC_NUMERIC, tok_lc_numeric, 0
+decimal_point, tok_decimal_point, 0
+thousands_sep, tok_thousands_sep, 0
+grouping, tok_grouping, 0
+LC_TIME, tok_lc_time, 0
+abday, tok_abday, 0
+day, tok_day, 0
+abmon, tok_abmon, 0
+mon, tok_mon, 0
+d_t_fmt, tok_d_t_fmt, 0
+d_fmt, tok_d_fmt, 0
+t_fmt, tok_t_fmt, 0
+am_pm, tok_am_pm, 0
+t_fmt_ampm, tok_t_fmt_ampm, 0
+era, tok_era, 0
+era_year, tok_era_year, 0
+era_d_fmt, tok_era_d_fmt, 0
+era_d_t_fmt, tok_era_d_t_fmt, 0
+era_t_fmt, tok_era_t_fmt, 0
+alt_digits, tok_alt_digits, 0
+LC_MESSAGES, tok_lc_messages, 0
+yesexpr, tok_yesexpr, 0
+noexpr, tok_noexpr, 0
+yesstr, tok_yesstr, 0
+nostr, tok_nostr, 0
diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h
new file mode 100644
index 0000000000..c892669893
--- /dev/null
+++ b/locale/programs/locfile-kw.h
@@ -0,0 +1,211 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: gperf -acCgopt -k1,2,5,$ -N locfile_hash programs/locfile-kw.gperf */
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include <string.h>
+
+#include "locfile-token.h"
+struct keyword_t ;
+
+#define TOTAL_KEYWORDS 73
+#define MIN_WORD_LENGTH 3
+#define MAX_WORD_LENGTH 17
+#define MIN_HASH_VALUE 3
+#define MAX_HASH_VALUE 185
+/* maximum key range = 183, duplicates = 0 */
+
+/* Return the gperf association value for the input byte CH.
+
+ The generated table only covers the 7-bit range. Indexing it directly
+ with a `char' reads outside of the array for every byte >= 0x80 (and
+ with a negative index where `char' is signed). Such bytes therefore
+ get the weight 186 (MAX_HASH_VALUE + 1), the same filler the table
+ uses for characters occurring in no keyword, so the resulting hash
+ value can never select a slot of the word list. */
+static unsigned int
+asso_value (unsigned char ch)
+{
+ static const unsigned char asso_values[] =
+ {
+ 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
+ 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
+ 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
+ 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
+ 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
+ 186, 186, 186, 186, 186, 186, 186, 186, 186, 186,
+ 186, 186, 186, 186, 186, 186, 186, 0, 0, 0,
+ 0, 0, 186, 0, 186, 186, 0, 186, 0, 35,
+ 186, 186, 0, 0, 0, 5, 186, 186, 186, 0,
+ 186, 186, 186, 186, 186, 15, 186, 0, 0, 5,
+ 15, 10, 55, 30, 15, 75, 186, 20, 5, 40,
+ 10, 0, 0, 186, 35, 30, 0, 70, 186, 10,
+ 20, 75, 186, 186, 186, 186, 186, 186,
+ };
+
+ return ch < sizeof asso_values ? asso_values[ch] : 186;
+}
+
+/* Compute the hash value of the LEN bytes starting at STR. The value
+ is LEN plus the association values of the first, second, fifth (only
+ if LEN >= 5) and last byte. The caller must pass LEN >= 1; the lookup
+ function only calls this for LEN >= MIN_WORD_LENGTH. */
+#ifdef __GNUC__
+inline
+#endif
+static unsigned int
+hash (register const char *str, register int len)
+{
+ register int hval = len;
+
+ switch (hval)
+ {
+ default:
+ case 5:
+ hval += asso_value ((unsigned char) str[4]);
+ /* FALLTHROUGH */
+ case 4:
+ case 3:
+ case 2:
+ hval += asso_value ((unsigned char) str[1]);
+ /* FALLTHROUGH */
+ case 1:
+ hval += asso_value ((unsigned char) str[0]);
+ break;
+ }
+ return hval + asso_value ((unsigned char) str[len - 1]);
+}
+
+/* Look up the LEN bytes starting at STR in the table of locale
+ definition file keywords. Returns a pointer to the matching entry of
+ the static word list, or 0 if STR is not a keyword.
+
+ The word list is indexed by the value of `hash'; unused slots hold an
+ empty name. Only lengths between MIN_WORD_LENGTH and MAX_WORD_LENGTH
+ are hashed at all.
+
+ NOTE(review): the final comparison checks only the first LEN bytes of
+ the table entry and does not verify that the entry ends there, so a
+ proper prefix of a keyword that happens to hash to that keyword's
+ slot would be accepted. */
+#ifdef __GNUC__
+inline
+#endif
+const struct keyword_t *
+locfile_hash (register const char *str, register int len)
+{
+ static const struct keyword_t wordlist[] =
+ {
+ {"",}, {"",}, {"",},
+ {"END", tok_end, 0},
+ {"",}, {"",},
+ {"IGNORE", tok_ignore, 0},
+ {"LC_TIME", tok_lc_time, 0},
+ {"LC_CTYPE", tok_lc_ctype, 0},
+ {"",},
+ {"alpha", tok_alpha, 0},
+ {"LC_MESSAGES", tok_lc_messages, 0},
+ {"",}, {"",},
+ {"UNDEFINED", tok_undefined, 0},
+ {"LC_NUMERIC", tok_lc_numeric, 0},
+ {"",}, {"",},
+ {"position", tok_position, 0},
+ {"",},
+ {"t_fmt", tok_t_fmt, 0},
+ {"",},
+ {"collating-element", tok_collating_element, 0},
+ {"positive_sign", tok_positive_sign, 0},
+ {"",},
+ {"abmon", tok_abmon, 0},
+ {"collating-symbol", tok_collating_symbol, 0},
+ {"",}, {"",}, {"",},
+ {"cntrl", tok_cntrl, 0},
+ {"",}, {"",},
+ {"backward", tok_backward, 0},
+ {"",},
+ {"d_fmt", tok_d_fmt, 0},
+ {"",}, {"",}, {"",},
+ {"p_sep_by_space", tok_p_sep_by_space, 0},
+ {"print", tok_print, 0},
+ {"",},
+ {"toupper", tok_toupper, 0},
+ {"negative_sign", tok_negative_sign, 0},
+ {"",},
+ {"LC_COLLATE", tok_lc_collate, 0},
+ {"LC_MONETARY", tok_lc_monetary, 0},
+ {"",},
+ {"era", tok_era, 0},
+ {"n_sep_by_space", tok_n_sep_by_space, 0},
+ {"blank", tok_blank, 0},
+ {"noexpr", tok_noexpr, 0},
+ {"tolower", tok_tolower, 0},
+ {"mon", tok_mon, 0},
+ {"era_t_fmt", tok_era_t_fmt, 0},
+ {"space", tok_space, 0},
+ {"",},
+ {"mon_thousands_sep", tok_mon_thousands_sep, 0},
+ {"thousands_sep", tok_thousands_sep, 0},
+ {"",},
+ {"alt_digits", tok_alt_digits, 0},
+ {"",},
+ {"comment_char", tok_comment_char, 0},
+ {"",},
+ {"charclass", tok_charclass, 0},
+ {"t_fmt_ampm", tok_t_fmt_ampm, 0},
+ {"p_sign_posn", tok_p_sign_posn, 0},
+ {"charmap", tok_charmap, 0},
+ {"",},
+ {"era_d_fmt", tok_era_d_fmt, 0},
+ {"",},
+ {"era_d_t_fmt", tok_era_d_t_fmt, 0},
+ {"mon_decimal_point", tok_mon_decimal_point, 0},
+ {"p_cs_precedes", tok_p_cs_precedes, 0},
+ {"",},
+ {"punct", tok_punct, 0},
+ {"n_sign_posn", tok_n_sign_posn, 0},
+ {"forward", tok_forward, 0},
+ {"decimal_point", tok_decimal_point, 0},
+ {"",},
+ {"lower", tok_lower, 0},
+ {"order_start", tok_order_start, 0},
+ {"",},
+ {"n_cs_precedes", tok_n_cs_precedes, 0},
+ {"copy", tok_copy, 0},
+ {"nostr", tok_nostr, 0},
+ {"escape_char", tok_escape_char, 0},
+ {"",}, {"",}, {"",},
+ {"alnum", tok_alnum, 0},
+ {"",},
+ {"d_t_fmt", tok_d_t_fmt, 0},
+ {"day", tok_day, 0},
+ {"order_end", tok_order_end, 0},
+ {"digit", tok_digit, 0},
+ {"",}, {"",}, {"",}, {"",},
+ {"graph", tok_graph, 0},
+ {"",}, {"",},
+ {"grouping", tok_grouping, 0},
+ {"",},
+ {"currency_symbol", tok_currency_symbol, 0},
+ {"",}, {"",}, {"",}, {"",},
+ {"int_curr_symbol", tok_int_curr_symbol, 0},
+ {"",},
+ {"mon_grouping", tok_mon_grouping, 0},
+ {"",}, {"",}, {"",},
+ {"xdigit", tok_xdigit, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"am_pm", tok_am_pm, 0},
+ {"yesstr", tok_yesstr, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"from", tok_from, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",},
+ {"upper", tok_upper, 0},
+ {"frac_digits", tok_frac_digits, 0},
+ {"yesexpr", tok_yesexpr, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"abday", tok_abday, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"era_year", tok_era_year, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",}, {"",},
+ {"int_frac_digits", tok_int_frac_digits, 0},
+ };
+
+ /* Words shorter or longer than every keyword cannot match. */
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register const char *s = wordlist[key].name;
+
+ /* Compare the first byte inline before paying for strncmp. */
+ if (*s == *str && !strncmp (str + 1, s + 1, len - 1))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
new file mode 100644
index 0000000000..1c3cfdc9db
--- /dev/null
+++ b/locale/programs/locfile-token.h
@@ -0,0 +1,147 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef _TOKEN_H
+#define _TOKEN_H
+
+/* All token kinds the line reader can hand to the parsers. The order
+ of the enumerators determines their numeric values; tok_none is
+ explicitly zero. */
+enum token_t
+{
+ /* Used as the initial "nothing seen yet" value. */
+ tok_none = 0,
+
+ /* Tokens that are not keywords: end of file/line, symbols,
+ punctuation, numbers and strings. */
+ tok_eof,
+ tok_eol,
+ tok_bsymbol,
+ tok_ident,
+ tok_ellipsis,
+ tok_semicolon,
+ tok_comma,
+ tok_open_brace,
+ tok_close_brace,
+ tok_charcode,
+ tok_ucs2,
+ tok_ucs4,
+ tok_number,
+ tok_minus1,
+ tok_string,
+
+ /* `escape_char', `comment_char' and `END' are keywords of the locale
+ definition file. NOTE(review): the tok_g?esc values are not used
+ by the locale file keyword table; presumably they belong to the
+ charmap file parser. */
+ tok_escape_char,
+ tok_comment_char,
+ tok_end,
+ tok_g0esc,
+ tok_g1esc,
+ tok_g2esc,
+ tok_g3esc,
+
+ /* NOTE(review): apart from tok_charmap none of these appear in the
+ locale file keyword table; presumably charmap file keywords. */
+ tok_code_set_name,
+ tok_mb_cur_max,
+ tok_mb_cur_min,
+ tok_charmap,
+ tok_width,
+ tok_width_variable,
+ tok_width_default,
+
+ /* LC_CTYPE section keyword, `copy', and the character class and
+ mapping keywords. */
+ tok_lc_ctype,
+ tok_copy,
+ tok_upper,
+ tok_lower,
+ tok_alpha,
+ tok_digit,
+ tok_xdigit,
+ tok_space,
+ tok_print,
+ tok_graph,
+ tok_blank,
+ tok_cntrl,
+ tok_punct,
+ tok_alnum,
+ tok_charclass,
+ tok_toupper,
+ tok_tolower,
+ /* LC_COLLATE section keywords. */
+ tok_lc_collate,
+ tok_collating_element,
+ tok_collating_symbol,
+ tok_order_start,
+ tok_order_end,
+ tok_from,
+ tok_forward,
+ tok_backward,
+ tok_position,
+ tok_undefined,
+ tok_ignore,
+ /* LC_MONETARY section keywords. */
+ tok_lc_monetary,
+ tok_int_curr_symbol,
+ tok_currency_symbol,
+ tok_mon_decimal_point,
+ tok_mon_thousands_sep,
+ tok_mon_grouping,
+ tok_positive_sign,
+ tok_negative_sign,
+ tok_int_frac_digits,
+ tok_frac_digits,
+ tok_p_cs_precedes,
+ tok_p_sep_by_space,
+ tok_n_cs_precedes,
+ tok_n_sep_by_space,
+ tok_p_sign_posn,
+ tok_n_sign_posn,
+ /* LC_NUMERIC section keywords. */
+ tok_lc_numeric,
+ tok_decimal_point,
+ tok_thousands_sep,
+ tok_grouping,
+ /* LC_TIME section keywords. */
+ tok_lc_time,
+ tok_abday,
+ tok_day,
+ tok_abmon,
+ tok_mon,
+ tok_d_t_fmt,
+ tok_d_fmt,
+ tok_t_fmt,
+ tok_am_pm,
+ tok_t_fmt_ampm,
+ tok_era,
+ tok_era_year,
+ tok_era_d_fmt,
+ tok_era_d_t_fmt,
+ tok_era_t_fmt,
+ tok_alt_digits,
+ /* LC_MESSAGES section keywords. */
+ tok_lc_messages,
+ tok_yesexpr,
+ tok_noexpr,
+ tok_yesstr,
+ tok_nostr,
+
+ /* Last enumerator. NOTE(review): presumably returned by the line
+ reader for unrecognisable input -- confirm in linereader.c. */
+ tok_error
+};
+
+
+/* One entry of a gperf generated keyword table. The generated word
+ lists initialise only the first three members; the remaining ones
+ are therefore zero in those tables. */
+struct keyword_t
+{
+ /* Spelling of the keyword; the empty string marks an unused slot. */
+ const char *name;
+ /* Token value reported for this keyword. */
+ enum token_t token;
+ /* Always 0 in the locale file keyword table. NOTE(review): meaning
+ is defined by the line reader -- confirm there. */
+ int symname_or_ident;
+
+ /* Only for locdef file. */
+ int locale;
+ enum token_t base;
+ enum token_t group;
+ enum token_t list;
+};
+
+
+#endif /* token.h */
diff --git a/locale/programs/locfile.c b/locale/programs/locfile.c
new file mode 100644
index 0000000000..cb98a5d530
--- /dev/null
+++ b/locale/programs/locfile.c
@@ -0,0 +1,979 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <malloc.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+
+#include "locfile.h"
+#include "linereader.h"
+#include "localeinfo.h"
+#include "locales.h"
+
+
+/* Uncomment the following line in the production version. */
+/* #define NDEBUG 1 */
+#include <assert.h>
+
+/* Define the lookup function. */
+#include "locfile-kw.h"
+
+
+/* Some useful macros. */
+/* Yield the smaller of A and B (A if they compare equal or unordered
+ the other way round). Each argument is evaluated exactly once. */
+#define MIN(a, b) (__extension__ ({ typeof (a) _min_lhs = (a); \
+ typeof (b) _min_rhs = (b); \
+ _min_lhs < _min_rhs ? _min_lhs : _min_rhs; }))
+
+
+void *xmalloc (size_t __n);
+char *xstrdup (const char *__str);
+
+/* Read the locale definition file FILENAME and return a newly allocated
+ description of it. CHARSET is handed to the line reader and to all
+ category handlers.
+
+ If FILENAME cannot be opened and is not an absolute path, a second
+ attempt is made below LOCSRCDIR. If that fails too, the returned
+ object has its `failed' member set and nothing else filled in.
+
+ The file is parsed by a state machine driven by one token per loop
+ iteration:
+
+ 1 outside of any category
+ 2, 3 LC_CTYPE (2: first token after the header, 3: body)
+ 4 `END' was seen, the category name is expected
+ 5 list of characters for a character class
+ 6 list of character mappings
+ 8 `copy' was seen, its argument is expected
+ 10, 11 LC_COLLATE (header / definitions before `order_start')
+ 12 arguments of `order_start'
+ 13 collation order lines up to `order_end'
+ 14 after `order_end', only `END' is accepted
+ 20, 21 LC_MONETARY
+ 30, 31 LC_NUMERIC
+ 40, 41 LC_TIME
+ 50, 51 LC_MESSAGES
+
+ After parsing, the bit for every category that has data attached is
+ set in the `avail' member of the result. */
+struct localedef_t *
+locfile_read (const char *filename, struct charset_t *charset)
+{
+ struct linereader *ldfile;
+ struct localedef_t *result;
+ int state;
+ enum token_t expected_tok = tok_none;
+ const char *expected_str = NULL;
+ enum token_t ctype_tok_sym = tok_none;
+ const char *ctype_tok_str = NULL;
+ int copy_category = 0;
+ int cnt;
+
+ /* Allocate space for result. */
+ result = (struct localedef_t *) xmalloc (sizeof (struct localedef_t));
+ memset (result, '\0', sizeof (struct localedef_t));
+
+ ldfile = lr_open (filename, locfile_hash);
+ if (ldfile == NULL)
+ {
+ if (filename[0] != '/')
+ {
+ /* sizeof (LOCSRCDIR) already accounts for the terminating NUL,
+ the extra byte is for the `/'. */
+ char path[strlen (filename) + 1 + sizeof (LOCSRCDIR)];
+
+ stpcpy (stpcpy (stpcpy (path, LOCSRCDIR), "/"), filename);
+ ldfile = lr_open (path, locfile_hash);
+ }
+
+ if (ldfile == NULL)
+ {
+ result->failed = 1;
+ return result;
+ }
+ }
+
+/* Used on the first token after a category header. If it is `copy'
+ remember which category is copied and switch to state 8; otherwise
+ advance to the body state of the category and fall through. */
+#define HANDLE_COPY(category, token, string) \
+ if (nowtok == tok_copy) \
+ { \
+ copy_category = category; \
+ expected_tok = token; \
+ expected_str = string; \
+ state = 8; \
+ continue; \
+ } \
+ ++state
+
+/* Common start of every category body state: skip empty lines, handle
+ `END' and reject a `copy' that is not the first statement. */
+#define LOCALE_PROLOG(token, string) \
+ if (nowtok == tok_eol) \
+ /* Ignore empty lines. */ \
+ continue; \
+ if (nowtok == tok_end) \
+ { \
+ expected_tok = token; \
+ expected_str = string; \
+ state = 4; \
+ continue; \
+ } \
+ if (nowtok == tok_copy) \
+ goto only_copy;
+
+
+/* Read one string argument and pass it to FN. */
+#define READ_STRING(fn, errlabel) \
+ do \
+ { \
+ arg = lr_token (ldfile, charset); \
+ if (arg->tok != tok_string) \
+ goto errlabel; \
+ fn (ldfile, result, nowtok, arg, charset); \
+ lr_ignore_rest (ldfile, 1); \
+ } \
+ while (0)
+
+/* Read a semicolon separated list of strings up to the end of the line
+ and pass every element to FN. */
+#define READ_STRING_LIST(fn, errlabel) \
+ do \
+ { \
+ arg = lr_token (ldfile, charset); \
+ while (arg->tok == tok_string) \
+ { \
+ fn (ldfile, result, nowtok, arg, charset); \
+ arg = lr_token (ldfile, charset); \
+ if (arg->tok != tok_semicolon) \
+ break; \
+ arg = lr_token (ldfile, charset); \
+ } \
+ if (arg->tok != tok_eol) \
+ goto errlabel; \
+ } \
+ while (0)
+
+/* Read one number (or -1) and pass it to FN. */
+#define READ_NUMBER(fn, errlabel) \
+ do \
+ { \
+ arg = lr_token (ldfile, charset); \
+ if (arg->tok != tok_minus1 && arg->tok != tok_number) \
+ goto errlabel; \
+ fn (ldfile, result, nowtok, arg, charset); \
+ lr_ignore_rest (ldfile, 1); \
+ } \
+ while (0)
+
+/* Read a semicolon separated list of numbers up to the end of the line
+ and pass every element to FN. */
+#define READ_NUMBER_LIST(fn, errlabel) \
+ do \
+ { \
+ arg = lr_token (ldfile, charset); \
+ while (arg->tok == tok_minus1 || arg->tok == tok_number) \
+ { \
+ fn (ldfile, result, nowtok, arg, charset); \
+ arg = lr_token (ldfile, charset); \
+ if (arg->tok != tok_semicolon) \
+ break; \
+ arg = lr_token (ldfile, charset); \
+ } \
+ if (arg->tok != tok_eol) \
+ goto errlabel; \
+ } \
+ while (0)
+
+/* Report STRING as error and skip the rest of the line. Wrapped in
+ do/while so that it behaves like a single statement. */
+#define SYNTAX_ERROR(string) \
+ do \
+ { \
+ lr_error (ldfile, string); \
+ lr_ignore_rest (ldfile, 0); \
+ } \
+ while (0)
+
+
+ /* Parse locale definition file and store result in RESULT. */
+ state = 1;
+ while (1)
+ {
+ /* What's on? */
+ struct token *now = lr_token (ldfile, charset);
+ enum token_t nowtok = now->tok;
+ struct token *arg;
+
+ if (nowtok == tok_eof)
+ break;
+
+ switch (state)
+ {
+ case 1:
+ /* The beginning. We expect the special declarations, EOL or
+ the start of any locale. */
+ if (nowtok == tok_eol)
+ /* Ignore empty lines. */
+ continue;
+
+ switch (nowtok)
+ {
+ case tok_escape_char:
+ case tok_comment_char:
+ /* We need an argument. */
+ arg = lr_token (ldfile, charset);
+
+ if (arg->tok != tok_ident)
+ {
+ SYNTAX_ERROR (_("bad argument"));
+ continue;
+ }
+
+ if (arg->val.str.len != 1)
+ {
+ lr_error (ldfile, _("\
+argument to `%s' must be a single character"),
+ nowtok == tok_escape_char ? "escape_char"
+ : "comment_char");
+
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+
+ if (nowtok == tok_escape_char)
+ ldfile->escape_char = *arg->val.str.start;
+ else
+ ldfile->comment_char = *arg->val.str.start;
+ break;
+
+ case tok_lc_ctype:
+ state = 2;
+ break;
+
+ case tok_lc_collate:
+ state = 10;
+ break;
+
+ case tok_lc_monetary:
+ state = 20;
+ break;
+
+ case tok_lc_numeric:
+ state = 30;
+ break;
+
+ case tok_lc_time:
+ state = 40;
+ break;
+
+ case tok_lc_messages:
+ state = 50;
+ break;
+
+ default:
+ SYNTAX_ERROR (_("\
+syntax error: not inside a locale definition section"));
+ continue;
+ }
+ lr_ignore_rest (ldfile, 1);
+ continue;
+
+ case 2:
+ /* The string is printed in the "does not end with `END ...'"
+ message of state 4, so it must spell the category correctly. */
+ HANDLE_COPY (LC_CTYPE, tok_lc_ctype, "LC_CTYPE");
+
+ ctype_startup (ldfile, result, charset);
+ /* FALLTHROUGH */
+
+ case 3:
+ /* Here we accept all the character classes, tolower/toupper,
+ and following ANSI C:1995 self-defined classes. */
+ LOCALE_PROLOG (tok_lc_ctype, "LC_CTYPE");
+
+ if (nowtok == tok_charclass)
+ {
+ READ_STRING_LIST (ctype_class_new, bad_new_charclass);
+ continue;
+ bad_new_charclass:
+ SYNTAX_ERROR (_("\
+syntax error in definition of new character class"));
+ continue;
+ }
+
+ if (nowtok == tok_charmap)
+ {
+ READ_STRING_LIST (ctype_map_new, bad_new_charmap);
+ continue;
+ bad_new_charmap:
+ SYNTAX_ERROR (_("\
+syntax error in definition of new character map"));
+ continue;
+ }
+
+ if (nowtok == tok_upper || nowtok == tok_lower
+ || nowtok == tok_alpha || nowtok == tok_digit
+ || nowtok == tok_alnum || nowtok == tok_space
+ || nowtok == tok_cntrl || nowtok == tok_punct
+ || nowtok == tok_graph || nowtok == tok_print
+ || nowtok == tok_xdigit || nowtok == tok_blank)
+ {
+ ctype_tok_sym = nowtok;
+ ctype_tok_str = NULL;
+ state = 5;
+ continue;
+ }
+
+ if (nowtok == tok_toupper|| nowtok == tok_tolower)
+ {
+ ctype_tok_sym = nowtok;
+ ctype_tok_str = NULL;
+ state = 6;
+ continue;
+ }
+
+ if (nowtok != tok_ident)
+ goto bad_charclass;
+
+ /* We possibly have a self-defined character class. */
+ if (ctype_is_charclass (ldfile, result, now->val.str.start))
+ {
+ ctype_tok_sym = nowtok;
+ ctype_tok_str = now->val.str.start;
+ state = 5;
+ continue;
+ }
+
+ /* ...or a self-defined character map. */
+ if (ctype_is_charmap (ldfile, result, now->val.str.start))
+ {
+ ctype_tok_sym = nowtok;
+ ctype_tok_str = now->val.str.start;
+ state = 6;
+ continue;
+ }
+
+ SYNTAX_ERROR (_("syntax error in definition of LC_CTYPE category"));
+ continue;
+
+ case 4:
+ /* Handle `END xxx'. */
+ if (nowtok != expected_tok)
+ lr_error (ldfile, _("\
+`%1$s' definition does not end with `END %1$s'"), expected_str);
+
+ lr_ignore_rest (ldfile, nowtok == expected_tok);
+ state = 1;
+ continue;
+
+ case 5:
+ /* Here we expect a semicolon separated list of bsymbols. The
+ bit to be set in the word is given in CHARCLASS_BIT. */
+ arg = now;
+
+ ctype_class_start (ldfile, result, ctype_tok_sym, ctype_tok_str,
+ charset);
+
+ while (arg->tok != tok_eol)
+ {
+ /* Any token other than a bsymbol is an error. */
+ if (arg->tok != tok_bsymbol)
+ {
+ bad_charclass:
+ SYNTAX_ERROR (_("\
+syntax error in character class definition"));
+ break;
+ }
+
+ /* Lookup value for token and write into array. */
+ ctype_class_from (ldfile, result, arg, charset);
+
+ arg = lr_token (ldfile, charset);
+ if (arg->tok == tok_semicolon)
+ arg = lr_token (ldfile, charset);
+ else if (arg->tok != tok_eol)
+ goto bad_charclass;
+
+ /* Look for ellipsis. */
+ if (arg->tok == tok_ellipsis)
+ {
+ arg = lr_token (ldfile, charset);
+ if (arg->tok != tok_semicolon)
+ goto bad_charclass;
+
+ arg = lr_token (ldfile, charset);
+ if (arg->tok != tok_bsymbol)
+ goto bad_charclass;
+
+ /* Write range starting at LAST to ARG->VAL. */
+ ctype_class_to (ldfile, result, arg, charset);
+
+ arg = lr_token (ldfile, charset);
+ if (arg->tok == tok_semicolon)
+ arg = lr_token (ldfile, charset);
+ else if (arg->tok != tok_eol)
+ goto bad_charclass;
+ }
+ }
+
+ /* Mark class as already seen. */
+ ctype_class_end (ldfile, result);
+ state = 3;
+
+ continue;
+
+ case 6:
+ /* Here we expect a list of character mappings. Note: the
+ first opening brace is already matched. */
+ ctype_map_start (ldfile, result, ctype_tok_sym, ctype_tok_str,
+ charset);
+
+ while (1)
+ {
+ /* Match ( bsymbol , bsymbol ) */
+ if (now->tok != tok_open_brace)
+ goto bad_charmap;
+
+ now = lr_token (ldfile, charset);
+ if (now->tok != tok_bsymbol)
+ {
+ bad_charmap:
+ SYNTAX_ERROR (_("\
+syntax error in character mapping definition"));
+ state = 3;
+ break;
+ }
+
+ /* Lookup arg and assign to FROM. */
+ ctype_map_from (ldfile, result, now, charset);
+
+ now = lr_token (ldfile, charset);
+ if (now->tok != tok_comma)
+ goto bad_charmap;
+
+ now = lr_token (ldfile, charset);
+ if (now->tok != tok_bsymbol)
+ goto bad_charmap;
+
+ /* Lookup arg and assign to TO. */
+ ctype_map_to (ldfile, result, now, charset);
+
+ now = lr_token (ldfile, charset);
+ if (now->tok != tok_close_brace)
+ goto bad_charmap;
+
+ now = lr_token (ldfile, charset);
+ if (now->tok == tok_eol)
+ {
+ state = 3;
+ break;
+ }
+ if (now->tok != tok_semicolon)
+ goto bad_charmap;
+
+ now = lr_token (ldfile, charset);
+ }
+
+ ctype_map_end (ldfile, result);
+ continue;
+
+ case 8:
+ {
+ /* We have seen `copy'. First match the argument. */
+ int warned = 0;
+
+ if (nowtok != tok_string)
+ lr_error (ldfile, _("expect string argument for `copy'"));
+ else
+ def_to_process (now->val.str.start, 1 << copy_category);
+
+ lr_ignore_rest (ldfile, nowtok == tok_string);
+
+ /* The rest of the line must be empty
+ and the next keyword must be `END xxx'. */
+
+ while (lr_token (ldfile, charset)->tok != tok_end)
+ {
+ if (warned == 0)
+ {
+ /* LOCALE_PROLOG jumps here when `copy' shows up after
+ other statements; WARNED is assigned right below, so
+ skipping its initialisation is harmless. */
+ only_copy:
+ lr_error (ldfile, _("\
+no other keyword shall be specified when `copy' is used"));
+ warned = 1;
+ }
+
+ lr_ignore_rest (ldfile, 0);
+ }
+
+ state = 4;
+ }
+ continue;
+
+ case 10:
+ HANDLE_COPY (LC_COLLATE, tok_lc_collate, "LC_COLLATE");
+
+ collate_startup (ldfile, result, charset);
+ /* FALLTHROUGH */
+
+ case 11:
+ /* Process the LC_COLLATE section. We expect `END LC_COLLATE'
+ any of the collation specifications, or any bsymbol. */
+ LOCALE_PROLOG (tok_lc_collate, "LC_COLLATE");
+
+ if (nowtok == tok_order_start)
+ {
+ state = 12;
+ continue;
+ }
+
+ if (nowtok != tok_collating_element
+ && nowtok != tok_collating_symbol)
+ {
+ bad_collation:
+ lr_error (ldfile, _("\
+syntax error in collation definition"));
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+
+ /* Get argument. */
+ arg = lr_token (ldfile, charset);
+ if (arg->tok != tok_bsymbol)
+ {
+ lr_error (ldfile, _("\
+collation symbol expected after `%s'"),
+ nowtok == tok_collating_element
+ ? "collating-element" : "collating-symbol");
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+
+ if (nowtok == tok_collating_element)
+ {
+ /* Save to-value as new name. */
+ collate_element_to (ldfile, result, arg, charset);
+
+ arg = lr_token (ldfile, charset);
+ if (arg->tok != tok_from)
+ {
+ lr_error (ldfile, _("\
+`from' expected after first argument to `collating-element'"));
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+
+ arg = lr_token (ldfile, charset);
+ if (arg->tok != tok_string)
+ {
+ lr_error (ldfile, _("\
+from-value of `collating-element' must be a string"));
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+
+ /* Enter new collating element. */
+ collate_element_from (ldfile, result, arg, charset);
+ }
+ else
+ /* Enter new collating symbol into table. */
+ collate_symbol (ldfile, result, arg, charset);
+
+ lr_ignore_rest (ldfile, 1);
+ continue;
+
+ case 12:
+ /* We parse the rest of the line containing `order_start'.
+ In any case we continue with parsing the symbols. */
+ state = 13;
+
+ cnt = 0;
+ while (now->tok != tok_eol)
+ {
+ int collation_method = 0;
+
+ ++cnt;
+
+ do
+ {
+ if (now->tok == tok_forward)
+ collation_method |= sort_forward;
+ else if (now->tok == tok_backward)
+ collation_method |= sort_backward;
+ else if (now->tok == tok_position)
+ collation_method |= sort_position;
+ else
+ {
+ lr_error (ldfile, _("unknown collation directive"));
+ lr_ignore_rest (ldfile, 0);
+ /* Note: this re-evaluates the condition of the
+ enclosing do-while, not the outer loop. */
+ continue;
+ }
+
+ now = lr_token (ldfile, charset);
+ }
+ /* After a comma fetch the next directive into NOW. This has
+ to be an assignment: a comparison would drop the token and
+ leave NOW pointing at the comma. */
+ while (now->tok == tok_comma
+ && (now = lr_token (ldfile, charset)) != NULL);
+
+ /* Check for consistency: forward and backwards are
+ mutually exclusive. */
+ if ((collation_method & sort_forward) != 0
+ && (collation_method & sort_backward) != 0)
+ {
+ lr_error (ldfile, _("\
+sorting order `forward' and `backward' are mutually exclusive"));
+ /* To recover, clear the backward flag. */
+ collation_method &= ~sort_backward;
+ }
+
+ /* ??? I don't know whether this is correct but while
+ thinking about the `strcoll' functions I found that I
+ need a direction when performing position depended
+ collation. So I assume here that implicitly the
+ direction `forward' is given when `position' alone is
+ written. --drepper */
+ if (collation_method == sort_position)
+ collation_method |= sort_forward;
+
+ /* Enter info about next collation order. */
+ collate_new_order (ldfile, result, collation_method);
+
+ if (now->tok != tok_eol && now->tok != tok_semicolon)
+ {
+ lr_error (ldfile, _("\
+syntax error in `order_start' directive"));
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (now->tok == tok_semicolon)
+ now = lr_token (ldfile, charset);
+ }
+
+ /* If no argument to `order_start' is given, one `forward'
+ argument is implicitly assumed. */
+ if (cnt == 0)
+ collate_new_order (ldfile, result, sort_forward);
+
+
+ /* We now know about all sorting rules. */
+ collate_build_arrays (ldfile, result);
+
+ continue;
+
+ case 13:
+ /* We read one symbol a line until `order_end' is found. */
+ {
+ /* Suppresses repeated error messages for consecutive bad
+ lines; static, so the value survives between calls. */
+ static int last_correct = 1;
+
+ if (nowtok == tok_order_end)
+ {
+ state = 14;
+ lr_ignore_rest (ldfile, 1);
+ continue;
+ }
+
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ continue;
+
+ if (nowtok != tok_bsymbol && nowtok != tok_undefined
+ && nowtok != tok_ellipsis)
+ {
+ if (last_correct == 1)
+ {
+ lr_error (ldfile, _("\
+syntax error in collating order definition"));
+ last_correct = 0;
+ }
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+ else
+ {
+ last_correct = 1;
+
+ /* Remember current token. */
+ if (collate_order_elem (ldfile, result, now, charset) < 0)
+ continue;
+ }
+
+ /* Read optional arguments. */
+ arg = lr_token (ldfile, charset);
+ while (arg->tok != tok_eol)
+ {
+ if (arg->tok != tok_ignore && arg->tok != tok_ellipsis
+ && arg->tok != tok_bsymbol && arg->tok != tok_string)
+ break;
+
+ if (arg->tok == tok_ignore || arg->tok == tok_ellipsis
+ || arg->tok == tok_string)
+ {
+ /* Call handler for simple weights. */
+ if (collate_simple_weight (ldfile, result, arg, charset)
+ < 0)
+ goto illegal_weight;
+
+ arg = lr_token (ldfile, charset);
+ }
+ else
+ do
+ {
+ /* Collect char. */
+ int ok = collate_weight_bsymbol (ldfile, result, arg,
+ charset);
+ if (ok < 0)
+ goto illegal_weight;
+
+ arg = lr_token (ldfile, charset);
+ }
+ while (arg->tok == tok_bsymbol);
+
+ /* Are there more weights? */
+ if (arg->tok != tok_semicolon)
+ break;
+
+ /* Yes, prepare next weight. */
+ if (collate_next_weight (ldfile, result) < 0)
+ goto illegal_weight;
+
+ arg = lr_token (ldfile, charset);
+ }
+
+ if (arg->tok != tok_eol)
+ {
+ SYNTAX_ERROR (_("syntax error in order specification"));
+ }
+
+ collate_end_weight (ldfile, result);
+ /* A label must be followed by a statement, hence the `;'. */
+ illegal_weight:
+ ;
+ }
+ continue;
+
+ case 14:
+ /* Following to the `order_end' keyword we don't expect
+ anything but the `END'. */
+ if (nowtok == tok_eol)
+ continue;
+
+ if (nowtok != tok_end)
+ goto bad_collation;
+
+ expected_tok = tok_lc_collate;
+ expected_str = "LC_COLLATE";
+ state = 4;
+
+ ldfile->translate_strings = 1;
+ continue;
+
+ case 20:
+ HANDLE_COPY (LC_MONETARY, tok_lc_monetary, "LC_MONETARY");
+
+ monetary_startup (ldfile, result, charset);
+ /* FALLTHROUGH */
+
+ case 21:
+ LOCALE_PROLOG (tok_lc_monetary, "LC_MONETARY");
+
+ switch (nowtok)
+ {
+ case tok_int_curr_symbol:
+ case tok_currency_symbol:
+ case tok_mon_decimal_point:
+ case tok_mon_thousands_sep:
+ case tok_positive_sign:
+ case tok_negative_sign:
+ READ_STRING (monetary_add, bad_monetary);
+ break;
+
+ case tok_int_frac_digits:
+ case tok_frac_digits:
+ case tok_p_cs_precedes:
+ case tok_p_sep_by_space:
+ case tok_n_cs_precedes:
+ case tok_n_sep_by_space:
+ case tok_p_sign_posn:
+ case tok_n_sign_posn:
+ READ_NUMBER (monetary_add, bad_monetary);
+ break;
+
+ case tok_mon_grouping:
+ /* We have a semicolon separated list of integers. */
+ READ_NUMBER_LIST (monetary_add, bad_monetary);
+ break;
+
+ default:
+ bad_monetary:
+ SYNTAX_ERROR (_("syntax error in monetary locale definition"));
+ }
+ continue;
+
+ case 30:
+ HANDLE_COPY (LC_NUMERIC, tok_lc_numeric, "LC_NUMERIC");
+
+ numeric_startup (ldfile, result, charset);
+ /* FALLTHROUGH */
+
+ case 31:
+ LOCALE_PROLOG (tok_lc_numeric, "LC_NUMERIC");
+
+ switch (nowtok)
+ {
+ case tok_decimal_point:
+ case tok_thousands_sep:
+ READ_STRING (numeric_add, bad_numeric);
+ break;
+
+ case tok_grouping:
+ /* We have a semicolon separated list of integers. */
+ READ_NUMBER_LIST (numeric_add, bad_numeric);
+ break;
+
+ default:
+ bad_numeric:
+ SYNTAX_ERROR (_("syntax error in numeric locale definition"));
+ }
+ continue;
+
+ case 40:
+ HANDLE_COPY (LC_TIME, tok_lc_time, "LC_TIME");
+
+ time_startup (ldfile, result, charset);
+ /* FALLTHROUGH */
+
+ case 41:
+ LOCALE_PROLOG (tok_lc_time, "LC_TIME");
+
+ switch (nowtok)
+ {
+ case tok_abday:
+ case tok_day:
+ case tok_abmon:
+ case tok_mon:
+ case tok_am_pm:
+ case tok_alt_digits:
+ READ_STRING_LIST (time_add, bad_time);
+ continue;
+
+ case tok_d_t_fmt:
+ case tok_d_fmt:
+ case tok_t_fmt:
+ case tok_t_fmt_ampm:
+ case tok_era:
+ case tok_era_year:
+ case tok_era_d_t_fmt:
+ case tok_era_d_fmt:
+ case tok_era_t_fmt:
+ READ_STRING (time_add, bad_time);
+ break;
+
+ default:
+ bad_time:
+ SYNTAX_ERROR (_("syntax error in time locale definition"));
+ }
+ continue;
+
+ case 50:
+ HANDLE_COPY (LC_MESSAGES, tok_lc_messages, "LC_MESSAGES");
+
+ messages_startup (ldfile, result, charset);
+ /* FALLTHROUGH */
+
+ case 51:
+ LOCALE_PROLOG (tok_lc_messages, "LC_MESSAGES");
+
+ switch (nowtok)
+ {
+ case tok_yesexpr:
+ case tok_noexpr:
+ case tok_yesstr:
+ case tok_nostr:
+ READ_STRING (messages_add, bad_message);
+ break;
+
+ default:
+ bad_message:
+ SYNTAX_ERROR (_("syntax error in message locale definition"));
+ }
+ continue;
+
+ default:
+ error (5, 0, _("%s: error in state machine"), __FILE__);
+ /* NOTREACHED */
+ }
+
+ break;
+ }
+
+ /* We read all of the file. */
+ lr_close (ldfile);
+
+ /* Let's see what information is available. */
+ for (cnt = LC_CTYPE; cnt <= LC_MESSAGES; ++cnt)
+ if (result->categories[cnt].generic != NULL)
+ result->avail |= 1 << cnt;
+
+ return result;
+}
+
+
+/* Run the finishing function of every category of LOCALE whose bit is
+ not set in LOCALE->binary. The categories are handled in the order
+ LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, LC_TIME, LC_MESSAGES;
+ only the first two handlers receive CHARSET. */
+void
+check_all_categories (struct localedef_t *locale, struct charset_t *charset)
+{
+ /* The mask is read anew for each category on purpose: the handlers
+ get the LOCALE pointer and are free to update it. */
+#define NEEDS_FINISH(category) (!(locale->binary & (1 << (category))))
+
+ if (NEEDS_FINISH (LC_CTYPE))
+ ctype_finish (locale, charset);
+
+ if (NEEDS_FINISH (LC_COLLATE))
+ collate_finish (locale, charset);
+
+ if (NEEDS_FINISH (LC_MONETARY))
+ monetary_finish (locale);
+
+ if (NEEDS_FINISH (LC_NUMERIC))
+ numeric_finish (locale);
+
+ if (NEEDS_FINISH (LC_TIME))
+ time_finish (locale);
+
+ if (NEEDS_FINISH (LC_MESSAGES))
+ messages_finish (locale);
+
+#undef NEEDS_FINISH
+}
+
+
+/* Call the output function of every category, passing LOCALE and
+ OUTPUT_PATH on unchanged. Unlike check_all_categories no category
+ is skipped here; any filtering is up to the individual functions. */
+void
+write_all_categories (struct localedef_t *locale, const char *output_path)
+{
+ /* Call all functions to write locale data. */
+ ctype_output (locale, output_path);
+ collate_output (locale, output_path);
+ monetary_output (locale, output_path);
+ numeric_output (locale, output_path);
+ time_output (locale, output_path);
+ messages_output (locale, output_path);
+}
+
+
+/* Write the N_ELEM buffers described by VEC to the output file of
+ CATEGORY.
+
+ The file is OUTPUT_PATH/CATEGORY. If a directory of that name is in
+ the way (creat fails with EISDIR) the file
+ OUTPUT_PATH/CATEGORY/SYS_CATEGORY is used instead. Failures are
+ reported through `error' without terminating the program; the
+ function has no return value. */
+void
+write_locale_data (const char *output_path, const char *category,
+ size_t n_elem, struct iovec *vec)
+{
+ size_t cnt, step;
+ int fd;
+ char *fname;
+
+ if (asprintf (&fname, "%s/%s", output_path, category) < 0)
+ {
+ /* FNAME is undefined after a failed asprintf; do not touch it. */
+ error (0, errno, _("cannot open output file for category `%s'"),
+ category);
+ return;
+ }
+
+ fd = creat (fname, 0666);
+ if (fd == -1)
+ {
+ int save_err = errno;
+
+ if (save_err == EISDIR)
+ {
+ free (fname);
+ if (asprintf (&fname, "%1$s/%2$s/SYS_%2$s", output_path, category)
+ < 0)
+ {
+ save_err = errno;
+ /* Make the free below a no-op. */
+ fname = NULL;
+ }
+ else
+ {
+ fd = creat (fname, 0666);
+ if (fd == -1)
+ save_err = errno;
+ }
+ }
+
+ if (fd == -1)
+ {
+ /* Release the name on the error path as well. */
+ free (fname);
+ error (0, save_err, _("cannot open output file for category `%s'"),
+ category);
+ return;
+ }
+ }
+ free (fname);
+
+ /* Write the data using writev. But we must take care for the
+ limitation of the implementation. */
+ for (cnt = 0; cnt < n_elem; cnt += step)
+ {
+ /* XXX Fixme: should be in libc header. */
+#ifndef MAX_IOVEC
+# define MAX_IOVEC 8
+#endif
+ step = MIN (MAX_IOVEC, n_elem - cnt);
+
+ if (writev (fd, &vec[cnt], step) < 0)
+ {
+ error (0, errno, _("failure while writing data for category `%s'"),
+ category);
+ break;
+ }
+ }
+
+ close (fd);
+}
diff --git a/locale/programs/locfile.h b/locale/programs/locfile.h
new file mode 100644
index 0000000000..e337e961ed
--- /dev/null
+++ b/locale/programs/locfile.h
@@ -0,0 +1,75 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef _LOCFILE_H
+#define _LOCFILE_H
+
+#include <sys/uio.h>
+
+#include "charset.h"
+
+/* Opaque types for the different locales. */
+struct locale_ctype_t;
+struct locale_collate_t;
+struct locale_monetary_t;
+struct locale_numeric_t;
+struct locale_time_t;
+struct locale_messages_t;
+
+struct localedef_t
+{ /* Returned by locfile_read and passed on to check_all_categories
+     and write_all_categories.  */
+ int failed; /* NOTE(review): presumably nonzero once processing the
+     definition has failed -- confirm in locfile.c.  */
+
+ int avail; /* NOTE(review): looks like a per-category bit mask like
+     BINARY -- confirm in locfile.c.  */
+ int binary; /* Bit mask tested with (1 << LC_xxx): check_all_categories
+     calls a category's *_finish routine only when its bit is clear.  */
+
+ union /* Per-category data, one slot for each of the six categories.
+     NOTE(review): presumably indexed by the LC_xxx value -- confirm.  */
+ {
+ void *generic;
+ struct locale_ctype_t *ctype;
+ struct locale_collate_t *collate;
+ struct locale_monetary_t *monetary;
+ struct locale_numeric_t *numeric;
+ struct locale_time_t *time;
+ struct locale_messages_t *messages;
+ } categories[6];
+
+ size_t len[6]; /* NOTE(review): presumably the size of each category's
+     data, parallel to CATEGORIES -- confirm.  */
+};
+
+
+/* Found in localedef.c. */
+void def_to_process (const char *name, int category);
+
+
+/* Found in locfile.c.
+
+   check_all_categories runs the categories' *_finish routines; for
+   collate, monetary, numeric, time and messages a category is skipped
+   when its bit (1 << LC_xxx) is set in LOCALE->binary.
+
+   write_all_categories calls the *_output routine of every category
+   with LOCALE and OUTPUT_PATH.
+
+   write_locale_data creates OUTPUT_PATH/CATEGORY (or, if that name is
+   a directory, OUTPUT_PATH/CATEGORY/SYS_CATEGORY) and writes the
+   N_ELEM elements of VEC to it with writev.  It returns nothing;
+   failures are reported with error ().  */
+struct localedef_t *locfile_read (const char *filename,
+ struct charset_t *charset);
+
+void check_all_categories (struct localedef_t *locale,
+ struct charset_t *charset);
+
+void write_all_categories (struct localedef_t *locale,
+ const char *output_path);
+
+
+void write_locale_data (const char *output_path, const char *category,
+ size_t n_elem, struct iovec *vec);
+
+#endif /* locfile.h */
diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c
new file mode 100644
index 0000000000..bff5aa41a2
--- /dev/null
+++ b/locale/programs/stringtrans.c
@@ -0,0 +1,146 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "charset.h"
+#include "stringtrans.h"
+
+
+/* Global variable: selects the byte encoding encode_char produces
+   (it switches on this value).  Initialized to ENC_UCS4.  */
+enum encoding_method encoding_method = ENC_UCS4;
+
+
+void *xmalloc (size_t __n);
+void *xrealloc (void *__p, size_t __n); /* Both are defined outside this
+   file.  Their results are used below without a NULL check.
+   NOTE(review): presumably they abort on allocation failure --
+   confirm.  */
+
+
+/* Append the byte CH to the output buffer of the enclosing function.
+   The macro relies on three locals of its user: BUF (the buffer),
+   BUFACT (number of bytes in use) and BUFMAX (allocated size).  When
+   the buffer is full its size is doubled with xrealloc before the
+   byte is stored.  CH is evaluated exactly once.  */
+#define ADDC(ch) \
+  do                                                                          \
+    {                                                                         \
+      if (bufact == bufmax)                                                   \
+        {                                                                     \
+          bufmax = 2 * bufmax;                                                \
+          buf = xrealloc (buf, bufmax);                                       \
+        }                                                                     \
+      buf[bufact++] = (ch);                                                   \
+    }                                                                         \
+  while (0)
+
+
+/* Translate STR into a newly allocated, NUL-terminated byte string.
+   Bytes outside of `<...>' are copied unchanged.  The text between
+   `<' and the next `>' is looked up in CHARSET with
+   charset_find_value and the resulting value is stored with
+   encode_char.  While looking for the `>', a backslash also skips
+   the byte following it.
+
+   Returns the buffer (obtained from xmalloc/xrealloc), or NULL -- with
+   the buffer already freed -- if a `<' has no matching `>', the name
+   is not known (ILLEGAL_CHAR_VALUE), or encode_char fails.  */
+char *
+translate_string (char *str, struct charset_t *charset)
+{
+  size_t bufmax = 56;
+  size_t bufact = 0;
+  char *buf = (char *) xmalloc (bufmax);
+
+  while (*str != '\0')
+    {
+      char *name_end;
+      char *out;
+      unsigned int value;
+
+      if (*str != '<')
+        {
+          /* Ordinary byte: copy it verbatim.  */
+          ADDC (*str++);
+          continue;
+        }
+
+      /* Find the `>' which closes the symbolic name.  */
+      for (name_end = str + 1; *name_end != '\0' && *name_end != '>'; )
+        {
+          /* An escaped byte is skipped together with its backslash,
+             unless the backslash is the last byte of the string.  */
+          if (*name_end == '\\' && name_end[1] != '\0')
+            name_end += 2;
+          else
+            ++name_end;
+        }
+
+      if (*name_end == '\0')
+        /* The name is not terminated.  */
+        goto fail;
+
+      value = charset_find_value (charset, str + 1, name_end - (str + 1));
+      if (value == ILLEGAL_CHAR_VALUE)
+        goto fail;
+
+      /* Encode the value using the current method.  Make sure there
+         is room for the encoded bytes first.  */
+      if (bufmax - bufact < 8)
+        {
+          bufmax *= 2;
+          buf = (char *) xrealloc (buf, bufmax);
+        }
+
+      out = buf + bufact;
+      if (encode_char (value, &out) < 0)
+        goto fail;
+      bufact = out - buf;
+
+      /* Continue behind the `>'.  */
+      str = name_end + 1;
+    }
+
+  ADDC ('\0');
+
+  return buf;
+
+ fail:
+  free (buf);
+  return NULL;
+}
+
+
+/* Store VALUE at *CPP in the encoding selected by the global variable
+   ENCODING_METHOD and advance *CPP behind the bytes written:
+
+     ENC_UCS1  one byte; VALUE must not exceed 255
+     ENC_UCS4  four bytes, most significant byte first
+
+   Returns 0 on success.  Returns -1, without writing anything, if
+   VALUE cannot be represented under ENC_UCS1 or if ENCODING_METHOD
+   holds an unknown value.  The caller must provide enough room at
+   *CPP.  */
+int
+encode_char (unsigned int value, char **cpp)
+{
+  switch (encoding_method)
+    {
+    case ENC_UCS1:
+      if (value > 255)
+        /* Use the same failure value as the default case below.  */
+        return -1;
+      *(*cpp)++ = (char) value;
+      break;
+
+    case ENC_UCS4:
+      *(*cpp)++ = (char) (value >> 24);
+      *(*cpp)++ = (char) ((value >> 16) & 0xff);
+      *(*cpp)++ = (char) ((value >> 8) & 0xff);
+      *(*cpp)++ = (char) (value & 0xff);
+      break;
+
+    default:
+      return -1;
+    }
+
+  return 0;
+}
diff --git a/locale/programs/stringtrans.h b/locale/programs/stringtrans.h
new file mode 100644
index 0000000000..3576ce445c
--- /dev/null
+++ b/locale/programs/stringtrans.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#ifndef _TRANSLATE_H
+#define _TRANSLATE_H 1
+
+/* Only pointers to this type are used below.  Declaring the tag here
+   gives it file scope, so the prototype of translate_string is
+   compatible with its definition even when charset.h has not been
+   included before this header.  */
+struct charset_t;
+
+/* Encodings encode_char can produce.  */
+enum encoding_method
+{
+  ENC_UCS1,     /* One byte per value; values above 255 are rejected.  */
+  ENC_UCS4      /* Four bytes per value, most significant byte first.  */
+};
+
+
+/* Encoding currently in effect; defined in stringtrans.c.  */
+extern enum encoding_method encoding_method;
+
+
+/* Return a newly allocated copy of STR in which every `<name>' is
+   replaced by the encoded value CHARSET assigns to NAME.  Returns
+   NULL if a name is unterminated or unknown, or cannot be encoded.  */
+char *translate_string (char *__str, struct charset_t *__charset);
+
+/* Store VALUE at *CPP using the current encoding method and advance
+   *CPP.  Returns 0 on success and a negative value on failure.  */
+int encode_char (unsigned int __value, char **__cpp);
+
+
+#endif /* stringtrans.h */