aboutsummaryrefslogtreecommitdiff
path: root/locale/locfile-lex.c
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>1996-03-28 08:30:38 +0000
committerRoland McGrath <roland@gnu.org>1996-03-28 08:30:38 +0000
commit19bc17a90548ee427035994bbc4b14395723ff1f (patch)
treee7a17eda196c2610ca4be26c9e7985815162eafb /locale/locfile-lex.c
parent53f770e0f9d405ea8d1888254c6f7ce431b04c6e (diff)
downloadglibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.gz
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.tar.bz2
glibc-19bc17a90548ee427035994bbc4b14395723ff1f.zip
Thu Mar 28 03:25:10 1996 Roland McGrath <roland@charlie-brown.gnu.ai.mit.edu>
* intl/Makefile (copysrc): Add missing > in sed cmd. Sat Mar 23 17:52:49 1996 Ulrich Drepper <drepper@gnu.ai.mit.edu> * Makeconfig: Rename Makefile variable nlsdir to i18ndir and change value to $(datadir)/i18n. `nls' is not an appropriate name. * Makefile (subdirs): Add new subdir wctype. * ctype/ctype-info.c: Add new global variable __ctype_names and initialize from _nl_C_LC_CTYPE. * ctype/ctype.h: In P1003.3b/D11 `alnum' is a separate character class. Use bit 11. [_ISbit]: Protect definition of bitmasks because they are also used in wctype.h. * libio/genops.c (_IO_sputbackc, _IO_sungetc): Clear EOF flag after successfully pushing back a character. Fundamental changes in locale implementation. Almost nothing from the old code is used anymore. * locale/charmap.c, locale/collate.c, locale/config.h, locale/ctypedump.c, locale/hash.h, locale/keyword.gperf, locale/keyword.h, locale/loadlocale.c, locale/locale-ctype.c, locale/locale.c locale/localeconv.c, locale/localedef.c, locale/localedef.h, locale/locfile-hash.c, locale/locfile-lex.c, locale/locfile-parse.c, locale/messages.c, locale/monetary.c, locale/numeric.c, locale/setlocale.c, locale/token.h, locale/xmalloc.c: Removed. * locale/Makefile: Update for new locale implementation with program source code distributed in subdir. * locale/categories.def, locale/iso-4217.def: Updated file for new locale implementation. * locale/langinfo.h: Updated for new locale implementation. (ERA_D_T_FMT, ERA_T_FMT): New official values according to P1003.2b/D11. (_NL_COLLATE_NRULES, _NL_COLLATE_RULES, _NL_COLLATE_HASH_SIZE, _NL_COLLATE_HASH_LAYERS, _NL_COLLATE_TABLE_EB, _NL_COLLATE_TABLE_EL, _NL_COLLATE_UNDEFINED, _NL_COLLATE_EXTRA_EB, _NL_COLLATE_EXTRA_EL, _NL_CTYPE_NAMES_EB, _NL_CTYPE_NAMES_EL, _NL_CTYPE_HASH_SIZE, _NL_CTYPE_HASH_LAYERS, _NL_CTYPE_CLASS_NAMES, _NL_CTYPE_MAP_NAMES, _NL_CTYPE_WIDTH): New internal values for extended LC_CTYPE and LC_COLLATE implementation. 
* locale/simple-hash.c, locale/simple-hash.h, locale/xmalloc.c, locale/xstrdup.c: Helper functions for locale related programs. * locale/C-collate.c, locale/C-ctype.c, locale/C-messages.c, locale/C-monetary.c, locale/C-numeric.c, locale/C-time.c, locale/lc-collate.c, locale/lc-ctype.c, locale/lc-messages.c, locale/lc-monetary.c, locale/lc-numeric.c, locale/lc-time.c: New implementation of locale functions, and new generated "C" locale data. * locale/loadlocale.c: Now handles word fields in locale binary automatically by changing the endianess if necessary. * locale/localeinfo.h (LIMAGIC): Changed magic number because of incompatible changes. (locale_data): Changed definition to allow word as a value type. (coll_sort_rule): Values for collation sorting mode. (_NL_CURRENT_WORD): New macro to access word value of locale entry. (__collate_table, __collate_extra): Declare new global variables for collation tables. * locale/programs/charmap-kw.gperf, locale/programs/charmap-kw.h, locale/programs/charmap.c, locale/programs/charset.c, locale/programs/charset.h, locale/programs/config.h, locale/programs/ctypedump.c, locale/programs/ld-collate.c, locale/programs/ld-ctype.c, locale/programs/ld-messages.c, locale/programs/ld-monetary.c, locale/programs/ld-numeric.c, locale/programs/ld-time.c, locale/programs/linereader.c, locale/programs/linereader.h, locale/programs/locale.c, locale/programs/localedef.c, locale/programs/locales.h, locale/programs/locfile-kw.gperf, locale/programs/locfile-kw.h, locale/programs/locfile-token.h, locale/programs/locfile.c, locale/programs/locfile.h, locale/programs/stringtrans.c, locale/programs/stringtrans.h: Implementation of locale related programs. * locale/weight.h: Functions to access collation tables. * posix/unistd.h: Define _POSIX2_LOCALEDEF. * stdio-common/printf_fp.c: Fix bug with printing certain numbers < 10^-1. Reported by Bill Metzenthen. * stdio-common/tfformat.c: Add new test for above bug. 
* string/strcoll.c, string/strxfrm.c: Real implementation of string collation according to ISO C. * wctype/Makefile, wctype/cname-lookup.h, wctype/iswctype.c, wctype/test_wctype.c, wctype/towctrans.c, wctype/wcfuncs.c, wctype/wctrans.c, wctype/wctype.c, wctype/wctype.h: New files. Implementation of wide character classes and mapping.
Diffstat (limited to 'locale/locfile-lex.c')
-rw-r--r--locale/locfile-lex.c533
1 files changed, 0 insertions, 533 deletions
diff --git a/locale/locfile-lex.c b/locale/locfile-lex.c
deleted file mode 100644
index 20e4f0f9cd..0000000000
--- a/locale/locfile-lex.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/* Copyright (C) 1995 Free Software Foundation, Inc.
-
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
-
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB. If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA. */
-
-#include <ctype.h>
-#include <langinfo.h>
-#include <libintl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "localedef.h"
-#include "token.h"
-
-
-/* Include the hashing table for the keywords.  The lookup function
-   `in_word_set' is declared before "keyword.h" is included (the header
-   is presumably gperf output that defines it -- TODO confirm); the lexer
-   calls it to tell keywords from plain identifiers. */
-const struct locale_keyword* in_word_set (register const char *str,
- register int len);
-#include "keyword.h"
-
-
-/* Contains the status of reading the locale definition file: the file
-   name, the line and string buffers with their size, the read position,
-   the line and token counters, the continuation flag, and the current
-   escape and comment characters. */
-struct locfile_data locfile_data;
-
-/* This is a flag used while reading collation input.  That is the only
-   place where element names besides the ones defined in the character map
-   are allowed, so there we must not give error messages.  `get_char'
-   reports an unknown symbolic character name only while this flag (and
-   `verbose') is nonzero. */
-int reject_new_char = 1;
-
-/* Prototypes for local functions. */
-static int get_char (void);
-
-
-#define LD locfile_data
-
-/* Opens the locale definition file and initializes the status data structure
-   for following calls of `locfile_lex'.
-
-   FNAME is the name of the definition file; NULL means the definition is
-   read from standard input.  Otherwise stdin is reopened on FNAME, so all
-   later reads go through stdin in either case.  A file that cannot be
-   opened is reported through `error' with status 4.  */
-void
-locfile_open (const char *fname)
-{
-  /* Line length used when the system reports no definite LINE_MAX.  The
-     value equals the POSIX.2 minimum, _POSIX2_LINE_MAX.  */
-  enum { DEFAULT_LINE_MAX = 2048 };
-  long int line_max;
-
-  if (fname == NULL)
-    /* We read from stdin. */
-    LD.filename = "<stdin>";
-  else
-    {
-      if (freopen (fname, "r", stdin) == NULL)
-        error (4, 0, gettext ("input file `%s' not found"), fname);
-      LD.filename = fname;
-    }
-
-  /* Set default values. */
-  LD.escape_char = '\\';
-  LD.comment_char = '#';
-
-  /* sysconf returns -1 when the limit is indeterminate or the option is
-     not supported.  Using that value as a size would request an absurd
-     allocation, so fall back to a fixed default instead.  */
-  line_max = sysconf (_SC_LINE_MAX);
-  if (line_max <= 0)
-    line_max = DEFAULT_LINE_MAX;
-
-  LD.bufsize = line_max;
-  LD.buf = (char *) xmalloc (LD.bufsize);
-  LD.strbuf = (char *) xmalloc (LD.bufsize);
-
-  LD.buf_ptr = LD.returned_tokens = LD.line_no = 0;
-
-  /* An empty buffer together with the continuation flag makes the first
-     call of `locfile_lex' read a line immediately. */
-  LD.continue_line = 1;
-  LD.buf[LD.buf_ptr] = '\0';
-}
-
-
-/* Like `locfile_lex', but end of input is not acceptable here: when
-   `locfile_lex' returns 0 the problem is reported through `error' with
-   status 4.  TOKEN and TOKEN_LEN are passed through unchanged; the
-   return value is the token id delivered by `locfile_lex'.
-
-   Note that the message text is a gettext msgid; the spelling fix below
-   ("definition") means existing translations must be updated to match.  */
-int
-xlocfile_lex (char **token, int *token_len)
-{
-  int retval = locfile_lex (token, token_len);
-
-  if (retval == 0)
-    /* I.e. end of file. */
-    error (4, 0, gettext ("%s: unexpected end of file in locale definition "
-                          "file"), locfile_data.filename);
-
-  return retval;
-}
-
-/* Return the next token of the locale definition file read from stdin.
-
-   The return value is 0 at end of file, one of the TOK_* ids, or -- for
-   an escape sequence introduced by the current escape character -- the
-   value of the escaped character itself.  Depending on the token the
-   output parameters are filled as follows:
-     identifier/keyword  *TOKEN points into the line buffer, *TOKEN_LEN is
-                         the length of the word;
-     TOK_STRING          *TOKEN points to the expanded string in LD.strbuf,
-                         *TOKEN_LEN is its length in bytes;
-     TOK_NUMBER          *TOKEN_LEN is the decimal value;
-     TOK_CHAR            *TOKEN_LEN is the character value.
-   Lines naming the `escape_char' or `comment_char' keyword are handled
-   here and never reach the caller.  */
-int
-locfile_lex (char **token, int *token_len)
-{
-  /* Upper bound of the bytes `char_to_utf' stores for one character. */
-  enum { MAX_UTF_LEN = 6 };
-  int start_again;
-  int retval = 0;
-
-  do
-    {
-      int start_ptr;
-
-      start_again = 0;
-
-      /* Read the next line.  Skip over empty lines and comments.  The
-         bounds test has to come first: `ignore_to_eol' and the end-of-line
-         case below park LD.buf_ptr at LD.bufsize, one past the end of the
-         buffer, and that position must not be dereferenced. */
-      if (LD.buf_ptr >= LD.bufsize
-          || (LD.buf[LD.buf_ptr] == '\0' && LD.continue_line != 0)
-          || (posix_conformance == 0 && LD.buf[LD.buf_ptr] == LD.comment_char))
-        do
-          {
-            size_t linelen;
-
-            LD.buf_ptr = 0;
-
-            if (fgets (LD.buf, LD.bufsize, stdin) == NULL)
-              {
-                /* This makes subsequent calls also return EOF. */
-                LD.buf[0] = '\0';
-                return 0;
-              }
-
-            /* Increment line number counter. */
-            ++LD.line_no;
-
-            /* We now have to look whether this line is continued and
-               whether it at all fits into our buffer. */
-            linelen = strlen (LD.buf);
-
-            /* A completely filled buffer which nevertheless ends in a
-               newline holds a whole line; only without the newline has
-               the line been cut off. */
-            if (linelen > 0 && linelen == LD.bufsize - 1
-                && LD.buf[linelen - 1] != '\n')
-              /* The line did not fit into the buffer. */
-              error (2, 0, gettext ("%s:%Zd: line too long; use "
-                                    "`getconf LINE_MAX' to get the maximum "
-                                    "line length"), LD.filename, LD.line_no);
-
-            /* Remove '\n' at end of line.  The string is empty when the
-               line starts with a NUL byte, so test the length before
-               looking at the last character. */
-            if (linelen > 0 && LD.buf[linelen - 1] == '\n')
-              LD.buf[--linelen] = '\0';
-
-            if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char)
-              {
-                LD.buf[--linelen] = '\0';
-                LD.continue_line = 1;
-              }
-            else
-              LD.continue_line = 0;
-
-            while (isspace ((unsigned char) LD.buf[LD.buf_ptr]))
-              ++LD.buf_ptr;
-
-            /* We are not so restrictive and allow white spaces before
-               a comment. */
-            if (posix_conformance == 0
-                && LD.buf[LD.buf_ptr] == LD.comment_char
-                && LD.buf_ptr != 0)
-              error (0, 0, gettext ("%s:%Zd: comment does not start in "
-                                    "column 1"), LD.filename, LD.line_no);
-          }
-        while (LD.buf[LD.buf_ptr] == '\0'
-               || LD.buf[LD.buf_ptr] == LD.comment_char);
-
-
-      /* Get information for return values. */
-      *token = LD.buf + LD.buf_ptr;
-      start_ptr = LD.buf_ptr;
-
-      /* If no further character is in the line this is the end of a logical
-         line.  This information is needed in the parser. */
-      if (LD.buf[LD.buf_ptr] == '\0')
-        {
-          LD.buf_ptr = LD.bufsize;
-          retval = TOK_ENDOFLINE;
-        }
-      else if (isalpha ((unsigned char) LD.buf[LD.buf_ptr]))
-        /* The token is an identifier.  The POSIX standard does not say
-           what characters might be contained but official POSIX locale
-           definition files contain beside alnum characters '_', '-' and
-           '+'. */
-        {
-          const struct locale_keyword *kw;
-
-          do
-            ++LD.buf_ptr;
-          while (isalnum ((unsigned char) LD.buf[LD.buf_ptr])
-                 || LD.buf[LD.buf_ptr] == '_'
-                 || LD.buf[LD.buf_ptr] == '-' || LD.buf[LD.buf_ptr] == '+');
-
-          /* Look in table of keywords. */
-          kw = in_word_set (*token, LD.buf_ptr - start_ptr);
-          if (kw == NULL)
-            retval = TOK_IDENT;
-          else if (kw->token_id == TOK_ESCAPE_CHAR
-                   || kw->token_id == TOK_COMMENT_CHAR)
-            /* `escape_char' and `comment_char' are keywords for the
-               lexer.  Do not give them to the parser. */
-            {
-              start_again = 1;
-
-              if (!isspace ((unsigned char) LD.buf[LD.buf_ptr])
-                  || (posix_conformance && LD.returned_tokens > 0))
-                error (0, 0, gettext ("%s:%Zd: syntax error in locale "
-                                      "definition file"),
-                       LD.filename, LD.line_no);
-
-              /* Step over the separator and any further white space but
-                 never beyond the terminating NUL of the line. */
-              if (LD.buf[LD.buf_ptr] != '\0')
-                ++LD.buf_ptr;
-              while (isspace ((unsigned char) LD.buf[LD.buf_ptr]))
-                ++LD.buf_ptr;
-
-              /* Store the new special character.  Written as plain
-                 assignments: a conditional expression is not an lvalue in
-                 ISO C.  A missing character (already reported as syntax
-                 error above) leaves the old setting in place. */
-              if (LD.buf[LD.buf_ptr] != '\0')
-                {
-                  if (kw->token_id == TOK_ESCAPE_CHAR)
-                    LD.escape_char = LD.buf[LD.buf_ptr++];
-                  else
-                    LD.comment_char = LD.buf[LD.buf_ptr++];
-                }
-
-              ignore_to_eol (0, posix_conformance);
-            }
-          else
-            /* It is one of the normal keywords. */
-            retval = kw->token_id;
-
-          *token_len = LD.buf_ptr - start_ptr;
-        }
-      else if (LD.buf[LD.buf_ptr] == '"')
-        /* Read a string.  All symbolic character descriptions are expanded.
-           This has to be done in a local buffer because a simple symbolic
-           character like <A> may expand to up to 6 bytes. */
-        {
-          char *last = LD.strbuf;
-
-          ++LD.buf_ptr;
-          while (LD.buf[LD.buf_ptr] != '"')
-            {
-              int pre = LD.buf_ptr;
-              int char_val = get_char ();
-
-              if (char_val == 0)
-                {
-                  error (4, 0, gettext ("%s:%Zd: unterminated string at end "
-                                        "of line"), LD.filename, LD.line_no);
-                  /* NOTREACHED */
-                }
-
-              if (char_val > 0)
-                {
-                  /* The expansion may be longer than the text it was read
-                     from, so make sure the next character and the final
-                     NUL still fit into the string buffer. */
-                  if (last - LD.strbuf + MAX_UTF_LEN >= LD.bufsize)
-                    error (4, 0, gettext ("%s:%Zd: string too long"),
-                           LD.filename, LD.line_no);
-
-                  last += char_to_utf (last, char_val);
-                }
-              else
-                /* Unknown characters are simply not stored. */
-                {
-                  char tmp[LD.buf_ptr - pre + 1];
-                  memcpy (tmp, &LD.buf[pre], LD.buf_ptr - pre);
-                  tmp[LD.buf_ptr - pre] = '\0';
-                  error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
-                         LD.filename, LD.line_no, tmp);
-                }
-            }
-          if (LD.buf[LD.buf_ptr] != '\0')
-            ++LD.buf_ptr;
-
-          *last = '\0';
-          *token = LD.strbuf;
-          *token_len = last - LD.strbuf;
-          retval = TOK_STRING;
-        }
-      else if (LD.buf[LD.buf_ptr] == '.' && LD.buf[LD.buf_ptr + 1] == '.'
-               && LD.buf[LD.buf_ptr + 2] == '.')
-        {
-          LD.buf_ptr += 3;
-          retval = TOK_ELLIPSIS;
-        }
-      else if (LD.buf[LD.buf_ptr] == LD.escape_char)
-        /* An escape sequence.  Numeric forms need at least two digits and
-           a value of at most 255; otherwise only the character following
-           the escape character is taken. */
-        {
-          char *endp;
-
-          ++LD.buf_ptr;
-          switch (LD.buf[LD.buf_ptr])
-            {
-            case 'x':
-              /* Hexadecimal constants may begin with a letter digit. */
-              if (isxdigit ((unsigned char) LD.buf[++LD.buf_ptr]))
-                {
-                  retval = strtol (&LD.buf[LD.buf_ptr], &endp, 16);
-                  if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
-                    retval = 'x';
-                  else
-                    LD.buf_ptr = endp - LD.buf;
-                }
-              else
-                retval = 'x';
-              break;
-            case 'd':
-              if (isdigit ((unsigned char) LD.buf[++LD.buf_ptr]))
-                {
-                  retval = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
-                  if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
-                    retval = 'd';
-                  else
-                    LD.buf_ptr = endp - LD.buf;
-                }
-              else
-                retval = 'd';
-              break;
-            case '0' ... '9':
-              retval = strtol (&LD.buf[LD.buf_ptr], &endp, 8);
-              if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
-                retval = LD.buf[LD.buf_ptr++];
-              else
-                LD.buf_ptr = endp - LD.buf;
-              break;
-            case 'a':
-              retval = '\a';
-              ++LD.buf_ptr;
-              break;
-            case 'b':
-              retval = '\b';
-              ++LD.buf_ptr;
-              break;
-            case 'f':
-              retval = '\f';
-              ++LD.buf_ptr;
-              break;
-            case 'n':
-              retval = '\n';
-              ++LD.buf_ptr;
-              break;
-            case 'r':
-              retval = '\r';
-              ++LD.buf_ptr;
-              break;
-            case 't':
-              retval = '\t';
-              ++LD.buf_ptr;
-              break;
-            case 'v':
-              retval = '\v';
-              ++LD.buf_ptr;
-              break;
-            default:
-              retval = LD.buf[LD.buf_ptr++];
-              break;
-            }
-        }
-      else if (isdigit ((unsigned char) LD.buf[LD.buf_ptr]))
-        {
-          char *endp;
-
-          *token_len = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
-          LD.buf_ptr = endp - LD.buf;
-          retval = TOK_NUMBER;
-        }
-      else if (LD.buf[LD.buf_ptr] == '-' && LD.buf[LD.buf_ptr + 1] == '1')
-        {
-          LD.buf_ptr += 2;
-          retval = TOK_MINUS1;
-        }
-      else
-        {
-          int ch = get_char ();
-          if (ch != -1)
-            {
-              *token_len = ch;
-              retval = TOK_CHAR;
-            }
-          else
-            retval = TOK_ILL_CHAR;
-        }
-
-      /* Ignore white space.  After an end of line or an ignored line the
-         read position is parked at LD.bufsize; there is nothing to skip
-         then and the buffer must not be read at that index. */
-      while (LD.buf_ptr < LD.bufsize
-             && isspace ((unsigned char) LD.buf[LD.buf_ptr]))
-        ++LD.buf_ptr;
-    }
-  while (start_again != 0);
-
-  ++LD.returned_tokens;
-  return retval;
-}
-
-
-/* Store the character CHAR_VAL at BUF and return the number of bytes
-   written.  With a single-byte character map the value is stored as one
-   byte.  Otherwise it is written as a multi-byte sequence of one to six
-   bytes: a lead byte carrying the length marker and the topmost bits,
-   followed by bytes of the form 10xxxxxx holding six bits each. */
-int
-char_to_utf (char *buf, int char_val)
-{
-  /* Number of payload bits in each trailing byte, and the mask for them. */
-  enum { PAYLOAD_BITS = 6, PAYLOAD_MASK = (1 << PAYLOAD_BITS) - 1 };
-  /* limit[i] is the largest value a sequence with I trailing bytes can
-     hold, lead[i] the marker bits of the matching lead byte. */
-  static const int limit[] =
-    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };
-  static const int lead[] =
-    { 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
-  int ntrail = 0;
-  int pos;
-
-  if (charmap_data.mb_cur_max == 1)
-    {
-      buf[0] = char_val;
-      return 1;
-    }
-
-  /* Find the shortest sequence the value fits into.  The last limit is
-     the largest `int' value in the table, so the search always stops. */
-  while (char_val > limit[ntrail])
-    ++ntrail;
-
-  /* Fill in the trailing bytes, least significant bits last. */
-  for (pos = ntrail; pos > 0; --pos)
-    {
-      buf[pos] = 0x80 | (char_val & PAYLOAD_MASK);
-      char_val >>= PAYLOAD_BITS;
-    }
-
-  /* What is left of the value goes into the lead byte. */
-  buf[0] = lead[ntrail] | char_val;
-
-  return ntrail + 1;
-}
-
-
-/* Ignore rest of line up to ENDOFLINE token, starting with given token.
-   If WARN_FLAG is set warn about any token but ENDOFLINE.
-
-   TOKEN is the token the caller has just read; nothing is done when it
-   already is TOK_ENDOFLINE.  Otherwise the rest of the current line and
-   all of its continuation lines are discarded, and the read position is
-   set so that the next `locfile_lex' call starts with a fresh line. */
-void
-ignore_to_eol (int token, int warn_flag)
-{
-  if (token == TOK_ENDOFLINE)
-    return;
-
-  /* The read position may be parked at LD.bufsize, which is outside of
-     the buffer; there is nothing left on the line in that case. */
-  if (LD.buf_ptr < LD.bufsize && LD.buf[LD.buf_ptr] != '\0' && warn_flag)
-    error (0, 0, gettext ("%s:%Zd: trailing garbage at end of line"),
-           locfile_data.filename, locfile_data.line_no);
-
-  while (LD.continue_line)
-    {
-      LD.continue_line = 0;
-
-      /* Increment line number counter. */
-      ++LD.line_no;
-
-      if (fgets (LD.buf, LD.bufsize, stdin) != NULL)
-        {
-          /* We now have to look whether this line is continued and
-             whether it at all fits into our buffer. */
-          size_t linelen = strlen (LD.buf);
-
-          /* A full buffer ending in a newline holds a complete line. */
-          if (linelen > 0 && linelen == LD.bufsize - 1
-              && LD.buf[linelen - 1] != '\n')
-            /* The line did not fit into the buffer. */
-            error (2, 0, gettext ("%s:%Zd: line too long; use `getconf "
-                                  "LINE_MAX' to get the current maximum "
-                                  "line length"), LD.filename, LD.line_no);
-
-          /* Remove '\n' at end of line. */
-          if (linelen > 0 && LD.buf[linelen - 1] == '\n')
-            --linelen;
-
-          /* An empty line has no last character to examine; looking at
-             index -1 would read in front of the buffer. */
-          if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char)
-            LD.continue_line = 1;
-        }
-    }
-
-  /* This causes the next call of the lexer to begin with the next line. */
-  LD.buf_ptr = LD.bufsize;
-}
-
-
-/* Return the value of the character at the current read position of the
-   input buffer and advance the position behind it.  Symbolic character
-   constants of the form <name> are expanded through `find_char'.
-
-   A plain character is returned as a value in the range of `unsigned
-   char', so bytes with the high bit set can never be mistaken for the
-   error value.  For a symbolic name the result of `find_char' is returned
-   (the callers treat -1 as "not defined"); -1 is also returned when the
-   line ends directly after the opening '<'.  Escape characters inside
-   the name are removed in place, i.e. the line buffer is modified. */
-static int
-get_char (void)
-{
-  if (LD.buf[LD.buf_ptr] == '<')
-    /* This is a symbolic character name. */
-    {
-      int char_val;
-      char *startp = LD.buf + (++LD.buf_ptr);
-      char *endp = startp;
-
-      /* Collect the name.  STARTP..ENDP receives it with the escape
-         characters squeezed out. */
-      while (LD.buf[LD.buf_ptr] != '>'
-             && isprint ((unsigned char) LD.buf[LD.buf_ptr]))
-        {
-          if (LD.buf[LD.buf_ptr] == '\0'
-              || (LD.buf[LD.buf_ptr] == LD.escape_char
-                  && LD.buf[++LD.buf_ptr] == '\0'))
-            break;
-
-          *endp++ = LD.buf[LD.buf_ptr++];
-        }
-
-      if (LD.buf[LD.buf_ptr] == '\0')
-        {
-          error (0, 0, gettext ("%s:%Zd: end of line in character symbol"),
-                 LD.filename, LD.line_no);
-
-          if (startp == endp)
-            return -1;
-        }
-      else
-        /* Step over the closing '>' (or the character ending the name). */
-        ++LD.buf_ptr;
-
-      char_val = find_char (startp, endp - startp);
-      if (char_val == -1 && verbose != 0 && reject_new_char != 0)
-        {
-          /* Locale definitions are often given very general.  Missing
-             characters are only reported when explicitly requested. */
-          char tmp[endp - startp + 3];
-
-          tmp[0] = '<';
-          memcpy (tmp + 1, startp, endp - startp);
-          tmp[endp - startp + 1] = '>';
-          tmp[endp - startp + 2] = '\0';
-
-          error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
-                 LD.filename, LD.line_no, tmp);
-        }
-
-      return char_val;
-    }
-  else
-    /* Where `char' is signed a plain conversion would turn bytes above
-       0x7f into negative values, and 0xff into the error value -1. */
-    return (unsigned char) LD.buf[LD.buf_ptr++];
-}
-
-/*
- * Local Variables:
- * mode:c
- * c-basic-offset:2
- * End:
- */