From 2bd608801708e2fa3d0e39f1220604a81a036a78 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 18 Jan 1999 23:15:16 +0000 Subject: Update. 1999-01-18 Ulrich Drepper * iconv/gconv_conf.c (add_module): Complete rewrite. Use cleverer data structures and avoid creating intermediate representations first. Rewrite also all helper functions. * iconv/gconv_db.c (find_derivation): Use new data structure for module database. * iconv/Versions: Remove __gconv_nmodules. * iconv/iconv_prog.c: Rewrite generation of charset name list to use new data structure. * iconv/gconv_int.h (struct gconv_module): Add new elements for database data structure. (__gconv_modules_db): Update type. (__gconv_transform_dummy): Removed. * iconv/gconv_builtin.h: Remove dummy transformation. * iconv/gconv_simple.c: Remove __gconv_transform_dummy. * sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list: Remove __syscall_vfork, add vfork. * sysdeps/unix/sysv/linux/sparc/sparc64/syscalls.list: Likewise. * Rules: Add dummp.c and dummy.o to common-generated. --- iconv/Versions | 2 +- iconv/gconv_builtin.h | 5 - iconv/gconv_conf.c | 308 ++++++++++++++++++---------------- iconv/gconv_db.c | 453 +++++++++++++++++++++++++++++++------------------- iconv/gconv_int.h | 12 +- iconv/gconv_simple.c | 32 ---- iconv/iconv_prog.c | 50 ++++-- 7 files changed, 482 insertions(+), 380 deletions(-) (limited to 'iconv') diff --git a/iconv/Versions b/iconv/Versions index 25d16429f8..577a54867d 100644 --- a/iconv/Versions +++ b/iconv/Versions @@ -1,7 +1,7 @@ libc { GLIBC_2.1 { # global variables - __gconv_alias_db; __gconv_nmodules; __gconv_modules_db; + __gconv_alias_db; __gconv_modules_db; # i* iconv_open; iconv; iconv_close; diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index 554989be6b..e12f1e46ee 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -79,8 +79,3 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UNICODELITTLE//", 1, "=INTERNAL->ucs2little", __gconv_transform_internal_ucs2little, NULL, NULL, 4, 4, 2, 2) - - -BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy", - __gconv_transform_dummy, NULL, NULL, - 1, 1, 1, 1) diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c index d3f516effa..24ec14aea8 100644 --- a/iconv/gconv_conf.c +++ b/iconv/gconv_conf.c @@ -1,5 +1,5 @@ /* Handle configuration data. - Copyright (C) 1997, 1998 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -83,99 +83,84 @@ builtin_aliases[] = #endif -/* Function for searching module. */ +/* Test whether there is already a matching module known. */ static int -module_compare (const void *p1, const void *p2) +internal_function +detect_conflict (const char *alias, size_t alias_len) { - const struct gconv_module *s1 = (const struct gconv_module *) p1; - const struct gconv_module *s2 = (const struct gconv_module *) p2; - int result; + struct gconv_module *node = __gconv_modules_db; - if (s1->from_pattern == NULL) + while (node != NULL) { - if (s2->from_pattern == NULL) - result = strcmp (s1->from_constpfx, s2->from_constpfx); - else - result = -1; - } - else if (s2->from_pattern == NULL) - result = 1; - else - result = strcmp (s1->from_pattern, s2->from_pattern); - - if (result == 0) - result = strcmp (s1->to_string, s2->to_string); - - return result; -} - - -/* This function is used to test for a conflict which could be introduced - if adding a new alias. + int cmpres = strncmp (alias, node->from_constpfx, + MIN (alias_len, node->from_constpfx_len)); - This function is a *very* ugly hack. The action-function is not - supposed to alter the parameter. But we have to do this. We will if - necessary compile the regular expression so that we can see whether it - matches the alias name. This is safe in this environment and for the - sake of performance we do it this way. The alternative would be to - compile all regular expressions right from the start or to forget about - the compilation though we might need it later. - - The second ugliness is that we have no possibility to pass parameters - to the function. Therefore we use a global variable. This is no problem - since we are for sure alone even in multi-threaded applications. */ - -/* This is alias we want to check. */ -static const char *alias_to_test; - -/* This variable is set to a nonzero value once we have found a matching - entry. */ -static int abort_conflict_search; - -static void -detect_conflict (const void *p, VISIT value, int level) -{ - struct gconv_module *s = *(struct gconv_module **) p; - - if ((value != endorder && value != leaf) || s->from_constpfx == NULL - || abort_conflict_search) - return; - - /* Before we test the whole expression (if this is a regular expression) - make sure the constant prefix matches. In case this is no regular - expression this is the whole string. */ - if (strcmp (alias_to_test, s->from_constpfx) == 0) - { - if (s->from_pattern == NULL) - /* This is a simple string and therefore we have a conflict. */ - abort_conflict_search = 1; - else + if (cmpres == 0) { - /* Make sure the regular expression is compiled (if possible). */ - if (s->from_regex == NULL) + struct gconv_module *runp; + + if (alias_len < node->from_constpfx_len) + /* Cannot possibly match. */ + return 0; + + /* This means the prefix and the alias are identical. If + there is now a simple extry or a regular expression + matching this name we have found a conflict. If there is + no conflict with the elements in the `same' list there + cannot be a conflict. */ + runp = node; + do { - /* Beware, this is what I warned you about in the comment - above. We are modifying the object. */ - if (__regcomp (&s->from_regex_mem, s->from_pattern, - REG_EXTENDED | REG_ICASE) != 0) - /* Something is wrong. Remember this. */ - s->from_regex = (regex_t *) -1L; + if (runp->from_pattern == NULL) + { + /* This is a simple entry and therefore we have a + conflict if the strings are really the same. */ + if (alias_len == node->from_constpfx_len) + return 1; + } else - s->from_regex = &s->from_regex_mem; + { + /* Compile the regular expression if necessary. */ + if (runp->from_regex == NULL) + { + if (__regcomp (&runp->from_regex_mem, + runp->from_pattern, + REG_EXTENDED | REG_ICASE) != 0) + /* Something is wrong. Remember this. */ + runp->from_regex = (regex_t *) -1L; + else + runp->from_regex = &runp->from_regex_mem; + } + + if (runp->from_regex != (regex_t *) -1L) + { + regmatch_t match[1]; + + /* Try to match the regular expression. */ + if (__regexec (runp->from_regex, alias, 1, match, 0) == 0 + && match[0].rm_so == 0 + && alias[match[0].rm_eo] == '\0') + /* They match, therefore it is a conflict. */ + return 1; + } + } + + runp = runp->same; } + while (runp != NULL); - if (s->from_regex != (regex_t *) -1L) - { - regmatch_t match[1]; + if (alias_len == node->from_constpfx_len) + return 0; - if (__regexec (s->from_regex, alias_to_test, 1, match, 0) == 0 - && match[0].rm_so == 0 - && alias_to_test[match[0].rm_eo] == '\0') - /* The whole string matched. This is also a conflict. */ - abort_conflict_search = 1; - } + node = node->matching; } + else if (cmpres < 0) + node = node->left; + else + node = node->right; } + + return node != NULL; } @@ -207,13 +192,8 @@ add_alias (char *rp, void *modules) return; *wp++ = '\0'; - /* Test whether this alias conflicts with any available module. See - the comment before the function `detect_conflict' for a description - of this ugly hack. */ - alias_to_test = from; - abort_conflict_search = 0; - __twalk (modules, detect_conflict); - if (abort_conflict_search) + /* Test whether this alias conflicts with any available module. */ + if (detect_conflict (from, to - from - 1)) /* It does conflict, don't add the alias. */ return; @@ -234,8 +214,74 @@ add_alias (char *rp, void *modules) } +/* Insert a data structure for a new module in the search tree. */ +static inline void +internal_function +insert_module (struct gconv_module *newp) +{ + struct gconv_module **rootp = &__gconv_modules_db; + + while (*rootp != NULL) + { + struct gconv_module *root = *rootp; + size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len); + int cmpres; + + cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen); + if (cmpres == 0) + { + /* This can mean two things: the prefix is entirely the same or + it matches only for the minimum length of both strings. */ + if (newp->from_constpfx_len == root->from_constpfx_len) + { + /* Both prefixes are identical. Insert the string at the + end of the `same' list if it is not already there. */ + const char *from_pattern = (newp->from_pattern + ?: newp->from_constpfx); + + while (strcmp (from_pattern, + root->from_pattern ?: root->from_constpfx) != 0 + || strcmp (newp->to_string, root->to_string) != 0) + { + rootp = &root->same; + root = *rootp; + if (root == NULL) + break; + } + + if (root != NULL) + /* This is a no new conversion. */ + return; + + break; + } + + /* The new element either has a prefix which is itself a + prefix for the prefix of the current node or vice verse. + In the first case we insert the node right here. Otherwise + we have to descent further. */ + if (newp->from_constpfx_len < root->from_constpfx_len) + { + newp->matching = root; + break; + } + + rootp = &root->matching; + } + else if (cmpres < 0) + rootp = &root->left; + else + rootp = &root->right; + } + + /* Plug in the new node here. */ + *rootp = newp; +} + + /* Add new module. */ static inline void +internal_function add_module (char *rp, const char *directory, size_t dir_len, void **modules, size_t *nmodules, int modcounter) { @@ -259,7 +305,7 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules, while (*rp != '\0' && !isspace (*rp)) { if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.' - && *rp != '_') + && *rp != '_' && *rp != '(' && *rp != ')') from_is_regex = 1; ++rp; } @@ -293,7 +339,7 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules, *wp++ = '\0'; cost_hi = strtol (rp, &endp, 10); - if (rp == endp) + if (rp == endp || cost_hi < 1) /* No useful information. */ cost_hi = 1; } @@ -328,7 +374,8 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules, else const_len = to - from - 1; - new_module = (struct gconv_module *) malloc (sizeof (struct gconv_module) + new_module = (struct gconv_module *) calloc (1, + sizeof (struct gconv_module) + (wp - from) + dir_len + need_ext); if (new_module != NULL) @@ -340,13 +387,9 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules, from, to - from); if (from_is_regex) new_module->from_pattern = new_module->from_constpfx; - else - new_module->from_pattern = NULL; new_module->from_constpfx_len = const_len; - new_module->from_regex = NULL; - new_module->to_string = memcpy ((char *) new_module->from_constpfx + (to - from), to, module - to); @@ -388,31 +431,12 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules, } } - if (__tfind (new_module, modules, module_compare) == NULL) - { - if (__tsearch (new_module, modules, module_compare) == NULL) - /* Something went wrong while inserting the new module. */ - free (new_module); - else - ++*nmodules; - } + /* Now insert the new module data structure in our search tree. */ + insert_module (new_module); } } -static void -insert_module (const void *nodep, VISIT value, int level) -{ - if (value == preorder || value == leaf) - __gconv_modules_db[__gconv_nmodules++] = *(struct gconv_module **) nodep; -} - -static void -nothing (void *unused __attribute__ ((unused))) -{ -} - - /* Read the next configuration file. */ static void internal_function @@ -495,15 +519,14 @@ __gconv_read_conf (void) { /* Append the default path to the user-defined path. */ size_t user_len = strlen (user_path); - char *tmp; gconv_path = alloca (user_len + 1 + sizeof (default_gconv_path)); - tmp = __mempcpy (gconv_path, user_path, user_len); - *tmp++ = ':'; - __mempcpy (tmp, default_gconv_path, sizeof (default_gconv_path)); + __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len), + ":", 1), + default_gconv_path, sizeof (default_gconv_path)); } - elem = strtok_r (gconv_path, ":", &gconv_path); + elem = __strtok_r (gconv_path, ":", &gconv_path); while (elem != NULL) { #ifndef MAXPATHLEN @@ -515,43 +538,38 @@ __gconv_read_conf (void) if (__realpath (elem, real_elem) != NULL) { size_t elem_len = strlen (real_elem); - char *filename, *tmp; + char *filename; filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename)); - tmp = __mempcpy (filename, real_elem, elem_len); - *tmp++ = '/'; - __mempcpy (tmp, gconv_conf_filename, sizeof (gconv_conf_filename)); + __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len), + "/", 1), + gconv_conf_filename, sizeof (gconv_conf_filename)); /* Read the next configuration file. */ read_conf_file (filename, real_elem, elem_len, &modules, &nmodules); } /* Get next element in the path. */ - elem = strtok_r (NULL, ":", &gconv_path); + elem = __strtok_r (NULL, ":", &gconv_path); } - /* If the configuration files do not contain any valid module specification - remember this by setting the pointer to the module array to NULL. */ - nmodules += sizeof (builtin_modules) / sizeof (builtin_modules[0]); - if (nmodules == 0) - __gconv_modules_db = NULL; - else + /* Add the internal modules. */ + for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); + ++cnt) { - __gconv_modules_db = - (struct gconv_module **) malloc (nmodules - * sizeof (struct gconv_module)); - if (__gconv_modules_db != NULL) + if (builtin_modules[cnt].from_pattern == NULL) { - size_t cnt; + struct gconv_alias fake_alias; - /* Insert all module entries into the array. */ - __twalk (modules, insert_module); + fake_alias.fromname = builtin_modules[cnt].from_constpfx; - /* Finally insert the builtin transformations. */ - for (cnt = 0; cnt < (sizeof (builtin_modules) - / sizeof (struct gconv_module)); ++cnt) - __gconv_modules_db[__gconv_nmodules++] = &builtin_modules[cnt]; + if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) + != NULL) + /* It'll conflict so don't add it. */ + continue; } + + insert_module (&builtin_modules[cnt]); } /* Add aliases for builtin conversions. */ @@ -561,10 +579,6 @@ __gconv_read_conf (void) char *copy = strdupa (builtin_aliases[--cnt]); add_alias (copy, modules); } - - if (nmodules != 0) - /* Now remove the tree data structure. */ - __tdestroy (modules, nothing); /* Restore the error number. */ __set_errno (save_errno); diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index e6253b8380..5269d792f6 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -1,5 +1,5 @@ /* Provide access to the collection of available transformation modules. - Copyright (C) 1997, 1998 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -18,9 +18,11 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include #include #include #include +#include #include #include @@ -32,8 +34,7 @@ void *__gconv_alias_db; /* Array with available modules. */ -size_t __gconv_nmodules; -struct gconv_module **__gconv_modules_db; +struct gconv_module *__gconv_modules_db; /* We modify global data. */ __libc_lock_define_initialized (static, lock) @@ -55,14 +56,20 @@ __gconv_alias_compare (const void *p1, const void *p2) struct derivation_step { const char *result_set; + size_t result_set_len; + int cost_lo; + int cost_hi; struct gconv_module *code; struct derivation_step *last; struct derivation_step *next; }; -#define NEW_STEP(result, module, last_mod) \ +#define NEW_STEP(result, hi, lo, module, last_mod) \ ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \ newp->result_set = result; \ + newp->result_set_len = strlen (result); \ + newp->cost_hi = hi; \ + newp->cost_lo = lo; \ newp->code = module; \ newp->last = last_mod; \ newp->next = NULL; \ @@ -224,7 +231,7 @@ gen_steps (struct derivation_step *best, const char *toset, { status = _CALL_DL_FCT (result[step_cnt].init_fct, (&result[step_cnt])); - + if (status != GCONV_OK) { failed = 1; @@ -276,9 +283,9 @@ find_derivation (const char *toset, const char *toset_expand, struct gconv_step **handle, size_t *nsteps) { __libc_lock_define_initialized (static, lock) - struct derivation_step *first, *current, **lastp, *best = NULL; - int best_cost_hi = 0; - int best_cost_lo = 0; + struct derivation_step *first, *current, **lastp, *solution = NULL; + int best_cost_hi = INT_MAX; + int best_cost_lo = INT_MAX; int result; result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, @@ -299,202 +306,287 @@ find_derivation (const char *toset, const char *toset_expand, return result; } - /* ### TODO - For now we use a simple algorithm with quadratic runtime behaviour. + /* For now we use a simple algorithm with quadratic runtime behaviour. The task is to match the `toset' with any of the available rules, starting from FROMSET. */ if (fromset_expand != NULL) { - first = NEW_STEP (fromset_expand, NULL, NULL); - first->next = NEW_STEP (fromset, NULL, NULL); + first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); + first->next = NEW_STEP (fromset, 0, 0, NULL, NULL); lastp = &first->next->next; } else { - first = NEW_STEP (fromset, NULL, NULL); + first = NEW_STEP (fromset, 0, 0, NULL, NULL); lastp = &first->next; } - current = first; - while (current != NULL) + for (current = first; current != NULL; current = current->next) { /* Now match all the available module specifications against the current charset name. If any of them matches check whether we already have a derivation for this charset. If yes, use the one with the lower costs. Otherwise add the new charset at the - end. */ - size_t cnt; - - for (cnt = 0; cnt < __gconv_nmodules; ++cnt) - { - const char *result_set = NULL; + end. + + The module database is organized in a tree form which allows to + search for prefixes. So we search for the first entry with a + matching prefix and any other matching entry can be found from + this place. */ + struct gconv_module *node = __gconv_modules_db; + + /* Maybe it is not necessary anymore to look for a solution for + this entry since the cost is already as high (or heigher) as + the cost for the best solution so far. */ + if (current->cost_hi > best_cost_hi + || (current->cost_hi == best_cost_hi + && current->cost_lo >= best_cost_lo)) + continue; + + while (node != NULL) + { + int cmpres = strncmp (current->result_set, node->from_constpfx, + MIN (current->result_set_len, + node->from_constpfx_len)); - if (__gconv_modules_db[cnt]->from_pattern == NULL) + if (cmpres == 0) { - if (__strcasecmp (current->result_set, - __gconv_modules_db[cnt]->from_constpfx) == 0) + /* Walk through the list of modules with this prefix and + try to match the name. */ + struct gconv_module *runp; + + if (current->result_set_len < node->from_constpfx_len) + /* Cannot possibly match. */ + break; + + /* Check all the modules with this prefix. */ + runp = node; + do { - if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0) - result_set = toset_expand ?: toset; + const char *result_set = NULL; + + if (runp->from_pattern == NULL) + { + /* This is a simple entry and therefore we have a + found an matching entry if the strings are really + equal. */ + if (current->result_set_len == runp->from_constpfx_len) + { + if (strcmp (runp->to_string, "-") == 0) + result_set = toset_expand ?: toset; + else + result_set = runp->to_string; + } + } else - result_set = __gconv_modules_db[cnt]->to_string; - } - } - else - /* We have a regular expression. First see if the prefix - matches. */ - if (__strncasecmp (current->result_set, - __gconv_modules_db[cnt]->from_constpfx, - __gconv_modules_db[cnt]->from_constpfx_len) - == 0) - { - /* First compile the regex if not already done. */ - if (__gconv_modules_db[cnt]->from_regex == NULL) - { - if (__regcomp (&__gconv_modules_db[cnt]->from_regex_mem, - __gconv_modules_db[cnt]->from_pattern, - REG_EXTENDED | REG_ICASE) != 0) - /* Something is wrong. Remember this. */ - __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L; - else - __gconv_modules_db[cnt]->from_regex - = &__gconv_modules_db[cnt]->from_regex_mem; - } - - if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L) - { - /* Try to match the from name. */ - regmatch_t match[4]; - - if (__regexec (__gconv_modules_db[cnt]->from_regex, - current->result_set, 4, match, 0) == 0 - && match[0].rm_so == 0 - && current->result_set[match[0].rm_eo] == '\0') - { - /* At least the whole string is matched. - We must now match sed-like possible - subexpressions from the match to the - toset expression. */ + { + /* Compile the regular expression if necessary. */ + if (runp->from_regex == NULL) + { + if (__regcomp (&runp->from_regex_mem, + runp->from_pattern, + REG_EXTENDED | REG_ICASE) != 0) + /* Something is wrong. Remember this. */ + runp->from_regex = (regex_t *) -1L; + else + runp->from_regex = &runp->from_regex_mem; + } + + if (runp->from_regex != (regex_t *) -1L) + { + regmatch_t match[4]; + + /* Try to match the regular expression. */ + if (__regexec (runp->from_regex, current->result_set, + 4, match, 0) == 0 + && match[0].rm_so == 0 + && current->result_set[match[0].rm_eo] == '\0') + { + /* At least the whole string is matched. + We must now match sed-like possible + subexpressions from the match to the + toset expression. */ #define ENSURE_LEN(LEN) \ if (wp + (LEN) >= constr + len - 1) \ { \ char *newp = alloca (len += 128); \ - memcpy (newp, constr, wp - constr); \ - wp = newp + (wp - constr); \ + wp = __mempcpy (newp, constr, wp - constr); \ constr = newp; \ } - size_t len = 128; - char *constr = alloca (len); - char *wp = constr; - const char *cp = __gconv_modules_db[cnt]->to_string; - - while (*cp != '\0') - { - if (*cp != '\\') - { - ENSURE_LEN (1); - *wp++ = *cp++; - } - else if (cp[1] == '\0') - /* Backslash at end of string. */ - break; - else - { - ++cp; - if (*cp == '\\') - { - *wp++ = *cp++; - ENSURE_LEN (1); - } - else if (*cp < '1' || *cp > '3') - break; - else - { - int idx = *cp - '0'; - if (match[idx].rm_so == -1) - /* No match. */ - break; - - ENSURE_LEN (match[idx].rm_eo - - match[idx].rm_so); - wp = __mempcpy (wp, - ¤t->result_set[match[idx].rm_so], - match[idx].rm_eo - - match[idx].rm_so); - ++cp; - } - } - } - if (*cp == '\0' && wp != constr) - { - /* Terminate the constructed string. */ - *wp = '\0'; - result_set = constr; - } - } - } - } - - if (result_set != NULL) - { - /* We managed to find a derivation. First see whether - this is what we are looking for. */ - if (__strcasecmp (result_set, toset) == 0 - || (toset_expand != NULL - && __strcasecmp (result_set, toset_expand) == 0)) - { - /* Determine the costs. If they are lower than the - previous solution (or this is the first solution) - remember this solution. */ - int cost_hi = __gconv_modules_db[cnt]->cost_hi; - int cost_lo = __gconv_modules_db[cnt]->cost_lo; - struct derivation_step *runp = current; - while (runp->code != NULL) - { - cost_hi += runp->code->cost_hi; - cost_lo += runp->code->cost_lo; - runp = runp->last; + size_t len = 128; + char *constr = alloca (len); + char *wp = constr; + const char *cp = runp->to_string; + + while (*cp != '\0') + { + if (*cp != '\\') + { + ENSURE_LEN (1); + *wp++ = *cp++; + } + else if (cp[1] == '\0') + /* Backslash at end of string. */ + break; + else + { + ++cp; + if (*cp == '\\') + { + *wp++ = *cp++; + ENSURE_LEN (1); + } + else if (*cp < '1' || *cp > '3') + break; + else + { + int idx = *cp - '0'; + if (match[idx].rm_so == -1) + /* No match. */ + break; + + ENSURE_LEN (match[idx].rm_eo + - match[idx].rm_so); + wp = __mempcpy (wp, + ¤t->result_set[match[idx].rm_so], + match[idx].rm_eo + - match[idx].rm_so); + ++cp; + } + } + } + if (*cp == '\0' && wp != constr) + { + /* Terminate the constructed string. */ + *wp = '\0'; + result_set = constr; + } + } + } } - if (best == NULL || cost_hi < best_cost_hi - || (cost_hi == best_cost_hi && cost_lo < best_cost_lo)) + + if (result_set != NULL) { - best = NEW_STEP (result_set, __gconv_modules_db[cnt], - current); - best_cost_hi = cost_hi; - best_cost_lo = cost_lo; + int cost_hi = runp->cost_hi + current->cost_hi; + int cost_lo = runp->cost_lo + current->cost_lo; + struct derivation_step *step; + + /* We managed to find a derivation. First see whether + this is what we are looking for. */ + if (__strcasecmp (result_set, toset) == 0 + || (toset_expand != NULL + && __strcasecmp (result_set, toset_expand) == 0)) + { + if (solution == NULL || cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + best_cost_hi = cost_hi; + best_cost_lo = cost_lo; + } + + /* Append this solution to list. */ + if (solution == NULL) + solution = NEW_STEP (result_set, 0, 0, runp, + current); + else + { + while (solution->next != NULL) + solution = solution->next; + + solution->next = NEW_STEP (result_set, 0, 0, + runp, current); + } + } + else if (cost_hi < best_cost_hi + || (cost_hi == best_cost_hi + && cost_lo < best_cost_lo)) + { + /* Append at the end if there is no entry with + this name. */ + for (step = first; step != NULL; step = step->next) + if (__strcasecmp (result_set, step->result_set) + == 0) + break; + + if (step == NULL) + { + *lastp = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + lastp = &(*lastp)->next; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + step->code = runp; + step->last = current; + + /* Update the cost for all steps. */ + for (step = first; step != NULL; + step = step->next) + { + struct derivation_step *back; + + if (step->code == NULL) + /* This is one of the entries we started + from. */ + continue; + + step->cost_hi = step->code->cost_hi; + step->cost_lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + step->cost_hi += back->code->cost_hi; + step->cost_lo += back->code->cost_lo; + } + } + + for (step = solution; step != NULL; + step = step->next) + { + step->cost_hi = (step->code->cost_hi + + step->last->cost_hi); + step->cost_lo = (step->code->cost_lo + + step->last->cost_lo); + + if (step->cost_hi < best_cost_hi + || (step->cost_hi == best_cost_hi + && step->cost_lo < best_cost_lo)) + { + solution = step; + best_cost_hi = step->cost_hi; + best_cost_lo = step->cost_lo; + } + } + } + } } + + runp = runp->same; } - else - { - /* Append at the end if there is no entry with this name. */ - struct derivation_step *runp = first; + while (runp != NULL); - while (runp != NULL) - { - if (__strcasecmp (result_set, runp->result_set) == 0) - break; - runp = runp->next; - } + if (current->result_set_len == node->from_constpfx_len) + break; - if (runp == NULL) - { - *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt], - current); - lastp = &(*lastp)->next; - } - } + node = node->matching; } + else if (cmpres < 0) + node = node->left; + else + node = node->right; } - - /* Go on with the next entry. */ - current = current->next; } - if (best != NULL) + if (solution != NULL) /* We really found a way to do the transformation. Now build a data structure describing the transformation steps.*/ - result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset, - handle, nsteps); + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); else { /* We haven't found a transformation. Clear the result values. */ @@ -620,20 +712,35 @@ __gconv_close_transform (struct gconv_step *steps, size_t nsteps) } +/* Free the modules mentioned. */ +static void +internal_function +free_modules_db (struct gconv_module *node) +{ + if (node->left != NULL) + free_modules_db (node->left); + if (node->right != NULL) + free_modules_db (node->right); + if (node->same != NULL) + free_modules_db (node->same); + do + { + struct gconv_module *act = node; + node = node->matching; + free (act); + } + while (node != NULL); +} + + /* Free all resources if necessary. */ static void __attribute__ ((unused)) free_mem (void) { - size_t cnt; - if (__gconv_alias_db != NULL) __tdestroy (__gconv_alias_db, free); - for (cnt = 0; cnt < __gconv_nmodules; ++cnt) - /* Modules which names do not start with a slash are builtin - transformations and the memory is not allocated dynamically. */ - if (__gconv_modules_db[cnt]->module_name[0] == '/') - free (__gconv_modules_db[cnt]); + free_modules_db (__gconv_modules_db); if (known_derivations != NULL) __tdestroy (known_derivations, free_derivation); diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index bc67b0b050..c78d0bb98b 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -75,6 +75,11 @@ struct gconv_module int cost_lo; const char *module_name; + + struct gconv_module *left; /* Prefix smaller. */ + struct gconv_module *same; /* List of entries with identical prefix. */ + struct gconv_module *matching;/* Next node with more specific prefix. */ + struct gconv_module *right; /* Prefix larger. */ }; @@ -85,7 +90,7 @@ extern void *__gconv_alias_db; /* Array with available modules. */ extern size_t __gconv_nmodules; -extern struct gconv_module **__gconv_modules_db; +extern struct gconv_module *__gconv_modules_db; /* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */ @@ -102,7 +107,7 @@ extern int __gconv_close (gconv_t cd) bytes in buffer starting at *OUTBUF. Return number of written characters in *CONVERTED if this pointer is not null. */ extern int __gconv (gconv_t __cd, const char **__inbuf, const char *inbufend, - char **__outbuf, char *outbufend, size_t *__converted) + char **__outbuf, char *outbufend, size_t *converted) internal_function; /* Return in *HANDLE a pointer to an array with *NSTEPS elements describing @@ -152,7 +157,6 @@ extern void __gconv_get_builtin_trans (const char *__name, const char **__inbuf, const char *__inbufend, \ size_t *__written, int __do_flush) -__BUILTIN_TRANS (__gconv_transform_dummy); __BUILTIN_TRANS (__gconv_transform_ascii_internal); __BUILTIN_TRANS (__gconv_transform_internal_ascii); __BUILTIN_TRANS (__gconv_transform_utf8_internal); diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index c71c5ed0a4..4084d04b44 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -46,38 +46,6 @@ static const unsigned char encoding_byte[] = }; - -int -__gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data, - const char **inbuf, const char *inbufend, - size_t *written, int do_flush) -{ - size_t do_write; - - /* We have no stateful encoding. So we don't have to do anything - special. */ - if (do_flush) - do_write = 0; - else - { - do_write = MIN (inbufend - *inbuf, data->outbufend - data->outbuf); - - memcpy (data->outbuf, inbuf, do_write); - - *inbuf -= do_write; - *data->outbuf += do_write; - } - - /* ### TODO Actually, this number must be devided according to the - size of the input charset. I.e., if the input is in UCS4 the - number of copied bytes must be divided by 4. */ - if (written != NULL) - *written = do_write; - - return GCONV_OK; -} - - /* Transform from the internal, UCS4-like format, to UCS4. The difference between the internal ucs4 format and the real UCS4 format is, if any, the endianess. The Unicode/ISO 10646 says that diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 8f05d57a18..78fa6234f4 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -99,7 +99,7 @@ static int process_block (iconv_t cd, const char *addr, size_t len, FILE *output); static int process_fd (iconv_t cd, int fd, FILE *output); static int process_file (iconv_t cd, FILE *input, FILE *output); -static void print_known_names (void); +static void print_known_names (void) internal_function; int @@ -503,9 +503,38 @@ do_print (const void *nodep, VISIT value, int level) } static void +internal_function +add_known_names (struct gconv_module *node) +{ + if (node->left != NULL) + add_known_names (node->left); + if (node->right != NULL) + add_known_names (node->right); + if (node->same != NULL) + add_known_names (node->same); + do + { + if (node->from_pattern == NULL) + { + if (strcmp (node->from_constpfx, "INTERNAL")) + tsearch (node->from_constpfx, &printlist, + (__compar_fn_t) strverscmp); + if (strcmp (node->to_string, "INTERNAL")) + tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp); + } + else + if (strcmp (node->from_pattern, "INTERNAL")) + tsearch (node->from_pattern, &printlist, (__compar_fn_t) strverscmp); + + node = node->matching; + } + while (node != NULL); +} + +static void +internal_function print_known_names (void) { - size_t cnt; iconv_t h; /* We must initialize the internal databases first. */ @@ -516,22 +545,7 @@ print_known_names (void) twalk (__gconv_alias_db, insert_print_list); /* Add the from- and to-names from the known modules. */ - for (cnt = 0; cnt < __gconv_nmodules; ++cnt) - { - if (__gconv_modules_db[cnt]->from_pattern == NULL) - { - if (strcmp (__gconv_modules_db[cnt]->from_constpfx, "INTERNAL")) - tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist, - (__compar_fn_t) strverscmp); - if (strcmp (__gconv_modules_db[cnt]->to_string, "INTERNAL")) - tsearch (__gconv_modules_db[cnt]->to_string, &printlist, - (__compar_fn_t) strverscmp); - } - else - if (strcmp (__gconv_modules_db[cnt]->from_pattern, "INTERNAL")) - tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist, - (__compar_fn_t) strverscmp); - } + add_known_names (__gconv_modules_db); fputs (_("\ The following list contain all the coded character sets known. This does\n\ -- cgit v1.2.3