From b79f74cd622578ce5eea1a3ed5840ac53d6b6d93 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 5 Sep 2000 02:41:25 +0000 Subject: Update. 2000-09-03 Bruno Haible * charmaps/EUC-TW: Add commented non-reversible mappings. 2000-09-03 Bruno Haible * charmaps/CP949: New file. 2000-09-03 Bruno Haible * charmaps/GB2312: Remove 0x80..0xA0, 0xAA..0xAF, 0xF8..FF. 2000-09-03 Bruno Haible * charmaps/EUC-JP: Nonreversibly map 0xA1C0 to U+005C and 0x8FA2B7 to U+007E. --- iconv/gconv_builtin.c | 1 - iconv/gconv_db.c | 255 +++++++++++++++++++++++++++++++++----------------- iconv/gconv_dl.c | 8 +- iconv/gconv_int.h | 4 +- iconv/gconv_simple.c | 17 +--- iconv/gconv_trans.c | 8 +- 6 files changed, 181 insertions(+), 112 deletions(-) (limited to 'iconv') diff --git a/iconv/gconv_builtin.c b/iconv/gconv_builtin.c index 7a2072d904..a530c82d46 100644 --- a/iconv/gconv_builtin.c +++ b/iconv/gconv_builtin.c @@ -75,7 +75,6 @@ __gconv_get_builtin_trans (const char *name, struct __gconv_step *step) step->__fct = map[cnt].fct; step->__init_fct = map[cnt].init; step->__end_fct = map[cnt].end; - step->__counter = INT_MAX; step->__shlib_handle = NULL; step->__modname = NULL; diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index ed2698a628..dd51670af1 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -163,7 +163,8 @@ free_derivation (void *p) size_t cnt; for (cnt = 0; cnt < deriv->nsteps; ++cnt) - if (deriv->steps[cnt].__end_fct) + if (deriv->steps[cnt].__counter > 0 + && deriv->steps[cnt].__end_fct != NULL) DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt])); /* Free the name strings. */ @@ -175,6 +176,28 @@ free_derivation (void *p) } +/* Decrement the reference count for a single step in a steps array. */ +static inline void +release_step (struct __gconv_step *step) +{ + if (--step->__counter == 0) + { + /* Call the destructor. 
*/ + if (step->__end_fct != NULL) + DL_CALL_FCT (step->__end_fct, (step)); + +#ifndef STATIC_GCONV + /* Skip builtin modules; they are not reference counted. */ + if (step->__shlib_handle != NULL) + { + /* Release the loaded module. */ + __gconv_release_shlib (step->__shlib_handle); + step->__shlib_handle = NULL; + } +#endif + } +} + static int internal_function gen_steps (struct derivation_step *best, const char *toset, @@ -222,7 +245,6 @@ gen_steps (struct derivation_step *best, const char *toset, result[step_cnt].__shlib_handle = shlib_handle; result[step_cnt].__modname = shlib_handle->name; - result[step_cnt].__counter = 1; result[step_cnt].__fct = shlib_handle->fct; result[step_cnt].__init_fct = shlib_handle->init_fct; result[step_cnt].__end_fct = shlib_handle->end_fct; @@ -233,6 +255,8 @@ gen_steps (struct derivation_step *best, const char *toset, __gconv_get_builtin_trans (current->code->module_name, &result[step_cnt]); + result[step_cnt].__counter = 1; + /* Call the init function. */ result[step_cnt].__data = NULL; if (result[step_cnt].__init_fct != NULL) @@ -245,6 +269,7 @@ gen_steps (struct derivation_step *best, const char *toset, failed = 1; /* Make sure we unload this modules. */ --step_cnt; + result[step_cnt].__end_fct = NULL; break; } } @@ -256,13 +281,7 @@ gen_steps (struct derivation_step *best, const char *toset, { /* Something went wrong while initializing the modules. 
*/ while (++step_cnt < *nsteps) - { - if (result[step_cnt].__end_fct != NULL) - DL_CALL_FCT (result[step_cnt].__end_fct, (&result[step_cnt])); -#ifndef STATIC_GCONV - __gconv_release_shlib (result[step_cnt].__shlib_handle); -#endif - } + release_step (&result[step_cnt]); free (result); *nsteps = 0; *handle = NULL; @@ -292,29 +311,38 @@ increment_counter (struct __gconv_step *steps, size_t nsteps) int result = __GCONV_OK; while (cnt-- > 0) - if (steps[cnt].__counter++ == 0) - { - steps[cnt].__shlib_handle = - __gconv_find_shlib (steps[cnt].__modname); - if (steps[cnt].__shlib_handle == NULL) - { - /* Oops, this is the second time we use this module (after - unloading) and this time loading failed!? */ - while (++cnt < nsteps) - __gconv_release_shlib (steps[cnt].__shlib_handle); - result = __GCONV_NOCONV; - break; - } - - steps[cnt].__init_fct = steps[cnt].__shlib_handle->init_fct; - steps[cnt].__fct = steps[cnt].__shlib_handle->fct; - steps[cnt].__end_fct = steps[cnt].__shlib_handle->end_fct; - - if (steps[cnt].__end_fct != NULL) - DL_CALL_FCT (steps[cnt].__end_fct, (&steps[cnt])); - if (steps[cnt].__init_fct != NULL) - DL_CALL_FCT (steps[cnt].__init_fct, (&steps[cnt])); - } + { + struct __gconv_step *step = &steps[cnt]; + + if (step->__counter++ == 0) + { + /* Skip builtin modules. */ + if (step->__modname != NULL) + { + /* Reopen a previously used module. */ + step->__shlib_handle = __gconv_find_shlib (step->__modname); + if (step->__shlib_handle == NULL) + { + /* Oops, this is the second time we use this module + (after unloading) and this time loading failed!? */ + --step->__counter; + while (++cnt < nsteps) + release_step (&steps[cnt]); + result = __GCONV_NOCONV; + break; + } + + /* The function addresses defined by the module may + have changed. 
*/ + step->__fct = step->__shlib_handle->fct; + step->__init_fct = step->__shlib_handle->init_fct; + step->__end_fct = step->__shlib_handle->end_fct; + } + + if (step->__init_fct != NULL) + DL_CALL_FCT (step->__init_fct, (step)); + } + } return result; } #endif @@ -333,9 +361,8 @@ find_derivation (const char *toset, const char *toset_expand, int best_cost_lo = INT_MAX; int result; - /* There is a small chance that this derivation is meanwhile found. This - can happen if in `find_derivation' we look for this derivation, didn't - find it but at the same time another thread looked for this derivation. */ + /* Look whether an earlier call to `find_derivation' has already + computed a possible derivation. If so, return it immediately. */ result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset, handle, nsteps); if (result == __GCONV_OK) @@ -346,9 +373,32 @@ find_derivation (const char *toset, const char *toset_expand, return result; } - /* For now we use a simple algorithm with quadratic runtime behaviour. - The task is to match the `toset' with any of the available rules, - starting from FROMSET. */ + /* The task is to find a sequence of transformations, backed by the + existing modules - whether builtin or dynamically loadable -, + starting at `fromset' (or `fromset_expand') and ending at `toset' + (or `toset_expand'), and with minimal cost. + + For computer scientists, this is a shortest path search in the + graph where the nodes are all possible charsets and the edges are + the transformations listed in __gconv_modules_db. + + For now we use a simple algorithm with quadratic runtime behaviour. + A breadth-first search, starting at `fromset' and `fromset_expand'. + The list starting at `first' contains all nodes that have been + visited up to now, in the order in which they have been visited -- + excluding the goal nodes `toset' and `toset_expand' which get + managed in the list starting at `solution'. 
+ `current' walks through the list starting at `first' and looks + which nodes are reachable from the current node, adding them to + the end of the list [`first' or `solution' respectively] (if + they are visited the first time) or updating them in place (if + they have already been visited). + In each node of either list, cost_lo and cost_hi contain the + minimum cost over any paths found up to now, starting at `fromset' + or `fromset_expand', ending at that node. best_cost_lo and + best_cost_hi represent the minimum over the elements of the + `solution' list. */ + if (fromset_expand != NULL) { first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL); @@ -373,16 +423,17 @@ find_derivation (const char *toset, const char *toset_expand, searching for prefixes. So we search for the first entry with a matching prefix and any other matching entry can be found from this place. */ - struct gconv_module *node = __gconv_modules_db; + struct gconv_module *node; /* Maybe it is not necessary anymore to look for a solution for - this entry since the cost is already as high (or heigher) as + this entry since the cost is already as high (or higher) as the cost for the best solution so far. */ if (current->cost_hi > best_cost_hi || (current->cost_hi == best_cost_hi && current->cost_lo >= best_cost_lo)) continue; + node = __gconv_modules_db; while (node != NULL) { int cmpres = strcmp (current->result_set, node->from_string); @@ -404,37 +455,52 @@ find_derivation (const char *toset, const char *toset_expand, struct derivation_step *step; /* We managed to find a derivation. First see whether - this is what we are looking for. */ + we have reached one of the goal nodes. */ if (strcmp (result_set, toset) == 0 || (toset_expand != NULL && strcmp (result_set, toset_expand) == 0)) { - if (solution == NULL || cost_hi < best_cost_hi + /* Append to the `solution' list if there + is no entry with this name. 
*/ + for (step = solution; step != NULL; step = step->next) + if (strcmp (result_set, step->result_set) == 0) + break; + + if (step == NULL) + { + step = NEW_STEP (result_set, + cost_hi, cost_lo, + runp, current); + step->next = solution; + solution = step; + } + else if (step->cost_hi > cost_hi + || (step->cost_hi == cost_hi + && step->cost_lo > cost_lo)) + { + /* A better path was found for the node, + on the `solution' list. */ + step->code = runp; + step->last = current; + step->cost_hi = cost_hi; + step->cost_lo = cost_lo; + } + + /* Update best_cost accordingly. */ + if (cost_hi < best_cost_hi || (cost_hi == best_cost_hi && cost_lo < best_cost_lo)) { best_cost_hi = cost_hi; best_cost_lo = cost_lo; } - - /* Append this solution to list. */ - if (solution == NULL) - solution = NEW_STEP (result_set, 0, 0, runp, current); - else - { - while (solution->next != NULL) - solution = solution->next; - - solution->next = NEW_STEP (result_set, 0, 0, - runp, current); - } } else if (cost_hi < best_cost_hi || (cost_hi == best_cost_hi && cost_lo < best_cost_lo)) { - /* Append at the end if there is no entry with - this name. */ + /* Append at the end of the `first' list if there + is no entry with this name. */ for (step = first; step != NULL; step = step->next) if (strcmp (result_set, step->result_set) == 0) break; @@ -450,31 +516,36 @@ find_derivation (const char *toset, const char *toset_expand, || (step->cost_hi == cost_hi && step->cost_lo > cost_lo)) { + /* A better path was found for the node, + on the `first' list. */ step->code = runp; step->last = current; /* Update the cost for all steps. */ for (step = first; step != NULL; step = step->next) - { - struct derivation_step *back; - - if (step->code == NULL) - /* This is one of the entries we started - from. 
*/ - continue; - - step->cost_hi = step->code->cost_hi; - step->cost_lo = step->code->cost_lo; - - for (back = step->last; back->code != NULL; - back = back->last) - { - step->cost_hi += back->code->cost_hi; - step->cost_lo += back->code->cost_lo; - } - } - + /* But don't update the start nodes. */ + if (step->code != NULL) + { + struct derivation_step *back; + int hi, lo; + + hi = step->code->cost_hi; + lo = step->code->cost_lo; + + for (back = step->last; back->code != NULL; + back = back->last) + { + hi += back->code->cost_hi; + lo += back->code->cost_lo; + } + + step->cost_hi = hi; + step->cost_lo = lo; + } + + /* Likewise for the nodes on the solution list. + Also update best_cost accordingly. */ for (step = solution; step != NULL; step = step->next) { @@ -487,7 +558,6 @@ find_derivation (const char *toset, const char *toset_expand, || (step->cost_hi == best_cost_hi && step->cost_lo < best_cost_lo)) { - solution = step; best_cost_hi = step->cost_hi; best_cost_lo = step->cost_lo; } @@ -509,10 +579,26 @@ find_derivation (const char *toset, const char *toset_expand, } if (solution != NULL) - /* We really found a way to do the transformation. Now build a data - structure describing the transformation steps.*/ - result = gen_steps (solution, toset_expand ?: toset, - fromset_expand ?: fromset, handle, nsteps); + { + /* We really found a way to do the transformation. */ + + /* Choose the best solution. This is easy because we know that + the solution list has at most length 2 (one for every possible + goal node). */ + if (solution->next != NULL) + { + struct derivation_step *solution2 = solution->next; + + if (solution2->cost_hi < solution->cost_hi + || (solution2->cost_hi == solution->cost_hi + && solution2->cost_lo < solution->cost_lo)) + solution = solution2; + } + + /* Now build a data structure describing the transformation steps. 
*/ + result = gen_steps (solution, toset_expand ?: toset, + fromset_expand ?: fromset, handle, nsteps); + } else { /* We haven't found a transformation. Clear the result values. */ @@ -609,14 +695,7 @@ __gconv_close_transform (struct __gconv_step *steps, size_t nsteps) __libc_lock_lock (lock); while (nsteps-- > 0) - if (steps[nsteps].__shlib_handle != NULL - && --steps[nsteps].__counter == 0) - { - result = __gconv_release_shlib (steps[nsteps].__shlib_handle); - if (result != __GCONV_OK) - break; - steps[nsteps].__shlib_handle = NULL; - } + release_step (&steps[nsteps]); /* Release the lock. */ __libc_lock_unlock (lock); diff --git a/iconv/gconv_dl.c b/iconv/gconv_dl.c index d07f84e31b..308db52c83 100644 --- a/iconv/gconv_dl.c +++ b/iconv/gconv_dl.c @@ -164,7 +164,9 @@ do_release_shlib (const void *nodep, VISIT value, int level) } else if (obj->counter <= 0) { - if (--obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL) + if (obj->counter >= -TRIES_BEFORE_UNLOAD) + --obj->counter; + if (obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL) { /* Unload the shared object. */ __libc_dlclose (obj->handle); @@ -175,7 +177,7 @@ do_release_shlib (const void *nodep, VISIT value, int level) /* Notify system that a shared object is not longer needed. */ -int +void internal_function __gconv_release_shlib (struct __gconv_loaded_object *handle) { @@ -186,8 +188,6 @@ __gconv_release_shlib (struct __gconv_loaded_object *handle) with release counts <= 0. This way we can finally unload them if necessary. */ __twalk (loaded, do_release_shlib); - - return __GCONV_OK; } diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index 8333a215c7..ad2d6e7d4a 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -55,7 +55,7 @@ struct gconv_alias object is also handled. */ struct __gconv_loaded_object { - /* Name of the object. */ + /* Name of the object. It must be the first structure element. */ const char *name; /* Reference counter for the db functionality. 
If no conversion is @@ -201,7 +201,7 @@ extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name) /* Release shared object. If no further reference is available unload the object. */ -extern int __gconv_release_shlib (struct __gconv_loaded_object *handle) +extern void __gconv_release_shlib (struct __gconv_loaded_object *handle) internal_function; /* Fill STEP with information about builtin module with NAME. */ diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 9376b151ed..a41e1b50b2 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -34,19 +34,6 @@ #endif -/* These are definitions used by some of the functions for handling - UTF-8 encoding below. */ -static const uint32_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - - /* Transform from the internal, UCS4-like format, to UCS4. The difference between the internal ucs4 format and the real UCS4 format is, if any, the endianess. The Unicode/ISO 10646 says that @@ -856,7 +843,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, char *start; \ \ for (step = 2; step < 6; ++step) \ - if ((wc & encoding_mask[step - 2]) == 0) \ + if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0) \ break; \ \ if (__builtin_expect (outptr + step > outend, 0)) \ @@ -867,7 +854,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, } \ \ start = outptr; \ - *outptr = encoding_byte[step - 2]; \ + *outptr = (unsigned char) (~0xff >> step); \ outptr += step; \ --step; \ do \ diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c index 8c658b126f..4a42a35afd 100644 --- a/iconv/gconv_trans.c +++ b/iconv/gconv_trans.c @@ -330,6 +330,7 @@ __gconv_translit_find (struct trans_struct *trans) { /* Copy the data. 
*/ *trans = (*found)->info; + (*found)->open_count++; res = 0; } } @@ -345,7 +346,7 @@ __gconv_translit_find (struct trans_struct *trans) __gconv_get_path (); /* See whether we have to append .so. */ - if (name_len <= 3 || memcmp (&trans->name[name_len - 3], ".so", 3) != 0) + if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0) need_so = 1; /* Create a new entry. */ @@ -366,7 +367,7 @@ __gconv_translit_find (struct trans_struct *trans) newp->fname = cp; - /* Seach in all the directories. */ + /* Search in all the directories. */ for (runp = __gconv_path_elem; runp->name != NULL; ++runp) { cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name), @@ -382,6 +383,9 @@ __gconv_translit_find (struct trans_struct *trans) } } + if (res) + newp->fname = NULL; + /* In any case we'll add the entry to our search tree. */ if (__tsearch (newp, &search_tree, trans_compare) == NULL) { -- cgit v1.2.3