about | summary | refs | log | tree | commit | diff
path: root/REORG.TODO/locale/loadarchive.c
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/locale/loadarchive.c')
-rw-r--r-- REORG.TODO/locale/loadarchive.c 547
1 files changed, 547 insertions, 0 deletions
diff --git a/REORG.TODO/locale/loadarchive.c b/REORG.TODO/locale/loadarchive.c
new file mode 100644
index 0000000000..e6e1a05d2e
--- /dev/null
+++ b/REORG.TODO/locale/loadarchive.c
@@ -0,0 +1,547 @@
+/* Code to load locale data from the locale archive file.
+ Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <locale.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+
+#include "localeinfo.h"
+#include "locarchive.h"
+#include <not-cancel.h>
+
+/* Define the hash function. We define the function as static inline.
+ While "hashval.h" is being included, the compute_hashval macro puts
+ `static inline' in front of the function name. The macro is removed
+ again right after the include so that later uses of compute_hashval
+ name the function itself. hashval_t stays defined as uint32_t for the
+ rest of this file.
+ NOTE(review): this presumes "hashval.h" defines compute_hashval with
+ return type hashval_t; that header is not visible here -- confirm. */
+#define compute_hashval static inline compute_hashval
+#define hashval_t uint32_t
+#include "hashval.h"
+#undef compute_hashval
+
+
+/* Name of the locale archive file. COMPLOCALEDIR is not defined in this
+ file, so it has to come from elsewhere as a string literal (presumably
+ from the build); "/locale-archive" is appended to it by string literal
+ concatenation. */
+static const char archfname[] = COMPLOCALEDIR "/locale-archive";
+
+/* Size of initial mapping window, optimal if large enough to
+ cover the header plus the initial locale. The value is 2 MiB. The
+ window only limits the first mapping when pointers are at most 4 bytes
+ wide; with wider pointers _nl_load_locale_from_archive maps the whole
+ file instead. */
+#define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024)
+
+#ifndef MAP_COPY
+/* Fall back to MAP_PRIVATE on systems that do not provide MAP_COPY.
+ This is not quite as good as MAP_COPY since unexamined pages
+ can change out from under us and give us inconsistent data.
+ But we rely on the user not to diddle the system's live archive.
+ Even though we only ever use PROT_READ, using MAP_SHARED would
+ not give the system sufficient freedom to e.g. let the on disk
+ file go away because it doesn't know we won't call mprotect later. */
+# define MAP_COPY MAP_PRIVATE
+#endif
+#ifndef MAP_FILE
+ /* Some systems do not have this flag; it is superfluous. Defining it
+ as 0 lets it be or'ed into the mmap flags below without any effect. */
+# define MAP_FILE 0
+#endif
+
+/* Record of contiguous pages already mapped from the locale archive.
+ The records form a singly linked list. _nl_load_locale_from_archive
+ walks the list from the front and splices new records in between
+ existing ones; _nl_archive_subfreeres unmaps all of them and frees the
+ heap-allocated ones.
+ The ARCHMAPPED variable declared after the structure is the head of
+ that list. It is null until the first lookup; from then on it points
+ to `headmap' until _nl_archive_subfreeres resets it to null. */
+struct archmapped
+{
+ void *ptr; /* Address of the mapping as returned by mmap. */
+ uint32_t from; /* Offset in the archive file where the mapping starts. */
+ uint32_t len; /* Length of the mapping in bytes. */
+ struct archmapped *next; /* Next record, or null at the end of the list. */
+};
+static struct archmapped *archmapped;
+
+/* This describes the mapping at the beginning of the file that contains
+ the header data. There could be data in the following partial page,
+ so this is searched like any other. Once the archive has been used,
+ ARCHMAPPED points to this; if mapping the archive header failed,
+ then headmap.ptr is null. Being a static object its FROM field starts
+ out as zero and is never assigned, which matches a mapping made at file
+ offset 0. */
+static struct archmapped headmap;
+static struct stat64 archive_stat; /* stat of archive when header mapped;
+ compared against a fresh stat when the file has to be reopened to map
+ further windows. */
+
+/* Record of locales that we have already loaded from the archive.
+ _nl_load_locale_from_archive pushes one record onto the front of the
+ ARCHLOADED list each time it loads a name it has not seen before, and
+ _nl_archive_subfreeres releases the whole list. */
+struct locale_in_archive
+{
+ struct locale_in_archive *next; /* Next (older) record, or null. */
+ char *name; /* Heap copy of the locale name the caller asked for. */
+ struct __locale_data *data[__LC_LAST]; /* Indexed by category; null for
+ a category whose data could not be interned. The LC_ALL slot is
+ never filled in. */
+};
+static struct locale_in_archive *archloaded;
+
+
+/* Local structure and subroutine of _nl_load_locale_from_archive, see
+ below. Each `struct range' describes where the data of one locale
+ category lives in the archive file. The array of them is sorted by
+ FROM so that categories stored close together can share one mapping. */
+struct range
+{
+ uint32_t from; /* File offset of the category data. */
+ uint32_t len; /* Length of the category data in bytes. */
+ int category; /* Index of the category this range belongs to. */
+ void *result; /* Set to null when the range is filled in; not otherwise
+ used in this file. */
+};
+
+/* Comparison callback for qsort: order two `struct range' objects by
+   ascending FROM offset.  P1 and P2 point to the two elements.  Returns
+   a negative value, zero or a positive value if the first offset is
+   smaller than, equal to or larger than the second.
+
+   The offsets are compared explicitly instead of being subtracted.  FROM
+   is an unsigned 32-bit value, so a difference of 2^31 or more does not
+   fit into the int result and would come out with the wrong sign, giving
+   qsort an inconsistent ordering.  */
+static int
+rangecmp (const void *p1, const void *p2)
+{
+  uint32_t from1 = ((const struct range *) p1)->from;
+  uint32_t from2 = ((const struct range *) p2)->from;
+
+  return (from1 > from2) - (from1 < from2);
+}
+
+
+/* Return the file offset just past the furthest end of the three tables
+   that header H describes: the name hash table, the string table and the
+   locale record table.  For the string and record tables only the part
+   recorded as used is counted, not space preallocated behind it, so the
+   result can be too small if localedef extends the tables in place.
+   Such an update would rewrite header fields while they are being read
+   here and could not be handled safely anyway, so it is not catered
+   for.  */
+static inline off_t
+calculate_head_size (const struct locarhead *h)
+{
+  off_t names_limit = (h->namehash_offset
+                       + h->namehash_size * sizeof (struct namehashent));
+  off_t strings_limit = h->string_offset + h->string_used;
+  off_t records_limit = (h->locrectab_offset
+                         + h->locrectab_used * sizeof (struct locrecent));
+  off_t limit = strings_limit;
+
+  /* Keep whichever table reaches furthest into the file.  */
+  if (records_limit > limit)
+    limit = records_limit;
+  if (names_limit > limit)
+    limit = names_limit;
+
+  return limit;
+}
+
+
+/* Find the locale *NAMEP in the locale archive, and return the
+   internalized data structure for its CATEGORY data, or NULL on failure.
+
+   Every locale loaded successfully is recorded in ARCHLOADED together
+   with the data of all its categories; a later call with the same name
+   returns the recorded pointer without consulting the archive again.
+   On success (cached or fresh) *NAMEP is replaced by the name pointer
+   kept in that record, so that the next call made with it is recognized
+   by a pointer comparison and strcmp is short-circuited.  Note that this
+   pointer is a heap copy of the caller's name, not a pointer into the
+   mapped archive.
+
+   CATEGORY indexes the per-locale data array.  The LC_ALL slot of that
+   array is never filled in, so callers are presumably expected not to
+   pass LC_ALL.
+
+   NULL is returned if the codeset part of the name cannot be normalized,
+   if the archive cannot be opened, examined or mapped, if the name has
+   no live entry in the archive's hash table, if the archive refers to
+   data outside the file, if memory runs out, or if
+   _nl_intern_locale_data did not accept the data for CATEGORY.  Only the
+   last kind of failure is remembered in ARCHLOADED.
+
+   The archive file descriptor is only kept open for the duration of the
+   call.  */
+struct __locale_data *
+internal_function
+_nl_load_locale_from_archive (int category, const char **namep)
+{
+  const char *name = *namep;
+  struct
+  {
+    void *addr;
+    size_t len;
+  } results[__LC_LAST];
+  struct locale_in_archive *lia;
+  struct locarhead *head;
+  struct namehashent *namehashtab;
+  struct locrecent *locrec;
+  struct archmapped *mapped;
+  struct archmapped *last;
+  unsigned long int hval;
+  size_t idx;
+  size_t incr;
+  struct range ranges[__LC_LAST - 1];
+  int nranges;
+  int cnt;
+  size_t ps = __sysconf (_SC_PAGE_SIZE);
+  int fd = -1;
+
+  /* Check if we have already loaded this locale from the archive.
+     If we previously loaded the locale but found bogons in the data,
+     then we will have stored a null pointer to return here.  */
+  for (lia = archloaded; lia != NULL; lia = lia->next)
+    if (name == lia->name || !strcmp (name, lia->name))
+      {
+        *namep = lia->name;
+        return lia->data[category];
+      }
+
+  {
+    /* If the name contains a codeset, then we normalize the name before
+       doing the lookup.  */
+    const char *p = strchr (name, '.');
+    if (p != NULL && p[1] != '@' && p[1] != '\0')
+      {
+        const char *rest = __strchrnul (++p, '@');
+        const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
+        if (normalized_codeset == NULL) /* malloc failure */
+          return NULL;
+        if (strncmp (normalized_codeset, p, rest - p) != 0
+            || normalized_codeset[rest - p] != '\0')
+          {
+            /* There is a normalized codeset name that is different from
+               what was specified; reconstruct a new locale name using it.
+               The new name lives on the stack and is only used for the
+               archive lookup below.  */
+            size_t normlen = strlen (normalized_codeset);
+            size_t restlen = strlen (rest) + 1;
+            char *newname = alloca (p - name + normlen + restlen);
+            memcpy (__mempcpy (__mempcpy (newname, name, p - name),
+                               normalized_codeset, normlen),
+                    rest, restlen);
+            name = newname;
+          }
+        free ((char *) normalized_codeset);
+      }
+  }
+
+  /* Make sure the archive is loaded.  */
+  if (archmapped == NULL)
+    {
+      void *result;
+      size_t headsize, mapsize;
+
+      /* We do this early as a sign that we have tried to open the archive.
+         If headmap.ptr remains null, that's an indication that we tried
+         and failed, so we won't try again.  */
+      archmapped = &headmap;
+
+      /* The archive has never been opened.  */
+      fd = open_not_cancel_2 (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC);
+      if (fd < 0)
+        /* Cannot open the archive, for whatever reason.  */
+        return NULL;
+
+      if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1)
+        {
+          /* stat failed, very strange.  */
+          /* This label is also the common failure exit for the rest of
+             the function: close the descriptor if one is open and
+             report failure.  */
+        close_and_out:
+          if (fd >= 0)
+            close_not_cancel_no_status (fd);
+          return NULL;
+        }
+
+
+      /* Map an initial window probably large enough to cover the header
+         and the first locale's data.  With a large address space, we can
+         just map the whole file and be sure everything is covered.  */
+
+      mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
+                 : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
+
+      result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
+      if (result == MAP_FAILED)
+        goto close_and_out;
+
+      /* Check whether the file is large enough for the sizes given in
+         the header.  Theoretically an archive could be so large that
+         just the header fails to fit in our initial mapping window.  */
+      headsize = calculate_head_size ((const struct locarhead *) result);
+      if (headsize > mapsize)
+        {
+          (void) __munmap (result, mapsize);
+          if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
+            /* The file is not big enough for the header.  Bogus.  */
+            goto close_and_out;
+
+          /* Freakishly long header.  */
+          /* XXX could use mremap when available */
+          mapsize = (headsize + ps - 1) & ~(ps - 1);
+          result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
+                             fd, 0);
+          if (result == MAP_FAILED)
+            goto close_and_out;
+        }
+
+      if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
+        {
+          /* We've mapped the whole file already, so we can be
+             sure we won't need this file descriptor later.  */
+          close_not_cancel_no_status (fd);
+          fd = -1;
+        }
+
+      headmap.ptr = result;
+      /* headmap.from already initialized to zero.  */
+      headmap.len = mapsize;
+    }
+
+  /* If there is no archive or it cannot be loaded for some reason fail.  */
+  if (__glibc_unlikely (headmap.ptr == NULL))
+    goto close_and_out;
+
+  /* We have the archive available.  To find the name we first have to
+     determine its hash value.  */
+  hval = compute_hashval (name, strlen (name));
+
+  head = headmap.ptr;
+  namehashtab = (struct namehashent *) ((char *) head
+                                        + head->namehash_offset);
+
+  /* Refuse hash tables with fewer than three slots; only a corrupted
+     file can have one.  The probe increment below is computed modulo
+     namehash_size - 2, so a size of 0 or 2 would lead to a division by
+     zero, and with a size of 1 the subtraction would wrap around (the
+     field is presumably unsigned) and yield an increment that can move
+     IDX beyond the end of the table.  */
+  if (__glibc_unlikely (head->namehash_size <= 2))
+    goto close_and_out;
+
+  idx = hval % head->namehash_size;
+  incr = 1 + hval % (head->namehash_size - 2);
+
+  /* If the name_offset field is zero this means this is a
+     deleted entry and therefore no entry can be found.  */
+  while (1)
+    {
+      if (namehashtab[idx].name_offset == 0)
+        /* Not found.  */
+        goto close_and_out;
+
+      if (namehashtab[idx].hashval == hval
+          && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
+        /* Found the entry.  */
+        break;
+
+      /* Step to the next slot of the probe sequence, wrapping around at
+         the end of the table.  */
+      idx += incr;
+      if (idx >= head->namehash_size)
+        idx -= head->namehash_size;
+    }
+
+  /* We found an entry.  It might be a placeholder for a removed one.  */
+  if (namehashtab[idx].locrec_offset == 0)
+    goto close_and_out;
+
+  locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
+
+  if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
+    {
+      /* We already have the whole locale archive mapped in.  */
+      assert (headmap.len == archive_stat.st_size);
+      for (cnt = 0; cnt < __LC_LAST; ++cnt)
+        if (cnt != LC_ALL)
+          {
+            if (locrec->record[cnt].offset + locrec->record[cnt].len
+                > headmap.len)
+              /* The archive locrectab contains bogus offsets.  */
+              goto close_and_out;
+            results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
+            results[cnt].len = locrec->record[cnt].len;
+          }
+    }
+  else
+    {
+      /* Get the offsets of the data files and sort them.  */
+      for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
+        if (cnt != LC_ALL)
+          {
+            ranges[nranges].from = locrec->record[cnt].offset;
+            ranges[nranges].len = locrec->record[cnt].len;
+            ranges[nranges].category = cnt;
+            ranges[nranges].result = NULL;
+
+            ++nranges;
+          }
+
+      qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
+
+      /* The information about mmap'd blocks is kept in a list.
+         Skip over the blocks which are before the data we need.  */
+      last = mapped = archmapped;
+      for (cnt = 0; cnt < nranges; ++cnt)
+        {
+          int upper;
+          size_t from;
+          size_t to;
+          void *addr;
+          struct archmapped *newp;
+
+          /* Determine whether the appropriate page is already mapped.  */
+          while (mapped != NULL
+                 && (mapped->from + mapped->len
+                     <= ranges[cnt].from + ranges[cnt].len))
+            {
+              last = mapped;
+              mapped = mapped->next;
+            }
+
+          /* Do we have a match?  */
+          if (mapped != NULL
+              && mapped->from <= ranges[cnt].from
+              && (ranges[cnt].from + ranges[cnt].len
+                  <= mapped->from + mapped->len))
+            {
+              /* Yep, already loaded.  */
+              results[ranges[cnt].category].addr = ((char *) mapped->ptr
+                                                    + ranges[cnt].from
+                                                    - mapped->from);
+              results[ranges[cnt].category].len = ranges[cnt].len;
+              continue;
+            }
+
+          /* Map the range with the locale data from the file.  We will
+             try to cover as much of the locale as possible.  I.e., if the
+             next category (next as in "next offset") is on the current or
+             immediately following page we use it as well.  */
+          assert (powerof2 (ps));
+          from = ranges[cnt].from & ~(ps - 1);
+          upper = cnt;
+          do
+            {
+              to = ranges[upper].from + ranges[upper].len;
+              if (to > (size_t) archive_stat.st_size)
+                /* The archive locrectab contains bogus offsets.  */
+                goto close_and_out;
+              to = (to + ps - 1) & ~(ps - 1);
+
+              /* If a range is already mmaped in, stop.  */
+              if (mapped != NULL && ranges[upper].from >= mapped->from)
+                break;
+
+              ++upper;
+            }
+          /* Loop while still in contiguous pages.  */
+          while (upper < nranges && ranges[upper].from < to + ps);
+
+          /* Open the file if it hasn't happened yet.  */
+          if (fd == -1)
+            {
+              struct stat64 st;
+              fd = open_not_cancel_2 (archfname,
+                                      O_RDONLY|O_LARGEFILE|O_CLOEXEC);
+              if (fd == -1)
+                /* Cannot open the archive, for whatever reason.  */
+                return NULL;
+              /* Now verify we think this is really the same archive file
+                 we opened before.  If it has been changed we cannot trust
+                 the header we read previously.  */
+              if (__fxstat64 (_STAT_VER, fd, &st) < 0
+                  || st.st_size != archive_stat.st_size
+                  || st.st_mtime != archive_stat.st_mtime
+                  || st.st_dev != archive_stat.st_dev
+                  || st.st_ino != archive_stat.st_ino)
+                goto close_and_out;
+            }
+
+          /* Map the range from the archive.  */
+          addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
+                           fd, from);
+          if (addr == MAP_FAILED)
+            goto close_and_out;
+
+          /* Allocate a record for this mapping.  */
+          newp = (struct archmapped *) malloc (sizeof (struct archmapped));
+          if (newp == NULL)
+            {
+              (void) __munmap (addr, to - from);
+              goto close_and_out;
+            }
+
+          /* And queue it.  The new record goes between the last record
+             skipped above and the one the search stopped at.  */
+          newp->ptr = addr;
+          newp->from = from;
+          newp->len = to - from;
+          assert (last->next == mapped);
+          newp->next = mapped;
+          last->next = newp;
+          last = newp;
+
+          /* Determine the load addresses for the category data.  */
+          do
+            {
+              assert (ranges[cnt].from >= from);
+              results[ranges[cnt].category].addr = ((char *) addr
+                                                    + ranges[cnt].from - from);
+              results[ranges[cnt].category].len = ranges[cnt].len;
+            }
+          while (++cnt < upper);
+          --cnt;                /* The 'for' will increase 'cnt' again.  */
+        }
+    }
+
+  /* We don't need the file descriptor any longer.  */
+  if (fd >= 0)
+    close_not_cancel_no_status (fd);
+  fd = -1;
+
+  /* We succeeded in mapping all the necessary regions of the archive.
+     Now we need the expected data structures to point into the data.  */
+
+  lia = malloc (sizeof *lia);
+  if (__glibc_unlikely (lia == NULL))
+    return NULL;
+
+  /* The record is keyed by the name the caller passed in, not by the
+     normalized name used for the archive lookup, so that the cache check
+     at the top of this function finds it again.  */
+  lia->name = __strdup (*namep);
+  if (__glibc_unlikely (lia->name == NULL))
+    {
+      free (lia);
+      return NULL;
+    }
+
+  lia->next = archloaded;
+  archloaded = lia;
+
+  for (cnt = 0; cnt < __LC_LAST; ++cnt)
+    if (cnt != LC_ALL)
+      {
+        lia->data[cnt] = _nl_intern_locale_data (cnt,
+                                                 results[cnt].addr,
+                                                 results[cnt].len);
+        if (__glibc_likely (lia->data[cnt] != NULL))
+          {
+            /* _nl_intern_locale_data leaves us these fields to initialize.  */
+            lia->data[cnt]->alloc = ld_archive;
+            lia->data[cnt]->name = lia->name;
+
+            /* We do this instead of bumping the count each time we return
+               this data because the mappings stay around forever anyway
+               and we might as well hold on to a little more memory and not
+               have to rebuild it on the next lookup of the same thing.
+               If we were to maintain the usage_count normally and let the
+               structures be freed, we would have to remove the elements
+               from archloaded too.  */
+            lia->data[cnt]->usage_count = UNDELETABLE;
+          }
+      }
+
+  *namep = lia->name;
+  return lia->data[category];
+}
+
+/* Release everything this file has cached: the records of the locales
+   loaded from the archive together with their per-category data, and
+   every mapping of the archive file.  Afterwards ARCHLOADED and
+   ARCHMAPPED are both null again.  */
+void __libc_freeres_fn_section
+_nl_archive_subfreeres (void)
+{
+  struct locale_in_archive *entry = archloaded;
+  struct archmapped *window;
+
+  /* First drop the cached locales, one list element at a time.  */
+  while (entry != NULL)
+    {
+      struct locale_in_archive *following = entry->next;
+      int cat;
+
+      free (entry->name);
+      for (cat = 0; cat < __LC_LAST; ++cat)
+        {
+          struct __locale_data *ld;
+
+          /* The LC_ALL slot holds no data of its own.  */
+          if (cat == LC_ALL)
+            continue;
+
+          ld = entry->data[cat];
+          if (ld == NULL)
+            continue;
+
+          /* For archive data _nl_unload_locale does no more than this:
+             run the cleanup hook if there is one, then free the
+             structure itself.  */
+          if (ld->private.cleanup)
+            (*ld->private.cleanup) (ld);
+          free (ld);
+        }
+
+      free (entry);
+      entry = following;
+    }
+  archloaded = NULL;
+
+  /* No lookup ever got as far as trying to map the archive.  */
+  if (archmapped == NULL)
+    return;
+
+  /* Remove the mapping windows.  Nothing can be using them any more
+     since all the locales pointing into them were released above.  The
+     header window is described by a static record, so it is unmapped but
+     not freed; all further records were allocated with malloc.  */
+  assert (archmapped == &headmap);
+  archmapped = NULL;
+  (void) __munmap (headmap.ptr, headmap.len);
+
+  window = headmap.next;
+  while (window != NULL)
+    {
+      struct archmapped *following = window->next;
+
+      (void) __munmap (window->ptr, window->len);
+      free (window);
+      window = following;
+    }
+}