This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
On Tue, Mar 10, 2015 at 07:20:15PM +0100, Leonhard Holz wrote: > This is essentially the benchmark from last year > (https://sourceware.org/ml/libc-alpha/2014-10/msg00345.html) plus an > enchanced benchtests/Makefile to generate the needed locales. The attached > input file have to be deployed to the benchtest directory. > > * benchtests/bench-strcoll.c: New benchmark. > * benchtests/Makefile: Generate locales and run benchmark. > * localedata/gen-locale.sh (generate_locale): Make path to charmaps > independent of current working directory. > * benchtests/lorem_ipsum_ar_SA: New benchmark input file. > * benchtests/lorem_ipsum_cs_CZ: Likewise. > * benchtests/lorem_ipsum_da_DK: Likewise. > * benchtests/lorem_ipsum_el_GR: Likewise. > * benchtests/lorem_ipsum_en_GB: Likewise. > * benchtests/lorem_ipsum_en_US: Likewise. > * benchtests/lorem_ipsum_es_ES: Likewise. > * benchtests/lorem_ipsum_fr_FR: Likewise. > * benchtests/lorem_ipsum_hi_IN: Likewise. > * benchtests/lorem_ipsum_hu_HU: Likewise. > * benchtests/lorem_ipsum_is_IS: Likewise. > * benchtests/lorem_ipsum_it_IT: Likewise. > * benchtests/lorem_ipsum_iw_IL: Likewise. > * benchtests/lorem_ipsum_ja_JP: Likewise. > * benchtests/lorem_ipsum_pl_PL: Likewise. > * benchtests/lorem_ipsum_pt_PT: Likewise. > * benchtests/lorem_ipsum_ru_RU: Likewise. > * benchtests/lorem_ipsum_sr_RS: Likewise. > * benchtests/lorem_ipsum_sv_SE: Likewise. > * benchtests/lorem_ipsum_tr_TR: Likewise. > * benchtests/lorem_ipsum_vi_VN: Likewise. > * benchtests/lorem_ipsum_zh_CN: Likewise. Put the input files in a separate directory, say, benchtests/strcoll-inputs/. > diff --git a/benchtests/bench-strcoll.c b/benchtests/bench-strcoll.c > index e69de29..6636e38 100644 > --- a/benchtests/bench-strcoll.c > +++ b/benchtests/bench-strcoll.c > @@ -0,0 +1,382 @@ > +/* Measure strcoll execution time in different locales. > + Copyright (C) 2015 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#define TEST_MAIN > +#define TEST_NAME "strcoll" > + > +#include <stdio.h> > +#include <fcntl.h> > +#include <assert.h> > +#include <stdlib.h> > +#include <locale.h> > +#include <unistd.h> > +#include <dirent.h> > +#include "json-lib.h" > +#include "bench-timing.h" > + > +/* many thanks to http://generator.lorem-ipsum.info/ */ Capitalize first letter and two spaces before comment end. > +#define CHARSET "UTF-8" > +#define INPUT_PREFIX "lorem_ipsum_" > + > +static const char *const locales[] = { > + "vi_VN", > + "en_US", > + "ar_SA", > + "en_US", > + "zh_CN", > + "cs_CZ", > + "en_GB", > + "da_DK", > + "pl_PL", > + "fr_FR", > + "pt_PT", > + "el_GR", > + "ru_RU", > + "iw_IL", > + "es_ES", > + "hi_IN", > + "sv_SE", > + "hu_HU", > + "tr_TR", > + "is_IS", > + "it_IT", > + "sr_RS", > + "ja_JP" > +}; > + > +#define TEXTFILE_DELIMITER " \n\r\t.,?!" > +#define FILENAMES_DELIMITER "\n\r" > + > +static int > +is_dir (const char *filename) > +{ > + int is_dir = 0; > + struct stat stats; > + > + if (stat (filename, &stats) == 0) > + is_dir = stats.st_mode & S_IFDIR; > + > + return is_dir; > +} > + > +static char * > +concat_path (const char *dirname, const char *filename) > +{ > + char *path; > + int ret = asprintf (&path, "%s/%s", dirname, filename); > + assert (ret > 0); > + return path; > +} > + > +static char * > +read_file (const char *filename) > +{ > + struct stat stats; > + char *buffer = NULL; > + int fd = open (filename, O_CLOEXEC); > + > + if (fd >= 0) > + { > + if (fstat (fd, &stats) == 0) > + { > + buffer = malloc (stats.st_size + 1); > + if (buffer) > + { > + if (read (fd, buffer, stats.st_size) == stats.st_size) > + buffer[stats.st_size] = '\0'; > + else > + { > + free (buffer); > + buffer = NULL; > + } > + } > + } > + close (fd); > + } > + > + return buffer; > +} > + > +static size_t > +count_words (const char *text, const char *delim) > +{ > + size_t wordcount = 0; > + char *tmp = strdup (text); > + > + char *token = strtok (tmp, delim); > + while (token != NULL) > + { > + if (*token != '\0') > + wordcount++; > + token = strtok (NULL, delim); > + } > + > + free (tmp); > + return wordcount; > +} > + > +typedef struct > +{ > + size_t size; > + char **words; > +} word_list; > + > +static word_list * > +new_word_list (size_t size) > +{ > + word_list *list = malloc (sizeof (word_list)); > + assert (list != NULL); > + list->size = size; > + list->words = malloc (size * sizeof (char *)); > + assert (list->words != NULL); > + return list; > +} > + > +static word_list * > +merge_word_list (word_list * dst, word_list * src) > +{ > + size_t i, n = dst->size; > + dst->size += src->size; > + > + char **new_words = malloc (dst->size * sizeof (char *)); > + assert (new_words != NULL); > + memcpy (new_words, dst->words, n * sizeof (char *)); > + free (dst->words); > + dst->words = new_words; > + > + for (i = 0; i < src->size; i++) > + dst->words[i + n] = src->words[i]; > + > + free (src->words); > + free (src); > + > + return dst; > +} > + > +static word_list * > +file_word_list (const char *dirname) > +{ > + DIR *dir; > + struct dirent *ent; > + word_list *list = NULL; > + > + if ((dir = opendir (dirname)) != NULL) > + { > + size_t ent_cnt = 0, i = 0; > + word_list *sublist = new_word_list (0); > + > + while ((ent = readdir (dir)) != NULL) > + if (ent->d_name[0] != '.') > + { > + char *subdirname = concat_path (dirname, ent->d_name); > + if (is_dir (subdirname)) > + merge_word_list (sublist, file_word_list (subdirname)); > + free (subdirname); > + > + ent_cnt++; > + } > + > + rewinddir (dir); > + list = new_word_list (ent_cnt); > + while ((ent = readdir (dir)) != NULL) > + if (ent->d_name[0] != '.') > + { > + list->words[i] = strdup (ent->d_name); > + i++; > + } > + > + merge_word_list (list, sublist); > + closedir (dir); > + } > + > + return list; > +} > + > +static word_list * > +str_word_list (const char *str, const char *delim) > +{ > + size_t n = 0; > + word_list *list = new_word_list (count_words (str, delim)); > + > + char *toks = strdup (str); > + char *word = strtok (toks, delim); > + while (word != NULL && n < list->size) > + { > + if (*word != '\0') > + list->words[n++] = strdup (word); > + word = strtok (NULL, delim); > + } > + > + free (toks); > + return list; > +} > + > +static word_list * > +copy_word_list (const word_list *list) > +{ > + size_t i; > + word_list *copy = new_word_list (list->size); > + > + for (i = 0; i < list->size; i++) > + copy->words[i] = strdup (list->words[i]); > + > + return copy; > +} > + > +static void > +free_word_list (word_list *list) > +{ > + size_t i; > + for (i = 0; i < list->size; i++) > + free (list->words[i]); > + > + free (list->words); > + free (list); > +} > + > +static int > +compare_words (const void *a, const void *b) > +{ > + const char *s1 = *(char **) a; > + const char *s2 = *(char **) b; > + return strcoll (s1, s2); > +} > + > +#undef INNER_LOOP_ITERS > +#define INNER_LOOP_ITERS 16 > + > +static void > +bench_list (json_ctx_t *json_ctx, word_list *list) > +{ > + size_t i; > + timing_t start, stop, cur; > + > + word_list **tests = malloc (INNER_LOOP_ITERS * sizeof (word_list *)); > + assert (tests != NULL); > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + tests[i] = copy_word_list (list); > + > + TIMING_NOW (start); > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + qsort (tests[i]->words, tests[i]->size, sizeof (char *), compare_words); > + TIMING_NOW (stop); > + > + TIMING_DIFF (cur, start, stop); > + setlocale (LC_ALL, "en_US.UTF-8"); > + json_attr_double (json_ctx, "duration", cur); > + json_attr_double (json_ctx, "iterations", i); > + json_attr_double (json_ctx, "mean", cur / i); > + > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + free_word_list (tests[i]); > + free (tests); > +} > + > +#define OK 0 > +#define ERROR_LOCALE 1 > +#define ERROR_IO 2 Use an enum instead. > + > +static int > +bench_file (json_ctx_t *json_ctx, const char *filename, const char *delim, > + const char *locale) Adjust this as recommended below. > +{ > + if (setlocale (LC_ALL, locale) == NULL) > + return ERROR_LOCALE; > + > + char *text = read_file (filename); > + if (text == NULL) > + return ERROR_IO; > + > + word_list *list = str_word_list (text, delim); > + > + json_attr_object_begin (json_ctx, locale); > + bench_list (json_ctx, list); > + json_attr_object_end (json_ctx); > + > + free_word_list (list); > + free (text); > + return OK; > +} > + > +static int > +bench_file_list (json_ctx_t *json_ctx, const char *dirname, const char *locale) If you just create a file with this information once, you can collapse this special case into the other cases and get rid of this function. > +{ > + if (setlocale (LC_ALL, locale) == NULL) > + return ERROR_LOCALE; > + > + word_list *list = file_word_list (dirname); > + if (list == NULL) > + return ERROR_IO; > + > + json_attr_object_begin (json_ctx, ""); > + json_attr_string (json_ctx, "locale", locale); > + bench_list (json_ctx, list); > + json_attr_object_end (json_ctx); > + > + free_word_list (list); > + return OK; > +} > + > +static int > +do_bench (void) > +{ > + timing_t res; > + TIMING_INIT (res); > + > + json_ctx_t *json_ctx = malloc (sizeof (json_ctx_t)); > + assert (json_ctx != NULL); > + json_init (json_ctx, 2, stdout); > + json_attr_object_begin (json_ctx, "strcoll"); > + > + int result = bench_file_list (json_ctx, "..", "C"); > + if (result != OK) { > + error: > + if (result == ERROR_LOCALE) > + printf ("Failed to set locale en_US.UTF-8, aborting!\n"); s/en_US.UTF-8/C/ In fact, set a variable and use it to pass to bench_file_list and the printf so as to avoid such an error. > + else if (result == ERROR_IO) > + printf ("Failed to read libc directory, aborting!\n"); > + else > + printf ("Error, aborting!\n"); Print the value of the result. > + return result; > + } > + result = bench_file_list (json_ctx, "..", "en_US.UTF-8"); > + if (result != OK) > + goto error; Instead, split the error handling code into a function, set RESULT and goto a label 'out' near the end of the function that just returns RESULT. > + > + size_t i; > + char filename[40], locale[15]; > + for (i = 0; i < (sizeof (locales) / sizeof (locales[0])); i++) > + { > + sprintf (locale, "%s." CHARSET, locales[i]); > + sprintf (filename, INPUT_PREFIX "%s", locales[i]); > + result = bench_file (json_ctx, filename, TEXTFILE_DELIMITER, locale); Just make bench_file accept the locale name and deduce the locale and filename in the function. You don't really need to pass the delimiter either since you don't call bench_file with anything other than TEXTFILE_DELIMITER. That could simplify your code to: result = bench_file (json_ctx, locales[i]); if (result != OK) goto out; > + if (result != OK) > + goto error; > + } > + > + json_attr_object_end (json_ctx); > + free (json_ctx); This is where the out: label goes and you return RESULT instead of OK. > + return OK; > +} > + > +#define TEST_FUNCTION do_bench () > + > +/* On slower platforms this test needs more than the default 2 seconds. */ > +#define TIMEOUT 10 > + > +#include "../test-skeleton.c" Don't use the test-skeleton. Make do_bench the main function. The other string benchmarks do this because they were directly ported from the old test cases. In fact, they need to be evaluated and improved. > diff --git a/benchtests/Makefile b/benchtests/Makefile > index 08603a2..4aaff73 100644 > --- a/benchtests/Makefile > +++ b/benchtests/Makefile > @@ -34,9 +34,22 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy > memmem memmove \ > mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ > strcat strchr strchrnul strcmp strcpy strcspn strlen \ > strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ > - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok > + strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ > + strcoll > string-bench-all := $(string-bench) > > +# We have to generate locales > +LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ > + ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \ > + da_DK.UTF-8 pl_PL.UTF-8 pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 \ > + iw_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \ > + hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8 > +LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') > +CHARMAPS := $(shell echo "$(LOCALES)" | \ > + sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) > +CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) > +gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) > + > stdlib-bench := strtod > > benchset := $(string-bench-all) $(stdlib-bench) > @@ -65,6 +78,19 @@ binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) > binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset)) > binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc)) > > +# Dependency for the locale files. We actually make it depend only on > +# one of the files. > +$(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)): %: \ > + ../localedata/gen-locale.sh \ > + $(common-objpfx)locale/localedef \ > + ../localedata/Makefile \ > + $(addprefix ../localedata/charmaps/,$(CHARMAPS)) \ > + $(addprefix ../localedata/locales/,$(LOCALE_SRCS)) > + @$(SHELL) ../localedata/gen-locale.sh $(common-objpfx) \ > + '$(built-program-cmd-before-env)' '$(run-program-env)' \ > + '$(built-program-cmd-after-env)' $@; \ > + $(evaluate-test) > + AFAICT all of this is copied from localedata/Makefile. Instead, make a common snippet that you can include in both these places. This will reduce the pain of remembering to update two locations when any of this code needs to be fixed. > # The default duration: 10 seconds. > ifndef BENCH_DURATION > BENCH_DURATION := 10 > @@ -107,7 +133,7 @@ bench-clean: > rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc)) > rm -f $(timing-type) $(addsuffix .o,$(timing-type)) > > -bench: $(timing-type) bench-set bench-func bench-malloc > +bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc > > bench-set: $(binaries-benchset) > for run in $^; do \ > > diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh > index 4156f9e..2a48a34 100644 > --- a/localedata/gen-locale.sh > +++ b/localedata/gen-locale.sh > @@ -30,7 +30,8 @@ generate_locale () > charmap=$1 > input=$2 > out=$3 > - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ > + if ${localedef_before_env} ${run_program_env} \ > + I18NPATH=${common_objpfx}../localedata \ Shouldn't this be I18NPATH=${common_objpfx}localedata? > ${localedef_after_env} --quiet -c -f $charmap -i $input \ > ${common_objpfx}localedata/$out > then >
Attachment:
pgpVSVKadt5T3.pgp
Description: PGP signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |