This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Sorry for getting back on this so late. This version looks much better, except for a few nits. On Tue, Mar 31, 2015 at 12:48:45PM +0200, Leonhard Holz wrote: > Ok, this is v3 of the benchmark. I followed the given advisories but > reworked the input file handling in a different way. We have different input > texts *and* different locales, so just providing the locales is not enough. > Instead the filenames are now spelled out and the locale is derived from it. > > * benchtests/bench-strcoll.c: New benchmark. > * gen-locales.mk: New Makefile snippet to generate locales. > * localedata/Makefile: Use gen-locales.mk for locale generation. > * benchtests/Makefile: Generate locales and run benchmark. > * localedata/gen-locale.sh (generate_locale): Make path to charmaps > independent of current working directory. > * benchtests/strcoll-inputs/filelist#C: New benchmark input file. > * benchtests/strcoll-inputs/filelist#en_US.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#ar_SA.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#cs_CZ.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#da_DK.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#el_GR.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#en_GB.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#en_US.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#es_ES.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#fr_FR.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#hi_IN.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#hu_HU.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#is_IS.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#it_IT.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#iw_IL.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#ja_JP.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#pl_PL.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#pt_PT.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#ru_RU.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#sr_RS.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#sv_SE.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#tr_TR.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#vi_VN.UTF-8: Likewise. > * benchtests/strcoll-inputs/lorem_ipsum#zh_CN.UTF-8: Likewise. > > diff --git a/benchtests/Makefile b/benchtests/Makefile > index 08603a2..cb7a97e 100644 > --- a/benchtests/Makefile > +++ b/benchtests/Makefile > @@ -34,9 +34,18 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy > memmem memmove \ > mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ > strcat strchr strchrnul strcmp strcpy strcspn strlen \ > strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ > - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok > + strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ > + strcoll > string-bench-all := $(string-bench) > > +# We have to generate locales > +LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ > + ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \ > + da_DK.UTF-8 pl_PL.UTF-8 pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 \ > + iw_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \ > + hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8 > +include ../gen-locales.mk > + > stdlib-bench := strtod > > benchset := $(string-bench-all) $(stdlib-bench) > @@ -107,7 +116,7 @@ bench-clean: > rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc)) > rm -f $(timing-type) $(addsuffix .o,$(timing-type)) > > -bench: $(timing-type) bench-set bench-func bench-malloc > +bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc > > bench-set: $(binaries-benchset) > for run in $^; do \ > diff --git a/benchtests/bench-strcoll.c b/benchtests/bench-strcoll.c > index e69de29..fda79c2 100644 > --- a/benchtests/bench-strcoll.c > +++ b/benchtests/bench-strcoll.c > @@ -0,0 +1,276 @@ > +/* Measure strcoll execution time in different locales. > + Copyright (C) 2015 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <stdio.h> > +#include <fcntl.h> > +#include <assert.h> > +#include <stdlib.h> > +#include <locale.h> > +#include <unistd.h> > +#include "json-lib.h" > +#include "bench-timing.h" > + > +/* Many thanks to http://generator.lorem-ipsum.info/ */ > +#define INPUT_PREFIX "strcoll-inputs/" > + > +static const char *const input_files[] = { > + "filelist#C", > + "filelist#en_US.UTF-8", > + "lorem_ipsum#vi_VN.UTF-8", > + "lorem_ipsum#en_US.UTF-8", > + "lorem_ipsum#ar_SA.UTF-8", > + "lorem_ipsum#en_US.UTF-8", > + "lorem_ipsum#zh_CN.UTF-8", > + "lorem_ipsum#cs_CZ.UTF-8", > + "lorem_ipsum#en_GB.UTF-8", > + "lorem_ipsum#da_DK.UTF-8", > + "lorem_ipsum#pl_PL.UTF-8", > + "lorem_ipsum#fr_FR.UTF-8", > + "lorem_ipsum#pt_PT.UTF-8", > + "lorem_ipsum#el_GR.UTF-8", > + "lorem_ipsum#ru_RU.UTF-8", > + "lorem_ipsum#iw_IL.UTF-8", > + "lorem_ipsum#es_ES.UTF-8", > + "lorem_ipsum#hi_IN.UTF-8", > + "lorem_ipsum#sv_SE.UTF-8", > + "lorem_ipsum#hu_HU.UTF-8", > + "lorem_ipsum#tr_TR.UTF-8", > + "lorem_ipsum#is_IS.UTF-8", > + "lorem_ipsum#it_IT.UTF-8", > + "lorem_ipsum#sr_RS.UTF-8", > + "lorem_ipsum#ja_JP.UTF-8" > +}; > + > +#define TEXTFILE_DELIMITER " \n\r\t.,?!" > + > +static char * > +read_file (const char *filename) > +{ > + struct stat stats; > + char *buffer = NULL; > + int fd = open (filename, O_CLOEXEC); > + > + if (fd >= 0) > + { > + if (fstat (fd, &stats) == 0) > + { > + buffer = malloc (stats.st_size + 1); > + if (buffer) > + { > + if (read (fd, buffer, stats.st_size) == stats.st_size) > + buffer[stats.st_size] = '\0'; > + else > + { > + free (buffer); > + buffer = NULL; > + } > + } > + } > + close (fd); > + } > + > + return buffer; > +} > + > +static size_t > +count_words (const char *text, const char *delim) > +{ > + size_t wordcount = 0; > + char *tmp = strdup (text); > + > + char *token = strtok (tmp, delim); > + while (token != NULL) > + { > + if (*token != '\0') > + wordcount++; > + token = strtok (NULL, delim); > + } > + > + free (tmp); > + return wordcount; > +} > + > +typedef struct > +{ > + size_t size; > + char **words; > +} word_list; > + > +static word_list * > +new_word_list (size_t size) > +{ > + word_list *list = malloc (sizeof (word_list)); > + assert (list != NULL); > + list->size = size; > + list->words = malloc (size * sizeof (char *)); > + assert (list->words != NULL); > + return list; > +} > + > +static word_list * > +str_word_list (const char *str, const char *delim) > +{ > + size_t n = 0; > + word_list *list = new_word_list (count_words (str, delim)); > + > + char *toks = strdup (str); > + char *word = strtok (toks, delim); > + while (word != NULL && n < list->size) > + { > + if (*word != '\0') > + list->words[n++] = strdup (word); > + word = strtok (NULL, delim); > + } > + > + free (toks); > + return list; > +} > + > +static word_list * > +copy_word_list (const word_list *list) > +{ > + size_t i; > + word_list *copy = new_word_list (list->size); > + > + for (i = 0; i < list->size; i++) > + copy->words[i] = strdup (list->words[i]); > + > + return copy; > +} > + > +static void > +free_word_list (word_list *list) > +{ > + size_t i; > + for (i = 0; i < list->size; i++) > + free (list->words[i]); > + > + free (list->words); > + free (list); > +} > + > +static int > +compare_words (const void *a, const void *b) > +{ > + const char *s1 = *(char **) a; > + const char *s2 = *(char **) b; > + return strcoll (s1, s2); > +} > + > +#undef INNER_LOOP_ITERS > +#define INNER_LOOP_ITERS 16 > + > +static void > +bench_list (json_ctx_t *json_ctx, word_list *list) > +{ > + size_t i; > + timing_t start, stop, cur; > + > + word_list **tests = malloc (INNER_LOOP_ITERS * sizeof (word_list *)); > + assert (tests != NULL); > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + tests[i] = copy_word_list (list); > + > + TIMING_NOW (start); > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + qsort (tests[i]->words, tests[i]->size, sizeof (char *), compare_words); > + TIMING_NOW (stop); > + > + TIMING_DIFF (cur, start, stop); > + setlocale (LC_ALL, "en_US.UTF-8"); > + json_attr_double (json_ctx, "duration", cur); > + json_attr_double (json_ctx, "iterations", i); > + json_attr_double (json_ctx, "mean", cur / i); Cast either CUR or I as double to avoid a rounded integral result instead of the actual mean. > + > + for (i = 0; i < INNER_LOOP_ITERS; i++) > + free_word_list (tests[i]); > + free (tests); > +} > + > +typedef enum > +{ > + OK, > + ERROR_FILENAME, > + ERROR_LOCALE, > + ERROR_IO > +} result_t; > + > +static result_t > +bench_file (json_ctx_t *json_ctx, const char *testname, const char *filename, > + const char *locale) Incorrect formatting. > +{ > + if (setlocale (LC_ALL, locale) == NULL) > + return ERROR_LOCALE; > + > + char *text = read_file (filename); > + if (text == NULL) > + return ERROR_IO; > + > + word_list *list = str_word_list (text, TEXTFILE_DELIMITER); > + > + json_attr_object_begin (json_ctx, testname); > + bench_list (json_ctx, list); > + json_attr_object_end (json_ctx); > + > + free_word_list (list); > + free (text); > + return OK; > +} > + > +int > +main (void) > +{ > + timing_t res; > + TIMING_INIT (res); > + > + json_ctx_t *json_ctx = malloc (sizeof (json_ctx_t)); > + assert (json_ctx != NULL); > + json_init (json_ctx, 2, stdout); > + json_attr_object_begin (json_ctx, "strcoll"); > + > + size_t i; > + result_t result = OK; > + for (i = 0; i < (sizeof (input_files) / sizeof (input_files[0])); i++) > + { > + char *locale = strchr (input_files[i], '#'); > + if (locale == NULL) { Incorrect formatting > + printf ("Failed to get locale from filename %s, aborting!\n", > + input_files[i]); > + return ERROR_FILENAME; > + } > + > + char *filename; > + asprintf (&filename, INPUT_PREFIX "%s", input_files[i]); > + result = bench_file (json_ctx, input_files[i], filename, locale + 1); > + > + if (result != OK) { Incorrect formatting. > + if (result == ERROR_LOCALE) > + printf ("Failed to set locale %s, aborting!\n", locale); > + else if (result == ERROR_IO) > + printf ("Failed to read file %s, aborting!\n", filename); > + free (filename); > + goto out; > + } > + free (filename); > + } > + > +out: > + json_attr_object_end (json_ctx); > + free (json_ctx); > + return result; > +} > diff --git a/gen-locales.mk b/gen-locales.mk > index e69de29..354fc53 100644 > --- a/gen-locales.mk > +++ b/gen-locales.mk > @@ -0,0 +1,21 @@ > +# defines target $(gen-locales) that generates the locales given in $(LOCALES) > + > +LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') > +CHARMAPS := $(shell echo "$(LOCALES)" | \ > + sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) > +CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) > +gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) > + > +# Dependency for the locale files. We actually make it depend only on > +# one of the files. > +$(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)): %: \ > + ../localedata/gen-locale.sh \ > + $(common-objpfx)locale/localedef \ > + ../localedata/Makefile \ > + $(addprefix ../localedata/charmaps/,$(CHARMAPS)) \ > + $(addprefix ../localedata/locales/,$(LOCALE_SRCS)) > + @$(SHELL) ../localedata/gen-locale.sh $(common-objpfx) \ > + '$(built-program-cmd-before-env)' '$(run-program-env)' \ > + '$(built-program-cmd-after-env)' $@; \ > + $(evaluate-test) > + > diff --git a/localedata/Makefile b/localedata/Makefile > index 0f67725..03ec98c 100644 > --- a/localedata/Makefile > +++ b/localedata/Makefile > @@ -107,11 +107,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \ > nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \ > zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \ > tr_TR.ISO-8859-9 en_GB.UTF-8 > -LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') > -CHARMAPS := $(shell echo "$(LOCALES)" | \ > - sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) > -CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) > -tests-special += $(addprefix $(objpfx),$(CTYPE_FILES)) > +include ../gen-locales.mk > endif > > include ../Rules > @@ -130,16 +126,6 @@ $(inst_i18ndir)/locales/%: locales/% $(+force); $(do-install) > ifeq ($(run-built-tests),yes) > generated-dirs += $(LOCALES) > > -# Dependency for the locale files. We actually make it depend only on > -# one of the files. > -$(addprefix $(objpfx),$(CTYPE_FILES)): %: \ > - gen-locale.sh $(common-objpfx)locale/localedef Makefile \ > - $(addprefix charmaps/,$(CHARMAPS)) $(addprefix locales/,$(LOCALE_SRCS)) > - @$(SHELL) gen-locale.sh $(common-objpfx) \ > - '$(built-program-cmd-before-env)' '$(run-program-env)' \ > - '$(built-program-cmd-after-env)' $@; \ > - $(evaluate-test) > - > $(addsuffix .out,$(addprefix $(objpfx),$(tests))): %: \ > $(addprefix $(objpfx),$(CTYPE_FILES)) > > diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh > index 4156f9e..f713c55 100644 > --- a/localedata/gen-locale.sh > +++ b/localedata/gen-locale.sh > @@ -30,9 +30,9 @@ generate_locale () > charmap=$1 > input=$2 > out=$3 > - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ > + if ${localedef_before_env} ${run_program_env} I18NPATH=../localedata \ > ${localedef_after_env} --quiet -c -f $charmap -i $input \ > - ${common_objpfx}localedata/$out > + ${common_objpfx}localedata/$out > then > # The makefile checks the timestamp of the LC_CTYPE file, > # but localedef won't have touched it if it was able to > Split this change into a separate patch and make it PATCH 1/2, with the actual benchmark then being PATCH 2/2. Thanks, Siddhesh
Attachment:
pgpMMD0D_7Lsp.pgp
Description: PGP signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |