This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
* benchtests/bench-strcoll.c: New benchmark. * benchtests/Makefile: Generate locales and run benchmark. * localedata/gen-locale.sh (generate_locale): Make path to charmaps independent of current working directory. * benchtests/lorem_ipsum_ar_SA: New benchmark input file. * benchtests/lorem_ipsum_cs_CZ: Likewise. * benchtests/lorem_ipsum_da_DK: Likewise. * benchtests/lorem_ipsum_el_GR: Likewise. * benchtests/lorem_ipsum_en_GB: Likewise. * benchtests/lorem_ipsum_en_US: Likewise. * benchtests/lorem_ipsum_es_ES: Likewise. * benchtests/lorem_ipsum_fr_FR: Likewise. * benchtests/lorem_ipsum_hi_IN: Likewise. * benchtests/lorem_ipsum_hu_HU: Likewise. * benchtests/lorem_ipsum_is_IS: Likewise. * benchtests/lorem_ipsum_it_IT: Likewise. * benchtests/lorem_ipsum_iw_IL: Likewise. * benchtests/lorem_ipsum_ja_JP: Likewise. * benchtests/lorem_ipsum_pl_PL: Likewise. * benchtests/lorem_ipsum_pt_PT: Likewise. * benchtests/lorem_ipsum_ru_RU: Likewise. * benchtests/lorem_ipsum_sr_RS: Likewise. * benchtests/lorem_ipsum_sv_SE: Likewise. * benchtests/lorem_ipsum_tr_TR: Likewise. * benchtests/lorem_ipsum_vi_VN: Likewise. * benchtests/lorem_ipsum_zh_CN: Likewise. diff --git a/benchtests/bench-strcoll.c b/benchtests/bench-strcoll.c index e69de29..fab11e5 100644 --- a/benchtests/bench-strcoll.c +++ b/benchtests/bench-strcoll.c @@ -0,0 +1,373 @@ +/* Measure strcoll implementation. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define TEST_MAIN +#define TEST_NAME "strcoll" + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <locale.h> +#include <unistd.h> +#include <dirent.h> +#include "bench-timing.h" +#include "json-lib.h" + +/* many thanks to http://generator.lorem-ipsum.info/ */ +#define CHARSET "UTF-8" +#define INPUT_PREFIX "lorem_ipsum_" + +const char *locales[] = { + "vi_VN", + "en_US", + "ar_SA", + "en_US", + "zh_CN", + "cs_CZ", + "en_GB", + "da_DK", + "pl_PL", + "fr_FR", + "pt_PT", + "el_GR", + "ru_RU", + "iw_IL", + "es_ES", + "hi_IN", + "sv_SE", + "hu_HU", + "tr_TR", + "is_IS", + "it_IT", + "sr_RS", + "ja_JP" +}; + +#define TEXTFILE_DELIMITER " \n\r\t.,?!" +#define FILENAMES_DELIMITER "\n\r" + +int +is_dir (const char *filename) +{ + int is_dir = 0; + struct stat stats; + + if (stat (filename, &stats) == 0) + is_dir = stats.st_mode & S_IFDIR; + + return is_dir; +} + +char * +concat_path (const char *dirname, const char *filename) +{ + size_t d_len = strlen (dirname); + size_t f_len = strlen (filename); + char * path = malloc (d_len + f_len + 2); + + memcpy (path, dirname, d_len); + * (path + d_len) = '/'; + memcpy (path + d_len + 1, filename, f_len); + * (path + d_len + f_len + 1) = '\0'; + + return path; +} + +char * +read_file (const char *filename) +{ + struct stat stats; + char *buffer = NULL; + int fd = open (filename, O_RDONLY); + + if (fd >= 0) + { + if (fstat (fd, &stats) == 0) + { + buffer = malloc (stats.st_size + 1); + if (buffer) + { + read (fd, buffer, stats.st_size); + *(buffer + stats.st_size) = '\0'; + } + } + close (fd); + } + + return buffer; +} + +size_t +count_words (const char *text, const char *delim) +{ + size_t wordcount = 0; + char *tmp = strdup (text); + + char *token = strtok (tmp, delim); + while (token != NULL) + { + if (*token != '\0') + wordcount++; + token = strtok (NULL, delim); + } + + free (tmp); + return wordcount; +} + +typedef struct +{ + size_t size; + char **words; +} word_list; + +word_list * +new_word_list (size_t size) +{ + word_list *list = malloc (sizeof (word_list)); + list->size = size; + list->words = malloc (size * sizeof (char *)); + return list; +} + +word_list * +merge_word_list (word_list * dst, word_list * src) +{ + size_t i, n = dst->size; + dst->size += src->size; + dst->words = realloc (dst->words, dst->size * sizeof (char *)); + + for (i = 0; i < src->size; i++) + dst->words[i + n] = src->words[i]; + + free (src->words); + free (src); + + return dst; +} + +word_list * +file_word_list (const char *dirname) +{ + DIR *dir; + struct dirent *ent; + word_list *list = NULL; + + if ((dir = opendir (dirname)) != NULL) + { + size_t ent_cnt = 0, i = 0; + word_list *sublist = new_word_list (0); + + while ((ent = readdir (dir)) != NULL) + if (strcmp (".", ent->d_name) != 0 && strcmp ("..", ent->d_name) != 0) + { + char *subdirname = concat_path (dirname, ent->d_name); + if (is_dir (subdirname)) + merge_word_list (sublist, file_word_list (subdirname)); + free (subdirname); + + ent_cnt++; + } + + rewinddir (dir); + list = new_word_list (ent_cnt); + while ((ent = readdir (dir)) != NULL) + if (strcmp (".", ent->d_name) != 0 && strcmp ("..", ent->d_name) != 0) + { + list->words[i] = strdup (ent->d_name); + i++; + } + + merge_word_list (list, sublist); + closedir (dir); + } + + return list; +} + +word_list * +str_word_list (const char *str, const char *delim) +{ + size_t n = 0; + word_list *list = new_word_list (count_words (str, delim)); + + char *toks = strdup (str); + char *word = strtok (toks, delim); + while (word != NULL && n < list->size) + { + if (*word != '\0') + list->words[n++] = strdup (word); + word = strtok (NULL, delim); + } + + free (toks); + return list; +} + +word_list * +copy_word_list (const word_list *list) +{ + size_t i; + word_list *copy = new_word_list (list->size); + + for (i = 0; i < list->size; i++) + copy->words[i] = strdup (list->words[i]); + + return copy; +} + +void +free_word_list (word_list *list) +{ + size_t i; + for (i = 0; i < list->size; i++) + free (list->words[i]); + + free (list->words); + free (list); +} + +int +compare_words (const void *a, const void *b) +{ + const char *s1 = *(char **) a; + const char *s2 = *(char **) b; + return strcoll (s1, s2); +} + +#undef INNER_LOOP_ITERS +#define INNER_LOOP_ITERS 16 + +void +bench_list (json_ctx_t *json_ctx, word_list *list) +{ + size_t i; + timing_t start, stop, cur; + + word_list **tests = malloc (INNER_LOOP_ITERS * sizeof (word_list *)); + for (i = 0; i < INNER_LOOP_ITERS; i++) + tests[i] = copy_word_list (list); + + TIMING_NOW (start); + for (i = 0; i < INNER_LOOP_ITERS; i++) + qsort (tests[i]->words, tests[i]->size, sizeof (char *), compare_words); + TIMING_NOW (stop); + + TIMING_DIFF (cur, start, stop); + setlocale (LC_ALL, "en_US.UTF-8"); + json_attr_double (json_ctx, "duration", cur); + json_attr_double (json_ctx, "iterations", i); + json_attr_double (json_ctx, "mean", cur / i); + + for (i = 0; i < INNER_LOOP_ITERS; i++) + free_word_list (tests[i]); + free (tests); +} + +#define OK 0 +#define ERROR_LOCALE 1 +#define ERROR_IO 2 + +int +bench_file (json_ctx_t *json_ctx, const char *filename, const char *delim, + const char *locale) +{ + if (setlocale (LC_ALL, locale) == NULL) + return ERROR_LOCALE; + + char *text = read_file (filename); + if (text == NULL) + return ERROR_IO; + + word_list *list = str_word_list (text, delim); + + json_attr_object_begin (json_ctx, locale); + bench_list (json_ctx, list); + json_attr_object_end (json_ctx); + + free_word_list (list); + free (text); + return OK; +} + +int +bench_file_list (json_ctx_t *json_ctx, const char *dirname, const char *locale) +{ + if (setlocale (LC_ALL, locale) == NULL) + return ERROR_LOCALE; + + word_list *list = file_word_list (dirname); + if (list == NULL) + return ERROR_IO; + + json_attr_object_begin (json_ctx, ""); + json_attr_string (json_ctx, "locale", locale); + bench_list (json_ctx, list); + json_attr_object_end (json_ctx); + + free_word_list (list); + return OK; +} + +int +do_bench (void) +{ + timing_t res __attribute__ ((unused)); + TIMING_INIT (res); + + json_ctx_t *json_ctx = malloc (sizeof (json_ctx_t)); + json_init (json_ctx, 2, stdout); + json_attr_object_begin (json_ctx, "strcoll"); + + int result = bench_file_list (json_ctx, "..", "C"); + if (result != OK) { + error: + if (result == ERROR_LOCALE) + printf ("Failed to set locale en_US.UTF-8, aborting!\n"); + else if (result == ERROR_IO) + printf ("Failed to read libc directory, aborting!\n"); + else + printf ("Error, aborting!\n"); + return result; + } + result = bench_file_list (json_ctx, "..", "en_US.UTF-8"); + if (result != OK) + goto error; + + size_t i; + char filename[40], locale[15]; + for (i = 0; i < (sizeof (locales) / sizeof (locales[0])); i++) + { + sprintf (locale, "%s." CHARSET, locales[i]); + sprintf (filename, INPUT_PREFIX "%s", locales[i]); + result = bench_file (json_ctx, filename, TEXTFILE_DELIMITER, locale); + if (result != OK) + goto error; + } + + json_attr_object_end (json_ctx); + free (json_ctx); + return OK; +} + +#define TEST_FUNCTION do_bench () + +/* On slower platforms this test needs more than the default 2 seconds. */ +#define TIMEOUT 10 + +#include "../test-skeleton.c" diff --git a/benchtests/Makefile b/benchtests/Makefile index 08603a2..feef531 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile@@ -34,7 +34,8 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ strcat strchr strchrnul strcmp strcpy strcspn strlen \ strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok + strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ + strcoll string-bench-all := $(string-bench) stdlib-bench := strtod @@ -50,6 +51,18 @@ $(addprefix $(objpfx)bench-,$(bench-math)): $(libm) $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library) $(objpfx)bench-malloc-thread: $(shared-thread-library) +# We have to generate locales +LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ + ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \ + da_DK.UTF-8 pl_PL.UTF-8 pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 \ + iw_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \ + hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8 +LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') +CHARMAPS := $(shell echo "$(LOCALES)" | \ + sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) +CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) +gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) + # Rules to build and execute the benchmarks. Do not put any benchmark @@ -65,6 +78,19 @@ binaries-bench := $(addprefix $(objpfx)bench-,$(bench)) binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset)) binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc)) +# Dependency for the locale files. We actually make it depend only on +# one of the files. +$(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)): %: \ + ../localedata/gen-locale.sh \ + $(common-objpfx)locale/localedef \ + ../localedata/Makefile \ + $(addprefix ../localedata/charmaps/,$(CHARMAPS)) \ + $(addprefix ../localedata/locales/,$(LOCALE_SRCS)) + @$(SHELL) ../localedata/gen-locale.sh $(common-objpfx) \ + '$(built-program-cmd-before-env)' '$(run-program-env)' \ + '$(built-program-cmd-after-env)' $@; \ + $(evaluate-test) + # The default duration: 10 seconds. ifndef BENCH_DURATION BENCH_DURATION := 10 @@ -107,7 +133,7 @@ bench-clean: rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc)) rm -f $(timing-type) $(addsuffix .o,$(timing-type)) -bench: $(timing-type) bench-set bench-func bench-malloc +bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc bench-set: $(binaries-benchset) for run in $^; do \ diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh index 4156f9e..2a48a34 100644 --- a/localedata/gen-locale.sh +++ b/localedata/gen-locale.sh @@ -30,7 +30,8 @@ generate_locale () charmap=$1 input=$2 out=$3 - if ${localedef_before_env} ${run_program_env} I18NPATH=. \ + if ${localedef_before_env} ${run_program_env} \ + I18NPATH=${common_objpfx}../localedata \ ${localedef_after_env} --quiet -c -f $charmap -i $input \ ${common_objpfx}localedata/$out then
Attachment:
strcoll-bench-input.tar.bz2
Description: Binary data
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |