This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
On Tue, Oct 14, 2014 at 03:11:20PM +0200, Leonhard Holz wrote: > The output of the benchmark is: Thanks, the outputs look good to me. I'm going to push your strcoll changes, but the benchmark changes need more work. I hope you'll continue to work on them :) > diff --git a/benchtests/Makefile b/benchtests/Makefile > index fd3036d..e79ceee 100644 > --- a/benchtests/Makefile > +++ b/benchtests/Makefile > @@ -34,7 +34,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ > mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \ > strcat strchr strchrnul strcmp strcpy strcspn strlen \ > strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ > - strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok > + strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok strcoll > string-bench-all := $(string-bench) > > stdlib-bench := strtod You need to ensure that the locales are generated first. The tests target does this already. Also my fault that I didn't point out earlier that you'll need to set GCONV_PATH and LOCPATH to actually make the test use the generated locales. Otherwise they'll just use the system locales. > diff --git a/localedata/Makefile b/localedata/Makefile > index b6235f2..cb24974 100644 > --- a/localedata/Makefile > +++ b/localedata/Makefile > @@ -106,7 +106,10 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \ > hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \ > nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \ > zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \ > - tr_TR.ISO-8859-9 en_GB.UTF-8 > + tr_TR.ISO-8859-9 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 zh_CN.UTF-8 \ > + da_DK.UTF-8 pl_PL.UTF-8 pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 \ > + iw_IL.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 hu_HU.UTF-8 \ > + is_IS.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 > LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g') > CHARMAPS := $(shell echo "$(LOCALES)" | \ > sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g) > /* Measure strcoll implementation. > Copyright (C) 2014 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > modify it under the terms of the GNU Lesser General Public > License as published by the Free Software Foundation; either > version 2.1 of the License, or (at your option) any later version. > > The GNU C Library is distributed in the hope that it will be useful, > but WITHOUT ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > Lesser General Public License for more details. > > You should have received a copy of the GNU Lesser General Public > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > #define TEST_MAIN > #define TEST_NAME "strcoll" > > #include <stdio.h> > #include <stdlib.h> > #include <fcntl.h> > #include <locale.h> > #include <unistd.h> > #include <dirent.h> > #include "bench-timing.h" > #include "json-lib.h" > > /* many thanks to http://generator.lorem-ipsum.info/ */ Capitalize first letter of the first word. > #define CHARSET "UTF-8" > #define INPUT_FOLDER "strcoll-inputs" > #define INPUT_PREFIX "lorem_ipsum_" > > const char *locales[] = { Make variable static. > "vi_VN", > "en_US", > "ar_SA", > "en_US", > "zh_CN", > "cs_CZ", > "en_GB", > "da_DK", > "pl_PL", > "fr_FR", > "pt_PT", > "el_GR", > "ru_RU", > "iw_IL", > "es_ES", > "hi_IN", > "sv_SE", > "hu_HU", > "tr_TR", > "is_IS", > "it_IT", > "sr_RS", > "ja_JP" > }; > > #define TEXTFILE_DELIMITER " \n\r\t.,?!" > #define FILENAMES_DELIMITER "\n\r" > > int Make function static. > is_dir (const char *filename) > { > int is_dir = 0; > struct stat stats; > > if (stat (filename, &stats) == 0) > is_dir = stats.st_mode & S_IFDIR; > > return is_dir; > } > > char * Make function static. > concat_path (const char *dirname, const char *filename) > { > size_t d_len = strlen (dirname); > size_t f_len = strlen (filename); > char * path = malloc (d_len + f_len + 2); > > memcpy (path, dirname, d_len); > * (path + d_len) = '/'; > memcpy (path + d_len + 1, filename, f_len); > * (path + d_len + f_len + 1) = '\0'; Use asprintf: int ret = asprintf (&path, "%s/%s", dirname, filename); if (ret < 0) return NULL; > > return path; > } > > char * Make function static. > read_file (const char *filename) > { > struct stat stats; > char *buffer = NULL; > int fd = open (filename, O_RDONLY); > > if (fd >= 0) > { > if (fstat (fd, &stats) == 0) > { > buffer = malloc (stats.st_size + 1); > if (buffer) > { > read (fd, buffer, stats.st_size); Check for read failure and free buffer and return if there's an error. > *(buffer + stats.st_size) = '\0'; > } > } > close (fd); > } > > return buffer; > } > > size_t Make function static. > count_words (const char *text, const char *delim) > { > size_t wordcount = 0; > char *tmp = strdup (text); > > char *token = strtok (tmp, delim); > while (token != NULL) > { > if (*token != '\0') > wordcount++; > token = strtok (NULL, delim); > } > > free (tmp); > return wordcount; > } > > typedef struct > { > size_t size; > char **words; > } word_list; > > word_list * > new_word_list (size_t size) > { > word_list *list = malloc (sizeof (word_list)); > list->size = size; > list->words = malloc (size * sizeof (char *)); > return list; > } > > word_list * Make function static. > merge_word_list (word_list * dst, word_list * src) > { > size_t i, n = dst->size; > dst->size += src->size; > dst->words = realloc (dst->words, dst->size * sizeof (char *)); > > for (i = 0; i < src->size; i++) > dst->words[i + n] = src->words[i]; > > free (src->words); > free (src); > > return dst; > } > > word_list * Make function static. > file_word_list (const char *dirname) > { > DIR *dir; > struct dirent *ent; > word_list *list = NULL; > > if ((dir = opendir (dirname)) != NULL) > { > size_t ent_cnt = 0, i = 0; > word_list *sublist = new_word_list (0); > > while ((ent = readdir (dir)) != NULL) > if (strcmp (".", ent->d_name) != 0 && strcmp ("..", ent->d_name) != 0) > { > char *subdirname = concat_path (dirname, ent->d_name); > if (is_dir (subdirname)) > merge_word_list (sublist, file_word_list (subdirname)); > free (subdirname); > > ent_cnt++; > } > > rewinddir (dir); > list = new_word_list (ent_cnt); > while ((ent = readdir (dir)) != NULL) > if (strcmp (".", ent->d_name) != 0 && strcmp ("..", ent->d_name) != 0) > { > list->words[i] = strdup (ent->d_name); > i++; > } > > merge_word_list (list, sublist); > closedir (dir); > } > > return list; > } > > word_list * Make function static. > str_word_list (const char *str, const char *delim) > { > size_t n = 0; > word_list *list = new_word_list (count_words (str, delim)); > > char *toks = strdup (str); > char *word = strtok (toks, delim); > while (word != NULL && n < list->size) > { > if (*word != '\0') > list->words[n++] = strdup (word); > word = strtok (NULL, delim); > } You're doing the same thing twice, which is suboptimal. It's fine for now I guess. > > free (toks); > return list; > } > > word_list * Make the function static. > copy_word_list (const word_list *list) > { > size_t i; > word_list *copy = new_word_list (list->size); > > for (i = 0; i < list->size; i++) > copy->words[i] = strdup (list->words[i]); > > return copy; > } > > void Make the function static. > free_word_list (word_list *list) > { > size_t i; > for (i = 0; i < list->size; i++) > free (list->words[i]); > > free (list->words); > free (list); > } > > int Make the function static. > compare_words (const void *a, const void *b) > { > const char *s1 = *(char **) a; > const char *s2 = *(char **) b; > return strcoll (s1, s2); > } > > #undef INNER_LOOP_ITERS > #define INNER_LOOP_ITERS 16 > > void Make the function static. > bench_list (json_ctx_t *json_ctx, word_list *list) > { > size_t i; > timing_t start, stop, cur; > > word_list **tests = malloc (INNER_LOOP_ITERS * sizeof (word_list *)); > for (i = 0; i < INNER_LOOP_ITERS; i++) > tests[i] = copy_word_list (list); > > TIMING_NOW (start); > for (i = 0; i < INNER_LOOP_ITERS; i++) > qsort (tests[i]->words, tests[i]->size, sizeof (char *), compare_words); > TIMING_NOW (stop); > > TIMING_DIFF (cur, start, stop); > setlocale (LC_ALL, "en_US.UTF-8"); > json_attr_double (json_ctx, "duration", cur); > json_attr_double (json_ctx, "iterations", i); > json_attr_double (json_ctx, "mean", cur / i); > > for (i = 0; i < INNER_LOOP_ITERS; i++) > free_word_list (tests[i]); > free (tests); > } > > #define OK 0 > #define ERROR_LOCALE 1 > #define ERROR_IO 2 > > int Make the function static. > bench_file (json_ctx_t *json_ctx, const char *filename, const char *delim, > const char *locale) > { > if (setlocale (LC_ALL, locale) == NULL) > return ERROR_LOCALE; > > char *text = read_file (filename); > if (text == NULL) > return ERROR_IO; > > word_list *list = str_word_list (text, delim); > > json_attr_object_begin (json_ctx, locale); > bench_list (json_ctx, list); > json_attr_object_end (json_ctx); > > free_word_list (list); > free (text); > return OK; > } > > int Make the function static. > bench_file_list (json_ctx_t *json_ctx, const char *dirname, const char *locale) > { > if (setlocale (LC_ALL, locale) == NULL) > return ERROR_LOCALE; > > word_list *list = file_word_list (dirname); > if (list == NULL) > return ERROR_IO; > > json_attr_object_begin (json_ctx, ""); > json_attr_string (json_ctx, "locale", locale); > bench_list (json_ctx, list); > json_attr_object_end (json_ctx); > > free_word_list (list); > return OK; > } > > int > do_bench (void) > { > timing_t res __attribute__ ((unused)); > TIMING_INIT (res); > > json_ctx_t *json_ctx = malloc (sizeof (json_ctx_t)); > json_init (json_ctx, 2, stdout); > json_attr_object_begin (json_ctx, "strcoll"); > > int result = bench_file_list (json_ctx, "..", "en_US.UTF-8"); > if (result == ERROR_LOCALE) > { > printf ("Failed to set locale en_US.UTF-8, aborting!\n"); > return result; > } > if (result == ERROR_IO) > { > printf ("Failed to read libc directory, aborting!\n"); > return result; > } > > size_t i; > char filename[40], locale[15]; > for (i = 0; i < (sizeof (locales) / sizeof (locales[0])); i++) > { > sprintf (locale, "%s." CHARSET, locales[i]); > sprintf (filename, INPUT_FOLDER "/" INPUT_PREFIX "%s", locales[i]); > result = bench_file (json_ctx, filename, TEXTFILE_DELIMITER, locale); > if (result == ERROR_LOCALE) > { > printf ("Failed to set locale %s, aborting!\n", locale); > return result; > } > if (result == ERROR_IO) > { > printf ("Failed to read file %s, aborting!\n", filename); > return result; > } > } > > json_attr_object_end (json_ctx); > free (json_ctx); > return OK; > } > > #define TEST_FUNCTION do_bench () > > /* On slower platforms this test needs more than the default 2 seconds. */ > #define TIMEOUT 10 > > #include "../test-skeleton.c" Siddhesh PS: I'm out for two weeks, so I'll be able to do the next review once I'm back in November.
Attachment:
pgpIr2R1AkTek.pgp
Description: PGP signature
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |