This is the mail archive of the libc-help@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

strfry does not always give a uniform distribution


The strfry function does not always give a uniform distribution.  Run
the attached program and you can see that (once every few tries)
strfry is biased toward some permutations of the input string.  I
modified strfry to not use a custom prng state.  It seems to behave
much more fairly.


Example output:


DEFAULT_STR: ab

max_iterations: 50000000

getpid: 2862

(1285379699) strfry...
(1285379709) strfry_mod...

strfry_histogram
ab => 27952756	(55.9055%)
ba => 22047244	(44.0945%)

strfry_mod_histogram
ab => 25002087	(50.0042%)
ba => 24997913	(49.9958%)



DEFAULT_STR: abc

max_iterations: 25000000

getpid: 3336

(1285381555) strfry...
(1285381561) strfry_mod...

strfry_histogram
abc => 3610465	(14.4419%)
acb => 4723913	(18.8957%)
bac => 3607390	(14.4296%)
bca => 4726172	(18.9047%)
cab => 4723147	(18.8926%)
cba => 3608913	(14.4357%)

strfry_mod_histogram
abc => 4163372	(16.6535%)
acb => 4165675	(16.6627%)
bac => 4165102	(16.6604%)
bca => 4170281	(16.6811%)
cab => 4166255	(16.665%)
cba => 4169315	(16.6773%)



Steve Ward
--- strfry.c	2010-09-24 22:43:13.000000000 -0400
+++ strfry_mod.c	2010-09-24 22:44:43.000000000 -0400
@@ -25,14 +25,10 @@
 strfry (char *string)
 {
   static int init;
-  static struct random_data rdata;
 
   if (!init)
     {
-      static char state[32];
-      rdata.state = NULL;
-      __initstate_r (time ((time_t *) NULL) ^ getpid (),
-		     state, sizeof (state), &rdata);
+      srandom (time ((time_t *) NULL) ^ getpid ());
       init = 1;
     }
 
@@ -40,9 +36,7 @@
   if (len > 0)
     for (size_t i = 0; i < len - 1; ++i)
       {
-	int32_t j;
-	__random_r (&rdata, &j);
-	j = j % (len - i) + i;
+	size_t j = random () % (len - i) + i;
 
 	char c = string[i];
 	string[i] = string[j];
/*

$ uname --all
Linux ubuntu 2.6.32-25-generic-pae #44-Ubuntu SMP Fri Sep 17 21:57:48 UTC 2010 i686 GNU/Linux


$ /lib/libc-2.11.1.so
GNU C Library (Ubuntu EGLIBC 2.11.1-0ubuntu7.3) stable release version 2.11.1, by Roland McGrath et al.
Copyright (C) 2009 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
Compiled by GNU CC version 4.4.3.
Compiled on a Linux >>2.6.24-27-server<< system on 2010-08-17.
Available extensions:
	crypt add-on version 2.1 by Michael Glad and others
	GNU Libidn by Simon Josefsson
	Native POSIX Threads Library by Ulrich Drepper et al
	BIND-8.2.3-T5B
For bug reporting instructions, please see:
<http://www.debian.org/Bugs/>.


$ g++ -std=c++0x -Wall -O2 -Iscul/include -o strfry_mod strfry_mod.cpp && ./strfry_mod


According to <http://www.gnu.org/s/libc/manual/html_node/strfry.html>, strfry
should shuffle the input string using a uniform distribution random numbers.
Sometimes it does not seem to do this.

This program calls the strfry and strfry_mod functions max_iterations times and
counts the result strings in a map<string, size_t>.  The map functions like a
histogram and it is printed.  The DEFAULT_STR string is always passed into the
strfry and strfry_mod functions.

strfry_mod was taken from
<http://sourceware.org/git/?p=glibc.git;a=blob_plain;f=string/strfry.c;hb=3eb9c80984fa8ffa1bba570b34c380bbfd2a0258>
and modified to not use a custom PRNG state.

The program output shows that sometimes (at least for small input strings) the
output of strgry is not uniformly distributed.

Additionally, the man page for strfry incorrectly says that the rand() function
is used to randomly swap the characters in the string.


strfry source
http://sourceware.org/git/?p=glibc.git;a=history;f=string/strfry.c;hb=HEAD


Example output:


DEFAULT_STR: abc

max_iterations: 10000000

getpid: 2800

(1285379523) strfry...
(1285379525) strfry_mod...

strfry_histogram
abc => 1442909	(14.4291%)
acb => 1888958	(18.8896%)
bac => 1444606	(14.4461%)
bca => 1891155	(18.9115%)
cab => 1889175	(18.8917%)
cba => 1443197	(14.432%)

strfry_mod_histogram
abc => 1666350	(16.6635%)
acb => 1669133	(16.6913%)
bac => 1665473	(16.6547%)
bca => 1665188	(16.6519%)
cab => 1667398	(16.674%)
cba => 1666458	(16.6646%)



DEFAULT_STR: ab

max_iterations: 10000000

getpid: 2812

(1285379552) strfry...
(1285379553) strfry_mod...

strfry_histogram
ab => 5590552	(55.9055%)
ba => 4409448	(44.0945%)

strfry_mod_histogram
ab => 5003091	(50.0309%)
ba => 4996909	(49.9691%)



DEFAULT_STR: ab

max_iterations: 50000000

getpid: 2862

(1285379699) strfry...
(1285379709) strfry_mod...

strfry_histogram
ab => 27952756	(55.9055%)
ba => 22047244	(44.0945%)

strfry_mod_histogram
ab => 25002087	(50.0042%)
ba => 24997913	(49.9958%)



DEFAULT_STR: ab

max_iterations: 50000000

getpid: 2878

(1285379805) strfry...
(1285379814) strfry_mod...

strfry_histogram
ab => 21653544	(43.3071%)
ba => 28346456	(56.6929%)

strfry_mod_histogram
ab => 25005673	(50.0113%)
ba => 24994327	(49.9887%)



DEFAULT_STR: ab

max_iterations: 25000000

getpid: 2909

(1285379911) strfry...
(1285379916) strfry_mod...

strfry_histogram
ab => 13976380	(55.9055%)
ba => 11023620	(44.0945%)

strfry_mod_histogram
ab => 12497817	(49.9913%)
ba => 12502183	(50.0087%)



DEFAULT_STR: abc

max_iterations: 25000000

getpid: 2930

(1285379960) strfry...
(1285379966) strfry_mod...

strfry_histogram
abc => 4660065	(18.6403%)
acb => 3674401	(14.6976%)
bac => 4657010	(18.628%)
bca => 3674400	(14.6976%)
cab => 3674827	(14.6993%)
cba => 4659297	(18.6372%)

strfry_mod_histogram
abc => 4168125	(16.6725%)
acb => 4168404	(16.6736%)
bac => 4163652	(16.6546%)
bca => 4169491	(16.678%)
cab => 4164116	(16.6565%)
cba => 4166212	(16.6648%)



DEFAULT_STR: abc

max_iterations: 25000000

getpid: 2937

(1285379987) strfry...
(1285379993) strfry_mod...

strfry_histogram
abc => 3606336	(14.4253%)
acb => 4722653	(18.8906%)
bac => 3607366	(14.4295%)
bca => 4726001	(18.904%)
cab => 4724576	(18.8983%)
cba => 3613068	(14.4523%)

strfry_mod_histogram
abc => 4166975	(16.6679%)
acb => 4166685	(16.6667%)
bac => 4168776	(16.6751%)
bca => 4165956	(16.6638%)
cab => 4165944	(16.6638%)
cba => 4165664	(16.6627%)



DEFAULT_STR: abc

max_iterations: 25000000

getpid: 3155

(1285380877) strfry...
(1285380882) strfry_mod...

strfry_histogram
abc => 4659830	(18.6393%)
acb => 3675009	(14.7%)
bac => 4658990	(18.636%)
bca => 3675026	(14.7001%)
cab => 3673591	(14.6944%)
cba => 4657554	(18.6302%)

strfry_mod_histogram
abc => 4166880	(16.6675%)
acb => 4168808	(16.6752%)
bac => 4166908	(16.6676%)
bca => 4165642	(16.6626%)
cab => 4164334	(16.6573%)
cba => 4167428	(16.6697%)



DEFAULT_STR: abc

max_iterations: 25000000

getpid: 3336

(1285381555) strfry...
(1285381561) strfry_mod...

strfry_histogram
abc => 3610465	(14.4419%)
acb => 4723913	(18.8957%)
bac => 3607390	(14.4296%)
bca => 4726172	(18.9047%)
cab => 4723147	(18.8926%)
cba => 3608913	(14.4357%)

strfry_mod_histogram
abc => 4163372	(16.6535%)
acb => 4165675	(16.6627%)
bac => 4165102	(16.6604%)
bca => 4170281	(16.6811%)
cab => 4166255	(16.665%)
cba => 4169315	(16.6773%)


*/


//------------------------------------------------------------------------------


/* Copyright (C) 1992, 1996, 1999, 2002, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

char *
strfry_mod (char *string)
{
  static int init;
  //static struct random_data rdata;

  if (!init)
    {
      //static char state[32];
      //rdata.state = NULL;
      //__initstate_r (time ((time_t *) NULL) ^ getpid (),
		     //state, sizeof (state), &rdata);
      srandom (time ((time_t *) NULL) ^ getpid ());
      init = 1;
    }

  size_t len = strlen (string);
  if (len > 0)
    for (size_t i = 0; i < len - 1; ++i)
      {
	//int32_t j;
	//__random_r (&rdata, &j);
	//j = j % (len - i) + i;
	size_t j = random () % (len - i) + i;

	char c = string[i];
	string[i] = string[j];
	string[j] = c;
      }

  return string;
}


//------------------------------------------------------------------------------


#include <iostream>

#include <map>

using namespace std;

#define DEFAULT_STR "abc"

int main()
{
	cout << "DEFAULT_STR: " << DEFAULT_STR << '\n';
	cout << '\n';

	const size_t max_iterations = 25000000;

	cout << "max_iterations: " << max_iterations << '\n';
	cout << '\n';

	map<string, size_t> strfry_histogram;
	map<string, size_t> strfry_mod_histogram;

	cout << "getpid: " << getpid() << '\n';
	cout << '\n';


	cout << "(" << time(NULL) << ") " << "strfry..." << flush;
	for (size_t i = 0; i < max_iterations; ++i)
	{
		char str[] = DEFAULT_STR;

		strfry(str);

		strfry_histogram[str]++;
	}
	cout << '\n';


	cout << "(" << time(NULL) << ") " << "strfry_mod..." << flush;
	for (size_t i = 0; i < max_iterations; ++i)
	{
		char str[] = DEFAULT_STR;

		strfry_mod(str);

		strfry_mod_histogram[str]++;
	}
	cout << '\n';


	cout << '\n';


	cout << "strfry_histogram\n";
	for (auto i = strfry_histogram.begin(); i != strfry_histogram.end(); ++i)
	{
		const string key = i->first;
		const size_t value = i->second;
		const double percentage = value / static_cast<double>(max_iterations) * 100.0;

		cout << key << " => " << value << "\t(" << percentage << "%)" << '\n';
	}
	cout << '\n';


	cout << "strfry_mod_histogram\n";
	for (auto i = strfry_mod_histogram.begin(); i != strfry_mod_histogram.end(); ++i)
	{
		const string key = i->first;
		const size_t value = i->second;
		const double percentage = value / static_cast<double>(max_iterations) * 100.0;

		cout << key << " => " << value << "\t(" << percentage << "%)" << '\n';
	}
	cout << '\n';


	return 0;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]