so the library compiles without warnings at level 3 in MS Visual
Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other
changes are minor documentation changes.
+
+1.0
+~~~
+Several minor bugfixes and enhancements:
+
+* Large file support. The library uses 64-bit counters to
+ count the volume of data passing through it. bzip2.c
+ is now compiled with -D_FILE_OFFSET_BITS=64 to get large
+ file support from the C library. -v correctly prints out
+ file sizes greater than 4 gigabytes. All these changes have
+ been made without assuming a 64-bit platform or a C compiler
+ which supports 64-bit ints, so, except for the C library
+ aspect, they are fully portable.
+
+* Decompression robustness. The library/program should be
+ robust to any corruption of compressed data, detecting and
+ handling _all_ corruption, instead of merely relying on
+ the CRCs. What this means is that the program should
+ never crash, given corrupted data, and the library should
+ always return BZ_DATA_ERROR.
+
+* Fixed an obscure race-condition bug only ever observed on
+ Solaris, in which, if you were very unlucky and issued
+ control-C at exactly the wrong time, both input and output
+ files would be deleted.
+
+* Don't run out of file handles on test/decompression when
+ large numbers of files have invalid magic numbers.
+
+* Avoid library namespace pollution. Prefix all exported
+ symbols with BZ2_.
+
+* Minor sorting enhancements from my DCC2000 paper.
+
+* Advance the version number to 1.0, so as to counteract the
+ (false-in-this-case) impression some people have that programs
+ with version numbers less than 1.0 are, in some way, experimental,
+ pre-release versions.
+
+* Create an initial Makefile-libbz2_so to build a shared library.
+ Yes, I know I should really use libtool et al ...
+
+* Make the program exit with 2 instead of 0 when decompression
+ fails due to a bad magic number (ie, an invalid bzip2 header).
+ Also exit with 1 (as the manual claims :-) whenever a diagnostic
+ message would have been printed AND the corresponding operation
+ is aborted, for example
+ bzip2: Output file xx already exists.
+ When a diagnostic message is printed but the operation is not
+ aborted, for example
+ bzip2: Can't guess original name for wurble -- using wurble.out
+ then the exit value 0 is returned, unless some other problem is
+ also detected.
+
+ I think it corresponds more closely to what the manual claims now.
+
+
+1.0.1
+~~~~~
+* Modified dlltest.c so it uses the new BZ2_ naming scheme.
+* Modified makefile.msc to fix minor build problems on Win2k.
+* Updated README.COMPILATION.PROBLEMS.
+
+There are no functionality changes or bug fixes relative to version
+1.0.0. This is just a documentation update + a fix for minor Win32
+build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is
+utterly pointless. Don't bother.
This program, "bzip2" and associated library "libbzip2", are
-copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
-bzip2/libbzip2 version 0.9.5 of 24 May 1999
+bzip2/libbzip2 version 1.0 of 21 March 2000
SHELL=/bin/sh
CC=gcc
-CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce
+BIGFILES=-D_FILE_OFFSET_BITS=64
+CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
OBJS= blocksort.o \
huffman.o \
sample1.tst sample2.tst sample3.tst
blocksort.o: blocksort.c
+ @cat words0
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
bzip2recover.o: bzip2recover.c
$(CC) $(CFLAGS) -c bzip2recover.c
+DISTNAME=bzip2-1.0.1
tarfile:
- tar cvf interim.tar blocksort.c huffman.c crctable.c \
- randtable.c compress.c decompress.c bzlib.c bzip2.c \
- bzip2recover.c bzlib.h bzlib_private.h Makefile manual.texi \
- manual.ps LICENSE bzip2.1 bzip2.1.preformatted bzip2.txt \
- words1 words2 words3 sample1.ref sample2.ref sample3.ref \
- sample1.bz2 sample2.bz2 sample3.bz2 dlltest.c \
- *.html README CHANGES libbz2.def libbz2.dsp \
- dlltest.dsp makefile.msc Y2K_INFO
-
+ rm -f $(DISTNAME)
+ ln -sf . $(DISTNAME)
+ tar cvf $(DISTNAME).tar \
+ $(DISTNAME)/blocksort.c \
+ $(DISTNAME)/huffman.c \
+ $(DISTNAME)/crctable.c \
+ $(DISTNAME)/randtable.c \
+ $(DISTNAME)/compress.c \
+ $(DISTNAME)/decompress.c \
+ $(DISTNAME)/bzlib.c \
+ $(DISTNAME)/bzip2.c \
+ $(DISTNAME)/bzip2recover.c \
+ $(DISTNAME)/bzlib.h \
+ $(DISTNAME)/bzlib_private.h \
+ $(DISTNAME)/Makefile \
+ $(DISTNAME)/manual.texi \
+ $(DISTNAME)/manual.ps \
+ $(DISTNAME)/LICENSE \
+ $(DISTNAME)/bzip2.1 \
+ $(DISTNAME)/bzip2.1.preformatted \
+ $(DISTNAME)/bzip2.txt \
+ $(DISTNAME)/words0 \
+ $(DISTNAME)/words1 \
+ $(DISTNAME)/words2 \
+ $(DISTNAME)/words3 \
+ $(DISTNAME)/sample1.ref \
+ $(DISTNAME)/sample2.ref \
+ $(DISTNAME)/sample3.ref \
+ $(DISTNAME)/sample1.bz2 \
+ $(DISTNAME)/sample2.bz2 \
+ $(DISTNAME)/sample3.bz2 \
+ $(DISTNAME)/dlltest.c \
+ $(DISTNAME)/*.html \
+ $(DISTNAME)/README \
+ $(DISTNAME)/README.COMPILATION.PROBLEMS \
+ $(DISTNAME)/CHANGES \
+ $(DISTNAME)/libbz2.def \
+ $(DISTNAME)/libbz2.dsp \
+ $(DISTNAME)/dlltest.dsp \
+ $(DISTNAME)/makefile.msc \
+ $(DISTNAME)/Y2K_INFO \
+ $(DISTNAME)/unzcrash.c \
+ $(DISTNAME)/spewG.c \
+ $(DISTNAME)/Makefile-libbz2_so
--- /dev/null
+
+# This Makefile builds a shared version of the library,
+# libbz2.so.1.0.1, with soname libbz2.so.1.0,
+# at least on x86-Linux (RedHat 5.2),
+# with gcc-2.7.2.3. Please see the README file for some
+# important info about building the library like this.
+
+SHELL=/bin/sh
+CC=gcc
+BIGFILES=-D_FILE_OFFSET_BITS=64
+CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
+
+OBJS= blocksort.o \
+ huffman.o \
+ crctable.o \
+ randtable.o \
+ compress.o \
+ decompress.o \
+ bzlib.o
+
+all: $(OBJS)
+ $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS)
+ $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.1
+ rm -f libbz2.so.1.0
+ ln -s libbz2.so.1.0.1 libbz2.so.1.0
+
+clean:
+ rm -f $(OBJS) bzip2.o libbz2.so.1.0.1 libbz2.so.1.0 bzip2-shared
+
+blocksort.o: blocksort.c
+ $(CC) $(CFLAGS) -c blocksort.c
+huffman.o: huffman.c
+ $(CC) $(CFLAGS) -c huffman.c
+crctable.o: crctable.c
+ $(CC) $(CFLAGS) -c crctable.c
+randtable.o: randtable.c
+ $(CC) $(CFLAGS) -c randtable.c
+compress.o: compress.c
+ $(CC) $(CFLAGS) -c compress.c
+decompress.o: decompress.c
+ $(CC) $(CFLAGS) -c decompress.c
+bzlib.o: bzlib.c
+ $(CC) $(CFLAGS) -c bzlib.c
This is the README for bzip2, a block-sorting file compressor, version
-0.9.5d. This version is fully compatible with the previous public
-releases, bzip2-0.1pl2 and bzip2-0.9.0.
+1.0. This version is fully compatible with the previous public
+releases, bzip2-0.1pl2, bzip2-0.9.0 and bzip2-0.9.5.
-bzip2-0.9.5 is distributed under a BSD-style license. For details,
+bzip2-1.0 is distributed under a BSD-style license. For details,
see the file LICENSE.
Complete documentation is available in Postscript form (manual.ps) or
not actually execute them.
+HOW TO BUILD -- UNIX, shared library libbz2.so.
+
+Do 'make -f Makefile-libbz2_so'. This Makefile seems to work for
+Linux-ELF (RedHat 5.2 on an x86 box), with gcc. I make no claims
+that it works for any other platform, though I suspect it probably
+will work for most platforms employing both ELF and gcc.
+
+bzip2-shared, a client of the shared library, is also built, but
+not self-tested. So I suggest you also build using the normal
+Makefile, since that conducts a self-test.
+
+Important note for people upgrading .so's from 0.9.0/0.9.5 to
+version 1.0. All the functions in the library have been renamed,
+from (eg) bzCompress to BZ2_bzCompress, to avoid namespace pollution.
+Unfortunately this means that the libbz2.so created by
+Makefile-libbz2_so will not work with any program which used an
+older version of the library. Sorry. I do encourage library
+clients to make the effort to upgrade to use version 1.0, since
+it is both faster and more robust than previous versions.
+
+
HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc.
It's difficult for me to support compilation on all these platforms.
My approach is to collect binaries for these platforms, and put them
-on my web page (http://www.muraroa.demon.co.uk). Look there. However
-(FWIW), bzip2-0.9.5 is very standard ANSI C and should compile
-unmodified with MS Visual C. For Win32, there is one important
-caveat: in bzip2.c, you must set BZ_UNIX to 0 and BZ_LCCWIN32 to 1
-before building.
+on the master web page (http://sourceware.cygnus.com/bzip2). Look
+there. However (FWIW), bzip2-1.0 is very standard ANSI C and should
+compile unmodified with MS Visual C. For Win32, there is one
+important caveat: in bzip2.c, you must set BZ_UNIX to 0 and
+BZ_LCCWIN32 to 1 before building. If you have difficulties building,
+you might want to read README.COMPILATION.PROBLEMS.
VALIDATION
* Many small improvements in file and flag handling.
* A Y2K statement.
+WHAT'S NEW IN 1.0
+
+ See the CHANGES file.
+
I hope you find bzip2 useful. Feel free to contact me at
jseward@acm.org
if you have any suggestions or queries. Many people mailed me with
23 August 1998 (bzip2, version 0.9.0)
8 June 1999 (bzip2, version 0.9.5)
4 Sept 1999 (bzip2, version 0.9.5d)
+ 5 May 2000 (bzip2, version 1.0pre8)
--- /dev/null
+
+bzip2-1.0 should compile without problems on the vast majority of
+platforms. Using the supplied Makefile, I've built and tested it
+myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and
+alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can
+build a native Win32 version too. Large file support seems to work
+correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows
+2000).
+
+When I say "large file" I mean a file of size 2,147,483,648 (2^31)
+bytes or above. Many older OSs can't handle files above this size,
+but many newer ones can. Large files are pretty huge -- most files
+you'll encounter are not Large Files.
+
+Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide
+variety of platforms without difficulty, and I hope this version will
+continue in that tradition. However, in order to support large files,
+I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile.
+This can cause problems.
+
+The technique of adding -D_FILE_OFFSET_BITS=64 to get large file
+support is, as far as I know, the Recommended Way to get correct large
+file support. For more details, see the Large File Support
+Specification, published by the Large File Summit, at
+ http://www.sas.com/standard/large.file/
+
+As a general comment, if you get compilation errors which you think
+are related to large file support, try removing the above define from
+the Makefile, ie, delete the line
+ BIGFILES=-D_FILE_OFFSET_BITS=64
+from the Makefile, and do 'make clean ; make'. This will give you a
+version of bzip2 without large file support, which, for most
+applications, is probably not a problem.
+
+Alternatively, try some of the platform-specific hints listed below.
+
+You can use the spewG.c program to generate huge files to test bzip2's
+large file support, if you are feeling paranoid. Be aware though that
+any compilation problems which affect bzip2 will also affect spewG.c,
+alas.
+
+
+Known problems as of 1.0pre8:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large
+ number of warnings appear, including the following:
+
+ /usr/include/sys/resource.h: In function `getrlimit':
+ /usr/include/sys/resource.h:168:
+ warning: implicit declaration of function `__getrlimit64'
+ /usr/include/sys/resource.h: In function `setrlimit':
+ /usr/include/sys/resource.h:170:
+ warning: implicit declaration of function `__setrlimit64'
+
+ This would appear to be a problem with large file support, header
+ files and gcc. gcc may or may not give up at this point. If it
+ fails, you might be able to improve matters by adding
+ -D__STDC_EXT__=1
+ to the BIGFILES variable in the Makefile, ie, change its definition
+ to
+ BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1
+
+ Even if gcc does produce a binary which appears to work (ie passes
+ its self-tests), you might want to test it to see if it works properly
+ on large files.
+
+
+* HP/UX 10.20 and 11.00, using HP's cc compiler.
+
+ No specific problems for this combination, except that you'll need to
+ specify the -Ae flag, and zap the gcc-specific stuff
+ -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce.
+ You should retain -D_FILE_OFFSET_BITS=64 in order to get large
+ file support -- which is reported to work ok for this HP/UX + cc
+ combination.
+
+
+* SunOS 4.1.X.
+
+ Amazingly, there are still people out there using this venerable old
+ banger. I shouldn't be too rude -- I started life on SunOS, and
+ it was a pretty darn good OS, way back then. Anyway:
+
+ SunOS doesn't seem to have strerror(), so you'll have to use
+ perror(), perhaps by adding this (warning: UNTESTED CODE):
+
+ char* strerror ( int errnum )
+ {
+ if (errnum < 0 || errnum >= sys_nerr)
+ return "Unknown error";
+ else
+ return sys_errlist[errnum];
+ }
+
+ Or you could comment out the relevant calls to strerror; they're
+ not mission-critical. Or you could upgrade to Solaris. Ha ha ha!
+ (what?? you think I've got Bad Attitude?)
+
+
+* Making a shared library on Solaris. (Not really a compilation
+ problem, but many people ask ...)
+
+ Firstly, if you have Solaris 8, either you have libbz2.so already
+ on your system, or you can install it from the Solaris CD.
+
+ Secondly, be aware that there are potential naming conflicts
+ between the .so file supplied with Solaris 8, and the .so file
+ which Makefile-libbz2_so will make. Makefile-libbz2_so creates
+ a .so which has the names which I intend to be "official" as
+ of version 1.0.0 and onwards. Unfortunately, the .so in
+ Solaris 8 appeared before I decided on the final names, so
+ the two libraries are incompatible. We have since communicated
+ and I hope that the problems will have been solved in the next
+ version of Solaris, whenever that might appear.
+
+ All that said: you might be able to get somewhere
+ by finding the line in Makefile-libbz2_so which says
+
+ $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS)
+
+ and replacing with
+
+ $(CC) -G -shared -o libbz2.so.1.0.1 -h libbz2.so.1.0 $(OBJS)
+
+ If gcc objects to the combination -fpic -fPIC, get rid of
+ the second one, leaving just "-fpic".
+
+
+That's the end of the currently known compilation problems.
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
Jon L. Bentley
For more information on these sources, see the manual.
+
+ To get some idea how the block sorting algorithms in this file
+ work, read my paper
+ On the Performance of BWT Sorting Algorithms
+ in Proceedings of the IEEE Data Compression Conference 2000,
+ Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
+ file implements the algorithm called cache in the paper.
--*/
/* Pre:
nblock > 0
eclass exists for [0 .. nblock-1]
- ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)eclass) [0 .. nblock-1] holds block
ptr exists for [0 .. nblock-1]
Post:
- ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)eclass) [0 .. nblock-1] holds block
All other areas of eclass destroyed
fmap [0 .. nblock-1] holds sorted order
bhtab [ 0 .. 2+(nblock/32) ] destroyed
Int32 H, i, j, k, l, r, cc, cc1;
Int32 nNotDone;
Int32 nBhtab;
- UInt16* eclass16 = (UInt16*)eclass;
+ UChar* eclass8 = (UChar*)eclass;
/*--
Initial 1-char radix sort to generate
if (verb >= 4)
VPrintf0 ( " bucket sorting ...\n" );
for (i = 0; i < 257; i++) ftab[i] = 0;
- for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++;
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
for (i = 0; i < nblock; i++) {
- j = eclass16[i] >> 8;
+ j = eclass8[i];
k = ftab[j] - 1;
ftab[j] = k;
fmap[k] = i;
/*--
Reconstruct the original block in
- eclass16 [0 .. nblock-1] [15:8], since the
+ eclass8 [0 .. nblock-1], since the
previous phase destroyed it.
--*/
if (verb >= 4)
for (i = 0; i < nblock; i++) {
while (ftabCopy[j] == 0) j++;
ftabCopy[j]--;
- eclass16[fmap[i]] = j << 8;
+ eclass8[fmap[i]] = (UChar)j;
}
AssertH ( j < 256, 1005 );
}
__inline__
Bool mainGtU ( UInt32 i1,
UInt32 i2,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
UInt32 nblock,
Int32* budget )
{
- Int32 k;
+ Int32 k;
+ UChar c1, c2;
UInt16 s1, s2;
AssertD ( i1 != i2, "mainGtU" );
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 9 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 10 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 11 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 12 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
k = nblock + 8;
do {
-
- s1 = block[i1]; s2 = block[i2];
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
+ i1++; i2++;
if (i1 >= nblock) i1 -= nblock;
if (i2 >= nblock) i2 -= nblock;
because the number of elems to sort is
usually small, typically <= 20.
--*/
+static
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
9841, 29524, 88573, 265720,
797161, 2391484 };
static
void mainSimpleSort ( UInt32* ptr,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
Int32 nblock,
Int32 lo,
} \
}
-
static
__inline__
-UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c )
+UChar mmed3 ( UChar a, UChar b, UChar c )
{
- UInt16 t;
+ UChar t;
if (a > b) { t = a; a = b; b = t; };
- if (b > c) { t = b; b = c; c = t; };
- if (a > b) b = a;
+ if (b > c) {
+ b = c;
+ if (a > b) b = a;
+ }
return b;
}
-
#define mmin(a,b) ((a) < (b)) ? (a) : (b)
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
static
void mainQSort3 ( UInt32* ptr,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
Int32 nblock,
Int32 loSt,
AssertD ( unHi == unLo-1, "mainQSort3(2)" );
if (gtHi < ltLo) {
- mpush(lo, hi, d+2 );
+ mpush(lo, hi, d+1 );
continue;
}
nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
- nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2;
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
/* Pre:
nblock > N_OVERSHOOT
block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)block32) [0 .. nblock-1] holds block
ptr exists for [0 .. nblock-1]
Post:
- ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)block32) [0 .. nblock-1] holds block
All other areas of block32 destroyed
ftab [0 .. 65536 ] destroyed
ptr [0 .. nblock-1] holds sorted order
static
void mainSort ( UInt32* ptr,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
UInt32* ftab,
Int32 nblock,
Int32 verb,
Int32* budget )
{
- Int32 i, j, k, m, ss, sb;
+ Int32 i, j, k, ss, sb;
Int32 runningOrder[256];
- Int32 copy[256];
Bool bigDone[256];
+ Int32 copyStart[256];
+ Int32 copyEnd [256];
UChar c1;
Int32 numQSorted;
- Int32 biggestSoFar;
UInt16 s;
-
if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
- /*-- Stripe the block data into 16 bits, and at the
- same time set up the 2-byte frequency table
- --*/
+ /*-- set up the 2-byte frequency table --*/
for (i = 65536; i >= 0; i--) ftab[i] = 0;
- s = block[0];
- for (i = 1; i < nblock; i++) {
+ j = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ quadrant[i-1] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+ ftab[j]++;
+ quadrant[i-2] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+ ftab[j]++;
+ quadrant[i-3] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+ ftab[j]++;
+ }
+ for (; i >= 0; i--) {
quadrant[i] = 0;
- s = (s << 8) | block[i];
- block[i-1] = s;
- ftab[s]++;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
}
- quadrant[0] = 0;
- s = (s << 8) | (block[0] >> 8);
- block[nblock-1] = s;
- ftab[s]++;
/*-- (emphasises close relationship of block & quadrant) --*/
for (i = 0; i < BZ_N_OVERSHOOT; i++) {
/*-- Complete the initial radix sort --*/
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
- for (i = 0; i < nblock; i++) {
- s = block[i];
- j = ftab[s] - 1;
+ s = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ s = (s >> 8) | (block[i-1] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-1;
+ s = (s >> 8) | (block[i-2] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-2;
+ s = (s >> 8) | (block[i-3] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-3;
+ }
+ for (; i >= 0; i--) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
ftab[s] = j;
ptr[j] = i;
}
The main sorting loop.
--*/
- biggestSoFar = numQSorted = 0;
+ numQSorted = 0;
for (i = 0; i <= 255; i++) {
/*--
Process big buckets, starting with the least full.
- Basically this is a 4-step process in which we call
+ Basically this is a 3-step process in which we call
mainQSort3 to sort the small buckets [ss, j], but
also make a big effort to avoid the calls if we can.
--*/
}
}
+ AssertH ( !bigDone[ss], 1006 );
+
/*--
Step 2:
- Deal specially with case [ss, ss]. This establishes the
- sorted order for [ss, ss] without any comparisons.
- A clever trick, cryptically described as steps Q6b and Q6c
- in SRC-124 (aka BW94). Compared to bzip2, this makes it
- practical not to use a preliminary run-length coder.
+ Now scan this big bucket [ss] so as to synthesise the
+ sorted order for small buckets [t, ss] for all t,
+ including, magically, the bucket [ss,ss] too.
+ This will avoid doing Real Work in subsequent Step 1's.
--*/
{
- Int32 put0, get0, put1, get1;
- Int32 sbn = (ss << 8) + ss;
- Int32 lo = ftab[sbn] & CLEARMASK;
- Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
- UChar ssc = (UChar)ss;
- put0 = lo;
- get0 = ftab[ss << 8] & CLEARMASK;
- put1 = hi;
- get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
- while (get0 < put0) {
- j = ptr[get0]-1; if (j < 0) j += nblock;
- c1 = (UChar)(block[j] >> 8);
- if (c1 == ssc) { ptr[put0] = j; put0++; };
- get0++;
+ for (j = 0; j <= 255; j++) {
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+ }
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyStart[c1]++ ] = k;
}
- while (get1 > put1) {
- j = ptr[get1]-1; if (j < 0) j += nblock;
- c1 = (UChar)(block[j] >> 8);
- if (c1 == ssc) { ptr[put1] = j; put1--; };
- get1--;
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyEnd[c1]-- ] = k;
}
- ftab[sbn] |= SETMASK;
}
+ AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
+
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+
/*--
Step 3:
The [ss] big bucket is now done. Record this fact,
while ((bbSize >> shifts) > 65534) shifts++;
- for (j = 0; j < bbSize; j++) {
+ for (j = bbSize-1; j >= 0; j--) {
Int32 a2update = ptr[bbStart + j];
UInt16 qVal = (UInt16)(j >> shifts);
quadrant[a2update] = qVal;
AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
}
- /*--
- Step 4:
- Now scan this big bucket [ss] so as to synthesise the
- sorted order for small buckets [t, ss] for all t != ss.
- This will avoid doing Real Work in subsequent Step 1's.
- --*/
- for (j = 0; j <= 255; j++)
- copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
-
- m = ftab[(ss+1) << 8] & CLEARMASK;
- for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) {
- k = ptr[j] - 1; if (k < 0) k += nblock;
- c1 = (UChar)(block[k] >> 8);
- if ( ! bigDone[c1] ) {
- ptr[copy[c1]] = k;
- copy[c1] ++;
- }
- }
-
- for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
}
if (verb >= 4)
/* Pre:
nblock > 0
arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)arr2) [0 .. nblock-1] holds block
arr1 exists for [0 .. nblock-1]
Post:
- ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)arr2) [0 .. nblock-1] holds block
All other areas of block destroyed
ftab [ 0 .. 65536 ] destroyed
arr1 [0 .. nblock-1] holds sorted order
*/
-void blockSort ( EState* s )
+void BZ2_blockSort ( EState* s )
{
UInt32* ptr = s->ptr;
- UInt16* block = s->block;
+ UChar* block = s->block;
UInt32* ftab = s->ftab;
Int32 nblock = s->nblock;
Int32 verb = s->verbosity;
Int32 i;
if (nblock < 10000) {
- for (i = 0; i < nblock; i++) block[i] <<= 8;
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
} else {
- quadrant = &(block[nblock+BZ_N_OVERSHOOT]);
+ /* Calculate the location for quadrant, remembering to get
+ the alignment right. Assumes that &(block[0]) is at least
+ 2-byte aligned -- this should be ok since block is really
+ the first section of arr2.
+ */
+ i = nblock+BZ_N_OVERSHOOT;
+ if (i & 1) i++;
+ quadrant = (UInt16*)(&(block[i]));
/* (wfact-1) / 3 puts the default-factor-30
transition point at very roughly the same place as
.PU
.TH bzip2 1
.SH NAME
-bzip2, bunzip2 \- a block-sorting file compressor, v0.9.5
+bzip2, bunzip2 \- a block-sorting file compressor, v1.0
.br
bzcat \- decompresses files to stdout
.br
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
-This manual page pertains to version 0.9.5 of
+This manual page pertains to version 1.0 of
.I bzip2.
Compressed
data created by this version is entirely forwards and backwards
-compatible with the previous public releases, versions 0.1pl2 and 0.9.0,
+compatible with the previous public releases, versions 0.1pl2, 0.9.0
+and 0.9.5,
but with the following exception: 0.9.0 and above can correctly
decompress multiple concatenated compressed files. 0.1pl2 cannot do
this; it will stop after decompressing just the first file in the
.SH AUTHOR
Julian Seward, jseward@acm.org.
+http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk
The ideas embodied in
+
+bzip2(1) bzip2(1)
+
+
N\bNA\bAM\bME\bE
- bzip2, bunzip2 - a block-sorting file compressor, v0.9.5
+ bzip2, bunzip2 - a block-sorting file compressor, v1.0
bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
+
+
+
+ 1
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
you recover the original uncompressed data. You can use
_\bb_\bz_\bi_\bp_\b2_\br_\be_\bc_\bo_\bv_\be_\br to try to recover data from damaged files.
+
+
+ 2
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
Return values: 0 for a normal exit, 1 for environmental
problems (file not found, invalid flags, I/O errors, &c),
2 to indicate a corrupt compressed file, 3 for an internal
-\b-q\bq -\b--\b-q\bqu\bui\bie\bet\bt
Suppress non-essential warning messages. Messages
pertaining to I/O errors and other critical events
+
+
+
+ 3
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
will not be suppressed.
-\b-v\bv -\b--\b-v\bve\ber\brb\bbo\bos\bse\be
Larger block sizes give rapidly diminishing marginal
returns. Most of the compression comes from the first two
+
+
+
+ 4
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
or three hundred k of block size, a fact worth bearing in
mind when using _\bb_\bz_\bi_\bp_\b2 on small machines. It is also
important to appreciate that the decompression memory
-9 7600k 3700k 2350k 828642
+
+
+
+
+ 5
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
R\bRE\bEC\bCO\bOV\bVE\bER\bRI\bIN\bNG\bG D\bDA\bAT\bTA\bA F\bFR\bRO\bOM\bM D\bDA\bAM\bMA\bAG\bGE\bED\bD F\bFI\bIL\bLE\bES\bS
_\bb_\bz_\bi_\bp_\b2 compresses files in blocks, usually 900kbytes long.
Each block is handled independently. If a media or trans-
operate in, and then charges all over it in a fairly ran-
dom fashion. This means that performance, both for com-
pressing and decompressing, is largely determined by the
+
+
+
+ 6
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the
miss rate have been observed to give disproportionately
but the details of what the problem is sometimes seem
rather misleading.
- This manual page pertains to version 0.9.5 of _\bb_\bz_\bi_\bp_\b2_\b. Com-
+ This manual page pertains to version 1.0 of _\bb_\bz_\bi_\bp_\b2_\b. Com-
pressed data created by this version is entirely forwards
and backwards compatible with the previous public
- releases, versions 0.1pl2 and 0.9.0, but with the follow-
- ing exception: 0.9.0 and above can correctly decompress
- multiple concatenated compressed files. 0.1pl2 cannot do
- this; it will stop after decompressing just the first file
- in the stream.
+ releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the
+ following exception: 0.9.0 and above can correctly decom-
+ press multiple concatenated compressed files. 0.1pl2 can-
+ not do this; it will stop after decompressing just the
+ first file in the stream.
_\bb_\bz_\bi_\bp_\b2_\br_\be_\bc_\bo_\bv_\be_\br uses 32-bit integers to represent bit posi-
tions in compressed files, so it cannot handle compressed
A\bAU\bUT\bTH\bHO\bOR\bR
Julian Seward, jseward@acm.org.
+ http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk
The ideas embodied in _\bb_\bz_\bi_\bp_\b2 are due to (at least) the fol-
- lowing people: Michael Burrows and David Wheeler (for the
- block sorting transformation), David Wheeler (again, for
+ lowing people: Michael Burrows and David Wheeler (for the
+ block sorting transformation), David Wheeler (again, for
the Huffman coder), Peter Fenwick (for the structured cod-
ing model in the original _\bb_\bz_\bi_\bp_\b, and many refinements), and
- Alistair Moffat, Radford Neal and Ian Witten (for the
+ Alistair Moffat, Radford Neal and Ian Witten (for the
arithmetic coder in the original _\bb_\bz_\bi_\bp_\b)_\b. I am much
indebted for their help, support and advice. See the man-
- ual in the source distribution for pointers to sources of
+ ual in the source distribution for pointers to sources of
documentation. Christian von Roques encouraged me to look
- for faster sorting algorithms, so as to speed up compres-
+ for faster sorting algorithms, so as to speed up compres-
sion. Bela Lubkin encouraged me to improve the worst-case
compression performance. Many people sent patches, helped
- with portability problems, lent machines, gave advice and
+ with portability problems, lent machines, gave advice and
were generally helpful.
+
+
+
+
+
+
+
+ 7
+
+
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
--*/
#define BZ_LCCWIN32 0
-#if defined(_WIN32) && !defined(__CYGWIN32__)
-#undef BZ_LCCWIN32
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#undef BZ_LCCWIN32
#define BZ_LCCWIN32 1
-#undef BZ_UNIX
+#undef BZ_UNIX
#define BZ_UNIX 0
#endif
ERROR_IF_MINUS_ONE ( retVal ); \
} while ( 0 )
# endif
+# ifdef __CYGWIN__
+# include <io.h>
+# include <fcntl.h>
+# undef SET_BINARY_MODE
+# define SET_BINARY_MODE(fd) \
+ do { \
+ int retVal = setmode ( fileno ( fd ), \
+ O_BINARY ); \
+ ERROR_IF_MINUS_ONE ( retVal ); \
+ } while ( 0 )
+# endif
#endif
/*---------------------------------------------------*/
Int32 verbosity;
-Bool keepInputFiles, smallMode;
-Bool forceOverwrite, testFailsExist, noisy;
+Bool keepInputFiles, smallMode, deleteOutputOnInterrupt;
+Bool forceOverwrite, testFailsExist, unzFailsExist, noisy;
Int32 numFileNames, numFilesProcessed, blockSize100k;
-
+Int32 exitValue;
/*-- source modes; F==file, I==stdin, O==stdout --*/
#define SM_I2O 1
FILE *outputHandleJustInCase;
Int32 workFactor;
-void panic ( Char* ) NORETURN;
-void ioError ( void ) NORETURN;
-void outOfMemory ( void ) NORETURN;
-void blockOverrun ( void ) NORETURN;
-void badBlockHeader ( void ) NORETURN;
-void badBGLengths ( void ) NORETURN;
-void crcError ( void ) NORETURN;
-void bitStreamEOF ( void ) NORETURN;
-void cleanUpAndFail ( Int32 ) NORETURN;
-void compressedStreamEOF ( void ) NORETURN;
+static void panic ( Char* ) NORETURN;
+static void ioError ( void ) NORETURN;
+static void outOfMemory ( void ) NORETURN;
+static void configError ( void ) NORETURN;
+static void crcError ( void ) NORETURN;
+static void cleanUpAndFail ( Int32 ) NORETURN;
+static void compressedStreamEOF ( void ) NORETURN;
-void copyFileName ( Char*, Char* );
-void* myMalloc ( Int32 );
+static void copyFileName ( Char*, Char* );
+static void* myMalloc ( Int32 );
+/*---------------------------------------------------*/
+/*--- An implementation of 64-bit ints. Sigh. ---*/
+/*--- Roll on widespread deployment of ANSI C9X ! ---*/
+/*---------------------------------------------------*/
+/* An unsigned 64-bit quantity held as eight separate bytes.  The
+   routines that follow treat b[0] as the least significant byte
+   and b[7] as the most significant. */
+typedef
+ struct { UChar b[8]; }
+ UInt64;
+/* Set *n to the 64-bit value whose upper 32 bits are hi32 and whose
+   lower 32 bits are lo32.  lo32 fills b[0]..b[3] and hi32 fills
+   b[4]..b[7], lowest byte first in each case. */
+static
+void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 )
+{
+ n->b[7] = (UChar)((hi32 >> 24) & 0xFF);
+ n->b[6] = (UChar)((hi32 >> 16) & 0xFF);
+ n->b[5] = (UChar)((hi32 >> 8) & 0xFF);
+ n->b[4] = (UChar) (hi32 & 0xFF);
+ n->b[3] = (UChar)((lo32 >> 24) & 0xFF);
+ n->b[2] = (UChar)((lo32 >> 16) & 0xFF);
+ n->b[1] = (UChar)((lo32 >> 8) & 0xFF);
+ n->b[0] = (UChar) (lo32 & 0xFF);
+}
+/* Return the value of *n as a double, computed as the sum of
+   b[i] * 256^i for i = 0 .. 7 (subject to ordinary double
+   rounding for large values).  *n is not modified. */
+static
+double uInt64_to_double ( UInt64* n )
+{
+ Int32 i;
+ double base = 1.0;
+ double sum = 0.0;
+ for (i = 0; i < 8; i++) {
+ sum += base * (double)(n->b[i]);
+ base *= 256.0;
+ }
+ return sum;
+}
+/* *dst += *src, added byte by byte from b[0] upward.  Any carry
+   out of b[7] is dropped, so the result wraps modulo 2^64.
+   *src is not modified. */
+static
+void uInt64_add ( UInt64* src, UInt64* dst )
+{
+ Int32 i;
+ Int32 carry = 0;
+ for (i = 0; i < 8; i++) {
+ carry += ( ((Int32)src->b[i]) + ((Int32)dst->b[i]) );
+ dst->b[i] = (UChar)(carry & 0xFF);
+ carry >>= 8;
+ }
+}
+/* *dst -= *src, subtracted byte by byte from b[0] upward.  Any
+   borrow out of b[7] is dropped, so the result wraps modulo 2^64
+   when *src is larger than *dst.  *src is not modified. */
+static
+void uInt64_sub ( UInt64* src, UInt64* dst )
+{
+ Int32 t, i;
+ Int32 borrow = 0;
+ for (i = 0; i < 8; i++) {
+ t = ((Int32)dst->b[i]) - ((Int32)src->b[i]) - borrow;
+ if (t < 0) {
+ dst->b[i] = (UChar)(t + 256);
+ borrow = 1;
+ } else {
+ dst->b[i] = (UChar)t;
+ borrow = 0;
+ }
+ }
+}
+/* Multiply *a by *b one byte at a time (schoolbook method).  The
+   16-byte product is accumulated in sum[], sum[0] being the lowest
+   byte; its upper 8 bytes are stored in *r_hi and its lower 8
+   bytes in *r_lo.  *a and *b are only read. */
+static
+void uInt64_mul ( UInt64* a, UInt64* b, UInt64* r_hi, UInt64* r_lo )
+{
+ UChar sum[16];
+ Int32 ia, ib, carry;
+ for (ia = 0; ia < 16; ia++) sum[ia] = 0;
+ for (ia = 0; ia < 8; ia++) {
+ carry = 0;
+ for (ib = 0; ib < 8; ib++) {
+ carry += ( ((Int32)sum[ia+ib])
+ + ((Int32)a->b[ia]) * ((Int32)b->b[ib]) );
+ sum[ia+ib] = (UChar)(carry & 0xFF);
+ carry >>= 8;
+ }
+ sum[ia+8] = (UChar)(carry & 0xFF);
+ if ((carry >>= 8) != 0) panic ( "uInt64_mul" ); /* carry must fit in one byte */
+ }
+ /* split the 16-byte product into its two 8-byte halves */
+ for (ia = 0; ia < 8; ia++) r_hi->b[ia] = sum[ia+8];
+ for (ia = 0; ia < 8; ia++) r_lo->b[ia] = sum[ia];
+}
+
+/* Shift *n right by one bit, in place.  Bytes are processed from
+   b[0] upward, so b[i+1] has not yet been shifted when its low
+   bit is copied into the top bit of b[i]. */
+static
+void uInt64_shr1 ( UInt64* n )
+{
+ Int32 i;
+ for (i = 0; i < 8; i++) {
+ n->b[i] >>= 1;
+ if (i < 7 && (n->b[i+1] & 1)) n->b[i] |= 0x80;
+ }
+}
+/* Shift *n left by one bit, in place.  Bytes are processed from
+   b[7] downward, so b[i-1] has not yet been shifted when its top
+   bit is moved into the (now clear) low bit of b[i]. */
+static
+void uInt64_shl1 ( UInt64* n )
+{
+ Int32 i;
+ for (i = 7; i >= 0; i--) {
+ n->b[i] <<= 1;
+ if (i > 0 && (n->b[i-1] & 0x80)) n->b[i]++;
+ }
+}
+/* Return 1 if every byte of *n is zero, otherwise return 0. */
+static
+Bool uInt64_isZero ( UInt64* n )
+{
+ Int32 i;
+ for (i = 0; i < 8; i++)
+ if (n->b[i] != 0) return 0;
+ return 1;
+}
+/* Replace *n by *n / 10 and return the remainder, i.e. the last
+   decimal digit of the value *n held on entry.  Calls panic() if
+   the computed remainder does not lie in 0 .. 9. */
+static
+Int32 uInt64_qrm10 ( UInt64* n )
+{
+ /* Divide *n by 10, and return the remainder. Long division
+ is difficult, so we cheat and instead multiply by
+ 0xCCCC CCCC CCCC CCCD, which is 0.8 (viz, 0.1 << 3) scaled
+ by 2^64, and keep only the high 64 bits of the product.
+ */
+ Int32 i;
+ UInt64 tmp1, tmp2, n_orig, zero_point_eight;
+
+ zero_point_eight.b[1] = zero_point_eight.b[2] =
+ zero_point_eight.b[3] = zero_point_eight.b[4] =
+ zero_point_eight.b[5] = zero_point_eight.b[6] =
+ zero_point_eight.b[7] = 0xCC;
+ zero_point_eight.b[0] = 0xCD;
+
+ n_orig = *n;
+
+ /* divide n by 10,
+ by multiplying by 0.8 and then shifting right 3 times;
+ the low half of the product (tmp2) is not needed */
+ uInt64_mul ( n, &zero_point_eight, &tmp1, &tmp2 );
+ uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); uInt64_shr1(&tmp1);
+ *n = tmp1;
+
+ /* from here on n is the quotient: tmp1 = 8*n, tmp2 = 2*n */
+ uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); uInt64_shl1(&tmp1);
+ tmp2 = *n; uInt64_shl1(&tmp2);
+
+ /* tmp1 = 10*n */
+ uInt64_add ( &tmp2, &tmp1 );
+
+ /* n_orig = n_orig - 10*n */
+ uInt64_sub ( &tmp1, &n_orig );
+
+ /* n_orig should now hold the remainder, in range 0 .. 9 */
+ for (i = 7; i >= 1; i--)
+ if (n_orig.b[i] != 0) panic ( "uInt64_qrm10(1)" );
+ if (n_orig.b[0] > 9)
+ panic ( "uInt64_qrm10(2)" );
+
+ return (int)n_orig.b[0];
+}
+
+/* ... and the Whole Entire Point of all this UInt64 stuff is
+ so that we can supply the following function.
+
+ Writes the decimal representation of *n into outbuf as a
+ NUL-terminated string, most significant digit first.  *n is
+ left untouched: a copy is repeatedly divided by 10, the digits
+ are collected lowest-first in buf, and then reversed into
+ outbuf.  The caller must provide an outbuf large enough for
+ all the digits plus the terminating NUL.
+*/
+static
+void uInt64_toAscii ( char* outbuf, UInt64* n )
+{
+ Int32 i, q;
+ UChar buf[32];
+ Int32 nBuf = 0;
+ UInt64 n_copy = *n;
+ do {
+ q = uInt64_qrm10 ( &n_copy );
+ buf[nBuf] = q + '0';
+ nBuf++;
+ } while (!uInt64_isZero(&n_copy));
+ outbuf[nBuf] = 0;
+ for (i = 0; i < nBuf; i++) outbuf[i] = buf[nBuf-i-1];
+}
+
+
/*---------------------------------------------------*/
/*--- Processing of complete files and streams ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
+static
Bool myfeof ( FILE* f )
{
Int32 c = fgetc ( f );
/*---------------------------------------------*/
+static
void compressStream ( FILE *stream, FILE *zStream )
{
BZFILE* bzf = NULL;
UChar ibuf[5000];
Int32 nIbuf;
- UInt32 nbytes_in, nbytes_out;
+ UInt32 nbytes_in_lo32, nbytes_in_hi32;
+ UInt32 nbytes_out_lo32, nbytes_out_hi32;
Int32 bzerr, bzerr_dummy, ret;
SET_BINARY_MODE(stream);
if (ferror(stream)) goto errhandler_io;
if (ferror(zStream)) goto errhandler_io;
- bzf = bzWriteOpen ( &bzerr, zStream,
- blockSize100k, verbosity, workFactor );
+ bzf = BZ2_bzWriteOpen ( &bzerr, zStream,
+ blockSize100k, verbosity, workFactor );
if (bzerr != BZ_OK) goto errhandler;
if (verbosity >= 2) fprintf ( stderr, "\n" );
if (myfeof(stream)) break;
nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream );
if (ferror(stream)) goto errhandler_io;
- if (nIbuf > 0) bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf );
+ if (nIbuf > 0) BZ2_bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf );
if (bzerr != BZ_OK) goto errhandler;
}
- bzWriteClose ( &bzerr, bzf, 0, &nbytes_in, &nbytes_out );
+ BZ2_bzWriteClose64 ( &bzerr, bzf, 0,
+ &nbytes_in_lo32, &nbytes_in_hi32,
+ &nbytes_out_lo32, &nbytes_out_hi32 );
if (bzerr != BZ_OK) goto errhandler;
if (ferror(zStream)) goto errhandler_io;
ret = fclose ( stream );
if (ret == EOF) goto errhandler_io;
- if (nbytes_in == 0) nbytes_in = 1;
+ if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0)
+ nbytes_in_lo32 = 1;
- if (verbosity >= 1)
+ if (verbosity >= 1) {
+ Char buf_nin[32], buf_nout[32];
+ UInt64 nbytes_in, nbytes_out;
+ double nbytes_in_d, nbytes_out_d;
+ uInt64_from_UInt32s ( &nbytes_in,
+ nbytes_in_lo32, nbytes_in_hi32 );
+ uInt64_from_UInt32s ( &nbytes_out,
+ nbytes_out_lo32, nbytes_out_hi32 );
+ nbytes_in_d = uInt64_to_double ( &nbytes_in );
+ nbytes_out_d = uInt64_to_double ( &nbytes_out );
+ uInt64_toAscii ( buf_nin, &nbytes_in );
+ uInt64_toAscii ( buf_nout, &nbytes_out );
fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
- "%5.2f%% saved, %d in, %d out.\n",
- (float)nbytes_in / (float)nbytes_out,
- (8.0 * (float)nbytes_out) / (float)nbytes_in,
- 100.0 * (1.0 - (float)nbytes_out / (float)nbytes_in),
- nbytes_in,
- nbytes_out
+ "%5.2f%% saved, %s in, %s out.\n",
+ nbytes_in_d / nbytes_out_d,
+ (8.0 * nbytes_out_d) / nbytes_in_d,
+ 100.0 * (1.0 - nbytes_out_d / nbytes_in_d),
+ buf_nin,
+ buf_nout
);
+ }
return;
errhandler:
- bzWriteClose ( &bzerr_dummy, bzf, 1, &nbytes_in, &nbytes_out );
+ BZ2_bzWriteClose64 ( &bzerr_dummy, bzf, 1,
+ &nbytes_in_lo32, &nbytes_in_hi32,
+ &nbytes_out_lo32, &nbytes_out_hi32 );
switch (bzerr) {
+ case BZ_CONFIG_ERROR:
+ configError(); break;
case BZ_MEM_ERROR:
- outOfMemory ();
+ outOfMemory (); break;
case BZ_IO_ERROR:
errhandler_io:
ioError(); break;
/*---------------------------------------------*/
+static
Bool uncompressStream ( FILE *zStream, FILE *stream )
{
BZFILE* bzf = NULL;
while (True) {
- bzf = bzReadOpen (
+ bzf = BZ2_bzReadOpen (
&bzerr, zStream, verbosity,
(int)smallMode, unused, nUnused
);
streamNo++;
while (bzerr == BZ_OK) {
- nread = bzRead ( &bzerr, bzf, obuf, 5000 );
+ nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 );
if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler;
if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0)
fwrite ( obuf, sizeof(UChar), nread, stream );
}
if (bzerr != BZ_STREAM_END) goto errhandler;
- bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
+ BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i];
- bzReadClose ( &bzerr, bzf );
+ BZ2_bzReadClose ( &bzerr, bzf );
if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
if (nUnused == 0 && myfeof(zStream)) break;
return True;
errhandler:
- bzReadClose ( &bzerr_dummy, bzf );
+ BZ2_bzReadClose ( &bzerr_dummy, bzf );
switch (bzerr) {
+ case BZ_CONFIG_ERROR:
+ configError(); break;
case BZ_IO_ERROR:
errhandler_io:
ioError(); break;
case BZ_UNEXPECTED_EOF:
compressedStreamEOF();
case BZ_DATA_ERROR_MAGIC:
+ if (zStream != stdin) fclose(zStream);
+ if (stream != stdout) fclose(stream);
if (streamNo == 1) {
return False;
} else {
/*---------------------------------------------*/
+static
Bool testStream ( FILE *zStream )
{
BZFILE* bzf = NULL;
while (True) {
- bzf = bzReadOpen (
+ bzf = BZ2_bzReadOpen (
&bzerr, zStream, verbosity,
(int)smallMode, unused, nUnused
);
streamNo++;
while (bzerr == BZ_OK) {
- nread = bzRead ( &bzerr, bzf, obuf, 5000 );
+ nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 );
if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler;
}
if (bzerr != BZ_STREAM_END) goto errhandler;
- bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
+ BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" );
for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i];
- bzReadClose ( &bzerr, bzf );
+ BZ2_bzReadClose ( &bzerr, bzf );
if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" );
if (nUnused == 0 && myfeof(zStream)) break;
return True;
errhandler:
- bzReadClose ( &bzerr_dummy, bzf );
+ BZ2_bzReadClose ( &bzerr_dummy, bzf );
if (verbosity == 0)
fprintf ( stderr, "%s: %s: ", progName, inName );
switch (bzerr) {
+ case BZ_CONFIG_ERROR:
+ configError(); break;
case BZ_IO_ERROR:
errhandler_io:
ioError(); break;
"file ends unexpectedly\n" );
return False;
case BZ_DATA_ERROR_MAGIC:
+ if (zStream != stdin) fclose(zStream);
if (streamNo == 1) {
fprintf ( stderr,
"bad magic number (file not created by bzip2)\n" );
/*---------------------------------------------------*/
/*---------------------------------------------*/
+static
+void setExit ( Int32 v )
+{ /*-- Raise the global exitValue to v when v is larger; a
+      smaller or equal v leaves exitValue unchanged. --*/
+ if (v > exitValue) exitValue = v;
+}
+
+
+/*---------------------------------------------*/
+static
void cadvise ( void )
{
if (noisy)
/*---------------------------------------------*/
+static
void showFileNames ( void )
{
if (noisy)
/*---------------------------------------------*/
+static
void cleanUpAndFail ( Int32 ec )
{
IntNative retVal;
- if ( srcMode == SM_F2F && opMode != OM_TEST ) {
+ if ( srcMode == SM_F2F
+ && opMode != OM_TEST
+ && deleteOutputOnInterrupt ) {
if (noisy)
fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
progName, outName );
progName, numFileNames,
numFileNames - numFilesProcessed );
}
- exit ( ec );
+ setExit(ec);
+ exit(exitValue);
}
/*---------------------------------------------*/
+static
void panic ( Char* s )
{
fprintf ( stderr,
/*---------------------------------------------*/
+static
void crcError ( void )
{
fprintf ( stderr,
/*---------------------------------------------*/
+static
void compressedStreamEOF ( void )
{
fprintf ( stderr,
/*---------------------------------------------*/
+static
void ioError ( void )
{
fprintf ( stderr,
- "\n%s: I/O or other error, bailing out. Possible reason follows.\n",
+ "\n%s: I/O or other error, bailing out. "
+ "Possible reason follows.\n",
progName );
perror ( progName );
showFileNames();
/*---------------------------------------------*/
+static
void mySignalCatcher ( IntNative n )
{
fprintf ( stderr,
/*---------------------------------------------*/
+static
void mySIGSEGVorSIGBUScatcher ( IntNative n )
{
if (opMode == OM_Z)
- fprintf ( stderr,
- "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
- "\twhich probably indicates a bug in bzip2. Please\n"
- "\treport it to me at: jseward@acm.org\n",
- progName );
+ fprintf (
+ stderr,
+ "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing.\n"
+ "\n"
+ " Possible causes are (most likely first):\n"
+ " (1) This computer has unreliable memory or cache hardware\n"
+ " (a surprisingly common problem; try a different machine.)\n"
+ " (2) A bug in the compiler used to create this executable\n"
+ " (unlikely, if you didn't compile bzip2 yourself.)\n"
+ " (3) A real bug in bzip2 -- I hope this should never be the case.\n"
+ " The user's manual, Section 4.3, has more info on (1) and (2).\n"
+ " \n"
+ " If you suspect this is a bug in bzip2, or are unsure about (1)\n"
+ " or (2), feel free to report it to me at: jseward@acm.org.\n"
+ " Section 4.3 of the user's manual describes the info a useful\n"
+ " bug report should have. If the manual is available on your\n"
+ " system, please try and read it before mailing me. If you don't\n"
+ " have the manual or can't be bothered to read it, mail me anyway.\n"
+ "\n",
+ progName );
else
- fprintf ( stderr,
- "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
- "\twhich probably indicates that the compressed data\n"
- "\tis corrupted.\n",
- progName );
+ fprintf (
+ stderr,
+ "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n"
+ "\n"
+ " Possible causes are (most likely first):\n"
+ " (1) The compressed data is corrupted, and bzip2's usual checks\n"
+ " failed to detect this. Try bzip2 -tvv my_file.bz2.\n"
+ " (2) This computer has unreliable memory or cache hardware\n"
+ " (a surprisingly common problem; try a different machine.)\n"
+ " (3) A bug in the compiler used to create this executable\n"
+ " (unlikely, if you didn't compile bzip2 yourself.)\n"
+ " (4) A real bug in bzip2 -- I hope this should never be the case.\n"
+ " The user's manual, Section 4.3, has more info on (2) and (3).\n"
+ " \n"
+ " If you suspect this is a bug in bzip2, or are unsure about (2)\n"
+ " or (3), feel free to report it to me at: jseward@acm.org.\n"
+ " Section 4.3 of the user's manual describes the info a useful\n"
+ " bug report should have. If the manual is available on your\n"
+ " system, please try and read it before mailing me. If you don't\n"
+ " have the manual or can't be bothered to read it, mail me anyway.\n"
+ "\n",
+ progName );
showFileNames();
if (opMode == OM_Z)
/*---------------------------------------------*/
+static
void outOfMemory ( void )
{
fprintf ( stderr,
}
+/*---------------------------------------------*/
+static
+void configError ( void )
+{ /*-- Print the "not configured correctly" message on stderr,
+      raise the exit value to at least 3 via setExit, then
+      terminate the program with exit(exitValue). --*/
+ fprintf ( stderr,
+ "bzip2: I'm not configured correctly for this platform!\n"
+ "\tI require Int32, Int16 and Char to have sizes\n"
+ "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
+ "\tProbably you can fix this by defining them correctly,\n"
+ "\tand recompiling. Bye!\n" );
+ setExit(3);
+ exit(exitValue);
+}
+
+
/*---------------------------------------------------*/
/*--- The main driver machinery ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
+static
void pad ( Char *s )
{
Int32 i;
/*---------------------------------------------*/
+static
void copyFileName ( Char* to, Char* from )
{
if ( strlen(from) > FILE_NAME_LEN-10 ) {
"Try using a reasonable file name instead. Sorry! :-)\n",
from, FILE_NAME_LEN-10
);
- exit(1);
+ setExit(1);
+ exit(exitValue);
}
strncpy(to,from,FILE_NAME_LEN-10);
/*---------------------------------------------*/
+static
Bool fileExists ( Char* name )
{
FILE *tmp = fopen ( name, "rb" );
/*--
if in doubt, return True
--*/
+static
Bool notAStandardFile ( Char* name )
{
IntNative i;
/*--
rac 11/21/98 see if file has hard links to it
--*/
+static
Int32 countHardLinks ( Char* name )
{
IntNative i;
/*---------------------------------------------*/
+static
void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName )
{
#if BZ_UNIX
/*---------------------------------------------*/
+static
void setInterimPermissions ( Char *dstName )
{
#if BZ_UNIX
/*---------------------------------------------*/
+static
Bool containsDubiousChars ( Char* name )
{
Bool cdc = False;
Char* unzSuffix[BZ_N_SUFFIX_PAIRS]
= { "", "", ".tar", ".tar" };
+static
Bool hasSuffix ( Char* s, Char* suffix )
{
Int32 ns = strlen(s);
return False;
}
+static
Bool mapSuffix ( Char* name,
Char* oldSuffix, Char* newSuffix )
{
/*---------------------------------------------*/
+static
void compress ( Char *name )
{
FILE *inStr;
FILE *outStr;
Int32 n, i;
+
+ deleteOutputOnInterrupt = False;
+
if (name == NULL && srcMode != SM_I2O)
panic ( "compress: bad modes\n" );
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
if (noisy)
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
- progName, inName );
+ progName, inName );
+ setExit(1);
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
}
for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) {
fprintf ( stderr,
"%s: Input file %s already has %s suffix.\n",
progName, inName, zSuffix[i] );
+ setExit(1);
return;
}
}
if (noisy)
fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
progName, inName );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
fprintf ( stderr, "%s: Output file %s already exists.\n",
progName, outName );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite &&
(n=countHardLinks ( inName )) > 0) {
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
progName, inName, n, n > 1 ? "s" : "" );
+ setExit(1);
return;
}
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
+ setExit(1);
return;
};
break;
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
};
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
};
break;
fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
progName, outName, strerror(errno) );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
}
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
if ( outStr != NULL ) fclose ( outStr );
+ setExit(1);
return;
};
setInterimPermissions ( outName );
/*--- Now the input and output handles are sane. Do the Biz. ---*/
outputHandleJustInCase = outStr;
+ deleteOutputOnInterrupt = True;
compressStream ( inStr, outStr );
outputHandleJustInCase = NULL;
/*--- If there was an I/O error, we won't get here. ---*/
if ( srcMode == SM_F2F ) {
copyDatePermissionsAndOwner ( inName, outName );
+ deleteOutputOnInterrupt = False;
if ( !keepInputFiles ) {
IntNative retVal = remove ( inName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
+
+ deleteOutputOnInterrupt = False;
}
/*---------------------------------------------*/
+static
void uncompress ( Char *name )
{
FILE *inStr;
Bool magicNumberOK;
Bool cantGuess;
+ deleteOutputOnInterrupt = False;
+
if (name == NULL && srcMode != SM_I2O)
panic ( "uncompress: bad modes\n" );
if (noisy)
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
+ setExit(1);
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) {
if (noisy)
fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
progName, inName );
+ setExit(1);
return;
}
if ( /* srcMode == SM_F2F implied && */ cantGuess ) {
if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
fprintf ( stderr, "%s: Output file %s already exists.\n",
progName, outName );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite &&
(n=countHardLinks ( inName ) ) > 0) {
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
progName, inName, n, n > 1 ? "s" : "" );
+ setExit(1);
return;
}
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
+ setExit(1);
return;
};
break;
fprintf ( stderr, "%s: Can't open input file %s:%s.\n",
progName, inName, strerror(errno) );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
};
break;
fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
progName, outName, strerror(errno) );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
}
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
if ( outStr != NULL ) fclose ( outStr );
+ setExit(1);
return;
};
setInterimPermissions ( outName );
/*--- Now the input and output handles are sane. Do the Biz. ---*/
outputHandleJustInCase = outStr;
+ deleteOutputOnInterrupt = True;
magicNumberOK = uncompressStream ( inStr, outStr );
outputHandleJustInCase = NULL;
if ( magicNumberOK ) {
if ( srcMode == SM_F2F ) {
copyDatePermissionsAndOwner ( inName, outName );
+ deleteOutputOnInterrupt = False;
if ( !keepInputFiles ) {
IntNative retVal = remove ( inName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
} else {
+ unzFailsExist = True;
+ deleteOutputOnInterrupt = False;
if ( srcMode == SM_F2F ) {
IntNative retVal = remove ( outName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
+ deleteOutputOnInterrupt = False;
if ( magicNumberOK ) {
if (verbosity >= 1)
fprintf ( stderr, "done\n" );
} else {
+ setExit(2);
if (verbosity >= 1)
fprintf ( stderr, "not a bzip2 file.\n" ); else
fprintf ( stderr,
/*---------------------------------------------*/
+static
void testf ( Char *name )
{
FILE *inStr;
Bool allOK;
+ deleteOutputOnInterrupt = False;
+
if (name == NULL && srcMode != SM_I2O)
panic ( "testf: bad modes\n" );
if (noisy)
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
+ setExit(1);
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Can't open input %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
}
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
+ setExit(1);
return;
};
inStr = stdin;
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s:%s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
};
break;
/*---------------------------------------------*/
+static
void license ( void )
{
fprintf ( stderr,
"bzip2, a block-sorting file compressor. "
- "Version 0.9.5d, 4-Sept-99.\n"
+ "Version %s.\n"
" \n"
- " Copyright (C) 1996, 1997, 1998, 1999 by Julian Seward.\n"
+ " Copyright (C) 1996-2000 by Julian Seward.\n"
" \n"
" This program is free software; you can redistribute it and/or modify\n"
" it under the terms set out in the LICENSE file, which is included\n"
- " in the bzip2-0.9.5 source distribution.\n"
+ " in the bzip2-1.0 source distribution.\n"
" \n"
" This program is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" LICENSE file for more details.\n"
- " \n"
+ " \n",
+ BZ2_bzlibVersion()
);
}
/*---------------------------------------------*/
+static
void usage ( Char *fullProgName )
{
fprintf (
stderr,
"bzip2, a block-sorting file compressor. "
- "Version 0.9.5d, 4-Sept-99.\n"
+ "Version %s.\n"
"\n usage: %s [flags and input files in any order]\n"
"\n"
" -h --help print this message\n"
#endif
,
+ BZ2_bzlibVersion(),
fullProgName
);
}
/*---------------------------------------------*/
+static
void redundant ( Char* flag )
{
fprintf (
/*---------------------------------------------*/
+static
void *myMalloc ( Int32 n )
{
void* p;
/*---------------------------------------------*/
+static
Cell *mkCell ( void )
{
Cell *c;
/*---------------------------------------------*/
+static
Cell *snocString ( Cell *root, Char *name )
{
if (root == NULL) {
/*---------------------------------------------*/
+static
void addFlagsFromEnvVar ( Cell** argList, Char* varName )
{
Int32 i, j, k;
/*-- Be really really really paranoid :-) --*/
if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
- sizeof(Char) != 1 || sizeof(UChar) != 1) {
- fprintf ( stderr,
- "bzip2: I'm not configured correctly for this platform!\n"
- "\tI require Int32, Int16 and Char to have sizes\n"
- "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
- "\tProbably you can fix this by defining them correctly,\n"
- "\tand recompiling. Bye!\n" );
- exit(3);
- }
-
+ sizeof(Char) != 1 || sizeof(UChar) != 1)
+ configError();
/*-- Initialise --*/
outputHandleJustInCase = NULL;
verbosity = 0;
blockSize100k = 9;
testFailsExist = False;
+ unzFailsExist = False;
numFileNames = 0;
numFilesProcessed = 0;
workFactor = 30;
+ deleteOutputOnInterrupt = False;
+ exitValue = 0;
i = j = 0; /* avoid bogus warning from egcs-1.1.X */
/*-- Set up signal handlers for mem access errors --*/
else
if (opMode == OM_UNZ) {
+ unzFailsExist = False;
if (srcMode == SM_I2O) {
uncompress ( NULL );
} else {
uncompress ( aa->name );
}
}
+ if (unzFailsExist) {
+ setExit(2);
+ exit(exitValue);
+ }
}
else {
"You can use the `bzip2recover' program to attempt to recover\n"
"data from undamaged sections of corrupted files.\n\n"
);
- exit(2);
+ setExit(2);
+ exit(exitValue);
}
}
aa = argList;
while (aa != NULL) {
Cell* aa2 = aa->link;
- if (aa->name) free(aa->name);
+ if (aa->name != NULL) free(aa->name);
free(aa);
aa = aa2;
}
- return 0;
+ return exitValue;
}
NAME
- bzip2, bunzip2 - a block-sorting file compressor, v0.9.5
+ bzip2, bunzip2 - a block-sorting file compressor, v1.0
bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files
but the details of what the problem is sometimes seem
rather misleading.
- This manual page pertains to version 0.9.5 of bzip2. Com-
+ This manual page pertains to version 1.0 of bzip2. Com-
pressed data created by this version is entirely forwards
and backwards compatible with the previous public
- releases, versions 0.1pl2 and 0.9.0, but with the follow-
- ing exception: 0.9.0 and above can correctly decompress
- multiple concatenated compressed files. 0.1pl2 cannot do
- this; it will stop after decompressing just the first file
- in the stream.
+ releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the
+ following exception: 0.9.0 and above can correctly decom-
+ press multiple concatenated compressed files. 0.1pl2 can-
+ not do this; it will stop after decompressing just the
+ first file in the stream.
bzip2recover uses 32-bit integers to represent bit posi-
tions in compressed files, so it cannot handle compressed
AUTHOR
Julian Seward, jseward@acm.org.
+ http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk
The ideas embodied in bzip2 are due to (at least) the fol-
/*--
This program is bzip2recover, a program to attempt data
salvage from damaged files created by the accompanying
- bzip2-0.9.5 program.
+ bzip2-1.0 program.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
--*/
/*--
strcpy ( progName, argv[0] );
inFileName[0] = outFileName[0] = 0;
- fprintf ( stderr, "bzip2recover 0.9.5d: extracts blocks from damaged .bz2 files.\n" );
+ fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );
if (argc != 2) {
fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
/*---------------------------------------------------*/
#ifndef BZ_NO_STDIO
-void bz__AssertH__fail ( int errcode )
+void BZ2_bz__AssertH__fail ( int errcode )
{
fprintf(stderr,
- "\n\nbzip2/libbzip2, v0.9.5d: internal error number %d.\n"
- "This is a bug in bzip2/libbzip2, v0.9.5d. Please report\n"
- "it to me at: jseward@acm.org. If this happened when\n"
- "you were using some program which uses libbzip2 as a\n"
+ "\n\nbzip2/libbzip2: internal error number %d.\n"
+ "This is a bug in bzip2/libbzip2, %s.\n"
+ "Please report it to me at: jseward@acm.org. If this happened\n"
+ "when you were using some program which uses libbzip2 as a\n"
"component, you should also report this bug to the author(s)\n"
"of that program. Please make an effort to report this bug;\n"
"timely and accurate bug reports eventually lead to higher\n"
- "quality software. Thanks. Julian Seward, 4 Sept 1999.\n\n",
- errcode
+ "quality software. Thanks. Julian Seward, 21 March 2000.\n\n",
+ errcode,
+ BZ2_bzlibVersion()
);
exit(3);
}
#endif
+/*---------------------------------------------------*/
+static
+int bz_config_ok ( void )
+{ /* Return 1 when int, short and char are 4, 2 and 1 bytes
+     wide respectively in this build; otherwise return 0. */
+ if (sizeof(int) != 4) return 0;
+ if (sizeof(short) != 2) return 0;
+ if (sizeof(char) != 1) return 0;
+ return 1;
+}
+
+
/*---------------------------------------------------*/
static
void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
/*---------------------------------------------------*/
-int BZ_API(bzCompressInit)
+int BZ_API(BZ2_bzCompressInit)
( bz_stream* strm,
int blockSize100k,
int verbosity,
Int32 n;
EState* s;
+ if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
if (strm == NULL ||
blockSize100k < 1 || blockSize100k > 9 ||
workFactor < 0 || workFactor > 250)
s->verbosity = verbosity;
s->workFactor = workFactor;
- s->block = (UInt16*)s->arr2;
+ s->block = (UChar*)s->arr2;
s->mtfv = (UInt16*)s->arr1;
s->zbits = NULL;
s->ptr = (UInt32*)s->arr1;
strm->state = s;
- strm->total_in = 0;
- strm->total_out = 0;
+ strm->total_in_lo32 = 0;
+ strm->total_in_hi32 = 0;
+ strm->total_out_lo32 = 0;
+ strm->total_out_hi32 = 0;
init_RL ( s );
prepare_new_block ( s );
return BZ_OK;
s->inUse[s->state_in_ch] = True;
switch (s->state_in_len) {
case 1:
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
case 2:
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
case 3:
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
default:
s->inUse[s->state_in_len-4] = True;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = ((UInt16)(s->state_in_len-4));
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = ((UChar)(s->state_in_len-4));
s->nblock++;
break;
}
UChar ch = (UChar)(zs->state_in_ch); \
BZ_UPDATE_CRC( zs->blockCRC, ch ); \
zs->inUse[zs->state_in_ch] = True; \
- zs->block[zs->nblock] = (UInt16)ch; \
+ zs->block[zs->nblock] = (UChar)ch; \
zs->nblock++; \
zs->state_in_ch = zchh; \
} \
ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
s->strm->next_in++;
s->strm->avail_in--;
- s->strm->total_in++;
+ s->strm->total_in_lo32++;
+ if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
}
} else {
ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
s->strm->next_in++;
s->strm->avail_in--;
- s->strm->total_in++;
+ s->strm->total_in_lo32++;
+ if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
s->avail_in_expect--;
}
}
s->state_out_pos++;
s->strm->avail_out--;
s->strm->next_out++;
- s->strm->total_out++;
-
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
return progress_out;
progress_in |= copy_input_until_stop ( s );
if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
flush_RL ( s );
- compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
+ BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
s->state = BZ_S_OUTPUT;
}
else
if (s->nblock >= s->nblockMAX) {
- compressBlock ( s, False );
+ BZ2_compressBlock ( s, False );
s->state = BZ_S_OUTPUT;
}
else
/*---------------------------------------------------*/
-int BZ_API(bzCompress) ( bz_stream *strm, int action )
+int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
{
Bool progress;
EState* s;
case BZ_M_FLUSHING:
if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
progress = handle_compress ( strm );
if (s->avail_in_expect > 0 || !isempty_RL(s) ||
s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
case BZ_M_FINISHING:
if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
progress = handle_compress ( strm );
if (!progress) return BZ_SEQUENCE_ERROR;
if (s->avail_in_expect > 0 || !isempty_RL(s) ||
/*---------------------------------------------------*/
-int BZ_API(bzCompressEnd) ( bz_stream *strm )
+int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
{
EState* s;
if (strm == NULL) return BZ_PARAM_ERROR;
/*---------------------------------------------------*/
/*---------------------------------------------------*/
-int BZ_API(bzDecompressInit)
+int BZ_API(BZ2_bzDecompressInit)
( bz_stream* strm,
int verbosity,
int small )
{
DState* s;
+ if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
if (strm == NULL) return BZ_PARAM_ERROR;
if (small != 0 && small != 1) return BZ_PARAM_ERROR;
if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
s->bsLive = 0;
s->bsBuff = 0;
s->calculatedCombinedCRC = 0;
- strm->total_in = 0;
- strm->total_out = 0;
+ strm->total_in_lo32 = 0;
+ strm->total_in_hi32 = 0;
+ strm->total_out_lo32 = 0;
+ strm->total_out_hi32 = 0;
s->smallDecompress = (Bool)small;
s->ll4 = NULL;
s->ll16 = NULL;
s->state_out_len--;
s->strm->next_out++;
s->strm->avail_out--;
- s->strm->total_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
/* can a new run be started? */
unsigned int cs_avail_out = s->strm->avail_out;
/* end restore */
- UInt32 avail_out_INIT = cs_avail_out;
- Int32 s_save_nblockPP = s->save_nblock+1;
+ UInt32 avail_out_INIT = cs_avail_out;
+ Int32 s_save_nblockPP = s->save_nblock+1;
+ unsigned int total_out_lo32_old;
while (True) {
}
return_notr:
- s->strm->total_out += (avail_out_INIT - cs_avail_out);
+ total_out_lo32_old = s->strm->total_out_lo32;
+ s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+ if (s->strm->total_out_lo32 < total_out_lo32_old)
+ s->strm->total_out_hi32++;
/* save */
s->calculatedBlockCRC = c_calculatedBlockCRC;
/*---------------------------------------------------*/
-__inline__ Int32 indexIntoF ( Int32 indx, Int32 *cftab )
+__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
{
Int32 nb, na, mid;
nb = 0;
s->state_out_len--;
s->strm->next_out++;
s->strm->avail_out--;
- s->strm->total_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
/* can a new run be started? */
s->state_out_len--;
s->strm->next_out++;
s->strm->avail_out--;
- s->strm->total_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
/* can a new run be started? */
/*---------------------------------------------------*/
-int BZ_API(bzDecompress) ( bz_stream *strm )
+int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
{
DState* s;
if (strm == NULL) return BZ_PARAM_ERROR;
}
}
if (s->state >= BZ_X_MAGIC_1) {
- Int32 r = decompress ( s );
+ Int32 r = BZ2_decompress ( s );
if (r == BZ_STREAM_END) {
if (s->verbosity >= 3)
VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x",
/*---------------------------------------------------*/
-int BZ_API(bzDecompressEnd) ( bz_stream *strm )
+int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
{
DState* s;
if (strm == NULL) return BZ_PARAM_ERROR;
/*---------------------------------------------------*/
-BZFILE* BZ_API(bzWriteOpen)
+BZFILE* BZ_API(BZ2_bzWriteOpen)
( int* bzerror,
FILE* f,
int blockSize100k,
bzf->strm.opaque = NULL;
if (workFactor == 0) workFactor = 30;
- ret = bzCompressInit ( &(bzf->strm), blockSize100k,
- verbosity, workFactor );
+ ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
+ verbosity, workFactor );
if (ret != BZ_OK)
{ BZ_SETERR(ret); free(bzf); return NULL; };
/*---------------------------------------------------*/
-void BZ_API(bzWrite)
+void BZ_API(BZ2_bzWrite)
( int* bzerror,
BZFILE* b,
void* buf,
while (True) {
bzf->strm.avail_out = BZ_MAX_UNUSED;
bzf->strm.next_out = bzf->buf;
- ret = bzCompress ( &(bzf->strm), BZ_RUN );
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
if (ret != BZ_RUN_OK)
{ BZ_SETERR(ret); return; };
/*---------------------------------------------------*/
-void BZ_API(bzWriteClose)
+void BZ_API(BZ2_bzWriteClose)
( int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in,
unsigned int* nbytes_out )
+{ /*--
+   Thin wrapper: forwards to BZ2_bzWriteClose64, handing nbytes_in
+   and nbytes_out over as the lo32 outputs and NULL for both hi32
+   outputs.  Callers of this entry point therefore receive only the
+   low 32 bits of the input and output byte counts.
+  --*/
+ BZ2_bzWriteClose64 ( bzerror, b, abandon,
+ nbytes_in, NULL, nbytes_out, NULL );
+}
+
+
+void BZ_API(BZ2_bzWriteClose64)
+ ( int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 )
{
Int32 n, n2, ret;
bzFile* bzf = (bzFile*)b;
if (ferror(bzf->handle))
{ BZ_SETERR(BZ_IO_ERROR); return; };
- if (nbytes_in != NULL) *nbytes_in = 0;
- if (nbytes_out != NULL) *nbytes_out = 0;
+ if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
+ if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
+ if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
+ if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
if ((!abandon) && bzf->lastErr == BZ_OK) {
while (True) {
bzf->strm.avail_out = BZ_MAX_UNUSED;
bzf->strm.next_out = bzf->buf;
- ret = bzCompress ( &(bzf->strm), BZ_FINISH );
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
{ BZ_SETERR(ret); return; };
{ BZ_SETERR(BZ_IO_ERROR); return; };
}
- if (nbytes_in != NULL) *nbytes_in = bzf->strm.total_in;
- if (nbytes_out != NULL) *nbytes_out = bzf->strm.total_out;
+ if (nbytes_in_lo32 != NULL)
+ *nbytes_in_lo32 = bzf->strm.total_in_lo32;
+ if (nbytes_in_hi32 != NULL)
+ *nbytes_in_hi32 = bzf->strm.total_in_hi32;
+ if (nbytes_out_lo32 != NULL)
+ *nbytes_out_lo32 = bzf->strm.total_out_lo32;
+ if (nbytes_out_hi32 != NULL)
+ *nbytes_out_hi32 = bzf->strm.total_out_hi32;
BZ_SETERR(BZ_OK);
- bzCompressEnd ( &(bzf->strm) );
+ BZ2_bzCompressEnd ( &(bzf->strm) );
free ( bzf );
}
/*---------------------------------------------------*/
-BZFILE* BZ_API(bzReadOpen)
+BZFILE* BZ_API(BZ2_bzReadOpen)
( int* bzerror,
FILE* f,
int verbosity,
nUnused--;
}
- ret = bzDecompressInit ( &(bzf->strm), verbosity, small );
+ ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
if (ret != BZ_OK)
{ BZ_SETERR(ret); free(bzf); return NULL; };
/*---------------------------------------------------*/
-void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b )
+void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
{
bzFile* bzf = (bzFile*)b;
{ BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
if (bzf->initialisedOk)
- (void)bzDecompressEnd ( &(bzf->strm) );
+ (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
free ( bzf );
}
/*---------------------------------------------------*/
-int BZ_API(bzRead)
+int BZ_API(BZ2_bzRead)
( int* bzerror,
BZFILE* b,
void* buf,
bzf->strm.next_in = bzf->buf;
}
- ret = bzDecompress ( &(bzf->strm) );
+ ret = BZ2_bzDecompress ( &(bzf->strm) );
if (ret != BZ_OK && ret != BZ_STREAM_END)
{ BZ_SETERR(ret); return 0; };
/*---------------------------------------------------*/
-void BZ_API(bzReadGetUnused)
+void BZ_API(BZ2_bzReadGetUnused)
( int* bzerror,
BZFILE* b,
void** unused,
/*---------------------------------------------------*/
/*---------------------------------------------------*/
-int BZ_API(bzBuffToBuffCompress)
+int BZ_API(BZ2_bzBuffToBuffCompress)
( char* dest,
unsigned int* destLen,
char* source,
strm.bzalloc = NULL;
strm.bzfree = NULL;
strm.opaque = NULL;
- ret = bzCompressInit ( &strm, blockSize100k,
- verbosity, workFactor );
+ ret = BZ2_bzCompressInit ( &strm, blockSize100k,
+ verbosity, workFactor );
if (ret != BZ_OK) return ret;
strm.next_in = source;
strm.avail_in = sourceLen;
strm.avail_out = *destLen;
- ret = bzCompress ( &strm, BZ_FINISH );
+ ret = BZ2_bzCompress ( &strm, BZ_FINISH );
if (ret == BZ_FINISH_OK) goto output_overflow;
if (ret != BZ_STREAM_END) goto errhandler;
/* normal termination */
*destLen -= strm.avail_out;
- bzCompressEnd ( &strm );
+ BZ2_bzCompressEnd ( &strm );
return BZ_OK;
output_overflow:
- bzCompressEnd ( &strm );
+ BZ2_bzCompressEnd ( &strm );
return BZ_OUTBUFF_FULL;
errhandler:
- bzCompressEnd ( &strm );
+ BZ2_bzCompressEnd ( &strm );
return ret;
}
/*---------------------------------------------------*/
-int BZ_API(bzBuffToBuffDecompress)
+int BZ_API(BZ2_bzBuffToBuffDecompress)
( char* dest,
unsigned int* destLen,
char* source,
strm.bzalloc = NULL;
strm.bzfree = NULL;
strm.opaque = NULL;
- ret = bzDecompressInit ( &strm, verbosity, small );
+ ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
if (ret != BZ_OK) return ret;
strm.next_in = source;
strm.avail_in = sourceLen;
strm.avail_out = *destLen;
- ret = bzDecompress ( &strm );
+ ret = BZ2_bzDecompress ( &strm );
if (ret == BZ_OK) goto output_overflow_or_eof;
if (ret != BZ_STREAM_END) goto errhandler;
/* normal termination */
*destLen -= strm.avail_out;
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return BZ_OK;
output_overflow_or_eof:
if (strm.avail_out > 0) {
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return BZ_UNEXPECTED_EOF;
} else {
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return BZ_OUTBUFF_FULL;
};
errhandler:
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return ret;
}
/*--
return version like "0.9.0c".
--*/
-const char * BZ_API(bzlibVersion)(void)
+const char * BZ_API(BZ2_bzlibVersion)(void)
{
return BZ_VERSION;
}
/* Guard against total chaos and anarchy -- JRS */
if (blockSize100k < 1) blockSize100k = 1;
if (blockSize100k > 9) blockSize100k = 9;
- bzfp = bzWriteOpen(&bzerr,fp,blockSize100k,verbosity,workFactor);
+ bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
+ verbosity,workFactor);
} else {
- bzfp = bzReadOpen(&bzerr,fp,verbosity,smallMode,unused,nUnused);
+ bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
+ unused,nUnused);
}
if (bzfp == NULL) {
if (fp != stdin && fp != stdout) fclose(fp);
ex) bzopen("file","w9")
case path="" or NULL => use stdin or stdout.
--*/
-BZFILE * BZ_API(bzopen)
+BZFILE * BZ_API(BZ2_bzopen)
( const char *path,
const char *mode )
{
/*---------------------------------------------------*/
-BZFILE * BZ_API(bzdopen)
+BZFILE * BZ_API(BZ2_bzdopen)
( int fd,
const char *mode )
{
/*---------------------------------------------------*/
-int BZ_API(bzread) (BZFILE* b, void* buf, int len )
+int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
{
int bzerr, nread;
if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
- nread = bzRead(&bzerr,b,buf,len);
+ nread = BZ2_bzRead(&bzerr,b,buf,len);
if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
return nread;
} else {
/*---------------------------------------------------*/
-int BZ_API(bzwrite) (BZFILE* b, void* buf, int len )
+int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
{
int bzerr;
- bzWrite(&bzerr,b,buf,len);
+ BZ2_bzWrite(&bzerr,b,buf,len);
if(bzerr == BZ_OK){
return len;
}else{
/*---------------------------------------------------*/
-int BZ_API(bzflush) (BZFILE *b)
+int BZ_API(BZ2_bzflush) (BZFILE *b)
{
/* do nothing now... */
return 0;
/*---------------------------------------------------*/
-void BZ_API(bzclose) (BZFILE* b)
+void BZ_API(BZ2_bzclose) (BZFILE* b)
{
int bzerr;
FILE *fp = ((bzFile *)b)->handle;
if (b==NULL) {return;}
if(((bzFile*)b)->writing){
- bzWriteClose(&bzerr,b,0,NULL,NULL);
+ BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
if(bzerr != BZ_OK){
- bzWriteClose(NULL,b,1,NULL,NULL);
+ BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
}
}else{
- bzReadClose(&bzerr,b);
+ BZ2_bzReadClose(&bzerr,b);
}
if(fp!=stdin && fp!=stdout){
fclose(fp);
,"IO_ERROR"
,"UNEXPECTED_EOF"
,"OUTBUFF_FULL"
+ ,"CONFIG_ERROR"
,"???" /* for future */
,"???" /* for future */
,"???" /* for future */
};
-const char * BZ_API(bzerror) (BZFILE *b, int *errnum)
+const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
{
int err = ((bzFile *)b)->lastErr;
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
#define BZ_IO_ERROR (-6)
#define BZ_UNEXPECTED_EOF (-7)
#define BZ_OUTBUFF_FULL (-8)
+#define BZ_CONFIG_ERROR (-9)
typedef
struct {
char *next_in;
unsigned int avail_in;
- unsigned int total_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
char *next_out;
unsigned int avail_out;
- unsigned int total_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
void *state;
/*-- Core (low-level) library functions --*/
-BZ_EXTERN int BZ_API(bzCompressInit) (
+BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
bz_stream* strm,
int blockSize100k,
int verbosity,
int workFactor
);
-BZ_EXTERN int BZ_API(bzCompress) (
+BZ_EXTERN int BZ_API(BZ2_bzCompress) (
bz_stream* strm,
int action
);
-BZ_EXTERN int BZ_API(bzCompressEnd) (
+BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
bz_stream* strm
);
-BZ_EXTERN int BZ_API(bzDecompressInit) (
+BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
bz_stream *strm,
int verbosity,
int small
);
-BZ_EXTERN int BZ_API(bzDecompress) (
+BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
bz_stream* strm
);
-BZ_EXTERN int BZ_API(bzDecompressEnd) (
+BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
bz_stream *strm
);
typedef void BZFILE;
-BZ_EXTERN BZFILE* BZ_API(bzReadOpen) (
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
int* bzerror,
FILE* f,
int verbosity,
int nUnused
);
-BZ_EXTERN void BZ_API(bzReadClose) (
+BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
int* bzerror,
BZFILE* b
);
-BZ_EXTERN void BZ_API(bzReadGetUnused) (
+BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
int* bzerror,
BZFILE* b,
void** unused,
int* nUnused
);
-BZ_EXTERN int BZ_API(bzRead) (
+BZ_EXTERN int BZ_API(BZ2_bzRead) (
int* bzerror,
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) (
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
int* bzerror,
FILE* f,
int blockSize100k,
int workFactor
);
-BZ_EXTERN void BZ_API(bzWrite) (
+BZ_EXTERN void BZ_API(BZ2_bzWrite) (
int* bzerror,
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN void BZ_API(bzWriteClose) (
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in,
unsigned int* nbytes_out
);
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
+ int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32
+ );
#endif
/*-- Utility functions --*/
-BZ_EXTERN int BZ_API(bzBuffToBuffCompress) (
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
char* dest,
unsigned int* destLen,
char* source,
int workFactor
);
-BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) (
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
char* dest,
unsigned int* destLen,
char* source,
If this code breaks, please contact both Yoshioka and me.
--*/
-BZ_EXTERN const char * BZ_API(bzlibVersion) (
+BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
void
);
#ifndef BZ_NO_STDIO
-BZ_EXTERN BZFILE * BZ_API(bzopen) (
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
const char *path,
const char *mode
);
-BZ_EXTERN BZFILE * BZ_API(bzdopen) (
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
int fd,
const char *mode
);
-BZ_EXTERN int BZ_API(bzread) (
+BZ_EXTERN int BZ_API(BZ2_bzread) (
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN int BZ_API(bzwrite) (
+BZ_EXTERN int BZ_API(BZ2_bzwrite) (
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN int BZ_API(bzflush) (
+BZ_EXTERN int BZ_API(BZ2_bzflush) (
BZFILE* b
);
-BZ_EXTERN void BZ_API(bzclose) (
+BZ_EXTERN void BZ_API(BZ2_bzclose) (
BZFILE* b
);
-BZ_EXTERN const char * BZ_API(bzerror) (
+BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
BZFILE *b,
int *errnum
);
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
/*-- General stuff. --*/
-#define BZ_VERSION "0.9.5d"
+#define BZ_VERSION "1.0.1, 23-June-2000"
typedef char Char;
typedef unsigned char Bool;
#endif
#ifndef BZ_NO_STDIO
-extern void bz__AssertH__fail ( int errcode );
+extern void BZ2_bz__AssertH__fail ( int errcode );
#define AssertH(cond,errcode) \
- { if (!(cond)) bz__AssertH__fail ( errcode ); }
+ { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
#if BZ_DEBUG
#define AssertD(cond,msg) \
{ if (!(cond)) { \
/*-- Stuff for randomising repetitive blocks. --*/
-extern Int32 rNums[512];
+extern Int32 BZ2_rNums[512];
#define BZ_RAND_DECLS \
Int32 rNToGo; \
#define BZ_RAND_UPD_MASK \
if (s->rNToGo == 0) { \
- s->rNToGo = rNums[s->rTPos]; \
+ s->rNToGo = BZ2_rNums[s->rTPos]; \
s->rTPos++; \
if (s->rTPos == 512) s->rTPos = 0; \
} \
/*-- Stuff for doing CRCs. --*/
-extern UInt32 crc32Table[256];
+extern UInt32 BZ2_crc32Table[256];
#define BZ_INITIALISE_CRC(crcVar) \
{ \
#define BZ_UPDATE_CRC(crcVar,cha) \
{ \
crcVar = (crcVar << 8) ^ \
- crc32Table[(crcVar >> 24) ^ \
- ((UChar)cha)]; \
+ BZ2_crc32Table[(crcVar >> 24) ^ \
+ ((UChar)cha)]; \
}
/* aliases for arr1 and arr2 */
UInt32* ptr;
- UInt16* block;
+ UChar* block;
UInt16* mtfv;
UChar* zbits;
UChar selector [BZ_MAX_SELECTORS];
UChar selectorMtf[BZ_MAX_SELECTORS];
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ /* second dimension: only 3 needed; 4 makes index calculations faster */
+ UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
}
EState;
/*-- externs for compression. --*/
extern void
-blockSort ( EState* );
+BZ2_blockSort ( EState* );
extern void
-compressBlock ( EState*, Bool );
+BZ2_compressBlock ( EState*, Bool );
extern void
-bsInitWrite ( EState* );
+BZ2_bsInitWrite ( EState* );
extern void
-hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
+BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
extern void
-hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
+BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
#define GET_LL(i) \
(((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
-#define BZ_GET_SMALL(cccc) \
- cccc = indexIntoF ( s->tPos, s->cftab ); \
+#define BZ_GET_SMALL(cccc) \
+ cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
s->tPos = GET_LL(s->tPos);
/*-- externs for decompression. --*/
extern Int32
-indexIntoF ( Int32, Int32* );
+BZ2_indexIntoF ( Int32, Int32* );
extern Int32
-decompress ( DState* );
+BZ2_decompress ( DState* );
extern void
-hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
- Int32, Int32, Int32 );
+BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
+ Int32, Int32, Int32 );
#endif
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
/*---------------------------------------------------*/
/*---------------------------------------------------*/
-void bsInitWrite ( EState* s )
+void BZ2_bsInitWrite ( EState* s )
{
s->bsLive = 0;
s->bsBuff = 0;
/*---------------------------------------------------*/
static
+__inline__
void bsW ( EState* s, Int32 n, UInt32 v )
{
bsNEEDW ( n );
{
UChar yy[256];
Int32 i, j;
- UChar tmp;
- UChar tmp2;
Int32 zPend;
Int32 wr;
Int32 EOB;
After sorting (eg, here),
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
and
- ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8]
+ ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
holds the original block data.
The first thing to do is generate the MTF values,
The final compressed bitstream is generated into the
area starting at
- (UChar*) (&((UInt16)s->arr2)[s->nblock])
+ (UChar*) (&((UChar*)s->arr2)[s->nblock])
These storage aliases are set up in bzCompressInit(),
except for the last one, which is arranged in
compressBlock().
*/
UInt32* ptr = s->ptr;
- UInt16* block = s->block;
+ UChar* block = s->block;
UInt16* mtfv = s->mtfv;
makeMaps_e ( s );
for (i = 0; i < s->nblock; i++) {
UChar ll_i;
-
AssertD ( wr <= i, "generateMTFValues(1)" );
j = ptr[i]-1; if (j < 0) j += s->nblock;
- ll_i = s->unseqToSeq[block[j] >> 8];
+ ll_i = s->unseqToSeq[block[j]];
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
- tmp = yy[0];
- if (tmp == ll_i) {
+ if (yy[0] == ll_i) {
zPend++;
} else {
- tmp2 = tmp;
- tmp = yy[1];
- yy[1] = tmp2;
- j = 1;
- while ( ll_i != tmp ) {
- j++;
- tmp2 = tmp;
- tmp = yy[j];
- yy[j] = tmp2;
- };
- yy[0] = tmp;
if (zPend > 0) {
zPend--;
};
zPend = 0;
}
- mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ {
+ register UChar rtmp;
+ register UChar* ryy_j;
+ register UChar rll_i;
+ rtmp = yy[1];
+ yy[1] = yy[0];
+ ryy_j = &(yy[1]);
+ rll_i = ll_i;
+ while ( rll_i != rtmp ) {
+ register UChar rtmp2;
+ ryy_j++;
+ rtmp2 = rtmp;
+ rtmp = *ryy_j;
+ *ryy_j = rtmp2;
+ };
+ yy[0] = rtmp;
+ j = ryy_j - &(yy[0]);
+ mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ }
+
}
}
if (zPend < 2) break;
zPend = (zPend - 2) / 2;
};
+ zPend = 0;
}
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
for (v = 0; v < alphaSize; v++)
s->rfreq[t][v] = 0;
+ /*---
+ Set up an auxiliary length table which is used to fast-track
+ the common case (nGroups == 6).
+ ---*/
+ if (nGroups == 6) {
+ for (v = 0; v < alphaSize; v++) {
+ s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+ s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+ s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+ }
+ }
+
nSelectors = 0;
totc = 0;
gs = 0;
--*/
for (t = 0; t < nGroups; t++) cost[t] = 0;
- if (nGroups == 6) {
- register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
- cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
- for (i = gs; i <= ge; i++) {
- UInt16 icv = mtfv[i];
- cost0 += s->len[0][icv];
- cost1 += s->len[1][icv];
- cost2 += s->len[2][icv];
- cost3 += s->len[3][icv];
- cost4 += s->len[4][icv];
- cost5 += s->len[5][icv];
- }
- cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
- cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ register UInt32 cost01, cost23, cost45;
+ register UInt16 icv;
+ cost01 = cost23 = cost45 = 0;
+
+# define BZ_ITER(nn) \
+ icv = mtfv[gs+(nn)]; \
+ cost01 += s->len_pack[icv][0]; \
+ cost23 += s->len_pack[icv][1]; \
+ cost45 += s->len_pack[icv][2]; \
+
+ BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
+ BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
+ BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+ BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+ BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+ BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+ BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+ BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+ BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+ BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+# undef BZ_ITER
+
+ cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+ cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+ cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
} else {
+ /*--- slow version which correctly handles all situations ---*/
for (i = gs; i <= ge; i++) {
UInt16 icv = mtfv[i];
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
/*--
Increment the symbol frequencies for the selected table.
--*/
- for (i = gs; i <= ge; i++)
- s->rfreq[bt][ mtfv[i] ]++;
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+
+# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+ BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
+ BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
+ BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+ BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+ BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+ BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+ BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+ BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+ BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+ BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+# undef BZ_ITUR
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++)
+ s->rfreq[bt][ mtfv[i] ]++;
+ }
gs = ge+1;
}
Recompute the tables based on the accumulated frequencies.
--*/
for (t = 0; t < nGroups; t++)
- hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
- alphaSize, 20 );
+ BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
+ alphaSize, 20 );
}
}
AssertH ( !(maxLen > 20), 3004 );
AssertH ( !(minLen < 1), 3005 );
- hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
- minLen, maxLen, alphaSize );
+ BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
+ minLen, maxLen, alphaSize );
}
/*--- Transmit the mapping table. ---*/
if (gs >= s->nMTF) break;
ge = gs + BZ_G_SIZE - 1;
if (ge >= s->nMTF) ge = s->nMTF-1;
- for (i = gs; i <= ge; i++) {
- AssertH ( s->selector[selCtr] < nGroups, 3006 );
- bsW ( s,
- s->len [s->selector[selCtr]] [mtfv[i]],
- s->code [s->selector[selCtr]] [mtfv[i]] );
+ AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ UInt16 mtfv_i;
+ UChar* s_len_sel_selCtr
+ = &(s->len[s->selector[selCtr]][0]);
+ Int32* s_code_sel_selCtr
+ = &(s->code[s->selector[selCtr]][0]);
+
+# define BZ_ITAH(nn) \
+ mtfv_i = mtfv[gs+(nn)]; \
+ bsW ( s, \
+ s_len_sel_selCtr[mtfv_i], \
+ s_code_sel_selCtr[mtfv_i] )
+
+ BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
+ BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
+ BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+ BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+ BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+ BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+ BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+ BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+ BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+ BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+# undef BZ_ITAH
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ bsW ( s,
+ s->len [s->selector[selCtr]] [mtfv[i]],
+ s->code [s->selector[selCtr]] [mtfv[i]] );
+ }
}
+
gs = ge+1;
selCtr++;
}
/*---------------------------------------------------*/
-void compressBlock ( EState* s, Bool is_last_block )
+void BZ2_compressBlock ( EState* s, Bool is_last_block )
{
if (s->nblock > 0) {
"combined CRC = 0x%8x, size = %d\n",
s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
- blockSort ( s );
+ BZ2_blockSort ( s );
}
- s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]);
+ s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
/*-- If this is the first block, create the stream header. --*/
if (s->blockNo == 1) {
- bsInitWrite ( s );
+ BZ2_bsInitWrite ( s );
bsPutUChar ( s, 'B' );
bsPutUChar ( s, 'Z' );
bsPutUChar ( s, 'h' );
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
comp.compression FAQ.
--*/
-UInt32 crc32Table[256] = {
+UInt32 BZ2_crc32Table[256] = {
/*-- Ugly, innit? --*/
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
s->bsLive += 8; \
s->strm->next_in++; \
s->strm->avail_in--; \
- s->strm->total_in++; \
+ s->strm->total_in_lo32++; \
+ if (s->strm->total_in_lo32 == 0) \
+ s->strm->total_in_hi32++; \
}
#define GET_UCHAR(lll,uuu) \
{ \
if (groupPos == 0) { \
groupNo++; \
+ if (groupNo >= nSelectors) \
+ RETURN(BZ_DATA_ERROR); \
groupPos = BZ_G_SIZE; \
gSel = s->selector[groupNo]; \
gMinlen = s->minLens[gSel]; \
groupPos--; \
zn = gMinlen; \
GET_BITS(label1, zvec, zn); \
- while (zvec > gLimit[zn]) { \
+ while (1) { \
+ if (zn > 20 /* the longest code */) \
+ RETURN(BZ_DATA_ERROR); \
+ if (zvec <= gLimit[zn]) break; \
zn++; \
GET_BIT(label2, zj); \
zvec = (zvec << 1) | zj; \
}; \
+ if (zvec - gBase[zn] < 0 \
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
+ RETURN(BZ_DATA_ERROR); \
lval = gPerm[zvec - gBase[zn]]; \
}
/*---------------------------------------------------*/
-Int32 decompress ( DState* s )
+Int32 BZ2_decompress ( DState* s )
{
UChar uc;
Int32 retVal;
GET_UCHAR(BZ_X_ORIGPTR_3, uc);
s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+ if (s->origPtr < 0)
+ RETURN(BZ_DATA_ERROR);
+ if (s->origPtr > 10 + 100000*s->blockSize100k)
+ RETURN(BZ_DATA_ERROR);
+
/*--- Receive the mapping table ---*/
for (i = 0; i < 16; i++) {
GET_BIT(BZ_X_MAPPING_1, uc);
if (uc == 1) s->inUse[i * 16 + j] = True;
}
makeMaps_d ( s );
+ if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
alphaSize = s->nInUse+2;
/*--- Now the selectors ---*/
GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+ if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+ if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
for (i = 0; i < nSelectors; i++) {
j = 0;
while (True) {
GET_BIT(BZ_X_SELECTOR_3, uc);
if (uc == 0) break;
j++;
- if (j > 5) RETURN(BZ_DATA_ERROR);
+ if (j >= nGroups) RETURN(BZ_DATA_ERROR);
}
s->selectorMtf[i] = j;
}
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
if (s->len[t][i] < minLen) minLen = s->len[t][i];
}
- hbCreateDecodeTables (
+ BZ2_hbCreateDecodeTables (
&(s->limit[t][0]),
&(s->base[t][0]),
&(s->perm[t][0]),
/*-- end MTF init --*/
nblock = 0;
-
GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
while (True) {
if (s->smallDecompress)
while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->ll16[nblock] = (UInt16)uc;
nblock++;
es--;
}
else
while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->tt[nblock] = (UInt32)uc;
nblock++;
es--;
};
- if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR);
continue;
} else {
- if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR);
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
/*-- uc = MTF ( nextSym-1 ) --*/
{
}
}
+ /* Now we know what nblock is, we can do a better sanity
+ check on s->origPtr.
+ */
+ if (s->origPtr < 0 || s->origPtr >= nblock)
+ RETURN(BZ_DATA_ERROR);
+
s->state_out_len = 0;
s->state_out_ch = 0;
BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
-/*
- minibz2
- libbz2.dll test program.
- by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
- This file is Public Domain.
- welcome any email to me.
-
- usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
-*/
-
-#define BZ_IMPORT
-#include <stdio.h>
-#include <stdlib.h>
-#include "bzlib.h"
-#ifdef _WIN32
-#include <io.h>
-#endif
-
-
-#ifdef _WIN32
-
-#include <windows.h>
-static int BZ2DLLLoaded = 0;
-static HINSTANCE BZ2DLLhLib;
-int BZ2DLLLoadLibrary(void)
-{
- HINSTANCE hLib;
-
- if(BZ2DLLLoaded==1){return 0;}
- hLib=LoadLibrary("libbz2.dll");
- if(hLib == NULL){
- puts("Can't load libbz2.dll");
- return -1;
- }
- BZ2DLLLoaded=1;
- BZ2DLLhLib=hLib;
- bzlibVersion=GetProcAddress(hLib,"bzlibVersion");
- bzopen=GetProcAddress(hLib,"bzopen");
- bzdopen=GetProcAddress(hLib,"bzdopen");
- bzread=GetProcAddress(hLib,"bzread");
- bzwrite=GetProcAddress(hLib,"bzwrite");
- bzflush=GetProcAddress(hLib,"bzflush");
- bzclose=GetProcAddress(hLib,"bzclose");
- bzerror=GetProcAddress(hLib,"bzerror");
- return 0;
-
-}
-int BZ2DLLFreeLibrary(void)
-{
- if(BZ2DLLLoaded==0){return 0;}
- FreeLibrary(BZ2DLLhLib);
- BZ2DLLLoaded=0;
-}
-#endif /* WIN32 */
-
-void usage(void)
-{
- puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");
-}
-
-int main(int argc,char *argv[])
-{
- int decompress = 0;
- int level = 9;
- char *fn_r = NULL;
- char *fn_w = NULL;
-
-#ifdef _WIN32
- if(BZ2DLLLoadLibrary()<0){
- puts("can't load dll");
- exit(1);
- }
-#endif
- while(++argv,--argc){
- if(**argv =='-' || **argv=='/'){
- char *p;
-
- for(p=*argv+1;*p;p++){
- if(*p=='d'){
- decompress = 1;
- }else if('1'<=*p && *p<='9'){
- level = *p - '0';
- }else{
- usage();
- exit(1);
- }
- }
- }else{
- break;
- }
- }
- if(argc>=1){
- fn_r = *argv;
- argc--;argv++;
- }else{
- fn_r = NULL;
- }
- if(argc>=1){
- fn_w = *argv;
- argc--;argv++;
- }else{
- fn_w = NULL;
- }
- {
- int len;
- char buff[0x1000];
- char mode[10];
-
- if(decompress){
- BZFILE *BZ2fp_r = NULL;
- FILE *fp_w = NULL;
-
- if(fn_w){
- if((fp_w = fopen(fn_w,"wb"))==NULL){
- printf("can't open [%s]\n",fn_w);
- perror("reason:");
- exit(1);
- }
- }else{
- fp_w = stdout;
- }
- if((BZ2fp_r == NULL && (BZ2fp_r = bzdopen(fileno(stdin),"rb"))==NULL)
- || (BZ2fp_r != NULL && (BZ2fp_r = bzopen(fn_r,"rb"))==NULL)){
- printf("can't bz2openstream\n");
- exit(1);
- }
- while((len=bzread(BZ2fp_r,buff,0x1000))>0){
- fwrite(buff,1,len,fp_w);
- }
- bzclose(BZ2fp_r);
- if(fp_w != stdout) fclose(fp_w);
- }else{
- BZFILE *BZ2fp_w = NULL;
- FILE *fp_r = NULL;
-
- if(fn_r){
- if((fp_r = fopen(fn_r,"rb"))==NULL){
- printf("can't open [%s]\n",fn_r);
- perror("reason:");
- exit(1);
- }
- }else{
- fp_r = stdin;
- }
- mode[0]='w';
- mode[1] = '0' + level;
- mode[2] = '\0';
-
- if((fn_w == NULL && (BZ2fp_w = bzdopen(fileno(stdout),mode))==NULL)
- || (fn_w !=NULL && (BZ2fp_w = bzopen(fn_w,mode))==NULL)){
- printf("can't bz2openstream\n");
- exit(1);
- }
- while((len=fread(buff,1,0x1000,fp_r))>0){
- bzwrite(BZ2fp_w,buff,len);
- }
- bzclose(BZ2fp_w);
- if(fp_r!=stdin)fclose(fp_r);
- }
- }
-#ifdef _WIN32
- BZ2DLLFreeLibrary();
-#endif
- return 0;
-}
+/*\r
+ minibz2\r
+ libbz2.dll test program.\r
+ by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)\r
+ This file is Public Domain.\r
+ welcome any email to me.\r
+\r
+ usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]\r
+*/\r
+\r
+#define BZ_IMPORT\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include "bzlib.h"\r
+#ifdef _WIN32\r
+#include <io.h>\r
+#endif\r
+\r
+\r
+#ifdef _WIN32\r
+\r
+#define BZ2_LIBNAME "libbz2-1.0.0.DLL" \r
+\r
+#include <windows.h>\r
+static int BZ2DLLLoaded = 0;\r
+static HINSTANCE BZ2DLLhLib;\r
+int BZ2DLLLoadLibrary(void)\r
+{\r
+ HINSTANCE hLib;\r
+\r
+ if(BZ2DLLLoaded==1){return 0;}\r
+ hLib=LoadLibrary(BZ2_LIBNAME);\r
+ if(hLib == NULL){\r
+ fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);\r
+ return -1;\r
+ }\r
+ BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");\r
+ BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");\r
+ BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");\r
+ BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");\r
+ BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");\r
+ BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");\r
+ BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");\r
+ BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");\r
+\r
+ if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen\r
+ || !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush\r
+ || !BZ2_bzclose || !BZ2_bzerror) {\r
+ fprintf(stderr,"GetProcAddress failed.\n");\r
+ return -1;\r
+ }\r
+ BZ2DLLLoaded=1;\r
+ BZ2DLLhLib=hLib;\r
+ return 0;\r
+\r
+}\r
+int BZ2DLLFreeLibrary(void)\r
+{\r
+ if(BZ2DLLLoaded==0){return 0;}\r
+ FreeLibrary(BZ2DLLhLib);\r
+ BZ2DLLLoaded=0;\r
+}\r
+#endif /* WIN32 */\r
+\r
+void usage(void)\r
+{\r
+ puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");\r
+}\r
+\r
+int main(int argc,char *argv[])\r
+{\r
+ int decompress = 0;\r
+ int level = 9;\r
+ char *fn_r = NULL;\r
+ char *fn_w = NULL;\r
+\r
+#ifdef _WIN32\r
+ if(BZ2DLLLoadLibrary()<0){\r
+ fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME);\r
+ exit(1);\r
+ }\r
+ printf("Loading of %s succeeded. Library version is %s.\n",\r
+ BZ2_LIBNAME, BZ2_bzlibVersion() );\r
+#endif\r
+ while(++argv,--argc){\r
+ if(**argv =='-' || **argv=='/'){\r
+ char *p;\r
+\r
+ for(p=*argv+1;*p;p++){\r
+ if(*p=='d'){\r
+ decompress = 1;\r
+ }else if('1'<=*p && *p<='9'){\r
+ level = *p - '0';\r
+ }else{\r
+ usage();\r
+ exit(1);\r
+ }\r
+ }\r
+ }else{\r
+ break;\r
+ }\r
+ }\r
+ if(argc>=1){\r
+ fn_r = *argv;\r
+ argc--;argv++;\r
+ }else{\r
+ fn_r = NULL;\r
+ }\r
+ if(argc>=1){\r
+ fn_w = *argv;\r
+ argc--;argv++;\r
+ }else{\r
+ fn_w = NULL;\r
+ }\r
+ {\r
+ int len;\r
+ char buff[0x1000];\r
+ char mode[10];\r
+\r
+ if(decompress){\r
+ BZFILE *BZ2fp_r = NULL;\r
+ FILE *fp_w = NULL;\r
+\r
+ if(fn_w){\r
+ if((fp_w = fopen(fn_w,"wb"))==NULL){\r
+ printf("can't open [%s]\n",fn_w);\r
+ perror("reason:");\r
+ exit(1);\r
+ }\r
+ }else{\r
+ fp_w = stdout;\r
+ }\r
+ if((BZ2fp_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)\r
+ || (BZ2fp_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){\r
+ printf("can't bz2openstream\n");\r
+ exit(1);\r
+ }\r
+ while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){\r
+ fwrite(buff,1,len,fp_w);\r
+ }\r
+ BZ2_bzclose(BZ2fp_r);\r
+ if(fp_w != stdout) fclose(fp_w);\r
+ }else{\r
+ BZFILE *BZ2fp_w = NULL;\r
+ FILE *fp_r = NULL;\r
+\r
+ if(fn_r){\r
+ if((fp_r = fopen(fn_r,"rb"))==NULL){\r
+ printf("can't open [%s]\n",fn_r);\r
+ perror("reason:");\r
+ exit(1);\r
+ }\r
+ }else{\r
+ fp_r = stdin;\r
+ }\r
+ mode[0]='w';\r
+ mode[1] = '0' + level;\r
+ mode[2] = '\0';\r
+\r
+ if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)\r
+ || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){\r
+ printf("can't bz2openstream\n");\r
+ exit(1);\r
+ }\r
+ while((len=fread(buff,1,0x1000,fp_r))>0){\r
+ BZ2_bzwrite(BZ2fp_w,buff,len);\r
+ }\r
+ BZ2_bzclose(BZ2fp_w);\r
+ if(fp_r!=stdin)fclose(fp_r);\r
+ }\r
+ }\r
+#ifdef _WIN32\r
+ BZ2DLLFreeLibrary();\r
+#endif\r
+ return 0;\r
+}\r
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
/*---------------------------------------------------*/
-void hbMakeCodeLengths ( UChar *len,
- Int32 *freq,
- Int32 alphaSize,
- Int32 maxLen )
+void BZ2_hbMakeCodeLengths ( UChar *len,
+ Int32 *freq,
+ Int32 alphaSize,
+ Int32 maxLen )
{
/*--
Nodes and heap entries run from 1. Entry 0
/*---------------------------------------------------*/
-void hbAssignCodes ( Int32 *code,
- UChar *length,
- Int32 minLen,
- Int32 maxLen,
- Int32 alphaSize )
+void BZ2_hbAssignCodes ( Int32 *code,
+ UChar *length,
+ Int32 minLen,
+ Int32 maxLen,
+ Int32 alphaSize )
{
Int32 n, vec, i;
/*---------------------------------------------------*/
-void hbCreateDecodeTables ( Int32 *limit,
- Int32 *base,
- Int32 *perm,
- UChar *length,
- Int32 minLen,
- Int32 maxLen,
- Int32 alphaSize )
+void BZ2_hbCreateDecodeTables ( Int32 *limit,
+ Int32 *base,
+ Int32 *perm,
+ UChar *length,
+ Int32 minLen,
+ Int32 maxLen,
+ Int32 alphaSize )
{
Int32 pp, i, j, vec;
LIBRARY LIBBZ2\r
DESCRIPTION "libbzip2: library for data compression"\r
EXPORTS\r
- bzCompressInit\r
- bzCompress\r
- bzCompressEnd\r
- bzDecompressInit\r
- bzDecompress\r
- bzDecompressEnd\r
- bzReadOpen\r
- bzReadClose\r
- bzReadGetUnused\r
- bzRead\r
- bzWriteOpen\r
- bzWrite\r
- bzWriteClose\r
- bzBuffToBuffCompress\r
- bzBuffToBuffDecompress\r
- bzlibVersion\r
- bzopen\r
- bzdopen\r
- bzread\r
- bzwrite\r
- bzflush\r
- bzclose\r
+ BZ2_bzCompressInit\r
+ BZ2_bzCompress\r
+ BZ2_bzCompressEnd\r
+ BZ2_bzDecompressInit\r
+ BZ2_bzDecompress\r
+ BZ2_bzDecompressEnd\r
+ BZ2_bzReadOpen\r
+ BZ2_bzReadClose\r
+ BZ2_bzReadGetUnused\r
+ BZ2_bzRead\r
+ BZ2_bzWriteOpen\r
+ BZ2_bzWrite\r
+ BZ2_bzWriteClose\r
+ BZ2_bzWriteClose64\r
+ BZ2_bzBuffToBuffCompress\r
+ BZ2_bzBuffToBuffDecompress\r
+ BZ2_bzlibVersion\r
+ BZ2_bzopen\r
+ BZ2_bzdopen\r
+ BZ2_bzread\r
+ BZ2_bzwrite\r
+ BZ2_bzflush\r
+ BZ2_bzclose\r
+ BZ2_bzerror\r
# Fixed up by JRS for bzip2-0.9.5d release.\r
\r
CC=cl\r
-CFLAGS= -DWIN32 -MD -Ox\r
+CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64\r
\r
OBJS= blocksort.obj \\r
huffman.obj \\r
$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c\r
\r
lib: $(OBJS)\r
- del libbz2.lib\r
lib /out:libbz2.lib $(OBJS)\r
\r
test: bzip2\r
.\\bzip2 -d < sample1.bz2 > sample1.tst\r
.\\bzip2 -d < sample2.bz2 > sample2.tst\r
.\\bzip2 -ds < sample3.bz2 > sample3.tst\r
+ @echo All six of the fc's should find no differences.\r
+ @echo If fc finds an error on sample3.bz2, this could be\r
+ @echo because WinZip's 'TAR file smart CR/LF conversion'\r
+ @echo is too clever for its own good. Disable this option.\r
+ @echo The correct size for sample3.ref is 120,244. If it\r
+ @echo is 150,251, WinZip has messed it up.\r
fc sample1.bz2 sample1.rb2 \r
fc sample2.bz2 sample2.rb2\r
fc sample3.bz2 sample3.rb2\r
fc sample1.tst sample1.ref\r
fc sample2.tst sample2.ref\r
fc sample3.tst sample3.ref\r
- @echo All six of the fc's should find no differences.\r
- @echo If fc finds an error on sample3.tst, this could be\r
- @echo because WinZips 'TAR file smart CR/LF conversion'\r
- @echo is too clever for its own good. Disable this option.\r
- @echo The correct size for sample3.ref is 120,244. If it\r
- @echo is around 150k, WinZip has stuffed it up.\r
- @echo Also remember to set BZ_UNIX to 0 and BZ_LCCWIN32\r
- @echo to 1 in bzip2.c.\r
+\r
\r
\r
clean: \r
@setfilename bzip2.info
@ignore
-This file documents bzip2 version 0.9.5, and associated library
+This file documents bzip2 version 1.0, and associated library
libbzip2, written by Julian Seward (jseward@acm.org).
-Copyright (C) 1996-1999 Julian R Seward
+Copyright (C) 1996-2000 Julian R Seward
Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
@titlepage
@title bzip2 and libbzip2
@subtitle a program and library for data compression
-@subtitle copyright (C) 1996-1999 Julian Seward
-@subtitle version 0.9.5d of 4 September 1999
+@subtitle copyright (C) 1996-2000 Julian Seward
+@subtitle version 1.0 of 21 March 2000
@author Julian Seward
@end titlepage
This program, @code{bzip2},
and associated library @code{libbzip2}, are
-Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@code{jseward@@acm.org}
+@code{http://sourceware.cygnus.com/bzip2}
+
+@code{http://www.cacheprof.org}
+
@code{http://www.muraroa.demon.co.uk}
-@code{bzip2}/@code{libbzip2} version 0.9.5 of 24 May 1999.
+@code{bzip2}/@code{libbzip2} version 1.0 of 21 March 2000.
PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented
algorithms. However, I do not have the resources available to carry out
@unnumberedsubsubsec NAME
@itemize
@item @code{bzip2}, @code{bunzip2}
-- a block-sorting file compressor, v0.9.5
+- a block-sorting file compressor, v1.0
@item @code{bzcat}
- decompresses files to stdout
@item @code{bzip2recover}
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
-This manual page pertains to version 0.9.5 of @code{bzip2}. Compressed
+This manual page pertains to version 1.0 of @code{bzip2}. Compressed
data created by this version is entirely forwards and backwards
-compatible with the previous public releases, versions 0.1pl2 and 0.9.0,
-but with the following exception: 0.9.0 and above can correctly
+compatible with the previous public releases, versions 0.1pl2, 0.9.0 and
+0.9.5, but with the following exception: 0.9.0 and above can correctly
decompress multiple concatenated compressed files. 0.1pl2 cannot do
this; it will stop after decompressing just the first file in the
stream.
that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib}
library.
+All externally visible symbols have names beginning @code{BZ2_}.
+This is new in version 1.0. The intention is to minimise pollution
+of the namespaces of library clients.
+
@subsection Low-level summary
This interface provides services for compressing and decompressing
is therefore thread-safe.
Six routines make up the low level interface:
-@code{bzCompressInit}, @code{bzCompress}, and @* @code{bzCompressEnd}
+@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd}
for compression,
-and a corresponding trio @code{bzDecompressInit}, @* @code{bzDecompress}
-and @code{bzDecompressEnd} for decompression.
+and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress}
+and @code{BZ2_bzDecompressEnd} for decompression.
The @code{*Init} functions allocate
memory for compression/decompression and do other
initialisations, whilst the @code{*End} functions close down operations
and release memory.
-The real work is done by @code{bzCompress} and @code{bzDecompress}.
-These compress/decompress data from a user-supplied input buffer
+The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}.
+These compress and decompress data from a user-supplied input buffer
to a user-supplied output buffer. These buffers can be any size;
arbitrary quantities of data are handled by making repeated calls
to these functions. This is a flexible mechanism allowing a
within some larger-scale file structure, or where there are
multiple @code{bzip2} data streams concatenated end-to-end.
-For reading files, @code{bzReadOpen}, @code{bzRead}, @code{bzReadClose}
-and @code{bzReadGetUnused} are supplied. For writing files,
-@code{bzWriteOpen}, @code{bzWrite} and @code{bzWriteFinish} are
-available.
+For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead},
+@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For
+writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and
+@code{BZ2_bzWriteClose} are available.
As with the low-level library, no global variables are used
so the library is per se thread-safe. However, if I/O errors
supports @code{errno} in a multithreaded environment.
To make the library a little simpler and more portable,
-@code{bzReadOpen} and @code{bzWriteOpen} require you to pass them file
+@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file
handles (@code{FILE*}s) which have previously been opened for reading or
writing respectively. That avoids portability problems associated with
file operations and file attributes, whilst not being much of an
@subsection Utility functions summary
-For very simple needs, @code{bzBuffToBuffCompress} and
-@code{bzBuffToBuffDecompress} are provided. These compress
+For very simple needs, @code{BZ2_bzBuffToBuffCompress} and
+@code{BZ2_bzBuffToBuffDecompress} are provided. These compress
data in memory from one buffer to another buffer in a single
function call. You should assess whether these functions
fulfill your memory-to-memory compression/decompression
Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} /
@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to
give better @code{zlib} compatibility. These functions are
-@code{bzopen}, @code{bzread}, @code{bzwrite}, @code{bzflush},
-@code{bzclose},
-@code{bzerror} and @code{bzlibVersion}. You may find these functions
+@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush},
+@code{BZ2_bzclose},
+@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions
more convenient for simple file reading and writing, than those in the
high-level interface. These functions are not (yet) officially part of
the library, and are minimally documented here. If they break, you
hearing more about the robustness of the library to corrupted
compressed data.
+Version 1.0 is much more robust in this respect than
+0.9.0 or 0.9.5. Investigations with Checker (a tool for
+detecting problems with memory management, similar to Purify)
+indicate that, at least for the few files I tested, all single-bit
+errors in the compressed data are caught properly, with no
+segmentation faults, no reads of uninitialised data and no
+out of range reads or writes. So it's certainly much improved,
+although I wouldn't claim it to be totally bombproof.
+
The file @code{bzlib.h} contains all definitions needed to use
the library. In particular, you should definitely not include
@code{bzlib_private.h}.
@item BZ_RUN_OK
@itemx BZ_FLUSH_OK
@itemx BZ_FINISH_OK
-In @code{bzCompress}, the requested flush/finish/nothing-special action
+In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action
was completed successfully.
@item BZ_STREAM_END
Compression of data was completed, or the logical stream end was
The following return values indicate an error of some kind.
@table @code
+@item BZ_CONFIG_ERROR
+Indicates that the library has been improperly compiled on your
+platform -- a major configuration error. Specifically, it means
+that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)}
+are not 1, 2 and 4 respectively, as they should be. Note that the
+library should still work properly on 64-bit platforms which follow
+the LP64 programming model -- that is, where @code{sizeof(long)}
+and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is
+still 4, so @code{libbzip2}, which doesn't use the @code{long} type,
+is OK.
@item BZ_SEQUENCE_ERROR
When using the library, it is important to call the functions in the
correct sequence and with data structures (buffers etc) in the correct
@item BZ_MEM_ERROR
Returned when a request to allocate memory failed. Note that the
quantity of memory needed to decompress a stream cannot be determined
-until the stream's header has been read. So @code{bzDecompress} and
-@code{bzRead} may return @code{BZ_MEM_ERROR} even though some of
+until the stream's header has been read. So @code{BZ2_bzDecompress} and
+@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of
the compressed data has been read. The same is not true for
-compression; once @code{bzCompressInit} or @code{bzWriteOpen} have
+compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have
successfully completed, @code{BZ_MEM_ERROR} cannot occur.
@item BZ_DATA_ERROR
Returned when a data integrity error is detected during decompression.
know when the compressed stream does not start with the correct
magic bytes (@code{'B' 'Z' 'h'}).
@item BZ_IO_ERROR
-Returned by @code{bzRead} and @code{bzRead} when there is an error
-reading or writing in the compressed file, and by @code{bzReadOpen}
-and @code{bzWriteOpen} for attempts to use a file for which the
+Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error
+reading or writing in the compressed file, and by @code{BZ2_bzReadOpen}
+and @code{BZ2_bzWriteOpen} for attempts to use a file for which the
error indicator (viz, @code{ferror(f)}) is set.
On receipt of @code{BZ_IO_ERROR}, the caller should consult
@code{errno} and/or @code{perror} to acquire operating-system
specific information about the problem.
@item BZ_UNEXPECTED_EOF
-Returned by @code{bzRead} when the compressed file finishes
+Returned by @code{BZ2_bzRead} when the compressed file finishes
before the logical end of stream is detected.
@item BZ_OUTBUFF_FULL
-Returned by @code{bzBuffToBuffCompress} and
-@code{bzBuffToBuffDecompress} to indicate that the output data
+Returned by @code{BZ2_bzBuffToBuffCompress} and
+@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data
will not fit into the output buffer provided.
@end table
@section Low-level interface
-@subsection @code{bzCompressInit}
+@subsection @code{BZ2_bzCompressInit}
@example
typedef
struct @{
char *next_in;
unsigned int avail_in;
- unsigned int total_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
char *next_out;
unsigned int avail_out;
- unsigned int total_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
void *state;
@}
bz_stream;
-int bzCompressInit ( bz_stream *strm,
- int blockSize100k,
- int verbosity,
- int workFactor );
+int BZ2_bzCompressInit ( bz_stream *strm,
+ int blockSize100k,
+ int verbosity,
+ int workFactor );
@end example
and the library will then use the standard @code{malloc}/@code{free}
routines.
-Before calling @code{bzCompressInit}, fields @code{bzalloc},
+Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc},
@code{bzfree} and @code{opaque} should
be filled appropriately, as just described. Upon return, the internal
-state will have been allocated and initialised, and @code{total_in} and
-@code{total_out} will have been set to zero.
-These last two fields are used by the library
+state will have been allocated and initialised, and @code{total_in_lo32},
+@code{total_in_hi32}, @code{total_out_lo32} and
+@code{total_out_hi32} will have been set to zero.
+These four fields are used by the library
to inform the caller of the total amount of data passed into and out of
the library, respectively. You should not try to change them.
+As of version 1.0, 64-bit counts are maintained, even on 32-bit
+platforms, using the @code{_hi32} fields to store the upper 32 bits
+of the count.  So, for example, the total amount of data in, computed
+in a 64-bit type, is @code{(total_in_hi32 << 32) + total_in_lo32}.
Parameter @code{blockSize100k} specifies the block size to be used for
compression. It should be a value between 1 and 9 inclusive, and the
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{strm} is @code{NULL}
or @code{blockSize} < 1 or @code{blockSize} > 9
@end display
Allowable next actions:
@display
- @code{bzCompress}
+ @code{BZ2_bzCompress}
if @code{BZ_OK} is returned
no specific action needed in case of error
@end display
-@subsection @code{bzCompress}
+@subsection @code{BZ2_bzCompress}
@example
- int bzCompress ( bz_stream *strm, int action );
+ int BZ2_bzCompress ( bz_stream *strm, int action );
@end example
Provides more input and/or output buffer space for the library. The
-caller maintains input and output buffers, and calls @code{bzCompress} to
+caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to
transfer data between them.
-Before each call to @code{bzCompress}, @code{next_in} should point at
+Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at
the data to be compressed, and @code{avail_in} should indicate how many
-bytes the library may read. @code{bzCompress} updates @code{next_in},
+bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in},
@code{avail_in} and @code{total_in} to reflect the number of bytes it
has read.
Similarly, @code{next_out} should point to a buffer in which the
compressed data is to be placed, with @code{avail_out} indicating how
-much output space is available. @code{bzCompress} updates
+much output space is available. @code{BZ2_bzCompress} updates
@code{next_out}, @code{avail_out} and @code{total_out} to reflect the
number of bytes output.
You may provide and remove as little or as much data as you like on each
-call of @code{bzCompress}. In the limit, it is acceptable to supply and
+call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and
remove data one byte at a time, although this would be terribly
inefficient. You should always ensure that at least one byte of output
space is available at each call.
-A second purpose of @code{bzCompress} is to request a change of mode of the
+A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the
compressed stream.
Conceptually, a compressed stream can be in one of four states: IDLE,
RUNNING, FLUSHING and FINISHING. Before initialisation
-(@code{bzCompressInit}) and after termination (@code{bzCompressEnd}), a
+(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a
stream is regarded as IDLE.
-Upon initialisation (@code{bzCompressInit}), the stream is placed in the
-RUNNING state. Subsequent calls to @code{bzCompress} should pass
+Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the
+RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass
@code{BZ_RUN} as the requested action; other actions are illegal and
will result in @code{BZ_SEQUENCE_ERROR}.
At some point, the calling program will have provided all the input data
it wants to. It will then want to finish up -- in effect, asking the
library to process any data it might have buffered internally. In this
-state, @code{bzCompress} will no longer attempt to read data from
+state, @code{BZ2_bzCompress} will no longer attempt to read data from
@code{next_in}, but it will want to write data to @code{next_out}.
Because the output buffer supplied by the user can be arbitrarily small,
the finishing-up operation cannot necessarily be done with a single call
-of @code{bzCompress}.
+of @code{BZ2_bzCompress}.
Instead, the calling program passes @code{BZ_FINISH} as an action to
-@code{bzCompress}. This changes the stream's state to FINISHING. Any
+@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any
remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and
-transferred to the output buffer. To do this, @code{bzCompress} must be
+transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be
called repeatedly until all the output has been consumed. At that
-point, @code{bzCompress} returns @code{BZ_STREAM_END}, and the stream's
-state is set back to IDLE. @code{bzCompressEnd} should then be
+point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's
+state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be
called.
Just to make sure the calling program does not cheat, the library makes
a note of @code{avail_in} at the time of the first call to
-@code{bzCompress} which has @code{BZ_FINISH} as an action (ie, at the
+@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the
time the program has announced its intention to not supply any more
input). By comparing this value with that of @code{avail_in} over
-subsequent calls to @code{bzCompress}, the library can detect any
+subsequent calls to @code{BZ2_bzCompress}, the library can detect any
attempts to slip in more data to compress. Any calls for which this is
detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a
programming mistake which should be corrected.
Instead of asking to finish, the calling program may ask
-@code{bzCompress} to take all the remaining input, compress it and
+@code{BZ2_bzCompress} to take all the remaining input, compress it and
terminate the current (Burrows-Wheeler) compression block. This could
be useful for error control purposes. The mechanism is analogous to
-that for finishing: call @code{bzCompress} with an action of
+that for finishing: call @code{BZ2_bzCompress} with an action of
@code{BZ_FLUSH}, remove output data, and persist with the
@code{BZ_FLUSH} action until the value @code{BZ_RUN} is returned. As
-with finishing, @code{bzCompress} detects any attempt to provide more
+with finishing, @code{BZ2_bzCompress} detects any attempt to provide more
input data once the flush has begun.
Once the flush is complete, the stream returns to the normal RUNNING
will be taken, what the next state is, and what the non-error return
values are. Note that you can't explicitly ask what state the
stream is in, but nor do you need to -- it can be inferred from the
-values returned by @code{bzCompress}.
+values returned by @code{BZ2_bzCompress}.
@display
IDLE/@code{any}
- Illegal. IDLE state only exists after @code{bzCompressEnd} or
- before @code{bzCompressInit}.
+ Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or
+ before @code{BZ2_bzCompressInit}.
Return value = @code{BZ_SEQUENCE_ERROR}
RUNNING/@code{BZ_RUN}
That still looks complicated? Well, fair enough. The usual sequence
of calls for compressing a load of data is:
@itemize @bullet
-@item Get started with @code{bzCompressInit}.
+@item Get started with @code{BZ2_bzCompressInit}.
@item Shovel data in and shlurp out its compressed form using zero or more
-calls of @code{bzCompress} with action = @code{BZ_RUN}.
+calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}.
@item Finish up.
-Repeatedly call @code{bzCompress} with action = @code{BZ_FINISH},
+Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH},
copying out the compressed output, until @code{BZ_STREAM_END} is returned.
-@item Close up and go home. Call @code{bzCompressEnd}.
+@item Close up and go home. Call @code{BZ2_bzCompressEnd}.
@end itemize
If the data you want to compress fits into your input buffer all
-at once, you can skip the calls of @code{bzCompress ( ..., BZ_RUN )} and
-just do the @code{bzCompress ( ..., BZ_FINISH )} calls.
+at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and
+just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls.
-All required memory is allocated by @code{bzCompressInit}. The
+All required memory is allocated by @code{BZ2_bzCompressInit}. The
compression library can accept any data at all (obviously). So you
-shouldn't get any error return values from the @code{bzCompress} calls.
+shouldn't get any error return values from the @code{BZ2_bzCompress} calls.
If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in
your programming.
if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL}
@end display
-@subsection @code{bzCompressEnd}
+@subsection @code{BZ2_bzCompressEnd}
@example
-int bzCompressEnd ( bz_stream *strm );
+int BZ2_bzCompressEnd ( bz_stream *strm );
@end example
Releases all memory associated with a compression stream.
@end display
-@subsection @code{bzDecompressInit}
+@subsection @code{BZ2_bzDecompressInit}
@example
-int bzDecompressInit ( bz_stream *strm, int verbosity, int small );
+int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );
@end example
-Prepares for decompression. As with @code{bzCompressInit}, a
+Prepares for decompression. As with @code{BZ2_bzCompressInit}, a
@code{bz_stream} record should be allocated and initialised before the
call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be
set if a custom memory allocator is required, or made @code{NULL} for
state will have been initialised, and @code{total_in} and
@code{total_out} will be zero.
-For the meaning of parameter @code{verbosity}, see @code{bzCompressInit}.
+For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}.
If @code{small} is nonzero, the library will use an alternative
decompression algorithm which uses less memory but at the cost of
Note that the amount of memory needed to decompress
a stream cannot be determined until the stream's header has been read,
-so even if @code{bzDecompressInit} succeeds, a subsequent
-@code{bzDecompress} could fail with @code{BZ_MEM_ERROR}.
+so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent
+@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{(small != 0 && small != 1)}
or @code{(verbosity < 0 || verbosity > 4)}
Allowable next actions:
@display
- @code{bzDecompress}
+ @code{BZ2_bzDecompress}
if @code{BZ_OK} was returned
no specific action required in case of error
@end display
-@subsection @code{bzDecompress}
+@subsection @code{BZ2_bzDecompress}
@example
-int bzDecompress ( bz_stream *strm );
+int BZ2_bzDecompress ( bz_stream *strm );
@end example
Provides more input and/or output buffer space for the library. The
-caller maintains input and output buffers, and uses @code{bzDecompress}
+caller maintains input and output buffers, and uses @code{BZ2_bzDecompress}
to transfer data between them.
-Before each call to @code{bzDecompress}, @code{next_in}
+Before each call to @code{BZ2_bzDecompress}, @code{next_in}
should point at the compressed data,
and @code{avail_in} should indicate how many bytes the library
-may read. @code{bzDecompress} updates @code{next_in}, @code{avail_in}
+may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in}
and @code{total_in}
to reflect the number of bytes it has read.
Similarly, @code{next_out} should point to a buffer in which the uncompressed
output is to be placed, with @code{avail_out} indicating how much output space
-is available. @code{bzCompress} updates @code{next_out},
+is available. @code{BZ2_bzDecompress} updates @code{next_out},
@code{avail_out} and @code{total_out} to reflect
the number of bytes output.
You may provide and remove as little or as much data as you like on
-each call of @code{bzDecompress}.
+each call of @code{BZ2_bzDecompress}.
In the limit, it is acceptable to
supply and remove data one byte at a time, although this would be
terribly inefficient. You should always ensure that at least one
byte of output space is available at each call.
-Use of @code{bzDecompress} is simpler than @code{bzCompress}.
+Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}.
You should provide input and remove output as described above, and
-repeatedly call @code{bzDecompress} until @code{BZ_STREAM_END} is
+repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is
returned. Appearance of @code{BZ_STREAM_END} denotes that
-@code{bzDecompress} has detected the logical end of the compressed
-stream. @code{bzDecompress} will not produce @code{BZ_STREAM_END} until
+@code{BZ2_bzDecompress} has detected the logical end of the compressed
+stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until
all output data has been placed into the output buffer, so once
@code{BZ_STREAM_END} appears, you are guaranteed to have available all
-the decompressed output, and @code{bzDecompressEnd} can safely be
+the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be
called.
-If case of an error return value, you should call @code{bzDecompressEnd}
+In case of an error return value, you should call @code{BZ2_bzDecompressEnd}
to clean up and release memory.
Possible return values:
@end display
Allowable next actions:
@display
- @code{bzDecompress}
+ @code{BZ2_bzDecompress}
if @code{BZ_OK} was returned
- @code{bzDecompressEnd}
+ @code{BZ2_bzDecompressEnd}
otherwise
@end display
-@subsection @code{bzDecompressEnd}
+@subsection @code{BZ2_bzDecompressEnd}
@example
-int bzDecompressEnd ( bz_stream *strm );
+int BZ2_bzDecompressEnd ( bz_stream *strm );
@end example
Releases all memory associated with a decompression stream.
given on a per-function basis below.
@item If @code{bzerror} indicates an error
(ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}),
- you should immediately call @code{bzReadClose} (or @code{bzWriteClose},
+ you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose},
depending on whether you are attempting to read or to write)
to free up all resources associated
with the stream. Once an error has been indicated, behaviour of all calls
- except @code{bzReadClose} (@code{bzWriteClose}) is undefined.
+ except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined.
The implication is that (1) @code{bzerror} should
be checked after each call, and (2) if @code{bzerror} indicates an error,
- @code{bzReadClose} (@code{bzWriteClose}) should then be called to clean up.
+ @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up.
@item The @code{FILE*} arguments passed to
- @code{bzReadOpen}/@code{bzWriteOpen}
+ @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen}
should be set to binary mode.
Most Unix systems will do this by default, but other platforms,
including Windows and Mac, will not. If you omit this, you may
-@subsection @code{bzReadOpen}
+@subsection @code{BZ2_bzReadOpen}
@example
typedef void BZFILE;
- BZFILE *bzReadOpen ( int *bzerror, FILE *f,
- int small, int verbosity,
- void *unused, int nUnused );
+ BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f,
+ int small, int verbosity,
+ void *unused, int nUnused );
@end example
Prepare to read compressed data from file handle @code{f}. @code{f}
should refer to a file which has been opened for reading, and for which
the library will try to decompress using less memory, at the expense of
speed.
-For reasons explained below, @code{bzRead} will decompress the
+For reasons explained below, @code{BZ2_bzRead} will decompress the
@code{nUnused} bytes starting at @code{unused}, before starting to read
from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be
supplied like this. If this facility is not required, you should pass
respectively.
For the meaning of parameters @code{small} and @code{verbosity},
-see @code{bzDecompressInit}.
+see @code{BZ2_bzDecompressInit}.
The amount of memory needed to decompress a file cannot be determined
until the file's header has been read. So it is possible that
-@code{bzReadOpen} returns @code{BZ_OK} but a subsequent call of
-@code{bzRead} will return @code{BZ_MEM_ERROR}.
+@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of
+@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}.
Possible assignments to @code{bzerror}:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{f} is @code{NULL}
or @code{small} is neither @code{0} nor @code{1}
Allowable next actions:
@display
- @code{bzRead}
+ @code{BZ2_bzRead}
if @code{bzerror} is @code{BZ_OK}
- @code{bzClose}
+ @code{BZ2_bzReadClose}
otherwise
@end display
-@subsection @code{bzRead}
+@subsection @code{BZ2_bzRead}
@example
- int bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
+ int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
@end example
Reads up to @code{len} (uncompressed) bytes from the compressed file
@code{b} into
and the number
of bytes read is returned. All other @code{bzerror} values denote an error.
-@code{bzRead} will supply @code{len} bytes,
+@code{BZ2_bzRead} will supply @code{len} bytes,
unless the logical stream end is detected
or an error occurs. Because of this, it is possible to detect the
stream end by observing when the number of bytes returned is
instead check @code{bzerror} after every call and watch out for
@code{BZ_STREAM_END}.
-Internally, @code{bzRead} copies data from the compressed file in chunks
+Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks
of size @code{BZ_MAX_UNUSED} bytes
before decompressing it. If the file contains more bytes than strictly
-needed to reach the logical end-of-stream, @code{bzRead} will almost certainly
+needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly
read some of the trailing data before signalling @code{BZ_STREAM_END}.
To collect the read but unused data once @code{BZ_STREAM_END} has
-appeared, call @code{bzReadGetUnused} immediately before @code{bzReadClose}.
+appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}.
Possible assignments to @code{bzerror}:
@display
@code{BZ_PARAM_ERROR}
if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0}
@code{BZ_SEQUENCE_ERROR}
- if @code{b} was opened with @code{bzWriteOpen}
+ if @code{b} was opened with @code{BZ2_bzWriteOpen}
@code{BZ_IO_ERROR}
if there is an error reading from the compressed file
@code{BZ_UNEXPECTED_EOF}
Allowable next actions:
@display
- collect data from @code{buf}, then @code{bzRead} or @code{bzReadClose}
+ collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose}
if @code{bzerror} is @code{BZ_OK}
- collect data from @code{buf}, then @code{bzReadClose} or @code{bzReadGetUnused}
+ collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused}
if @code{bzerror} is @code{BZ_STREAM_END}
- @code{bzReadClose}
+ @code{BZ2_bzReadClose}
otherwise
@end display
-@subsection @code{bzReadGetUnused}
+@subsection @code{BZ2_bzReadGetUnused}
@example
- void bzReadGetUnused ( int* bzerror, BZFILE *b,
- void** unused, int* nUnused );
+ void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b,
+ void** unused, int* nUnused );
@end example
Returns data which was read from the compressed file but was not needed
to get to the logical end-of-stream. @code{*unused} is set to the address
of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will
be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive.
-This function may only be called once @code{bzRead} has signalled
-@code{BZ_STREAM_END} but before @code{bzReadClose}.
+This function may only be called once @code{BZ2_bzRead} has signalled
+@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}.
Possible assignments to @code{bzerror}:
@display
or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL}
@code{BZ_SEQUENCE_ERROR}
if @code{BZ_STREAM_END} has not been signalled
- or if @code{b} was opened with @code{bzWriteOpen}
+ or if @code{b} was opened with @code{BZ2_bzWriteOpen}
@code{BZ_OK}
otherwise
@end display
Allowable next actions:
@display
- @code{bzReadClose}
+ @code{BZ2_bzReadClose}
@end display
-@subsection @code{bzReadClose}
+@subsection @code{BZ2_bzReadClose}
@example
- void bzReadClose ( int *bzerror, BZFILE *b );
+ void BZ2_bzReadClose ( int *bzerror, BZFILE *b );
@end example
Releases all memory pertaining to the compressed file @code{b}.
-@code{bzReadClose} does not call @code{fclose} on the underlying file
+@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file
handle, so you should do that yourself if appropriate.
-@code{bzReadClose} should be called to clean up after all error
+@code{BZ2_bzReadClose} should be called to clean up after all error
situations.
Possible assignments to @code{bzerror}:
@display
@code{BZ_SEQUENCE_ERROR}
- if @code{b} was opened with @code{bzOpenWrite}
+ if @code{b} was opened with @code{BZ2_bzWriteOpen}
@code{BZ_OK}
otherwise
@end display
-@subsection @code{bzWriteOpen}
+@subsection @code{BZ2_bzWriteOpen}
@example
- BZFILE *bzWriteOpen ( int *bzerror, FILE *f,
- int blockSize100k, int verbosity,
- int workFactor );
+ BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f,
+ int blockSize100k, int verbosity,
+ int workFactor );
@end example
Prepare to write compressed data to file handle @code{f}.
@code{f} should refer to
For the meaning of parameters @code{blockSize100k},
@code{verbosity} and @code{workFactor}, see
-@* @code{bzCompressInit}.
+@* @code{BZ2_bzCompressInit}.
All required memory is allocated at this stage, so if the call
completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a
-subsequent call to @code{bzWrite}.
+subsequent call to @code{BZ2_bzWrite}.
Possible assignments to @code{bzerror}:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{f} is @code{NULL}
or @code{blockSize100k < 1} or @code{blockSize100k > 9}
Allowable next actions:
@display
- @code{bzWrite}
+ @code{BZ2_bzWrite}
if @code{bzerror} is @code{BZ_OK}
- (you could go directly to @code{bzWriteClose}, but this would be pretty pointless)
- @code{bzWriteClose}
+ (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless)
+ @code{BZ2_bzWriteClose}
otherwise
@end display
-@subsection @code{bzWrite}
+@subsection @code{BZ2_bzWrite}
@example
- void bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
+ void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
@end example
Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be
compressed and written to the file.
@code{BZ_PARAM_ERROR}
if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0}
@code{BZ_SEQUENCE_ERROR}
- if b was opened with @code{bzReadOpen}
+ if b was opened with @code{BZ2_bzReadOpen}
@code{BZ_IO_ERROR}
if there is an error writing the compressed file.
@code{BZ_OK}
-@subsection @code{bzWriteClose}
+@subsection @code{BZ2_bzWriteClose}
@example
- int bzWriteClose ( int *bzerror, BZFILE* f,
- int abandon,
- unsigned int* nbytes_in,
- unsigned int* nbytes_out );
+ void BZ2_bzWriteClose ( int *bzerror, BZFILE* f,
+ int abandon,
+ unsigned int* nbytes_in,
+ unsigned int* nbytes_out );
+
+ void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* f,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 );
@end example
Compresses and flushes to the compressed file all data so far supplied
-by @code{bzWrite}. The logical end-of-stream markers are also written, so
-subsequent calls to @code{bzWrite} are illegal. All memory associated
+by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so
+subsequent calls to @code{BZ2_bzWrite} are illegal. All memory associated
with the compressed file @code{b} is released.
@code{fflush} is called on the
compressed file, but it is not @code{fclose}'d.
-If @code{bzWriteClose} is called to clean up after an error, the only
+If @code{BZ2_bzWriteClose} is called to clean up after an error, the only
action is to release the memory. The library records the error codes
issued by previous calls, so this situation will be detected
automatically. There is no attempt to complete the compression
If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the
total volume of uncompressed data handled. Similarly, @code{nbytes_out}
-will be set to the total volume of compressed data written.
+will be set to the total volume of compressed data written. For
+compatibility with older versions of the library, @code{BZ2_bzWriteClose}
+only yields the lower 32 bits of these counts. Use
+@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These
+two functions are otherwise absolutely identical.
+
Possible assignments to @code{bzerror}:
@display
@code{BZ_SEQUENCE_ERROR}
- if @code{b} was opened with @code{bzReadOpen}
+ if @code{b} was opened with @code{BZ2_bzReadOpen}
@code{BZ_IO_ERROR}
if there is an error writing the compressed file
@code{BZ_OK}
compressed data stream, using that same file handle.
@item Reading is more complex, and the facilities are not as general
as they could be since generality is hard to reconcile with efficiency.
-@code{bzRead} reads from the compressed file in blocks of size
+@code{BZ2_bzRead} reads from the compressed file in blocks of size
@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot
the logical end of compressed stream.
To recover this data once decompression has
-ended, call @code{bzReadGetUnused} after the last call of @code{bzRead}
+ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead}
(the one returning @code{BZ_STREAM_END}) but before calling
-@code{bzReadClose}.
+@code{BZ2_bzReadClose}.
@end itemize
This mechanism makes it easy to decompress multiple @code{bzip2}
-streams placed end-to-end. As the end of one stream, when @code{bzRead}
-returns @code{BZ_STREAM_END}, call @code{bzReadGetUnused} to collect the
+streams placed end-to-end. At the end of one stream, when @code{BZ2_bzRead}
+returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the
unused data (copy it into your own buffer somewhere).
That data forms the start of the next compressed stream.
-To start uncompressing that next stream, call @code{bzReadOpen} again,
+To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again,
feeding in the unused data via the @code{unused}/@code{nUnused}
parameters.
Keep doing this until @code{BZ_STREAM_END} return coincides with the
physical end of file (@code{feof(f)}). In this situation
-@code{bzReadGetUnused}
+@code{BZ2_bzReadGetUnused}
will of course return no data.
This should give some feel for how the high-level interface can be used.
if (!f) @{
/* handle error */
@}
-b = bzWriteOpen ( &bzerror, f, 9 );
+b = BZ2_bzWriteOpen ( &bzerror, f, 9, 0, 0 );
if (bzerror != BZ_OK) @{
- bzWriteClose ( b );
+ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
/* handle error */
@}
while ( /* condition */ ) @{
/* get data to write into buf, and set nBuf appropriately */
- nWritten = bzWrite ( &bzerror, b, buf, nBuf );
+ BZ2_bzWrite ( &bzerror, b, buf, nBuf );
if (bzerror == BZ_IO_ERROR) @{
- bzWriteClose ( &bzerror, b );
+ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
/* handle error */
@}
@}
-bzWriteClose ( &bzerror, b );
+BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
if (bzerror == BZ_IO_ERROR) @{
/* handle error */
@}
if (!f) @{
/* handle error */
@}
-b = bzReadOpen ( &bzerror, f, 0, NULL, 0 );
+b = BZ2_bzReadOpen ( &bzerror, f, 0, 0, NULL, 0 );
if (bzerror != BZ_OK) @{
- bzReadClose ( &bzerror, b );
+ BZ2_bzReadClose ( &bzerror, b );
/* handle error */
@}
bzerror = BZ_OK;
while (bzerror == BZ_OK && /* arbitrary other conditions */) @{
- nBuf = bzRead ( &bzerror, b, buf, /* size of buf */ );
+ nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
if (bzerror == BZ_OK) @{
/* do something with buf[0 .. nBuf-1] */
@}
@}
if (bzerror != BZ_STREAM_END) @{
- bzReadClose ( &bzerror, b );
+ BZ2_bzReadClose ( &bzerror, b );
/* handle error */
@} else @{
- bzReadClose ( &bzerror );
+ BZ2_bzReadClose ( &bzerror, b );
@}
@end example
@section Utility functions
-@subsection @code{bzBuffToBuffCompress}
+@subsection @code{BZ2_bzBuffToBuffCompress}
@example
- int bzBuffToBuffCompress( char* dest,
- unsigned int* destLen,
- char* source,
- unsigned int sourceLen,
- int blockSize100k,
- int verbosity,
- int workFactor );
+ int BZ2_bzBuffToBuffCompress( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int blockSize100k,
+ int verbosity,
+ int workFactor );
@end example
Attempts to compress the data in @code{source[0 .. sourceLen-1]}
into the destination buffer, @code{dest[0 .. *destLen-1]}.
mechanism, use the low-level interface.
For the meaning of parameters @code{blockSize100k}, @code{verbosity}
-and @code{workFactor}, @* see @code{bzCompressInit}.
+and @code{workFactor}, @* see @code{BZ2_bzCompressInit}.
To guarantee that the compressed data will fit in its buffer, allocate
an output buffer of size 1% larger than the uncompressed data, plus
six hundred extra bytes.
-@code{bzBuffToBuffDecompress} will not write data at or
+@code{BZ2_bzBuffToBuffCompress} will not write data at or
beyond @code{dest[*destLen]}, even in case of buffer overflow.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL}
or @code{blockSize100k < 1} or @code{blockSize100k > 9}
-@subsection @code{bzBuffToBuffDecompress}
+@subsection @code{BZ2_bzBuffToBuffDecompress}
@example
- int bzBuffToBuffDecompress ( char* dest,
- unsigned int* destLen,
- char* source,
- unsigned int sourceLen,
- int small,
- int verbosity );
+ int BZ2_bzBuffToBuffDecompress ( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int small,
+ int verbosity );
@end example
Attempts to decompress the data in @code{source[0 .. sourceLen-1]}
into the destination buffer, @code{dest[0 .. *destLen-1]}.
is unchanged, and @code{BZ_OUTBUFF_FULL} is returned.
@code{source} is assumed to hold a complete @code{bzip2} format
-data stream. @code{bzBuffToBuffDecompress} tries to decompress
+data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress
the entirety of the stream into the output buffer.
For the meaning of parameters @code{small} and @code{verbosity},
-see @code{bzDecompressInit}.
+see @code{BZ2_bzDecompressInit}.
Because the compression ratio of the compressed data cannot be known in
advance, there is no easy way to guarantee that the output buffer will
record the size of the uncompressed data, but such a mechanism is beyond
the scope of this library.
-@code{bzBuffToBuffDecompress} will not write data at or
+@code{BZ2_bzBuffToBuffDecompress} will not write data at or
beyond @code{dest[*destLen]}, even in case of buffer overflow.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL}
or @code{small != 0 && small != 1}
@section @code{zlib} compatibility functions
Yoshioka Tsuneo has contributed some functions to
give better @code{zlib} compatibility. These functions are
-@code{bzopen}, @code{bzread}, @code{bzwrite}, @code{bzflush},
-@code{bzclose},
-@code{bzerror} and @code{bzlibVersion}.
+@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush},
+@code{BZ2_bzclose},
+@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}.
These functions are not (yet) officially part of
the library. If they break, you get to keep all the pieces.
Nevertheless, I think they work ok.
@example
typedef void BZFILE;
-const char * bzlibVersion ( void );
+const char * BZ2_bzlibVersion ( void );
@end example
Returns a string indicating the library version.
@example
-BZFILE * bzopen ( const char *path, const char *mode );
-BZFILE * bzdopen ( int fd, const char *mode );
+BZFILE * BZ2_bzopen ( const char *path, const char *mode );
+BZFILE * BZ2_bzdopen ( int fd, const char *mode );
@end example
Opens a @code{.bz2} file for reading or writing, using either its name
or a pre-existing file descriptor.
Analogous to @code{fopen} and @code{fdopen}.
@example
-int bzread ( BZFILE* b, void* buf, int len );
-int bzwrite ( BZFILE* b, void* buf, int len );
+int BZ2_bzread ( BZFILE* b, void* buf, int len );
+int BZ2_bzwrite ( BZFILE* b, void* buf, int len );
@end example
Reads/writes data from/to a previously opened @code{BZFILE}.
Analogous to @code{fread} and @code{fwrite}.
@example
-int bzflush ( BZFILE* b );
-void bzclose ( BZFILE* b );
+int BZ2_bzflush ( BZFILE* b );
+void BZ2_bzclose ( BZFILE* b );
@end example
-Flushes/closes a @code{BZFILE}. @code{bzflush} doesn't actually do
+Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do
anything. Analogous to @code{fflush} and @code{fclose}.
@example
-const char * bzerror ( BZFILE *b, int *errnum )
+const char * BZ2_bzerror ( BZFILE *b, int *errnum );
@end example
Returns a string describing the most recent error status of
@code{b}, and also sets @code{*errnum} to its numerical value.
defined. Doing this gives you a library containing only the following
eight functions:
-@code{bzCompressInit}, @code{bzCompress}, @code{bzCompressEnd} @*
-@code{bzDecompressInit}, @code{bzDecompress}, @code{bzDecompressEnd} @*
-@code{bzBuffToBuffCompress}, @code{bzBuffToBuffDecompress}
+@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @*
+@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @*
+@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress}
When compiled like this, all functions will ignore @code{verbosity}
settings.
For a normal compile, an assertion failure yields the message
@example
- bzip2/libbzip2, v0.9.5: internal error number N.
- This is a bug in bzip2/libbzip2, v0.9.5. Please report
- it to me at: jseward@@acm.org. If this happened when
- you were using some program which uses libbzip2 as a
+ bzip2/libbzip2: internal error number N.
+ This is a bug in bzip2/libbzip2, 1.0 of 21-Mar-2000.
+ Please report it to me at: jseward@@acm.org. If this happened
+ when you were using some program which uses libbzip2 as a
component, you should also report this bug to the author(s)
of that program. Please make an effort to report this bug;
timely and accurate bug reports eventually lead to higher
- quality software. Thanks. Julian Seward, 24 May 1999.
+ quality software. Thanks. Julian Seward, 21 March 2000.
@end example
where @code{N} is some error code number. @code{exit(3)}
is then called.
vary.
@section Limitations of the compressed file format
-@code{bzip2-0.9.5} and @code{0.9.0}
+@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0}
use exactly the same file format as the previous
version, @code{bzip2-0.1}. This decision was made in the interests of
stability. Creating yet another incompatible compressed file format
@section Portability issues
After some consideration, I have decided not to use
-GNU @code{autoconf} to configure 0.9.5.
+GNU @code{autoconf} to configure 0.9.5 or 1.0.
@code{autoconf}, admirable and wonderful though it is,
mainly assists with portability problems between Unix-like
@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}.
You should specifically @emph{not} use @code{-funroll-loops}.
-You may notice that the Makefile runs four tests as part of
+You may notice that the Makefile runs six tests as part of
the build process. If the program passes all of these, it's
a pretty good (but not 100%) indication that the compiler has
done its job correctly.
consider Jean-loup
Gailly's and Mark Adler's work, @code{zlib-1.1.2} and
@code{gzip-1.2.4}. Look for them at
+
@code{http://www.cdrom.com/pub/infozip/zlib} and
@code{http://www.gzip.org} respectively.
@example
http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
@end example
-
+Finally, the following paper documents some recent investigations
+I made into the performance of sorting algorithms:
+@example
+Julian Seward:
+ On the Performance of BWT Sorting Algorithms
+ Proceedings of the IEEE Data Compression Conference 2000
+ Snowbird, Utah. 28-30 March 2000.
+@end example
@contents
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
/*---------------------------------------------*/
-Int32 rNums[512] = {
+Int32 BZ2_rNums[512] = {
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
--- /dev/null
+
+/* spew out a thoroughly gigantic file designed so that bzip2
+ can compress it reasonably rapidly. This is to help test
+ support for large files (> 2GB) in a reasonable amount of time.
+ I suggest you use the undocumented --exponential option to
+ bzip2 when compressing the resulting file; this saves a bit of
+ time. Note: *don't* bother with --exponential when compressing
+ Real Files; it'll just waste a lot of CPU time :-)
+ (but is otherwise harmless).
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The number of megabytes of junk to spew out (roughly) */
+#define MEGABYTES 5000
+
+#define N_BUF 1000000   /* size, in bytes, of buf below */
+char buf[N_BUF];        /* installed by main as the I/O buffer for stdout */
+
+/* Write the test data to stdout: a long sequence of runs of 'a', 'b'
+   and 'c' strings whose lengths are driven by a pseudo-random number
+   generator with a fixed seed.  The amount written is governed by
+   MEGABYTES.
+
+   Returns 0 on success, 1 if writing to stdout failed. */
+int main ( int argc, char** argv )
+{
+   int ii, kk, p;
+
+   (void)argc;   /* no command-line arguments are used */
+   (void)argv;
+
+   /* Fixed seed, so the same pseudo-random sequence is used each run. */
+   srandom(1);
+
+   /* setvbuf is the ISO C way to give stdout a caller-supplied, fully
+      buffered buffer; setbuffer is a BSD extension which not every
+      C library provides. */
+   setvbuf ( stdout, buf, _IOFBF, N_BUF );
+
+   for (kk = 0; kk < MEGABYTES * 515; kk+=3) {
+      p = 25+random()%50;
+      for (ii = 0; ii < p; ii++)
+         printf ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" );
+      for (ii = 0; ii < p-1; ii++)
+         printf ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" );
+      for (ii = 0; ii < p+1; ii++)
+         printf ( "ccccccccccccccccccccccccccccccccccccc" );
+
+      /* Once a write has failed (eg, disk full) there is no point
+         in grinding through the remaining gigabytes. */
+      if (ferror ( stdout )) break;
+   }
+
+   /* Report a short or failed write instead of claiming success. */
+   if (fflush ( stdout ) != 0 || ferror ( stdout )) {
+      fprintf ( stderr, "write error on stdout\n" );
+      return 1;
+   }
+   return 0;
+}
--- /dev/null
+
+/* A test program written to test robustness to decompression of
+ corrupted data. Usage is
+ unzcrash filename
+ and the program will read the specified file, compress it (in memory),
+ and then repeatedly decompress it, each time with a different bit of
+ the compressed data inverted, so as to test all possible one-bit errors.
+ This should not cause any invalid memory accesses. If it does,
+ I want to know about it!
+
+ p.s. As you can see from the above description, the process is
+ incredibly slow. Even a file as small as 5KB will cause it to run
+ for many hours.
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include "bzlib.h"
+
+#define M_BLOCK 1000000   /* maximum number of input bytes read from the file */
+
+typedef unsigned char uchar;
+
+#define M_BLOCK_OUT (M_BLOCK + 1000000)   /* capacity of outbuf */
+uchar inbuf[M_BLOCK];                      /* the file contents, as read by main */
+uchar outbuf[M_BLOCK_OUT];                 /* receives the result of each decompression */
+uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];   /* compressed image; flip_bit corrupts it in place */
+
+int nIn, nOut, nZ;   /* byte counts associated with inbuf, outbuf and zbuf respectively */
+
+static char *bzerrorstrings[] = {   /* names for library return codes; main looks one up as bzerrorstrings[-r] */
+ "OK"
+ ,"SEQUENCE_ERROR"
+ ,"PARAM_ERROR"
+ ,"MEM_ERROR"
+ ,"DATA_ERROR"
+ ,"DATA_ERROR_MAGIC"
+ ,"IO_ERROR"
+ ,"UNEXPECTED_EOF"
+ ,"OUTBUFF_FULL"
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+};
+
+/* Invert a single bit of zbuf.  `bit' is a bit index into the buffer:
+   it selects byte bit/8 and, within that byte, bit position bit%8
+   counting from the least significant bit.  Calling this twice with
+   the same index restores the byte to its previous value. */
+void flip_bit ( int bit )
+{
+   zbuf[bit / 8] ^= (uchar)(1 << (bit % 8));
+}
+
+/* Read the file named by argv[1] (at most M_BLOCK bytes), compress it
+   in memory, then decompress the compressed image once for every bit
+   it contains, each time with just that bit inverted.  The library's
+   return code for every trial is written to stderr.
+
+   Returns 0 if every trial either failed with an error code or
+   reproduced the input exactly.  Returns 1 on a usage error, if the
+   file cannot be opened or read, if compression fails, or if a
+   corrupted image decompresses "successfully" to something other
+   than the original input. */
+int main ( int argc, char** argv )
+{
+   FILE* f;
+   int   r;
+   int   bit;
+   int   i;
+   int   nErrStrs
+      = (int)(sizeof(bzerrorstrings) / sizeof(bzerrorstrings[0]));
+
+   if (argc != 2) {
+      fprintf ( stderr, "usage: unzcrash filename\n" );
+      return 1;
+   }
+
+   /* Binary mode: the input is treated as raw bytes, and text mode
+      would alter them on platforms which translate line endings. */
+   f = fopen ( argv[1], "rb" );
+   if (!f) {
+      fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] );
+      return 1;
+   }
+
+   nIn = (int)fread ( inbuf, 1, M_BLOCK, f );
+   if (ferror ( f )) {
+      fprintf ( stderr, "unzcrash: error reading %s\n", argv[1] );
+      fclose ( f );
+      return 1;
+   }
+   fclose ( f );
+   fprintf ( stderr, "%d bytes read\n", nIn );
+
+   /* Offer the compressor the whole of zbuf.  zbuf is deliberately
+      larger than M_BLOCK so that incompressible input still fits;
+      advertising only M_BLOCK bytes would throw that slack away.
+      The casts match the prototypes given in the libbzip2 manual
+      (char* buffers, unsigned int* length). */
+   nZ = (int)sizeof(zbuf);
+   r = BZ2_bzBuffToBuffCompress (
+          (char*)zbuf, (unsigned int*)&nZ,
+          (char*)inbuf, nIn, 9, 0, 30 );
+
+   /* Checked explicitly rather than with assert, so that the test
+      still stops here when built with NDEBUG. */
+   if (r != BZ_OK) {
+      fprintf ( stderr, "unzcrash: compression failed: %d %s\n", r,
+                (r <= 0 && r > -nErrStrs) ? bzerrorstrings[-r] : "???" );
+      return 1;
+   }
+   fprintf ( stderr, "%d after compression\n", nZ );
+
+   for (bit = 0; bit < nZ*8; bit++) {
+      fprintf ( stderr, "bit %d ", bit );
+      flip_bit ( bit );
+      nOut = M_BLOCK_OUT;
+      r = BZ2_bzBuffToBuffDecompress (
+             (char*)outbuf, (unsigned int*)&nOut,
+             (char*)zbuf, nZ, 0, 0 );
+
+      /* Guard the table lookup in case the library ever returns a
+         code outside the range the table covers. */
+      fprintf ( stderr, " %d %s ", r,
+                (r <= 0 && r > -nErrStrs) ? bzerrorstrings[-r] : "???" );
+
+      if (r != BZ_OK) {
+         fprintf ( stderr, "\n" );
+      } else {
+         /* The corrupted image was accepted; that is only tolerable
+            if the output is nevertheless identical to the input. */
+         if (nOut != nIn) {
+            fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut );
+            return 1;
+         }
+         for (i = 0; i < nOut; i++) {
+            if (inbuf[i] != outbuf[i]) {
+               fprintf(stderr, "mismatch at %d\n", i );
+               return 1;
+            }
+         }
+         fprintf(stderr, "really ok!\n" );
+      }
+
+      /* Restore the bit so the next trial has exactly one error. */
+      flip_bit ( bit );
+   }
+
+   fprintf ( stderr, "all ok\n" );
+   return 0;
+}
--- /dev/null
+
+If compilation produces errors, or a large number of warnings,
+please read README.COMPILATION.PROBLEMS -- you might be able to
+adjust the flags in this Makefile to improve matters.
+