1 files changed, 3682 insertions, 0 deletions
diff --git a/pigz/pigz.c b/pigz/pigz.c
new file mode 100644
index 000000000..5416bc97d
--- /dev/null
+++ b/pigz/pigz.c
@@ -0,0 +1,3682 @@
+/* pigz.c -- parallel implementation of gzip
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Mark Adler
+ * Version 2.2.5  28 Jul 2012  Mark Adler
+ */
+
+/*
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler
+  madler@alumni.caltech.edu
+
+  Mark accepts donations for providing this software.  Donations are not
+  required or expected.  Any amount that you feel is appropriate would be
+  appreciated.  You can use this link:
+
+  https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055
+
+ */
+
+/* Version history:
+   1.0    17 Jan 2007  First version, pipe only
+   1.1    28 Jan 2007  Avoid void * arithmetic (some compilers don't get that)
+                       Add note about requiring zlib 1.2.3
+                       Allow compression level 0 (no compression)
+                       Completely rewrite parallelism -- add a write thread
+                       Use deflateSetDictionary() to make use of history
+                       Tune argument defaults to best performance on four cores
+   1.2.1   1 Feb 2007  Add long command line options, add all gzip options
+                       Add debugging options
+   1.2.2  19 Feb 2007  Add list (--list) function
+                       Process file names on command line, write .gz output
+                       Write name and time in gzip header, set output file time
+                       Implement all command line options except --recursive
+                       Add --keep option to prevent deleting input files
+                       Add thread tracing information with -vv used
+                       Copy crc32_combine() from zlib (shared libraries issue)
+   1.3    25 Feb 2007  Implement --recursive
+                       Expand help to show all options
+                       Show help if no arguments or output piping are provided
+                       Process options in GZIP environment variable
+                       Add progress indicator to write thread if --verbose
+   1.4     4 Mar 2007  Add --independent to facilitate damaged file recovery
+                       Reallocate jobs for new --blocksize or --processes
+                       Do not delete original if writing to stdout
+                       Allow --processes 1, which does no threading
+                       Add NOTHREAD define to compile without threads
+                       Incorporate license text from zlib in source code
+   1.5    25 Mar 2007  Reinitialize jobs for new compression level
+                       Copy attributes and owner from input file to output file
+                       Add decompression and testing
+                       Add -lt (or -ltv) to show all entries and proper lengths
+                       Add decompression, testing, listing of LZW (.Z) files
+                       Only generate and show trace log if DEBUG defined
+                       Take "-" argument to mean read file from stdin
+   1.6    30 Mar 2007  Add zlib stream compression (--zlib), and decompression
+   1.7    29 Apr 2007  Decompress first entry of a zip file (if deflated)
+                       Avoid empty deflate blocks at end of deflate stream
+                       Show zlib check value (Adler-32) when listing
+                       Don't complain when decompressing empty file
+                       Warn about trailing junk for gzip and zlib streams
+                       Make listings consistent, ignore gzip extra flags
+                       Add zip stream compression (--zip)
+   1.8    13 May 2007  Document --zip option in help output
+   2.0    19 Oct 2008  Complete rewrite of thread usage and synchronization
+                       Use polling threads and a pool of memory buffers
+                       Remove direct pthread library use, hide in yarn.c
+   2.0.1  20 Oct 2008  Check version of zlib at compile time, need >= 1.2.3
+   2.1    24 Oct 2008  Decompress with read, write, inflate, and check threads
+                       Remove spurious use of ctime_r(), ctime() more portable
+                       Change application of job->calc lock to be a semaphore
+                       Detect size of off_t at run time to select %lu vs. %llu
+                       #define large file support macro even if not __linux__
+                       Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough
+                       Detect file-too-large error and report, blame build
+                       Replace check combination routines with those from zlib
+   2.1.1  28 Oct 2008  Fix a leak for files with an integer number of blocks
+                       Update for yarn 1.1 (yarn_prefix and yarn_abort)
+   2.1.2  30 Oct 2008  Work around use of beta zlib in production systems
+   2.1.3   8 Nov 2008  Don't use zlib combination routines, put back in pigz
+   2.1.4   9 Nov 2008  Fix bug when decompressing very short files
+   2.1.5  20 Jul 2009  Added 2008, 2009 to --license statement
+                       Allow numeric parameter immediately after -p or -b
+                       Enforce parameter after -p, -b, -s, before other options
+                       Enforce numeric parameters to have only numeric digits
+                       Try to determine the number of processors for -p default
+                       Fix --suffix short option to be -S to match gzip [Bloch]
+                       Decompress if executable named "unpigz" [Amundsen]
+                       Add a little bit of testing to Makefile
+   2.1.6  17 Jan 2010  Added pigz.spec to distribution for RPM systems [Brown]
+                       Avoid some compiler warnings
+                       Process symbolic links if piping to stdout [Hoffstätte]
+                       Decompress if executable named "gunzip" [Hoffstätte]
+                       Allow ".tgz" suffix [Chernookiy]
+                       Fix adler32 comparison on .zz files
+   2.1.7  17 Dec 2011  Avoid unused parameter warning in reenter()
+                       Don't assume 2's complement ints in compress_thread()
+                       Replicate gzip -cdf cat-like behavior
+                       Replicate gzip -- option to suppress option decoding
+                       Test output from make test instead of showing it
+                       Updated pigz.spec to install unpigz, pigz.1 [Obermaier]
+                       Add PIGZ environment variable [Mueller]
+                       Replicate gzip suffix search when decoding or listing
+                       Fix bug in load() to set in_left to zero on end of file
+                       Do not check suffix when input file won't be modified
+                       Decompress to stdout if name is "*cat" [Hayasaka]
+                       Write data descriptor signature to be like Info-ZIP
+                       Update and sort options list in help
+                       Use CC variable for compiler in Makefile
+                       Exit with code 2 if a warning has been issued
+                       Fix thread synchronization problem when tracing
+                       Change macro name MAX to MAX2 to avoid library conflicts
+                       Determine number of processors on HP-UX [Lloyd]
+   2.2    31 Dec 2011  Check for expansion bound busting (e.g. modified zlib)
+                       Make the "threads" list head global variable volatile
+                       Fix construction and printing of 32-bit check values
+                       Add --rsyncable functionality
+   2.2.1   1 Jan 2012  Fix bug in --rsyncable buffer management
+   2.2.2   1 Jan 2012  Fix another bug in --rsyncable buffer management
+   2.2.3  15 Jan 2012  Remove volatile in yarn.c
+                       Reduce the number of input buffers
+                       Change initial rsyncable hash to comparison value
+                       Improve the efficiency of arriving at a byte boundary
+                       Add thread portability #defines from yarn.c
+                       Have rsyncable compression be independent of threading
+                       Fix bug where constructed dictionaries not being used
+   2.2.4  11 Mar 2012  Avoid some return value warnings
+                       Improve the portability of printing the off_t type
+                       Check for existence of compress binary before using
+                       Update zlib version checking to 1.2.6 for new functions
+                       Fix bug in zip (-K) output
+                       Fix license in pigz.spec
+                       Remove thread portability #defines in pigz.c
+   2.2.5  28 Jul 2012  Avoid race condition in free_pool()
+                       Change suffix to .tar when decompressing or listing .tgz
+                       Print name of executable in error messages
+                       Show help properly when the name is unpigz or gunzip
+                       Fix permissions security problem before output is closed
+ */
+
+#define VERSION "pigz 2.2.5\n"
+
+/* To-do:
+    - make source portable for Windows, VMS, etc. (see gzip source code)
+    - make build portable (currently good for Unixish)
+ */
+
+/*
+   pigz compresses using threads to make use of multiple processors and cores.
+   The input is broken up into 128 KB chunks with each compressed in parallel.
+   The individual check value for each chunk is also calculated in parallel.
+   The compressed data is written in order to the output, and a combined check
+   value is calculated from the individual check values.
+
+   The compressed data format generated is in the gzip, zlib, or single-entry
+   zip format using the deflate compression method.  The compression produces
+   partial raw deflate streams which are concatenated by a single write thread
+   and wrapped with the appropriate header and trailer, where the trailer
+   contains the combined check value.
+
+   Each partial raw deflate stream is terminated by an empty stored block
+   (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit
+   stream at a byte boundary, unless that partial stream happens to already end
+   at a byte boundary (the latter requires zlib 1.2.6 or later).  Ending on a
+   byte boundary allows the partial streams to be concatenated simply as
+   sequences of bytes.  This adds a very small four to five byte overhead
+   (average 3.75 bytes) to the output for each input chunk.
+
+   The default input block size is 128K, but can be changed with the -b option.
+   The number of compress threads is set by default to 8, which can be changed
+   using the -p option.  Specifying -p 1 avoids the use of threads entirely.
+   pigz will try to determine the number of processors in the machine, in which
+   case if that number is two or greater, pigz will use that as the default for
+   -p instead of 8.
+
+   The input blocks, while compressed independently, have the last 32K of the
+   previous block loaded as a preset dictionary to preserve the compression
+   effectiveness of deflating in a single thread.  This can be turned off using
+   the --independent or -i option, so that the blocks can be decompressed
+   independently for partial error recovery or for random access.
+
+   Decompression can't be parallelized, at least not without specially prepared
+   deflate streams for that purpose.  As a result, pigz uses a single thread
+   (the main thread) for decompression, but will create three other threads for
+   reading, writing, and check calculation, which can speed up decompression
+   under some circumstances.  Parallel decompression can be turned off by
+   specifying one process (-dp 1 or -tp 1).
+
+   pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing
+   raw deflate.  Since zlib 1.2.3 corrects security vulnerabilities in zlib
+   version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during
+   the compilation of pigz.c.  zlib 1.2.4 includes some improvements to
+   Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for
+   pigz with and without threads, which is not possible with zlib 1.2.3.  This
+   may be important for uses of pigz -R where small changes in the contents
+   should result in small changes in the archive for rsync.  Note that due to
+   the details of how the lower levels of compression result in greater speed,
+   compression level 3 and below does not permit identical pigz output with
+   and without threads.
+
+   pigz uses the POSIX pthread library for thread control and communication,
+   through the yarn.h interface to yarn.c.  yarn.c can be replaced with
+   equivalent implementations using other thread libraries.  pigz can be
+   compiled with NOTHREAD #defined to not use threads at all (in which case
+   pigz will not be able to live up to the "parallel" in its name).
+ */
+
+/*
+   Details of parallel compression implementation:
+
+   When doing parallel compression, pigz uses the main thread to read the input
+   in 'size' sized chunks (see -b), and puts those in a compression job list,
+   each with a sequence number to keep track of the ordering.  If it is not the
+   first chunk, then that job also points to the previous input buffer, from
+   which the last 32K will be used as a dictionary (unless -i is specified).
+   This sets a lower limit of 32K on 'size'.
+
+   pigz launches up to 'procs' compression threads (see -p).  Each compression
+   thread continues to look for jobs in the compression list and perform those
+   jobs until instructed to return.  When a job is pulled, the dictionary, if
+   provided, will be loaded into the deflate engine and then that input buffer
+   is dropped for reuse.  Then the input data is compressed into an output
+   buffer that grows in size if necessary to hold the compressed data. The job
+   is then put into the write job list, sorted by the sequence number. The
+   compress thread however continues to calculate the check value on the input
+   data, either a CRC-32 or Adler-32, possibly in parallel with the write
+   thread writing the output data.  Once that's done, the compress thread drops
+   the input buffer and also releases the lock on the check value so that the
+   write thread can combine it with the previous check values.  The compress
+   thread has then completed that job, and goes to look for another.
+
+   All of the compress threads are left running and waiting even after the last
+   chunk is processed, so that they can support the next input to be compressed
+   (more than one input file on the command line).  Once pigz is done, it will
+   call all the compress threads home (that'll do pig, that'll do).
+
+   Before starting to read the input, the main thread launches the write thread
+   so that it is ready to pick up jobs immediately.  The compress thread puts
+   the write jobs in the list in sequence sorted order, so that the first job
+   in the list always has the lowest sequence number.  The write thread waits
+   for the next write job in sequence, and then gets that job.  The job still
+   holds its input buffer, from which the write thread gets the input buffer
+   length for use in check value combination.  Then the write thread drops that
+   input buffer to allow its reuse.  Holding on to the input buffer until the
+   write thread starts also has the benefit that the read and compress threads
+   can't get way ahead of the write thread and build up a large backlog of
+   unwritten compressed data.  The write thread will write the compressed data,
+   drop the output buffer, and then wait for the check value to be unlocked
+   by the compress thread.  Then the write thread combines the check value for
+   this chunk with the total check value for eventual use in the trailer.  If
+   this is not the last chunk, the write thread then goes back to look for the
+   next output chunk in sequence.  After the last chunk, the write thread
+   returns and joins the main thread.  Unlike the compress threads, a new write
+   thread is launched for each input stream.  The write thread writes the
+   appropriate header and trailer around the compressed data.
+
+   The input and output buffers are reused through their collection in pools.
+   Each buffer has a use count, which when decremented to zero returns the
+   buffer to the respective pool.  Each input buffer has up to three parallel
+   uses: as the input for compression, as the data for the check value
+   calculation, and as a dictionary for compression.  Each output buffer has
+   only one use, which is as the output of compression followed serially as
+   data to be written.  The input pool is limited in the number of buffers, so
+   that reading does not get way ahead of compression and eat up memory with
+   more input than can be used.  The limit is approximately two times the
+   number of compression threads.  In the case that reading is fast as compared
+   to compression, that number allows a second set of buffers to be read while
+   the first set of compressions are being performed.  The number of output
+   buffers is not directly limited, but is indirectly limited by the release of
+   input buffers to about the same number.
+ */
+
+/* use large file functions if available */
+#define _FILE_OFFSET_BITS 64
+
+/* included headers and what is expected from each */
+#include <stdio.h>      /* fflush(), fprintf(), fputs(), getchar(), putc(), */
+                        /* puts(), printf(), vasprintf(), stderr, EOF, NULL,
+                           SEEK_END, size_t, off_t */
+#include <stdlib.h>     /* exit(), malloc(), free(), realloc(), atol(), */
+                        /* atoi(), getenv() */
+#include <stdarg.h>     /* va_start(), va_end(), va_list */
+#include <string.h>     /* memset(), memchr(), memcpy(), strcmp(), strcpy() */
+                        /* strncpy(), strlen(), strcat(), strrchr() */
+#include <errno.h>      /* errno, EEXIST */
+#include <assert.h>     /* assert() */
+#include <time.h>       /* ctime(), time(), time_t, mktime() */
+#include <signal.h>     /* signal(), SIGINT */
+#include <sys/types.h>  /* ssize_t */
+#include <sys/stat.h>   /* chmod(), stat(), fstat(), lstat(), struct stat, */
+                        /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */
+#include <sys/time.h>   /* utimes(), gettimeofday(), struct timeval */
+#include <unistd.h>     /* unlink(), _exit(), read(), write(), close(), */
+                        /* lseek(), isatty(), chown() */
+#include <fcntl.h>      /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */
+                        /* O_WRONLY */
+#include <dirent.h>     /* opendir(), readdir(), closedir(), DIR, */
+                        /* struct dirent */
+#include <limits.h>     /* PATH_MAX, UINT_MAX */
+#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
+#  include <inttypes.h> /* intmax_t */
+#endif
+
+#ifdef __hpux
+#  include <sys/param.h>
+#  include <sys/pstat.h>
+#endif
+
+#include "zlib.h"       /* deflateInit2(), deflateReset(), deflate(), */
+                        /* deflateEnd(), deflateSetDictionary(), crc32(),
+                           inflateBackInit(), inflateBack(), inflateBackEnd(),
+                           Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY,
+                           Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK,
+                           Z_SYNC_FLUSH, z_stream */
+#if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230
+#  error Need zlib version 1.2.3 or later
+#endif
+
+#ifndef NOTHREAD
+#  include "yarn.h"     /* thread, launch(), join(), join_all(), */
+                        /* lock, new_lock(), possess(), twist(), wait_for(),
+                           release(), peek_lock(), free_lock(), yarn_name */
+#endif
+
+/* for local functions and globals */
+#define local static
+
+/* prevent end-of-line conversions on MSDOSish operating systems */
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+#  include <io.h>       /* setmode(), O_BINARY */
+#  define SET_BINARY_MODE(fd) setmode(fd, O_BINARY)
+#else
+#  define SET_BINARY_MODE(fd)
+#endif
+
+/* free ptr if set and null it -- ptr must be an lvalue without side effects */
+#define RELEASE(ptr) \
+    do { \
+        if ((ptr) != NULL) { \
+            free(ptr); \
+            ptr = NULL; \
+        } \
+    } while (0)
+
+/* sliding dictionary size for deflate */
+#define DICT 32768U
+
+/* largest power of 2 that fits in an unsigned int -- used to limit requests
+   to zlib functions that use unsigned int lengths */
+#define MAXP2 (UINT_MAX - (UINT_MAX >> 1))
+
+/* rsyncable constants -- RSYNCBITS is the number of bits in the mask for
+   comparison.  For random input data, there will be a hit on average every
+   1<<RSYNCBITS bytes.  So for an RSYNCBITS of 12, there will be an average of
+   one hit every 4096 bytes, resulting in a mean block size of 4096.  RSYNCMASK
+   is the resulting bit mask.  RSYNCHIT is what the hash value is compared to
+   after applying the mask.
+
+   The choice of 12 for RSYNCBITS is consistent with the original rsyncable
+   patch for gzip which also uses a 12-bit mask.  This results in a relatively
+   small hit to compression, on the order of 1.5% to 3%.  A mask of 13 bits can
+   be used instead if a hit of less than 1% to the compression is desired, at
+   the expense of more blocks transmitted for rsync updates.  (Your mileage may
+   vary.)
+
+   This implementation of rsyncable uses a different hash algorithm than what
+   the gzip rsyncable patch uses in order to provide better performance in
+   several regards.  The algorithm is simply to shift the hash value left one
+   bit and exclusive-or that with the next byte.  This is masked to the number
+   of hash bits (RSYNCMASK) and compared to all ones except for a zero in the
+   top bit (RSYNCHIT). This rolling hash has a very small window of 19 bytes
+   (RSYNCBITS+7).  The small window provides the benefit of much more rapid
+   resynchronization after a change, than does the 4096-byte window of the gzip
+   rsyncable patch.
+
+   The comparison value is chosen to avoid matching any repeated bytes or short
+   sequences.  The gzip rsyncable patch on the other hand uses a sum and zero
+   for comparison, which results in certain bad behaviors, such as always
+   matching everywhere in a long sequence of zeros.  Such sequences occur
+   frequently in tar files.
+
+   This hash efficiently discards history older than 19 bytes simply by
+   shifting that data past the top of the mask -- no history needs to be
+   retained to undo its impact on the hash value, as is needed for a sum.
+
+   The choice of the comparison value (RSYNCHIT) has the virtue of avoiding
+   extremely short blocks.  The shortest block is five bytes (RSYNCBITS-7) from
+   hit to hit, and is unlikely.  Whereas with the gzip rsyncable algorithm,
+   blocks of one byte are not only possible, but in fact are the most likely
+   block size.
+
+   Thanks and acknowledgement to Kevin Day for his experimentation and insights
+   on rsyncable hash characteristics that led to some of the choices here.
+ */
+#define RSYNCBITS 12
+#define RSYNCMASK ((1U << RSYNCBITS) - 1)
+#define RSYNCHIT (RSYNCMASK >> 1)
+
+/* initial pool counts and sizes -- INBUFS is the limit on the number of input
+   spaces as a function of the number of processors (used to throttle the
+   creation of compression jobs), OUTPOOL is the initial size of the output
+   data buffer, chosen to make resizing of the buffer very unlikely */
+#define INBUFS(p) (((p)<<1)+3)
+#define OUTPOOL(s) ((s)+((s)>>4))
+
+/* globals (modified by main thread only when it's the only thread) */
+local char *prog;           /* name by which pigz was invoked */
+local int ind;              /* input file descriptor */
+local int outd;             /* output file descriptor */
+local char in[PATH_MAX+1];  /* input file name (accommodate recursion) */
+local char *out = NULL;     /* output file name (allocated if not NULL) */
+local int verbosity;        /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */
+local int headis;           /* 1 to store name, 2 to store date, 3 both */
+local int pipeout;          /* write output to stdout even if file */
+local int keep;             /* true to prevent deletion of input file */
+local int force;            /* true to overwrite, compress links, cat */
+local int form;             /* gzip = 0, zlib = 1, zip = 2 or 3 */
+local unsigned char magic1; /* first byte of possible header when decoding */
+local int recurse;          /* true to dive down into directory structure */
+local char *sufx;           /* suffix to use (".gz" or user supplied) */
+local char *name;           /* name for gzip header */
+local time_t mtime;         /* time stamp from input file for gzip header */
+local int list;             /* true to list files instead of compress */
+local int first = 1;        /* true if we need to print listing header */
+local int decode;           /* 0 to compress, 1 to decompress, 2 to test */
+local int level;            /* compression level */
+local int rsync;            /* true for rsync blocking */
+local int procs;            /* maximum number of compression threads (>= 1) */
+local int setdict;          /* true to initialize dictionary in each thread */
+local size_t size;          /* uncompressed input size per thread (>= 32K) */
+local int warned = 0;       /* true if a warning has been given */
+
+/* saved gzip/zip header data for decompression, testing, and listing */
+local time_t stamp;                 /* time stamp from gzip header */
+local char *hname = NULL;           /* name from header (allocated) */
+local unsigned long zip_crc;        /* local header crc */
+local unsigned long zip_clen;       /* local header compressed length */
+local unsigned long zip_ulen;       /* local header uncompressed length */
+
+/* print a warning on stderr, prefixed with the program name, unless quiet */
+local int complain(char *fmt, ...)
+{
+    va_list args;
+
+    if (verbosity <= 0)
+        return 0;                   /* quiet -- print nothing, flag nothing */
+    fprintf(stderr, "%s: ", prog);
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fputc('\n', stderr);
+    fflush(stderr);
+    warned = 1;                     /* remember that a warning was issued */
+    return 0;
+}
+
+/* exit with error, delete output file if in the middle of writing it */
+local int bail(char *why, char *what)
+{
+    if (outd != -1 && out != NULL)
+        unlink(out);
+    complain("abort: %s%s", why, what);
+    exit(1);
+    return 0;
+}
+
+#ifdef DEBUG
+
+/* starting time of day for tracing */
+local struct timeval start;
+
+/* trace log -- singly linked list of messages, oldest entry first */
+local struct log {
+    struct timeval when;    /* time of entry */
+    char *msg;              /* message (allocated, freed with the entry) */
+    struct log *next;       /* next entry */
+} *log_head, **log_tail = NULL;     /* log_tail is NULL until log_init() */
+#ifndef NOTHREAD
+  local lock *log_lock = NULL;      /* possessed while the list is changed */
+#endif
+
+/* maximum log entry length */
+#define MAXMSG 256
+
+/* set up an empty log (call from main thread before other threads launched) */
+local void log_init(void)
+{
+    if (log_tail != NULL)
+        return;                     /* already set up, leave it alone */
+#ifndef NOTHREAD
+    log_lock = new_lock(0);
+#endif
+    log_head = NULL;
+    log_tail = &log_head;
+}
+
+/* append a time-stamped, printf-style formatted entry to the trace log */
+local void log_add(char *fmt, ...)
+{
+    char text[MAXMSG];              /* formatted message, truncated to fit */
+    struct timeval now;
+    struct log *entry;
+    va_list args;
+
+    gettimeofday(&now, NULL);
+    entry = malloc(sizeof(*entry));
+    if (entry == NULL)
+        bail("not enough memory", "");
+    entry->when = now;
+    entry->next = NULL;
+    va_start(args, fmt);
+    vsnprintf(text, sizeof(text), fmt, args);
+    va_end(args);
+    entry->msg = malloc(strlen(text) + 1);
+    if (entry->msg == NULL) {
+        free(entry);
+        bail("not enough memory", "");
+    }
+    strcpy(entry->msg, text);
+#ifndef NOTHREAD
+    assert(log_lock != NULL);
+    possess(log_lock);
+#endif
+    *log_tail = entry;              /* link in at the end of the list */
+    log_tail = &(entry->next);
+#ifndef NOTHREAD
+    twist(log_lock, BY, +1);
+#endif
+}
+
+/* print and free the oldest log entry, return false if empty or no log */
+local int log_show(void)
+{
+    struct log *me;
+    struct timeval diff;
+
+    if (log_tail == NULL)           /* log not set up (or already freed) */
+        return 0;
+#ifndef NOTHREAD
+    possess(log_lock);
+#endif
+    me = log_head;
+    if (me == NULL) {               /* set up, but nothing queued */
+#ifndef NOTHREAD
+        release(log_lock);
+#endif
+        return 0;
+    }
+    log_head = me->next;            /* unlink the oldest entry */
+    if (me->next == NULL)
+        log_tail = &log_head;       /* list now empty, reset the tail link */
+#ifndef NOTHREAD
+    twist(log_lock, BY, -1);
+#endif
+    diff.tv_usec = me->when.tv_usec - start.tv_usec;    /* time since start */
+    diff.tv_sec = me->when.tv_sec - start.tv_sec;
+    if (diff.tv_usec < 0) {         /* borrow a second for the microseconds */
+        diff.tv_usec += 1000000L;
+        diff.tv_sec--;
+    }
+    fprintf(stderr, "trace %ld.%06ld %s\n",
+            (long)diff.tv_sec, (long)diff.tv_usec, me->msg);
+    fflush(stderr);
+    free(me->msg);
+    free(me);
+    return 1;
+}
+
+/* discard any queued entries and tear the log down (log_init() re-arms it) */
+local void log_free(void)
+{
+    struct log *entry;
+
+    if (log_tail == NULL)
+        return;                     /* never set up, or already freed */
+#ifndef NOTHREAD
+    possess(log_lock);
+#endif
+    for (entry = log_head; entry != NULL; entry = log_head) {
+        log_head = entry->next;
+        free(entry->msg);
+        free(entry);
+    }
+#ifndef NOTHREAD
+    twist(log_lock, TO, 0);
+    free_lock(log_lock);
+    log_lock = NULL;
+#endif
+    log_tail = NULL;
+}
+
+/* print every queued entry, then free the log (no-op if log not set up) */
+local void log_dump(void)
+{
+    if (log_tail != NULL) {
+        while (log_show() != 0)
+            continue;               /* each call prints and frees one entry */
+        log_free();
+    }
+}
+
+/* trace if verbosity > 2 -- x is a parenthesized log_add() argument list */
+#define Trace(x) \
+    do { \
+        if (verbosity > 2) { \
+            log_add x; \
+        } \
+    } while (0)
+
+#else /* !DEBUG */
+
+#define log_dump()
+#define Trace(x)
+
+#endif
+
+/* read up to len bytes into buf (fewer only at end of file), return count */
+local size_t readn(int desc, unsigned char *buf, size_t len)
+{
+    size_t want = len;              /* to work out the count read at the end */
+    ssize_t ret;
+
+    while (len) {                   /* each request is capped at SSIZE_MAX */
+        ret = read(desc, buf, len > (size_t)SSIZE_MAX ? SSIZE_MAX : len);
+        if (ret < 0 && errno == EINTR)
+            continue;               /* interrupted by a signal -- try again */
+        if (ret < 0)
+            bail("read error on ", in);
+        if (ret == 0)
+            break;                  /* end of file */
+        buf += ret;
+        len -= ret;
+    }
+    return want - len;
+}
+
+/* write all len bytes, repeating write() calls as needed, bail on failure */
+local void writen(int desc, unsigned char *buf, size_t len)
+{
+    ssize_t ret;
+
+    for (; len; buf += ret, len -= ret) {   /* requests capped at SSIZE_MAX */
+        ret = write(desc, buf, len > (size_t)SSIZE_MAX ? SSIZE_MAX : len);
+        if (ret < 0 && errno == EINTR)
+            ret = 0;                /* interrupted, nothing written -- retry */
+        else if (ret < 1) {
+            complain("write error code %d", errno);
+            bail("write error on ", out);
+        }
+    }
+}
+
+/* convert Unix time to MS-DOS date and time, assuming current timezone (you
+   got a better idea?) -- t == 0 means now; returns 0 if t can't be converted
+   or the year is not in 1980..2107; each field is made unsigned long before
+   shifting, as the year shifted in a 32-bit int would overflow from 2044 on */
+local unsigned long time2dos(time_t t)
+{
+    struct tm *tm;
+
+    if (t == 0)
+        t = time(NULL);
+    tm = localtime(&t);
+    if (tm == NULL || tm->tm_year < 80 || tm->tm_year > 207)
+        return 0;
+    return ((unsigned long)(tm->tm_year - 80) << 25) +
+           ((unsigned long)(tm->tm_mon + 1) << 21) +
+           ((unsigned long)tm->tm_mday << 16) +
+           ((unsigned long)tm->tm_hour << 11) +
+           ((unsigned long)tm->tm_min << 5) +
+           ((unsigned long)(tm->tm_sec + 1) >> 1);  /* round to double-secs */
+}
+
+/* put a 2-byte (PUT2*) or 4-byte (PUT4*) integer b into the byte array at a,
+   in LSB-first order (L suffix) or MSB-first order (M suffix) -- both
+   arguments are evaluated more than once, so they should not have side
+   effects */
+#define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8)
+#define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16))
+#define PUT4M(a,b) (*(a)=(b)>>24,(a)[1]=(b)>>16,(a)[2]=(b)>>8,(a)[3]=(b))
+
+/* write a gzip, zlib, or zip header to outd using the information in the
+   globals (form selects the format: 0 is gzip, 1 is zlib, greater than 1 is
+   zip) -- returns the number of header bytes written */
+local unsigned long put_header(void)
+{
+    unsigned long len;
+    unsigned char head[30];
+
+    if (form == 0) {                /* gzip */
+        head[0] = 31;
+        head[1] = 139;
+        head[2] = 8;                /* deflate */
+        head[3] = name != NULL ? 8 : 0;     /* flag a name if there is one */
+        PUT4L(head + 4, mtime);
+        head[8] = level == 9 ? 2 : (level == 1 ? 4 : 0);
+        head[9] = 3;                /* unix */
+        writen(outd, head, 10);
+        len = 10;
+        if (name != NULL) {
+            /* the name is written along with its terminating zero */
+            size_t full = strlen(name) + 1;
+
+            writen(outd, (unsigned char *)name, full);
+            len += full;
+        }
+    }
+    else if (form > 1) {            /* zip */
+        /* entry name and its length (use "-" for stdin) */
+        const char *entry = name == NULL ? "-" : name;
+        size_t elen = strlen(entry);
+
+        /* local file header */
+        PUT4L(head, 0x04034b50UL);  /* local header signature */
+        PUT2L(head + 4, 20);        /* version needed to extract (2.0) */
+        PUT2L(head + 6, 8);         /* flags: data descriptor follows data */
+        PUT2L(head + 8, 8);         /* deflate */
+        PUT4L(head + 10, time2dos(mtime));
+        PUT4L(head + 14, 0);        /* crc (not here) */
+        PUT4L(head + 18, 0);        /* compressed length (not here) */
+        PUT4L(head + 22, 0);        /* uncompressed length (not here) */
+        PUT2L(head + 26, elen);     /* name length */
+        PUT2L(head + 28, 9);        /* length of extra field (see below) */
+        writen(outd, head, 30);
+        len = 30;
+
+        /* file name, without a terminating zero */
+        writen(outd, (unsigned char *)entry, elen);
+        len += elen;
+
+        /* extended timestamp extra field block (9 bytes) */
+        PUT2L(head, 0x5455);        /* extended timestamp signature */
+        PUT2L(head + 2, 5);         /* number of data bytes in this block */
+        head[4] = 1;                /* flag presence of mod time */
+        PUT4L(head + 5, mtime);     /* mod time */
+        writen(outd, head, 9);
+        len += 9;
+    }
+    else {                          /* zlib */
+        int flevel;                 /* two-bit compression level indication */
+
+        if (level == 9)
+            flevel = 3;
+        else if (level == 1)
+            flevel = 0;
+        else if (level >= 6 || level == Z_DEFAULT_COMPRESSION)
+            flevel = 1;
+        else
+            flevel = 2;
+        head[0] = 0x78;             /* deflate, 32K window */
+        head[1] = flevel << 6;
+
+        /* make the two header bytes, as a 16-bit value, a multiple of 31 */
+        head[1] += 31 - (((head[0] << 8) + head[1]) % 31);
+        writen(outd, head, 2);
+        len = 2;
+    }
+    return len;
+}
+
+/* write a gzip, zlib, or zip trailer to outd -- ulen and clen are the total
+   uncompressed and compressed lengths, check is the check value of the
+   uncompressed data, and head is the length of the header that was written
+   (only used for zip, to locate the central directory) */
+local void put_trailer(unsigned long ulen, unsigned long clen,
+                       unsigned long check, unsigned long head)
+{
+    unsigned char tail[46];
+    unsigned long cent;             /* size of zip central directory */
+    const char *entry;              /* zip entry name */
+    size_t elen;                    /* length of zip entry name */
+
+    if (form == 0) {                /* gzip */
+        PUT4L(tail, check);
+        PUT4L(tail + 4, ulen);
+        writen(outd, tail, 8);
+        return;
+    }
+    if (form <= 1) {                /* zlib */
+        PUT4M(tail, check);
+        writen(outd, tail, 4);
+        return;
+    }
+
+    /* zip -- entry name and its length (use "-" for stdin) */
+    entry = name == NULL ? "-" : name;
+    elen = strlen(entry);
+
+    /* data descriptor (as promised in local header) */
+    PUT4L(tail, 0x08074b50UL);
+    PUT4L(tail + 4, check);
+    PUT4L(tail + 8, clen);
+    PUT4L(tail + 12, ulen);
+    writen(outd, tail, 16);
+
+    /* central file header */
+    PUT4L(tail, 0x02014b50UL);      /* central header signature */
+    tail[4] = 63;                   /* obeyed version 6.3 of the zip spec */
+    tail[5] = 255;                  /* ignore external attributes */
+    PUT2L(tail + 6, 20);            /* version needed to extract (2.0) */
+    PUT2L(tail + 8, 8);             /* data descriptor is present */
+    PUT2L(tail + 10, 8);            /* deflate */
+    PUT4L(tail + 12, time2dos(mtime));
+    PUT4L(tail + 16, check);        /* crc */
+    PUT4L(tail + 20, clen);         /* compressed length */
+    PUT4L(tail + 24, ulen);         /* uncompressed length */
+    PUT2L(tail + 28, elen);         /* name length */
+    PUT2L(tail + 30, 9);            /* length of extra field (see below) */
+    PUT2L(tail + 32, 0);            /* no file comment */
+    PUT2L(tail + 34, 0);            /* disk number 0 */
+    PUT2L(tail + 36, 0);            /* internal file attributes */
+    PUT4L(tail + 38, 0);            /* external file attributes (ignored) */
+    PUT4L(tail + 42, 0);            /* offset of local header */
+    writen(outd, tail, 46);
+    cent = 46;
+
+    /* file name, without a terminating zero */
+    writen(outd, (unsigned char *)entry, elen);
+    cent += elen;
+
+    /* extended timestamp extra field block (9 bytes) */
+    PUT2L(tail, 0x5455);            /* extended timestamp signature */
+    PUT2L(tail + 2, 5);             /* number of data bytes in this block */
+    tail[4] = 1;                    /* flag presence of mod time */
+    PUT4L(tail + 5, mtime);         /* mod time */
+    writen(outd, tail, 9);
+    cent += 9;
+
+    /* end of central directory record */
+    PUT4L(tail, 0x06054b50UL);      /* end of central directory signature */
+    PUT2L(tail + 4, 0);             /* number of this disk */
+    PUT2L(tail + 6, 0);             /* disk with start of central directory */
+    PUT2L(tail + 8, 1);             /* number of entries on this disk */
+    PUT2L(tail + 10, 1);            /* total number of entries */
+    PUT4L(tail + 12, cent);         /* size of central directory */
+    PUT4L(tail + 16, head + clen + 16); /* offset of central directory */
+    PUT2L(tail + 20, 0);            /* no zip file comment */
+    writen(outd, tail, 22);
+}
+
+/* compute check value depending on format -- adler32() when form is 1 (the
+   zlib format), crc32() otherwise; the arguments are passed straight through
+   to the selected function */
+#define CHECK(a,b,c) (form == 1 ? adler32(a,b,c) : crc32(a,b,c))
+
+#ifndef NOTHREAD
+/* -- threaded portions of pigz -- */
+
+/* -- check value combination routines for parallel calculation -- */
+
+/* combine two check values, selecting the routine by format in the same way
+   as CHECK(): adler32_comb() when form is 1, crc32_comb() otherwise */
+#define COMB(a,b,c) (form == 1 ? adler32_comb(a,b,c) : crc32_comb(a,b,c))
+
+/* the routines below combine two crc-32's or two adler-32's -- they are
+   copied from zlib 1.2.3 so that pigz can be compatible with older versions
+   of zlib, and in order to avoid linkage issues with the zlib 1.2.3 builds on
+   Sun, Ubuntu, and others */
+
+/* multiply the GF(2) matrix mat by the bit vector vec -- the result is the
+   exclusive-or of the entries of mat selected by the set bits of vec, where
+   bit 0 of vec selects mat[0], bit 1 selects mat[1], and so on */
+local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec)
+{
+    unsigned long acc = 0;
+
+    for (; vec != 0; vec >>= 1, mat++)
+        if (vec & 1)
+            acc ^= *mat;
+    return acc;
+}
+
+/* square the 32-entry GF(2) matrix mat, putting the result in square -- each
+   entry of the result is mat applied to the corresponding entry of mat */
+local void gf2_matrix_square(unsigned long *square, unsigned long *mat)
+{
+    int row = 0;
+
+    while (row < 32) {
+        square[row] = gf2_matrix_times(mat, mat[row]);
+        row++;
+    }
+}
+
+/* combine two crc-32's -- crc1 is the crc of a first sequence, crc2 the crc of
+   the sequence that follows it, and len2 the length in bytes of that second
+   sequence; returns crc1 unchanged when len2 is zero */
+local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2,
+                               size_t len2)
+{
+    unsigned long even[32];     /* even-power-of-two zeros operator */
+    unsigned long odd[32];      /* odd-power-of-two zeros operator */
+    unsigned long bit;
+    int k;
+
+    /* nothing was appended, so there is nothing to combine */
+    if (len2 == 0)
+        return crc1;
+
+    /* build the operator for one zero bit in odd: the CRC-32 polynomial
+       followed by the single bits 1, 2, 4, ... */
+    odd[0] = 0xedb88320UL;
+    for (k = 1, bit = 1; k < 32; k++, bit <<= 1)
+        odd[k] = bit;
+
+    /* operator for two zero bits in even, then for four zero bits in odd */
+    gf2_matrix_square(even, odd);
+    gf2_matrix_square(odd, even);
+
+    /* apply len2 zero bytes to crc1, one bit of len2 at a time -- the first
+       squaring gives the operator for one zero byte (eight zero bits) in
+       even, and each later squaring doubles the number of zeros, alternating
+       between even and odd */
+    for (;;) {
+        gf2_matrix_square(even, odd);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(even, crc1);
+        len2 >>= 1;
+        if (len2 == 0)
+            break;
+
+        gf2_matrix_square(odd, even);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(odd, crc1);
+        len2 >>= 1;
+        if (len2 == 0)
+            break;
+    }
+
+    /* the combined crc */
+    return crc1 ^ crc2;
+}
+
+#define BASE 65521U     /* largest prime smaller than 65536 */
+#define LOW16 0xffff    /* mask lower 16 bits */
+
+/* combine two adler-32's -- adler1 is the check value of a first sequence,
+   adler2 the check value of the sequence that follows it, and len2 the length
+   in bytes of that second sequence; the low and high 16-bit halves are
+   combined separately and reduced with conditional subtractions */
+local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2,
+                                 size_t len2)
+{
+    /* the derivation of this formula is left as an exercise for the reader */
+    unsigned rem = (unsigned)(len2 % BASE);
+    unsigned long lo = adler1 & LOW16;
+    unsigned long hi = (rem * lo) % BASE;
+
+    lo += (adler2 & LOW16) + BASE - 1;
+    hi += ((adler1 >> 16) & LOW16) + ((adler2 >> 16) & LOW16) + BASE - rem;
+
+    /* bring both halves back below BASE */
+    if (lo >= BASE)
+        lo -= BASE;
+    if (lo >= BASE)
+        lo -= BASE;
+    if (hi >= (BASE << 1))
+        hi -= (BASE << 1);
+    if (hi >= BASE)
+        hi -= BASE;
+    return lo | (hi << 16);
+}
+
+/* -- pool of spaces for buffer management -- */
+
+/* These routines manage a pool of spaces.  Each pool specifies a fixed size
+   buffer to be contained in each space.  Each space has a use count, which
+   when decremented to zero returns the space to the pool.  If a space is
+   requested from the pool and the pool is empty, a space is immediately
+   created unless a specified limit on the number of spaces has been reached.
+   Only if the limit is reached will it wait for a space to be returned to the
+   pool.  Each space knows what pool it belongs to, so that it can be returned.
+ */
+
+/* a space (one buffer for each space) -- created by get_space(), shared with
+   use_space(), and put back on its pool's list by drop_space() when the last
+   user drops it */
+struct space {
+    lock *use;              /* use count -- return to pool when zero */
+    unsigned char *buf;     /* buffer of size size */
+    size_t size;            /* current size of this buffer */
+    size_t len;             /* for application usage (initially zero) */
+    struct pool *pool;      /* pool to return to */
+    struct space *next;     /* for pool linked list */
+};
+
+/* pool of spaces (one pool for each type needed) -- initialized by
+   new_pool() and released by free_pool() */
+struct pool {
+    lock *have;             /* unused spaces available, lock for list */
+    struct space *head;     /* linked list of available buffers */
+    size_t size;            /* size of new buffers in this pool */
+    int limit;              /* number of new spaces allowed, or -1 */
+    int made;               /* number of buffers made */
+};
+
+/* initialize a pool whose structure is supplied by the caller (nothing is
+   allocated for the pool itself) -- size is the size of each new buffer, and
+   limit is the maximum number of spaces that may be made, or -1 for no limit,
+   in which case a request never waits for a buffer to come back */
+local void new_pool(struct pool *pool, size_t size, int limit)
+{
+    /* start with an empty list and nothing made yet */
+    pool->head = NULL;
+    pool->made = 0;
+    pool->size = size;
+    pool->limit = limit;
+
+    /* lock for the list, counting the spaces available in it */
+    pool->have = new_lock(0);
+}
+
+/* get a space from a pool -- the use count is initially set to one, so there
+   is no need to call use_space() for the first use; the returned space has
+   len set to zero, and either comes from the pool's list or is newly
+   allocated with a buffer of pool->size bytes (allocation failure goes to
+   bail()) */
+local struct space *get_space(struct pool *pool)
+{
+    struct space *space;
+
+    /* if can't create any more, wait for a space to show up */
+    possess(pool->have);
+    if (pool->limit == 0)
+        wait_for(pool->have, NOT_TO_BE, 0);
+
+    /* if a space is available, pull it from the list and return it */
+    if (pool->head != NULL) {
+        space = pool->head;
+        possess(space->use);
+        pool->head = space->next;
+        twist(pool->have, BY, -1);      /* one less in pool */
+        twist(space->use, TO, 1);       /* initially one user */
+        space->len = 0;
+        return space;
+    }
+
+    /* nothing available, don't want to wait, make a new space */
+    assert(pool->limit != 0);
+    if (pool->limit > 0)
+        pool->limit--;                  /* a limit of -1 is never counted down */
+    pool->made++;
+    release(pool->have);
+    space = malloc(sizeof(struct space));
+    if (space == NULL)
+        bail("not enough memory", "");
+    space->use = new_lock(1);           /* initially one user */
+    space->buf = malloc(pool->size);
+    if (space->buf == NULL)
+        bail("not enough memory", "");
+    space->size = pool->size;
+    space->len = 0;
+    space->pool = pool;                 /* remember the pool this belongs to */
+    /* space->next is left unset here -- drop_space() sets it when the space
+       is put on the pool's list */
+    return space;
+}
+
+/* compute next size up by multiplying by about 2**(1/3) and round to the next
+   power of 2 if we're close (so three applications results in doubling) -- if
+   small, go up to at least 16, if overflow, go to max size_t value; the
+   result is equal to the argument only when the argument is already the
+   maximum size_t value */
+local size_t grow(size_t size)
+{
+    size_t was, top;
+    int shift;
+
+    was = size;
+    size += size >> 2;
+
+    /* reduce to the top three bits to see if we are close to a power of 2 */
+    top = size;
+    for (shift = 0; top > 7; shift++)
+        top >>= 1;
+    if (top == 7) {
+        /* if the high bit of size is already set, then the next power of 2
+           cannot be represented and the shift count would be the full width
+           of size_t, which is undefined -- saturate instead */
+        if (size > ((size_t)0 - 1) >> 1)
+            return (size_t)0 - 1;
+        size = (size_t)1 << (shift + 3);
+    }
+    if (size < 16)
+        size = 16;
+
+    /* no growth means the addition wrapped around (or was was the maximum) */
+    if (size <= was)
+        size = (size_t)0 - 1;
+    return size;
+}
+
+/* enlarge the buffer in space to the next size up as computed by grow(),
+   keeping its contents -- goes to bail() if the buffer cannot get any larger
+   or if the reallocation fails */
+local void grow_space(struct space *space)
+{
+    size_t bigger = grow(space->size);
+
+    /* an unchanged size means there is no larger size to go to */
+    if (bigger == space->size)
+        bail("not enough memory", "");
+
+    /* resize the buffer, then record the new size */
+    space->buf = realloc(space->buf, bigger);
+    if (space->buf == NULL)
+        bail("not enough memory", "");
+    space->size = bigger;
+}
+
+/* increment the use count to require one more drop before returning this space
+   to the pool -- each use_space() call is expected to be matched by a later
+   drop_space() call on the same space */
+local void use_space(struct space *space)
+{
+    possess(space->use);
+    twist(space->use, BY, +1);
+}
+
+/* drop a space, returning it to the pool if the use count is zero -- that is,
+   when the caller is the last user (use count of one on entry), the space is
+   pushed on the front of its pool's list before the count is decremented */
+local void drop_space(struct space *space)
+{
+    int use;
+    struct pool *pool;
+
+    possess(space->use);
+    use = peek_lock(space->use);
+    assert(use != 0);                   /* dropping an unused space is a bug */
+    if (use == 1) {
+        /* last user -- put the space on the pool's list and count it as
+           available */
+        pool = space->pool;
+        possess(pool->have);
+        space->next = pool->head;
+        pool->head = space;
+        twist(pool->have, BY, +1);
+    }
+    twist(space->use, BY, -1);
+}
+
+/* release every space on the pool's list (buffer, use lock, and the space
+   itself) and then the pool's own lock -- returns the number of spaces freed,
+   for debugging and resource usage measurement; all of the spaces that were
+   made are expected to be back on the list (checked with an assert) */
+local int free_pool(struct pool *pool)
+{
+    struct space *space;
+    int count;
+
+    possess(pool->have);
+    for (count = 0; pool->head != NULL; count++) {
+        /* unlink the first space, then free its parts */
+        space = pool->head;
+        pool->head = space->next;
+        free(space->buf);
+        free_lock(space->use);
+        free(space);
+    }
+    assert(count == pool->made);
+    release(pool->have);
+    free_lock(pool->have);
+    return count;
+}
+
+/* input and output buffer pools -- all four are initialized in setup_jobs()
+   and freed in finish_jobs() */
+local struct pool in_pool;      /* input buffers (the only limited pool) */
+local struct pool out_pool;     /* compressed output buffers */
+local struct pool dict_pool;    /* dictionary buffers */
+local struct pool lens_pool;    /* coded block length lists */
+
+/* -- parallel compression -- */
+
+/* compress or write job (passed from compress list to write list) -- if seq is
+   equal to -1, compress_thread is instructed to return; if more is false then
+   this is the last chunk, which after writing tells write_thread to return --
+   out does double duty: on entry to compress_thread it holds the dictionary
+   (or NULL), and compress_thread then replaces it with the compressed data */
+struct job {
+    long seq;                   /* sequence number */
+    int more;                   /* true if this is not the last chunk */
+    struct space *in;           /* input data to compress */
+    struct space *out;          /* dictionary or resulting compressed data */
+    struct space *lens;         /* coded list of flush block lengths */
+    unsigned long check;        /* check value for input data */
+    lock *calc;                 /* released when check calculation complete */
+    struct job *next;           /* next job in the list (either list) */
+};
+
+/* list of compress jobs (with tail for appending to list) -- compress_have is
+   also used as the "job lists are set up" indicator: it is NULL until
+   setup_jobs() runs and is set back to NULL by finish_jobs() */
+local lock *compress_have = NULL;   /* number of compress jobs waiting */
+local struct job *compress_head, **compress_tail;
+
+/* list of write jobs -- kept sorted by sequence number by compress_thread();
+   write_first holds -1 when the list is empty */
+local lock *write_first;            /* lowest sequence number in list */
+local struct job *write_head;
+
+/* number of compression threads running */
+local int cthreads = 0;
+
+/* write thread if running */
+local thread *writeth = NULL;
+
+/* set up the job lists and buffer pools (call from main thread) -- does
+   nothing if they are already set up, which is indicated by compress_have
+   not being NULL */
+local void setup_jobs(void)
+{
+    if (compress_have == NULL) {
+        /* empty compress list, with its lock counting the waiting jobs */
+        compress_head = NULL;
+        compress_tail = &compress_head;
+        compress_have = new_lock(0);
+
+        /* empty write list, with -1 for the lowest sequence number */
+        write_head = NULL;
+        write_first = new_lock(-1);
+
+        /* initialize buffer pools (initial size for out_pool not critical,
+           since buffers will be grown in size if needed -- initial size
+           chosen to make this unlikely -- same for lens_pool) */
+        new_pool(&in_pool, size, INBUFS(procs));
+        new_pool(&out_pool, OUTPOOL(size), -1);
+        new_pool(&dict_pool, DICT, -1);
+        new_pool(&lens_pool, size >> (RSYNCBITS - 1), -1);
+    }
+}
+
+/* command the compress threads to all return, then join them all (call from
+   main thread), free all the thread-related resources -- does nothing if the
+   job lists are not set up (compress_have is NULL), and leaves compress_have
+   NULL so that setup_jobs() will set everything up again if called */
+local void finish_jobs(void)
+{
+    struct job job;
+    int caught;
+
+    /* only do this once */
+    if (compress_have == NULL)
+        return;
+
+    /* command all of the extant compress threads to return -- the sentinel
+       job lives on this function's stack, which is fine since the compress
+       threads leave it in the list and are all joined before returning */
+    possess(compress_have);
+    job.seq = -1;
+    job.next = NULL;
+    compress_head = &job;
+    compress_tail = &(job.next);
+    twist(compress_have, BY, +1);       /* will wake them all up */
+
+    /* join all of the compress threads, verify they all came back */
+    caught = join_all();
+    Trace(("-- joined %d compress threads", caught));
+    assert(caught == cthreads);
+    cthreads = 0;
+
+    /* free the resources */
+    caught = free_pool(&lens_pool);
+    Trace(("-- freed %d block lengths buffers", caught));
+    caught = free_pool(&dict_pool);
+    Trace(("-- freed %d dictionary buffers", caught));
+    caught = free_pool(&out_pool);
+    Trace(("-- freed %d output buffers", caught));
+    caught = free_pool(&in_pool);
+    Trace(("-- freed %d input buffers", caught));
+    free_lock(write_first);
+    free_lock(compress_have);
+    compress_have = NULL;               /* mark the job lists as not set up */
+}
+
+/* run deflate() with the given flush value until all strm->avail_in bytes at
+   strm->next_in have been consumed, appending the result to out->buf and
+   updating out->len -- the buffer is grown (out->size) whenever it is full,
+   and each deflate() call is given no more than UINT_MAX bytes of output
+   room, since size_t may be larger than the unsigned type zlib uses */
+local void deflate_engine(z_stream *strm, struct space *out, int flush)
+{
+    size_t avail;
+
+    for (;;) {
+        /* make room if there is none */
+        if (out->len == out->size)
+            grow_space(out);
+        avail = out->size - out->len;
+
+        /* point deflate at the unused part of the buffer and run it */
+        strm->next_out = out->buf + out->len;
+        strm->avail_out = avail < UINT_MAX ? (unsigned)avail : UINT_MAX;
+        (void)deflate(strm, flush);
+        out->len = strm->next_out - out->buf;
+
+        /* deflate is done when it did not use up all of the room given */
+        if (strm->avail_out != 0)
+            break;
+    }
+    assert(strm->avail_in == 0);
+}
+
+/* get the next compression job from the head of the list, compress and compute
+   the check value on the input, and put a job in the write list with the
+   results -- keep looking for more jobs, returning when a job is found with a
+   sequence number of -1 (leave that job in the list for other incarnations to
+   find) -- the dummy argument is not used; on entry job->out is either NULL or
+   a buffer whose last DICT bytes (or fewer) are used as the dictionary, and it
+   is replaced here by a buffer from out_pool holding the compressed data */
+local void compress_thread(void *dummy)
+{
+    struct job *job;                /* job pulled and working on */
+    struct job *here, **prior;      /* pointers for inserting in write list */
+    unsigned long check;            /* check value of input */
+    unsigned char *next;            /* pointer for blocks, check value data */
+    size_t left;                    /* input left to process */
+    size_t len;                     /* remaining bytes to compress/check */
+#if ZLIB_VERNUM >= 0x1260
+    int bits;                       /* deflate pending bits */
+#endif
+    z_stream strm;                  /* deflate stream */
+
+    (void)dummy;
+
+    /* initialize the deflate stream for this thread */
+    strm.zfree = Z_NULL;
+    strm.zalloc = Z_NULL;
+    strm.opaque = Z_NULL;
+    if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) !=
+            Z_OK)
+        bail("not enough memory", "");
+
+    /* keep looking for work */
+    for (;;) {
+        /* get a job (like I tell my son) */
+        possess(compress_have);
+        wait_for(compress_have, NOT_TO_BE, 0);
+        job = compress_head;
+        assert(job != NULL);
+        if (job->seq == -1)
+            break;                  /* leave loop still possessing the lock */
+        compress_head = job->next;
+        if (job->next == NULL)
+            compress_tail = &compress_head;
+        twist(compress_have, BY, -1);
+
+        /* got a job -- initialize and set the compression level (note that if
+           deflateParams() is called immediately after deflateReset(), there is
+           no need to initialize the input/output for the stream) */
+        Trace(("-- compressing #%ld", job->seq));
+        (void)deflateReset(&strm);
+        (void)deflateParams(&strm, level, Z_DEFAULT_STRATEGY);
+
+        /* set dictionary if provided, release that input or dictionary buffer
+           (not NULL if dict is true and if this is not the first work unit) */
+        if (job->out != NULL) {
+            len = job->out->len;
+            left = len < DICT ? len : DICT;
+            deflateSetDictionary(&strm, job->out->buf + (len - left), left);
+            drop_space(job->out);
+        }
+
+        /* set up input and output */
+        job->out = get_space(&out_pool);
+        strm.next_in = job->in->buf;
+        strm.next_out = job->out->buf;
+
+        /* compress each block, either flushing or finishing */
+        next = job->lens == NULL ? NULL : job->lens->buf;
+        left = job->in->len;
+        job->out->len = 0;
+        do {
+            /* decode next block length from blocks list -- this is the
+               inverse of the one, two, or three byte coding written by
+               append_len(); with no list at all, the whole input is taken
+               as a single block */
+            len = next == NULL ? 128 : *next++;
+            if (len < 128)                          /* 64..32831 */
+                len = (len << 8) + (*next++) + 64;
+            else if (len == 128)                    /* end of list */
+                len = left;
+            else if (len < 192)                     /* 1..63 */
+                len &= 0x3f;
+            else {                                  /* 32832..4227135 */
+                len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U;
+                len += *next++;
+            }
+            left -= len;
+
+            /* run MAXP2-sized amounts of input through deflate -- this loop is
+               needed for those cases where the unsigned type is smaller than
+               the size_t type, or when len is close to the limit of the size_t
+               type */
+            while (len > MAXP2) {
+                strm.avail_in = MAXP2;
+                deflate_engine(&strm, job->out, Z_NO_FLUSH);
+                len -= MAXP2;
+            }
+
+            /* run the last piece through deflate -- end on a byte boundary,
+               using a sync marker if necessary, or finish the deflate stream
+               if this is the last block */
+            strm.avail_in = (unsigned)len;
+            if (left || job->more) {
+#if ZLIB_VERNUM >= 0x1260
+                deflate_engine(&strm, job->out, Z_BLOCK);
+
+                /* add just enough empty blocks to get to a byte boundary */
+                (void)deflatePending(&strm, Z_NULL, &bits);
+                if (bits & 1)
+                    deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
+                else if (bits & 7) {
+                    do {
+                        bits = deflatePrime(&strm, 10, 2);  /* static empty */
+                        assert(bits == Z_OK);
+                        (void)deflatePending(&strm, Z_NULL, &bits);
+                    } while (bits & 7);
+                    deflate_engine(&strm, job->out, Z_BLOCK);
+                }
+#else
+                deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
+#endif
+            }
+            else
+                deflate_engine(&strm, job->out, Z_FINISH);
+        } while (left);
+        if (job->lens != NULL) {
+            drop_space(job->lens);
+            job->lens = NULL;
+        }
+        Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)"));
+
+        /* reserve input buffer until check value has been calculated -- this
+           must come before the job is put on the write list, since
+           write_thread drops job->in as soon as it picks up the job */
+        use_space(job->in);
+
+        /* insert write job in list in sorted order, alert write thread */
+        possess(write_first);
+        prior = &write_head;
+        while ((here = *prior) != NULL) {
+            if (here->seq > job->seq)
+                break;
+            prior = &(here->next);
+        }
+        job->next = here;
+        *prior = job;
+        twist(write_first, TO, write_head->seq);
+
+        /* calculate the check value in parallel with writing, alert the write
+           thread that the calculation is complete, and drop this usage of the
+           input buffer */
+        len = job->in->len;
+        next = job->in->buf;
+        check = CHECK(0L, Z_NULL, 0);
+        while (len > MAXP2) {
+            check = CHECK(check, next, MAXP2);
+            len -= MAXP2;
+            next += MAXP2;
+        }
+        check = CHECK(check, next, (unsigned)len);
+        drop_space(job->in);
+        job->check = check;
+        Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)"));
+        possess(job->calc);
+        twist(job->calc, TO, 1);
+
+        /* done with that one -- go find another job */
+    }
+
+    /* found job with seq == -1 -- free deflate memory and return to join */
+    release(compress_have);
+    (void)deflateEnd(&strm);
+}
+
+/* collect the write jobs off of the list in sequence order and write out the
+   compressed data until the last chunk is written -- also write the header and
+   trailer and combine the individual check values of the input buffers -- the
+   dummy argument is not used; each job and its calc lock are freed here once
+   the job's data has been written and its check value combined */
+local void write_thread(void *dummy)
+{
+    long seq;                       /* next sequence number looking for */
+    struct job *job;                /* job pulled and working on */
+    size_t len;                     /* input length */
+    int more;                       /* true if more chunks to write */
+    unsigned long head;             /* header length */
+    unsigned long ulen;             /* total uncompressed size (overflow ok) */
+    unsigned long clen;             /* total compressed size (overflow ok) */
+    unsigned long check;            /* check value of uncompressed data */
+
+    (void)dummy;
+
+    /* build and write header */
+    Trace(("-- write thread running"));
+    head = put_header();
+
+    /* process output of compress threads until end of input */
+    ulen = clen = 0;
+    check = CHECK(0L, Z_NULL, 0);
+    seq = 0;
+    do {
+        /* get next write job in order -- the list is sorted, so once the
+           lowest sequence number in it is seq, that job is at the head */
+        possess(write_first);
+        wait_for(write_first, TO_BE, seq);
+        job = write_head;
+        write_head = job->next;
+        twist(write_first, TO, write_head == NULL ? -1 : write_head->seq);
+
+        /* update lengths, save uncompressed length for COMB */
+        more = job->more;
+        len = job->in->len;
+        drop_space(job->in);
+        ulen += (unsigned long)len;
+        clen += (unsigned long)(job->out->len);
+
+        /* write the compressed data and drop the output buffer */
+        Trace(("-- writing #%ld", seq));
+        writen(outd, job->out->buf, job->out->len);
+        drop_space(job->out);
+        Trace(("-- wrote #%ld%s", seq, more ? "" : " (last)"));
+
+        /* wait for check calculation to complete, then combine, once
+           the compress thread is done with the input, release it */
+        possess(job->calc);
+        wait_for(job->calc, TO_BE, 1);
+        release(job->calc);
+        check = COMB(check, job->check, len);
+
+        /* free the job */
+        free_lock(job->calc);
+        free(job);
+
+        /* get the next buffer in sequence */
+        seq++;
+    } while (more);
+
+    /* write trailer */
+    put_trailer(ulen, clen, check, head);
+
+    /* verify no more jobs, prepare for next use */
+    possess(compress_have);
+    assert(compress_head == NULL && peek_lock(compress_have) == 0);
+    release(compress_have);
+    possess(write_first);
+    assert(write_head == NULL);
+    twist(write_first, TO, -1);
+}
+
+/* append the block length len to the job's coded block lengths list, getting
+   a list from lens_pool if the job has none yet -- a len of zero is coded as
+   the single byte 128, which ends the list; lengths of 1..63 take one byte
+   (129..191), 64..32831 take two bytes (first byte below 128), and
+   32832..4227135 take three bytes (first byte 192 or more) */
+local void append_len(struct job *job, size_t len)
+{
+    struct space *lens;
+    unsigned char *put;             /* next byte to write in the list */
+
+    assert(len < 4227136UL);
+    if (job->lens == NULL)
+        job->lens = get_space(&lens_pool);
+    lens = job->lens;
+
+    /* make sure there is room for the longest code, which is three bytes */
+    if (lens->size < lens->len + 3)
+        grow_space(lens);
+
+    put = lens->buf + lens->len;
+    if (len < 64) {
+        *put++ = len + 128;
+    }
+    else if (len < 32832U) {
+        len -= 64;
+        *put++ = len >> 8;
+        *put++ = len;
+    }
+    else {
+        len -= 32832U;
+        *put++ = (len >> 16) + 192;
+        *put++ = len >> 8;
+        *put++ = len;
+    }
+    lens->len = put - lens->buf;
+}
+
+/* compress ind to outd, using multiple threads for the compression and check
+   value calculations and one other thread for writing the output -- compress
+   threads will be launched and left running (waiting actually) to support
+   subsequent calls of parallel_compress() */
+local void parallel_compress(void)
+{
+    long seq;                       /* sequence number */
+    struct space *curr;             /* input data to compress */
+    struct space *next;             /* input data that follows curr */
+    struct space *hold;             /* input data that follows next */
+    struct space *dict;             /* dictionary for next compression */
+    struct job *job;                /* job for compress, then write */
+    int more;                       /* true if more input to read */
+    unsigned hash;                  /* hash for rsyncable */
+    unsigned char *scan;            /* next byte to compute hash on */
+    unsigned char *end;             /* after end of data to compute hash on */
+    unsigned char *last;            /* position after last hit */
+    size_t left;                    /* last hit in curr to end of curr */
+    size_t len;                     /* for various length computations */
+
+    /* if first time or after an option change, setup the job lists */
+    setup_jobs();
+
+    /* start write thread */
+    writeth = launch(write_thread, NULL);
+
+    /* read from input and start compress threads (write thread will pick up
+     the output of the compress threads) */
+    seq = 0;
+    next = get_space(&in_pool);
+    next->len = readn(ind, next->buf, next->size);
+    hold = NULL;
+    dict = NULL;
+    scan = next->buf;
+    hash = RSYNCHIT;
+    left = 0;
+    do {
+        /* create a new job */
+        job = malloc(sizeof(struct job));
+        if (job == NULL)
+            bail("not enough memory", "");
+        job->calc = new_lock(0);
+
+        /* update input spaces */
+        curr = next;
+        next = hold;
+        hold = NULL;
+
+        /* get more input if we don't already have some */
+        if (next == NULL) {
+            next = get_space(&in_pool);
+            next->len = readn(ind, next->buf, next->size);
+        }
+
+        /* if rsyncable, generate block lengths and prepare curr for job to
+           likely have less than size bytes (up to the last hash hit) */
+        job->lens = NULL;
+        if (rsync && curr->len) {
+            /* compute the hash function starting where we last left off to
+               cover either size bytes or to EOF, whichever is less, through
+               the data in curr (and in the next loop, through next) -- save
+               the block lengths resulting from the hash hits in the job->lens
+               list */
+            if (left == 0) {
+                /* scan is in curr */
+                last = curr->buf;
+                end = curr->buf + curr->len;
+                while (scan < end) {
+                    hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
+                    if (hash == RSYNCHIT) {
+                        len = scan - last;
+                        append_len(job, len);
+                        last = scan;
+                    }
+                }
+
+                /* continue scan in next */
+                left = scan - last;
+                scan = next->buf;
+            }
+
+            /* scan in next for enough bytes to fill curr, or what is available
+               in next, whichever is less (if next isn't full, then we're at
+               the end of the file) -- the bytes in curr since the last hit,
+               stored in left, counts towards the size of the first block */
+            last = next->buf;
+            len = curr->size - curr->len;
+            if (len > next->len)
+                len = next->len;
+            end = next->buf + len;
+            while (scan < end) {
+                hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
+                if (hash == RSYNCHIT) {
+                    len = (scan - last) + left;
+                    left = 0;
+                    append_len(job, len);
+                    last = scan;
+                }
+            }
+            append_len(job, 0);
+
+            /* create input in curr for job up to last hit or entire buffer if
+               no hits at all -- save remainder in next and possibly hold */
+            len = (job->lens->len == 1 ? scan : last) - next->buf;
+            if (len) {
+                /* got hits in next, or no hits in either -- copy to curr */
+                memcpy(curr->buf + curr->len, next->buf, len);
+                curr->len += len;
+                memmove(next->buf, next->buf + len, next->len - len);
+                next->len -= len;
+                scan -= len;
+                left = 0;
+            }
+            else if (job->lens->len != 1 && left && next->len) {
+                /* had hits in curr, but none in next, and last hit in curr
+                   wasn't right at the end, so we have input there to save --
+                   use curr up to the last hit, save the rest, moving next to
+                   hold */
+                hold = next;
+                next = get_space(&in_pool);
+                memcpy(next->buf, curr->buf + (curr->len - left), left);
+                next->len = left;
+                curr->len -= left;
+            }
+            else {
+                /* else, last match happened to be right at the end of curr,
+                   or we're at the end of the input compressing the rest */
+                left = 0;
+            }
+        }
+
+        /* compress curr->buf to curr->len -- compress thread will drop curr */
+        job->in = curr;
+
+        /* set job->more if there is more to compress after curr */
+        more = next->len != 0;
+        job->more = more;
+
+        /* provide dictionary for this job, prepare dictionary for next job */
+        job->out = dict;
+        if (more && setdict) {
+            if (curr->len >= DICT || job->out == NULL) {
+                dict = curr;
+                use_space(dict);
+            }
+            else {
+                dict = get_space(&dict_pool);
+                len = DICT - curr->len;
+                memcpy(dict->buf, job->out->buf + (job->out->len - len), len);
+                memcpy(dict->buf + len, curr->buf, curr->len);
+                dict->len = DICT;
+            }
+        }
+
+        /* preparation of job is complete */
+        job->seq = seq;
+        Trace(("-- read #%ld%s", seq, more ? "" : " (last)"));
+        if (++seq < 1)
+            bail("input too long: ", in);
+
+        /* start another compress thread if needed */
+        if (cthreads < seq && cthreads < procs) {
+            (void)launch(compress_thread, NULL);
+            cthreads++;
+        }
+
+        /* put job at end of compress list, let all the compressors know */
+        possess(compress_have);
+        job->next = NULL;
+        *compress_tail = job;
+        compress_tail = &(job->next);
+        twist(compress_have, BY, +1);
+    } while (more);
+    drop_space(next);
+
+    /* wait for the write thread to complete (we leave the compress threads out
+       there and waiting in case there is another stream to compress) */
+    join(writeth);
+    writeth = NULL;
+    Trace(("-- write thread joined"));
+}
+
+#endif
+
+/* repeated code in single_compress to compress available input and write it
+   -- flush is passed to deflate() as its flush argument; the macro uses strm,
+   out, out_size, clen, and outd from the place where it is expanded.  It
+   calls deflate() repeatedly for as long as deflate() fills the whole output
+   buffer, writing each batch of output to outd with writen() and adding its
+   size to clen, and then asserts that all of the provided input was
+   consumed. */
+#define DEFLATE_WRITE(flush) \
+    do { \
+        do { \
+            strm->avail_out = out_size; \
+            strm->next_out = out; \
+            (void)deflate(strm, flush); \
+            writen(outd, out, out_size - strm->avail_out); \
+            clen += out_size - strm->avail_out; \
+        } while (strm->avail_out == 0); \
+        assert(strm->avail_in == 0); \
+    } while (0)
+
+/* do a simple compression in a single thread from ind to outd -- if reset is
+   true, instead free the memory that was allocated and retained for input,
+   output, and deflate
+
+   The two input buffers, the output buffer, and the deflate state are
+   allocated on the first call and kept in static variables for reuse by
+   later calls, until a call with reset true releases them.  Each compressing
+   call writes a header with put_header(), then a raw deflate stream, then a
+   trailer with put_trailer().  A failed allocation or deflate initialization
+   ends in bail(). */
+local void single_compress(int reset)
+{
+    size_t got;                     /* amount read */
+    size_t more;                    /* amount of next read (0 if eof) */
+    size_t start;                   /* start of next read */
+    size_t block;                   /* bytes in current block for -i */
+    unsigned hash;                  /* hash for rsyncable */
+#if ZLIB_VERNUM >= 0x1260
+    int bits;                       /* deflate pending bits */
+#endif
+    unsigned char *scan;            /* pointer for hash computation */
+    size_t left;                    /* bytes left to compress after hash hit */
+    unsigned long head;             /* header length */
+    unsigned long ulen;             /* total uncompressed size (overflow ok) */
+    unsigned long clen;             /* total compressed size (overflow ok) */
+    unsigned long check;            /* check value of uncompressed data */
+    static unsigned out_size;       /* size of output buffer */
+    static unsigned char *in, *next, *out;  /* reused i/o buffers */
+    static z_stream *strm = NULL;   /* reused deflate structure */
+
+    /* if requested, just release the allocations and return */
+    if (reset) {
+        if (strm != NULL) {
+            (void)deflateEnd(strm);
+            free(strm);
+            free(out);
+            free(next);
+            free(in);
+            strm = NULL;
+        }
+        return;
+    }
+
+    /* initialize the deflate structure if this is the first time */
+    if (strm == NULL) {
+        out_size = size > MAXP2 ? MAXP2 : (unsigned)size;
+        if ((in = malloc(size)) == NULL ||
+            (next = malloc(size)) == NULL ||
+            (out = malloc(out_size)) == NULL ||
+            (strm = malloc(sizeof(z_stream))) == NULL)
+            bail("not enough memory", "");
+        strm->zfree = Z_NULL;
+        strm->zalloc = Z_NULL;
+        strm->opaque = Z_NULL;
+        if (deflateInit2(strm, level, Z_DEFLATED, -15, 8,
+                         Z_DEFAULT_STRATEGY) != Z_OK)
+            bail("not enough memory", "");
+    }
+
+    /* write header */
+    head = put_header();
+
+    /* set compression level in case it changed */
+    (void)deflateReset(strm);
+    (void)deflateParams(strm, level, Z_DEFAULT_STRATEGY);
+
+    /* do raw deflate and calculate check value -- the first read goes into
+       next[], which is swapped with in[] at the top of the loop */
+    got = 0;
+    more = readn(ind, next, size);
+    /* widen the same way as the later updates of ulen -- going through
+       (unsigned) here would drop the high bits of a first read of 4 GB or
+       more when unsigned long is wider than unsigned */
+    ulen = (unsigned long)more;
+    start = 0;
+    clen = 0;
+    block = 0;
+    check = CHECK(0L, Z_NULL, 0);
+    hash = RSYNCHIT;
+    do {
+        /* get data to compress, see if there is any more input */
+        if (got == 0) {
+            scan = in;  in = next;  next = scan;
+            strm->next_in = in + start;
+            got = more;
+            more = readn(ind, next, size);
+            ulen += (unsigned long)more;
+            start = 0;
+        }
+
+        /* if rsyncable, compute hash until a hit or the end of the block */
+        left = 0;
+        if (rsync && got) {
+            scan = strm->next_in;
+            left = got;
+            do {
+                if (left == 0) {
+                    /* went to the end -- if no more or no hit in size bytes,
+                       then proceed to do a flush or finish with got bytes */
+                    if (more == 0 || got == size)
+                        break;
+
+                    /* fill in[] with what's left there and as much as possible
+                       from next[] -- set up to continue hash hit search */
+                    memmove(in, strm->next_in, got);
+                    strm->next_in = in;
+                    scan = in + got;
+                    left = more > size - got ? size - got : more;
+                    memcpy(scan, next + start, left);
+                    got += left;
+                    more -= left;
+                    start += left;
+
+                    /* if that emptied the next buffer, try to refill it */
+                    if (more == 0) {
+                        more = readn(ind, next, size);
+                        ulen += (unsigned long)more;
+                        start = 0;
+                    }
+                }
+                left--;
+                hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
+            } while (hash != RSYNCHIT);
+
+            /* compress up to and including the hit now -- the left bytes
+               after it are kept for the next pass through the loop */
+            got -= left;
+        }
+
+        /* clear history for --independent option */
+        if (!setdict) {
+            block += got;
+            if (block > size) {
+                (void)deflateReset(strm);
+                block = got;
+            }
+        }
+
+        /* compress MAXP2-size chunks in case unsigned type is small */
+        while (got > MAXP2) {
+            strm->avail_in = MAXP2;
+            check = CHECK(check, strm->next_in, strm->avail_in);
+            DEFLATE_WRITE(Z_NO_FLUSH);
+            got -= MAXP2;
+        }
+
+        /* compress the remainder, emit a block -- finish if end of input */
+        strm->avail_in = (unsigned)got;
+        got = left;
+        check = CHECK(check, strm->next_in, strm->avail_in);
+        if (more || got) {
+#if ZLIB_VERNUM >= 0x1260
+            /* end the deflate block, then bring the output to a byte
+               boundary: with a sync flush if the pending bit count is odd,
+               otherwise by inserting ten bits at a time with deflatePrime()
+               until the pending bit count is a multiple of eight */
+            DEFLATE_WRITE(Z_BLOCK);
+            (void)deflatePending(strm, Z_NULL, &bits);
+            if (bits & 1)
+                DEFLATE_WRITE(Z_SYNC_FLUSH);
+            else if (bits & 7) {
+                do {
+                    bits = deflatePrime(strm, 10, 2);
+                    assert(bits == Z_OK);
+                    (void)deflatePending(strm, Z_NULL, &bits);
+                } while (bits & 7);
+                DEFLATE_WRITE(Z_NO_FLUSH);
+            }
+#else
+            DEFLATE_WRITE(Z_SYNC_FLUSH);
+#endif
+        }
+        else
+            DEFLATE_WRITE(Z_FINISH);
+
+        /* do until no more input */
+    } while (more || got);
+
+    /* write trailer */
+    put_trailer(ulen, clen, check, head);
+}
+
+/* --- decompression --- */
+
+/* globals for decompression and listing buffered reading -- the in_*
+   variables are maintained by load(), in_init(), and the GET() and SKIP()
+   macros */
+#define BUF 32768U                  /* input buffer size */
+local unsigned char in_buf[BUF];    /* input buffer */
+local unsigned char *in_next;       /* next unused byte in buffer */
+local size_t in_left;               /* number of unused bytes in buffer */
+local int in_eof;                   /* true if reached end of file on input */
+local int in_short;                 /* true if last read didn't fill buffer */
+local off_t in_tot;                 /* total bytes read from input */
+local off_t out_tot;                /* total bytes written to output */
+local unsigned long out_check;      /* check value of output */
+
+#ifndef NOTHREAD
+/* parallel reading */
+
+local unsigned char in_buf2[BUF];   /* second buffer for parallel reads */
+local size_t in_len;                /* data waiting in next buffer (set by
+                                       load_read(), picked up by load()) */
+local int in_which;                 /* -1: start, 0: in_buf2, 1: in_buf */
+local lock *load_state;             /* value = 0 to wait, 1 to read a buffer */
+local thread *load_thread;          /* load_read() thread for joining */
+
+/* parallel read thread -- each time load_state becomes 1, read up to BUF bytes
+   from ind into the buffer selected by in_which (in_buf if non-zero, else
+   in_buf2), store the number of bytes read in in_len, and set load_state back
+   to 0 to signal that the read is complete; the thread exits after the first
+   read that does not fill the buffer */
+local void load_read(void *dummy)
+{
+    size_t len;
+
+    (void)dummy;
+
+    Trace(("-- launched decompress read thread"));
+    do {
+        possess(load_state);
+        wait_for(load_state, TO_BE, 1);
+        in_len = len = readn(ind, in_which ? in_buf : in_buf2, BUF);
+        /* len is a size_t -- convert it to the type that %lu expects */
+        Trace(("-- decompress read thread read %lu bytes",
+               (unsigned long)len));
+        twist(load_state, TO, 0);
+    } while (len == BUF);
+    Trace(("-- exited decompress read thread"));
+}
+
+#endif
+
+/* load() is called when the input has been consumed in order to provide more
+   input data: load the input buffer with BUF or less bytes (less if at end of
+   file) from the file ind, set in_next to point to the in_left bytes read,
+   update in_tot, and return in_left -- in_eof is set to true when in_left has
+   gone to zero and there is no more data left to read from ind
+
+   When threads are compiled in and procs > 1, the reads are done by the
+   load_read() thread into in_buf and in_buf2 alternately, so that the next
+   buffer can be read while the current one is being consumed. */
+local size_t load(void)
+{
+    /* if already detected end of file, do nothing */
+    if (in_short) {
+        in_eof = 1;
+        in_left = 0;
+        return 0;
+    }
+
+#ifndef NOTHREAD
+    /* if first time in or procs == 1, read a buffer to have something to
+       return, otherwise wait for the previous read job to complete */
+    if (procs > 1) {
+        /* if first time, fire up the read thread, ask for a read (the lock
+           starts at 1, which is the read request for load_read()) */
+        if (in_which == -1) {
+            in_which = 1;
+            load_state = new_lock(1);
+            load_thread = launch(load_read, NULL);
+        }
+
+        /* wait for the previously requested read to complete */
+        possess(load_state);
+        wait_for(load_state, TO_BE, 0);
+        release(load_state);
+
+        /* set up input buffer with the data just read */
+        in_next = in_which ? in_buf : in_buf2;
+        in_left = in_len;
+
+        /* if not at end of file, alert read thread to load next buffer,
+           alternate between in_buf and in_buf2 (in_which is switched before
+           the request is posted, so the read thread fills the other buffer) */
+        if (in_len == BUF) {
+            in_which = 1 - in_which;
+            possess(load_state);
+            twist(load_state, TO, 1);
+        }
+
+        /* at end of file -- join read thread (already exited), clean up */
+        else {
+            join(load_thread);
+            free_lock(load_state);
+            in_which = -1;
+        }
+    }
+    else
+#endif
+    {
+        /* don't use threads -- simply read a buffer into in_buf */
+        in_left = readn(ind, in_next = in_buf, BUF);
+    }
+
+    /* note end of file -- a short read is remembered in in_short so that the
+       next call returns zero without reading again */
+    if (in_left < BUF) {
+        in_short = 1;
+
+        /* if we got bupkis, now is the time to mark eof */
+        if (in_left == 0)
+            in_eof = 1;
+    }
+
+    /* update the total and return the available bytes */
+    in_tot += in_left;
+    return in_left;
+}
+
+/* reset the buffered input state before reading a new input: nothing
+   buffered, nothing counted, no end of file seen, and (when threads are
+   compiled in) the read thread marked as not yet started */
+local void in_init(void)
+{
+#ifndef NOTHREAD
+    in_which = -1;
+#endif
+    in_tot = 0;
+    in_left = 0;
+    in_short = 0;
+    in_eof = 0;
+}
+
+/* buffered reading macros for decompression and listing
+
+   GET() returns the next input byte, calling load() when the buffer is
+   empty, or EOF if there is no more input.  GET2() and GET4() assemble two
+   and four bytes, least significant byte first, and need a local tmp2
+   (and for GET4() also tmp4) variable in the function that uses them; they
+   do not check for end of input themselves, so the caller checks in_eof.
+   SKIP(dist) discards dist bytes of input -- note that on running out of
+   input it executes "return -1" in the function that uses it, so it can only
+   be used in functions returning an integer. */
+#define GET() (in_eof || (in_left == 0 && load() == 0) ? EOF : \
+               (in_left--, *in_next++))
+#define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8))
+#define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16))
+#define SKIP(dist) \
+    do { \
+        size_t togo = (dist); \
+        while (togo > in_left) { \
+            togo -= in_left; \
+            if (load() == 0) \
+                return -1; \
+        } \
+        in_left -= togo; \
+        in_next += togo; \
+    } while (0)
+
+/* pull LSB order or MSB order integers from an unsigned char buffer -- p
+   points to the first byte, 2 or 4 is the number of bytes used, L means the
+   least significant byte comes first, M means the most significant byte
+   comes first (p is evaluated more than once) */
+#define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8))
+#define PULL4L(p) (PULL2L(p) + ((unsigned long)(PULL2L((p) + 2)) << 16))
+#define PULL2M(p) (((unsigned)((p)[0]) << 8) + (p)[1])
+#define PULL4M(p) (((unsigned long)(PULL2M(p)) << 16) + PULL2M((p) + 2))
+
+/* convert a packed MS-DOS date and time (date in the high 16 bits, time in
+   the low 16 bits) to a Unix time, interpreting it in the current timezone
+   since the DOS value carries no timezone -- a value of zero gives the
+   current time */
+local time_t dos2time(unsigned long dos)
+{
+    struct tm when;
+
+    if (dos == 0)
+        return time(NULL);
+
+    /* time of day: seconds are stored in units of two seconds */
+    when.tm_sec  = (int)(dos << 1) & 0x3e;
+    when.tm_min  = (int)(dos >> 5) & 0x3f;
+    when.tm_hour = (int)(dos >> 11) & 0x1f;
+
+    /* date: month is stored as 1..12, year is stored as years since 1980 */
+    when.tm_mday = (int)(dos >> 16) & 0x1f;
+    when.tm_mon  = ((int)(dos >> 21) & 0xf) - 1;
+    when.tm_year = ((int)(dos >> 25) & 0x7f) + 80;
+
+    /* let mktime() work out whether daylight saving time applies */
+    when.tm_isdst = -1;
+    return mktime(&when);
+}
+
+/* convert an unsigned 32-bit integer to signed, even if long > 32 bits --
+   only the low 32 bits of val are used, and they are interpreted as a two's
+   complement value in the range -2^31 to 2^31-1 */
+local long tolong(unsigned long val)
+{
+    unsigned long low = val & 0x7fffffffUL;     /* bits 0..30 */
+
+    /* sign bit clear: the value is just the low 31 bits */
+    if ((val & 0x80000000UL) == 0)
+        return (long)low;
+
+    /* sign bit set: the result is low - 2^31, computed as
+       -(2^31-1 - low) - 1 so that no intermediate value is outside of the
+       range of a 32-bit long (converting 0x80000000UL to long and
+       subtracting it overflows when long is 32 bits) */
+    return -(long)(0x7fffffffUL - low) - 1;
+}
+
+/* mask for the low 32 bits -- also the value of a 32-bit zip length field
+   that read_extra() replaces from the Zip64 extra block */
+#define LOW32 0xffffffffUL
+
+/* process zip extra field to extract zip64 lengths and Unix mod time -- len
+   is the number of bytes in the extra field, all of which are consumed from
+   the input; if save is true, a modification time found in a Unix or
+   extended timestamp block is stored in stamp.  Returns 0 on success, or -1
+   if the input ends first (either detected here, or by way of the return
+   inside the SKIP() macro). */
+local int read_extra(unsigned len, int save)
+{
+    unsigned id, size, tmp2;
+    unsigned long tmp4;
+
+    /* process extra blocks -- each one starts with a two-byte id and a
+       two-byte size of the data that follows */
+    while (len >= 4) {
+        id = GET2();
+        size = GET2();
+        if (in_eof)
+            return -1;
+        len -= 4;
+        /* block claims more data than remains in the extra field -- stop,
+           and skip over what is left below */
+        if (size > len)
+            break;
+        len -= size;
+        if (id == 0x0001) {
+            /* Zip64 Extended Information Extra Field -- a length is only
+               present here if the header value was all ones; only the low
+               four bytes of each eight-byte length are used */
+            if (zip_ulen == LOW32 && size >= 8) {
+                zip_ulen = GET4();
+                SKIP(4);
+                size -= 8;
+            }
+            if (zip_clen == LOW32 && size >= 8) {
+                zip_clen = GET4();
+                SKIP(4);
+                size -= 8;
+            }
+        }
+        if (save) {
+            if ((id == 0x000d || id == 0x5855) && size >= 8) {
+                /* PKWare Unix or Info-ZIP Type 1 Unix block */
+                SKIP(4);
+                stamp = tolong(GET4());
+                size -= 8;
+            }
+            if (id == 0x5455 && size >= 5) {
+                /* Extended Timestamp block -- the first byte holds flags,
+                   and the low flag bit says that a time follows */
+                size--;
+                if (GET() & 1) {
+                    stamp = tolong(GET4());
+                    size -= 4;
+                }
+            }
+        }
+        /* skip whatever part of this block was not used above */
+        SKIP(size);
+    }
+    SKIP(len);
+    return 0;
+}
+
+/* read a gzip, zip, zlib, or lzw header from ind and extract useful
+   information, return the method -- or on error return negative: -1 is
+   immediate EOF, -2 is not a recognized compressed format, -3 is premature EOF
+   within the header, -4 is unexpected header flag values; a method of 256 is
+   lzw -- set form to indicate gzip, zlib, or zip
+
+   form is set to 0 for gzip and lzw, 1 for zlib, 2 for zip, and 3 for zip
+   with bit 3 of the zip flags set.  If save is true, stamp is zeroed and
+   hname released, and then they are set from the time stamp and the file
+   name in the header if those are present (hname is allocated here).  The
+   first input byte is left in magic1.  When two magic bytes were read and
+   they match none of the formats, the second byte is put back into the input
+   buffer before returning -2.
+
+   NOTE(review): the SKIP() macro executes "return -1" when the input ends
+   inside a skipped field, so a header truncated at such a point is still
+   reported as -1 rather than -3. */
+local int get_header(int save)
+{
+    unsigned magic;             /* magic header */
+    int method;                 /* compression method */
+    int flags;                  /* header flags */
+    unsigned fname, extra;      /* name and extra field lengths */
+    unsigned tmp2;              /* for macro */
+    unsigned long tmp4;         /* for macro */
+
+    /* clear return information */
+    if (save) {
+        stamp = 0;
+        RELEASE(hname);
+    }
+
+    /* see if it's a gzip, zlib, or lzw file */
+    form = 0;
+    magic1 = GET();
+    if (in_eof)
+        return -1;
+    magic = magic1 << 8;
+    magic += GET();
+    if (in_eof)
+        return -2;
+    /* a zlib header is recognized only by its two bytes, taken as a
+       big-endian number, being a multiple of 31 */
+    if (magic % 31 == 0) {          /* it's zlib */
+        form = 1;
+        return (int)((magic >> 8) & 0xf);
+    }
+    if (magic == 0x1f9d)            /* it's lzw */
+        return 256;
+    if (magic == 0x504b) {          /* it's zip */
+        if (GET() != 3 || GET() != 4)
+            return -3;
+        SKIP(2);
+        flags = GET2();
+        if (in_eof)
+            return -3;
+        if (flags & 0xfff0)
+            return -4;
+        method = GET2();
+        if (flags & 1)              /* encrypted */
+            method = 255;           /* mark as unknown method */
+        if (in_eof)
+            return -3;
+        if (save)
+            stamp = dos2time(GET4());
+        else
+            SKIP(4);
+        zip_crc = GET4();
+        zip_clen = GET4();
+        zip_ulen = GET4();
+        fname = GET2();
+        extra = GET2();
+        if (save) {
+            /* copy the fname bytes of the name, which may span more than one
+               input buffer, and terminate it */
+            char *next = hname = malloc(fname + 1);
+            if (hname == NULL)
+                bail("not enough memory", "");
+            while (fname > in_left) {
+                memcpy(next, in_next, in_left);
+                fname -= in_left;
+                next += in_left;
+                if (load() == 0)
+                    return -3;
+            }
+            memcpy(next, in_next, fname);
+            in_left -= fname;
+            in_next += fname;
+            next += fname;
+            *next = 0;
+        }
+        else
+            SKIP(fname);
+        read_extra(extra, save);
+        form = 2 + ((flags & 8) >> 3);
+        return in_eof ? -3 : method;
+    }
+    if (magic != 0x1f8b) {          /* not gzip */
+        in_left++;      /* unget second magic byte */
+        in_next--;
+        return -2;
+    }
+
+    /* it's gzip -- get method and flags */
+    method = GET();
+    flags = GET();
+    /* the magic has been read, so running out of input here is a premature
+       EOF within the header (-3), not an immediate EOF (-1) */
+    if (in_eof)
+        return -3;
+    if (flags & 0xe0)
+        return -4;
+
+    /* get time stamp */
+    if (save)
+        stamp = tolong(GET4());
+    else
+        SKIP(4);
+
+    /* skip extra field and OS */
+    SKIP(2);
+
+    /* skip extra field, if present */
+    if (flags & 4) {
+        extra = GET2();
+        if (in_eof)
+            return -3;
+        SKIP(extra);
+    }
+
+    /* read file name, if present, into allocated memory */
+    if ((flags & 8) && save) {
+        unsigned char *end;
+        size_t copy, have, size = 128;
+        hname = malloc(size);
+        if (hname == NULL)
+            bail("not enough memory", "");
+        have = 0;
+        do {
+            if (in_left == 0 && load() == 0)
+                return -3;
+            /* copy through the terminating zero if it is in this buffer,
+               otherwise copy all of the buffer and go around again */
+            end = memchr(in_next, 0, in_left);
+            copy = end == NULL ? in_left : (size_t)(end - in_next) + 1;
+            if (have + copy > size) {
+                /* double the allocation until the name fits */
+                while (have + copy > (size <<= 1))
+                    ;
+                hname = realloc(hname, size);
+                if (hname == NULL)
+                    bail("not enough memory", "");
+            }
+            memcpy(hname + have, in_next, copy);
+            have += copy;
+            in_left -= copy;
+            in_next += copy;
+        } while (end == NULL);
+    }
+    else if (flags & 8)
+        while (GET() != 0)
+            if (in_eof)
+                return -3;
+
+    /* skip comment */
+    if (flags & 16)
+        while (GET() != 0)
+            if (in_eof)
+                return -3;
+
+    /* skip header crc */
+    if (flags & 2)
+        SKIP(2);
+
+    /* return compression method */
+    return method;
+}
+
+/* --- list contents of compressed input (gzip, zlib, or lzw) */
+
+/* look for a standard compressed file suffix at the end of nm and return its
+   length including the leading '.', '-', or '_', or zero if there is none --
+   a suffix is only recognized if at least one character comes before it */
+local size_t compressed_suffix(char *nm)
+{
+    size_t total = strlen(nm);
+    char *tail;
+
+    /* four-character suffixes */
+    if (total > 4) {
+        tail = nm + (total - 4);
+        if (!strcmp(tail, ".zip") || !strcmp(tail, ".ZIP") ||
+            !strcmp(tail, ".tgz"))
+            return 4;
+    }
+
+    /* three-character suffixes */
+    if (total > 3) {
+        tail = nm + (total - 3);
+        if (!strcmp(tail, ".gz") || !strcmp(tail, "-gz") ||
+            !strcmp(tail, ".zz") || !strcmp(tail, "-zz"))
+            return 3;
+    }
+
+    /* two-character suffixes */
+    if (total > 2) {
+        tail = nm + (total - 2);
+        if (!strcmp(tail, ".z") || !strcmp(tail, "-z") ||
+            !strcmp(tail, "_z") || !strcmp(tail, ".Z"))
+            return 2;
+    }
+
+    /* no recognized suffix */
+    return 0;
+}
+
+/* listing file name lengths for -l and -lv */
+#define NAMEMAX1 48     /* name display limit at verbosity 1 */
+#define NAMEMAX2 16     /* name display limit at verbosity 2 */
+
+/* print gzip or lzw file information -- method is the compression method as
+   returned by get_header() (256 for lzw), check is the check value to show,
+   len is the uncompressed length to show, and cont true shows "<...>" in
+   place of a name.  The compressed length shown is in_tot.  The name comes
+   from hname if set, otherwise from the input name in with its compressed
+   suffix removed.  What is printed depends on verbosity; a column header is
+   printed first if first is set, and first is then cleared. */
+local void show_info(int method, unsigned long check, off_t len, int cont)
+{
+    size_t max;             /* maximum name length for current verbosity */
+    size_t n;               /* name length without suffix */
+    time_t now;             /* for getting current year */
+    char mod[26];           /* modification time in text */
+    char name[NAMEMAX1+1];  /* header or file name, possibly truncated */
+
+    /* create abbreviated name from header file name or actual file name --
+       up to max + 1 characters are copied into the zeroed name[], so that a
+       non-zero name[max] afterwards means that the name did not fit */
+    max = verbosity > 1 ? NAMEMAX2 : NAMEMAX1;
+    memset(name, 0, max + 1);
+    if (cont)
+        strncpy(name, "<...>", max + 1);
+    else if (hname == NULL) {
+        n = strlen(in) - compressed_suffix(in);
+        strncpy(name, in, n > max + 1 ? max + 1 : n);
+        /* a .tgz suffix is shown as .tar */
+        if (strcmp(in + n, ".tgz") == 0 && n < max + 1)
+            strncpy(name + n, ".tar", max + 1 - n);
+    }
+    else
+        strncpy(name, hname, max + 1);
+    /* name too long -- end it with "...", which also terminates it */
+    if (name[max])
+        strcpy(name + max - 3, "...");
+
+    /* convert time stamp to text -- ctime() gives "Www Mmm dd hh:mm:ss yyyy",
+       of which mod + 4 up to mod[16] is shown: the month, day, and hh:mm, or
+       if the year is not the current year, the year in place of hh:mm */
+    if (stamp) {
+        strcpy(mod, ctime(&stamp));
+        now = time(NULL);
+        if (strcmp(mod + 20, ctime(&now) + 20) != 0)
+            strcpy(mod + 11, mod + 19);
+    }
+    else
+        strcpy(mod + 4, "------ -----");
+    mod[16] = 0;
+
+    /* if first time, print header */
+    if (first) {
+        if (verbosity > 1)
+            fputs("method    check    timestamp    ", stdout);
+        if (verbosity > 0)
+            puts("compressed   original reduced  name");
+        first = 0;
+    }
+
+    /* print information */
+    if (verbosity > 1) {
+        if (form == 3 && !decode)
+            printf("zip%3d  --------  %s  ", method, mod + 4);
+        else if (form > 1)
+            printf("zip%3d  %08lx  %s  ", method, check, mod + 4);
+        else if (form)
+            printf("zlib%2d  %08lx  %s  ", method, check, mod + 4);
+        else if (method == 256)
+            printf("lzw     --------  %s  ", mod + 4);
+        else
+            printf("gzip%2d  %08lx  %s  ", method, check, mod + 4);
+    }
+    if (verbosity > 0) {
+        /* the length is marked with "?" and the reduction shown as "unk" for
+           a zip entry with form 3 when not decoding, or when the compressed
+           size is larger than the tests below allow for len bytes of
+           deflate or lzw data -- the same if and else are completed by
+           either of the two preprocessor branches, which differ only in how
+           the off_t values are printed */
+        if ((form == 3 && !decode) ||
+            (method == 8 && in_tot > (len + (len >> 10) + 12)) ||
+            (method == 256 && in_tot > len + (len >> 1) + 3))
+#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
+            printf("%10jd %10jd?  unk    %s\n",
+                   (intmax_t)in_tot, (intmax_t)len, name);
+        else
+            printf("%10jd %10jd %6.1f%%  %s\n",
+                   (intmax_t)in_tot, (intmax_t)len,
+                   len == 0 ? 0 : 100 * (len - in_tot)/(double)len,
+                   name);
+#else
+            printf(sizeof(off_t) == sizeof(long) ?
+                   "%10ld %10ld?  unk    %s\n" : "%10lld %10lld?  unk    %s\n",
+                   in_tot, len, name);
+        else
+            printf(sizeof(off_t) == sizeof(long) ?
+                   "%10ld %10ld %6.1f%%  %s\n" : "%10lld %10lld %6.1f%%  %s\n",
+                   in_tot, len,
+                   len == 0 ? 0 : 100 * (len - in_tot)/(double)len,
+                   name);
+#endif
+    }
+}
+
+/* list content information about the gzip file at ind (only works if the gzip
+   file contains a single gzip stream with no junk at the end, and only works
+   well if the uncompressed length is less than 4 GB)
+
+   The header is read with get_header(), and then depending on the format:
+   for zip, the lengths and crc from the local header are shown; for zlib,
+   the check value is taken from the last four bytes of the input; for lzw,
+   only the compressed size is shown; and for gzip, the check value and
+   length are taken from the last eight bytes of the input.  The end of the
+   input is found by seeking if possible, otherwise by reading all of it.
+   Input that is not recognized or is too short is skipped, with a complaint
+   in most cases. */
+local void list_info(void)
+{
+    int method;             /* get_header() return value */
+    size_t n;               /* available trailer bytes */
+    off_t at;               /* used to calculate compressed length */
+    unsigned char tail[8];  /* trailer containing check and length */
+    unsigned long check, len;   /* check value and length from trailer */
+
+    /* initialize input buffer */
+    in_init();
+
+    /* read header information and position input after header */
+    method = get_header(1);
+    if (method < 0) {
+        RELEASE(hname);
+        if (method != -1 && verbosity > 1)
+            complain("%s not a compressed file -- skipping", in);
+        return;
+    }
+
+    /* list zip file */
+    if (form > 1) {
+        in_tot = zip_clen;
+        show_info(method, zip_crc, zip_ulen, 0);
+        return;
+    }
+
+    /* list zlib file */
+    if (form) {
+        at = lseek(ind, 0, SEEK_END);
+        if (at == -1) {
+            /* can't seek -- read to the end, shifting the last (up to four)
+               bytes of each buffer into check, so that check ends up with
+               the last four bytes of the input, most significant first */
+            check = 0;
+            do {
+                len = in_left < 4 ? in_left : 4;
+                in_next += in_left - len;
+                while (len--)
+                    check = (check << 8) + *in_next++;
+            } while (load() != 0);
+            check &= LOW32;
+        }
+        else {
+            in_tot = at;
+            lseek(ind, -4, SEEK_END);
+            readn(ind, tail, 4);
+            check = PULL4M(tail);
+        }
+        /* don't count the two-byte header and four-byte check value */
+        in_tot -= 6;
+        show_info(method, check, 0, 0);
+        return;
+    }
+
+    /* list lzw file */
+    if (method == 256) {
+        at = lseek(ind, 0, SEEK_END);
+        if (at == -1)
+            while (load() != 0)
+                ;
+        else
+            in_tot = at;
+        in_tot -= 3;
+        show_info(method, 0, 0, 0);
+        return;
+    }
+
+    /* skip to end to get trailer (8 bytes), compute compressed length */
+    if (in_short) {                     /* whole thing already read */
+        if (in_left < 8) {
+            complain("%s not a valid gzip file -- skipping", in);
+            return;
+        }
+        in_tot = in_left - 8;           /* compressed size */
+        memcpy(tail, in_next + (in_left - 8), 8);
+    }
+    else if ((at = lseek(ind, -8, SEEK_END)) != -1) {
+        in_tot = at - in_tot + in_left; /* compressed size */
+        readn(ind, tail, 8);            /* get trailer */
+    }
+    else {                              /* can't seek */
+        at = in_tot - in_left;          /* save header size */
+        /* keep the last (up to eight) bytes of each buffer in tail in case
+           the next buffer turns out to hold less than a whole trailer */
+        do {
+            n = in_left < 8 ? in_left : 8;
+            memcpy(tail, in_next + (in_left - n), n);
+            load();
+        } while (in_left == BUF);       /* read until end */
+        if (in_left < 8) {
+            /* the trailer is split: its start is at the end of the n bytes
+               saved in tail, and the rest is the in_left bytes just read */
+            if (n + in_left < 8) {
+                complain("%s not a valid gzip file -- skipping", in);
+                return;
+            }
+            if (in_left) {
+                /* slide the needed saved bytes to the start of tail -- the
+                   source and destination are both in tail and can overlap,
+                   so this has to be memmove() and not memcpy() */
+                if (n + in_left > 8)
+                    memmove(tail, tail + n - (8 - in_left), 8 - in_left);
+                memcpy(tail + 8 - in_left, in_next, in_left);
+            }
+        }
+        else
+            memcpy(tail, in_next + (in_left - 8), 8);
+        in_tot -= at + 8;
+    }
+    if (in_tot < 2) {
+        complain("%s not a valid gzip file -- skipping", in);
+        return;
+    }
+
+    /* convert trailer to check and uncompressed length (modulo 2^32) */
+    check = PULL4L(tail);
+    len = PULL4L(tail + 4);
+
+    /* list information about contents */
+    show_info(method, check, len, 0);
+    RELEASE(hname);
+}
+
+/* --- copy input to output (when acting like cat) --- */
+
+/* copy the input to outd unchanged, starting with the first magic byte that
+   was already taken from the input and saved in magic1 -- out_tot is set to
+   the number of bytes written */
+local void cat(void)
+{
+    /* the first magic byte is no longer in the buffer, so write it from
+       magic1 (being here means there was at least that one byte) */
+    writen(outd, &magic1, 1);
+    out_tot = 1;
+
+    /* write out what remains in the input buffer, which if not empty starts
+       at the second magic byte, and then keep reloading and writing the
+       buffer until a load comes up empty */
+    for (; in_left != 0; load()) {
+        writen(outd, in_next, in_left);
+        out_tot += in_left;
+        in_left = 0;
+    }
+}
+
+/* --- decompress deflate input --- */
+
+/* call-back input function for inflateBack() -- load the next buffer of
+   input, point *buf at it, and return the number of bytes available there
+   (zero when there is no more input); desc is not used */
+local unsigned inb(void *desc, unsigned char **buf)
+{
+    size_t avail;
+
+    (void)desc;
+    avail = load();             /* load() returns the new in_left */
+    *buf = in_next;
+    return (unsigned)avail;
+}
+
+/* output buffers and window for infchk() and unlzw() -- infchk() hands out_buf
+   to inflateBackInit() as the window, unlzw() fills it directly */
+#define OUTSIZE 32768U      /* must be at least 32K for inflateBack() window */
+local unsigned char out_buf[OUTSIZE];
+
+#ifndef NOTHREAD
+/* output data for parallel write and check -- outb() copies each buffer it is
+   given into out_copy and records its length in out_len; the write and check
+   threads read both from there */
+local unsigned char out_copy[OUTSIZE];
+local size_t out_len;       /* bytes valid in out_copy, 0 ends the threads */
+
+/* outb threads states -- each lock is twisted to 1 by outb() when a new buffer
+   is ready and back to 0 by its thread when that thread is done with it;
+   outb_write_more == NULL also means the threads have not been launched */
+local lock *outb_write_more = NULL;
+local lock *outb_check_more;
+
+/* output write thread -- launched by outb(); each time outb() twists
+   outb_write_more to 1, write the out_len bytes in out_copy to outd (only
+   when really decompressing, i.e. decode == 1), then twist the lock back to 0
+   so that outb() may reuse out_copy; an out_len of zero ends the thread */
+local void outb_write(void *dummy)
+{
+    size_t len;
+
+    (void)dummy;
+
+    Trace(("-- launched decompress write thread"));
+    do {
+        possess(outb_write_more);
+        wait_for(outb_write_more, TO_BE, 1);
+        len = out_len;
+        if (len && decode == 1)
+            writen(outd, out_copy, len);
+        /* len is a size_t, so convert it to what %lu expects */
+        Trace(("-- decompress wrote %lu bytes", (unsigned long)len));
+        twist(outb_write_more, TO, 0);
+    } while (len);
+    Trace(("-- exited decompress write thread"));
+}
+
+/* output check thread -- launched by outb(); each time outb() twists
+   outb_check_more to 1, fold the out_len bytes in out_copy into out_check
+   using CHECK(), then twist the lock back to 0 so that outb() may reuse
+   out_copy; an out_len of zero ends the thread */
+local void outb_check(void *dummy)
+{
+    size_t len;
+
+    (void)dummy;
+
+    Trace(("-- launched decompress check thread"));
+    do {
+        possess(outb_check_more);
+        wait_for(outb_check_more, TO_BE, 1);
+        len = out_len;
+        out_check = CHECK(out_check, out_copy, len);
+        /* len is a size_t, so convert it to what %lu expects */
+        Trace(("-- decompress checked %lu bytes", (unsigned long)len));
+        twist(outb_check_more, TO, 0);
+    } while (len);
+    Trace(("-- exited decompress check thread"));
+}
+#endif
+
+/* call-back output function for inflateBack() -- wait for the last write and
+   check calculation to complete, copy the write buffer, and then alert the
+   write and check threads and return for more decompression while that's
+   going on (or just write and check if no threads or if procs == 1); a call
+   with len == 0, for which buf may be NULL, waits for the pending work and
+   shuts the threads down; desc is not used; always returns 0 */
+local int outb(void *desc, unsigned char *buf, unsigned len)
+{
+#ifndef NOTHREAD
+    static thread *wr, *ch;
+
+    if (procs > 1) {
+        /* if first time, initialize state and launch threads */
+        if (outb_write_more == NULL) {
+            outb_write_more = new_lock(0);
+            outb_check_more = new_lock(0);
+            wr = launch(outb_write, NULL);
+            ch = launch(outb_check, NULL);
+        }
+
+        /* wait for previous write and check threads to complete */
+        possess(outb_check_more);
+        wait_for(outb_check_more, TO_BE, 0);
+        possess(outb_write_more);
+        wait_for(outb_write_more, TO_BE, 0);
+
+        /* copy the output and alert the worker bees -- skip the copy for the
+           terminating call, since its NULL buf must not be given to memcpy()
+           even with a length of zero */
+        out_len = len;
+        out_tot += len;
+        if (len)
+            memcpy(out_copy, buf, len);
+        twist(outb_write_more, TO, 1);
+        twist(outb_check_more, TO, 1);
+
+        /* if requested with len == 0, clean up -- terminate and join write and
+           check threads, free lock */
+        if (len == 0) {
+            join(ch);
+            join(wr);
+            free_lock(outb_check_more);
+            free_lock(outb_write_more);
+            outb_write_more = NULL;
+        }
+
+        /* return for more decompression while last buffer is being written
+           and having its check value calculated -- we wait for those to finish
+           the next time this function is called */
+        return 0;
+    }
+#endif
+
+    /* desc is unused, with or without threads */
+    (void)desc;
+
+    /* if just one process or no threads, then do it without threads */
+    if (len) {
+        if (decode == 1)
+            writen(outd, buf, len);
+        out_check = CHECK(out_check, buf, len);
+        out_tot += len;
+    }
+    return 0;
+}
+
+/* inflate for decompression or testing -- decompress from ind to outd unless
+   decode != 1, in which case just test ind, and then also list if list != 0;
+   look for and decode multiple, concatenated gzip and/or zlib streams;
+   read and check the gzip, zlib, or zip trailer; the header of the first
+   stream has already been read when this is called, and form selects the
+   trailer layout that is checked (1 is zlib, above 1 is zip with 3 meaning a
+   data descriptor follows, anything else is gzip); invalid deflate data, a
+   missing trailer or a trailer mismatch is reported through bail() */
+local void infchk(void)
+{
+    int ret, cont;              /* last return code, true after first stream */
+    unsigned long check, len;   /* check value and length from the trailer */
+    z_stream strm;              /* inflateBack() state */
+    unsigned tmp2;              /* not referenced directly in this function -- */
+    unsigned long tmp4;         /*   presumably scratch for GET4(), confirm */
+    off_t clen;                 /* compressed length of the current stream */
+
+    cont = 0;
+    do {
+        /* header already read -- set up for decompression */
+        in_tot = in_left;               /* track compressed data length */
+        out_tot = 0;
+        out_check = CHECK(0L, Z_NULL, 0);
+        strm.zalloc = Z_NULL;
+        strm.zfree = Z_NULL;
+        strm.opaque = Z_NULL;
+        ret = inflateBackInit(&strm, 15, out_buf);
+        if (ret != Z_OK)
+            bail("not enough memory", "");
+
+        /* decompress, compute lengths and check value -- inb() supplies more
+           input and outb() consumes the output, then what inflateBack() left
+           unused is handed back to in_left and in_next */
+        strm.avail_in = in_left;
+        strm.next_in = in_next;
+        ret = inflateBack(&strm, inb, NULL, outb, NULL);
+        if (ret != Z_STREAM_END)
+            bail("corrupted input -- invalid deflate data: ", in);
+        in_left = strm.avail_in;
+        in_next = strm.next_in;
+        inflateBackEnd(&strm);
+        outb(NULL, NULL, 0);        /* finish off final write and check */
+
+        /* compute compressed data length */
+        clen = in_tot - in_left;
+
+        /* read and check trailer */
+        if (form > 1) {             /* zip local trailer (if any) */
+            if (form == 3) {        /* data descriptor follows */
+                /* read original version of data descriptor */
+                zip_crc = GET4();
+                zip_clen = GET4();
+                zip_ulen = GET4();
+                if (in_eof)
+                    bail("corrupted zip entry -- missing trailer: ", in);
+
+                /* if crc doesn't match, try info-zip variant with sig -- in
+                   that case the three values read are really the signature,
+                   the crc and the compressed length, so shift them down and
+                   read the uncompressed length */
+                if (zip_crc != out_check) {
+                    if (zip_crc != 0x08074b50UL || zip_clen != out_check)
+                        bail("corrupted zip entry -- crc32 mismatch: ", in);
+                    zip_crc = zip_clen;
+                    zip_clen = zip_ulen;
+                    zip_ulen = GET4();
+                }
+
+                /* handle incredibly rare cases where crc equals signature */
+                else if (zip_crc == 0x08074b50UL && zip_clen == zip_crc &&
+                         ((clen & LOW32) != zip_crc || zip_ulen == zip_crc)) {
+                    zip_crc = zip_clen;
+                    zip_clen = zip_ulen;
+                    zip_ulen = GET4();
+                }
+
+                /* if second length doesn't match, try 64-bit lengths */
+                if (zip_ulen != (out_tot & LOW32)) {
+                    zip_ulen = GET4();
+                    (void)GET4();
+                }
+                if (in_eof)
+                    bail("corrupted zip entry -- missing trailer: ", in);
+            }
+            if (zip_clen != (clen & LOW32) || zip_ulen != (out_tot & LOW32))
+                bail("corrupted zip entry -- length mismatch: ", in);
+            check = zip_crc;
+        }
+        else if (form == 1) {       /* zlib (big-endian) trailer */
+            check = (unsigned long)(GET()) << 24;
+            check += (unsigned long)(GET()) << 16;
+            check += (unsigned)(GET()) << 8;
+            check += GET();
+            if (in_eof)
+                bail("corrupted zlib stream -- missing trailer: ", in);
+            if (check != out_check)
+                bail("corrupted zlib stream -- adler32 mismatch: ", in);
+        }
+        else {                      /* gzip trailer */
+            check = GET4();
+            len = GET4();
+            if (in_eof)
+                bail("corrupted gzip stream -- missing trailer: ", in);
+            if (check != out_check)
+                bail("corrupted gzip stream -- crc32 mismatch: ", in);
+            if (len != (out_tot & LOW32))
+                bail("corrupted gzip stream -- length mismatch: ", in);
+        }
+
+        /* show file information if requested -- cont is passed as zero for
+           the first stream only */
+        if (list) {
+            in_tot = clen;
+            show_info(8, check, out_tot, cont);
+            cont = 1;
+        }
+
+        /* if a gzip or zlib entry follows a gzip or zlib entry, decompress it
+           (don't replace saved header information from first entry) -- form
+           is tested again after get_header() since that call can change it */
+    } while (form < 2 && (ret = get_header(0)) == 8 && form < 2);
+
+    /* gzip -cdf copies junk after gzip stream directly to output */
+    if (form < 2 && ret == -2 && force && pipeout && decode != 2 && !list)
+        cat();
+    else if (ret != -1 && form < 2)
+        complain("%s OK, has trailing junk which was ignored", in);
+}
+
+/* --- decompress Unix compress (LZW) input --- */
+
+/* memory for unlzw() --
+   the first 256 entries of prefix[] and suffix[] are never used, could
+   have offset the index, but it's faster to waste the memory; like the other
+   file-scope objects here these are local, so that such common names are not
+   exported from this file */
+local unsigned short prefix[65536];     /* index to LZW prefix string */
+local unsigned char suffix[65536];      /* one-character LZW suffix */
+local unsigned char match[65280 + 2];   /* buffer for reversed match */
+
+/* throw out what's left in the current bits byte buffer (this is a vestigial
+   aspect of the compressed data format derived from an implementation that
+   made use of a special VAX machine instruction!) -- drops the bits held in
+   rem and skips the chunk input bytes that remain in the current group of
+   codes, calling load() when those run past the buffered input and giving up
+   quietly if the input ends first; this uses the variables left, rem and
+   chunk of the function it is expanded in, which is unlzw() */
+#define FLUSHCODE() \
+    do { \
+        left = 0; \
+        rem = 0; \
+        if (chunk > in_left) { \
+            chunk -= in_left; \
+            if (load() == 0) \
+                break; \
+            if (chunk > in_left) { \
+                chunk = in_left = 0; \
+                break; \
+            } \
+        } \
+        in_left -= chunk; \
+        in_next += chunk; \
+        chunk = 0; \
+    } while (0)
+
+/* Decompress a compress (LZW) file from ind to outd.  The compress magic
+   header (two bytes) has already been read and verified.  The decompressed
+   data is collected in out_buf and written with writen() only if decode is 1,
+   so that any other decode value just tests the input.  out_tot is left with
+   the number of decompressed bytes.  Invalid input is reported with bail(),
+   and input that ends right after the flags byte is accepted as empty. */
+local void unlzw(void)
+{
+    int got;                    /* byte just read by GET() */
+    unsigned chunk;             /* bytes left in current chunk */
+    int left;                   /* bits left in rem */
+    unsigned rem;               /* unused bits from input */
+    int bits;                   /* current bits per code */
+    unsigned code;              /* code, table traversal index */
+    unsigned mask;              /* mask for current bits codes */
+    int max;                    /* maximum bits per code for this stream */
+    int flags;                  /* compress flags, then block compress flag */
+    unsigned end;               /* last valid entry in prefix/suffix tables */
+    unsigned temp;              /* current code */
+    unsigned prev;              /* previous code */
+    unsigned final;             /* last character written for previous code */
+    unsigned stack;             /* next position for reversed string */
+    unsigned outcnt;            /* bytes in output buffer */
+    unsigned char *p;           /* walks match[] while building and copying */
+
+    /* process remainder of compress header -- a flags byte */
+    out_tot = 0;
+    flags = GET();
+    if (in_eof)
+        bail("missing lzw data: ", in);
+    if (flags & 0x60)
+        bail("unknown lzw flags set: ", in);
+    max = flags & 0x1f;
+    if (max < 9 || max > 16)
+        bail("lzw bits out of range: ", in);
+    if (max == 9)                           /* 9 doesn't really mean 9 */
+        max = 10;
+    flags &= 0x80;                          /* true if block compress */
+
+    /* clear table */
+    bits = 9;
+    mask = 0x1ff;
+    end = flags ? 256 : 255;
+
+    /* set up: get first 9-bit code, which is the first decompressed byte, but
+       don't create a table entry until the next code */
+    got = GET();
+    if (in_eof)                             /* no compressed data is ok */
+        return;
+    final = prev = (unsigned)got;           /* low 8 bits of code */
+    got = GET();
+    if (in_eof || (got & 1) != 0)           /* missing a bit or code >= 256 */
+        bail("invalid lzw code: ", in);
+    rem = (unsigned)got >> 1;               /* remaining 7 bits */
+    left = 7;
+    chunk = bits - 2;                       /* 7 bytes left in this chunk */
+    out_buf[0] = (unsigned char)final;      /* write first decompressed byte */
+    outcnt = 1;
+
+    /* decode codes */
+    stack = 0;
+    for (;;) {
+        /* if the table will be full after this, increment the code size */
+        if (end >= mask && bits < max) {
+            FLUSHCODE();
+            bits++;
+            mask <<= 1;
+            mask++;
+        }
+
+        /* get a code of length bits */
+        if (chunk == 0)                     /* decrement chunk modulo bits */
+            chunk = bits;
+        code = rem;                         /* low bits of code */
+        got = GET();
+        if (in_eof) {                       /* EOF is end of compressed data */
+            /* write remaining buffered output */
+            out_tot += outcnt;
+            if (outcnt && decode == 1)
+                writen(outd, out_buf, outcnt);
+            return;
+        }
+        code += (unsigned)got << left;      /* middle (or high) bits of code */
+        left += 8;
+        chunk--;
+        if (bits > left) {                  /* need more bits */
+            got = GET();
+            if (in_eof)                     /* can't end in middle of code */
+                bail("invalid lzw code: ", in);
+            code += (unsigned)got << left;  /* high bits of code */
+            left += 8;
+            chunk--;
+        }
+        code &= mask;                       /* mask to current code length */
+        left -= bits;                       /* number of unused bits */
+        rem = (unsigned)got >> (8 - left);  /* unused bits from last byte */
+
+        /* process clear code (256) */
+        if (code == 256 && flags) {
+            FLUSHCODE();
+            bits = 9;                       /* initialize bits and mask */
+            mask = 0x1ff;
+            end = 255;                      /* empty table */
+            continue;                       /* get next code */
+        }
+
+        /* special code to reuse last match */
+        temp = code;                        /* save the current code */
+        if (code > end) {
+            /* Be picky on the allowed code here, and make sure that the code
+               we drop through (prev) will be a valid index so that random
+               input does not cause an exception.  The code != end + 1 check is
+               empirically derived, and not checked in the original uncompress
+               code.  If this ever causes a problem, that check could be safely
+               removed.  Leaving this check in greatly improves pigz's ability
+               to detect random or corrupted input after a compress header.
+               In any case, the prev > end check must be retained. */
+            if (code != end + 1 || prev > end)
+                bail("invalid lzw code: ", in);
+            match[stack++] = (unsigned char)final;
+            code = prev;
+        }
+
+        /* walk through linked list to generate output in reverse order */
+        p = match + stack;
+        while (code >= 256) {
+            *p++ = suffix[code];
+            code = prefix[code];
+        }
+        stack = p - match;
+        match[stack++] = (unsigned char)code;
+        final = code;
+
+        /* link new table entry */
+        if (end < mask) {
+            end++;
+            prefix[end] = (unsigned short)prev;
+            suffix[end] = (unsigned char)final;
+        }
+
+        /* set previous code for next iteration */
+        prev = temp;
+
+        /* write output in forward order -- while the reversed string does not
+           fit in what is left of out_buf, fill out_buf to the brim and flush
+           it, then copy the rest, which is always at least one byte */
+        while (stack > OUTSIZE - outcnt) {
+            while (outcnt < OUTSIZE)
+                out_buf[outcnt++] = match[--stack];
+            out_tot += outcnt;
+            if (decode == 1)
+                writen(outd, out_buf, outcnt);
+            outcnt = 0;
+        }
+        p = match + stack;
+        do {
+            out_buf[outcnt++] = *--p;
+        } while (p > match);
+        stack = 0;
+
+        /* loop for next code with final and prev as the last match, rem and
+           left provide the first 0..7 bits of the next code, end is the last
+           valid table entry */
+    }
+}
+
+/* --- file processing --- */
+
+/* extract file name from path -- return a pointer into path just past its
+   last '/', or path itself if it contains no '/' (the result is empty when
+   path ends with a '/'); strrchr() is used so that no pointer is ever formed
+   before the start of path */
+local char *justname(char *path)
+{
+    char *p;
+
+    p = strrchr(path, '/');
+    return p == NULL ? path : p + 1;
+}
+
+/* Give the file named to the attributes of the file named from, as far as
+   that is permitted.  Nothing is done unless from can be examined with stat()
+   and is a regular file.  Otherwise the low twelve mode bits (permissions plus
+   suid, sgid and sticky) are applied with chmod(), then the owner and group
+   with chown(), then the access and modify times with utimes() to a
+   resolution of one second.  All of this is best effort, so the results of
+   those three calls are deliberately ignored. */
+local void copymeta(char *from, char *to)
+{
+    struct stat st;
+    struct timeval times[2];
+
+    /* nothing to copy if from can't be examined */
+    if (stat(from, &st) != 0)
+        return;
+
+    /* only copy from regular files */
+    if ((st.st_mode & S_IFMT) != S_IFREG)
+        return;
+
+    /* mode first, then ownership */
+    (void)chmod(to, st.st_mode & 07777);
+    (void)chown(to, st.st_uid, st.st_gid);
+
+    /* times[0] is the access time, times[1] the modify time */
+    times[0].tv_sec = st.st_atime;
+    times[1].tv_sec = st.st_mtime;
+    times[0].tv_usec = times[1].tv_usec = 0;
+    (void)utimes(to, times);
+}
+
+/* set both the access and the modify time of the file named path to t (whole
+   seconds), ignoring any error from utimes() */
+local void touch(char *path, time_t t)
+{
+    struct timeval times[2];
+    int i;
+
+    /* same stamp for access (0) and modify (1) */
+    for (i = 0; i < 2; i++) {
+        times[i].tv_sec = t;
+        times[i].tv_usec = 0;
+    }
+    (void)utimes(path, times);
+}
+
+/* process provided input file, or stdin if path is NULL -- process() can
+   call itself for recursive directory processing; depending on decode and
+   list the input is compressed, decompressed, tested or listed, with the
+   output going to stdout (stdin input or pipeout set) or to a file whose name
+   is derived from the input name; when a file is written from a file, the
+   attributes of the input are copied to it and the input is deleted unless
+   keep is set; inputs that can't be used are reported with complain() and
+   skipped, failures that can't be recovered from are reported with bail() */
+local void process(char *path)
+{
+    int method = -1;                /* get_header() return value */
+    size_t len;                     /* length of base name (minus suffix) */
+    struct stat st;                 /* to get file type and mod time */
+    /* all compressed suffixes for decoding search, in length order */
+    static char *sufs[] = {".z", "-z", "_z", ".Z", ".gz", "-gz", ".zz", "-zz",
+                           ".zip", ".ZIP", ".tgz", NULL};
+
+    /* open input file with name in, descriptor ind -- set name and mtime */
+    if (path == NULL) {
+        strcpy(in, "<stdin>");
+        ind = 0;
+        name = NULL;
+        mtime = headis & 2 ?
+                (fstat(ind, &st) ? time(NULL) : st.st_mtime) : 0;
+        len = 0;
+    }
+    else {
+        /* set input file name (already set if recursed here) -- strncpy()
+           fills the rest of in with zeros, so a non-zero last byte means
+           that path did not fit */
+        if (path != in) {
+            strncpy(in, path, sizeof(in));
+            if (in[sizeof(in) - 1])
+                bail("name too long: ", path);
+        }
+        len = strlen(in);
+
+        /* try to stat input file -- if not there and decoding, look for that
+           name with compressed suffixes */
+        if (lstat(in, &st)) {
+            if (errno == ENOENT && (list || decode)) {
+                char **try = sufs;
+                do {
+                    if (*try == NULL || len + strlen(*try) >= sizeof(in))
+                        break;
+                    strcpy(in + len, *try++);
+                    errno = 0;
+                } while (lstat(in, &st) && errno == ENOENT);
+            }
+#ifdef EOVERFLOW
+            if (errno == EOVERFLOW || errno == EFBIG)
+                bail(in,
+                    " too large -- not compiled with large file support");
+#endif
+            if (errno) {
+                in[len] = 0;
+                complain("%s does not exist -- skipping", in);
+                return;
+            }
+            len = strlen(in);
+        }
+
+        /* only process regular files, but allow symbolic links if -f,
+           recurse into directory if -r */
+        if ((st.st_mode & S_IFMT) != S_IFREG &&
+            (st.st_mode & S_IFMT) != S_IFLNK &&
+            (st.st_mode & S_IFMT) != S_IFDIR) {
+            complain("%s is a special file or device -- skipping", in);
+            return;
+        }
+        if ((st.st_mode & S_IFMT) == S_IFLNK && !force && !pipeout) {
+            complain("%s is a symbolic link -- skipping", in);
+            return;
+        }
+        if ((st.st_mode & S_IFMT) == S_IFDIR && !recurse) {
+            complain("%s is a directory -- skipping", in);
+            return;
+        }
+
+        /* recurse into directory (assumes Unix) */
+        if ((st.st_mode & S_IFMT) == S_IFDIR) {
+            char *roll, *item, *cut, *base, *bigger;
+            size_t len, hold;
+            DIR *here;
+            struct dirent *next;
+
+            /* accumulate list of entries (need to do this, since readdir()
+               behavior not defined if directory modified between calls) --
+               roll holds the names one after the other, each terminated by a
+               zero, with an empty name marking the end of the list */
+            here = opendir(in);
+            if (here == NULL)
+                return;
+            hold = 512;
+            roll = malloc(hold);
+            if (roll == NULL)
+                bail("not enough memory", "");
+            *roll = 0;
+            item = roll;
+            while ((next = readdir(here)) != NULL) {
+                if (next->d_name[0] == 0 ||
+                    (next->d_name[0] == '.' && (next->d_name[1] == 0 ||
+                     (next->d_name[1] == '.' && next->d_name[2] == 0))))
+                    continue;
+                len = strlen(next->d_name) + 1;
+                if (item + len + 1 > roll + hold) {
+                    do {                    /* make roll bigger */
+                        hold <<= 1;
+                    } while (item + len + 1 > roll + hold);
+                    bigger = realloc(roll, hold);
+                    if (bigger == NULL) {
+                        free(roll);
+                        bail("not enough memory", "");
+                    }
+                    item = bigger + (item - roll);
+                    roll = bigger;
+                }
+                strcpy(item, next->d_name);
+                item += len;
+                *item = 0;
+            }
+            closedir(here);
+
+            /* run process() for each entry in the directory */
+            cut = base = in + strlen(in);
+            if (base > in && base[-1] != (unsigned char)'/') {
+                if ((size_t)(base - in) >= sizeof(in))
+                    bail("path too long", in);
+                *base++ = '/';
+            }
+            item = roll;
+            while (*item) {
+                strncpy(base, item, sizeof(in) - (base - in));
+                if (in[sizeof(in) - 1]) {
+                    strcpy(in + (sizeof(in) - 4), "...");
+                    bail("path too long: ", in);
+                }
+                process(in);
+                item += strlen(item) + 1;
+            }
+            *cut = 0;
+
+            /* release list of entries */
+            free(roll);
+            return;
+        }
+
+        /* don't compress .gz (or provided suffix) files, unless -f */
+        if (!(force || list || decode) && len >= strlen(sufx) &&
+                strcmp(in + len - strlen(sufx), sufx) == 0) {
+            complain("%s ends with %s -- skipping", in, sufx);
+            return;
+        }
+
+        /* only decompress over input file with compressed suffix */
+        if (decode && !pipeout) {
+            int suf = compressed_suffix(in);
+            if (suf == 0) {
+                complain("%s does not have compressed suffix -- skipping", in);
+                return;
+            }
+            len -= suf;
+        }
+
+        /* open input file */
+        ind = open(in, O_RDONLY, 0);
+        if (ind < 0)
+            bail("read error on ", in);
+
+        /* prepare gzip header information for compression */
+        name = headis & 1 ? justname(in) : NULL;
+        mtime = headis & 2 ? st.st_mtime : 0;
+    }
+    SET_BINARY_MODE(ind);
+
+    /* if decoding or testing, try to read gzip header */
+    hname = NULL;
+    if (decode) {
+        in_init();
+        method = get_header(1);
+        if (method != 8 && method != 256 &&
+                /* gzip -cdf acts like cat on uncompressed input */
+                !(method == -2 && force && pipeout && decode != 2 && !list)) {
+            RELEASE(hname);
+            if (ind != 0)
+                close(ind);
+            if (method != -1)
+                complain(method < 0 ? "%s is not compressed -- skipping" :
+                         "%s has unknown compression method -- skipping", in);
+            return;
+        }
+
+        /* if requested, test input file (possibly a special list) */
+        if (decode == 2) {
+            if (method == 8)
+                infchk();
+            else {
+                unlzw();
+                if (list) {
+                    in_tot -= 3;
+                    show_info(method, 0, out_tot, 0);
+                }
+            }
+            RELEASE(hname);
+            if (ind != 0)
+                close(ind);
+            return;
+        }
+    }
+
+    /* if requested, just list information about input file */
+    if (list) {
+        list_info();
+        RELEASE(hname);
+        if (ind != 0)
+            close(ind);
+        return;
+    }
+
+    /* create output file out, descriptor outd */
+    if (path == NULL || pipeout) {
+        /* write to stdout */
+        out = malloc(strlen("<stdout>") + 1);
+        if (out == NULL)
+            bail("not enough memory", "");
+        strcpy(out, "<stdout>");
+        outd = 1;
+        if (!decode && !force && isatty(outd))
+            bail("trying to write compressed data to a terminal",
+                 " (use -f to force)");
+    }
+    else {
+        char *to, *repl;
+        size_t pre;                 /* length of directory part taken from in */
+
+        /* use header name for output when decompressing with -N -- that name
+           comes from the compressed data and so can't be trusted: drop any
+           directory components from it and put the output in the directory
+           of the input file, so that an absolute path or ".." in the header
+           can't direct the write somewhere else; a header name with nothing
+           after its last '/' is ignored */
+        to = in;
+        pre = 0;
+        if (decode && (headis & 1) != 0 && hname != NULL) {
+            char *stripped = justname(hname);
+
+            if (*stripped) {
+                pre = (size_t)(justname(in) - in);
+                to = stripped;
+                len = strlen(stripped);
+            }
+        }
+
+        /* replace .tgz with .tar when decoding */
+        repl = decode && strcmp(to + len, ".tgz") ? "" : ".tar";
+
+        /* create output file and open to write -- the name is the first pre
+           bytes of in, then len bytes of to, then the new suffix */
+        out = malloc(pre + len + (decode ? strlen(repl) : strlen(sufx)) + 1);
+        if (out == NULL)
+            bail("not enough memory", "");
+        memcpy(out, in, pre);
+        memcpy(out + pre, to, len);
+        strcpy(out + pre + len, decode ? repl : sufx);
+        outd = open(out, O_CREAT | O_TRUNC | O_WRONLY |
+                         (force ? 0 : O_EXCL), 0600);
+
+        /* if exists and not -f, give user a chance to overwrite */
+        if (outd < 0 && errno == EEXIST && isatty(0) && verbosity) {
+            int ch, reply;
+
+            fprintf(stderr, "%s exists -- overwrite (y/n)? ", out);
+            fflush(stderr);
+            reply = -1;
+            do {
+                ch = getchar();
+                if (reply < 0 && ch != ' ' && ch != '\t')
+                    reply = ch == 'y' || ch == 'Y' ? 1 : 0;
+            } while (ch != EOF && ch != '\n' && ch != '\r');
+            if (reply == 1)
+                outd = open(out, O_CREAT | O_TRUNC | O_WRONLY,
+                            0600);
+        }
+
+        /* if exists and no overwrite, report and go on to next */
+        if (outd < 0 && errno == EEXIST) {
+            complain("%s exists -- skipping", out);
+            RELEASE(out);
+            RELEASE(hname);
+            if (ind != 0)
+                close(ind);
+            return;
+        }
+
+        /* if some other error, give up */
+        if (outd < 0)
+            bail("write error on ", out);
+    }
+    SET_BINARY_MODE(outd);
+    RELEASE(hname);
+
+    /* process ind to outd */
+    if (verbosity > 1)
+        fprintf(stderr, "%s to %s ", in, out);
+    if (decode) {
+        if (method == 8)
+            infchk();
+        else if (method == 256)
+            unlzw();
+        else
+            cat();
+    }
+#ifndef NOTHREAD
+    else if (procs > 1)
+        parallel_compress();
+#endif
+    else
+        single_compress(0);
+    if (verbosity > 1) {
+        putc('\n', stderr);
+        fflush(stderr);
+    }
+
+    /* finish up, copy attributes, set times, delete original */
+    if (ind != 0)
+        close(ind);
+    if (outd != 1) {
+        if (close(outd))
+            bail("write error on ", out);
+        outd = -1;              /* now prevent deletion on interrupt */
+        if (ind != 0) {
+            copymeta(in, out);
+            if (!keep)
+                unlink(in);
+        }
+        if (decode && (headis & 2) != 0 && stamp)
+            touch(out, stamp);
+    }
+    RELEASE(out);
+}
+
+local char *helptext[] = {         /* usage text, one entry per line printed
+                                      by help(); entries inside the #ifdef
+                                      sections depend on how pigz was built */
+"Usage: pigz [options] [files ...]",
+"  will compress files in place, adding the suffix '.gz'.  If no files are",
+#ifdef NOTHREAD
+"  specified, stdin will be compressed to stdout.  pigz does what gzip does.",
+#else
+"  specified, stdin will be compressed to stdout.  pigz does what gzip does,",
+"  but spreads the work over multiple processors and cores when compressing.",
+#endif
+"",
+"Options:",
+"  -0 to -9, --fast, --best   Compression levels, --fast is -1, --best is -9",
+"  -b, --blocksize mmm  Set compression block size to mmmK (default 128K)",
+"  -c, --stdout         Write all processed output to stdout (won't delete)",
+"  -d, --decompress     Decompress the compressed input",
+"  -f, --force          Force overwrite, compress .gz, links, and to terminal",
+"  -h, --help           Display a help screen and quit",
+"  -i, --independent    Compress blocks independently for damage recovery",
+"  -k, --keep           Do not delete original file after processing",
+"  -K, --zip            Compress to PKWare zip (.zip) single entry format",
+"  -l, --list           List the contents of the compressed input",
+"  -L, --license        Display the pigz license and quit",
+"  -n, --no-name        Do not store or restore file name in/from header",
+"  -N, --name           Store/restore file name and mod time in/from header",
+#ifndef NOTHREAD
+"  -p, --processes n    Allow up to n compression threads (default is the",
+"                       number of online processors, or 8 if unknown)",
+#endif
+"  -q, --quiet          Print no messages, even on error",
+"  -r, --recursive      Process the contents of all subdirectories",
+"  -R, --rsyncable      Input-determined block locations for rsync",
+"  -S, --suffix .sss    Use suffix .sss instead of .gz (for compression)",
+"  -t, --test           Test the integrity of the compressed input",
+"  -T, --no-time        Do not store or restore mod time in/from header",
+#ifdef DEBUG
+"  -v, --verbose        Provide more verbose output (-vv to debug)",
+#else
+"  -v, --verbose        Provide more verbose output",
+#endif
+"  -V  --version        Show the version of pigz",
+"  -z, --zlib           Compress to zlib (.zz) instead of gzip format",
+"  --                   All arguments after \"--\" are treated as files"
+};
+
+/* print every entry of helptext[] to stderr, each on its own line, and then
+   exit with status 0 -- except that when verbosity is zero nothing is printed
+   and the function simply returns */
+local void help(void)
+{
+    char **line;
+    char **stop = helptext + sizeof(helptext) / sizeof(helptext[0]);
+
+    if (verbosity == 0)
+        return;
+    for (line = helptext; line != stop; line++)
+        fprintf(stderr, "%s\n", *line);
+    fflush(stderr);
+    exit(0);
+}
+
+#ifndef NOTHREAD
+
+/* try to determine the number of processors -- return the number of online
+   processors reported by the system, or the provided default n if the system
+   offers none of the queries below or the query fails; sysconf() returns -1
+   when it can't answer, and that must not replace the default */
+local int nprocs(int n)
+{
+#  ifdef _SC_NPROCESSORS_ONLN
+    long count = sysconf(_SC_NPROCESSORS_ONLN);
+
+    if (count > 0)
+        n = (int)count;
+#  else
+#    ifdef _SC_NPROC_ONLN
+    long count = sysconf(_SC_NPROC_ONLN);
+
+    if (count > 0)
+        n = (int)count;
+#    else
+#      ifdef __hpux
+    struct pst_dynamic psd;
+
+    if (pstat_getdynamic(&psd, sizeof(psd), (size_t)1, 0) != -1)
+        n = psd.psd_proc_cnt;
+#      endif
+#    endif
+#  endif
+    return n;
+}
+
+#endif
+
+/* give every option variable its default value */
+local void defaults(void)
+{
+    /* what to do */
+    decode = 0;                     /* compress */
+    list = 0;                       /* don't list */
+    form = 0;                       /* use gzip format */
+
+    /* how to compress */
+    level = Z_DEFAULT_COMPRESSION;
+    size = 131072UL;                /* block size of 128K */
+    rsync = 0;                      /* don't do rsync blocking */
+    setdict = 1;                    /* initialize dictionary each thread */
+#ifdef NOTHREAD
+    procs = 1;
+#else
+    procs = nprocs(8);              /* 8 is the default handed to nprocs() */
+#endif
+
+    /* input and output files */
+    headis = 3;                     /* store/restore name and timestamp */
+    sufx = ".gz";                   /* compressed file suffix */
+    pipeout = 0;                    /* don't force output to stdout */
+    keep = 0;                       /* delete input file once compressed */
+    force = 0;                      /* don't overwrite, don't compress links */
+    recurse = 0;                    /* don't go into directories */
+
+    /* messages */
+    verbosity = 1;                  /* normal message level */
+}
+
+/* Table mapping each long option name (given without the leading "--") to
+   the equivalent single-character short option.  Several long names may map
+   to the same short option. */
+local char *longopts[][2] = {
+    {"LZW",         "Z"},
+    {"ascii",       "a"},
+    {"best",        "9"},
+    {"bits",        "Z"},
+    {"blocksize",   "b"},
+    {"decompress",  "d"},
+    {"fast",        "1"},
+    {"force",       "f"},
+    {"help",        "h"},
+    {"independent", "i"},
+    {"keep",        "k"},
+    {"license",     "L"},
+    {"list",        "l"},
+    {"name",        "N"},
+    {"no-name",     "n"},
+    {"no-time",     "T"},
+    {"processes",   "p"},
+    {"quiet",       "q"},
+    {"recursive",   "r"},
+    {"rsyncable",   "R"},
+    {"silent",      "q"},
+    {"stdout",      "c"},
+    {"suffix",      "S"},
+    {"test",        "t"},
+    {"to-stdout",   "c"},
+    {"uncompress",  "d"},
+    {"verbose",     "v"},
+    {"version",     "V"},
+    {"zip",         "K"},
+    {"zlib",        "z"}
+};
+
+/* number of {long, short} pairs in longopts[] */
+#define NLOPTS (sizeof(longopts) / sizeof(longopts[0]))
+
+/* Called whenever the buffer size, the compression level, or the number of
+   processes changes: discard the old buffers and threads so that new ones
+   will be created using the new settings. */
+local void new_opts(void)
+{
+    single_compress(1);
+#if !defined(NOTHREAD)
+    finish_jobs();
+#endif
+}
+
+/* Verify that arg consists only of decimal digits and, if so, return its
+   value.  bail() is called (it is expected not to return) when arg is empty,
+   when arg contains anything other than '0'..'9', or when the value is too
+   large to be represented in a size_t.  The last check prevents an enormous
+   parameter from silently wrapping around to a small, apparently valid
+   number. */
+local size_t num(char *arg)
+{
+    char *str = arg;
+    size_t val = 0;
+    size_t max = ~(size_t)0;        /* largest value a size_t can hold */
+
+    if (*str == 0)
+        bail("internal error: empty parameter", "");
+    do {
+        size_t digit;
+
+        if (*str < '0' || *str > '9')
+            bail("invalid numeric parameter: ", arg);
+        digit = (size_t)(*str - '0');
+
+        /* val * 10 + digit exceeds max exactly when val > (max - digit) / 10,
+           so test before multiplying rather than after wrapping */
+        if (val > (max - digit) / 10)
+            bail("numeric parameter too large: ", arg);
+        val = val * 10 + digit;
+    } while (*++str);
+    return val;
+}
+
+/* Process one argument.  Return true (1) if arg is a file name rather than
+   an option -- a lone "-" counts as a file name and stands for stdin -- or
+   false (0) if arg was consumed as an option or as an option's parameter.
+
+   The options -b, -p, and -S take a parameter.  The static variable get
+   remembers across calls that a parameter is still owed: 1 for -b, 2 for -p,
+   3 for -S, 0 for none.  The parameter may be the next argument, or for -b
+   and -p only, the remainder of the same argument (-b128, -p4).
+
+   Calling option(NULL) processes nothing; it only verifies that no parameter
+   is still owed, and is used at the end of each source of arguments.
+
+   All errors are reported through bail(), which is expected not to return. */
+local int option(char *arg)
+{
+    static int get = 0;     /* if not zero, look for option parameter */
+    char bad[3] = "-X";     /* for error messages (X is replaced) */
+
+    /* if no argument or dash option, check status of get */
+    if (get && (arg == NULL || *arg == '-')) {
+        bad[1] = "bpS"[get - 1];
+        bail("missing parameter after ", bad);
+    }
+    if (arg == NULL)
+        return 0;
+
+    /* process long option or short options */
+    if (*arg == '-') {
+        /* a single dash will be interpreted as stdin */
+        if (*++arg == 0)
+            return 1;
+
+        /* process long option (fall through with equivalent short option) */
+        if (*arg == '-') {
+            int j;
+
+            arg++;
+            for (j = NLOPTS - 1; j >= 0; j--)
+                if (strcmp(arg, longopts[j][0]) == 0) {
+                    arg = longopts[j][1];
+                    break;
+                }
+            if (j < 0)
+                bail("invalid option: ", arg - 2);  /* back up to the "--" */
+        }
+
+        /* process short options (more than one allowed after dash) */
+        do {
+            /* if looking for a parameter, don't process more single character
+               options until we have the parameter */
+            if (get) {
+                if (get == 3)
+                    bail("invalid usage: -S must be followed by space", "");
+                break;      /* allow -pnnn and -bnnn, fall to parameter code */
+            }
+
+            /* process next single character option */
+            bad[1] = *arg;
+            switch (*arg) {
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                level = *arg - '0';
+                new_opts();
+                break;
+            case 'K':  form = 2;  sufx = ".zip";  break;
+            case 'L':
+                fputs(VERSION, stderr);
+                fputs("Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012"
+                      " Mark Adler\n",
+                      stderr);
+                fputs("Subject to the terms of the zlib license.\n",
+                      stderr);
+                fputs("No warranty is provided or implied.\n", stderr);
+                exit(0);
+            case 'N':  headis = 3;  break;      /* name and time */
+            case 'T':  headis &= ~2;  break;    /* drop the time bit */
+            case 'R':  rsync = 1;  break;
+            case 'S':  get = 3;  break;
+            case 'V':  fputs(VERSION, stderr);  exit(0);
+            case 'Z':
+                bail("invalid option: LZW output not supported: ", bad);
+            case 'a':
+                bail("invalid option: ascii conversion not supported: ", bad);
+            case 'b':  get = 1;  break;
+            case 'c':  pipeout = 1;  break;
+            case 'd':  decode = 1;  headis = 0;  break;
+            case 'f':  force = 1;  break;
+            case 'h':  help();  break;          /* returns only if quiet */
+            case 'i':  setdict = 0;  break;
+            case 'k':  keep = 1;  break;
+            case 'l':  list = 1;  break;
+            case 'n':  headis &= ~1;  break;    /* drop the name bit */
+            case 'p':  get = 2;  break;
+            case 'q':  verbosity = 0;  break;
+            case 'r':  recurse = 1;  break;
+            case 't':  decode = 2;  break;
+            case 'v':  verbosity++;  break;
+            case 'z':  form = 1;  sufx = ".zz";  break;
+            default:
+                bail("invalid option: ", bad);
+            }
+        } while (*++arg);
+
+        /* all characters used up -- any owed parameter is the next argument */
+        if (*arg == 0)
+            return 0;
+    }
+
+    /* process option parameter for -b, -p, or -S */
+    if (get) {
+        size_t n;
+
+        if (get == 1) {
+            n = num(arg);
+            size = n << 10;                     /* chunk size */
+            if (size < DICT)
+                bail("block size too small (must be >= 32K)", "");
+            /* reject if the shift lost bits, if the derived output pool size
+               wrapped or would be negative as an ssize_t, or if over 4M */
+            if (n != size >> 10 ||
+                OUTPOOL(size) < size ||
+                (ssize_t)OUTPOOL(size) < 0 ||
+                size > (1UL << 22))
+                bail("block size too large: ", arg);
+            new_opts();
+        }
+        else if (get == 2) {
+            n = num(arg);
+            procs = (int)n;                     /* # processes */
+            if (procs < 1)
+                bail("invalid number of processes: ", arg);
+            /* reject if the value did not survive conversion to int */
+            if ((size_t)procs != n || INBUFS(procs) < 1)
+                bail("too many processes: ", arg);
+#ifdef NOTHREAD
+            if (procs > 1)
+                bail("compiled without threads", "");
+#endif
+            new_opts();
+        }
+        else if (get == 3)
+            sufx = arg;                         /* gz suffix */
+        get = 0;
+        return 0;
+    }
+
+    /* neither an option nor parameter */
+    return 1;
+}
+
+/* Handler for a termination signal (also installed as the yarn abort
+   routine): remove the output file if one is open and has a name, dump the
+   log, and terminate immediately with exit status 1. */
+local void cut_short(int sig)
+{
+    (void)sig;                      /* the signal number is not used */
+    Trace(("termination by user"));
+    if (outd != -1) {
+        if (out != NULL)
+            unlink(out);
+    }
+    log_dump();
+    _exit(1);
+}
+
+/* Apply the options found in the environment variable called name, if it is
+   set.  The value is a list of options separated by spaces and/or tabs.  File
+   names are not permitted there: nofiles is the complete message to bail()
+   with if option() reports one.  Leading, trailing, and repeated separators
+   are ignored, so a value consisting only of blanks is the same as an empty
+   value.
+
+   NOTE: the value is split in place by overwriting separators with
+   terminators, and it is left that way since option() may keep a pointer into
+   it (the -S suffix). */
+local void env_options(char *name, char *nofiles)
+{
+    char *opts, *end;
+    int more;                       /* true if text follows this option */
+
+    opts = getenv(name);
+    if (opts == NULL)
+        return;
+    for (;;) {
+        /* skip separators, done if nothing but separators remained */
+        while (*opts == ' ' || *opts == '\t')
+            opts++;
+        if (*opts == 0)
+            break;
+
+        /* isolate and process the next option */
+        end = opts;
+        while (*end && *end != ' ' && *end != '\t')
+            end++;
+        more = *end != 0;
+        *end = 0;
+        if (option(opts))
+            bail(nofiles, "");
+        if (!more)
+            break;
+        opts = end + 1;
+    }
+
+    /* complain if the last option is still missing its parameter */
+    option(NULL);
+}
+
+/* Process arguments.  Options are taken first from the defaults, then from
+   the GZIP and PIGZ environment variables, then from the command line, where
+   everything after "--" is a file name.  Each file name is processed as soon
+   as it is encountered, using the options seen up to that point.  If no file
+   names are given, stdin is processed.  The program name selects a default
+   mode: "unpigz" or "gunzip" decompress, and a name ending in "cat"
+   decompresses to stdout.  Returns 2 if warned is set, otherwise 0. */
+int main(int argc, char **argv)
+{
+    int n;                          /* general index */
+    int noop;                       /* true to suppress option decoding */
+    unsigned long done;             /* number of named files processed */
+    char *name, *p;                 /* invocation name, marker */
+
+    /* save pointer to program name for error messages (argv[0] may be
+       missing if the program was exec'ed with an empty argument list) */
+    name = argc > 0 && argv[0] != NULL ? argv[0] : "";
+    p = strrchr(name, '/');
+    p = p == NULL ? name : p + 1;
+    prog = *p ? p : "pigz";
+
+    /* prepare for interrupts and logging */
+    signal(SIGINT, cut_short);
+#ifndef NOTHREAD
+    yarn_prefix = prog;             /* prefix for yarn error messages */
+    yarn_abort = cut_short;         /* call on thread error */
+#endif
+#ifdef DEBUG
+    gettimeofday(&start, NULL);     /* starting time for log entries */
+    log_init();                     /* initialize logging */
+#endif
+
+    /* set all options to defaults */
+    defaults();
+
+    /* process user environment variable defaults in GZIP, then in PIGZ */
+    env_options("GZIP", "cannot provide files in GZIP environment variable");
+    env_options("PIGZ", "cannot provide files in PIGZ environment variable");
+
+    /* decompress if named "unpigz" or "gunzip", to stdout if "*cat" */
+    if (strcmp(prog, "unpigz") == 0 || strcmp(prog, "gunzip") == 0)
+        decode = 1, headis = 0;
+    if ((n = strlen(prog)) > 2 && strcmp(prog + n - 3, "cat") == 0)
+        decode = 1, headis = 0, pipeout = 1;
+
+    /* if no arguments and compressed data to or from a terminal, show help */
+    if (argc < 2 && isatty(decode ? 0 : 1))
+        help();
+
+    /* process command-line arguments, no options after "--" */
+    done = noop = 0;
+    for (n = 1; n < argc; n++)
+        if (noop == 0 && strcmp(argv[n], "--") == 0) {
+            noop = 1;
+            option(NULL);           /* no parameter may be owed at "--" */
+        }
+        else if (noop || option(argv[n])) { /* true if file name, process it */
+            if (done == 1 && pipeout && !decode && !list && form > 1)
+                complain("warning: output will be concatenated zip files -- "
+                         "will not be able to extract");
+            process(strcmp(argv[n], "-") ? argv[n] : NULL);
+            done++;
+        }
+    option(NULL);
+
+    /* list stdin or compress stdin to stdout if no file names provided */
+    if (done == 0)
+        process(NULL);
+
+    /* done -- release resources, show log */
+    new_opts();
+    log_dump();
+    return warned ? 2 : 0;
+}