Root/lib/decompress_bunzip2.c

1/* vi: set sw = 4 ts = 4: */
2/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
3
4    Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
5    which also acknowledges contributions by Mike Burrows, David Wheeler,
6    Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
7    Robert Sedgewick, and Jon L. Bentley.
8
9    This code is licensed under the LGPLv2:
10        LGPL (http://www.gnu.org/copyleft/lgpl.html
11*/
12
13/*
14    Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
15
16    More efficient reading of Huffman codes, a streamlined read_bunzip()
17    function, and various other tweaks. In (limited) tests, approximately
18    20% faster than bzcat on x86 and about 10% faster on arm.
19
20    Note that about 2/3 of the time is spent in read_unzip() reversing
21    the Burrows-Wheeler transformation. Much of that time is delay
22    resulting from cache misses.
23
24    I would ask that anyone benefiting from this work, especially those
25    using it in commercial products, consider making a donation to my local
26    non-profit hospice organization in the name of the woman I loved, who
27    passed away Feb. 12, 2003.
28
29        In memory of Toni W. Hagan
30
31        Hospice of Acadiana, Inc.
32        2600 Johnston St., Suite 200
33        Lafayette, LA 70503-3240
34
35        Phone (337) 232-1234 or 1-800-738-2226
36        Fax (337) 232-1297
37
38        http://www.hospiceacadiana.com/
39
40    Manuel
41 */
42
43/*
44    Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
45*/
46
47
48#ifdef STATIC
49#define PREBOOT
50#else
51#include <linux/decompress/bunzip2.h>
52#include <linux/slab.h>
53#endif /* STATIC */
54
55#include <linux/decompress/mm.h>
56
57#ifndef INT_MAX
58#define INT_MAX 0x7fffffff
59#endif
60
61/* Constants for Huffman coding */
62#define MAX_GROUPS 6
63#define GROUP_SIZE 50 /* 64 would have been more efficient */
64#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
65#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
66#define SYMBOL_RUNA 0
67#define SYMBOL_RUNB 1
68
69/* Status return values */
70#define RETVAL_OK 0
71#define RETVAL_LAST_BLOCK (-1)
72#define RETVAL_NOT_BZIP_DATA (-2)
73#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
74#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
75#define RETVAL_DATA_ERROR (-5)
76#define RETVAL_OUT_OF_MEMORY (-6)
77#define RETVAL_OBSOLETE_INPUT (-7)
78
79/* Other housekeeping constants */
80#define BZIP2_IOBUF_SIZE 4096
81
82/* This is what we know about each Huffman coding group */
83struct group_data {
84    /* We have an extra slot at the end of limit[] for a sentinal value. */
85    int limit[MAX_HUFCODE_BITS+1];
86    int base[MAX_HUFCODE_BITS];
87    int permute[MAX_SYMBOLS];
88    int minLen, maxLen;
89};
90
91/* Structure holding all the housekeeping data, including IO buffers and
92   memory that persists between calls to bunzip */
93struct bunzip_data {
94    /* State for interrupting output loop */
95    int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
96    /* I/O tracking data (file handles, buffers, positions, etc.) */
97    int (*fill)(void*, unsigned int);
98    int inbufCount, inbufPos /*, outbufPos*/;
99    unsigned char *inbuf /*,*outbuf*/;
100    unsigned int inbufBitCount, inbufBits;
101    /* The CRC values stored in the block header and calculated from the
102    data */
103    unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
104    /* Intermediate buffer and its size (in bytes) */
105    unsigned int *dbuf, dbufSize;
106    /* These things are a bit too big to go on the stack */
107    unsigned char selectors[32768]; /* nSelectors = 15 bits */
108    struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
109    int io_error; /* non-zero if we have IO error */
110    int byteCount[256];
111    unsigned char symToByte[256], mtfSymbol[256];
112};
113
114
115/* Return the next nnn bits of input. All reads from the compressed input
116   are done through this function. All reads are big endian */
117static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
118{
119    unsigned int bits = 0;
120
121    /* If we need to get more data from the byte buffer, do so.
122       (Loop getting one byte at a time to enforce endianness and avoid
123       unaligned access.) */
124    while (bd->inbufBitCount < bits_wanted) {
125        /* If we need to read more data from file into byte buffer, do
126           so */
127        if (bd->inbufPos == bd->inbufCount) {
128            if (bd->io_error)
129                return 0;
130            bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
131            if (bd->inbufCount <= 0) {
132                bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
133                return 0;
134            }
135            bd->inbufPos = 0;
136        }
137        /* Avoid 32-bit overflow (dump bit buffer to top of output) */
138        if (bd->inbufBitCount >= 24) {
139            bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
140            bits_wanted -= bd->inbufBitCount;
141            bits <<= bits_wanted;
142            bd->inbufBitCount = 0;
143        }
144        /* Grab next 8 bits of input from buffer. */
145        bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
146        bd->inbufBitCount += 8;
147    }
148    /* Calculate result */
149    bd->inbufBitCount -= bits_wanted;
150    bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
151
152    return bits;
153}
154
155/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
156
157static int INIT get_next_block(struct bunzip_data *bd)
158{
159    struct group_data *hufGroup = NULL;
160    int *base = NULL;
161    int *limit = NULL;
162    int dbufCount, nextSym, dbufSize, groupCount, selector,
163        i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
164    unsigned char uc, *symToByte, *mtfSymbol, *selectors;
165    unsigned int *dbuf, origPtr;
166
167    dbuf = bd->dbuf;
168    dbufSize = bd->dbufSize;
169    selectors = bd->selectors;
170    byteCount = bd->byteCount;
171    symToByte = bd->symToByte;
172    mtfSymbol = bd->mtfSymbol;
173
174    /* Read in header signature and CRC, then validate signature.
175       (last block signature means CRC is for whole file, return now) */
176    i = get_bits(bd, 24);
177    j = get_bits(bd, 24);
178    bd->headerCRC = get_bits(bd, 32);
179    if ((i == 0x177245) && (j == 0x385090))
180        return RETVAL_LAST_BLOCK;
181    if ((i != 0x314159) || (j != 0x265359))
182        return RETVAL_NOT_BZIP_DATA;
183    /* We can add support for blockRandomised if anybody complains.
184       There was some code for this in busybox 1.0.0-pre3, but nobody ever
185       noticed that it didn't actually work. */
186    if (get_bits(bd, 1))
187        return RETVAL_OBSOLETE_INPUT;
188    origPtr = get_bits(bd, 24);
189    if (origPtr > dbufSize)
190        return RETVAL_DATA_ERROR;
191    /* mapping table: if some byte values are never used (encoding things
192       like ascii text), the compression code removes the gaps to have fewer
193       symbols to deal with, and writes a sparse bitfield indicating which
194       values were present. We make a translation table to convert the
195       symbols back to the corresponding bytes. */
196    t = get_bits(bd, 16);
197    symTotal = 0;
198    for (i = 0; i < 16; i++) {
199        if (t&(1 << (15-i))) {
200            k = get_bits(bd, 16);
201            for (j = 0; j < 16; j++)
202                if (k&(1 << (15-j)))
203                    symToByte[symTotal++] = (16*i)+j;
204        }
205    }
206    /* How many different Huffman coding groups does this block use? */
207    groupCount = get_bits(bd, 3);
208    if (groupCount < 2 || groupCount > MAX_GROUPS)
209        return RETVAL_DATA_ERROR;
210    /* nSelectors: Every GROUP_SIZE many symbols we select a new
211       Huffman coding group. Read in the group selector list,
212       which is stored as MTF encoded bit runs. (MTF = Move To
213       Front, as each value is used it's moved to the start of the
214       list.) */
215    nSelectors = get_bits(bd, 15);
216    if (!nSelectors)
217        return RETVAL_DATA_ERROR;
218    for (i = 0; i < groupCount; i++)
219        mtfSymbol[i] = i;
220    for (i = 0; i < nSelectors; i++) {
221        /* Get next value */
222        for (j = 0; get_bits(bd, 1); j++)
223            if (j >= groupCount)
224                return RETVAL_DATA_ERROR;
225        /* Decode MTF to get the next selector */
226        uc = mtfSymbol[j];
227        for (; j; j--)
228            mtfSymbol[j] = mtfSymbol[j-1];
229        mtfSymbol[0] = selectors[i] = uc;
230    }
231    /* Read the Huffman coding tables for each group, which code
232       for symTotal literal symbols, plus two run symbols (RUNA,
233       RUNB) */
234    symCount = symTotal+2;
235    for (j = 0; j < groupCount; j++) {
236        unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
237        int minLen, maxLen, pp;
238        /* Read Huffman code lengths for each symbol. They're
239           stored in a way similar to mtf; record a starting
240           value for the first symbol, and an offset from the
241           previous value for everys symbol after that.
242           (Subtracting 1 before the loop and then adding it
243           back at the end is an optimization that makes the
244           test inside the loop simpler: symbol length 0
245           becomes negative, so an unsigned inequality catches
246           it.) */
247        t = get_bits(bd, 5)-1;
248        for (i = 0; i < symCount; i++) {
249            for (;;) {
250                if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
251                    return RETVAL_DATA_ERROR;
252
253                /* If first bit is 0, stop. Else
254                   second bit indicates whether to
255                   increment or decrement the value.
256                   Optimization: grab 2 bits and unget
257                   the second if the first was 0. */
258
259                k = get_bits(bd, 2);
260                if (k < 2) {
261                    bd->inbufBitCount++;
262                    break;
263                }
264                /* Add one if second bit 1, else
265                 * subtract 1. Avoids if/else */
266                t += (((k+1)&2)-1);
267            }
268            /* Correct for the initial -1, to get the
269             * final symbol length */
270            length[i] = t+1;
271        }
272        /* Find largest and smallest lengths in this group */
273        minLen = maxLen = length[0];
274
275        for (i = 1; i < symCount; i++) {
276            if (length[i] > maxLen)
277                maxLen = length[i];
278            else if (length[i] < minLen)
279                minLen = length[i];
280        }
281
282        /* Calculate permute[], base[], and limit[] tables from
283         * length[].
284         *
285         * permute[] is the lookup table for converting
286         * Huffman coded symbols into decoded symbols. base[]
287         * is the amount to subtract from the value of a
288         * Huffman symbol of a given length when using
289         * permute[].
290         *
291         * limit[] indicates the largest numerical value a
292         * symbol with a given number of bits can have. This
293         * is how the Huffman codes can vary in length: each
294         * code with a value > limit[length] needs another
295         * bit.
296         */
297        hufGroup = bd->groups+j;
298        hufGroup->minLen = minLen;
299        hufGroup->maxLen = maxLen;
300        /* Note that minLen can't be smaller than 1, so we
301           adjust the base and limit array pointers so we're
302           not always wasting the first entry. We do this
303           again when using them (during symbol decoding).*/
304        base = hufGroup->base-1;
305        limit = hufGroup->limit-1;
306        /* Calculate permute[]. Concurrently, initialize
307         * temp[] and limit[]. */
308        pp = 0;
309        for (i = minLen; i <= maxLen; i++) {
310            temp[i] = limit[i] = 0;
311            for (t = 0; t < symCount; t++)
312                if (length[t] == i)
313                    hufGroup->permute[pp++] = t;
314        }
315        /* Count symbols coded for at each bit length */
316        for (i = 0; i < symCount; i++)
317            temp[length[i]]++;
318        /* Calculate limit[] (the largest symbol-coding value
319         *at each bit length, which is (previous limit <<
320         *1)+symbols at this level), and base[] (number of
321         *symbols to ignore at each bit length, which is limit
322         *minus the cumulative count of symbols coded for
323         *already). */
324        pp = t = 0;
325        for (i = minLen; i < maxLen; i++) {
326            pp += temp[i];
327            /* We read the largest possible symbol size
328               and then unget bits after determining how
329               many we need, and those extra bits could be
330               set to anything. (They're noise from
331               future symbols.) At each level we're
332               really only interested in the first few
333               bits, so here we set all the trailing
334               to-be-ignored bits to 1 so they don't
335               affect the value > limit[length]
336               comparison. */
337            limit[i] = (pp << (maxLen - i)) - 1;
338            pp <<= 1;
339            base[i+1] = pp-(t += temp[i]);
340        }
341        limit[maxLen+1] = INT_MAX; /* Sentinal value for
342                        * reading next sym. */
343        limit[maxLen] = pp+temp[maxLen]-1;
344        base[minLen] = 0;
345    }
346    /* We've finished reading and digesting the block header. Now
347       read this block's Huffman coded symbols from the file and
348       undo the Huffman coding and run length encoding, saving the
349       result into dbuf[dbufCount++] = uc */
350
351    /* Initialize symbol occurrence counters and symbol Move To
352     * Front table */
353    for (i = 0; i < 256; i++) {
354        byteCount[i] = 0;
355        mtfSymbol[i] = (unsigned char)i;
356    }
357    /* Loop through compressed symbols. */
358    runPos = dbufCount = symCount = selector = 0;
359    for (;;) {
360        /* Determine which Huffman coding group to use. */
361        if (!(symCount--)) {
362            symCount = GROUP_SIZE-1;
363            if (selector >= nSelectors)
364                return RETVAL_DATA_ERROR;
365            hufGroup = bd->groups+selectors[selector++];
366            base = hufGroup->base-1;
367            limit = hufGroup->limit-1;
368        }
369        /* Read next Huffman-coded symbol. */
370        /* Note: It is far cheaper to read maxLen bits and
371           back up than it is to read minLen bits and then an
372           additional bit at a time, testing as we go.
373           Because there is a trailing last block (with file
374           CRC), there is no danger of the overread causing an
375           unexpected EOF for a valid compressed file. As a
376           further optimization, we do the read inline
377           (falling back to a call to get_bits if the buffer
378           runs dry). The following (up to got_huff_bits:) is
379           equivalent to j = get_bits(bd, hufGroup->maxLen);
380         */
381        while (bd->inbufBitCount < hufGroup->maxLen) {
382            if (bd->inbufPos == bd->inbufCount) {
383                j = get_bits(bd, hufGroup->maxLen);
384                goto got_huff_bits;
385            }
386            bd->inbufBits =
387                (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
388            bd->inbufBitCount += 8;
389        };
390        bd->inbufBitCount -= hufGroup->maxLen;
391        j = (bd->inbufBits >> bd->inbufBitCount)&
392            ((1 << hufGroup->maxLen)-1);
393got_huff_bits:
394        /* Figure how how many bits are in next symbol and
395         * unget extras */
396        i = hufGroup->minLen;
397        while (j > limit[i])
398            ++i;
399        bd->inbufBitCount += (hufGroup->maxLen - i);
400        /* Huffman decode value to get nextSym (with bounds checking) */
401        if ((i > hufGroup->maxLen)
402            || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
403                >= MAX_SYMBOLS))
404            return RETVAL_DATA_ERROR;
405        nextSym = hufGroup->permute[j];
406        /* We have now decoded the symbol, which indicates
407           either a new literal byte, or a repeated run of the
408           most recent literal byte. First, check if nextSym
409           indicates a repeated run, and if so loop collecting
410           how many times to repeat the last literal. */
411        if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
412            /* If this is the start of a new run, zero out
413             * counter */
414            if (!runPos) {
415                runPos = 1;
416                t = 0;
417            }
418            /* Neat trick that saves 1 symbol: instead of
419               or-ing 0 or 1 at each bit position, add 1
420               or 2 instead. For example, 1011 is 1 << 0
421               + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1
422               + 1 << 2. You can make any bit pattern
423               that way using 1 less symbol than the basic
424               or 0/1 method (except all bits 0, which
425               would use no symbols, but a run of length 0
426               doesn't mean anything in this context).
427               Thus space is saved. */
428            t += (runPos << nextSym);
429            /* +runPos if RUNA; +2*runPos if RUNB */
430
431            runPos <<= 1;
432            continue;
433        }
434        /* When we hit the first non-run symbol after a run,
435           we now know how many times to repeat the last
436           literal, so append that many copies to our buffer
437           of decoded symbols (dbuf) now. (The last literal
438           used is the one at the head of the mtfSymbol
439           array.) */
440        if (runPos) {
441            runPos = 0;
442            if (dbufCount+t >= dbufSize)
443                return RETVAL_DATA_ERROR;
444
445            uc = symToByte[mtfSymbol[0]];
446            byteCount[uc] += t;
447            while (t--)
448                dbuf[dbufCount++] = uc;
449        }
450        /* Is this the terminating symbol? */
451        if (nextSym > symTotal)
452            break;
453        /* At this point, nextSym indicates a new literal
454           character. Subtract one to get the position in the
455           MTF array at which this literal is currently to be
456           found. (Note that the result can't be -1 or 0,
457           because 0 and 1 are RUNA and RUNB. But another
458           instance of the first symbol in the mtf array,
459           position 0, would have been handled as part of a
460           run above. Therefore 1 unused mtf position minus 2
461           non-literal nextSym values equals -1.) */
462        if (dbufCount >= dbufSize)
463            return RETVAL_DATA_ERROR;
464        i = nextSym - 1;
465        uc = mtfSymbol[i];
466        /* Adjust the MTF array. Since we typically expect to
467         *move only a small number of symbols, and are bound
468         *by 256 in any case, using memmove here would
469         *typically be bigger and slower due to function call
470         *overhead and other assorted setup costs. */
471        do {
472            mtfSymbol[i] = mtfSymbol[i-1];
473        } while (--i);
474        mtfSymbol[0] = uc;
475        uc = symToByte[uc];
476        /* We have our literal byte. Save it into dbuf. */
477        byteCount[uc]++;
478        dbuf[dbufCount++] = (unsigned int)uc;
479    }
480    /* At this point, we've read all the Huffman-coded symbols
481       (and repeated runs) for this block from the input stream,
482       and decoded them into the intermediate buffer. There are
483       dbufCount many decoded bytes in dbuf[]. Now undo the
484       Burrows-Wheeler transform on dbuf. See
485       http://dogma.net/markn/articles/bwt/bwt.htm
486     */
487    /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
488    j = 0;
489    for (i = 0; i < 256; i++) {
490        k = j+byteCount[i];
491        byteCount[i] = j;
492        j = k;
493    }
494    /* Figure out what order dbuf would be in if we sorted it. */
495    for (i = 0; i < dbufCount; i++) {
496        uc = (unsigned char)(dbuf[i] & 0xff);
497        dbuf[byteCount[uc]] |= (i << 8);
498        byteCount[uc]++;
499    }
500    /* Decode first byte by hand to initialize "previous" byte.
501       Note that it doesn't get output, and if the first three
502       characters are identical it doesn't qualify as a run (hence
503       writeRunCountdown = 5). */
504    if (dbufCount) {
505        if (origPtr >= dbufCount)
506            return RETVAL_DATA_ERROR;
507        bd->writePos = dbuf[origPtr];
508        bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
509        bd->writePos >>= 8;
510        bd->writeRunCountdown = 5;
511    }
512    bd->writeCount = dbufCount;
513
514    return RETVAL_OK;
515}
516
517/* Undo burrows-wheeler transform on intermediate buffer to produce output.
518   If start_bunzip was initialized with out_fd =-1, then up to len bytes of
519   data are written to outbuf. Return value is number of bytes written or
520   error (all errors are negative numbers). If out_fd!=-1, outbuf and len
521   are ignored, data is written to out_fd and return is RETVAL_OK or error.
522*/
523
524static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
525{
526    const unsigned int *dbuf;
527    int pos, xcurrent, previous, gotcount;
528
529    /* If last read was short due to end of file, return last block now */
530    if (bd->writeCount < 0)
531        return bd->writeCount;
532
533    gotcount = 0;
534    dbuf = bd->dbuf;
535    pos = bd->writePos;
536    xcurrent = bd->writeCurrent;
537
538    /* We will always have pending decoded data to write into the output
539       buffer unless this is the very first call (in which case we haven't
540       Huffman-decoded a block into the intermediate buffer yet). */
541
542    if (bd->writeCopies) {
543        /* Inside the loop, writeCopies means extra copies (beyond 1) */
544        --bd->writeCopies;
545        /* Loop outputting bytes */
546        for (;;) {
547            /* If the output buffer is full, snapshot
548             * state and return */
549            if (gotcount >= len) {
550                bd->writePos = pos;
551                bd->writeCurrent = xcurrent;
552                bd->writeCopies++;
553                return len;
554            }
555            /* Write next byte into output buffer, updating CRC */
556            outbuf[gotcount++] = xcurrent;
557            bd->writeCRC = (((bd->writeCRC) << 8)
558                ^bd->crc32Table[((bd->writeCRC) >> 24)
559                ^xcurrent]);
560            /* Loop now if we're outputting multiple
561             * copies of this byte */
562            if (bd->writeCopies) {
563                --bd->writeCopies;
564                continue;
565            }
566decode_next_byte:
567            if (!bd->writeCount--)
568                break;
569            /* Follow sequence vector to undo
570             * Burrows-Wheeler transform */
571            previous = xcurrent;
572            pos = dbuf[pos];
573            xcurrent = pos&0xff;
574            pos >>= 8;
575            /* After 3 consecutive copies of the same
576               byte, the 4th is a repeat count. We count
577               down from 4 instead *of counting up because
578               testing for non-zero is faster */
579            if (--bd->writeRunCountdown) {
580                if (xcurrent != previous)
581                    bd->writeRunCountdown = 4;
582            } else {
583                /* We have a repeated run, this byte
584                 * indicates the count */
585                bd->writeCopies = xcurrent;
586                xcurrent = previous;
587                bd->writeRunCountdown = 5;
588                /* Sometimes there are just 3 bytes
589                 * (run length 0) */
590                if (!bd->writeCopies)
591                    goto decode_next_byte;
592                /* Subtract the 1 copy we'd output
593                 * anyway to get extras */
594                --bd->writeCopies;
595            }
596        }
597        /* Decompression of this block completed successfully */
598        bd->writeCRC = ~bd->writeCRC;
599        bd->totalCRC = ((bd->totalCRC << 1) |
600                (bd->totalCRC >> 31)) ^ bd->writeCRC;
601        /* If this block had a CRC error, force file level CRC error. */
602        if (bd->writeCRC != bd->headerCRC) {
603            bd->totalCRC = bd->headerCRC+1;
604            return RETVAL_LAST_BLOCK;
605        }
606    }
607
608    /* Refill the intermediate buffer by Huffman-decoding next
609     * block of input */
610    /* (previous is just a convenient unused temp variable here) */
611    previous = get_next_block(bd);
612    if (previous) {
613        bd->writeCount = previous;
614        return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
615    }
616    bd->writeCRC = 0xffffffffUL;
617    pos = bd->writePos;
618    xcurrent = bd->writeCurrent;
619    goto decode_next_byte;
620}
621
622static int INIT nofill(void *buf, unsigned int len)
623{
624    return -1;
625}
626
627/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
628   a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
629   ignored, and data is read from file handle into temporary buffer. */
630static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
631                 int (*fill)(void*, unsigned int))
632{
633    struct bunzip_data *bd;
634    unsigned int i, j, c;
635    const unsigned int BZh0 =
636        (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
637        +(((unsigned int)'h') << 8)+(unsigned int)'0';
638
639    /* Figure out how much data to allocate */
640    i = sizeof(struct bunzip_data);
641
642    /* Allocate bunzip_data. Most fields initialize to zero. */
643    bd = *bdp = malloc(i);
644    if (!bd)
645        return RETVAL_OUT_OF_MEMORY;
646    memset(bd, 0, sizeof(struct bunzip_data));
647    /* Setup input buffer */
648    bd->inbuf = inbuf;
649    bd->inbufCount = len;
650    if (fill != NULL)
651        bd->fill = fill;
652    else
653        bd->fill = nofill;
654
655    /* Init the CRC32 table (big endian) */
656    for (i = 0; i < 256; i++) {
657        c = i << 24;
658        for (j = 8; j; j--)
659            c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
660        bd->crc32Table[i] = c;
661    }
662
663    /* Ensure that file starts with "BZh['1'-'9']." */
664    i = get_bits(bd, 32);
665    if (((unsigned int)(i-BZh0-1)) >= 9)
666        return RETVAL_NOT_BZIP_DATA;
667
668    /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
669       uncompressed data. Allocate intermediate buffer for block. */
670    bd->dbufSize = 100000*(i-BZh0);
671
672    bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
673    if (!bd->dbuf)
674        return RETVAL_OUT_OF_MEMORY;
675    return RETVAL_OK;
676}
677
678/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
679   not end of file.) */
680STATIC int INIT bunzip2(unsigned char *buf, int len,
681            int(*fill)(void*, unsigned int),
682            int(*flush)(void*, unsigned int),
683            unsigned char *outbuf,
684            int *pos,
685            void(*error_fn)(char *x))
686{
687    struct bunzip_data *bd;
688    int i = -1;
689    unsigned char *inbuf;
690
691    set_error_fn(error_fn);
692    if (flush)
693        outbuf = malloc(BZIP2_IOBUF_SIZE);
694
695    if (!outbuf) {
696        error("Could not allocate output bufer");
697        return RETVAL_OUT_OF_MEMORY;
698    }
699    if (buf)
700        inbuf = buf;
701    else
702        inbuf = malloc(BZIP2_IOBUF_SIZE);
703    if (!inbuf) {
704        error("Could not allocate input bufer");
705        i = RETVAL_OUT_OF_MEMORY;
706        goto exit_0;
707    }
708    i = start_bunzip(&bd, inbuf, len, fill);
709    if (!i) {
710        for (;;) {
711            i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
712            if (i <= 0)
713                break;
714            if (!flush)
715                outbuf += i;
716            else
717                if (i != flush(outbuf, i)) {
718                    i = RETVAL_UNEXPECTED_OUTPUT_EOF;
719                    break;
720                }
721        }
722    }
723    /* Check CRC and release memory */
724    if (i == RETVAL_LAST_BLOCK) {
725        if (bd->headerCRC != bd->totalCRC)
726            error("Data integrity error when decompressing.");
727        else
728            i = RETVAL_OK;
729    } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
730        error("Compressed file ends unexpectedly");
731    }
732    if (!bd)
733        goto exit_1;
734    if (bd->dbuf)
735        large_free(bd->dbuf);
736    if (pos)
737        *pos = bd->inbufPos;
738    free(bd);
739exit_1:
740    if (!buf)
741        free(inbuf);
742exit_0:
743    if (flush)
744        free(outbuf);
745    return i;
746}
747
748#ifdef PREBOOT
749STATIC int INIT decompress(unsigned char *buf, int len,
750            int(*fill)(void*, unsigned int),
751            int(*flush)(void*, unsigned int),
752            unsigned char *outbuf,
753            int *pos,
754            void(*error_fn)(char *x))
755{
756    return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
757}
758#endif
759

Archive Download this file



interactive