Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * $Source: /homes/cvs/ftape-stacked/ftape/compressor/lzrw3.c,v $ | |
3 | * $Revision: 1.1 $ | |
4 | * $Date: 1997/10/05 19:12:29 $ | |
5 | * | |
6 | * Implementation of Ross Williams lzrw3 algorithm. Adaption for zftape. | |
7 | * | |
8 | */ | |
9 | ||
10 | #include "../compressor/lzrw3.h" /* Defines single exported function "compress". */ | |
11 | ||
12 | /******************************************************************************/ | |
13 | /* */ | |
14 | /* LZRW3.C */ | |
15 | /* */ | |
16 | /******************************************************************************/ | |
17 | /* */ | |
18 | /* Author : Ross Williams. */ | |
19 | /* Date : 30-Jun-1991. */ | |
20 | /* Release : 1. */ | |
21 | /* */ | |
22 | /******************************************************************************/ | |
23 | /* */ | |
24 | /* This file contains an implementation of the LZRW3 data compression */ | |
25 | /* algorithm in C. */ | |
26 | /* */ | |
27 | /* The algorithm is a general purpose compression algorithm that runs fast */ | |
28 | /* and gives reasonable compression. The algorithm is a member of the Lempel */ | |
29 | /* Ziv family of algorithms and bases its compression on the presence in the */ | |
30 | /* data of repeated substrings. */ | |
31 | /* */ | |
32 | /* This algorithm is unpatented and the code is public domain. As the */ | |
33 | /* algorithm is based on the LZ77 class of algorithms, it is unlikely to be */ | |
34 | /* the subject of a patent challenge. */ | |
35 | /* */ | |
36 | /* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is */ | |
37 | /* deterministic and is guaranteed to yield the same compressed */ | |
38 | /* representation for a given file each time it is run. */ | |
39 | /* */ | |
40 | /* The LZRW3 algorithm was originally designed and implemented */ | |
41 | /* by Ross Williams on 31-Dec-1990. */ | |
42 | /* */ | |
43 | /* Here are the results of applying this code, compiled under THINK C 4.0 */ | |
44 | /* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. */ | |
45 | /* */ | |
46 | /* +----------------------------------------------------------------+ */ | |
47 | /* | DATA COMPRESSION TEST | */ | |
48 | /* | ===================== | */ | |
49 | /* | Time of run : Sun 30-Jun-1991 09:31PM | */ | |
50 | /* | Timing accuracy : One part in 100 | */ | |
51 | /* | Context length : 262144 bytes (= 256.0000K) | */ | |
52 | /* | Test suite : Calgary Corpus Suite | */ | |
53 | /* | Files in suite : 14 | */ | |
54 | /* | Algorithm : LZRW3 | */ | |
55 | /* | Note: All averages are calculated from the un-rounded values. | */ | |
56 | /* +----------------------------------------------------------------+ */ | |
57 | /* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | */ | |
58 | /* | ---------- ------ --- ------ ----- ---- ------- ------- | */ | |
59 | /* | rpus:Bib.D 111261 1 55033 49.5 3.96 19.46 32.27 | */ | |
60 | /* | us:Book1.D 768771 3 467962 60.9 4.87 17.03 31.07 | */ | |
61 | /* | us:Book2.D 610856 3 317102 51.9 4.15 19.39 34.15 | */ | |
62 | /* | rpus:Geo.D 102400 1 82424 80.5 6.44 11.65 18.18 | */ | |
63 | /* | pus:News.D 377109 2 205670 54.5 4.36 17.14 27.47 | */ | |
64 | /* | pus:Obj1.D 21504 1 13027 60.6 4.85 13.40 18.95 | */ | |
65 | /* | pus:Obj2.D 246814 1 116286 47.1 3.77 19.31 30.10 | */ | |
66 | /* | s:Paper1.D 53161 1 27522 51.8 4.14 18.60 31.15 | */ | |
67 | /* | s:Paper2.D 82199 1 45160 54.9 4.40 18.45 32.84 | */ | |
68 | /* | rpus:Pic.D 513216 2 122388 23.8 1.91 35.29 51.05 | */ | |
69 | /* | us:Progc.D 39611 1 19669 49.7 3.97 18.87 30.64 | */ | |
70 | /* | us:Progl.D 71646 1 28247 39.4 3.15 24.34 40.66 | */ | |
71 | /* | us:Progp.D 49379 1 19377 39.2 3.14 23.91 39.23 | */ | |
72 | /* | us:Trans.D 93695 1 33481 35.7 2.86 25.48 40.37 | */ | |
73 | /* +----------------------------------------------------------------+ */ | |
74 | /* | Average 224401 1 110953 50.0 4.00 20.17 32.72 | */ | |
75 | /* +----------------------------------------------------------------+ */ | |
76 | /* */ | |
77 | /******************************************************************************/ | |
78 | ||
79 | /******************************************************************************/ | |
80 | ||
81 | /* The following structure is returned by the "compress" function below when */ | |
82 | /* the user asks the function to return identifying information. */ | |
83 | /* The most important field in the record is the working memory field which */ | |
84 | /* tells the calling program how much working memory should be passed to */ | |
85 | /* "compress" when it is called to perform a compression or decompression. */ | |
86 | /* LZRW3 uses the same amount of memory during compression and decompression. */ | |
87 | /* For more information on this structure see "compress.h". */ | |
88 | ||
89 | #define U(X) ((ULONG) X) | |
90 | #define SIZE_P_BYTE (U(sizeof(UBYTE *))) | |
91 | #define SIZE_WORD (U(sizeof(UWORD ))) | |
92 | #define ALIGNMENT_FUDGE (U(16)) | |
93 | #define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE ) | |
94 | ||
95 | static struct compress_identity identity = | |
96 | { | |
97 | U(0x032DDEA8), /* Algorithm identification number. */ | |
98 | MEM_REQ, /* Working memory (bytes) required. */ | |
99 | "LZRW3", /* Name of algorithm. */ | |
100 | "1.0", /* Version number of algorithm. */ | |
101 | "31-Dec-1990", /* Date of algorithm. */ | |
102 | "Public Domain", /* Copyright notice. */ | |
103 | "Ross N. Williams", /* Author of algorithm. */ | |
104 | "Renaissance Software", /* Affiliation of author. */ | |
105 | "Public Domain" /* Vendor of algorithm. */ | |
106 | }; | |
107 | ||
108 | LOCAL void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *, LONG *); | |
109 | LOCAL void compress_decompress(UBYTE *,UBYTE *,LONG, UBYTE *, ULONG *); | |
110 | ||
111 | /******************************************************************************/ | |
112 | ||
113 | /* This function is the only function exported by this module. */ | |
114 | /* Depending on its first parameter, the function can be requested to */ | |
115 | /* compress a block of memory, decompress a block of memory, or to identify */ | |
116 | /* itself. For more information, see the specification file "compress.h". */ | |
117 | ||
118 | EXPORT void lzrw3_compress( | |
119 | UWORD action, /* Action to be performed. */ | |
120 | UBYTE *wrk_mem, /* Address of working memory we can use.*/ | |
121 | UBYTE *src_adr, /* Address of input data. */ | |
122 | LONG src_len, /* Length of input data. */ | |
123 | UBYTE *dst_adr, /* Address to put output data. */ | |
124 | void *p_dst_len /* Address of longword for length of output data.*/ | |
125 | ) | |
126 | { | |
127 | switch (action) | |
128 | { | |
129 | case COMPRESS_ACTION_IDENTITY: | |
130 | *((struct compress_identity **)p_dst_len)= &identity; | |
131 | break; | |
132 | case COMPRESS_ACTION_COMPRESS: | |
133 | compress_compress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len); | |
134 | break; | |
135 | case COMPRESS_ACTION_DECOMPRESS: | |
136 | compress_decompress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len); | |
137 | break; | |
138 | } | |
139 | } | |
140 | ||
141 | /******************************************************************************/ | |
142 | /* */ | |
143 | /* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM */ | |
144 | /* ======================================== */ | |
145 | /* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that */ | |
146 | /* instead of transmitting history offsets, it transmits hash table indexes. */ | |
147 | /* In order to decode the indexes, the decompressor must maintain an */ | |
148 | /* identical hash table. Copy items are straightforward:when the decompressor */ | |
149 | /* receives a copy item, it simply looks up the hash table to translate the */ | |
150 | /* index into a pointer into the data already decompressed. To update the */ | |
151 | /* hash table, it replaces the same table entry with a pointer to the start */ | |
152 | /* of the newly decoded phrase. The tricky part is with literal items, for at */ | |
153 | /* the time that the decompressor receives a literal item the decompressor */ | |
154 | /* does not have the three bytes in the Ziv (that the compressor has) to */ | |
155 | /* perform the three-byte hash. To solve this problem, in LZRW3, both the */ | |
156 | /* compressor and decompressor are wired up so that they "buffer" these */ | |
157 | /* literals and update their hash tables only when three bytes are available. */ | |
158 | /* This makes the maximum buffering 2 bytes. */ | |
159 | /* */ | |
160 | /* Replacement of offsets by hash table indexes yields a few percent extra */ | |
161 | /* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A */ | |
162 | /* and LZRW2, but yields better compression. */ | |
163 | /* */ | |
164 | /* Extra compression could be obtained by using a hash table of depth two. */ | |
165 | /* However, increasing the depth above one incurs a significant decrease in */ | |
166 | /* compression speed which was not considered worthwhile. Another reason for */ | |
167 | /* keeping the depth down to one was to allow easy comparison with the */ | |
168 | /* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the */ | |
169 | /* use of direct hash indexes. */ | |
170 | /* */ | |
171 | /* +---+ */ | |
172 | /* |___|4095 */ | |
173 | /* |___| */ | |
174 | /* +---------------------*_|<---+ /----+---\ */ | |
175 | /* | |___| +---|Hash | */ | |
176 | /* | |___| |Function| */ | |
177 | /* | |___| \--------/ */ | |
178 | /* | |___|0 ^ */ | |
179 | /* | +---+ | */ | |
180 | /* | Hash +-----+ */ | |
181 | /* | Table | */ | |
182 | /* | --- */ | |
183 | /* v ^^^ */ | |
184 | /* +-------------------------------------|----------------+ */ | |
185 | /* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ | |
186 | /* +-------------------------------------|----------------+ */ | |
187 | /* | |1......18| | */ | |
188 | /* |<------- Lempel=History ------------>|<--Ziv-->| | */ | |
189 | /* | (=bytes already processed) |<-Still to go-->| */ | |
190 | /* |<-------------------- INPUT BLOCK ------------------->| */ | |
191 | /* */ | |
192 | /* The diagram above for LZRW3 looks almost identical to the diagram for */ | |
193 | /* LZRW1. The difference is that in LZRW3, the compressor transmits hash */ | |
194 | /* table indices instead of Lempel offsets. For this to work, the */ | |
195 | /* decompressor must maintain a hash table as well as the compressor and both */ | |
196 | /* compressor and decompressor must "buffer" literals, as the decompressor */ | |
197 | /* cannot hash phrases commencing with a literal until another two bytes have */ | |
198 | /* arrived. */ | |
199 | /* */ | |
200 | /* LZRW3 Algorithm Execution Summary */ | |
201 | /* --------------------------------- */ | |
202 | /* 1. Hash the first three bytes of the Ziv to yield a hash table index h. */ | |
203 | /* 2. Look up the hash table yielding history pointer p. */ | |
204 | /* 3. Match where p points with the Ziv. If there is a match of three or */ | |
205 | /* more bytes, code those bytes (in the Ziv) as a copy item, otherwise */ | |
206 | /* code the next byte in the Ziv as a literal item. */ | |
207 | /* 4. Update the hash table as possible subject to the constraint that only */ | |
208 | /* phrases commencing three bytes back from the Ziv can be hashed and */ | |
209 | /* entered into the hash table. (This enables the decompressor to keep */ | |
210 | /* pace). See the description and code for more details. */ | |
211 | /* */ | |
212 | /******************************************************************************/ | |
213 | /* */ | |
214 | /* DEFINITION OF COMPRESSED FILE FORMAT */ | |
215 | /* ==================================== */ | |
216 | /* * A compressed file consists of a COPY FLAG followed by a REMAINDER. */ | |
217 | /* * The copy flag CF uses up four bytes with the first byte being the */ | |
218 | /* least significant. */ | |
219 | /* * If CF=1, then the compressed file represents the remainder of the file */ | |
220 | /* exactly. Otherwise CF=0 and the remainder of the file consists of zero */ | |
221 | /* or more GROUPS, each of which represents one or more bytes. */ | |
222 | /* * Each group consists of two bytes of CONTROL information followed by */ | |
223 | /* sixteen ITEMs except for the last group which can contain from one */ | |
224 | /* to sixteen items. */ | |
225 | /* * An item can be either a LITERAL item or a COPY item. */ | |
226 | /* * Each item corresponds to a bit in the control bytes. */ | |
227 | /* * The first control byte corresponds to the first 8 items in the group */ | |
228 | /* with bit 0 corresponding to the first item in the group and bit 7 to */ | |
229 | /* the eighth item in the group. */ | |
230 | /* * The second control byte corresponds to the second 8 items in the group */ | |
231 | /* with bit 0 corresponding to the ninth item in the group and bit 7 to */ | |
232 | /* the sixteenth item in the group. */ | |
233 | /* * A zero bit in a control word means that the corresponding item is a */ | |
234 | /* literal item. A one bit corresponds to a copy item. */ | |
235 | /* * A literal item consists of a single byte which represents itself. */ | |
236 | /* * A copy item consists of two bytes that represent from 3 to 18 bytes. */ | |
237 | /* * The first byte in a copy item will be denoted C1. */ | |
238 | /* * The second byte in a copy item will be denoted C2. */ | |
239 | /* * Bits will be selected using square brackets. */ | |
240 | /* For example: C1[0..3] is the low nibble of the first control byte. */ | |
241 | /* of copy item C1. */ | |
242 | /* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */ | |
243 | /* in the range [3,18]. */ | |
244 | /* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which */ | |
245 | /* is a number in the range [0,4095]. */ | |
246 | /* * A copy item represents the sequence of bytes */ | |
247 | /* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where */ | |
248 | /* text is the entire text of the uncompressed string. */ | |
249 | /* POS is the index in the text of the character following the */ | |
250 | /* string represented by all the items preceeding the item */ | |
251 | /* being defined. */ | |
252 | /* OFFSET is obtained from INDEX by looking up the hash table. */ | |
253 | /* */ | |
254 | /******************************************************************************/ | |
255 | ||
256 | /* The following #define defines the length of the copy flag that appears at */ | |
257 | /* the start of the compressed file. The value of four bytes was chosen */ | |
258 | /* because the fast_copy routine on my Macintosh runs faster if the source */ | |
259 | /* and destination blocks are relatively longword aligned. */ | |
260 | /* The actual flag data appears in the first byte. The rest are zeroed so as */ | |
261 | /* to normalize the compressed representation (i.e. not non-deterministic). */ | |
262 | #define FLAG_BYTES 4 | |
263 | ||
264 | /* The following #defines define the meaning of the values of the copy */ | |
265 | /* flag at the start of the compressed file. */ | |
266 | #define FLAG_COMPRESS 0 /* Signals that output was result of compression. */ | |
267 | #define FLAG_COPY 1 /* Signals that output was simply copied over. */ | |
268 | ||
269 | /* The 68000 microprocessor (on which this algorithm was originally developed */ | |
270 | /* is fussy about non-aligned arrays of words. To avoid these problems the */ | |
271 | /* following macro can be used to "waste" from 0 to 3 bytes so as to align */ | |
272 | /* the argument pointer. */ | |
273 | #define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1)) | |
274 | ||
275 | ||
276 | /* The following constant defines the maximum length of an uncompressed item. */ | |
277 | /* This definition must not be changed; its value is hardwired into the code. */ | |
278 | /* The longest number of bytes that can be spanned by a single item is 18 */ | |
279 | /* for the longest copy item. */ | |
280 | #define MAX_RAW_ITEM (18) | |
281 | ||
282 | /* The following constant defines the maximum length of an uncompressed group.*/ | |
283 | /* This definition must not be changed; its value is hardwired into the code. */ | |
284 | /* A group contains at most 16 items which explains this definition. */ | |
285 | #define MAX_RAW_GROUP (16*MAX_RAW_ITEM) | |
286 | ||
287 | /* The following constant defines the maximum length of a compressed group. */ | |
288 | /* This definition must not be changed; its value is hardwired into the code. */ | |
289 | /* A compressed group consists of two control bytes followed by up to 16 */ | |
290 | /* compressed items each of which can have a maximum length of two bytes. */ | |
291 | #define MAX_CMP_GROUP (2+16*2) | |
292 | ||
293 | /* The following constant defines the number of entries in the hash table. */ | |
294 | /* This definition must not be changed; its value is hardwired into the code. */ | |
295 | #define HASH_TABLE_LENGTH (4096) | |
296 | ||
297 | /* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable */ | |
298 | /* the compressor and decompressor to stay in step maintaining identical hash */ | |
299 | /* tables. In an early version of the algorithm, the tables were simply */ | |
300 | /* initialized to zero and a check for zero was included just before the */ | |
301 | /* matching code. However, this test costs time. A better solution is to */ | |
302 | /* initialize all the entries in the hash table to point to a constant */ | |
303 | /* string. The decompressor does the same. This solution requires no extra */ | |
304 | /* test. The contents of the string do not matter so long as the string is */ | |
305 | /* the same for the compressor and decompressor and contains at least */ | |
306 | /* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not */ | |
307 | /* have white space problems (e.g. there is no chance that the compiler will */ | |
308 | /* replace more than one space by a TAB) and because they make the length of */ | |
309 | /* the string obvious by inspection. */ | |
310 | #define START_STRING_18 ((UBYTE *) "123456789012345678") | |
311 | ||
312 | /* In this algorithm, hash values have to be calculated at more than one */ | |
313 | /* point. The following macro neatens the code up for this. */ | |
314 | #define HASH(PTR) \ | |
315 | (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF) | |
316 | ||
317 | /******************************************************************************/ | |
318 | ||
319 | /* Input : Hand over the required amount of working memory in p_wrk_mem. */ | |
320 | /* Input : Specify input block using p_src_first and src_len. */ | |
321 | /* Input : Point p_dst_first to the start of the output zone (OZ). */ | |
322 | /* Input : Point p_dst_len to a ULONG to receive the output length. */ | |
323 | /* Input : Input block and output zone must not overlap. */ | |
324 | /* Output : Length of output block written to *p_dst_len. */ | |
325 | /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May */ | |
326 | /* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/ | |
327 | /* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */ | |
328 | LOCAL void compress_compress(UBYTE *p_wrk_mem, | |
329 | UBYTE *p_src_first, ULONG src_len, | |
330 | UBYTE *p_dst_first, LONG *p_dst_len) | |
331 | { | |
332 | /* p_src and p_dst step through the source and destination blocks. */ | |
333 | register UBYTE *p_src = p_src_first; | |
334 | register UBYTE *p_dst = p_dst_first; | |
335 | ||
336 | /* The following variables are never modified and are used in the */ | |
337 | /* calculations that determine when the main loop terminates. */ | |
338 | UBYTE *p_src_post = p_src_first+src_len; | |
339 | UBYTE *p_dst_post = p_dst_first+src_len; | |
340 | UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; | |
341 | UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; | |
342 | ||
343 | /* The variables 'p_control' and 'control' are used to buffer control bits. */ | |
344 | /* Before each group is processed, the next two bytes of the output block */ | |
345 | /* are set aside for the control word for the group about to be processed. */ | |
346 | /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ | |
347 | /* 'control' buffers the control bits being generated during the processing */ | |
348 | /* of the group. Instead of having a counter to keep track of how many items */ | |
349 | /* have been processed (=the number of bits in the control word), at the */ | |
350 | /* start of each group, the top word of 'control' is filled with 1 bits. */ | |
351 | /* As 'control' is shifted for each item, the 1 bits in the top word are */ | |
352 | /* absorbed or destroyed. When they all run out (i.e. when the top word is */ | |
353 | /* all zero bits, we know that we are at the end of a group. */ | |
354 | # define TOPWORD 0xFFFF0000 | |
355 | UBYTE *p_control; | |
356 | register ULONG control=TOPWORD; | |
357 | ||
358 | /* THe variable 'hash' always points to the first element of the hash table. */ | |
359 | UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); | |
360 | ||
361 | /* The following two variables represent the literal buffer. p_h1 points to */ | |
362 | /* the hash table entry corresponding to the youngest literal. p_h2 points */ | |
363 | /* to the hash table entry corresponding to the second youngest literal. */ | |
364 | /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending */ | |
365 | /* literal. The variables are initialized to zero meaning an empty "buffer". */ | |
366 | UBYTE **p_h1=NULL; | |
367 | UBYTE **p_h2=NULL; | |
368 | ||
369 | /* To start, we write the flag bytes. Being optimistic, we set the flag to */ | |
370 | /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ | |
371 | /* algorithm deterministic. */ | |
372 | *p_dst++=FLAG_COMPRESS; | |
373 | {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} | |
374 | ||
375 | /* Reserve the first word of output as the control word for the first group. */ | |
376 | /* Note: This is undone at the end if the input block is empty. */ | |
377 | p_control=p_dst; p_dst+=2; | |
378 | ||
379 | /* Initialize all elements of the hash table to point to a constant string. */ | |
380 | /* Use of an unrolled loop speeds this up considerably. */ | |
381 | {UWORD i; UBYTE **p_h=hash; | |
382 | # define ZH *p_h++=START_STRING_18 | |
383 | for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ | |
384 | {ZH;ZH;ZH;ZH; | |
385 | ZH;ZH;ZH;ZH; | |
386 | ZH;ZH;ZH;ZH; | |
387 | ZH;ZH;ZH;ZH;} | |
388 | } | |
389 | ||
390 | /* The main loop processes either 1 or 16 items per iteration. As its */ | |
391 | /* termination logic is complicated, I have opted for an infinite loop */ | |
392 | /* structure containing 'break' and 'goto' statements. */ | |
393 | while (TRUE) | |
394 | {/* Begin main processing loop. */ | |
395 | ||
396 | /* Note: All the variables here except unroll should be defined within */ | |
397 | /* the inner loop. Unfortunately the loop hasn't got a block. */ | |
398 | register UBYTE *p; /* Scans through targ phrase during matching. */ | |
399 | register UBYTE *p_ziv= NULL ; /* Points to first byte of current Ziv. */ | |
400 | register UWORD unroll; /* Loop counter for unrolled inner loop. */ | |
401 | register UWORD index; /* Index of current hash table entry. */ | |
402 | register UBYTE **p_h0 = NULL ; /* Pointer to current hash table entry. */ | |
403 | ||
404 | /* Test for overrun and jump to overrun code if necessary. */ | |
405 | if (p_dst>p_dst_post) | |
406 | goto overrun; | |
407 | ||
408 | /* The following cascade of if statements efficiently catches and deals */ | |
409 | /* with varying degrees of closeness to the end of the input block. */ | |
410 | /* When we get very close to the end, we stop updating the table and */ | |
411 | /* code the remaining bytes as literals. This makes the code simpler. */ | |
412 | unroll=16; | |
413 | if (p_src>p_src_max16) | |
414 | { | |
415 | unroll=1; | |
416 | if (p_src>p_src_max1) | |
417 | { | |
418 | if (p_src==p_src_post) | |
419 | break; | |
420 | else | |
421 | goto literal; | |
422 | } | |
423 | } | |
424 | ||
425 | /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ | |
426 | /* or 16) items. I have chosen to implement this loop with labels and */ | |
427 | /* gotos to heighten the ease with which the loop may be implemented with */ | |
428 | /* a single decrement and branch instruction in assembly language and */ | |
429 | /* also because the labels act as highly readable place markers. */ | |
430 | /* (Also because we jump into the loop for endgame literals (see above)). */ | |
431 | ||
432 | begin_unrolled_loop: | |
433 | ||
434 | /* To process the next phrase, we hash the next three bytes and use */ | |
435 | /* the resultant hash table index to look up the hash table. A pointer */ | |
436 | /* to the entry is stored in p_h0 so as to avoid an array lookup. The */ | |
437 | /* hash table entry *p_h0 is looked up yielding a pointer p to a */ | |
438 | /* potential match of the Ziv in the history. */ | |
439 | index=HASH(p_src); | |
440 | p_h0=&hash[index]; | |
441 | p=*p_h0; | |
442 | ||
443 | /* Having looked up the candidate position, we are in a position to */ | |
444 | /* attempt a match. The match loop has been unrolled using the PS */ | |
445 | /* macro so that failure within the first three bytes automatically */ | |
446 | /* results in the literal branch being taken. The coding is simple. */ | |
447 | /* p_ziv saves p_src so we can let p_src wander. */ | |
448 | # define PS *p++!=*p_src++ | |
449 | p_ziv=p_src; | |
450 | if (PS || PS || PS) | |
451 | { | |
452 | /* Literal. */ | |
453 | ||
454 | /* Code the literal byte as itself and a zero control bit. */ | |
455 | p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF; | |
456 | ||
457 | /* We have just coded a literal. If we had two pending ones, that */ | |
458 | /* makes three and we can update the hash table. */ | |
459 | if (p_h2!=0) | |
460 | {*p_h2=p_ziv-2;} | |
461 | ||
462 | /* In any case, rotate the hash table pointers for next time. */ | |
463 | p_h2=p_h1; p_h1=p_h0; | |
464 | ||
465 | } | |
466 | else | |
467 | { | |
468 | /* Copy */ | |
469 | ||
470 | /* Match up to 15 remaining bytes using an unrolled loop and code. */ | |
471 | #if 0 | |
472 | PS || PS || PS || PS || PS || PS || PS || PS || | |
473 | PS || PS || PS || PS || PS || PS || PS || p_src++; | |
474 | #else | |
475 | if ( | |
476 | !( PS || PS || PS || PS || PS || PS || PS || PS || | |
477 | PS || PS || PS || PS || PS || PS || PS ) | |
478 | ) p_src++; | |
479 | #endif | |
480 | *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3); | |
481 | *p_dst++=index&0xFF; | |
482 | ||
483 | /* As we have just coded three bytes, we are now in a position to */ | |
484 | /* update the hash table with the literal bytes that were pending */ | |
485 | /* upon the arrival of extra context bytes. */ | |
486 | if (p_h1!=0) | |
487 | { | |
488 | if (p_h2) | |
489 | {*p_h2=p_ziv-2; p_h2=NULL;} | |
490 | *p_h1=p_ziv-1; p_h1=NULL; | |
491 | } | |
492 | ||
493 | /* In any case, we can update the hash table based on the current */ | |
494 | /* position as we just coded at least three bytes in a copy items. */ | |
495 | *p_h0=p_ziv; | |
496 | ||
497 | } | |
498 | control>>=1; | |
499 | ||
500 | /* This loop is all set up for a decrement and jump instruction! */ | |
501 | #ifndef linux | |
502 | ` end_unrolled_loop: if (--unroll) goto begin_unrolled_loop; | |
503 | #else | |
504 | /* end_unrolled_loop: */ if (--unroll) goto begin_unrolled_loop; | |
505 | #endif | |
506 | ||
507 | /* At this point it will nearly always be the end of a group in which */ | |
508 | /* case, we have to do some control-word processing. However, near the */ | |
509 | /* end of the input block, the inner unrolled loop is only executed once. */ | |
510 | /* This necessitates the 'if' test. */ | |
511 | if ((control&TOPWORD)==0) | |
512 | { | |
513 | /* Write the control word to the place we saved for it in the output. */ | |
514 | *p_control++= control &0xFF; | |
515 | *p_control = (control>>8) &0xFF; | |
516 | ||
517 | /* Reserve the next word in the output block for the control word */ | |
518 | /* for the group about to be processed. */ | |
519 | p_control=p_dst; p_dst+=2; | |
520 | ||
521 | /* Reset the control bits buffer. */ | |
522 | control=TOPWORD; | |
523 | } | |
524 | ||
525 | } /* End main processing loop. */ | |
526 | ||
527 | /* After the main processing loop has executed, all the input bytes have */ | |
528 | /* been processed. However, the control word has still to be written to the */ | |
529 | /* word reserved for it in the output at the start of the most recent group. */ | |
530 | /* Before writing, the control word has to be shifted so that all the bits */ | |
531 | /* are in the right place. The "empty" bit positions are filled with 1s */ | |
532 | /* which partially fill the top word. */ | |
533 | while(control&TOPWORD) control>>=1; | |
534 | *p_control++= control &0xFF; | |
535 | *p_control++=(control>>8) &0xFF; | |
536 | ||
537 | /* If the last group contained no items, delete the control word too. */ | |
538 | if (p_control==p_dst) p_dst-=2; | |
539 | ||
540 | /* Write the length of the output block to the dst_len parameter and return. */ | |
541 | *p_dst_len=p_dst-p_dst_first; | |
542 | return; | |
543 | ||
544 | /* Jump here as soon as an overrun is detected. An overrun is defined to */ | |
545 | /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the */ | |
546 | /* length of the output written so far exceeds the length of the input block.*/ | |
547 | /* The algorithm checks for overruns at least at the end of each group */ | |
548 | /* which means that the maximum overrun is MAX_CMP_GROUP bytes. */ | |
549 | /* Once an overrun occurs, the only thing to do is to set the copy flag and */ | |
550 | /* copy the input over. */ | |
551 | overrun: | |
552 | #if 0 | |
553 | *p_dst_first=FLAG_COPY; | |
554 | fast_copy(p_src_first,p_dst_first+FLAG_BYTES,src_len); | |
555 | *p_dst_len=src_len+FLAG_BYTES; | |
556 | #else | |
557 | fast_copy(p_src_first,p_dst_first,src_len); | |
558 | *p_dst_len= -src_len; /* return a negative number to indicate uncompressed data */ | |
559 | #endif | |
560 | } | |
561 | ||
562 | /******************************************************************************/ | |
563 | ||
564 | /* Input : Hand over the required amount of working memory in p_wrk_mem. */ | |
565 | /* Input : Specify input block using p_src_first and src_len. */ | |
566 | /* Input : Point p_dst_first to the start of the output zone. */ | |
567 | /* Input : Point p_dst_len to a ULONG to receive the output length. */ | |
568 | /* Input : Input block and output zone must not overlap. User knows */ | |
569 | /* Input : upperbound on output block length from earlier compression. */ | |
570 | /* Input : In any case, maximum expansion possible is nine times. */ | |
571 | /* Output : Length of output block written to *p_dst_len. */ | |
572 | /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ | |
573 | /* Output : Writes only in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ | |
574 | LOCAL void compress_decompress( UBYTE *p_wrk_mem, | |
575 | UBYTE *p_src_first, LONG src_len, | |
576 | UBYTE *p_dst_first, ULONG *p_dst_len) | |
577 | { | |
578 | /* Byte pointers p_src and p_dst scan through the input and output blocks. */ | |
579 | register UBYTE *p_src = p_src_first+FLAG_BYTES; | |
580 | register UBYTE *p_dst = p_dst_first; | |
581 | /* we need to avoid a SEGV when trying to uncompress corrupt data */ | |
582 | register UBYTE *p_dst_post = p_dst_first + *p_dst_len; | |
583 | ||
584 | /* The following two variables are never modified and are used to control */ | |
585 | /* the main loop. */ | |
586 | UBYTE *p_src_post = p_src_first+src_len; | |
587 | UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2); | |
588 | ||
589 | /* The hash table is the only resident of the working memory. The hash table */ | |
590 | /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To */ | |
591 | /* keep Macintoshes happy, it is longword aligned. */ | |
592 | UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); | |
593 | ||
594 | /* The variable 'control' is used to buffer the control bits which appear in */ | |
595 | /* groups of 16 bits (control words) at the start of each compressed group. */ | |
596 | /* When each group is read, bit 16 of the register is set to one. Whenever */ | |
597 | /* a new bit is needed, the register is shifted right. When the value of the */ | |
598 | /* register becomes 1, we know that we have reached the end of a group. */ | |
599 | /* Initializing the register to 1 thus instructs the code to follow that it */ | |
600 | /* should read a new control word immediately. */ | |
601 | register ULONG control=1; | |
602 | ||
603 | /* The value of 'literals' is always in the range 0..3. It is the number of */ | |
604 | /* consecutive literal items just seen. We have to record this number so as */ | |
605 | /* to know when to update the hash table. When literals gets to 3, there */ | |
606 | /* have been three consecutive literals and we can update at the position of */ | |
607 | /* the oldest of the three. */ | |
608 | register UWORD literals=0; | |
609 | ||
610 | /* Check the leading copy flag to see if the compressor chose to use a copy */ | |
611 | /* operation instead of a compression operation. If a copy operation was */ | |
612 | /* used, then all we need to do is copy the data over, set the output length */ | |
613 | /* and return. */ | |
614 | #if 0 | |
615 | if (*p_src_first==FLAG_COPY) | |
616 | { | |
617 | fast_copy(p_src_first+FLAG_BYTES,p_dst_first,src_len-FLAG_BYTES); | |
618 | *p_dst_len=src_len-FLAG_BYTES; | |
619 | return; | |
620 | } | |
621 | #else | |
622 | if ( src_len < 0 ) | |
623 | { | |
624 | fast_copy(p_src_first,p_dst_first,-src_len ); | |
625 | *p_dst_len = (ULONG)-src_len; | |
626 | return; | |
627 | } | |
628 | #endif | |
629 | ||
630 | /* Initialize all elements of the hash table to point to a constant string. */ | |
631 | /* Use of an unrolled loop speeds this up considerably. */ | |
632 | {UWORD i; UBYTE **p_h=hash; | |
633 | # define ZJ *p_h++=START_STRING_18 | |
634 | for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ | |
635 | {ZJ;ZJ;ZJ;ZJ; | |
636 | ZJ;ZJ;ZJ;ZJ; | |
637 | ZJ;ZJ;ZJ;ZJ; | |
638 | ZJ;ZJ;ZJ;ZJ;} | |
639 | } | |
640 | ||
641 | /* The outer loop processes either 1 or 16 items per iteration depending on */ | |
642 | /* how close p_src is to the end of the input block. */ | |
643 | while (p_src!=p_src_post) | |
644 | {/* Start of outer loop */ | |
645 | ||
646 | register UWORD unroll; /* Counts unrolled loop executions. */ | |
647 | ||
648 | /* When 'control' has the value 1, it means that the 16 buffered control */ | |
649 | /* bits that were read in at the start of the current group have all been */ | |
650 | /* shifted out and that all that is left is the 1 bit that was injected */ | |
651 | /* into bit 16 at the start of the current group. When we reach the end */ | |
652 | /* of a group, we have to load a new control word and inject a new 1 bit. */ | |
653 | if (control==1) | |
654 | { | |
655 | control=0x10000|*p_src++; | |
656 | control|=(*p_src++)<<8; | |
657 | } | |
658 | ||
659 | /* If it is possible that we are within 16 groups from the end of the */ | |
660 | /* input, execute the unrolled loop only once, else process a whole group */ | |
661 | /* of 16 items by looping 16 times. */ | |
662 | unroll= p_src<=p_src_max16 ? 16 : 1; | |
663 | ||
664 | /* This inner loop processes one phrase (item) per iteration. */ | |
665 | while (unroll--) | |
666 | { /* Begin unrolled inner loop. */ | |
667 | ||
668 | /* Process a literal or copy item depending on the next control bit. */ | |
669 | if (control&1) | |
670 | { | |
671 | /* Copy item. */ | |
672 | ||
673 | register UBYTE *p; /* Points to place from which to copy. */ | |
674 | register UWORD lenmt; /* Length of copy item minus three. */ | |
675 | register UBYTE **p_hte; /* Pointer to current hash table entry.*/ | |
676 | register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv. */ | |
677 | ||
678 | /* Read and dismantle the copy word. Work out from where to copy. */ | |
679 | lenmt=*p_src++; | |
680 | p_hte=&hash[((lenmt&0xF0)<<4)|*p_src++]; | |
681 | p=*p_hte; | |
682 | lenmt&=0xF; | |
683 | ||
684 | /* Now perform the copy using a half unrolled loop. */ | |
685 | *p_dst++=*p++; | |
686 | *p_dst++=*p++; | |
687 | *p_dst++=*p++; | |
688 | while (lenmt--) | |
689 | *p_dst++=*p++; | |
690 | ||
691 | /* Because we have just received 3 or more bytes in a copy item */ | |
692 | /* (whose bytes we have just installed in the output), we are now */ | |
693 | /* in a position to flush all the pending literal hashings that had */ | |
694 | /* been postponed for lack of bytes. */ | |
695 | if (literals>0) | |
696 | { | |
697 | register UBYTE *r=p_ziv-literals; | |
698 | hash[HASH(r)]=r; | |
699 | if (literals==2) | |
700 | {r++; hash[HASH(r)]=r;} | |
701 | literals=0; | |
702 | } | |
703 | ||
704 | /* In any case, we can immediately update the hash table with the */ | |
705 | /* current position. We don't need to do a HASH(...) to work out */ | |
706 | /* where to put the pointer, as the compressor just told us!!! */ | |
707 | *p_hte=p_ziv; | |
708 | ||
709 | } | |
710 | else | |
711 | { | |
712 | /* Literal item. */ | |
713 | ||
714 | /* Copy over the literal byte. */ | |
715 | *p_dst++=*p_src++; | |
716 | ||
717 | /* If we now have three literals waiting to be hashed into the hash */ | |
718 | /* table, we can do one of them now (because there are three). */ | |
719 | if (++literals == 3) | |
720 | {register UBYTE *p=p_dst-3; hash[HASH(p)]=p; literals=2;} | |
721 | } | |
722 | ||
723 | /* Shift the control buffer so the next control bit is in bit 0. */ | |
724 | control>>=1; | |
725 | #if 1 | |
726 | if (p_dst > p_dst_post) | |
727 | { | |
728 | /* Shit: we tried to decompress corrupt data */ | |
729 | *p_dst_len = 0; | |
730 | return; | |
731 | } | |
732 | #endif | |
733 | } /* End unrolled inner loop. */ | |
734 | ||
735 | } /* End of outer loop */ | |
736 | ||
737 | /* Write the length of the decompressed data before returning. */ | |
738 | *p_dst_len=p_dst-p_dst_first; | |
739 | } | |
740 | ||
741 | /******************************************************************************/ | |
742 | /* End of LZRW3.C */ | |
743 | /******************************************************************************/ |