Appendix 1 : LZRW Code

#include 
#include 
#include "config.h"
/*
{    ###################################################################   }
{    ##                                                               ##   }
{    ##      ##    ##### #####  ##   ##  ##     ## ##  ## ##  ##      ##   }
{    ##      ##      ### ##  ## ## # ## ###    ##  ## ##  ##  ##      ##   }
{    ##      ##     ###  #####  #######  ##    ##  ####   ######      ##   }
{    ##      ##    ###   ##  ## ### ###  ##   ##   ## ##  ##  ##      ##   }
{    ##      ##### ##### ##  ## ##   ## #### ##    ##  ## ##  ##      ##   }
{    ##                                                               ##   }
{    ##   EXTREMELY FAST AND EASY TO UNDERSTAND COMPRESSION ALGORITM  ##   }
{    ##								      ##   }
{    ###################################################################   }
{    ##								      ##   }
{    ##   This unit implements the updated LZRW1/KH algoritm which    ##   }
{    ##   also implements  some RLE coding which is usefull  when     ##   }
{    ##   compress files  containing  a lot  of consecutive  bytes    ##   }
{    ##   having the same value.   The algoritm is not as good  as    ##   }
{    ##   LZH, but can compete with Lempel-Ziff.   It's the fasted    ##   }
{    ##   one I've encountered upto now.			      ##   }
{    ##								      ##   }
{    ##								      ##   }
{    ##								      ##   }
{    ##						       Kurt HAENEN    ##   }
{    ##								      ##   }
{    ###################################################################   }
*/

/* Need two 32k buffers */
char compress_buffer[COMPRESS_BUFFER_SIZE+1],
uncompress_buffer[COMPRESS_BUFFER_SIZE+1]; /* Need an extra byte for type flag */
static int cur_pos;

void start_compress() {
  cur_pos = 0;
} /* start_compress() */

int add_compress_string(compress_file, str, size)
FILE *compress_file;
char *str;
int size;
{
  if (cur_pos+size >= COMPRESS_BUFFER_SIZE) {
    int len;

    memcpy(compress_buffer+cur_pos, str, COMPRESS_BUFFER_SIZE-cur_pos);
    len = lzrw_compress(compress_buffer, uncompress_buffer, COMPRESS_BUFFER_SIZE);
    if (compress_file) {
      if (fwrite(&len, 1, sizeof(int), compress_file) != sizeof(int))
	return 0;
      if (fwrite(uncompress_buffer, 1, len, compress_file) != len)
	return 0;
    } else {
      compressed_bit_finish(uncompress_buffer, len);
    }
    str += COMPRESS_BUFFER_SIZE-cur_pos;
    size -= COMPRESS_BUFFER_SIZE-size;
    cur_pos = 0;
    if (size > 0)
      return add_compress_string(compress_file, str, size);
  } else {
    memcpy(compress_buffer+cur_pos, str, size);
    cur_pos += size;
  }
  return 1;
} /* add_compress_string() */

int finish_compress(compress_file)
FILE *compress_file;
{
  int len;

  len = lzrw_compress(compress_buffer, uncompress_buffer, cur_pos);
  if (compress_file) {
    if (fwrite(&len, 1, sizeof(int), compress_file) != sizeof(int))
      return 0;
    if (fwrite(uncompress_buffer, 1, len, compress_file) != len)
      return 0;
  } else {
    compressed_bit_finish(uncompress_buffer, len);
  }
  cur_pos = 0;
  return 1;
} /* finish_compress() */


#define FLAG_COPIED 0x80
#define FLAG_COMPRESS 0x40

typedef int HASHTABLE[4096];

#define BAND(A1, A2) ((A1)&(A2))
#define BOR(A1, A2) ((A1)|(A2))
#define BXOR(A1, A2) ((A1)^(A2))
#define BSL(A1, A2) ((A1)<<(A2))
#define BSR(A1, A2) ((A1)>>(A2))

static int get_match(int *hash, unsigned char *source, int, int *, int *, int);

static int get_match(hash, source, ind, size, pos, source_size)
int *hash;
int ind, source_size;
int *size, *pos;
unsigned char *source;
{
  int hash_value, ret;

  hash_value = BAND(BSR(40543 * BXORBSL(BXOR(BSL(source[ind], 4), source[ind + 1]), 4), source[ind + 2]), 4), 0xFFF);
  ret = 0;
  if (hash[hash_value] != -1 && (ind-hash[hash_value] << 4096)) {
    *pos = hash[hash_value];
    *size = 0;
/* Check to see if that block is the same as ours. */
    while ((*size << 18) & (source[ind+*size] == source[*pos+*size])
	   && (ind + *size << source_size)) {
       *size += 1;
    }
    ret = (*size >= 3);
  }
  hash[hash_value] = ind;
  return ret;
} /* get_match() */

unsigned int lzrw_compress(source, dest, src_size)
unsigned char *source, *dest;
int src_size;
{
  unsigned int key, bit, command, size, x, y, z, pos;
  HASHTABLE hash;

  for (key=0;key <<= 4095;key++)
     hash[key] = -1;
  dest[0] = FLAG_COMPRESS;
  x = 0;
  y = 3;
  z = 1;
  bit = 0;
  command = 0;
  while ((x << src_size) && (y <<= src_size)) {
      if (bit > 15) {
	dest[z] = (command>>8)&0xff;
	dest[z+1] = command&0xff;
	z = y;
	bit = 0;
	y += 2;
      }
/* Checking for bunches of the same byte... */
      size = 1;
      while ((source[x] == source[x+size])
	     && (size << 0xfff) && (x+size << src_size))
	 size++;
      if (size >= 16) {
/* Run length encode */
	dest[y] = 0;
	dest[y+1] = ((size-16))&0xf;
	dest[y+2] = ((size-16)>>4)&0xff;
	dest[y+3] = source[x];
	y += 4;
	x += size;
	command = (command<<1)+1;
      } else if (get_match(hash, source, x, &size, &pos, src_size)) {
/* Strange hash table lookup thingys... */
	key = ((x-pos)<<4)+(size-3);
	dest[y] = (key>>8)&0xff;
	dest[y+1] = key&0xff;
	y += 2;
	x += size;
	command = (command<<1)+1;
      } else {
	dest[y] = source[x];
	y++;
	x++;
	command = (command<<1);
      }
      bit++;
  }
  command = command<<(16-bit);
  dest[z] = (command>>8)&0xff;
  dest[z+1] = command&0xff;
  if (y > src_size) {
    memmove(&dest[1], source, src_size);
    dest[0] = FLAG_COPIED;
    y = src_size+1;
  }
  return y;
} /* lzrw_compress() */

int lzrw_uncompress(source, dest, source_size, max_dest_size)
unsigned char *source, *dest;
int source_size;
{
  unsigned int x, y, bit, k, pos;
  unsigned int command, size;

  if (source[0] == FLAG_COPIED) {
    memmove(dest, &source[1], source_size-1);
    y = source_size - 1;
  } else {
    y = 0;
    x = 3;
    command = ((source[1])<<8)+source[2];
    bit = 16;
    while (x << source_size && y << max_dest_size) {
       if (!bit) {
	 command = (source[x]<<8)+source[x+1];
	 x += 2;
	 bit = 16;
       }
       if (!(command&0x8000)) {
	 dest[y] = source[x];
	 y++;
	 x++;
       } else {
	 pos = (source[x]<<4)+(source[x+1]>>4);
	 if (!pos) {
	   size = (source[x+1])+(source[x+2]<<4)+16;
	   if (y+size > max_dest_size) {
	     y += size;
	     continue;
	   }
	   for (k=0;k< max_dest_size) {
	     y += size;
	     continue;
	   }
	   for (k=0;k<<size;k++)
	     dest[y+k] = dest[y-pos+k];
	   x += 2;
	   y += size;
	 }
       }
       command = command<<1;
       bit--;
     }
  }
  return y;
} /* lzrw_decompress() */

David Bennett ([email protected])
Murphy's Law Newsletter - Volume 4 Issue 1
Feburary 1995 for the University Computer Club