_AN EXISTENTIAL DICTIONARY_ by Edwin T. Floyd [LISTING ONE] {$A+,B-,D-,E-,F+,I+,L-,N-,O-,R-,S-,V+} Unit Dict; Interface { DICT.PAS dictionary object and methods to create and use a superimposed code dictionary. Copyright Edwin T. Floyd, 1990. } Type Dictionary = Object DictArray : Pointer; { Pointer to dictionary bit array } DictCount : LongInt; { Number of key entries in this dictionary } DictSize : Word; { Number of bytes in dictionary bit array } DictBits : Byte; { Number of bits per key entry } Constructor Init(MaxKeys : Word; BitsPerKey : Byte); { Initialize dictionary, specify maximum keys and bits per key. } Constructor RestoreDictionary(FileName : String); { Restore dictionary saved on disk by SaveDictionary } { Note: Use either Init or RestoreDictionary, not both. } Destructor Done; { Release storage allocated to dictionary. } Function DictionarySize : Word; { Returns number of bytes that will be written by SaveDictionary. } Procedure SaveDictionary(FileName : String); { Save dictionary in a disk file. } Function InsertString(Var s : String) : Boolean; { Insert string in dictionary; returns TRUE if string is already there. } Function StringInDictionary(Var s : String) : Boolean; { Returns TRUE if string is in dictionary. } Function InsertBlock(Var Data; Len : Word) : Boolean; { Insert block in dictionary; returns TRUE if block is already there. } Function BlockInDictionary(Var Data; Len : Word) : Boolean; { Returns TRUE if block is in dictionary. } Function InsertHash(Hash : LongInt) : Boolean; { Insert hash in dictionary; returns TRUE if hash is already there. } Function HashInDictionary(Hash : LongInt) : Boolean; { Returns TRUE if hash is in dictionary. } Function EstError : Real; { Returns estimated probability of error. } Function ActError : Real; { Returns actual probability of error (slow, counts bits). } End; Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt; { Returns the size in bytes of the optimal dictionary bit table for the indicated key and bit-per-key counts. } Function DictHash(Var Data; Len : Word) : LongInt; { Hash data block to a positive long integer. } Implementation Const MagicNumber = $E501205F; { Used to validate dictionary save file } RandMult = 16807; { =7**5; RandMult must be expressable in 16 bits. 48271 may give better "randomness" (see ACM ref.) } ShuffleBits = 3; ShuffleShift = 16 - ShuffleBits; ShufTableEnd = $FFFF Shr ShuffleShift; HashSeed : Word = 26; { Initial hash seed } RandSeed : LongInt = 1; { Random number seed: 0 < RandSeed < 2**31-1 } Type SaveFileHeader = Record { Header for dictionary save file (all numbers are byte-reversed) } Magic : LongInt; { Magic number for validity test } BitsCount : LongInt; { Bits-per-key and entry count } Size : Word; { Size of dictionary bit map in bytes } End; Var ShufTable : Array[0..ShufTableEnd] Of LongInt; NextOut : Word; Function IRand : LongInt; { Return next "minimal standard", 31 bit pseudo-random integer. This function actually computes (RandSeed * RandMult) Mod (2**31-1) where RandMult is a 16 bit quantity and RandSeed is 32 bits (See Carta, CACM 1/90). } Inline( $A1/>RandSeed+2/ { mov ax,[>RandSeed+2]} $BF/>RandMult/ { mov di,>RandMult} $F7/$E7/ { mul di} $89/$C3/ { mov bx,ax} $89/$D1/ { mov cx,dx} $A1/>RandSeed/ { mov ax,[>RandSeed]} $F7/$E7/ { mul di} $01/$DA/ { add dx,bx} $83/$D1/$00/ { adc cx,0 ; cx:dx:ax = Seed * Mult } $D0/$E6/ { shl dh,1 ; split p & q at 31 bits } $D1/$D1/ { rcl cx,1} $D0/$EE/ { shr dh,1 ; cx = p, dx:ax = q } $01/$C8/ { add ax,cx} $83/$D2/$00/ { adc dx,0 ; dx:ax = p + q } $71/$09/ { jno done} $05/$01/$00/ { add ax,1 ; overflow, inc(p + q) } $83/$D2/$00/ { adc dx,0} $80/$E6/$7F/ { and dh,$7F ; limit to 31 bits } {done:} $A3/>RandSeed/ { mov [>RandSeed],ax} $89/$16/>RandSeed+2); { mov [>RandSeed+2],dx} Function Hash(Seed : LongInt; Var Data; Len : Word) : LongInt; { Hash a block of data into a random long integer. This is actually equivalent to the following: RandSeed := Seed; Hash := 0; For i := 1 To Len Do Hash := Hash + (IRand * (Data[i] + $FF00); Hash := Hash AND $7FFFFFFF; If Hash = 0 Then Inc(Hash); Overflow is ignored. The seed is kept in registers; RandSeed is not affected by this routine. } Inline( $59/ { pop cx ; cx := len} $5E/ { pop si ; bx:si := @data} $5B/ { pop bx} $58/ { pop ax ; dx:ax := seed} $5A/ { pop dx} $E3/$59/ { jcxz alldone} $FC/ { cld} $1E/ { push ds} $8E/$DB/ { mov ds,bx} $55/ { push bp} $31/$DB/ { xor bx,bx} $53/ { push bx ; zero accumulator} $53/ { push bx} $89/$E5/ { mov bp,sp} {next: ; for each byte of data...} $51/ { push cx} $BF/>RandMult/ { mov di,>RandMult} $89/$C3/ { mov bx,ax} $89/$D0/ { mov ax,dx ; compute next seed} $F7/$E7/ { mul di} $93/ { xchg ax,bx} $89/$D1/ { mov cx,dx} $F7/$E7/ { mul di} $01/$DA/ { add dx,bx} $83/$D1/$00/ { adc cx,0 ; cx:dx:ax = Seed * Mult} $D0/$E6/ { shl dh,1 ; split p & q at 31 bits} $D1/$D1/ { rcl cx,1} $D0/$EE/ { shr dh,1 ; cx = p, dx:ax = q} $01/$C8/ { add ax,cx} $83/$D2/$00/ { adc dx,0 ; dx:ax = p + q} $71/$09/ { jno noovfl} $05/$01/$00/ { add ax,1 ; overflow, inc(p + q)} $83/$D2/$00/ { adc dx,0} $80/$E6/$7F/ { and dh,$7F ; limit to 31 bits} {noovfl:} $89/$C3/ { mov bx,ax ; save seed} $89/$D1/ { mov cx,dx} $AC/ { lodsb ; get next byte + $FF00} $B4/$FF/ { mov ah,$FF} $89/$C7/ { mov di,ax} $F7/$E1/ { mul cx ; multiply by seed} $97/ { xchg ax,di} $F7/$E3/ { mul bx} $01/$FA/ { add dx,di} $01/$46/$00/ { add [bp+0],ax ; accumulate} $11/$56/$02/ { adc [bp+2],dx} $89/$D8/ { mov ax,bx} $89/$CA/ { mov dx,cx} $59/ { pop cx} $E2/$B9/ { loop next ; until out of data} {;} $58/ { pop ax} $5A/ { pop dx} $5D/ { pop bp} $1F/ { pop ds} $80/$E6/$7F/ { and dh,$7F} {alldone:} $89/$C3/ { mov bx,ax} $09/$D3/ { or bx,dx} $75/$01/ { jnz exit} $40); { inc ax} {exit:} Procedure Shuffle; { Load the shuffle table } Begin For NextOut := 0 To ShufTableEnd Do ShufTable[NextOut] := IRand; NextOut := Word(IRand) Shr ShuffleShift; End; Function SIRand : LongInt; { Return the next shuffled random number } Var y : LongInt; Begin y := ShufTable[NextOut]; ShufTable[NextOut] := IRand; NextOut := Word(y) Shr ShuffleShift; SIRand := y; End; Function TestBit(Var BitArray; Size : Word; BitNo : LongInt) : Boolean; { Returns TRUE if indicated bit number, modulo size of bit array, is set. Size is in bytes. } Inline( {; dx:ax := BitNo} $58/ { pop ax} $5A/ { pop dx} {; bl := bit mask} $88/$C1/ { mov cl,al} $80/$E1/$07/ { and cl,$07} $B3/$80/ { mov bl,$80} $D2/$EB/ { shr bl,cl} {; dx:ax := byte offset} $D1/$EA/ { shr dx,1} $D1/$D8/ { rcr ax,1} $D1/$EA/ { shr dx,1} $D1/$D8/ { rcr ax,1} $D1/$EA/ { shr dx,1} $D1/$D8/ { rcr ax,1} {; dx := byte offset} $5F/ { pop di} $39/$D7/ { cmp di,dx} $77/$0E/ { ja quickdiv} {; protect against overflow} $89/$F9/ { mov cx,di} {protloop:} $D1/$E1/ { shl cx,1} $39/$D1/ { cmp cx,dx} $76/$FA/ { jbe protloop} $F7/$F1/ { div cx} $89/$D0/ { mov ax,dx} $31/$D2/ { xor dx,dx} {quickdiv:} $F7/$F7/ { div di} {; es:di := seg:ofs of byte} $5F/ { pop di} $01/$D7/ { add di,dx} $07/ { pop es} {; test bit} $30/$C0/ { xor al,al} $26/$22/$1D/ { es:and bl,[di]} $74/$02/ { jz notset} $FE/$C0); { inc al} {notset:} Function SetBit(Var BitArray; Size : Word; BitNo : LongInt) : Boolean; { Sets the indicated bit number modulo size of bit array. Returns TRUE if bit was already set. Size is in bytes. } Inline( {; dx:ax := BitNo} $58/ { pop ax} $5A/ { pop dx} {; bl := bit mask} $88/$C1/ { mov cl,al} $80/$E1/$07/ { and cl,$07} $B3/$80/ { mov bl,$80} $D2/$EB/ { shr bl,cl} {; dx:ax := byte offset} $D1/$EA/ { shr dx,1} $D1/$D8/ { rcr ax,1} $D1/$EA/ { shr dx,1} $D1/$D8/ { rcr ax,1} $D1/$EA/ { shr dx,1} $D1/$D8/ { rcr ax,1} {; dx := byte offset mod size } $5F/ { pop di} $39/$D7/ { cmp di,dx} $77/$0E/ { ja quickdiv} {; protect against overflow} $89/$F9/ { mov cx,di} {protloop:} $D1/$E1/ { shl cx,1} $39/$D1/ { cmp cx,dx} $76/$FA/ { jbe protloop} $F7/$F1/ { div cx} $89/$D0/ { mov ax,dx} $31/$D2/ { xor dx,dx} {quickdiv:} $F7/$F7/ { div di} {; es:di := seg:ofs of byte} $5F/ { pop di} $01/$D7/ { add di,dx} $07/ { pop es} {; test bit} $30/$C0/ { xor al,al} $88/$DC/ { mov ah,bl} $26/$22/$25/ { es:and ah,[di]} $74/$04/ { jz notset} $FE/$C0/ { inc al} $EB/$03/ { jmp short set} {notset:} $26/$08/$1D); { es:or [di],bl} {set:} Function LongSwap(n : LongInt) : LongInt; { Reverse bytes in a LongInt. } Inline( $5A/ { pop dx} $58/ { pop ax} $86/$C4/ { xchg ah,al} $86/$D6); { xchg dh,dl} Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt; Begin DictionaryBytes := Round(MaxKeys * BitsPerKey / (-Ln(0.5) * 8)); End; Function DictHash(Var Data; Len : Word) : LongInt; Begin DictHash := Hash(Hash(HashSeed, Data, Len), Data, Len); End; Constructor Dictionary.Init(MaxKeys : Word; BitsPerKey : Byte); Var DictBytes : LongInt; Begin DictBytes := DictionaryBytes(MaxKeys, BitsPerKey); If DictBytes > $FFF0 Then Begin WriteLn(DictBytes, ' bytes optimal for dictionary, but ', $FFF0, ' is maximum size dictionary. Using max size.'); DictBytes := $FFF0; End Else If DictBytes > MaxAvail Then Begin WriteLn(DictBytes, ' bytes optimal for dictionary, but only ', MaxAvail, ' bytes are available. Using ', MaxAvail); DictBytes := MaxAvail; End Else If DictBytes < 16 Then DictBytes := 16; DictSize := DictBytes; GetMem(DictArray, DictSize); FillChar(DictArray^, DictSize, 0); DictCount := 0; DictBits := BitsPerKey; End; Constructor Dictionary.RestoreDictionary(FileName : String); Var Header : SaveFileHeader; DictBytes : LongInt; f : File; OldMode : Byte; Begin OldMode := FileMode; FileMode := $40; Assign(f, FileName); Reset(f, 1); BlockRead(f, Header, SizeOf(Header)); With Header Do Begin Magic := LongSwap(Magic); Size := Swap(Size); DictBytes := FileSize(f) - SizeOf(Header); If (Magic <> MagicNumber) Or (Size <> DictBytes) Or (Size < 16) Or (Size > $FFF0) Then Begin WriteLn('File ', FileName, ' is not a dictionary save file.'); Halt(1); End; DictSize := Size; DictBits := BitsCount And $FF; DictCount := LongSwap(BitsCount And $FFFFFF00); GetMem(DictArray, DictSize); BlockRead(f, DictArray^, DictSize); Close(f); FileMode := OldMode; End; End; Destructor Dictionary.Done; Begin FreeMem(DictArray, DictSize); DictArray := Nil; DictSize := 0; DictBits := 0; DictCount := 0; End; Function Dictionary.DictionarySize : Word; Begin DictionarySize := DictSize + SizeOf(SaveFileHeader); End; Function Dictionary.InsertString(Var s : String) : Boolean; Begin InsertString := InsertBlock(s[1], Length(s)); End; Function Dictionary.StringInDictionary(Var s : String) : Boolean; Begin StringInDictionary := BlockInDictionary(s[1], Length(s)); End; Function Dictionary.InsertBlock(Var Data; Len : Word) : Boolean; Begin InsertBlock := InsertHash(DictHash(Data, Len)); End; Function Dictionary.BlockInDictionary(Var Data; Len : Word) : Boolean; Begin BlockInDictionary := HashInDictionary(DictHash(Data, Len)); End; Function Dictionary.InsertHash(Hash : LongInt) : Boolean; Var i : Byte; InDict : Boolean; Begin InDict := True; RandSeed := Hash; Shuffle; For i := 1 To DictBits Do If Not SetBit(DictArray^, DictSize, SIRand) Then InDict := False; If Not InDict Then Inc(DictCount); InsertHash := InDict; End; Function Dictionary.HashInDictionary(Hash : LongInt) : Boolean; Var i : Byte; InDict : Boolean; Begin InDict := True; RandSeed := Hash; Shuffle; i := 0; While (i < DictBits) And InDict Do Begin If Not TestBit(DictArray^, DictSize, SIRand) Then InDict := False; Inc(i); End; HashInDictionary := InDict; End; Procedure Dictionary.SaveDictionary(FileName : String); Var Header : SaveFileHeader; f : File; Begin Assign(f, FileName); ReWrite(f, 1); With Header Do Begin Magic := LongSwap(MagicNumber); Size := Swap(DictSize); BitsCount := LongSwap(DictCount) + DictBits; End; BlockWrite(f, Header, SizeOf(Header)); BlockWrite(f, DictArray^, DictSize); Close(f); End; Function Dictionary.EstError : Real; Begin EstError := Exp(Ln(1.0-Exp(-(DictCount*DictBits)/(DictSize*8.0)))*DictBits); End; Function Dictionary.ActError : Real; Var AllBits, BitsOn, i : LongInt; Begin AllBits := LongInt(DictSize) * 8; BitsOn := 0; For i := 0 To Pred(AllBits) Do If TestBit(DictArray^, DictSize, i) Then Inc(BitsOn); ActError := Exp(Ln(BitsOn / AllBits) * DictBits); End; End.