unit Babylon;

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs;

type
  TBabylon = class(TComponent)
  private
    { Private declarations }
    translations:array[1..100] of string;
    TransCount:Integer;

    function GetResult (index:integer):string;
  protected
    { Protected declarations }
  public
    { Public declarations }
    function Translate (OriginalWord:string; DICpath:string; TargetLanguage:string):integer;
    property results[index:integer]:string read GetResult;
    property ResultsCount:Integer read TransCount;
  published
    { Published declarations }
  end;

procedure Register;

implementation

{$r babylon.res}

function TBabylon.GetResult (index:integer):string;
begin
 if index>transcount then result:='' else result:=translations[index];
end;

function TBabylon.Translate (OriginalWord:string; DICpath:string; TargetLanguage:string):Integer;
var IsHebrewTarget:Boolean;
    MainIndex:integer;
    IndFile,DefFile:File;
    SecInd,SecDef,SecDef2:longword;
    MaxWordSize,MinWordSize:Byte;
    WordCount:LongWord;
    NumberOfWords:word;
    c:integer;
    BytesToSkip:LongWord;
    FoundWord:Boolean;
    CurWord:ShortString;
    DefPointer:LongWord;

procedure OpenFiles;
var goodpath:string;
begin
 GoodPath:=DICpath;
 if (GoodPath<>'') and (Goodpath[length (goodpath)]<>'\') then
  GoodPath:=GoodPath+'\';
 assignfile (IndFile,GoodPath+'english.dic');
 reset (IndFile,1);
 assignfile (DefFile,GoodPath+'engto'+copy (Targetlanguage,1,3)+'.dic');
 reset (DefFile,1);
end;

function WordIndex (TheWord:string):LongWord;
var c:integer;
    temp:LongWord;

function LetterVal (ch:char):byte;      // calcualte the value of a letter (in 5 bits)
begin
 case ch of
  'A'..'Z': result:=ord(ch)-ord('A')+2;
  'a'..'z': result:=ord(ch)-ord('a')+2;
  '\': result:=1;
  else result:=0;
 end;
end;

begin
 Temp:=0;                               // calcualte a hash, 15 bits from 3 first letters
 c:=1;
 while (c<4) do begin
  Temp:=Temp*28;
  if length (TheWord)>=c then inc (Temp,LetterVal (TheWord[c]));
  inc (c);
 end;
 result:=temp;
end;

procedure ReadDefinition;

const Hebshortcuts:array[0..31]of string=( 'XX', '', '', '', '', '', '', '',
	                                   '', '', '', '', '', '', '', '',
                                           '', '', '', '', '', '', '', '',
	                                   '', '', '', '', '', '', '', '' ) ;

      Engshortcuts:array[0..31]of string=( '<0>' , 'ion' , 'ies' , 'ing' , 'ous' , 'ses' ,
                                           'al' , 'an' ,
                                           'at' , 'ed' , 'en' , 'er' , 'es' , 'ia' , 'ic' , 'ie',
                                           'in' , 'io' , 'is' , 'it' , 'le' , 'ly' , 'ne' , 'on',
                                           'or' , 'ra' , 'se' , 'ss' , 'st' , 'te' , 'ti' , 'th' );

var attributes:array[1..32] of byte;
    WordSize:byte;
    TargetWord:String;
    Data:word;
    LSB,MSB:byte;

function TranslateChar (BabChar:Integer):Char;
const HebCharacters=' ;,''.';
      EngCharacters='abcdefghijklmnopqrstuvwxyz* ;,''.';
begin                   // decode 1 character from 5-bit (end or heb) into 8-bit
 if IsHebrewTarget then result:=HebCharacters[BabChar+1]
                   else result:=EngCharacters[BabChar+1];
end;

procedure ReadWord;
begin
 blockread (DefFile,WordSize,1);                // read the word's length
 TargetWord:='';                                // empty the result
 while (length (TargetWord)<WordSize) do begin  // while we need to read more...
  blockread (DefFile,Data,2);                   // read a word
   if (Data and $8000)=0 then begin             // if the most significant bit is off, its 3 5-bit letters
    TargetWord:=TargetWord+TranslateChar (Data shr 0 and $1F);
    TargetWord:=TargetWord+TranslateChar (Data shr 5 and $1F);
    TargetWord:=TargetWord+TranslateChar (Data shr 10 and $1F);
   end else begin                               // else, its an MSB/LSB encoding
    LSB:=Data and $FF;
    MSB:=Data shr 8 and $7F;

    if LSB>=32 then begin                       // if the LSB>=32, its just a character, decode if necessary
     if LSB<=126 then TargetWord:=TargetWord+char (LSB);
     if (LSB>=128) and (LSB<128+32) then TargetWord:=TargetWord+TranslateChar (LSB-128);
    end else begin                              // <32 means a shortcut, find it and add it
     if IsHebrewTarget then TargetWord:=TargetWord+HebShortCuts[LSB]
                       else TargetWord:=TargetWord+EngShortCuts[LSB];
    end;

    if MSB>=32 then begin                       /// smae with MSB
     TargetWord:=TargetWord+char (MSB);
    end else begin
     if MSB>0 then begin
      if IsHebrewTarget then TargetWord:=TargetWord+HebShortCuts[MSB]
                        else TargetWord:=TargetWord+EngShortCuts[MSB];
     end;
    end;
   end;
 end;
end;

begin
 blockread (DefFile,attributes,7);   // read the 7 bytes of attributes

 ReadWord;                           // read a "fake" word before the real definition
 ReadWord;                           // read the real definition

 TargetWord:=copy (TargetWord,1,WordSize);
                                     // truncate garbage
 inc (TransCount);
 Translations[TransCount]:=TargetWord;
end;

begin
 TransCount:=0;
 if uppercase(copy (TargetLanguage,1,3))='HEB' then IsHebrewTarget:=true else IsHebrewTarget:=false;
 try
  OpenFiles;
 except
  result:=4;
  exit;
 end;

 MainIndex:=100+4*WordIndex (OriginalWord);     // calcualte the basic hash
 seek (IndFile,MainIndex);                      // seek both files to it
 seek (DefFile,MainIndex);
 blockread (IndFile,SecInd,4);                  // read a pointer from each file
 blockread (DefFile,SecDef,4);
 blockread (DefFile,SecDef2,4);                 // read the next pointer in the target file
 if (SecDef=SecDef2) then begin                 // if its the same, there's nothing that starts with those 3 letters
  result:=1;
  exit;
 end;

 seek (IndFile,SecInd);                         // go to the pointer
 blockread (IndFile,MinWordSize,1);             // get min & max word lengths
 blockread (IndFile,MaxWordSize,1);
 if (length (OriginalWord)>MaxWordSize) or (length (OriginalWord)<MinWordSize) then begin
  result:=2;                                    // if we're longer or shorter, return an error
  exit;
 end;

 c:=MinWordSize;                                // calculate a skip of all the shorter words
 WordCount:=0;
 BytesToSkip:=0;
 while (c<length (OriginalWord)) do begin
  blockread (IndFile,NumberOfWords,2);
  Inc (WordCount,NumberOfWords);
  inc (BytesToSkip,(c-3)*NumberOfWords);
  inc (c);
 end;
 blockread (IndFile,NumberOfWords,2);
 inc (BytesToSkip,(MaxWordSize-length (OriginalWord))*2);

 seek (IndFile,BytesToSkip+FilePos (IndFile));  // skip the shorter words

 FoundWord:=false;
 CurWord:=OriginalWord;
 while (NumberOfWords>0) do begin               // while there are more words of this length
  blockread (IndFile,CurWord[4],length (OriginalWord)-3);
                                                // read the remaining letters of the word (after first 3 that we hashed)
  if uppercase (OriginalWord)=uppercase (CurWord) then begin
                                                // if its our word..
   seek (DefFile,SecDef+4*WordCount);           // find the pointer to it in the traget file
   blockread (DefFile,DefPointer,4);            // read this pointer
   if DefPointer>$FFFFFF then begin             // if its not a pointer to a definition, but to another word with the same definition...
    MainIndex:=100+4*((DefPointer shr 16) and $FFFF);
    seek (DefFile,MainIndex);                   // get a pointer to this new word...
    blockread (DefFile,SecDef,4);
    seek (DefFile,SecDef+4*(DefPointer and $FFFF));
    blockread (DefFile,DefPointer,4);
   end;
   Seek (DefFile,DefPointer);                   // seek to the pointer

   ReadDefinition;                              // decode the definition
   FoundWord:=true;                             // and mark we found it
  end;
  dec (NumberOfWords);
  inc (WordCount);
 end;
 if (not FoundWord) then begin                  // no words found, report it
  result:=3;
  exit;
 end;

 result:=0;
end;

procedure Register;
begin
  RegisterComponents('Samples', [TBabylon]);
end;

end.
