{ Conversion between various types of coding schemes.                  }
{ Coding schemes considered:                                           }
{  PC-Latin2, ISO-Latin2, KOI8, Kamenickych, IBM852, Cork              }
{  to small letters, to capitals, to ASCII                             }
{  Prague, May 26, 1993                 Oldrich Ulrych                 }
program cstocs;

uses dos,ouunit,strings;

const PROGRAMNAME = 'CSTOCS';              { name of this program   }
      NFTNAME     = PROGRAMNAME+'.#$-';    { name of temporary file if needed}
      BMAX        = 60000;                 { length of working buffers}
      CR          = chr(13);
      CRLF        = CR+chr(10);

const LETTERS     = [ord('A')..ord('Z'),ord('a')..ord('z')];
      DIGITS      = [ord('0')..ord('9')];
      HEXA        = [ord('0')..ord('9'),ord('A')..ord('F')];
      OCTAL       = [ord('0')..ord('7')];
      STOPCHARS   = [ord('\')];
      SPACES      = [#8,#10,#13,' '];

const uccase   : boolean = false;    { conversion to upper case }
      lccase   : boolean = false;    { conversion to lower case }
      ascase   : boolean = false;    { conversion to ASCII }
      noncheck : boolean = false;    { check for definition of spec. chars}
      icount   : integer = 0;        { counter of "bad" input character }
      ocount   : integer = 0;        { counter of "bad" output character }
      wcount   : integer = 0;
      teximprovs  : boolean = false;
      checkupper  : boolean = true;  { checks all non-alphabet upper charecters}
      ci       : byte = 255;         { coding scheme of input file }
      co       : byte = 255;         { coding scheme of output file }
			fileexists:boolean = true;     { output file exists}

type pbuf         = array[0..BMAX+TMAX] of byte;
     alphabet     = array[0..255] of byte;
     byteptr      = ^byte;

var  nfi, nfo  : string;      { names of input and output files }
     nft       : string;      { names of temporary file         }
     fi, fo    : file;        { input and output files }
     bufi, bufo: ^pbuf;       { input and output buffer }
     buft      : ^pbuf;       { temporary buffer }
     li        : longint;     { length of input file }
     nib, ntb, nob: word;
     pib, oldpib  : word;
     ptri, ptro: string;
     cs        : string;
     lcs       : byte;
     caseno    : byte;
     bracket   : boolean;
     scannedcs : boolean;
     auxs      : string;
     auxc, j   : integer;

procedure typeinfo(haltcode:integer);     { writes help                      }
begin;                                    { and halts with exit code HALTCODE}
writeln;
writeln(PROGRAMNAME+'v. 1.0 convertor of various coding schemes.            ');
writeln;
writeln('     '+PROGRAMNAME+' [options] i_file [options] [o_file] [options]');
writeln;
writeln('Options:');
writeln('-in       n is the coding page of input file (obligatory!)   ');
writeln('-om       m is the coding page of output file out_file       ');
writeln('-u        conversion to upper case                           ');
writeln('-l        conversion to lower case                           ');
writeln('-a        conversion to ascii (alphabet)                     ');
writeln('i_file    input file                                         ');
writeln('o_file    output file                                        ');
writeln('-n        doesn''t check if input file starts with           ');
writeln('          definitions of special characters                  ');
writeln('Coding pages:     0 - TeX sequences           ');
writeln('                  1 - Kamenicky               4 - KOI8');
writeln('                  2 - PC-Latin 2              5 - IBM page 852');
writeln('                  3 - ISO8859-Latin 2         6 - Cork scheme');
writeln('Return code: 0 succesful run,  1 bad number of parameters       ');
writeln('             2 bad code page,  3 bad input file, 5 warnings');
writeln('             6 errors ');
halt(haltcode);
end;

procedure commandline(var ci:byte; var nfi:string;{ code page and name of input}
          var co:byte; var nfo:string); { code page and name of output         }
var code    : integer;
    i,j     : integer;
    dir     : dirstr;
    name    : namestr;
    ext     : extstr;
    found   : searchrec;
    namei   : string;
    option  : string;
    optstr  : string;
begin;  j := paramcount; 
if j = 0 then typeinfo(1);    { bad number of parameters             }
nfi := ''; nfo := '';
i := 1;
while (i <= j) do
  if commandstr(i,option) then
    begin;
    if length(option) = 0 then typeinfo(2);
    fsplit(option,dir,name,ext);
    findfirst(dir+name+ext,anyfile,found);
    if (doserror <> 0) and (length(nfi) = 0) then typeinfo(3);
    if (length(nfi) > 0) then
      if (doserror <> 0) then fileexists := false else fileexists := true;
    option := dir+name+ext;
    if length(nfo) > 0 then typeinfo(1)
    else
      if length(nfi) > 0 then nfo := option
      else nfi := option;
    end;
if length(nfi) = 0 then typeinfo(1);
if length(nfo) = 0 then nfo := nfi;
i := 1;
while (i <= j) do
 if commandopt(i,option,optstr) then
   begin;
   if length(option) = 1 then
     case option[1] of
       'u': uccase  := true;
       'l': lccase  := true;
       'a': ascase  := true;
       'n': noncheck:= true;
       'i': begin;  val(optstr,ci,code);  if code <> 0 then typeinfo(2); end;
       'o': begin;  val(optstr,co,code);  if code <> 0 then typeinfo(2); end;
       else typeinfo(1);
       end;
  end;
nft := dir+NFTNAME;
if ci=255 then ci := co;
if co=255 then co := ci;
end;

procedure readinput(var li:longint; var pib,nib:word); { reads blok from input}
var maxr : word;
begin;
move(bufi^[pib],bufi^[0],nib-pib);
nib := nib-pib;  pib := 0;
maxr := BMAX - nib;
if maxr > li then maxr := li;
dec(li,maxr);
blockread(fi,bufi^[nib],maxr);
inc(nib,maxr);
end;

procedure writeoutput(var nob:word);    { writes block to output}
begin;
{$I-}
blockwrite(fo,bufo^[0],nob);
{$I+}
if IOresult > 0 then
  begin; writeln('Write device error - space ?'); halt(6); end;
nob := 0;
end;

procedure transinput(var pib:word; nib:word;
                     var ptri:string; var ntb:word);
var i     : word;
    j     : byte;
    ptrt  : string;
begin;  ptrt := ptri; ptrt[0] := #0;
for i:=pib to nib-1 do  buft^[i-pib] := ord(ptrt[bufi^[i]]);
oldpib := pib;
ntb := nib-pib;
pib := nib;
end;

procedure transoutput(var ntb:word; var ptro:string);
var i     : word;
    j     : byte;
    ptrt  : string;
begin;  ptrt := ptro; ptrt[0] := #0;
for i:=0 to ntb-1 do
  begin;
  bufo^[i] := ord(ptrt[buft^[i]]);
  if bufo^[i] = 0 then
    begin; inc(wcount);
    if ci = 0 then  bufo^[i] := buft^[i]
    else  bufo^[i] := bufi^[i+oldpib];
    for j:=128 to 255 do  if bufo^[i] = ord(ptrt[j]) then inc(ocount);
    end;
  end;
{$I-}
blockwrite(fo,bufo^[0],ntb); ntb := 0;
{$I+}
if IOresult > 0 then
  begin; writeln('Write device error - space ?'); halt(6); end;
end;

procedure texoutput(var ntb:word);
var i, j  : word;
    k, l  : integer;
begin;   j := 0;
for i := 0 to ntb-1 do
  begin; k := buft^[i];
  if (k = 0) and (ci > 0) then  begin k := bufi^[i+oldpib]; inc(icount); end;
  l := ord(reftex[k,0]);
  move(reftex[k,1],bufo^[j],l);
  inc(j,l);
  if j > BMAX - TMAX then
    begin;
    {$I-}
    blockwrite(fo,bufo^[0],j);
    {$I+}
    if IOresult > 0 then
      begin; writeln('Write device error - space ?'); halt(6); end;
    j := 0;
    end;
  end;
if j > 0 then
  begin;   {$I-}
  blockwrite(fo,bufo^[0],j);
  {$I+}
  if IOresult > 0 then
    begin; writeln('Write device error - space ?'); halt(6); end;
  end;
ntb := 0 ;
end;

procedure texinput(var pib,nib,ntb:word);

  function copybufi(var pib,nib,ntb:word) : boolean;
  var pia   : word;
	begin;  if ( li > 0 ) and ( pib > nib - 100 ) then
		begin; copybufi := false; exit;
		end;
  pia := pib;
  while (pia<nib) and not (bufi^[pia] in STOPCHARS) do inc(pia);
  if (pia>pib) and bracket then
     begin; buft^[ntb] := ord('{'); inc(ntb); bracket := false; end;
  move(bufi^[pib],buft^[ntb],pia-pib);
  inc(ntb,pia-pib);
  pib := pia;
  if pib <> nib  then copybufi := true
  else copybufi := false;
  end;

  function scancs(var pib,nib:word;  var lcs: byte) : string;
  var auxs : string;
      pia  : word;
  begin;  {if (li > 0) and (pib > nib - 100) then
  readinput(li,pib,nib); }
  auxs := chr(bufi^[pib]);  inc(pib);
  if pib < nib then  begin;  auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
  if ord(auxs[2]) in LETTERS then
    while (pib<nib) and (bufi^[pib] in LETTERS) do
      begin;  auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
  lcs := length(auxs);
  while (pib<nib) and (chr(bufi^[pib]) in SPACES) do
    begin;  auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
  scancs := auxs;
  end;

  function scanno(var pib,nib:word;  var lno: byte) : string;
  var auxs : string;
  begin;
  if pib < nib then
    case bufi^[pib] of
      ord(''''): begin;  auxs := ''''; inc(pib);
            while (pib<nib) and (bufi^[pib] in OCTAL) do
               begin; auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
            end;
      ord('"') : begin;  auxs := ''''; inc(pib);
            while (pib<nib) and (bufi^[pib] in HEXA) do
               begin; auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
            end;
      else  begin;  auxs := '';
            while (pib<nib) and (bufi^[pib] in DIGITS) do
               begin; auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
            end;
      end;
  lno := length(auxs);
  while (pib<nib) and (chr(bufi^[pib]) in SPACES) do
    begin;  auxs := auxs + chr(bufi^[pib]);  inc(pib);  end;
  scanno := auxs;
  end;

  function scanch(var pib,nib:word;  var ch: char) : string;
  var auxs : string;
  begin;  ch := #0;  auxs := '';
  if pib < nib then
    begin;  auxs := chr(bufi^[pib]); inc(pib);
    if auxs[1] = '{' then
      begin;
      if pib<nib then begin; auxs := auxs + chr(bufi^[pib]); inc(pib); end;
      if pib<nib then begin; auxs := auxs + chr(bufi^[pib]); inc(pib); end;
      if (auxs[1] = '{') and (auxs[3] = '}') then ch := auxs[2];
      if (auxs[1] = '{') and (auxs[2] = '\') then
        begin;
        if pib<nib then begin; auxs := auxs + chr(bufi^[pib]); inc(pib); end;
        if (length(auxs)=4) and (auxs[4]='}') then ch := auxs[3];
        end;
      end
    else
      if auxs[1] = '\' then
        begin;
        if pib<nib then begin; auxs := auxs + chr(bufi^[pib]); inc(pib); end;
        if ((pib<nib) and not (bufi^[pib] in LETTERS)) or (pib=nib) then
          ch := auxs[2];
        end
       else  ch :=auxs[1];
    end;
  scanch := auxs;
  end;

  function valno(snum:string):byte;
  var base,i,j,k:byte;
      ch        :char;
  begin;
  case snum[1] of
     '''' : begin; base := 8;   delete(snum,1,1);
            end;
     '"'  : begin; base := 16;  delete(snum,1,1);
            end;
     else   begin; base := 10;
            end;
     end;
  i:=0;
  for j:=1 to length(snum) do
    begin;  ch := upcase(snum[j]);
    if ord(ch) > 64 then  i := i * base + ord(ch) - ord('A') + 10
    else i := i * base + ord(ch) - ord('0');
    end;
  valno := i;
  end;

  procedure movebracket(var ntb: word);
  begin;
  if bracket then  begin; buft^[ntb] := ord('{'); inc(ntb);  end;
  bracket := false;
  end;

  function putchars(var ntb : word; var cs: string; caseno: byte;var ch: char)
                                                                     : boolean;

  var onechar : char;

     procedure copycs;
     begin;  movebracket(ntb);
     move(cs[1],buft^[ntb],ord(cs[0])); inc(ntb,ord(cs[0])-1);
     putchars := false;
     end;

  begin;  putchars := true;
  case caseno of
     1 : case ch of                 { \accent23 }
           'u' : onechar := '';           'U' : onechar := '';
           else  copycs;
           end;
     2 : case ch of                 { \v }
           'c' : onechar := '';           'C' : onechar := '';
           'd' : onechar := '';           'D' : onechar := '';
           'e' : onechar := '';           'E' : onechar := '';
           'l' : onechar := '';           'L' : onechar := '';
           'n' : onechar := '';           'N' : onechar := '';
           'r' : onechar := '';           'R' : onechar := '';
           's' : onechar := '';           'S' : onechar := '';
           't' : onechar := '';           'T' : onechar := '';
           'z' : onechar := '';           'Z' : onechar := '';
           else  copycs;
           end;
     3 : case ch of                 { \' }
           'a' : onechar := '';           'A' : onechar := '';
           'e' : onechar := '';           'E' : onechar := '';
           'i' : onechar := '';           'I' : onechar := '';
           'l' : onechar := '';           'L' : onechar := '';
           'o' : onechar := '';           'O' : onechar := '';
           'r' : onechar := '';           'R' : onechar := '';
           'u' : onechar := '';           'U' : onechar := '';
           'y' : onechar := '';           'Y' : onechar := '';
           else  copycs;
           end;
     4 : case ch of                 { \" }
           'u' : onechar := '';           'U' : onechar := '';
           'a' : onechar := '';           'A' : onechar := '';
           'o' : onechar := '';           'O' : onechar := '';
           else  copycs;
           end;
     5 : case ch of                 { \^ }
           'o' : onechar := '';           'O' : onechar := '';
           else  copycs;
           end;
    21 : onechar := '';
    22 : onechar := '';
    23 : onechar := '';
    24 : onechar := '';
    end;
  ch := onechar;
  end;

  procedure ignoregroup(var pib,nib:word; var cs:string);
  begin;
  if (pib < nib-1) and (chr(bufi^[pib]) = '{') and (chr(bufi^[pib+1]) = '}') then
    begin;  cs := cs + '{}';
    inc(pib,2);
    end;
  end;

var auxst  : string;
    accno  : string;
    chars  : string;
    lno    : byte;
    ch     : char;

begin;
while copybufi(pib,nib,ntb) do
	begin;  caseno := 0; scannedcs := false;
  case bufi^[pib] of
{    ord('{') :  begin; bracket := true;  inc(pib);  end;}
    ord('\') :  begin;  cs := scancs(pib,nib,lcs);
                auxst := copy(cs,1,lcs);
                if auxst='\accent' then
                  begin; accno := scanno(pib,nib,lno);
                  cs := cs + accno;
                  lcs := length(cs);
                  case valno(copy(accno,1,lno)) of

										 23 : caseno := 1;
                     20 : caseno := 2;
                     18 : caseno := 3;
                    127 : caseno := 4;
                     94 : caseno := 5;
                     else  begin;  movebracket(ntb);
                           move(cs[1],buft^[ntb],lcs); inc(ntb,lcs);
                           end;
                     end;
                  end
                else if auxst='\v'   then   caseno := 2
                else if auxst='\'''  then   caseno := 3
                else if auxst='\"'   then   caseno := 4
                else if auxst='\^'   then   caseno := 5
                else if auxst=CZED   then   caseno := 21
                else if auxst=CZET   then   caseno := 22
                else if auxst=SLOSL  then   caseno := 23
                else if auxst=SLOLL  then   caseno := 24
                else     begin; movebracket(ntb); lcs := length(cs);
                         move(cs[1],buft^[ntb],lcs); inc(ntb,lcs);
                         end;
                if (caseno>0) and (caseno<20) then
                  begin;  chars := scanch(pib,nib,ch);
                  cs := cs + chars;
                  end;
                if caseno>0 then
                  begin;
                  if putchars(ntb,cs,caseno,ch) then
                    begin;
                    if bracket and (pib < nib ) and (bufi^[pib] = ord('}')) then
                      begin; buft^[ntb] := ord(ch); inc(ntb); inc(pib); end
                    else
                      begin; movebracket(ntb);
                      buft^[ntb] := ord(ch); inc(ntb);
                      end;
                    end;
                  bracket := false;
                  ignoregroup(pib,nib,cs);
                  end;
                end;
    else        begin;  movebracket(ntb);
                buft^[ntb] := bufi^[pib]; inc(ntb); inc(pib);
                end;
    end;
  end;
end;

procedure ucconv(ntb:word);
var i : word;
begin;  uccodet[0] := #0;
for i:=0 to ntb-1 do  buft^[i] := ord(uccodet[buft^[i]]);
end;

procedure lcconv(ntb:word);
var i : word;
begin;  lccodet[0] := #0;
for i:=0 to ntb-1 do  buft^[i] := ord(lccodet[buft^[i]]);
end;

procedure asconv(ntb:word);
var i : word;
begin;  ascodet[0] := #0;
for i:=0 to ntb-1 do  buft^[i] := ord(ascodet[buft^[i]]);
end;

begin;  new(bufi);  new(bufo);  new(buft);
commandline(ci,nfi,co,nfo);   { fileexists = true if output file exists }
case ci of
   0 : ;
   1 : ptri := ascii + kamr;
   2 : ptri := ascii + pclr;
   3 : ptri := ascii + isor;
   4 : ptri := ascii + koir;
   5 : ptri := ascii + ibmr;
   6 : ptri := ascii + corr;
   else typeinfo(2);
   end;
case co of
   0 : ;
   1 : ptro := ascii + rkam;
   2 : ptro := ascii + rpcl;
   3 : ptro := ascii + riso;
   4 : ptro := ascii + rkoi;
   5 : ptro := ascii + ribm;
   6 : ptro := ascii + rcor;
   else typeinfo(2);
   end;
assign(fi,nfi);  reset(fi,1);
assign(fo,nft);  {$I-} rewrite(fo,1); {$I+}
if IOresult > 0 then
  begin;
  writeln('Write device error - cannot create the output file.');
  halt(6);
  end;
li := filesize(fi);                 { unread length of input file }
nib := 0;                           { points next free byte in input buffer}
ntb := 0;                           { points next free byte in temporary buffer}
pib := 0;                           { points next unread byte in input buffer}
bracket := false;
if co = 0 then
  begin;
  auxs := DEFCZEDA+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  auxs := DEFCZEDB+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  auxs := DEFCZETA+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  auxs := DEFCZETB+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  auxs := DEFSLOSL+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  auxs := DEFSLOLL+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  auxs := ENDSPEC+CRLF+CRLF; move(auxs[1],buft^[ntb],length(auxs)); inc(ntb,length(auxs));
  texoutput(ntb);
  end;
if (li > 0) then readinput(li,pib,nib);
if not noncheck then
  begin; j := 0; auxc := 1;
	while (auxc < 10) and not noncheck do
    begin; auxs := '';
		while(length(auxs) < 254) and (bufi^[j] <> ord(CR)) do
			begin; auxs := auxs + chr(bufi^[j]); inc(j);
			end;
		inc(j,2);
    inc(auxc);
    if auxs = ENDSPEC then
      begin;
      pib := j;
      if bufi^[j] = ord(CR) then inc(pib,2);
      noncheck := true;
      end;
    end;
  end;
while (li > 0) or (pib < nib) do
  begin;
  if (li > 0) then readinput(li,pib,nib);
  if ci > 0 then transinput(pib,nib,ptri,ntb)  { bufi -> buft }
	else
    texinput(pib,nib,ntb);
  if not lccase and uccase then ucconv(ntb);
  if not uccase and lccase then lcconv(ntb);
  if ascase then asconv(ntb);
	if co > 0 then
		transoutput(ntb,ptro)      { buft -> bufo }
	else
		texoutput(ntb);
  end;
close(fi);  close(fo);
if wcount > 0 then
  begin;
  writeln('Warning: ',wcount,' non-alphabet character(s) in input file.'+CRLF,
           '        (They are unchanged.)');
  caseno := 5;
  end
else caseno := 0;
if (icount + ocount = 0)  then
  if fileexists then
    begin;  assign(fi,nfo);
    erase(fi);
    rename(fo,nfo);
    end
  else rename(fo,nfo)
else
  begin;
  if ocount > 0 then
    writeln('ERROR !! ',ocount,
           ' input non-alphabet character(s) will look like some '+CRLF,
           '       alphabet character(s) in the output file !!!'+CRLF,
           '       (Reverse conversion doesn''t give the original.)');
  if icount > 0 then
    begin;
    writeln('ERROR !! ',icount,
           ' input character(s) above 127 cannot be converted '+CRLF,
           '       to \TeX sequences !!');
    end;
  writeln('Converted file is saved in  '+NFTNAME);
  caseno := 6;
  end;
halt(caseno);
end.
