unit HTMLReplace;

{Search and Replace in HTML files}

interface

uses SysUtils,Dialogs;

procedure ReplaceStringsHTML(FileName,_OldStr,_NewStr:string;Options:TFindOptions);
{this procedure takes an HTML file stored at FileName, searches for all occurrences
of _OldStr and replaces them with the string _NewStr
Options:TFindOptions is only used to determine if replace is case sensitive
The procedure replaces texts interrupted by markup but does not iterate over
subframes. Global variables are used instead of function parameters to increase
the speed of replace procedures.
You are free to use this code. If you find an error please write to bugs(AT)supermemo(.)com

This code is part of SuperMemo 2001 source code
Written at SuperMemo R&D, December 2001
Used in TWeb component derived from TWebBrowser employed in HTML-based incremental reading
Replacing 2000 texts in a 500K file takes about 1 second. The same file takes 11 seconds
to load to Internet Explorer 6.0}
implementation

uses Basic,Files,Dial;

{those units can be removed and simple replacement procedures can be used:
const nl=#13#10
EError = ShowMessage
ReadStringFromTXTFile and WriteStringToTXTFile are simple to write and available on request}

type TMatchRecord=record
     StartPos:integer;
     MatchPos:byte;
     ReplaceStr:string;
     end;

{Global variables are used instead of function parameters to increase the speed}
var MatchRecs:array of TMatchRecord;
    OldPosit,NewPosit:integer;
    MatchRecsNo:byte;
    OldText,NewText,OldStr,NewStr:string;
    CaseSensitive:boolean;

procedure AddNewMatch;
begin
  inc(MatchRecsNo);
  SetLength(MatchRecs,MatchRecsNo);
  MatchRecs[MatchRecsNo-1].StartPos:=NewPosit-1;
  MatchRecs[MatchRecsNo-1].MatchPos:=0;
  MatchRecs[MatchRecsNo-1].ReplaceStr:='';
end;

procedure DeleteMatch(MatchNo:byte);
var m:byte;
begin
  for m:=MatchNo to MatchRecsNo-1 do
      MatchRecs[m-1]:=MatchRecs[m];
  dec(MatchRecsNo);
  {SetLength(MatchRecsNo) is not used to speed up the process}
end;

procedure ReplaceMatch(MatchNo:byte);
var strg:string;
    dif:integer;
begin
  NewText:=copy(NewText,1,MatchRecs[MatchNo-1].StartPos-1); {copy the correct part of the new text}
  NewText:=NewText+MatchRecs[MatchNo-1].ReplaceStr; {copy the replacement text}
  dif:=length(NewStr)-length(OldStr);
  if dif>0 then begin {add the rest of new new string if it is longer than the old one}
     strg:=copy(NewStr,length(OldStr)+1,dif);
     NewText:=NewText+strg;
     end;
  NewPosit:=length(NewText)+1;
end;

procedure AugmentMatch(MatchNo:byte;ch:char);
begin
  with MatchRecs[MatchNo-1] do begin
     if MatchPos<=length(NewStr) then
        ReplaceStr:=ReplaceStr+NewStr[MatchPos];
     if MatchPos=length(OldStr) then begin {full match found}
        ReplaceMatch(MatchNo); {replace the match text with NewStr}
        DeleteMatch(MatchNo); {delete the successful match}
        end;
     end;
end;

procedure CheckMatch(MatchNo:byte;ch:char);
var Equal:boolean;
begin
  with MatchRecs[MatchNo-1] do begin
     inc(MatchPos);
     if CaseSensitive then
        Equal:=OldStr[MatchPos]=ch
     else
        Equal:=UpCase(OldStr[MatchPos])=UpCase(ch);
     if Equal then
        AugmentMatch(MatchNo,ch) {keep on adding chars to a match that is successful}
     else
        DeleteMatch(MatchNo); {delete a match that failed}
     end;
end;

procedure CheckMatches(ch:char);
var m:byte;
    Equal:boolean;
begin
  NewText:=NewText+ch;
  inc(NewPosit);
  if CaseSensitive then
     Equal:=ch=OldStr[1]
  else
     Equal:=UpCase(ch)=UpCase(OldStr[1]);
  if Equal then
     AddNewMatch; {if the first character matches the search string, create a new match}
  for m:=MatchRecsNo downto 1 do {DeleteMatch renumbers matches - this is why the downward checkup}
      CheckMatch(m,ch); {delete all matches that fail on this character}
end;

procedure AddToMatches(ch:char); {copy character to all match instances}
var m:byte;
begin
  for m:=1 to MatchRecsNo do
      MatchRecs[m-1].ReplaceStr:=MatchRecs[m-1].ReplaceStr+ch;
  NewText:=NewText+ch;
  Inc(NewPosit);
end;

procedure PrepareString(var TheString:string);
begin
  ReplaceString(TheString,'&','&amp;'); {replace all & with &amp; in TheString}
  ReplaceString(TheString,'<','&lt;');
  ReplaceString(TheString,'>','&gt;');
end;

procedure ReplaceStringsHTML(FileName,_OldStr,_NewStr:string;Options:TFindOptions);
var InMarkup:boolean;
    ch:char;
begin
  try
     OldStr:=_OldStr;
     NewStr:=_NewStr;
     PrepareString(OldStr);
     PrepareString(NewStr);
     OldText:=Files.ReadStringFromTXTFile(FileName); {read the file into a string}
     NewText:='';
     OldPosit:=1;
     NewPosit:=1;
     MatchRecsNo:=0;
     InMarkup:=false;
     CaseSensitive:=(frMatchCase in Options);
     while OldPosit<=length(OldText) do begin
       ch:=OldText[OldPosit];
       if ch='<' then
          InMarkup:=true;
       if InMarkup then
          AddToMatches(ch) {copy markup literally}
       else
          CheckMatches(ch); {replace texts that match the search string}
       if ch='>' then
          InMarkup:=false;
       inc(OldPosit);
       end;
     Files.WriteStringToTXTFile(FileName,NewText); {write the string back to the original file}
     MatchRecs:=nil;
  except
    on E:Exception do EError('Error replacing texts in HTML file'+nl+FileName,E);
    end;
end;

end.