Talk:System.Classes.TStrings.LoadFromFile

From RAD Studio API Documentation
Jump to: navigation, search

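When loading extremely large files with LoadFromFile, the process's address space can become fragmented, so that a later request for one large contiguous block of memory fails. For example, loading a 265 MB file might succeed once but raise EOutOfMemory when it is loaded a second time in a row.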

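Here is some code that can be used to avoid the EOutOfMemory exception. Instead of reading the whole file into a single buffer, it reads the data in chunks of about 128 KB and appends the lines of each chunk to the string list.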

type
  // Static helper (class methods only) that fills a TStrings instance from a
  // file or stream chunk by chunk instead of through one large buffer.
  TStringsIO = class
  private
    // Splits Text into lines and appends them to Strings.
    class procedure AddText(Strings: TStrings; const Text: string);
  public
    // Loads FileName into Strings; the encoding is detected from the data.
    class procedure LoadFromFile(Strings: TStrings;
      const FileName: string); overload;
    // Loads FileName into Strings using Encoding (nil = detect it).
    class procedure LoadFromFile(Strings: TStrings;
      const FileName: string; Encoding: TEncoding); overload;
    // Loads the rest of Stream into Strings; the encoding is detected from
    // the data.
    class procedure LoadFromStream(Strings: TStrings;
      Stream: TStream); overload;
    // Loads the rest of Stream into Strings using Encoding (nil = detect it).
    class procedure LoadFromStream(Strings: TStrings;
      Stream: TStream; Encoding: TEncoding); overload;
  end;

class procedure TStringsIO.AddText(Strings: TStrings; const Text: string);
var
  Parsed: TStringList;
begin
  // Let a temporary list split Text into lines, then append those lines to
  // the caller's list; the existing contents of Strings are left untouched.
  Parsed := TStringList.Create;
  try
    Parsed.Text := Text;
    Strings.AddStrings(Parsed);
  finally
    // The temporary list is released even when parsing or appending raises.
    Parsed.Free;
  end;
end;

class procedure TStringsIO.LoadFromFile(Strings: TStrings;
  const FileName: string);
begin
  // Forward to the three-argument overload; a nil encoding asks it to detect
  // the encoding from the data.
  TStringsIO.LoadFromFile(Strings, FileName, nil);
end;

class procedure TStringsIO.LoadFromFile(Strings: TStrings;
  const FileName: string; Encoding: TEncoding);
var
  Source: TFileStream;
begin
  // Open the file read-only; other processes may keep reading it but cannot
  // write to it while it is being loaded.
  Source := TFileStream.Create(FileName, fmShareDenyWrite or fmOpenRead);
  try
    LoadFromStream(Strings, Source, Encoding);
  finally
    // Close the file even when loading raises.
    Source.Free;
  end;
end;

class procedure TStringsIO.LoadFromStream(Strings: TStrings; Stream: TStream);
begin
  // Forward to the three-argument overload; a nil encoding asks it to detect
  // the encoding from the data.
  TStringsIO.LoadFromStream(Strings, Stream, nil);
end;

// Replaces the contents of Strings with the text read from Stream, starting at
// the stream's current position and reading to its end.
//
// The data is read in chunks and only whole lines are decoded and appended per
// chunk, so neither the raw bytes nor the decoded text of the entire stream
// have to exist as one contiguous block.
//
// Strings  - list to fill; it is cleared first.
// Stream   - source; must report a meaningful Size and Position.
// Encoding - encoding of the data, or nil to detect it from a byte order mark
//            (falling back to the default encoding chosen by
//            TEncoding.GetBufferEncoding). A byte order mark at the start of
//            the data is skipped in both cases.
//
// Raises EReadError when the stream delivers fewer bytes than it reported.
//
// Only a line feed is used as a safe split point between chunks. Data that
// contains no line feed at all (for example CR-only line endings) is
// accumulated in a growing buffer and decoded in one piece at the end.
class procedure TStringsIO.LoadFromStream(Strings: TStrings; Stream: TStream;
  Encoding: TEncoding);
const
  InitialBufferSize = 128 * 1024;
  LineFeed = 10;
var
  Remaining: Int64;
  Buffer: TBytes;
  BufferSize: Integer;       // number of valid bytes in Buffer
  ReadSize: Integer;
  ExtractIndex: Integer;     // first byte to decode (skips a pending BOM)
  ExtractSize: Integer;      // end offset of the bytes to decode
  UnprocessedSize: Integer;
  CharSize: Integer;         // bytes per code unit: 2 for UTF-16, else 1
  LineFeedOffset: Integer;   // position of the 0A byte inside a UTF-16 unit
  FirstChunk: Boolean;

  // Returns the offset just past the last complete line feed in
  // Buffer[ExtractIndex..BufferSize), or ExtractIndex when there is none.
  // The scan steps in whole code units so that a UTF-16 character is never
  // cut in half and a 0A byte that belongs to another character is ignored.
  function LastLineEnd: Integer;
  begin
    Result := ExtractIndex +
      ((BufferSize - ExtractIndex) div CharSize) * CharSize;
    while Result > ExtractIndex do
    begin
      if CharSize = 1 then
      begin
        if Buffer[Result - 1] = LineFeed then
          Exit;
      end
      else if (Buffer[Result - 2 + LineFeedOffset] = LineFeed) and
        (Buffer[Result - 1 - LineFeedOffset] = 0) then
        Exit;
      Dec(Result, CharSize);
    end;
  end;

begin
  // One update bracket for the whole load, so that change notifications are
  // not fired once per chunk.
  Strings.BeginUpdate;
  try
    Strings.Clear;
    SetLength(Buffer, InitialBufferSize);
    BufferSize := 0;
    ExtractIndex := 0;
    CharSize := 1;
    LineFeedOffset := 0;
    FirstChunk := True;

    while True do
    begin
      Remaining := Stream.Size - Stream.Position;
      if Remaining <= 0 then
        Break;

      // A full buffer at this point means the previous pass found no line
      // end in it: the current line is longer than the buffer. Grow the
      // buffer instead of splitting the line at an arbitrary byte.
      if BufferSize = Length(Buffer) then
        SetLength(Buffer, Length(Buffer) * 2);

      // fill the buffer; ReadBuffer raises instead of silently returning
      // fewer bytes, so no stale bytes are ever decoded
      ReadSize := Length(Buffer) - BufferSize;
      if Remaining < ReadSize then
        ReadSize := Integer(Remaining);
      Stream.ReadBuffer(Buffer[BufferSize], ReadSize);
      Inc(BufferSize, ReadSize);

      if FirstChunk then
      begin
        FirstChunk := False;
        // With Encoding = nil this detects the encoding from the BOM; with an
        // explicit encoding it only reports the length of that encoding's
        // BOM when the data starts with it. Either way the BOM is skipped.
        ExtractIndex := TEncoding.GetBufferEncoding(Buffer, Encoding);
        if ExtractIndex > BufferSize then
          ExtractIndex := BufferSize;
        if Encoding is TUnicodeEncoding then
        begin
          CharSize := 2;
          if Encoding is TBigEndianUnicodeEncoding then
            LineFeedOffset := 1;
        end;
      end;

      if ReadSize < Remaining then
        // more data follows: postpone the last, possibly incomplete line
        ExtractSize := LastLineEnd
      else
        // end of stream: everything that is left belongs to the text
        ExtractSize := BufferSize;

      if ExtractSize > ExtractIndex then
      begin
        // parse and add complete lines
        AddText(Strings, Encoding.GetString(Buffer, ExtractIndex,
          ExtractSize - ExtractIndex));

        // move the unprocessed tail to the front of the buffer
        UnprocessedSize := BufferSize - ExtractSize;
        if UnprocessedSize > 0 then
          Move(Buffer[ExtractSize], Buffer[0], UnprocessedSize);
        BufferSize := UnprocessedSize;
        ExtractIndex := 0;
      end;
    end;
  finally
    Strings.EndUpdate;
  end;
end;

Usage

// Replaces the contents of MyStringList with the lines of d:\bigfile.txt.
// No encoding is passed, so it is detected from the file's data.
// (MyStringList is assumed to be an already created TStrings descendant.)
TStringsIO.LoadFromFile(MyStringList, 'd:\bigfile.txt');