Skip to content

Commit

Permalink
feat: Tests for UTF16/UTF32 with BOM
Browse files Browse the repository at this point in the history
Clean-up code to use `TBytes` instead of String
  • Loading branch information
gcarreno committed Jan 15, 2024
1 parent 06e8c59 commit d393f64
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 119 deletions.
13 changes: 8 additions & 5 deletions src/text/opp.text.pas
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@ interface
;

const
// Byte-order-mark (BOM) signatures for the supported Unicode encodings.
// This hunk shows the earlier String-typed declarations immediately followed
// by the TBytes declarations that replace them (same names, same byte values).
cBOMUTF8 : String = #$EF#$BB#$BF; // EF BB BF
cBOMUTF16BE : String = #$FE#$FF; //FE FF
cBOMUTF16LE : String = #$FF#$FE; //FF FE
cBOMUTF32BE : String = #$00#$00#$FE#$FF; //00 00 FE FF
cBOMUTF32LE : String = #$00#$00#$FF#$FE; //00 00 FF FE
// TBytes versions: each array element is one byte of the signature, in file order.
cBOMUTF8 : TBytes = ($EF, $BB, $BF);
cBOMUTF16BE : TBytes = ($FE, $FF);
cBOMUTF16LE : TBytes = ($FF, $FE);
cBOMUTF32BE : TBytes = ($00, $00, $FE, $FF);
// NOTE(review): the Unicode standard lists the UTF-32LE BOM as FF FE 00 00,
// not 00 00 FF FE as declared here - confirm against the specification.
// Be aware that FF FE 00 00 starts with the UTF-16LE signature (FF FE), so
// correcting this value also requires probing UTF-32 before UTF-16.
cBOMUTF32LE : TBytes = ($00, $00, $FF, $FE);
// Number of bytes in each BOM family above (UTF-8: 3, UTF-16: 2, UTF-32: 4).
cBOMUTF8Len = 3;
cBOMUTF16Len = 2;
cBOMUTF32Len = 4;

type
{ TTextFileType }
Expand Down
151 changes: 40 additions & 111 deletions src/text/opp.text.sourcefile.pas
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ implementation

constructor TTextSourceFile.Create(const AFileName: String);
var
buffer: Byte;
bytesread: Int64;
buffer: TBytes;
BOMTest: String;
begin
FSourceFileStream:= nil;
Expand All @@ -74,137 +73,67 @@ constructor TTextSourceFile.Create(const AFileName: String);

FFileType:= tftUnknown;
FHasBOM:= False;
buffer:= 0;

// For UTF32
FSourceFileStream.Position:= 0; // Just in case
BOMTest:= EmptyStr;
if FSourceFileStream.Size >= 4 then
begin
bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

if BOMTest = cBOMUTF32BE then
begin
FFileType:= tftUTF32BE;
FHasBOM:= True;
end
else
if BOMTest = cBOMUTF16LE then
begin
FFileType:= tftUTF32LE;
FHasBOM:= True;
end
end;

// For UTF8
FSourceFileStream.Position:= 0; // Just in case
BOMTest:= EmptyStr;
if FSourceFileStream.Size >= 3 then
if (FFileType = tftUnknown) and (FSourceFileStream.Size >= 3) then
begin
bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

if Copy(BOMTest, 1, 3) = cBOMUTF8 then
FSourceFileStream.Position:= 0; // Just in case
BOMTest:= EmptyStr;

SetLength(buffer, cBOMUTF8Len);
FSourceFileStream.Read(buffer[0], cBOMUTF8Len);

if CompareByte(buffer[0], cBOMUTF8[0], cBOMUTF8Len) = 0 then
begin
FFileType:= tftUTF8;
FHasBOM:= True;
end;
end;

// For UTF16
FSourceFileStream.Position:= 0; // Just in case
BOMTest:= EmptyStr;
if FSourceFileStream.Size >= 2 then
if (FFileType = tftUnknown) and (FSourceFileStream.Size >= 2) then
begin
bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

bytesread:= FSourceFileStream.Read(buffer, SizeOf(buffer));
if bytesRead = 0 then raise ETextSourceFilePrematureEOF.Create(
Format(
rsETextSourceFilePrematureEOF,
[ FFilename ]
)
);
BOMTest:= BOMTest + Char(buffer);

if Copy(BOMTest, 1, 2) = cBOMUTF16BE then
FSourceFileStream.Position:= 0; // Just in case
BOMTest:= EmptyStr;

SetLength(buffer, cBOMUTF16Len);
FSourceFileStream.Read(buffer[0], cBOMUTF16Len);

if CompareByte(buffer[0], cBOMUTF16BE[0], cBOMUTF16Len) = 0 then
begin
FFileType:= tftUTF16BE;
FHasBOM:= True;
end
else
if Copy(BOMTest, 1, 2) = cBOMUTF16LE then
if CompareByte(buffer[0], cBOMUTF16LE[0], cBOMUTF16Len) = 0 then
begin
FFileType:= tftUTF16LE;
FHasBOM:= True;
end;
end;

// For UTF32
if (FFileType = tftUnknown) and (FSourceFileStream.Size >= 4) then
begin
FSourceFileStream.Position:= 0;
BOMTest:= EmptyStr;

SetLength(buffer, cBOMUTF32Len);
FSourceFileStream.Read(buffer[0], cBOMUTF32Len);

if CompareByte(buffer[0], cBOMUTF32BE[0], cBOMUTF32Len) = 0 then
begin
FFileType:= tftUTF32BE;
FHasBOM:= True;
end
else
if CompareByte(buffer[0], cBOMUTF32LE[0], cBOMUTF32Len) = 0 then
begin
FFileType:= tftUTF32LE;
FHasBOM:= True;
end;
end;

case FFileType of
tftUnknown: begin
{ #todo 999 -ogcarreno : We are assuming this for the time being }
Expand Down
58 changes: 55 additions & 3 deletions tests/text/testobjectpascalparsertextsourcefile.pas
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ TTestObjectPascalParserTextSourceFile= class(TTestCase)
procedure TestObjectPascalParserTextSourceGetNextCharAnsi;
procedure TestObjectPascalParserTextSourceGetNextCharUTF8;
procedure TestObjectPascalParserTextSourceGetNextCharBOMUTF8;
procedure TestObjectPascalParserTextSourceGetNextCharBOMUTF16BE;
procedure TestObjectPascalParserTextSourceGetNextCharBOMUTF16LE;
procedure TestObjectPascalParserTextSourceGetNextCharBOMUTF32BE;
procedure TestObjectPascalParserTextSourceGetNextCharBOMUTF32LE;
end;

implementation
Expand All @@ -40,9 +44,13 @@ implementation
;

const
// Fixture contents written to temporary files by the tests in this unit.
// This hunk shows the previous three declarations followed by the re-aligned
// set that adds the UTF16/UTF32 BOM fixtures.
cSourceFileContentAnsi = 'program';
cSourceFileContentUTF8 = 'program Test🌟';
cSourceFileContentBOMUTF8 = #$EF#$BB#$BF'program Test🌟';
cSourceFileContentAnsi = 'program';
cSourceFileContentUTF8 = 'program Test🌟';
cSourceFileContentBOMUTF8 = #$EF#$BB#$BF'program Test🌟';
// The UTF16/UTF32 fixtures consist of the BOM bytes only; no text follows.
cSourceFileContentBOMUTF16BE = #$FE#$FF;
cSourceFileContentBOMUTF16LE = #$FF#$FE;
cSourceFileContentBOMUTF32BE = #$00#$00#$FE#$FF;
// NOTE(review): the standard UTF-32LE BOM is FF FE 00 00; this fixture uses
// 00 00 FF FE - confirm which byte order is intended.
cSourceFileContentBOMUTF32LE = #$00#$00#$FF#$FE;

procedure TTestObjectPascalParserTextSourceFile.TestSourceFileCreateException;
begin
Expand Down Expand Up @@ -311,6 +319,50 @@ procedure TTestObjectPascalParserTextSourceFile.TestObjectPascalParserTextSource
end;
end;

{ Opens a source file built from cSourceFileContentBOMUTF16BE and checks that
  it is reported as tftUTF16BE with the HasBOM flag set. }
procedure TTestObjectPascalParserTextSourceFile.TestObjectPascalParserTextSourceGetNextCharBOMUTF16BE;
var
  bomFileName: String;
begin
  // DumpToTempFile presumably stores the fixture in a temporary file and
  // returns that file's name - verify against its definition.
  bomFileName:= DumpToTempFile(cSourceFileContentBOMUTF16BE);
  FSourceFile:= TTextSourceFile.Create(bomFileName);
  try
    // Compare the textual form of the type so a failure message is readable.
    AssertEquals(
      'Text Source File is UTF16BE',
      TextFileTypeToString(tftUTF16BE),
      TextFileTypeToString(FSourceFile.FileType)
    );
    AssertTrue(
      'Text Source File Has BOM',
      FSourceFile.HasBOM
    );
  finally
    // Always release the source file, even when an assertion fails.
    FSourceFile.Free;
  end;
end;

{ Opens a source file built from cSourceFileContentBOMUTF16LE and checks that
  it is reported as tftUTF16LE with the HasBOM flag set. }
procedure TTestObjectPascalParserTextSourceFile.TestObjectPascalParserTextSourceGetNextCharBOMUTF16LE;
var
  bomFileName: String;
begin
  // DumpToTempFile presumably stores the fixture in a temporary file and
  // returns that file's name - verify against its definition.
  bomFileName:= DumpToTempFile(cSourceFileContentBOMUTF16LE);
  FSourceFile:= TTextSourceFile.Create(bomFileName);
  try
    // Compare the textual form of the type so a failure message is readable.
    AssertEquals(
      'Text Source File is UTF16LE',
      TextFileTypeToString(tftUTF16LE),
      TextFileTypeToString(FSourceFile.FileType)
    );
    AssertTrue(
      'Text Source File Has BOM',
      FSourceFile.HasBOM
    );
  finally
    // Always release the source file, even when an assertion fails.
    FSourceFile.Free;
  end;
end;

{ Opens a source file built from cSourceFileContentBOMUTF32BE and checks that
  it is reported as tftUTF32BE with the HasBOM flag set. }
procedure TTestObjectPascalParserTextSourceFile.TestObjectPascalParserTextSourceGetNextCharBOMUTF32BE;
var
  bomFileName: String;
begin
  // DumpToTempFile presumably stores the fixture in a temporary file and
  // returns that file's name - verify against its definition.
  bomFileName:= DumpToTempFile(cSourceFileContentBOMUTF32BE);
  FSourceFile:= TTextSourceFile.Create(bomFileName);
  try
    // Compare the textual form of the type so a failure message is readable.
    AssertEquals(
      'Text Source File is UTF32BE',
      TextFileTypeToString(tftUTF32BE),
      TextFileTypeToString(FSourceFile.FileType)
    );
    AssertTrue(
      'Text Source File Has BOM',
      FSourceFile.HasBOM
    );
  finally
    // Always release the source file, even when an assertion fails.
    FSourceFile.Free;
  end;
end;

{ Opens a source file built from cSourceFileContentBOMUTF32LE and checks that
  it is reported as tftUTF32LE with the HasBOM flag set. }
procedure TTestObjectPascalParserTextSourceFile.TestObjectPascalParserTextSourceGetNextCharBOMUTF32LE;
var
  bomFileName: String;
begin
  // DumpToTempFile presumably stores the fixture in a temporary file and
  // returns that file's name - verify against its definition.
  bomFileName:= DumpToTempFile(cSourceFileContentBOMUTF32LE);
  FSourceFile:= TTextSourceFile.Create(bomFileName);
  try
    // Compare the textual form of the type so a failure message is readable.
    AssertEquals(
      'Text Source File is UTF32LE',
      TextFileTypeToString(tftUTF32LE),
      TextFileTypeToString(FSourceFile.FileType)
    );
    AssertTrue(
      'Text Source File Has BOM',
      FSourceFile.HasBOM
    );
  finally
    // Always release the source file, even when an assertion fails.
    FSourceFile.Free;
  end;
end;

initialization

RegisterTest(TTestObjectPascalParserTextSourceFile);
Expand Down

0 comments on commit d393f64

Please sign in to comment.