CharacterSurrogates (Delphi)
Contents
Description
This example demonstrates a routine that converts a UTF-16 string to a UCS4 string. The routine uses the functionality exposed by the Character unit.
Code
{
  Converts a UTF-16 encoded string to a 0-terminated UCS4String.

  Parameters:
    S - the UTF-16 source string. Every surrogate in S must be the first
        half of a surrogate pair that starts at that position.
  Returns:
    A UCS4String holding one element per converted character of S,
    followed by a single 0 terminator. For an empty S the result
    consists of the terminator only.
  Raises:
    EConvertError if a surrogate in S does not start a surrogate pair
    (for example a lone low surrogate, or a high surrogate at the very
    end of the string).
}
function ConvertStringToUTF32(const S: String): UCS4String;
var
  I, Next: Integer;
  C4: UCS4Char;
begin
  { Worst case: one UCS4 character per UTF-16 unit, plus the 0 terminator }
  SetLength(Result, Length(S) + 1);

  {
    No early exit for an empty S: the loop below is simply skipped and the
    terminator is then written explicitly, instead of relying on SetLength
    having left element 0 zeroed.
  }
  I := 1;    { 1-based read position in S }
  Next := 0; { 0-based write position in Result }
  while I <= Length(S) do
  begin
    if IsSurrogate(S, I) then
    begin
      {
        A surrogate is only valid as the start of a surrogate pair
        (high surrogate at I, low surrogate at I + 1). IsSurrogatePair
        performs that check, so no separate high/low test is needed.
      }
      if not IsSurrogatePair(S, I) then
        raise EConvertError.Create('Bad UTF16 input string!');

      { Create the UCS4 character from the pair }
      C4 := ConvertToUtf32(S, I);

      { The pair consumed two UTF-16 units: skip the low surrogate too }
      Inc(I);
    end
    else
      { Create the UCS4 character from the single UTF-16 char }
      C4 := ConvertToUtf32(S, I);

    { Add the character and advance both positions }
    Result[Next] := C4;
    Inc(Next);
    Inc(I);
  end;

  { Shrink to the number of characters actually written, plus terminator }
  SetLength(Result, Next + 1);

  { Add the trailing 0 terminator }
  Result[Next] := 0;
end;
Uses
- System.UCS4Char ( fr | de | ja )
- System.SysUtils.EConvertError ( fr | de | ja )
- System.Character.IsSurrogate ( fr | de | ja )
- System.Character.IsSurrogatePair ( fr | de | ja )
- System.Character.IsHighSurrogate ( fr | de | ja )
- System.Character.IsLowSurrogate ( fr | de | ja )
- System.Character.ConvertToUtf32 ( fr | de | ja )