TCharacterSurrogates (C++)
Description
This example demonstrates a routine that converts a UTF-16 string to a UCS4 string. The routine relies on the functionality exposed by the TCharacter class.
Code
/*
  Converts a UTF-16 encoded String into a newly allocated,
  zero-terminated array of UCS4 code points.

  s      - the UTF-16 input string (walked with 1-based indices).
  outLen - receives the number of UCS4Char elements written to the
           returned array, INCLUDING the trailing 0 terminator. For an
           empty input this is 1 (just the terminator).

  Returns an array allocated with new[]; the caller owns it and must
  release it with delete[].

  Throws (by pointer, via "throw new") an EConvertError when a surrogate
  in the input is not part of a well-formed high/low surrogate pair. The
  partially filled array is released before the exception is thrown.
*/
UCS4Char* __fastcall ConvertStringToUTF32(String s, int& outLen)
{
    const int length = s.Length();

    /*
      One UCS4 slot per UTF-16 unit is an upper bound (a surrogate pair
      collapses into a single slot), plus one slot for the \0 terminator.
    */
    UCS4Char* result = new UCS4Char[length + 1];

    int next = 0;
    for (int i = 1; i <= length; i++)
    {
        const bool surrogate = TCharacter::IsSurrogate(s, i);

        /*
          A surrogate at position I is only valid as the high half of a
          pair: S[I] -> high surrogate and S[I+1] -> low surrogate.
          The checks short-circuit, so S[I+1] is inspected only after
          IsSurrogatePair has accepted position I.
        */
        if (surrogate &&
            (!TCharacter::IsSurrogatePair(s, i) ||
             !TCharacter::IsHighSurrogate(s, i) ||
             !TCharacter::IsLowSurrogate(s, i + 1)))
        {
            /* Do not leak the output buffer on malformed input */
            delete[] result;
            throw new EConvertError("Bad UTF16 input string!");
        }

        /* Create the UCS4 character from the UTF-16 char or pair at I */
        result[next++] = TCharacter::ConvertToUtf32(s, i);

        /* A pair consumed two UTF-16 units: skip the low surrogate */
        if (surrogate)
            i++;
    }

    /*
      Add the trailing \0. This also covers the empty-string case, so the
      caller always gets a terminated buffer and an initialized outLen.
    */
    result[next] = 0;
    outLen = next + 1;
    return result;
}