TCharacterSurrogates (C++)

From RAD Studio Code Examples
Jump to: navigation, search

Description

This example demonstrates a routine that converts a UTF-16 string to a UCS4 (UTF-32) string. The routine uses the functionality exposed by the TCharacter class.

Code

/*
	Converts the UTF-16 string S into a newly allocated, zero-terminated
	array of UCS4 characters.

	Parameters:
		s      - the UTF-16 input string (indexed from 1 to s.Length()).
		outLen - receives the number of elements written to the returned
		         array, INCLUDING the trailing 0 terminator. For an empty
		         input this is 1.

	Returns:
		A buffer allocated with new[]; the caller owns it and must release
		it with delete[].

	Throws:
		EConvertError (thrown as a pointer, i.e. "throw new") when S contains
		a surrogate that is not part of a valid high/low surrogate pair.
		The buffer is released before any exception leaves this function.
*/
UCS4Char* __fastcall ConvertStringToUTF32(String s, int& outLen)
{
	const int len = s.Length();

	/*
		One slot per UTF-16 code unit is an upper bound (a surrogate pair
		collapses into a single UCS4 character), plus one slot for the
		\0 terminator.
	*/
	UCS4Char* result = new UCS4Char[len + 1];
	int next = 0;

	try
	{
		int i = 1;

		while (i <= len)
		{
			if (TCharacter::IsSurrogate(s, i))
			{
				/*
					The character at position I is a surrogate, this
					means that I and I+1 must be a surrogate pair.
				*/
				if (!TCharacter::IsSurrogatePair(s, i))
					throw new EConvertError("Bad UTF16 input string!");

				/* S[I] -> high surrogate and S[I+1] -> low surrogate */
				if (!(TCharacter::IsHighSurrogate(s, i) &&
					TCharacter::IsLowSurrogate(s, i + 1)))
					throw new EConvertError("Bad UTF16 input string!");

				/* Create the UCS4 character from the pair */
				result[next++] = TCharacter::ConvertToUtf32(s, i);

				/* The pair consumed two UTF-16 code units */
				i += 2;
			}
			else
			{
				/* Create the UCS4 character from the single UTF-16 char */
				result[next++] = TCharacter::ConvertToUtf32(s, i);
				i++;
			}
		}
	}
	catch (...)
	{
		/* Do not leak the buffer when the input is rejected */
		delete[] result;
		throw;
	}

	/*
		Add a trailing \0. This also covers the empty-string case, so the
		caller always gets a terminated buffer and a defined outLen.
	*/
	result[next] = 0;
	outLen = next + 1;

	return result;
}

Uses