21 char *&ResultPtr,
const UTF8 *&ErrorPtr) {
22 assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
25 if (WideCharWidth == 1) {
26 const UTF8 *Pos =
reinterpret_cast<const UTF8*
>(Source.
begin());
32 ResultPtr += Source.
size();
34 }
else if (WideCharWidth == 2) {
38 UTF16 *targetStart =
reinterpret_cast<UTF16*
>(ResultPtr);
41 &sourceStart, sourceStart + Source.
size(),
42 &targetStart, targetStart + Source.
size(), flags);
44 ResultPtr =
reinterpret_cast<char*
>(targetStart);
46 ErrorPtr = sourceStart;
47 }
else if (WideCharWidth == 4) {
51 UTF32 *targetStart =
reinterpret_cast<UTF32*
>(ResultPtr);
54 &sourceStart, sourceStart + Source.
size(),
55 &targetStart, targetStart + Source.
size(), flags);
57 ResultPtr =
reinterpret_cast<char*
>(targetStart);
59 ErrorPtr = sourceStart;
62 &&
"ConvertUTF8toUTFXX exhausted target buffer");
68 const UTF32 *SourceEnd = SourceStart + 1;
69 UTF8 *TargetStart =
reinterpret_cast<UTF8 *
>(ResultPtr);
70 UTF8 *TargetEnd = TargetStart + 4;
72 &TargetStart, TargetEnd,
77 ResultPtr =
reinterpret_cast<char*
>(TargetStart);
82 return (S.
size() >= 2 &&
83 ((S[0] ==
'\xff' && S[1] ==
'\xfe') ||
84 (S[0] ==
'\xfe' && S[1] ==
'\xff')));
91 if (SrcBytes.
size() % 2)
99 const UTF16 *SrcEnd =
reinterpret_cast<const UTF16 *
>(SrcBytes.
end());
102 std::vector<UTF16> ByteSwapped;
104 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
105 for (
unsigned I = 0,
E = ByteSwapped.size();
I !=
E; ++
I)
107 Src = &ByteSwapped[0];
108 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
118 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
119 UTF8 *DstEnd = Dst + Out.size();
130 Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
148 if (SrcUTF8.
empty()) {
154 const UTF8 *Src =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
begin());
155 const UTF8 *SrcEnd =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
end());
163 UTF16 *Dst = &DstUTF16[0];
175 DstUTF16.
resize(Dst - &DstUTF16[0]);
181 static_assert(
sizeof(
wchar_t) == 1 ||
sizeof(
wchar_t) == 2 ||
182 sizeof(
wchar_t) == 4,
183 "Expected wchar_t to be 1, 2, or 4 bytes");
185 template <
typename TResult>
191 Result.resize(Source.
size() + 1);
192 char *ResultPtr =
reinterpret_cast<char *
>(&Result[0]);
193 const UTF8 *ErrorPtr;
198 Result.resize(reinterpret_cast<wchar_t *>(ResultPtr) - &Result[0]);
215 if (
sizeof(
wchar_t) == 1) {
216 const UTF8 *Start =
reinterpret_cast<const UTF8 *
>(Source.data());
218 reinterpret_cast<const UTF8 *
>(Source.data() + Source.size());
221 Result.resize(Source.size());
222 memcpy(&Result[0], Source.data(), Source.size());
224 }
else if (
sizeof(
wchar_t) == 2) {
229 }
else if (
sizeof(
wchar_t) == 4) {
230 const UTF32 *Start =
reinterpret_cast<const UTF32 *
>(Source.data());
232 reinterpret_cast<const UTF32 *
>(Source.data() + Source.size());
234 UTF8 *ResultPtr =
reinterpret_cast<UTF8 *
>(&Result[0]);
235 UTF8 *ResultEnd =
reinterpret_cast<UTF8 *
>(&Result[0] + Result.size());
238 Result.resize(reinterpret_cast<char *>(ResultPtr) - &Result[0]);
246 "Control should never reach this point; see static_assert further up");
bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)
Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark...
This class represents lattice values for constants.
void push_back(const T &Elt)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
static bool ConvertUTF8toWideInternal(llvm::StringRef Source, TResult &Result)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE
bool convertWideToUTF8(const std::wstring &Source, std::string &Result)
Converts a std::wstring to a UTF-8 encoded std::string.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)
Converts a UTF-8 string into a UTF-16 string with native endianness.
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
uint16_t SwapByteOrder_16(uint16_t value)
SwapByteOrder_16 - This function returns a byte-swapped representation of the 16-bit argument...
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)
Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)
Convert a Unicode code point to a UTF8 sequence.
size_t size() const
size - Get the array size.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
LLVM_NODISCARD bool empty() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool empty() const
empty - Check if the array is empty.