59 #if defined(__clang__) && defined(__has_warning) 60 # if __has_warning("-Wimplicit-fallthrough") 61 # define ConvertUTF_DISABLE_WARNINGS \ 62 _Pragma("clang diagnostic push") \ 63 _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"") 64 # define ConvertUTF_RESTORE_WARNINGS \ 65 _Pragma("clang diagnostic pop") 67 #elif defined(__GNUC__) && __GNUC__ > 6 68 # define ConvertUTF_DISABLE_WARNINGS \ 69 _Pragma("GCC diagnostic push") \ 70 _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") 71 # define ConvertUTF_RESTORE_WARNINGS \ 72 _Pragma("GCC diagnostic pop") 74 #ifndef ConvertUTF_DISABLE_WARNINGS 75 # define ConvertUTF_DISABLE_WARNINGS 77 #ifndef ConvertUTF_RESTORE_WARNINGS 78 # define ConvertUTF_RESTORE_WARNINGS 90 #define UNI_SUR_HIGH_START (UTF32)0xD800 91 #define UNI_SUR_HIGH_END (UTF32)0xDBFF 92 #define UNI_SUR_LOW_START (UTF32)0xDC00 93 #define UNI_SUR_LOW_END (UTF32)0xDFFF 105 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
106 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
107 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
108 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
109 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
110 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
111 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
112 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
121 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
146 const UTF32** sourceStart,
const UTF32* sourceEnd,
149 const UTF32* source = *sourceStart;
150 UTF16* target = *targetStart;
151 while (source < sourceEnd) {
153 if (target >= targetEnd) {
168 *target++ = (
UTF16)ch;
178 if (target + 1 >= targetEnd) {
187 *sourceStart = source;
188 *targetStart = target;
195 const UTF16** sourceStart,
const UTF16* sourceEnd,
198 const UTF16* source = *sourceStart;
199 UTF32* target = *targetStart;
201 while (source < sourceEnd) {
202 const UTF16* oldSource = source;
207 if (source < sourceEnd) {
232 if (target >= targetEnd) {
238 *sourceStart = source;
239 *targetStart = target;
242 fprintf(stderr,
"ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
249 const UTF16** sourceStart,
const UTF16* sourceEnd,
252 const UTF16* source = *sourceStart;
253 UTF8* target = *targetStart;
254 while (source < sourceEnd) {
256 unsigned short bytesToWrite = 0;
257 const UTF32 byteMask = 0xBF;
258 const UTF32 byteMark = 0x80;
259 const UTF16* oldSource = source;
264 if (source < sourceEnd) {
290 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
291 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
292 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
293 }
else if (ch < (
UTF32)0x110000) { bytesToWrite = 4;
294 }
else { bytesToWrite = 3;
298 target += bytesToWrite;
299 if (target > targetEnd) {
303 switch (bytesToWrite) {
304 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
305 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
306 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
307 case 1: *--target = (
UTF8)(ch | firstByteMark[bytesToWrite]);
309 target += bytesToWrite;
311 *sourceStart = source;
312 *targetStart = target;
319 const UTF32** sourceStart,
const UTF32* sourceEnd,
322 const UTF32* source = *sourceStart;
323 UTF8* target = *targetStart;
324 while (source < sourceEnd) {
326 unsigned short bytesToWrite = 0;
327 const UTF32 byteMask = 0xBF;
328 const UTF32 byteMark = 0x80;
342 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
343 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
344 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
346 }
else { bytesToWrite = 3;
351 target += bytesToWrite;
352 if (target > targetEnd) {
356 switch (bytesToWrite) {
357 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
358 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
359 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
360 case 1: *--target = (
UTF8) (ch | firstByteMark[bytesToWrite]);
362 target += bytesToWrite;
364 *sourceStart = source;
365 *targetStart = target;
384 const UTF8 *srcptr = source+length;
386 default:
return false;
388 case 4:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
389 case 3:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
390 case 2:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
394 case 0xE0:
if (a < 0xA0)
return false;
break;
395 case 0xED:
if (a > 0x9F)
return false;
break;
396 case 0xF0:
if (a < 0x90)
return false;
break;
397 case 0xF4:
if (a > 0x8F)
return false;
break;
398 default:
if (a < 0x80)
return false;
401 case 1:
if (*source >= 0x80 && *source < 0xC2)
return false;
403 if (*source > 0xF4)
return false;
414 int length = trailingBytesForUTF8[*source]+1;
415 if (length > sourceEnd - source) {
425 const UTF8 *sourceEnd) {
439 if (source == sourceEnd)
449 if (b1 >= 0xC2 && b1 <= 0xDF) {
457 if (source == sourceEnd)
464 return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
466 if (b1 >= 0xE1 && b1 <= 0xEC) {
467 return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
470 return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
472 if (b1 >= 0xEE && b1 <= 0xEF) {
473 return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
476 if (b2 >= 0x90 && b2 <= 0xBF) {
477 if (source == sourceEnd)
481 return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
485 if (b1 >= 0xF1 && b1 <= 0xF3) {
486 if (b2 >= 0x80 && b2 <= 0xBF) {
487 if (source == sourceEnd)
491 return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
496 if (b2 >= 0x80 && b2 <= 0x8F) {
497 if (source == sourceEnd)
501 return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
506 assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
521 return trailingBytesForUTF8[
first] + 1;
531 while (*source != sourceEnd) {
532 int length = trailingBytesForUTF8[**source] + 1;
533 if (length > sourceEnd - *source || !
isLegalUTF8(*source, length))
543 const UTF8** sourceStart,
const UTF8* sourceEnd,
546 const UTF8* source = *sourceStart;
547 UTF16* target = *targetStart;
548 while (source < sourceEnd) {
550 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
551 if (extraBytesToRead >= sourceEnd - source) {
562 switch (extraBytesToRead) {
563 case 5: ch += *source++; ch <<= 6;
564 case 4: ch += *source++; ch <<= 6;
565 case 3: ch += *source++; ch <<= 6;
566 case 2: ch += *source++; ch <<= 6;
567 case 1: ch += *source++; ch <<= 6;
568 case 0: ch += *source++;
570 ch -= offsetsFromUTF8[extraBytesToRead];
572 if (target >= targetEnd) {
573 source -= (extraBytesToRead+1);
580 source -= (extraBytesToRead+1);
587 *target++ = (
UTF16)ch;
592 source -= (extraBytesToRead+1);
599 if (target + 1 >= targetEnd) {
600 source -= (extraBytesToRead+1);
608 *sourceStart = source;
609 *targetStart = target;
616 const UTF8** sourceStart,
const UTF8* sourceEnd,
620 const UTF8* source = *sourceStart;
621 UTF32* target = *targetStart;
622 while (source < sourceEnd) {
624 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
625 if (extraBytesToRead >= sourceEnd - source) {
642 if (target >= targetEnd) {
666 switch (extraBytesToRead) {
667 case 5: ch += *source++; ch <<= 6;
668 case 4: ch += *source++; ch <<= 6;
669 case 3: ch += *source++; ch <<= 6;
670 case 2: ch += *source++; ch <<= 6;
671 case 1: ch += *source++; ch <<= 6;
672 case 0: ch += *source++;
674 ch -= offsetsFromUTF8[extraBytesToRead];
683 source -= (extraBytesToRead+1);
697 *sourceStart = source;
698 *targetStart = target;
703 const UTF8 *sourceEnd,
712 const UTF8 *sourceEnd,
UTF32 **targetStart,
static const UTF32 halfBase
This class represents lattice values for constants.
static const char trailingBytesForUTF8[256]
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
#define UNI_MAX_LEGAL_UTF32
#define UNI_REPLACEMENT_CHAR
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
static ConversionResult ConvertUTF8toUTF32Impl(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags, Boolean InputIsPartial)
ConversionResult ConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
unsigned getNumBytesForUTF8(UTF8 firstByte)
static Boolean isLegalUTF8(const UTF8 *source, int length)
#define ConvertUTF_DISABLE_WARNINGS
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
static const int halfShift
static const UTF32 offsetsFromUTF8[6]
#define UNI_SUR_LOW_START
ConversionResult ConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
static unsigned findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
static const UTF8 firstByteMark[7]
#define ConvertUTF_RESTORE_WARNINGS
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define UNI_SUR_HIGH_START
static const UTF32 halfMask