38 #include <system_error> 67 switch (uint8_t(Input[0])) {
69 if (Input.
size() >= 4) {
71 && uint8_t(Input[2]) == 0xFE
72 && uint8_t(Input[3]) == 0xFF)
74 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
78 if (Input.
size() >= 2 && Input[1] != 0)
82 if ( Input.
size() >= 4
83 && uint8_t(Input[1]) == 0xFE
88 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFE)
92 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFF)
96 if ( Input.
size() >= 3
97 && uint8_t(Input[1]) == 0xBB
98 && uint8_t(Input[2]) == 0xBF)
104 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
107 if (Input.
size() >= 2 && Input[1] == 0)
// Empty out-of-line definitions of anchor() for Node and each of its node
// kinds (null, scalar, block scalar, key/value, mapping, sequence, alias).
// The bodies intentionally do nothing.
// NOTE(review): presumably these exist only so that each class has a member
// function defined in this file — confirm against the class declarations,
// which are not visible here.
114 void Node::anchor() {}
115 void NullNode::anchor() {}
116 void ScalarNode::anchor() {}
117 void BlockScalarNode::anchor() {}
118 void KeyValueNode::anchor() {}
119 void MappingNode::anchor() {}
120 void SequenceNode::anchor() {}
121 void AliasNode::anchor() {}
188 return Tok == Other.Tok;
204 if ((*Position & 0x80) == 0) {
205 return std::make_pair(*Position, 1);
209 if (Position + 1 != End &&
210 ((*Position & 0xE0) == 0xC0) &&
211 ((*(Position + 1) & 0xC0) == 0x80)) {
212 uint32_t codepoint = ((*Position & 0x1F) << 6) |
213 (*(Position + 1) & 0x3F);
214 if (codepoint >= 0x80)
215 return std::make_pair(codepoint, 2);
219 if (Position + 2 != End &&
220 ((*Position & 0xF0) == 0xE0) &&
221 ((*(Position + 1) & 0xC0) == 0x80) &&
222 ((*(Position + 2) & 0xC0) == 0x80)) {
223 uint32_t codepoint = ((*Position & 0x0F) << 12) |
224 ((*(Position + 1) & 0x3F) << 6) |
225 (*(Position + 2) & 0x3F);
228 if (codepoint >= 0x800 &&
229 (codepoint < 0xD800 || codepoint > 0xDFFF))
230 return std::make_pair(codepoint, 3);
234 if (Position + 3 != End &&
235 ((*Position & 0xF8) == 0xF0) &&
236 ((*(Position + 1) & 0xC0) == 0x80) &&
237 ((*(Position + 2) & 0xC0) == 0x80) &&
238 ((*(Position + 3) & 0xC0) == 0x80)) {
239 uint32_t codepoint = ((*Position & 0x07) << 18) |
240 ((*(Position + 1) & 0x3F) << 12) |
241 ((*(Position + 2) & 0x3F) << 6) |
242 (*(Position + 3) & 0x3F);
243 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
244 return std::make_pair(codepoint, 4);
246 return std::make_pair(0, 0);
256 std::error_code *EC =
nullptr);
258 std::error_code *EC =
nullptr);
299 return StringRef(Current, End - Current);
385 void advanceWhile(SkipWhileFunc Func);
390 void scan_ns_uri_char();
407 bool consumeLineBreakIfPresent();
418 void removeStaleSimpleKeyCandidates();
421 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
425 bool unrollIndent(
int ToColumn);
429 bool rollIndent(
int ToColumn
438 void scanToNextToken();
441 bool scanStreamStart();
444 bool scanStreamEnd();
447 bool scanDirective();
450 bool scanDocumentIndicator(
bool IsStart);
453 bool scanFlowCollectionStart(
bool IsSequence);
456 bool scanFlowCollectionEnd(
bool IsSequence);
459 bool scanFlowEntry();
462 bool scanBlockEntry();
471 bool scanFlowScalar(
bool IsDoubleQuoted);
474 bool scanPlainScalar();
477 bool scanAliasOrAnchor(
bool IsAlias);
480 bool scanBlockScalar(
bool IsLiteral);
483 char scanBlockChompingIndicator();
486 unsigned scanBlockIndentationIndicator();
491 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
497 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
498 unsigned &LineBreaks,
bool &IsDone);
503 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
510 bool fetchMoreTokens();
537 bool IsStartOfStream;
540 bool IsSimpleKeyAllowed;
568 if (UnicodeScalarValue <= 0x7F) {
569 Result.
push_back(UnicodeScalarValue & 0x7F);
570 }
else if (UnicodeScalarValue <= 0x7FF) {
571 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
572 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
575 }
else if (UnicodeScalarValue <= 0xFFFF) {
576 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
577 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
578 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
582 }
else if (UnicodeScalarValue <= 0x10FFFF) {
583 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
584 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
585 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
586 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
601 OS <<
"Stream-Start: ";
604 OS <<
"Stream-End: ";
607 OS <<
"Version-Directive: ";
610 OS <<
"Tag-Directive: ";
613 OS <<
"Document-Start: ";
616 OS <<
"Document-End: ";
619 OS <<
"Block-Entry: ";
625 OS <<
"Block-Sequence-Start: ";
628 OS <<
"Block-Mapping-Start: ";
631 OS <<
"Flow-Entry: ";
634 OS <<
"Flow-Sequence-Start: ";
637 OS <<
"Flow-Sequence-End: ";
640 OS <<
"Flow-Mapping-Start: ";
643 OS <<
"Flow-Mapping-End: ";
655 OS <<
"Block Scalar: ";
669 OS << T.
Range <<
"\n";
692 std::string EscapedInput;
695 EscapedInput +=
"\\\\";
697 EscapedInput +=
"\\\"";
699 EscapedInput +=
"\\0";
701 EscapedInput +=
"\\a";
703 EscapedInput +=
"\\b";
705 EscapedInput +=
"\\t";
707 EscapedInput +=
"\\n";
709 EscapedInput +=
"\\v";
711 EscapedInput +=
"\\f";
713 EscapedInput +=
"\\r";
715 EscapedInput +=
"\\e";
716 else if ((
unsigned char)*i < 0x20) {
718 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
719 }
else if (*i & 0x80) {
722 if (UnicodeScalarValue.second == 0) {
726 EscapedInput.insert(EscapedInput.end(), Val.
begin(), Val.
end());
730 if (UnicodeScalarValue.first == 0x85)
731 EscapedInput +=
"\\N";
732 else if (UnicodeScalarValue.first == 0xA0)
733 EscapedInput +=
"\\_";
734 else if (UnicodeScalarValue.first == 0x2028)
735 EscapedInput +=
"\\L";
736 else if (UnicodeScalarValue.first == 0x2029)
737 EscapedInput +=
"\\P";
738 else if (!EscapePrintable &&
740 EscapedInput +=
StringRef(i, UnicodeScalarValue.second);
742 std::string HexStr =
utohexstr(UnicodeScalarValue.first);
743 if (HexStr.size() <= 2)
744 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
745 else if (HexStr.size() <= 4)
746 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
747 else if (HexStr.size() <= 8)
748 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
750 i += UnicodeScalarValue.second - 1;
752 EscapedInput.push_back(*i);
759 : SM(sm), ShowColors(ShowColors), EC(EC) {
765 : SM(SM_), ShowColors(ShowColors), EC(EC) {
770 InputBuffer = Buffer;
777 IsStartOfStream =
true;
778 IsSimpleKeyAllowed =
true;
780 std::unique_ptr<MemoryBuffer> InputBufferOwner =
788 bool NeedMore =
false;
790 if (TokenQueue.
empty() || NeedMore) {
791 if (!fetchMoreTokens()) {
794 return TokenQueue.
front();
798 "fetchMoreTokens lied about getting tokens!");
800 removeStaleSimpleKeyCandidates();
802 SK.Tok = TokenQueue.
begin();
808 return TokenQueue.
front();
814 if (!TokenQueue.
empty())
819 if (TokenQueue.
empty())
829 if ( *Position == 0x09
830 || (*Position >= 0x20 && *Position <= 0x7E))
834 if (uint8_t(*Position) & 0x80) {
837 && u8d.first != 0xFEFF
838 && ( u8d.first == 0x85
839 || ( u8d.first >= 0xA0
840 && u8d.first <= 0xD7FF)
841 || ( u8d.first >= 0xE000
842 && u8d.first <= 0xFFFD)
843 || ( u8d.first >= 0x10000
844 && u8d.first <= 0x10FFFF)))
845 return Position + u8d.second;
853 if (*Position == 0x0D) {
854 if (Position + 1 != End && *(Position + 1) == 0x0A)
859 if (*Position == 0x0A)
867 if (*Position ==
' ')
875 if (*Position ==
' ' || *Position ==
'\t')
883 if (*Position ==
' ' || *Position ==
'\t')
885 return skip_nb_char(Position);
899 void Scanner::advanceWhile(SkipWhileFunc Func) {
900 auto Final = skip_while(Func, Current);
901 Column += Final - Current;
906 return (C >=
'0' && C <=
'9')
907 || (C >=
'a' && C <=
'z')
908 || (C >=
'A' && C <=
'Z');
913 || (C >=
'a' && C <=
'z')
914 || (C >=
'A' && C <=
'Z');
917 void Scanner::scan_ns_uri_char() {
921 if (( *Current ==
'%' 926 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
936 if (Expected >= 0x80)
940 if (uint8_t(*Current) >= 0x80)
942 if (uint8_t(*Current) == Expected) {
950 void Scanner::skip(
uint32_t Distance) {
953 assert(Current <= End &&
"Skipped past the end");
959 return *Position ==
' ' || *Position ==
'\t' || *Position ==
'\r' ||
963 bool Scanner::consumeLineBreakIfPresent() {
964 auto Next = skip_b_break(Current);
976 if (IsSimpleKeyAllowed) {
980 SK.Column = AtColumn;
981 SK.IsRequired = IsRequired;
982 SK.FlowLevel = FlowLevel;
987 void Scanner::removeStaleSimpleKeyCandidates() {
989 i != SimpleKeys.
end();) {
990 if (i->Line != Line || i->Column + 1024 < Column) {
992 setError(
"Could not find expected : for simple key" 993 , i->Tok->Range.begin());
994 i = SimpleKeys.
erase(i);
1000 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
1001 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
1005 bool Scanner::unrollIndent(
int ToColumn) {
1011 while (Indent > ToColumn) {
1021 bool Scanner::rollIndent(
int ToColumn
1026 if (Indent < ToColumn) {
1033 TokenQueue.
insert(InsertPoint, T);
1038 void Scanner::skipComment() {
1039 if (*Current !=
'#')
1052 void Scanner::scanToNextToken() {
1054 while (*Current ==
' ' || *Current ==
'\t') {
1069 IsSimpleKeyAllowed =
true;
1073 bool Scanner::scanStreamStart() {
1074 IsStartOfStream =
false;
1082 Current += EI.second;
1086 bool Scanner::scanStreamEnd() {
1095 IsSimpleKeyAllowed =
false;
1104 bool Scanner::scanDirective() {
1108 IsSimpleKeyAllowed =
false;
1113 Current = skip_while(&Scanner::skip_ns_char, Current);
1115 Current = skip_while(&Scanner::skip_s_white, Current);
1118 if (Name ==
"YAML") {
1119 Current = skip_while(&Scanner::skip_ns_char, Current);
1124 }
else if(Name ==
"TAG") {
1125 Current = skip_while(&Scanner::skip_ns_char, Current);
1126 Current = skip_while(&Scanner::skip_s_white, Current);
1127 Current = skip_while(&Scanner::skip_ns_char, Current);
1136 bool Scanner::scanDocumentIndicator(
bool IsStart) {
1139 IsSimpleKeyAllowed =
false;
1149 bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1158 saveSimpleKeyCandidate(--TokenQueue.
end(), Column - 1,
false);
1161 IsSimpleKeyAllowed =
true;
1166 bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1167 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1168 IsSimpleKeyAllowed =
false;
1180 bool Scanner::scanFlowEntry() {
1181 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1182 IsSimpleKeyAllowed =
true;
1191 bool Scanner::scanBlockEntry() {
1193 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1194 IsSimpleKeyAllowed =
true;
1203 bool Scanner::scanKey() {
1207 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1208 IsSimpleKeyAllowed = !FlowLevel;
1218 bool Scanner::scanValue() {
1221 if (!SimpleKeys.
empty()) {
1225 T.
Range = SK.Tok->Range;
1227 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1231 assert(i != e &&
"SimpleKey not in token queue!");
1232 i = TokenQueue.
insert(i, T);
1237 IsSimpleKeyAllowed =
false;
1241 IsSimpleKeyAllowed = !FlowLevel;
1261 assert(Position - 1 >= First);
1265 while (I >= First && *I ==
'\\') --
I;
1268 return (Position - 1 - I) % 2 == 1;
1271 bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1273 unsigned ColStart = Column;
1274 if (IsDoubleQuoted) {
1277 while (Current != End && *Current !=
'"')
1281 }
while ( Current != End
1282 && *(Current - 1) ==
'\\' 1288 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1291 }
else if (*Current ==
'\'')
1295 i = skip_b_break(Current);
1310 if (Current == End) {
1311 setError(
"Expected quote at end of scalar", Current);
1321 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1323 IsSimpleKeyAllowed =
false;
1328 bool Scanner::scanPlainScalar() {
1330 unsigned ColStart = Column;
1331 unsigned LeadingBlanks = 0;
1332 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1333 unsigned indent =
static_cast<unsigned>(Indent + 1);
1335 if (*Current ==
'#')
1338 while (!isBlankOrBreak(Current)) {
1339 if ( FlowLevel && *Current ==
':' 1340 && !(isBlankOrBreak(Current + 1) || *(Current + 1) ==
',')) {
1341 setError(
"Found unexpected ':' while scanning a plain scalar", Current);
1346 if ( (*Current ==
':' && isBlankOrBreak(Current + 1))
1348 && (
StringRef(Current, 1).find_first_of(
",:?[]{}")
1360 if (!isBlankOrBreak(Current))
1365 while (isBlankOrBreak(Tmp)) {
1368 if (LeadingBlanks && (Column < indent) && *Tmp ==
'\t') {
1369 setError(
"Found invalid tab character in indentation", Tmp);
1375 i = skip_b_break(Tmp);
1384 if (!FlowLevel && Column < indent)
1389 if (Start == Current) {
1390 setError(
"Got empty plain scalar", Start);
1399 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1401 IsSimpleKeyAllowed =
false;
1406 bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1408 unsigned ColStart = Column;
1411 if ( *Current ==
'[' || *Current ==
']' 1412 || *Current ==
'{' || *Current ==
'}' 1423 if (Start == Current) {
1424 setError(
"Got empty alias or anchor", Start);
1434 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1436 IsSimpleKeyAllowed =
false;
1441 char Scanner::scanBlockChompingIndicator() {
1442 char Indicator =
' ';
1443 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1444 Indicator = *Current;
1456 if (ChompingIndicator ==
'-')
1458 if (ChompingIndicator ==
'+')
1461 return Str.
empty() ? 0 : 1;
1464 unsigned Scanner::scanBlockIndentationIndicator() {
1465 unsigned Indent = 0;
1466 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1473 bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1474 unsigned &IndentIndicator,
bool &IsDone) {
1475 auto Start = Current;
1477 ChompingIndicator = scanBlockChompingIndicator();
1478 IndentIndicator = scanBlockIndentationIndicator();
1480 if (ChompingIndicator ==
' ')
1481 ChompingIndicator = scanBlockChompingIndicator();
1482 Current = skip_while(&Scanner::skip_s_white, Current);
1485 if (Current == End) {
1494 if (!consumeLineBreakIfPresent()) {
1495 setError(
"Expected a line break after block scalar header", Current);
1501 bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1502 unsigned BlockExitIndent,
1503 unsigned &LineBreaks,
bool &IsDone) {
1504 unsigned MaxAllSpaceLineCharacters = 0;
1508 advanceWhile(&Scanner::skip_s_space);
1509 if (skip_nb_char(Current) != Current) {
1511 if (Column <= BlockExitIndent) {
1516 BlockIndent = Column;
1517 if (MaxAllSpaceLineCharacters > BlockIndent) {
1519 "Leading all-spaces line must be smaller than the block indent",
1520 LongestAllSpaceLine);
1525 if (skip_b_break(Current) != Current &&
1526 Column > MaxAllSpaceLineCharacters) {
1529 MaxAllSpaceLineCharacters = Column;
1530 LongestAllSpaceLine = Current;
1534 if (Current == End) {
1539 if (!consumeLineBreakIfPresent()) {
1548 bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1549 unsigned BlockExitIndent,
bool &IsDone) {
1551 while (Column < BlockIndent) {
1552 auto I = skip_s_space(Current);
1559 if (skip_nb_char(Current) == Current)
1562 if (Column <= BlockExitIndent) {
1567 if (Column < BlockIndent) {
1568 if (Current != End && *Current ==
'#') {
1572 setError(
"A text line is less indented than the block scalar", Current);
1578 bool Scanner::scanBlockScalar(
bool IsLiteral) {
1580 assert(*Current ==
'|' || *Current ==
'>');
1583 char ChompingIndicator;
1584 unsigned BlockIndent;
1585 bool IsDone =
false;
1586 if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1591 auto Start = Current;
1592 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1593 unsigned LineBreaks = 0;
1594 if (BlockIndent == 0) {
1595 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1603 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1609 auto LineStart = Current;
1610 advanceWhile(&Scanner::skip_nb_char);
1611 if (LineStart != Current) {
1612 Str.
append(LineBreaks,
'\n');
1621 if (!consumeLineBreakIfPresent())
1626 if (Current == End && !LineBreaks)
1633 IsSimpleKeyAllowed =
true;
1643 bool Scanner::scanTag() {
1645 unsigned ColStart = Column;
1647 if (Current == End || isBlankOrBreak(Current));
1648 else if (*Current ==
'<') {
1655 Current = skip_while(&Scanner::skip_ns_char, Current);
1664 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1666 IsSimpleKeyAllowed =
false;
1671 bool Scanner::fetchMoreTokens() {
1672 if (IsStartOfStream)
1673 return scanStreamStart();
1678 return scanStreamEnd();
1680 removeStaleSimpleKeyCandidates();
1682 unrollIndent(Column);
1684 if (Column == 0 && *Current ==
'%')
1685 return scanDirective();
1687 if (Column == 0 && Current + 4 <= End
1689 && *(Current + 1) ==
'-' 1690 && *(Current + 2) ==
'-' 1691 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1692 return scanDocumentIndicator(
true);
1694 if (Column == 0 && Current + 4 <= End
1696 && *(Current + 1) ==
'.' 1697 && *(Current + 2) ==
'.' 1698 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1699 return scanDocumentIndicator(
false);
1701 if (*Current ==
'[')
1702 return scanFlowCollectionStart(
true);
1704 if (*Current ==
'{')
1705 return scanFlowCollectionStart(
false);
1707 if (*Current ==
']')
1708 return scanFlowCollectionEnd(
true);
1710 if (*Current ==
'}')
1711 return scanFlowCollectionEnd(
false);
1713 if (*Current ==
',')
1714 return scanFlowEntry();
1716 if (*Current ==
'-' && isBlankOrBreak(Current + 1))
1717 return scanBlockEntry();
1719 if (*Current ==
'?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1722 if (*Current ==
':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1725 if (*Current ==
'*')
1726 return scanAliasOrAnchor(
true);
1728 if (*Current ==
'&')
1729 return scanAliasOrAnchor(
false);
1731 if (*Current ==
'!')
1734 if (*Current ==
'|' && !FlowLevel)
1735 return scanBlockScalar(
true);
1737 if (*Current ==
'>' && !FlowLevel)
1738 return scanBlockScalar(
false);
1740 if (*Current ==
'\'')
1741 return scanFlowScalar(
false);
1743 if (*Current ==
'"')
1744 return scanFlowScalar(
true);
1748 if (!(isBlankOrBreak(Current)
1750 || (*Current ==
'-' && !isBlankOrBreak(Current + 1))
1751 || (!FlowLevel && (*Current ==
'?' || *Current ==
':')
1752 && isBlankOrBreak(Current + 1))
1753 || (!FlowLevel && *Current ==
':' 1754 && Current + 2 < End
1755 && *(Current + 1) ==
':' 1756 && !isBlankOrBreak(Current + 2)))
1757 return scanPlainScalar();
1759 setError(
"Unrecognized character while tokenizing.");
1764 std::error_code *EC)
1765 : scanner(new
Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
1768 std::error_code *EC)
1769 : scanner(new
Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
1789 CurrentDoc.reset(
new Document(*
this));
1804 : Doc(D),
TypeID(Type), Anchor(A), Tag(T) {
1811 if (!Raw.
empty() && Raw !=
"!") {
1814 Ret =
Doc->getTagMap().find(
"!")->second;
1818 Ret =
Doc->getTagMap().find(
"!!")->second;
1823 std::map<StringRef, StringRef>::const_iterator It =
1824 Doc->getTagMap().find(TagHandle);
1825 if (It !=
Doc->getTagMap().end())
1830 T.Range = TagHandle;
1840 return "tag:yaml.org,2002:null";
1844 return "tag:yaml.org,2002:str";
1846 return "tag:yaml.org,2002:map";
1848 return "tag:yaml.org,2002:seq";
1855 return Doc->peekNext();
1859 return Doc->getNext();
1863 return Doc->parseBlockNode();
1867 return Doc->NodeAllocator;
1871 Doc->setError(Msg, Tok);
1875 return Doc->failed();
1880 if (
Value[0] ==
'"') {
1886 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1887 return UnquotedValue;
1888 }
else if (
Value[0] ==
'\'') {
1898 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1900 UnquotedValue = UnquotedValue.
substr(i + 2);
1905 return UnquotedValue;
1908 return Value.rtrim(
' ');
1921 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1923 UnquotedValue = UnquotedValue.
substr(i);
1925 assert(!UnquotedValue.
empty() &&
"Can't be empty!");
1928 switch (UnquotedValue[0]) {
1932 if ( UnquotedValue.
size() > 1
1933 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1934 UnquotedValue = UnquotedValue.
substr(1);
1935 UnquotedValue = UnquotedValue.
substr(1);
1938 if (UnquotedValue.
size() == 1)
1941 UnquotedValue = UnquotedValue.
substr(1);
1942 switch (UnquotedValue[0]) {
1946 setError(
"Unrecognized escape code!", T);
1952 if ( UnquotedValue.
size() > 1
1953 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1954 UnquotedValue = UnquotedValue.
substr(1);
2011 if (UnquotedValue.
size() < 3)
2014 unsigned int UnicodeScalarValue;
2017 UnicodeScalarValue = 0xFFFD;
2019 UnquotedValue = UnquotedValue.
substr(2);
2023 if (UnquotedValue.
size() < 5)
2026 unsigned int UnicodeScalarValue;
2029 UnicodeScalarValue = 0xFFFD;
2031 UnquotedValue = UnquotedValue.
substr(4);
2035 if (UnquotedValue.
size() < 9)
2038 unsigned int UnicodeScalarValue;
2041 UnicodeScalarValue = 0xFFFD;
2043 UnquotedValue = UnquotedValue.
substr(8);
2047 UnquotedValue = UnquotedValue.
substr(1);
2098 setError(
"Unexpected token in Key Value.", t);
2114 void MappingNode::increment() {
2117 CurrentEntry =
nullptr;
2121 CurrentEntry->skip();
2122 if (
Type == MT_Inline) {
2124 CurrentEntry =
nullptr;
2132 }
else if (
Type == MT_Block) {
2137 CurrentEntry =
nullptr;
2140 setError(
"Unexpected token. Expected Key or Block End", T);
2144 CurrentEntry =
nullptr;
2158 CurrentEntry =
nullptr;
2161 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow " 2165 CurrentEntry =
nullptr;
2173 CurrentEntry =
nullptr;
2177 CurrentEntry->skip();
2179 if (SeqType == ST_Block) {
2184 if (!CurrentEntry) {
2186 CurrentEntry =
nullptr;
2192 CurrentEntry =
nullptr;
2195 setError(
"Unexpected token. Expected Block Entry or Block End." 2200 CurrentEntry =
nullptr;
2202 }
else if (SeqType == ST_Indentless) {
2207 if (!CurrentEntry) {
2209 CurrentEntry =
nullptr;
2215 CurrentEntry =
nullptr;
2217 }
else if (SeqType == ST_Flow) {
2222 WasPreviousTokenFlowEntry =
true;
2230 CurrentEntry =
nullptr;
2235 setError(
"Could not find closing ]!", T);
2238 CurrentEntry =
nullptr;
2241 if (!WasPreviousTokenFlowEntry) {
2242 setError(
"Expected , between entries!", T);
2244 CurrentEntry =
nullptr;
2249 if (!CurrentEntry) {
2252 WasPreviousTokenFlowEntry =
false;
2261 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2263 if (parseDirectives())
2271 if (stream.scanner->failed())
2286 Token &Document::peekNext() {
2287 return stream.scanner->peekNext();
2290 Token Document::getNext() {
2291 return stream.scanner->getNext();
2294 void Document::setError(
const Twine &Message,
Token &Location)
const {
2295 stream.scanner->setError(Message, Location.
Range.
begin());
2298 bool Document::failed()
const {
2299 return stream.scanner->failed();
2314 setError(
"Already encountered an anchor for this node!", T);
2317 AnchorInfo = getNext();
2319 goto parse_property;
2322 setError(
"Already encountered a tag for this node!", T);
2325 TagInfo = getNext();
2327 goto parse_property;
2337 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2343 return new (NodeAllocator)
2350 return new (NodeAllocator)
2357 return new (NodeAllocator)
2364 return new (NodeAllocator)
2371 return new (NodeAllocator)
2380 return new (NodeAllocator)
2386 return new (NodeAllocator)
2397 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2405 bool Document::parseDirectives() {
2406 bool isDirective =
false;
2410 parseTAGDirective();
2413 parseYAMLDirective();
2421 void Document::parseYAMLDirective() {
2425 void Document::parseTAGDirective() {
2426 Token Tag = getNext();
2433 TagMap[TagHandle] = TagPrefix;
2436 bool Document::expectToken(
int TK) {
2439 setError(
"Unexpected token", T);
Represents a range in source code.
std::unique_ptr< Document > & Doc
bool operator==(const BinaryRef &LHS, const BinaryRef &RHS)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class represents lattice values for constants.
StringRef getRawTag() const
Get the tag as it was written in the document.
const char * getBufferEnd() const
A linked-list with a custom, local allocator.
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
Not a valid Unicode encoding.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
void push_back(const T &Elt)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
std::string Value
The value of a block scalar node.
Represents a YAML sequence created from either a block sequence or a flow sequence.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Node * getKey()
Parse and return the key.
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
void resetAlloc()
Reset the underlying allocator.
document_iterator begin()
Represents an alias to a Node with an anchor.
LLVM_NODISCARD StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
void reserve(size_type N)
#define LLVM_ATTRIBUTE_NOINLINE
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
amdgpu Simplify well known AMD library false Value Value const Twine & Name
static bool is_ns_hex_digit(const char C)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_NODISCARD StringRef copy(Allocator &A) const
bool failed()
Returns true if an error occurred while parsing.
std::error_code make_error_code(BitcodeError E)
TypeID
Definitions of all of the base types for the Type system.
Tagged union holding either a T or an Error.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input...
Position
Position to insert a new instruction relative to an existing instruction.
StringRef str() const
Explicit conversion to StringRef.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Node * getRoot()
Parse and return the root level node.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
void append(in_iter S, in_iter E)
Append from an iterator pair.
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
initializer< Ty > init(const Ty &Val)
IteratorImpl< T, typename list_type::iterator > iterator
The instances of the Type class are immutable: once they are created, they are never changed...
void printError(Node *N, const Twine &Msg)
Allocate memory in an ever growing pool, as if by bump-pointer.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
Document(Stream &ParentStream)
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input...
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=None)
Token getNext()
Parse the next token and pop it from the queue.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
iterator erase(const_iterator CI)
BumpPtrAllocator & getAllocator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
void setError(const Twine &Message, StringRef::iterator Position)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
void setError(const Twine &Message, Token &Location) const
Node * getValue()
Parse and return the value.
testing::Matcher< const detail::ErrorHolder & > Failed()
bool skip()
Finish parsing the current document and return true if there are more.
enum llvm::yaml::Token::TokenKind Kind
Token & peekNext()
Parse the next token and return it without popping it.
This class represents a YAML stream potentially containing multiple documents.
LLVM_NODISCARD T pop_back_val()
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
iterator insert(iterator I, T &&V)
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
void setError(const Twine &Message)
iterator insert(iterator I, T &&Elt)
static UTF8Decoded decodeUTF8(StringRef Range)
static SMLoc getFromPointer(const char *Ptr)
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
unsigned int getType() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
LLVM_NODISCARD bool empty() const
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Token - A single YAML token.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Represents a YAML map created from either a block map or a flow map.
Scans YAML tokens from a MemoryBuffer.
Iterator abstraction for Documents over a Stream.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const char * getBufferStart() const
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream...
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
StringRef - Represent a constant reference to a string, i.e.
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Represents a location in source code.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
An inline mapping node is used for "[key: value]".
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
SMRange getSourceRange() const
std::string utohexstr(uint64_t X, bool LowerCase=false)
Abstract base class for all Nodes.
static bool is_ns_word_char(const char C)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.