YAMLParser.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This is a YAML 1.2 parser.
  11. //
  12. // See http://www.yaml.org/spec/1.2/spec.html for the full standard.
  13. //
  14. // This currently does not implement the following:
  15. // * Multi-line literal folding.
  16. // * Tag resolution.
  17. // * UTF-16.
  18. // * BOMs anywhere other than the first Unicode scalar value in the file.
  19. //
  20. // The most important class here is Stream. This represents a YAML stream with
  21. // 0, 1, or many documents.
  22. //
  23. // SourceMgr sm;
  24. // StringRef input = getInput();
  25. // yaml::Stream stream(input, sm);
  26. //
  27. // for (yaml::document_iterator di = stream.begin(), de = stream.end();
  28. // di != de; ++di) {
  29. // yaml::Node *n = di->getRoot();
  30. // if (n) {
  31. // // Do something with n...
  32. // } else
  33. // break;
  34. // }
  35. //
  36. //===----------------------------------------------------------------------===//
  37. #ifndef LLVM_SUPPORT_YAMLPARSER_H
  38. #define LLVM_SUPPORT_YAMLPARSER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <cstddef>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <utility>
  45. namespace llvm {
  46. class MemoryBufferRef;
  47. class SourceMgr;
  48. class Twine;
  49. class raw_ostream;
  50. namespace yaml {
  51. class document_iterator;
  52. class Document;
  53. class Node;
  54. class Scanner;
  55. struct Token;
  56. /// \brief Dump all the tokens in this stream to OS.
  57. /// \returns true if there was an error, false otherwise.
  58. bool dumpTokens(StringRef Input, raw_ostream &);
  59. /// \brief Scans all tokens in input without outputting anything. This is used
  60. /// for benchmarking the tokenizer.
  61. /// \returns true if there was an error, false otherwise.
  62. bool scanTokens(StringRef Input);
  63. /// \brief Escape \a Input for a double quoted scalar.
  64. std::string escape(StringRef Input);
  65. /// \brief This class represents a YAML stream potentially containing multiple
  66. /// documents.
  67. class Stream {
  68. public:
  69. /// \brief This keeps a reference to the string referenced by \p Input.
  70. Stream(StringRef Input, SourceMgr &, bool ShowColors = true);
  71. Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true);
  72. ~Stream();
  73. document_iterator begin();
  74. document_iterator end();
  75. void skip();
  76. bool failed();
  77. bool validate() {
  78. skip();
  79. return !failed();
  80. }
  81. void printError(Node *N, const Twine &Msg);
  82. private:
  83. std::unique_ptr<Scanner> scanner;
  84. std::unique_ptr<Document> CurrentDoc;
  85. friend class Document;
  86. };
  87. /// \brief Abstract base class for all Nodes.
  88. class Node {
  89. virtual void anchor();
  90. public:
  91. enum NodeKind {
  92. NK_Null,
  93. NK_Scalar,
  94. NK_BlockScalar,
  95. NK_KeyValue,
  96. NK_Mapping,
  97. NK_Sequence,
  98. NK_Alias
  99. };
  100. Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
  101. StringRef Tag);
  102. /// \brief Get the value of the anchor attached to this node. If it does not
  103. /// have one, getAnchor().size() will be 0.
  104. StringRef getAnchor() const { return Anchor; }
  105. /// \brief Get the tag as it was written in the document. This does not
  106. /// perform tag resolution.
  107. StringRef getRawTag() const { return Tag; }
  108. /// \brief Get the verbatium tag for a given Node. This performs tag resoluton
  109. /// and substitution.
  110. std::string getVerbatimTag() const;
  111. SMRange getSourceRange() const { return SourceRange; }
  112. void setSourceRange(SMRange SR) { SourceRange = SR; }
  113. // These functions forward to Document and Scanner.
  114. Token &peekNext();
  115. Token getNext();
  116. Node *parseBlockNode();
  117. BumpPtrAllocator &getAllocator();
  118. void setError(const Twine &Message, Token &Location) const;
  119. bool failed() const;
  120. virtual void skip() {}
  121. unsigned int getType() const { return TypeID; }
  122. void *operator new(size_t Size, BumpPtrAllocator &Alloc,
  123. size_t Alignment = 16) throw() {
  124. return Alloc.Allocate(Size, Alignment);
  125. }
  126. void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() {
  127. Alloc.Deallocate(Ptr, Size);
  128. }
  129. protected:
  130. std::unique_ptr<Document> &Doc;
  131. SMRange SourceRange;
  132. void operator delete(void *) throw() {}
  133. ~Node() = default;
  134. private:
  135. unsigned int TypeID;
  136. StringRef Anchor;
  137. /// \brief The tag as typed in the document.
  138. StringRef Tag;
  139. };
  140. /// \brief A null value.
  141. ///
  142. /// Example:
  143. /// !!null null
  144. class NullNode final : public Node {
  145. void anchor() override;
  146. public:
  147. NullNode(std::unique_ptr<Document> &D)
  148. : Node(NK_Null, D, StringRef(), StringRef()) {}
  149. static inline bool classof(const Node *N) { return N->getType() == NK_Null; }
  150. };
  151. /// \brief A scalar node is an opaque datum that can be presented as a
  152. /// series of zero or more Unicode scalar values.
  153. ///
  154. /// Example:
  155. /// Adena
  156. class ScalarNode final : public Node {
  157. void anchor() override;
  158. public:
  159. ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  160. StringRef Val)
  161. : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
  162. SMLoc Start = SMLoc::getFromPointer(Val.begin());
  163. SMLoc End = SMLoc::getFromPointer(Val.end());
  164. SourceRange = SMRange(Start, End);
  165. }
  166. // Return Value without any escaping or folding or other fun YAML stuff. This
  167. // is the exact bytes that are contained in the file (after conversion to
  168. // utf8).
  169. StringRef getRawValue() const { return Value; }
  170. /// \brief Gets the value of this node as a StringRef.
  171. ///
  172. /// \param Storage is used to store the content of the returned StringRef iff
  173. /// it requires any modification from how it appeared in the source.
  174. /// This happens with escaped characters and multi-line literals.
  175. StringRef getValue(SmallVectorImpl<char> &Storage) const;
  176. static inline bool classof(const Node *N) {
  177. return N->getType() == NK_Scalar;
  178. }
  179. private:
  180. StringRef Value;
  181. StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
  182. StringRef::size_type Start,
  183. SmallVectorImpl<char> &Storage) const;
  184. };
  185. /// \brief A block scalar node is an opaque datum that can be presented as a
  186. /// series of zero or more Unicode scalar values.
  187. ///
  188. /// Example:
  189. /// |
  190. /// Hello
  191. /// World
  192. class BlockScalarNode final : public Node {
  193. void anchor() override;
  194. public:
  195. BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  196. StringRef Value, StringRef RawVal)
  197. : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
  198. SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
  199. SMLoc End = SMLoc::getFromPointer(RawVal.end());
  200. SourceRange = SMRange(Start, End);
  201. }
  202. /// \brief Gets the value of this node as a StringRef.
  203. StringRef getValue() const { return Value; }
  204. static inline bool classof(const Node *N) {
  205. return N->getType() == NK_BlockScalar;
  206. }
  207. private:
  208. StringRef Value;
  209. };
  210. /// \brief A key and value pair. While not technically a Node under the YAML
  211. /// representation graph, it is easier to treat them this way.
  212. ///
  213. /// TODO: Consider making this not a child of Node.
  214. ///
  215. /// Example:
  216. /// Section: .text
  217. class KeyValueNode final : public Node {
  218. void anchor() override;
  219. public:
  220. KeyValueNode(std::unique_ptr<Document> &D)
  221. : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr),
  222. Value(nullptr) {}
  223. /// \brief Parse and return the key.
  224. ///
  225. /// This may be called multiple times.
  226. ///
  227. /// \returns The key, or nullptr if failed() == true.
  228. Node *getKey();
  229. /// \brief Parse and return the value.
  230. ///
  231. /// This may be called multiple times.
  232. ///
  233. /// \returns The value, or nullptr if failed() == true.
  234. Node *getValue();
  235. void skip() override {
  236. getKey()->skip();
  237. if (Node *Val = getValue())
  238. Val->skip();
  239. }
  240. static inline bool classof(const Node *N) {
  241. return N->getType() == NK_KeyValue;
  242. }
  243. private:
  244. Node *Key;
  245. Node *Value;
  246. };
  247. /// \brief This is an iterator abstraction over YAML collections shared by both
  248. /// sequences and maps.
  249. ///
  250. /// BaseT must have a ValueT* member named CurrentEntry and a member function
  251. /// increment() which must set CurrentEntry to 0 to create an end iterator.
  252. template <class BaseT, class ValueT>
  253. class basic_collection_iterator
  254. : public std::iterator<std::forward_iterator_tag, ValueT> {
  255. public:
  256. basic_collection_iterator() : Base(nullptr) {}
  257. basic_collection_iterator(BaseT *B) : Base(B) {}
  258. ValueT *operator->() const {
  259. assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
  260. return Base->CurrentEntry;
  261. }
  262. ValueT &operator*() const {
  263. assert(Base && Base->CurrentEntry &&
  264. "Attempted to dereference end iterator!");
  265. return *Base->CurrentEntry;
  266. }
  267. operator ValueT *() const {
  268. assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
  269. return Base->CurrentEntry;
  270. }
  271. bool operator!=(const basic_collection_iterator &Other) const {
  272. if (Base != Other.Base)
  273. return true;
  274. return (Base && Other.Base) &&
  275. Base->CurrentEntry != Other.Base->CurrentEntry;
  276. }
  277. basic_collection_iterator &operator++() {
  278. assert(Base && "Attempted to advance iterator past end!");
  279. Base->increment();
  280. // Create an end iterator.
  281. if (!Base->CurrentEntry)
  282. Base = nullptr;
  283. return *this;
  284. }
  285. private:
  286. BaseT *Base;
  287. };
  288. // The following two templates are used for both MappingNode and Sequence Node.
  289. template <class CollectionType>
  290. typename CollectionType::iterator begin(CollectionType &C) {
  291. assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  292. C.IsAtBeginning = false;
  293. typename CollectionType::iterator ret(&C);
  294. ++ret;
  295. return ret;
  296. }
  297. template <class CollectionType> void skip(CollectionType &C) {
  298. // TODO: support skipping from the middle of a parsed collection ;/
  299. assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  300. if (C.IsAtBeginning)
  301. for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
  302. ++i)
  303. i->skip();
  304. }
  305. /// \brief Represents a YAML map created from either a block map for a flow map.
  306. ///
  307. /// This parses the YAML stream as increment() is called.
  308. ///
  309. /// Example:
  310. /// Name: _main
  311. /// Scope: Global
  312. class MappingNode final : public Node {
  313. void anchor() override;
  314. public:
  315. enum MappingType {
  316. MT_Block,
  317. MT_Flow,
  318. MT_Inline ///< An inline mapping node is used for "[key: value]".
  319. };
  320. MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  321. MappingType MT)
  322. : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
  323. IsAtEnd(false), CurrentEntry(nullptr) {}
  324. friend class basic_collection_iterator<MappingNode, KeyValueNode>;
  325. typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
  326. template <class T> friend typename T::iterator yaml::begin(T &);
  327. template <class T> friend void yaml::skip(T &);
  328. iterator begin() { return yaml::begin(*this); }
  329. iterator end() { return iterator(); }
  330. void skip() override { yaml::skip(*this); }
  331. static inline bool classof(const Node *N) {
  332. return N->getType() == NK_Mapping;
  333. }
  334. private:
  335. MappingType Type;
  336. bool IsAtBeginning;
  337. bool IsAtEnd;
  338. KeyValueNode *CurrentEntry;
  339. void increment();
  340. };
  341. /// \brief Represents a YAML sequence created from either a block sequence for a
  342. /// flow sequence.
  343. ///
  344. /// This parses the YAML stream as increment() is called.
  345. ///
  346. /// Example:
  347. /// - Hello
  348. /// - World
  349. class SequenceNode final : public Node {
  350. void anchor() override;
  351. public:
  352. enum SequenceType {
  353. ST_Block,
  354. ST_Flow,
  355. // Use for:
  356. //
  357. // key:
  358. // - val1
  359. // - val2
  360. //
  361. // As a BlockMappingEntry and BlockEnd are not created in this case.
  362. ST_Indentless
  363. };
  364. SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  365. SequenceType ST)
  366. : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
  367. IsAtEnd(false),
  368. WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
  369. CurrentEntry(nullptr) {}
  370. friend class basic_collection_iterator<SequenceNode, Node>;
  371. typedef basic_collection_iterator<SequenceNode, Node> iterator;
  372. template <class T> friend typename T::iterator yaml::begin(T &);
  373. template <class T> friend void yaml::skip(T &);
  374. void increment();
  375. iterator begin() { return yaml::begin(*this); }
  376. iterator end() { return iterator(); }
  377. void skip() override { yaml::skip(*this); }
  378. static inline bool classof(const Node *N) {
  379. return N->getType() == NK_Sequence;
  380. }
  381. private:
  382. SequenceType SeqType;
  383. bool IsAtBeginning;
  384. bool IsAtEnd;
  385. bool WasPreviousTokenFlowEntry;
  386. Node *CurrentEntry;
  387. };
  388. /// \brief Represents an alias to a Node with an anchor.
  389. ///
  390. /// Example:
  391. /// *AnchorName
  392. class AliasNode final : public Node {
  393. void anchor() override;
  394. public:
  395. AliasNode(std::unique_ptr<Document> &D, StringRef Val)
  396. : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
  397. StringRef getName() const { return Name; }
  398. Node *getTarget();
  399. static inline bool classof(const Node *N) { return N->getType() == NK_Alias; }
  400. private:
  401. StringRef Name;
  402. };
  403. /// \brief A YAML Stream is a sequence of Documents. A document contains a root
  404. /// node.
  405. class Document {
  406. public:
  407. /// \brief Root for parsing a node. Returns a single node.
  408. Node *parseBlockNode();
  409. Document(Stream &ParentStream);
  410. /// \brief Finish parsing the current document and return true if there are
  411. /// more. Return false otherwise.
  412. bool skip();
  413. /// \brief Parse and return the root level node.
  414. Node *getRoot() {
  415. if (Root)
  416. return Root;
  417. return Root = parseBlockNode();
  418. }
  419. const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
  420. private:
  421. friend class Node;
  422. friend class document_iterator;
  423. /// \brief Stream to read tokens from.
  424. Stream &stream;
  425. /// \brief Used to allocate nodes to. All are destroyed without calling their
  426. /// destructor when the document is destroyed.
  427. BumpPtrAllocator NodeAllocator;
  428. /// \brief The root node. Used to support skipping a partially parsed
  429. /// document.
  430. Node *Root;
  431. /// \brief Maps tag prefixes to their expansion.
  432. std::map<StringRef, StringRef> TagMap;
  433. Token &peekNext();
  434. Token getNext();
  435. void setError(const Twine &Message, Token &Location) const;
  436. bool failed() const;
  437. /// \brief Parse %BLAH directives and return true if any were encountered.
  438. bool parseDirectives();
  439. /// \brief Parse %YAML
  440. void parseYAMLDirective();
  441. /// \brief Parse %TAG
  442. void parseTAGDirective();
  443. /// \brief Consume the next token and error if it is not \a TK.
  444. bool expectToken(int TK);
  445. };
  446. /// \brief Iterator abstraction for Documents over a Stream.
  447. class document_iterator {
  448. public:
  449. document_iterator() : Doc(nullptr) {}
  450. document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
  451. bool operator==(const document_iterator &Other) {
  452. if (isAtEnd() || Other.isAtEnd())
  453. return isAtEnd() && Other.isAtEnd();
  454. return Doc == Other.Doc;
  455. }
  456. bool operator!=(const document_iterator &Other) { return !(*this == Other); }
  457. document_iterator operator++() {
  458. assert(Doc && "incrementing iterator past the end.");
  459. if (!(*Doc)->skip()) {
  460. Doc->reset(nullptr);
  461. } else {
  462. Stream &S = (*Doc)->stream;
  463. Doc->reset(new Document(S));
  464. }
  465. return *this;
  466. }
  467. Document &operator*() { return *Doc->get(); }
  468. std::unique_ptr<Document> &operator->() { return *Doc; }
  469. private:
  470. bool isAtEnd() const { return !Doc || !*Doc; }
  471. std::unique_ptr<Document> *Doc;
  472. };
  473. } // End namespace yaml.
  474. } // End namespace llvm.
  475. #endif