XQueryTokenizer.cs 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. //
  2. // XQueryTokenizer.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. #if NET_2_0
  29. using System;
  30. using System.Collections;
  31. using System.Collections.Generic;
  32. using System.IO;
  33. using System.Security.Policy;
  34. using System.Xml;
  35. using System.Xml.Query;
  36. using System.Xml.Schema;
  37. using System.Xml.XPath;
  38. using Mono.Xml.XQuery;
  39. using Mono.Xml.XPath2;
  40. namespace Mono.Xml.XQuery.Parser
  41. {
  42. // FIXME: make internal in the future
  43. public class XQueryTokenizer
  44. : Mono.Xml.XQuery.Parser.yyParser.yyInput, IXmlLineInfo
  45. {
  // Position tracking, maintained by ReadChar ().
  private int line = 1;
  private int column = 0;
  // Set after a '\n' has been read; the line counter is bumped on
  // the following read.
  private bool nextIncrementLine;

  // Namespace resolution.
  private XmlNamespaceManager nsResolver;
  private string defaultFunctionNamespace = XQueryFunction.Namespace;

  // Input source, plus a one-character pushback slot (-1 = empty).
  private TextReader source;
  private int peekChar = -1;

  // Current token, its value, and an optional queued token that
  // advance () hands out before reading more input (-1 = none).
  private int currentToken;
  private string prefixName;
  private object tokenValue;
  private int lookAheadToken = -1;
  private object lookAheadTokenValue;

  // Lexical state.
  private WhitespaceHandling ws = WhitespaceHandling.Arbitrary;
  private ParseState state = ParseState.Default;
  private Stack stateStack;

  // Scratch buffer shared by all readers; grown by AddValueChar ().
  private char [] buffer = new char [30];
  private int bufferIndex;
  // Creates a tokenizer that reads query text from the given reader
  // and registers the prefixes this tokenizer treats as predeclared.
  //
  // Throws ArgumentNullException when reader is null, instead of
  // failing later with a NullReferenceException on the first read.
  public XQueryTokenizer (TextReader reader)
  {
      if (reader == null)
          throw new ArgumentNullException ("reader");

      this.source = reader;
      stateStack = new Stack ();

      nsResolver = new XmlNamespaceManager (new NameTable ());
      nsResolver.AddNamespace ("xs", XmlSchema.Namespace);
      nsResolver.AddNamespace ("xdt", XmlSchema.XdtNamespace);
      // FIXME: Are they really predefined?
      nsResolver.AddNamespace ("xsi", XmlSchema.InstanceNamespace);
      nsResolver.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
      nsResolver.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
  }
  // The namespace manager used to resolve QName prefixes.
  internal IXmlNamespaceResolver NSResolver
  {
      get { return nsResolver; }
  }

  // Default function namespace; initially XQueryFunction.Namespace.
  internal string DefaultFunctionNamespace
  {
      get { return defaultFunctionNamespace; }
      set { defaultFunctionNamespace = value; }
  }

  // Registers a prefix-to-namespace mapping for later QName lookups.
  public void AddNamespace (string prefix, string ns)
  {
      nsResolver.AddNamespace (prefix, ns);
  }
  // yyInput: moves to the next token. Returns false once the end of
  // input has been reached (the current token is negative).
  public bool advance ()
  {
      if (currentToken < 0)
          return false;

      if (lookAheadToken < 0) {
          // nothing queued: tokenize more input
          currentToken = ParseToken ();
      } else {
          // hand out the token queued by the previous call
          tokenValue = lookAheadTokenValue;
          currentToken = lookAheadToken;
          lookAheadToken = -1;
      }
      return currentToken >= 0;
  }
  // yyInput: the current token id.
  public int token ()
  {
      return currentToken;
  }

  // yyInput: the value attached to the current token.
  public object value ()
  {
      return tokenValue;
  }

  // IXmlLineInfo: line information is always available.
  public bool HasLineInfo ()
  {
      return true;
  }

  // IXmlLineInfo: current line; starts at 1.
  public int LineNumber
  {
      get { return line; }
  }

  // IXmlLineInfo: current column, as counted by ReadChar ().
  public int LinePosition
  {
      get { return column; }
  }
  // How whitespace in front of the next token is treated.
  internal WhitespaceHandling Space
  {
      get { return ws; }
      set { ws = value; }
  }

  // Current lexical state. Setting it does not touch the state stack.
  internal ParseState State
  {
      get { return state; }
      set { state = value; }
  }

  // Pushes a state onto the stack. The current state is unchanged.
  internal void PushState (ParseState newState)
  {
      stateStack.Push (newState);
  }

  // Pops the top of the stack and makes it the current state.
  // Throws a compile error when the stack is empty.
  internal void PopState ()
  {
      if (stateStack.Count != 0) {
          state = (ParseState) stateStack.Pop ();
          return;
      }
      throw Error ("Internal state transition error. State stack is empty.");
  }
  // Builds (does not throw) a compile exception that carries this
  // tokenizer as its line information source.
  private XmlQueryCompileException Error (string message)
  {
      XmlQueryCompileException ex = new XmlQueryCompileException (message, this, null, null);
      return ex;
  }
  // Reads one token from the input and returns its id, or -1 at the
  // end of input. Leading whitespace is treated according to Space.
  // A leading digit always starts a decimal literal, whatever the
  // state; anything else is dispatched by the current state.
  private int ParseToken ()
  {
      bufferIndex = 0;

      if (ws == WhitespaceHandling.Explicit && !XmlChar.IsWhitespace (PeekChar ()))
          throw Error ("Whitespace is required.");
      if (ws == WhitespaceHandling.Arbitrary || ws == WhitespaceHandling.Explicit)
          SkipWhitespaces ();

      int first = PeekChar ();
      if (first < 0)
          return -1;

      // FIXME: consider DOUBLE_LITERAL
      if (Char.IsNumber ((char) first)) {
          tokenValue = ReadDecimal (false);
          return Token.DECIMAL_LITERAL;
      }

      if (state == ParseState.OccurenceIndicator)
          return ParseOccurenceIndicator ();
      if (state == ParseState.XmlPIContent)
          return ParseXmlPIContent ();
      if (state == ParseState.XmlComment)
          return ParseXmlCommentContent ();
      if (state == ParseState.ElementContent)
          return ParseElementContent ();
      return ParseDefault ();
  }
  // Reads processing instruction content up to and including the
  // closing "?>". The content (without "?>") becomes the token value.
  // Throws a compile error when the input ends first.
  //
  // Fix: ordinary characters were appended without being consumed,
  // so the loop never advanced past the first non-'?' character.
  private int ParseXmlPIContent ()
  {
      while (true) {
          int c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML processing instruction content");
          ReadChar ();
          if (c == '?' && PeekChar () == '>') {
              ReadChar ();
              tokenValue = CreateValueString ();
              return Token.XML_PI_TO_END;
          }
          // A '?' not followed by '>' is plain content; "??>" therefore
          // yields "?" followed by the terminator.
          AddValueChar ((char) c);
      }
  }
  // Reads XML comment content up to the closing "-->". The content
  // becomes the token value. Throws a compile error when the input
  // ends first.
  //
  // Fixes: ordinary characters were appended without being consumed
  // (endless loop), and "--->" was not recognized as "-" + "-->".
  //
  // NOTE(review): as in the original, the final '>' is left unread
  // (ParseXmlPIContent consumes its '>'); confirm against the grammar.
  private int ParseXmlCommentContent ()
  {
      int dashes = 0; // pending '-' characters not yet added to the value
      while (true) {
          int c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML comment content");
          if (c == '>' && dashes >= 2) {
              // the last two dashes belong to the terminator
              for (int i = 2; i < dashes; i++)
                  AddValueChar ('-');
              tokenValue = CreateValueString ();
              return Token.XML_COMMENT_TO_END;
          }
          ReadChar ();
          if (c == '-') {
              dashes++;
              continue;
          }
          // the dash run was not a terminator: it is content
          for (; dashes > 0; dashes--)
              AddValueChar ('-');
          AddValueChar ((char) c);
      }
  }
  // Reads CDATA section content up to the closing "]]>". The content
  // becomes the token value. Throws a compile error when the input
  // ends first.
  //
  // Fixes: ordinary characters were appended without being consumed
  // (endless loop), and "]]]>" was not recognized as "]" + "]]>".
  //
  // NOTE(review): as in the original, the final '>' is left unread;
  // confirm against the grammar.
  private int ParseXmlCDataContent ()
  {
      int brackets = 0; // pending ']' characters not yet added to the value
      while (true) {
          int c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML CDATA section content");
          if (c == '>' && brackets >= 2) {
              // the last two brackets belong to the terminator
              for (int i = 2; i < brackets; i++)
                  AddValueChar (']');
              tokenValue = CreateValueString ();
              return Token.XML_CDATA_TO_END;
          }
          ReadChar ();
          if (c == ']') {
              brackets++;
              continue;
          }
          // the bracket run was not a terminator: it is content
          for (; brackets > 0; brackets--)
              AddValueChar (']');
          AddValueChar ((char) c);
      }
  }
  // Tokenizes direct element content. When the content starts with
  // '<' or '{' the default tokenizer handles it; otherwise literal
  // text is collected (resolving predefined entity references) and
  // returned as ELEM_CONTENT_LITERAL.
  // Throws a compile error when the input ends inside the content.
  //
  // Fixes: the error messages wrongly mentioned "processing
  // instruction content", and literal text now also stops at '{' so
  // that an enclosed expression following text is not swallowed
  // (previously only a leading '{' was recognized).
  // NOTE(review): "{{" / "}}" escapes are not handled here.
  private int ParseElementContent ()
  {
      tokenValue = null;
      int c = PeekChar ();
      if (c < 0)
          throw Error ("Unexpected end of query text inside XML element content");
      if (c == '<' || c == '{')
          return ParseDefault ();

      while (true) {
          c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML element content");
          switch ((char) c) {
          case '&':
              ReadChar ();
              ReadPredefinedEntity ();
              continue;
          case '<':
          case '{':
              // the delimiter is left unread for the next token
              tokenValue = CreateValueString ();
              return Token.ELEM_CONTENT_LITERAL;
          default:
              AddValueChar ((char) c);
              ReadChar ();
              continue;
          }
      }
  }
  // Reads the rest of a predefined entity reference (the '&' has
  // already been consumed), i.e. "name;", and appends the character
  // it stands for to the value buffer.
  // Throws a compile error for a missing ';' or an unknown name.
  //
  // Fix: ReadOneToken () resets the shared value buffer, so text
  // collected before the reference was lost and the entity name
  // itself leaked into the value ("a&lt;b" became "lt<b"). The
  // pending text is now saved and restored around the name read.
  private void ReadPredefinedEntity ()
  {
      string pending = CreateValueString ();
      string token = ReadOneToken ();
      Expect (";");

      // put the caller's text back before appending the resolved char
      bufferIndex = 0;
      for (int i = 0; i < pending.Length; i++)
          AddValueChar (pending [i]);

      switch (token) {
      case "lt":
          AddValueChar ('<');
          return;
      case "gt":
          AddValueChar ('>');
          return;
      case "amp":
          AddValueChar ('&');
          return;
      case "quot":
          AddValueChar ('"');
          return;
      case "apos":
          AddValueChar ('\'');
          return;
      default:
          throw Error (String.Format ("Unexpected general entity name: {0} .", token));
      }
  }
  // FIXME: not used as yet
  //
  // Reads extension content up to the closing "::)". The content
  // becomes the token value. Throws a compile error when the input
  // ends first.
  //
  // Fixes: ordinary characters were appended without being consumed
  // (endless loop), and ":::)" was not recognized as ":" + "::)".
  //
  // NOTE(review): as in the original, the final ')' is left unread;
  // confirm against the grammar.
  private int ParseExtContent ()
  {
      int colons = 0; // pending ':' characters not yet added to the value
      while (true) {
          int c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside external content");
          if (c == ')' && colons >= 2) {
              // the last two colons belong to the terminator
              for (int i = 2; i < colons; i++)
                  AddValueChar (':');
              tokenValue = CreateValueString ();
              return Token.EXT_CONTENT;
          }
          ReadChar ();
          if (c == ':') {
              colons++;
              continue;
          }
          // the colon run was not a terminator: it is content
          for (; colons > 0; colons--)
              AddValueChar (':');
          AddValueChar ((char) c);
      }
  }
  // Handles the token right after a type name: '?', '*' and '+' are
  // returned as occurrence indicators; anything else is tokenized as
  // an operator. The state is switched to Operator in either case.
  private int ParseOccurenceIndicator ()
  {
      state = ParseState.Operator;

      int indicator;
      switch (PeekChar ()) {
      case '?':
          indicator = Token.QUESTION;
          break;
      case '*':
          indicator = Token.ASTERISK;
          break;
      case '+':
          indicator = Token.PLUS;
          break;
      default:
          return ParseOperator ();
      }
      ReadChar ();
      return indicator;
  }

  // Tokenizes in operator position; currently just the default rules.
  private int ParseOperator ()
  {
      // TODO: implement
      int result = ParseDefault ();
      return result;
  }
  // Tokenizes one token using the general rules: punctuation and
  // operators first, then names. A name is returned as a keyword
  // token when the current state accepts it as one, as NCNAME in the
  // states that expect a bare name, and otherwise as QNAME /
  // WILD_LOCALNAME with its prefix resolved through nsResolver.
  // Throws a compile error for malformed "<!" sequences and for
  // prefixes that are not mapped to a namespace.
  //
  // Fixes:
  // - ".5" stored a decimal value but returned Token.DOT; it now
  //   returns Token.DECIMAL_LITERAL.
  // - the Operator keyword list spelled "typeswitch" as "typwswitch".
  //
  // NOTE(review): a character that is neither punctuation handled
  // here nor a name character (e.g. '!' or '#') yields an empty QNAME
  // and is never consumed.
  private int ParseDefault ()
  {
      int c = ReadChar ();
      switch (c) {
      case '.':
          if (PeekChar () == '.') {
              ReadChar ();
              return Token.DOT2;
          }
          if (Char.IsNumber ((char) PeekChar ())) {
              tokenValue = ReadDecimal (true);
              return Token.DECIMAL_LITERAL;
          }
          return Token.DOT;
      case ',':
          return Token.COMMA;
      case ';':
          return Token.SEMICOLON;
      case '(':
          if (PeekChar () == ':') {
              ReadChar ();
              if (PeekChar () == ':') {
                  ReadChar ();
                  return Token.PRAGMA_OPEN;
              }
              return Token.OPEN_PAREN_COLON;
          }
          return Token.OPEN_PAREN;
      case ')':
          return Token.CLOSE_PAREN;
      case ':':
          switch (PeekChar ()) {
          case ':':
              ReadChar ();
              if (PeekChar () == ')') {
                  ReadChar ();
                  return Token.PRAGMA_CLOSE;
              }
              return Token.COLON2;
          case ')':
              ReadChar ();
              return Token.CLOSE_PAREN_COLON;
          case '=':
              ReadChar ();
              return Token.COLON_EQUAL;
          }
          return Token.COLON;
      case '[':
          return Token.OPEN_BRACKET;
      case ']':
          return Token.CLOSE_BRACKET;
      case '{':
          return Token.OPEN_CURLY;
      case '}':
          return Token.CLOSE_CURLY;
      case '$':
          return Token.DOLLAR;
      case '\'':
          // FIXME: consider special handling in StartTag state in the future
          tokenValue = ReadQuoted ('\'');
          return Token.STRING_LITERAL;
      case '"':
          // FIXME: consider special handling in StartTag state in the future
          tokenValue = ReadQuoted ('"');
          return Token.STRING_LITERAL;
      case '=':
          return Token.EQUAL;
      case '<':
          // Markup openers only happen when state is ElementContent
          // (otherwise it might be "/foo</bar")
          if (state == ParseState.ElementContent) {
              switch ((char) PeekChar ()) {
              case '/':
                  ReadChar ();
                  return Token.END_TAG_START;
              case '!':
                  ReadChar ();
                  switch (PeekChar ()) {
                  case '-':
                      ReadChar ();
                      if (ReadChar () != '-')
                          throw Error ("Invalid sequence of characters '<!-'.");
                      return Token.XML_COMMENT_START;
                  case '[':
                      ReadChar ();
                      Expect ("CDATA[");
                      return Token.XML_CDATA_START;
                  }
                  throw Error ("Invalid sequence of characters '<!'.");
              case '?':
                  ReadChar ();
                  return Token.XML_PI_START;
              default:
                  return Token.LESSER;
              }
          }
          switch (PeekChar ()) {
          case '<':
              ReadChar ();
              return Token.LESSER2;
          case '=':
              ReadChar ();
              return Token.LESSER_EQUAL;
          }
          return Token.LESSER;
      case '>':
          switch (PeekChar ()) {
          case '>':
              ReadChar ();
              return Token.GREATER2;
          case '=':
              ReadChar ();
              return Token.GREATER_EQUAL;
          }
          return Token.GREATER;
      case '|':
          return Token.BAR;
      case '*':
          if (PeekChar () == ':') {
              ReadChar ();
              // FIXME: more check
              tokenValue = new XmlQualifiedName (ReadOneToken (), "*");
              return Token.WILD_PREFIX;
          }
          return Token.ASTERISK;
      case '+':
          return Token.PLUS;
      case '-':
          return Token.MINUS;
      case '/':
          // "/>" only happens when state is StartTag
          // (otherwise it might be "/>$extvar")
          if (state == ParseState.StartTag && PeekChar () == '>') {
              ReadChar ();
              return Token.EMPTY_TAG_CLOSE;
          }
          if (PeekChar () == '/') {
              ReadChar ();
              return Token.SLASH2;
          }
          return Token.SLASH;
      case '?':
          return Token.QUESTION;
      case '@':
          return Token.AT;
      }

      // Not punctuation: push the character back and read a name.
      peekChar = c;
      prefixName = null;
      string name = ReadOneToken ();
      tokenValue = name;

      // Keywords that are only recognized in one particular state
      // return immediately; Operator and Default states consult the
      // general keyword table below.
      bool validKeyword = false;
      switch (state) {
      case ParseState.XmlSpaceDecl:
          switch (name) {
          case "preserve": return Token.PRESERVE;
          case "strip": return Token.STRIP;
          }
          break;
      case ParseState.CloseKindTest:
          if (name == "nillable")
              return Token.NILLABLE;
          break;
      case ParseState.ExtKey:
          switch (name) {
          case "pragma": return Token.PRAGMA;
          case "extension": return Token.EXTENSION;
          }
          break;
      case ParseState.KindTest:
          switch (name) {
          case "context": return Token.CONTEXT;
          case "element": return Token.ELEMENT;
          case "global": return Token.GLOBAL;
          case "type": return Token.TYPE;
          }
          break;
      case ParseState.ItemType:
          switch (name) {
          case "attribute": return Token.ATTRIBUTE;
          case "comment": return Token.COMMENT;
          case "document-node": return Token.DOCUMENT_NODE;
          case "element": return Token.ELEMENT;
          case "empty": return Token.EMPTY;
          case "item": return Token.ITEM;
          case "node": return Token.NODE;
          case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
          case "text": return Token.TEXT;
          }
          break;
      case ParseState.NamespaceKeyword:
          switch (name) {
          case "declare": return Token.DECLARE;
          case "default": return Token.DEFAULT;
          case "element": return Token.ELEMENT;
          case "function": return Token.FUNCTION;
          case "namespace": return Token.NAMESPACE;
          }
          break;
      case ParseState.OccurenceIndicator:
      case ParseState.Operator:
          validKeyword = IsOperatorStateKeyword (name);
          break;
      case ParseState.Default:
          validKeyword = IsDefaultStateKeyword (name);
          break;
      }

      if (validKeyword) {
          int keyword = GetKeywordToken (name);
          if (keyword >= 0)
              return keyword;
      }

      // States that expect a bare (unprefixed) name.
      switch (state) {
      case ParseState.NamespaceDecl:
      case ParseState.NamespaceKeyword:
      case ParseState.XmlSpaceDecl:
      case ParseState.KindTestForPI:
      case ParseState.XmlPI:
          return Token.NCNAME;
      }

      if (PeekChar () == ':') {
          ReadChar ();
          prefixName = name;
          switch (PeekChar ()) {
          case '*':
              ReadChar ();
              name = "*";
              break;
          case '=': // ex. let foo:= ...
              // The colon belonged to ":=": return the name now and
              // queue COLON_EQUAL for the next advance ().
              ReadChar ();
              tokenValue = new XmlQualifiedName (name, nsResolver.DefaultNamespace);
              lookAheadToken = Token.COLON_EQUAL;
              return Token.QNAME;
          default:
              name = ReadOneToken ();
              break;
          }
          string ns = nsResolver.LookupNamespace (prefixName);
          if (ns == null)
              throw Error (String.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName));
          tokenValue = new XmlQualifiedName (name, ns);
          prefixName = null;
          return name == "*" ? Token.WILD_LOCALNAME : Token.QNAME;
      }
      tokenValue = new XmlQualifiedName (name);
      return Token.QNAME;
  }

  // True when name is a keyword in Operator / OccurenceIndicator state.
  private static bool IsOperatorStateKeyword (string name)
  {
      switch (name) {
      case "and": case "as": case "ascending": case "at":
      case "base-uri": case "by": case "case": case "cast":
      case "castable": case "collation": case "declare": case "default":
      case "descending": case "div": case "element": case "else":
      case "empty": case "eq": case "every": case "except":
      case "external": case "for": case "function": case "ge":
      case "global": case "greatest": case "gt": case "idiv":
      case "import": case "in": case "instance": case "intersect":
      case "is": case "lax": case "le": case "least":
      case "let": case "lt": case "mod": case "module":
      case "namespace": case "ne": case "of": case "or":
      case "order": case "ordered": case "ordering": case "return":
      case "satisfies": case "schema": case "skip": case "some":
      case "stable": case "strict": case "then": case "to":
      case "treat": case "typeswitch": case "union": case "unordered":
      case "variable": case "where": case "xmlspace":
          return true;
      }
      return false;
  }

  // True when name is a keyword in Default state.
  private static bool IsDefaultStateKeyword (string name)
  {
      switch (name) {
      case "ancestor": case "ancestor-or-self": case "as": case "attribute":
      case "base-uri": case "child": case "collation": case "comment":
      case "construction": case "declare": case "default": case "descendant":
      case "descendant-or-self": case "document": case "document-node": case "element":
      case "every": case "following": case "following-sibling": case "for":
      case "function": case "global": case "if": case "import":
      case "lax": case "let": case "module": case "namespace":
      case "node": case "ordered": case "parent": case "preceding":
      case "preceding-sibling": case "processing-instruction": case "schema": case "self":
      case "some": case "strict": case "strip": case "text":
      case "typeswitch": case "unordered": case "validate": case "validation":
      case "version": case "xmlspace": case "xquery":
          return true;
      }
      return false;
  }

  // Maps a keyword to its token id; returns -1 for any other name.
  // NOTE(review): "at" maps to Token.AT, the same token ParseDefault
  // returns for '@' - confirm this is intended.
  private static int GetKeywordToken (string name)
  {
      switch (name) {
      case "xquery": return Token.XQUERY;
      case "version": return Token.VERSION;
      case "pragma": return Token.PRAGMA;
      case "extension": return Token.EXTENSION;
      case "module": return Token.MODULE;
      case "namespace": return Token.NAMESPACE;
      case "declare": return Token.DECLARE;
      case "xmlspace": return Token.XMLSPACE;
      case "preserve": return Token.PRESERVE;
      case "strip": return Token.STRIP;
      case "default": return Token.DEFAULT;
      case "construction": return Token.CONSTRUCTION;
      case "ordering": return Token.ORDERING;
      case "ordered": return Token.ORDERED;
      case "unordered": return Token.UNORDERED;
      case "document-node": return Token.DOCUMENT_NODE;
      case "document": return Token.DOCUMENT;
      case "element": return Token.ELEMENT;
      case "attribute": return Token.ATTRIBUTE;
      case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
      case "comment": return Token.COMMENT;
      case "text": return Token.TEXT;
      case "node": return Token.NODE;
      case "function": return Token.FUNCTION;
      case "collation": return Token.COLLATION;
      case "base-uri": return Token.BASEURI;
      case "import": return Token.IMPORT;
      case "schema": return Token.SCHEMA;
      case "at": return Token.AT;
      case "variable": return Token.VARIABLE;
      case "as": return Token.AS;
      case "external": return Token.EXTERNAL;
      case "validation": return Token.VALIDATION;
      case "lax": return Token.LAX;
      case "strict": return Token.STRICT;
      case "skip": return Token.SKIP;
      case "return": return Token.RETURN;
      case "for": return Token.FOR;
      case "let": return Token.LET;
      case "in": return Token.IN;
      case "where": return Token.WHERE;
      case "order": return Token.ORDER;
      case "by": return Token.BY;
      case "stable": return Token.STABLE;
      case "ascending": return Token.ASCENDING;
      case "descending": return Token.DESCENDING;
      case "empty": return Token.EMPTY;
      case "greatest": return Token.GREATEST;
      case "least": return Token.LEAST;
      case "some": return Token.SOME;
      case "every": return Token.EVERY;
      case "satisfies": return Token.SATISFIES;
      case "is": return Token.IS;
      case "to": return Token.TO;
      case "eq": return Token.EQ;
      case "ne": return Token.NE;
      case "lt": return Token.LT;
      case "le": return Token.LE;
      case "gt": return Token.GT;
      case "ge": return Token.GE;
      case "and": return Token.AND;
      case "or": return Token.OR;
      case "instance": return Token.INSTANCE;
      case "of": return Token.OF;
      case "if": return Token.IF;
      case "then": return Token.THEN;
      case "else": return Token.ELSE;
      case "typeswitch": return Token.TYPESWITCH;
      case "case": return Token.CASE;
      case "treat": return Token.TREAT;
      case "castable": return Token.CASTABLE;
      case "cast": return Token.CAST;
      case "div": return Token.DIV;
      case "idiv": return Token.IDIV;
      case "mod": return Token.MOD;
      case "union": return Token.UNION;
      case "intersect": return Token.INTERSECT;
      case "except": return Token.EXCEPT;
      case "validate": return Token.VALIDATE;
      case "context": return Token.CONTEXT;
      case "nillable": return Token.NILLABLE;
      case "item": return Token.ITEM;
      case "global": return Token.GLOBAL;
      case "type": return Token.TYPE;
      case "child": return Token.CHILD;
      case "descendant": return Token.DESCENDANT;
      case "self": return Token.SELF;
      case "descendant-or-self": return Token.DESCENDANT_OR_SELF;
      case "following-sibling": return Token.FOLLOWING_SIBLING;
      case "following": return Token.FOLLOWING;
      case "parent": return Token.PARENT;
      case "ancestor": return Token.ANCESTOR;
      case "preceding": return Token.PRECEDING;
      case "preceding-sibling": return Token.PRECEDING_SIBLING;
      case "ancestor-or-self": return Token.ANCESTOR_OR_SELF;
      }
      return -1;
  }
  // Returns the next character without consuming it, or -1 at the
  // end of input. The character is cached in peekChar.
  private int PeekChar ()
  {
      if (peekChar != -1)
          return peekChar;
      peekChar = source.Read ();
      return peekChar;
  }
  // Consumes and returns the next character (-1 at the end of input),
  // taking it from the peek slot when one is cached, and updates the
  // line / column counters. The line counter is bumped on the read
  // that follows a '\n'.
  private int ReadChar ()
  {
      int ret = peekChar;
      if (ret == -1)
          ret = source.Read ();
      else
          peekChar = -1;

      if (nextIncrementLine) {
          line++;
          column = 0;
          nextIncrementLine = false;
      }
      column++;

      if (ret == '\n')
          nextIncrementLine = true;
      return ret;
  }
  // Consumes consecutive space, tab, CR and LF characters.
  private void SkipWhitespaces ()
  {
      for (int c = PeekChar (); c == ' ' || c == '\t' || c == '\r' || c == '\n'; c = PeekChar ())
          ReadChar ();
  }
  // Appends a character to the value buffer, doubling the buffer's
  // capacity when it is full.
  private void AddValueChar (char c)
  {
      if (bufferIndex == buffer.Length)
          Array.Resize (ref buffer, bufferIndex * 2);
      buffer [bufferIndex] = c;
      bufferIndex++;
  }

  // Returns the characters collected in the value buffer so far.
  private string CreateValueString ()
  {
      string collected = new string (buffer, 0, bufferIndex);
      return collected;
  }
  // Consumes the given text from the input; throws a compile error
  // at the first character that does not match.
  private void Expect (string expected)
  {
      foreach (char wanted in expected) {
          if (ReadChar () != wanted)
              throw Error (String.Format ("Expected token '{0}' did not appear.", expected));
      }
  }
  // TODO: parse three quoted
  //
  // Reads the body of a string literal whose opening quote has
  // already been consumed, up to and including the closing quoteChar,
  // and returns the body. A doubled quoteChar inside the literal is an
  // escaped quote and yields a single quoteChar.
  // Throws a compile error when the input ends inside the literal.
  //
  // Fixes:
  // - end of input inside a '-quoted literal looped forever, and
  //   inside a "-quoted literal silently ended the literal;
  // - the other quote character (e.g. " inside '...') was dropped
  //   from the value instead of being kept;
  // - doubled-quote escapes were not recognized.
  private string ReadQuoted (char quoteChar)
  {
      bufferIndex = 0;
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside string literal");
          if (c == quoteChar) {
              if (PeekChar () != quoteChar)
                  break; // closing quote
              ReadChar (); // escaped quote: consume the second one
          }
          AddValueChar ((char) c);
      }
      return CreateValueString ();
  }
  // Reads a run of digits and returns it as a decimal. When
  // floatingPoint is true the digits are the fraction part of a
  // literal whose leading '.' has already been consumed (".5" gives
  // 0.5); otherwise they form an integer literal.
  // Throws a compile error when the digits cannot be parsed.
  //
  // Fixes:
  // - the '.' prefix was applied in the wrong case, so "123" parsed
  //   as 0.123 and ".5" parsed as 5;
  // - parsing used the current culture; it now uses the invariant
  //   culture so '.' is always the decimal separator;
  // - unparsable input (overflow, non-ASCII digits) raised
  //   FormatException / OverflowException instead of a compile error.
  private decimal ReadDecimal (bool floatingPoint)
  {
      bufferIndex = 0;
      while (true) {
          int c = PeekChar ();
          if (c < 0) {
              ReadChar ();
              break;
          }
          // FIXME: more complex
          if (!Char.IsNumber ((char) c))
              break;
          ReadChar ();
          AddValueChar ((char) c);
      }

      string digits = CreateValueString ();
      string s = floatingPoint ? "0." + digits : digits;
      decimal result;
      if (!decimal.TryParse (s,
              System.Globalization.NumberStyles.AllowDecimalPoint,
              System.Globalization.CultureInfo.InvariantCulture,
              out result))
          throw Error (String.Format ("Invalid numeric literal '{0}'.", s));
      return result;
  }
  // Reads a name: consumes characters while IsTokenContinuable ()
  // accepts them and returns what was collected (possibly empty).
  // The stopping character is left unread. When that character is
  // ':' the collected text is also recorded in prefixName; a ':'
  // while a prefix is already recorded is a compile error.
  // Note: this resets the shared value buffer.
  //
  // Fix: the "Invalid colon" error is now created through Error (),
  // like every other error in this class, so it carries line info.
  private string ReadOneToken ()
  {
      bufferIndex = 0;
      while (true) {
          int c = PeekChar ();
          if (c == -1 || c == ' ' || c == '\t' || c == '\r' || c == '\n')
              break;
          if (!IsTokenContinuable (c)) {
              if (c == ':') {
                  if (prefixName != null)
                      throw Error ("Invalid colon was found.");
                  prefixName = CreateValueString ();
              }
              break;
          }
          ReadChar ();
          AddValueChar ((char) c);
      }
      return CreateValueString ();
  }
  // True when c may appear inside a name token: '-', '_', '.', or
  // any character XmlChar accepts as an NCName character.
  private bool IsTokenContinuable (int c)
  {
      if (c == '-' || c == '_' || c == '.')
          return true;
      return XmlChar.IsNCNameChar (c);
  }
  1106. }
  // How the tokenizer treats whitespace in front of a token.
  public enum WhitespaceHandling
  {
      // Leading whitespace, if any, is skipped.
      Arbitrary,
      // Leading whitespace is required and then skipped.
      Explicit,
      // Whitespace is not skipped.
      Significant
  }
  // Lexical states of the tokenizer. The current state selects the
  // content reader used by the tokenizer and which names are treated
  // as keywords.
  public enum ParseState
  {
      Default,
      Operator,
      NamespaceDecl,
      NamespaceKeyword,
      XmlSpaceDecl,
      ItemType,
      KindTest,
      KindTestForPI,
      CloseKindTest,
      OccurenceIndicator,
      SchemaContextStep,
      VarName,
      StartTag,
      ElementContent,
      EndTag,
      XmlComment,
      ExprComment,
      ExtKey,
      XmlPI,
      XmlPIContent,
      CDataSection,
      QuotAttributeContent,
      AposAttributeContent
  }
  1137. }
  1138. #endif