XQueryTokenizer.cs 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224
  1. //
  2. // XQueryTokenizer.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. #if NET_2_0
  29. using System;
  30. using System.Collections;
  31. using System.Collections.Generic;
  32. using System.IO;
  33. using System.Security.Policy;
  34. using System.Xml;
  35. using System.Xml.Query;
  36. using System.Xml.Schema;
  37. using System.Xml.XPath;
  38. using Mono.Xml.XQuery;
  39. using Mono.Xml.XPath2;
  40. namespace Mono.Xml.XQuery.Parser
  41. {
  42. // FIXME: make internal in the future
  43. public class XQueryTokenizer
  44. : Mono.Xml.XQuery.Parser.yyParser.yyInput, IXmlLineInfo
  45. {
  // Current position in the query text, exposed through IXmlLineInfo.
  int line = 1;
  int column = 0;
  // Set by ReadChar () after a '\n'; the line counter is bumped on the next read.
  bool nextIncrementLine;
  // namespace resolver
  XmlNamespaceManager nsResolver;
  string defaultFunctionNamespace = XQueryFunction.Namespace;
  // input source
  TextReader source;
  // One-character pushback slot; -1 means nothing is buffered.
  int peekChar = -1;
  // token info
  int currentToken;
  // Prefix recorded while reading a name that stops at ':'.
  string prefixName;
  object tokenValue;
  // Token queued for the next advance () call; -1 means none is queued.
  int lookAheadToken = -1;
  object lookAheadTokenValue;
  // state info
  WhitespaceHandling ws = WhitespaceHandling.Arbitrary;
  ParseState state = ParseState.Default;
  Stack stateStack;
  // Scratch buffer holding the characters of the value being read;
  // bufferIndex is the number of valid characters in it.
  char [] buffer = new char [30];
  int bufferIndex;
  // Creates a tokenizer over the given query text and registers the
  // predefined namespace prefixes (xs, xdt, xsi, fn, local).
  // Throws ArgumentNullException when reader is null, instead of failing
  // later with a NullReferenceException on the first read.
  public XQueryTokenizer (TextReader reader)
  {
      if (reader == null)
          throw new ArgumentNullException ("reader");
      this.source = reader;
      stateStack = new Stack ();
      nsResolver = new XmlNamespaceManager (new NameTable ());
      nsResolver.AddNamespace ("xs", XmlSchema.Namespace);
      nsResolver.AddNamespace ("xdt", XmlSchema.XdtNamespace);
      // FIXME: Are they really predefined?
      nsResolver.AddNamespace ("xsi", XmlSchema.InstanceNamespace);
      nsResolver.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
      nsResolver.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
  }
  // The namespace manager used to resolve prefixes of qualified names.
  internal IXmlNamespaceResolver NSResolver {
      get { return nsResolver; }
  }
  // Default function namespace; initialised to XQueryFunction.Namespace.
  internal string DefaultFunctionNamespace {
      get { return defaultFunctionNamespace; }
      set { defaultFunctionNamespace = value; }
  }
  // Registers a prefix-to-namespace mapping with the internal namespace manager.
  public void AddNamespace (string prefix, string ns)
  {
      nsResolver.AddNamespace (prefix, ns);
  }
  // Moves to the next token. A queued look-ahead token is delivered first;
  // otherwise the next token is read from the input.
  // Returns false once a negative token (end of input) has been produced.
  public bool advance ()
  {
      // Once the end has been signalled, stay there.
      if (currentToken < 0)
          return false;

      bool hasQueuedToken = lookAheadToken >= 0;
      if (!hasQueuedToken) {
          currentToken = ParseToken ();
      } else {
          tokenValue = lookAheadTokenValue;
          currentToken = lookAheadToken;
          lookAheadToken = -1;
      }
      return !(currentToken < 0);
  }
  // Returns the token code produced by the last advance () call.
  public int token ()
  {
      return currentToken;
  }
  // Returns the value currently associated with the token (may be null).
  public object value ()
  {
      return tokenValue;
  }
  // IXmlLineInfo: line information is always available.
  public bool HasLineInfo ()
  {
      return true;
  }
  // IXmlLineInfo: current line; starts at 1.
  public int LineNumber {
      get { return line; }
  }
  // IXmlLineInfo: current column counter; reset to 0 at each new line and
  // incremented on every ReadChar () call.
  public int LinePosition {
      get { return column; }
  }
  // Whitespace policy applied by ParseToken () before the next token.
  internal WhitespaceHandling Space {
      get { return ws; }
      set { ws = value; }
  }
  // Current lexical state; it selects how the next token is recognised.
  // Setting it does not touch the state stack.
  internal ParseState State {
      get { return state; }
      set {
          // Debug trace, disabled:
          // Console.Error.WriteLine ("**** eno **** state transition from {0} to {1}, stack count = {2}", state, value, stateStack.Count);
          //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
          state = value;
      }
  }
  // Saves newState on the state stack so a later PopState () can make it
  // current. The current state itself is not changed here.
  internal void PushState (ParseState newState)
  {
      stateStack.Push (newState);
      // Debug trace, disabled:
      // Console.Error.WriteLine ("**** eno **** state pushed {0}, added stack count = {1}", newState, stateStack.Count);
      //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
  }
  // Makes the most recently pushed state the current state.
  // Throws a compile error when the stack is empty.
  internal void PopState ()
  {
      if (stateStack.Count == 0)
          throw Error ("Internal state transition error. State stack is empty.");
      state = (ParseState) stateStack.Pop ();
      // Debug trace, disabled:
      // Console.Error.WriteLine ("**** eno **** state pop, now as {0}, stack count = {1}", state, stateStack.Count);
      //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
  }
  // Builds (does not throw) a compile exception for the given message,
  // passing this tokenizer as the second constructor argument; callers
  // write "throw Error (...)".
  private XmlQueryCompileException Error (string message)
  {
      return new XmlQueryCompileException (message, this, null, null);
  }
  // Reads the next token from the input according to the current
  // whitespace policy and lexical state.
  // Returns the token code, or -1 at the end of the input.
  private int ParseToken ()
  {
      bufferIndex = 0;
      switch (ws) {
      case WhitespaceHandling.Arbitrary:
          SkipWhitespaces ();
          break;
      case WhitespaceHandling.Explicit:
          if (!XmlChar.IsWhitespace (PeekChar ()))
              throw Error ("Whitespace is required.");
          goto case WhitespaceHandling.Arbitrary;
      }
      int c = PeekChar ();
      if (c < 0)
          return -1;

      // Raw-content states must see every character, including a leading
      // digit, so they are dispatched before the numeric literal check.
      switch (state) {
      case ParseState.XmlPIContent:
          return ParseXmlPIContent ();
      case ParseState.XmlComment:
          return ParseXmlCommentContent ();
      case ParseState.ElementContent:
          return ParseElementContent ();
      }

      // FIXME: consider DOUBLE_LITERAL
      // Only ASCII digits start a numeric literal; other Unicode "number"
      // characters cannot be parsed as a decimal.
      if (c >= '0' && c <= '9') {
          tokenValue = ReadDecimal (false);
          return Token.DECIMAL_LITERAL;
      }

      if (state == ParseState.OccurenceIndicator)
          return ParseOccurenceIndicator ();
      return ParseDefault ();
  }
  // Consumes an XQuery comment whose opening "(:" has already been read,
  // up to and including the matching ":)". Nested "(: ... :)" comments are
  // balanced. The comment text is left in tokenValue.
  // The returned code is kept for compatibility; the only visible caller
  // ignores it and restarts tokenization.
  // Throws a compile error when the input ends inside the comment.
  private int ParseXQueryComment ()
  {
      int depth = 1;
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XQuery comment");
          if (c == ':' && PeekChar () == ')') {
              ReadChar ();
              if (--depth == 0) {
                  tokenValue = CreateValueString ();
                  return Token.XML_PI_TO_END;
              }
              // Closing delimiter of a nested comment is part of the text.
              AddValueChar (':');
              AddValueChar (')');
          }
          else if (c == '(' && PeekChar () == ':') {
              ReadChar ();
              depth++;
              AddValueChar ('(');
              AddValueChar (':');
          }
          else
              AddValueChar ((char) c);
      }
  }
  // Collects processing-instruction content up to the terminating "?>",
  // which is consumed. The content becomes the token value.
  // Throws a compile error when the input ends first.
  private int ParseXmlPIContent ()
  {
      for (;;) {
          int ch = ReadChar ();
          if (ch < 0)
              throw Error ("Unexpected end of query text inside XML processing instruction content");

          bool atTerminator = ch == '?' && PeekChar () == '>';
          if (atTerminator) {
              ReadChar ();
              tokenValue = CreateValueString ();
              return Token.XML_PI_TO_END;
          }
          // A lone '?' is ordinary content, like any other character.
          AddValueChar ((char) ch);
      }
  }
  // Collects XML comment content up to "--" followed by '>'. The two
  // dashes are consumed; the '>' is left in the input.
  // Throws a compile error when the input ends first.
  // FIXME: handle ---> correctly
  private int ParseXmlCommentContent ()
  {
      for (;;) {
          int ch = ReadChar ();
          if (ch < 0)
              throw Error ("Unexpected end of query text inside XML comment content");

          if (ch != '-') {
              AddValueChar ((char) ch);
              continue;
          }
          if (PeekChar () != '-') {
              // single dash: plain content
              AddValueChar ('-');
              continue;
          }
          ReadChar ();
          if (PeekChar () == '>') {
              tokenValue = CreateValueString ();
              return Token.XML_COMMENT_TO_END;
          }
          // "--" not followed by '>' is kept as content.
          AddValueChar ('-');
          AddValueChar ('-');
      }
  }
  // Collects CDATA section content up to "]]" followed by '>'. The
  // brackets are consumed; the '>' is left in the input. A longer run
  // such as "]]]>" keeps the extra leading ']' characters as content.
  // Throws a compile error when the input ends first.
  private int ParseXmlCDataContent ()
  {
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML CDATA section content");
          if (c != ']') {
              AddValueChar ((char) c);
              continue;
          }
          // Count the whole run of ']' so that only its last two can act
          // as the terminator.
          int run = 1;
          while (PeekChar () == ']') {
              ReadChar ();
              run++;
          }
          if (run >= 2 && PeekChar () == '>') {
              for (int i = 2; i < run; i++)
                  AddValueChar (']');
              tokenValue = CreateValueString ();
              return Token.XML_CDATA_TO_END;
          }
          for (int i = 0; i < run; i++)
              AddValueChar (']');
      }
  }
  // Reads literal element content. When the content starts with '<' or
  // '{' the character is tokenized by ParseDefault (); otherwise text is
  // collected (resolving predefined entity references) until the next '<'
  // or '{', which is left in the input, and returned as a literal.
  // Throws a compile error when the input ends first.
  private int ParseElementContent ()
  {
      tokenValue = null;
      int c = PeekChar ();
      if (c < 0)
          throw Error ("Unexpected end of query text inside XML element content");
      if (c == '<' || c == '{')
          return ParseDefault ();

      while (true) {
          c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML element content");
          switch ((char) c) {
          case '&':
              ReadChar ();
              ReadPredefinedEntity ();
              continue;
          case '<':
          case '{':
              // Markup or an enclosed expression begins: the literal ends here.
              tokenValue = CreateValueString ();
              return Token.ELEM_CONTENT_LITERAL;
          default:
              AddValueChar ((char) c);
              ReadChar ();
              continue;
          }
      }
  }
  // Reads a predefined entity reference whose '&' has already been
  // consumed (name followed by ';') and appends the character it stands
  // for to the value buffer. Text already in the buffer is preserved.
  // Throws a compile error for an unknown entity name or a missing ';'.
  private void ReadPredefinedEntity ()
  {
      // ReadOneToken () reuses the value buffer for the entity name, so
      // save the text collected so far and restore it afterwards.
      string pending = CreateValueString ();
      string token = ReadOneToken ();
      Expect (";");
      bufferIndex = 0;
      for (int i = 0; i < pending.Length; i++)
          AddValueChar (pending [i]);

      switch (token) {
      case "lt":
          AddValueChar ('<');
          return;
      case "gt":
          AddValueChar ('>');
          return;
      case "amp":
          AddValueChar ('&');
          return;
      case "quot":
          AddValueChar ('"');
          return;
      case "apos":
          AddValueChar ('\'');
          return;
      default:
          throw Error (String.Format ("Unexpected general entity name: {0} .", token));
      }
  }
  // FIXME: not used as yet
  // Collects extension content up to "::" followed by ')'. The colons are
  // consumed; the ')' is left in the input. A longer run such as ":::)"
  // keeps the extra leading ':' characters as content.
  // Throws a compile error when the input ends first.
  private int ParseExtContent ()
  {
      while (true) {
          // Consume the character; peeking alone would never advance.
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside external content");
          if (c != ':') {
              AddValueChar ((char) c);
              continue;
          }
          int run = 1;
          while (PeekChar () == ':') {
              ReadChar ();
              run++;
          }
          if (run >= 2 && PeekChar () == ')') {
              for (int i = 2; i < run; i++)
                  AddValueChar (':');
              tokenValue = CreateValueString ();
              return Token.EXT_CONTENT;
          }
          for (int i = 0; i < run; i++)
              AddValueChar (':');
      }
  }
  // Recognises an occurrence indicator ('?', '*' or '+'). The state is
  // switched to Operator first; anything else is tokenized as an operator.
  private int ParseOccurenceIndicator ()
  {
      state = ParseState.Operator;

      int indicator = PeekChar ();
      if (indicator == '?') {
          ReadChar ();
          return Token.QUESTION;
      }
      if (indicator == '*') {
          ReadChar ();
          return Token.ASTERISK;
      }
      if (indicator == '+') {
          ReadChar ();
          return Token.PLUS;
      }
      return ParseOperator ();
  }
  // Placeholder for operator-state tokenization; currently it simply
  // delegates to ParseDefault ().
  private int ParseOperator ()
  {
      // TODO: implement
      return ParseDefault ();
  }
  // Tokenizes punctuation, operators, string literals, keywords and
  // (qualified) names. Which words count as keywords depends on the
  // current lexical state; any other word becomes an NCNAME or QNAME.
  // Returns the token code; the associated value, if any, is left in
  // tokenValue.
  // Throws a compile error on an unmapped prefix, on malformed "<!"
  // markup, or on a character that cannot start any token.
  private int ParseDefault ()
  {
      int c = ReadChar ();
      int next;
      switch (c) {
      case '.':
          next = PeekChar ();
          if (next == '.') {
              ReadChar ();
              return Token.DOT2;
          }
          if (next >= '0' && next <= '9') {
              // ".5" style literal; the leading dot is already consumed.
              tokenValue = ReadDecimal (true);
              return Token.DECIMAL_LITERAL;
          }
          return Token.DOT;
      case ',':
          return Token.COMMA;
      case ';':
          return Token.SEMICOLON;
      case '(':
          if (PeekChar () == ':') {
              ReadChar ();
              if (PeekChar () == ':') {
                  ReadChar ();
                  return Token.PRAGMA_OPEN;
              }
              ParseXQueryComment ();
              return ParseToken (); // start again
          }
          return Token.OPEN_PAREN;
      case ')':
          return Token.CLOSE_PAREN;
      case ':':
          switch (PeekChar ()) {
          case ':':
              ReadChar ();
              if (PeekChar () == ')') {
                  ReadChar ();
                  return Token.PRAGMA_CLOSE;
              }
              return Token.COLON2;
          case ')':
              ReadChar ();
              return Token.CLOSE_PAREN_COLON;
          case '=':
              ReadChar ();
              return Token.COLON_EQUAL;
          }
          return Token.COLON;
      case '[':
          return Token.OPEN_BRACKET;
      case ']':
          return Token.CLOSE_BRACKET;
      case '{':
          return Token.OPEN_CURLY;
      case '}':
          return Token.CLOSE_CURLY;
      case '$':
          return Token.DOLLAR;
      case '\'':
          // FIXME: consider APOS handling inside start tags in the future
          tokenValue = ReadQuoted ('\'');
          return Token.STRING_LITERAL;
      case '"':
          // FIXME: consider QUOT handling inside start tags in the future
          tokenValue = ReadQuoted ('"');
          return Token.STRING_LITERAL;
      case '=':
          return Token.EQUAL;
      case '<':
          // Markup is only recognised when state is ElementContent
          // (otherwise it might be "/foo</bar").
          if (state == ParseState.ElementContent) {
              switch ((char) PeekChar ()) {
              case '/':
                  ReadChar ();
                  return Token.END_TAG_START;
              case '!':
                  ReadChar ();
                  switch (PeekChar ()) {
                  case '-':
                      ReadChar ();
                      if (ReadChar () != '-')
                          throw Error ("Invalid sequence of characters '<!-'.");
                      return Token.XML_COMMENT_START;
                  case '[':
                      ReadChar ();
                      Expect ("CDATA[");
                      return Token.XML_CDATA_START;
                  }
                  throw Error ("Invalid sequence of characters '<!'.");
              case '?':
                  ReadChar ();
                  return Token.XML_PI_START;
              default:
                  return Token.LESSER;
              }
          }
          switch (PeekChar ()) {
          case '<':
              ReadChar ();
              return Token.LESSER2;
          case '=':
              ReadChar ();
              return Token.LESSER_EQUAL;
          }
          return Token.LESSER;
      case '>':
          switch (PeekChar ()) {
          case '>':
              ReadChar ();
              return Token.GREATER2;
          case '=':
              ReadChar ();
              return Token.GREATER_EQUAL;
          }
          return Token.GREATER;
      case '|':
          return Token.BAR;
      case '*':
          if (PeekChar () == ':') {
              ReadChar ();
              // FIXME: more check
              tokenValue = new XmlQualifiedName (ReadOneToken (), "*");
              return Token.WILD_PREFIX;
          }
          return Token.ASTERISK;
      case '+':
          return Token.PLUS;
      case '-':
          return Token.MINUS;
      case '/':
          // "/>" is only recognised when state is StartTag
          // (otherwise it might be "/>$extvar").
          if (state == ParseState.StartTag && PeekChar () == '>') {
              ReadChar ();
              return Token.EMPTY_TAG_CLOSE;
          }
          if (PeekChar () == '/') {
              ReadChar ();
              return Token.SLASH2;
          }
          return Token.SLASH;
      case '?':
          return Token.QUESTION;
      case '@':
          return Token.AT;
      }

      if (c < 0)
          throw Error ("Unexpected end of query text.");

      // Not punctuation: push the character back and read a name.
      peekChar = c;
      prefixName = null;
      string name = ReadOneToken ();
      if (name.Length == 0)
          // Nothing was consumed; returning an empty name would leave the
          // offending character in the input forever.
          throw Error (String.Format ("Unexpected character '{0}'.", (char) c));
      tokenValue = name;

      int keyword = LookupStateKeyword (name);
      if (keyword >= 0)
          return keyword;
      if (IsGeneralKeyword (name)) {
          keyword = GetKeywordToken (name);
          if (keyword >= 0)
              return keyword;
      }

      switch (state) {
      case ParseState.NamespaceDecl:
      case ParseState.NamespaceKeyword:
      case ParseState.XmlSpaceDecl:
      case ParseState.KindTestForPI:
      case ParseState.XmlPI:
          return Token.NCNAME;
      }

      if (PeekChar () == ':') {
          ReadChar ();
          prefixName = name;
          switch (PeekChar ()) {
          case '*':
              ReadChar ();
              name = "*";
              break;
          case '=': // ex. let foo:= ...
              ReadChar ();
              tokenValue = new XmlQualifiedName (name, nsResolver.DefaultNamespace);
              // The word was not a prefix after all; queue ":=" as the next token.
              prefixName = null;
              lookAheadToken = Token.COLON_EQUAL;
              lookAheadTokenValue = null;
              return Token.QNAME;
          default:
              name = ReadOneToken ();
              break;
          }
          string ns = nsResolver.LookupNamespace (prefixName);
          if (ns == null)
              throw Error (String.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName));
          tokenValue = new XmlQualifiedName (name, ns);
          prefixName = null;
          return name == "*" ? Token.WILD_LOCALNAME : Token.QNAME;
      }
      tokenValue = new XmlQualifiedName (name);
      return Token.QNAME;
  }

  // Returns the token for a word that is a keyword only in the current
  // lexical state, or -1 when the word has no such meaning.
  private int LookupStateKeyword (string name)
  {
      switch (state) {
      case ParseState.XmlSpaceDecl:
          switch (name) {
          case "preserve": return Token.PRESERVE;
          case "strip": return Token.STRIP;
          }
          break;
      case ParseState.CloseKindTest:
          if (name == "nillable")
              return Token.NILLABLE;
          break;
      case ParseState.ExtKey:
          switch (name) {
          case "pragma": return Token.PRAGMA;
          case "extension": return Token.EXTENSION;
          }
          break;
      case ParseState.KindTest:
          switch (name) {
          case "context": return Token.CONTEXT;
          case "element": return Token.ELEMENT;
          case "global": return Token.GLOBAL;
          case "type": return Token.TYPE;
          }
          break;
      case ParseState.ItemType:
          switch (name) {
          case "attribute": return Token.ATTRIBUTE;
          case "comment": return Token.COMMENT;
          case "document-node": return Token.DOCUMENT_NODE;
          case "element": return Token.ELEMENT;
          case "empty": return Token.EMPTY;
          case "item": return Token.ITEM;
          case "node": return Token.NODE;
          case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
          case "text": return Token.TEXT;
          }
          break;
      case ParseState.NamespaceKeyword:
          switch (name) {
          case "declare": return Token.DECLARE;
          case "default": return Token.DEFAULT;
          case "element": return Token.ELEMENT;
          case "function": return Token.FUNCTION;
          case "namespace": return Token.NAMESPACE;
          }
          break;
      }
      return -1;
  }

  // Tells whether the word is treated as a keyword in the Operator /
  // OccurenceIndicator states or in the Default state.
  private bool IsGeneralKeyword (string name)
  {
      switch (state) {
      case ParseState.OccurenceIndicator:
      case ParseState.Operator:
          switch (name) {
          case "and": case "as": case "ascending": case "at":
          case "base-uri": case "by": case "case": case "cast":
          case "castable": case "collation": case "declare": case "default":
          case "descending": case "div": case "element": case "else":
          case "empty": case "eq": case "every": case "except":
          case "external": case "for": case "function": case "ge":
          case "global": case "greatest": case "gt": case "idiv":
          case "import": case "in": case "instance": case "intersect":
          case "is": case "lax": case "le": case "least":
          case "let": case "lt": case "mod": case "module":
          case "namespace": case "ne": case "of": case "or":
          case "order": case "ordered": case "ordering": case "return":
          case "satisfies": case "schema": case "skip": case "some":
          case "stable": case "strict": case "then": case "to":
          case "treat": case "typeswitch": case "union": case "unordered":
          case "variable": case "where": case "xmlspace":
              return true;
          }
          break;
      case ParseState.Default:
          switch (name) {
          case "ancestor": case "ancestor-or-self": case "as": case "attribute":
          case "base-uri": case "child": case "collation": case "comment":
          case "construction": case "declare": case "default": case "descendant":
          case "descendant-or-self": case "document": case "document-node": case "element":
          case "every": case "following": case "following-sibling": case "for":
          case "function": case "global": case "if": case "import":
          case "lax": case "let": case "module": case "namespace":
          case "node": case "ordered": case "parent": case "preceding":
          case "preceding-sibling": case "processing-instruction": case "schema": case "self":
          case "some": case "strict": case "strip": case "text":
          case "typeswitch": case "unordered": case "validate": case "validation":
          case "version": case "xmlspace": case "xquery":
              return true;
          }
          break;
      }
      return false;
  }

  // Maps a keyword to its token code, or returns -1 for an unknown word.
  private int GetKeywordToken (string name)
  {
      switch (name) {
      case "xquery": return Token.XQUERY;
      case "version": return Token.VERSION;
      case "pragma": return Token.PRAGMA;
      case "extension": return Token.EXTENSION;
      case "module": return Token.MODULE;
      case "namespace": return Token.NAMESPACE;
      case "declare": return Token.DECLARE;
      case "xmlspace": return Token.XMLSPACE;
      case "preserve": return Token.PRESERVE;
      case "strip": return Token.STRIP;
      case "default": return Token.DEFAULT;
      case "construction": return Token.CONSTRUCTION;
      case "ordering": return Token.ORDERING;
      case "ordered": return Token.ORDERED;
      case "unordered": return Token.UNORDERED;
      case "document-node": return Token.DOCUMENT_NODE;
      case "document": return Token.DOCUMENT;
      case "element": return Token.ELEMENT;
      case "attribute": return Token.ATTRIBUTE;
      case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
      case "comment": return Token.COMMENT;
      case "text": return Token.TEXT;
      case "node": return Token.NODE;
      case "function": return Token.FUNCTION;
      case "collation": return Token.COLLATION;
      case "base-uri": return Token.BASEURI;
      case "import": return Token.IMPORT;
      case "schema": return Token.SCHEMA;
      case "at": return Token.AT;
      case "variable": return Token.VARIABLE;
      case "as": return Token.AS;
      case "external": return Token.EXTERNAL;
      case "validation": return Token.VALIDATION;
      case "lax": return Token.LAX;
      case "strict": return Token.STRICT;
      case "skip": return Token.SKIP;
      case "return": return Token.RETURN;
      case "for": return Token.FOR;
      case "let": return Token.LET;
      case "in": return Token.IN;
      case "where": return Token.WHERE;
      case "order": return Token.ORDER;
      case "by": return Token.BY;
      case "stable": return Token.STABLE;
      case "ascending": return Token.ASCENDING;
      case "descending": return Token.DESCENDING;
      case "empty": return Token.EMPTY;
      case "greatest": return Token.GREATEST;
      case "least": return Token.LEAST;
      case "some": return Token.SOME;
      case "every": return Token.EVERY;
      case "satisfies": return Token.SATISFIES;
      case "is": return Token.IS;
      case "to": return Token.TO;
      case "eq": return Token.EQ;
      case "ne": return Token.NE;
      case "lt": return Token.LT;
      case "le": return Token.LE;
      case "gt": return Token.GT;
      case "ge": return Token.GE;
      case "and": return Token.AND;
      case "or": return Token.OR;
      case "instance": return Token.INSTANCE;
      case "of": return Token.OF;
      case "if": return Token.IF;
      case "then": return Token.THEN;
      case "else": return Token.ELSE;
      case "typeswitch": return Token.TYPESWITCH;
      case "case": return Token.CASE;
      case "treat": return Token.TREAT;
      case "castable": return Token.CASTABLE;
      case "cast": return Token.CAST;
      case "div": return Token.DIV;
      case "idiv": return Token.IDIV;
      case "mod": return Token.MOD;
      case "union": return Token.UNION;
      case "intersect": return Token.INTERSECT;
      case "except": return Token.EXCEPT;
      case "validate": return Token.VALIDATE;
      case "context": return Token.CONTEXT;
      case "nillable": return Token.NILLABLE;
      case "item": return Token.ITEM;
      case "global": return Token.GLOBAL;
      case "type": return Token.TYPE;
      case "child": return Token.CHILD;
      case "descendant": return Token.DESCENDANT;
      case "self": return Token.SELF;
      case "descendant-or-self": return Token.DESCENDANT_OR_SELF;
      case "following-sibling": return Token.FOLLOWING_SIBLING;
      case "following": return Token.FOLLOWING;
      case "parent": return Token.PARENT;
      case "ancestor": return Token.ANCESTOR;
      case "preceding": return Token.PRECEDING;
      case "preceding-sibling": return Token.PRECEDING_SIBLING;
      case "ancestor-or-self": return Token.ANCESTOR_OR_SELF;
      }
      return -1;
  }
  // Returns the next character without consuming it, or -1 at the end of
  // the input. Since -1 also marks an empty pushback slot, peeking at the
  // end of the input reads the source again on every call.
  private int PeekChar ()
  {
      if (peekChar == -1)
          peekChar = source.Read ();
      return peekChar;
  }
  // Consumes and returns the next character (-1 at the end of the input),
  // taking it from the pushback slot when one is buffered, and updates
  // the line/column counters.
  private int ReadChar ()
  {
      int ch = peekChar;
      if (ch == -1)
          ch = source.Read ();
      else
          peekChar = -1;

      // The previous character was a line feed: this one starts a new line.
      if (nextIncrementLine) {
          line++;
          column = 0;
          nextIncrementLine = false;
      }
      column++;

      if (ch == '\n')
          nextIncrementLine = true;
      return ch;
  }
  // Consumes any run of spaces, tabs, carriage returns and line feeds.
  private void SkipWhitespaces ()
  {
      int ch = PeekChar ();
      while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
          ReadChar ();
          ch = PeekChar ();
      }
  }
  // Appends a character to the value buffer, doubling the buffer's
  // capacity when it is full.
  private void AddValueChar (char c)
  {
      if (bufferIndex == buffer.Length) {
          char [] grown = new char [buffer.Length * 2];
          buffer.CopyTo (grown, 0);
          buffer = grown;
      }
      buffer [bufferIndex] = c;
      bufferIndex++;
  }
  // Returns the characters collected in the value buffer so far as a
  // string. The buffer itself is not reset.
  private string CreateValueString ()
  {
      return new string (buffer, 0, bufferIndex);
  }
  // Consumes the given text from the input, character by character.
  // Throws a compile error at the first character that does not match.
  private void Expect (string expected)
  {
      foreach (char wanted in expected) {
          if (ReadChar () != wanted)
              throw Error (String.Format ("Expected token '{0}' did not appear.", expected));
      }
  }
  // Reads the body of a string literal whose opening quote has already
  // been consumed, up to and including the closing quoteChar.
  // A doubled delimiter ('' inside '...', "" inside "...") stands for one
  // literal quote character; the other quote character is ordinary text.
  // Throws a compile error when the input ends before the closing quote.
  private string ReadQuoted (char quoteChar)
  {
      bufferIndex = 0;
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside string literal");
          if (c == quoteChar) {
              if (PeekChar () != quoteChar)
                  break; // closing delimiter
              ReadChar (); // escaped delimiter: keep a single quote
          }
          AddValueChar ((char) c);
      }
      return CreateValueString ();
  }
  // Reads a decimal literal made of ASCII digits and at most one '.'.
  // floatingPoint is true when the caller has already consumed a leading
  // '.', in which case only digits are read and the dot is put back in
  // front of them. Parsing is culture-invariant, so the result does not
  // depend on the current thread's decimal separator.
  private decimal ReadDecimal (bool floatingPoint)
  {
      bufferIndex = 0;
      bool seenPoint = floatingPoint;
      while (true) {
          int c = PeekChar ();
          if (c == '.' && !seenPoint)
              seenPoint = true;
          else if (c < '0' || c > '9')
              break; // end of input, a second '.', or any non-digit
          ReadChar ();
          AddValueChar ((char) c);
      }
      // FIXME: more complex
      string s = (floatingPoint ? "." : "") + CreateValueString ();
      return decimal.Parse (s,
          System.Globalization.NumberStyles.AllowDecimalPoint,
          System.Globalization.CultureInfo.InvariantCulture);
  }
  // Reads a run of name characters into the value buffer (which is reset
  // first) and returns it; the result is empty when the next character
  // cannot be part of a name. Reading stops, without consuming, at
  // whitespace, the end of the input, or any non-name character.
  // When the name stops at ':' it is recorded as prefixName; a second
  // ':' while a prefix is already recorded is a compile error.
  private string ReadOneToken ()
  {
      bufferIndex = 0;
      bool loop = true;
      do {
          int c = PeekChar ();
          switch (c) {
          case -1:
          case ' ':
          case '\t':
          case '\r':
          case '\n':
              loop = false;
              break;
          default:
              if (!IsTokenContinuable (c)) {
                  if (c == ':') {
                      if (prefixName != null)
                          // Use the shared helper so the error carries line info.
                          throw Error ("Invalid colon was found.");
                      prefixName = CreateValueString ();
                  }
                  loop = false;
                  break;
              }
              ReadChar ();
              AddValueChar ((char) c);
              break;
          }
      } while (loop);
      return CreateValueString ();
  }
  // Tells whether c may appear inside a name token: '-', '_', '.', or
  // any character accepted by XmlChar.IsNCNameChar ().
  private bool IsTokenContinuable (int c)
  {
      switch (c) {
      case '-':
      case '_':
      case '.':
          return true;
      }
      return XmlChar.IsNCNameChar (c);
  }
  }
  1123. }
  // Whitespace policy applied by the tokenizer before reading a token.
  public enum WhitespaceHandling {
      // Leading whitespace is skipped.
      Arbitrary,
      // Whitespace must be present (otherwise a compile error); it is then skipped.
      Explicit,
      // Nothing is skipped before the token is read.
      Significant
  }
  // Lexical states of the tokenizer. The state decides which reader
  // handles the next token and which words are recognised as keywords.
  public enum ParseState {
      Default,
      Operator,
      NamespaceDecl,
      NamespaceKeyword,
      XmlSpaceDecl,
      ItemType,
      KindTest,
      KindTestForPI,
      CloseKindTest,
      OccurenceIndicator,
      SchemaContextStep,
      VarName,
      StartTag,
      ElementContent,
      EndTag,
      XmlComment,
      ExprComment,
      ExtKey,
      XmlPI,
      XmlPIContent,
      CDataSection,
      QuotAttributeContent,
      AposAttributeContent,
  }
  1154. }
  1155. #endif