XQueryTokenizer.cs 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284
  1. //
  2. // XQueryTokenizer.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. #if NET_2_0
  29. using System;
  30. using System.Collections;
  31. using System.Collections.Generic;
  32. using System.IO;
  33. using System.Security.Policy;
  34. using System.Xml;
  35. using System.Xml.Query;
  36. using System.Xml.Schema;
  37. using System.Xml.XPath;
  38. using Mono.Xml.XQuery;
  39. using Mono.Xml.XPath2;
  40. namespace Mono.Xml.XQuery.Parser
  41. {
  42. // FIXME: make internal in the future
  43. public class XQueryTokenizer
  44. : Mono.Xml.XQuery.Parser.yyParser.yyInput, IXmlLineInfo
  45. {
  // Position tracking, exposed through LineNumber / LinePosition.
  private int line = 1;
  private int column = 0;
  // Raised when a '\n' has been read; the next read then moves to a new line.
  private bool nextIncrementLine;

  // Namespace resolution.
  private XmlNamespaceManager nsResolver;
  private string defaultFunctionNamespace = XQueryFunction.Namespace;

  // Input source and its one-character pushback slot (-1 means "empty").
  private TextReader source;
  private int peekChar = -1;

  // Current token and the optional token queued to be returned next.
  private int currentToken;
  private string prefixName;
  private object tokenValue;
  private int lookAheadToken = -1;
  private object lookAheadTokenValue;

  // Lexical state.
  private WhitespaceHandling ws = WhitespaceHandling.Arbitrary;
  private ParseState state = ParseState.Default;
  private Stack stateStack;

  // Scratch buffer in which token text is accumulated; grown on demand.
  private char [] buffer = new char [30];
  private int bufferIndex;
  // Creates a tokenizer that reads query text from the given reader and
  // pre-registers the "xs", "xdt", "xsi", "fn" and "local" prefixes.
  //
  // Throws ArgumentNullException when reader is null, so that the failure
  // surfaces here rather than at the first read.
  public XQueryTokenizer (TextReader reader)
  {
      if (reader == null)
          throw new ArgumentNullException ("reader");

      this.source = reader;
      stateStack = new Stack ();
      nsResolver = new XmlNamespaceManager (new NameTable ());
      nsResolver.AddNamespace ("xs", XmlSchema.Namespace);
      nsResolver.AddNamespace ("xdt", XmlSchema.XdtNamespace);
      // FIXME: Are they really predefined?
      nsResolver.AddNamespace ("xsi", XmlSchema.InstanceNamespace);
      nsResolver.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
      nsResolver.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
  }
  // The namespace manager used to resolve prefixes in QNames.
  internal IXmlNamespaceResolver NSResolver
  {
      get {
          return nsResolver;
      }
  }
  // Gets or sets the default function namespace string held by the tokenizer.
  internal string DefaultFunctionNamespace
  {
      get {
          return defaultFunctionNamespace;
      }
      set {
          defaultFunctionNamespace = value;
      }
  }
  // Registers a prefix-to-namespace mapping on the internal namespace manager.
  public void AddNamespace (string prefix, string ns)
  {
      this.nsResolver.AddNamespace (prefix, ns);
  }
  // Moves to the next token. Returns false once a negative token
  // (end of input) has been reached, and keeps returning false afterwards.
  public bool advance ()
  {
      if (currentToken < 0)
          return false;

      if (lookAheadToken < 0) {
          // Nothing queued: scan a fresh token from the input.
          currentToken = ParseToken ();
      } else {
          // A previous scan queued a token; hand it out now.
          tokenValue = lookAheadTokenValue;
          currentToken = lookAheadToken;
          lookAheadToken = -1;
      }
      return currentToken >= 0;
  }
  // Returns the token code produced by the most recent advance() call.
  public int token ()
  {
      return this.currentToken;
  }
  // Returns the value currently associated with the token (may be null).
  public object value ()
  {
      return this.tokenValue;
  }
  // IXmlLineInfo: this tokenizer always reports line information.
  public bool HasLineInfo ()
  {
      const bool available = true;
      return available;
  }
  // IXmlLineInfo: current line counter (starts at 1).
  public int LineNumber
  {
      get {
          return line;
      }
  }
  // IXmlLineInfo: current column counter within the line.
  public int LinePosition
  {
      get {
          return column;
      }
  }
  // Gets or sets the whitespace handling mode stored on the tokenizer.
  internal WhitespaceHandling Space
  {
      get {
          return ws;
      }
      set {
          ws = value;
      }
  }
  // Gets or sets the current lexical state. Setting it does not touch
  // the state stack.
  internal ParseState State
  {
      get {
          return state;
      }
      set {
          state = value;
      }
  }
  // Saves the given state on the state stack. The current state is left
  // unchanged; the saved state becomes current on the matching PopState().
  internal void PushState (ParseState newState)
  {
      this.stateStack.Push (newState);
  }
  // Restores the most recently pushed state as the current state.
  // Reports an error when there is nothing to pop.
  internal void PopState ()
  {
      if (stateStack.Count > 0) {
          state = (ParseState) stateStack.Pop ();
          return;
      }
      throw Error ("Internal state transition error. State stack is empty.");
  }
  // Builds (does not throw) a compile exception for the given message,
  // passing this tokenizer as the line-info source.
  private XmlQueryCompileException Error (string message)
  {
      XmlQueryCompileException failure =
          new XmlQueryCompileException (message, this, null, null);
      return failure;
  }
  // Scans one token from the input according to the current state.
  // Returns the token code, or -1 at end of input.
  private int ParseToken ()
  {
      bufferIndex = 0;

      // Leading whitespace is skipped in every state except StartTag.
      // (The 'ws' setting is not consulted here.)
      if (state != ParseState.StartTag)
          SkipWhitespaces ();

      int next = PeekChar ();
      if (next < 0)
          return -1;

      // FIXME: consider DOUBLE_LITERAL
      // NOTE(review): this check runs before the state dispatch, so a
      // leading digit is read as a decimal literal in every state.
      if (Char.IsNumber ((char) next)) {
          tokenValue = ReadDecimal (false);
          return Token.DECIMAL_LITERAL;
      }

      switch (state) {
      case ParseState.StartTag:
          return ParseStartTag ();
      case ParseState.ElementContent:
          return ParseElementContent ();
      case ParseState.QuotAttributeContent:
          return ParseAttributeContent ('"');
      case ParseState.AposAttributeContent:
          return ParseAttributeContent ('\'');
      case ParseState.XmlComment:
          return ParseXmlCommentContent ();
      case ParseState.XmlPIContent:
          return ParseXmlPIContent ();
      case ParseState.OccurenceIndicator:
          return ParseOccurenceIndicator ();
      }
      return ParseDefault ();
  }
  // Consumes the remainder of an XQuery comment, up to and including the
  // closing ":)". The opening "(:" must already have been read.
  //
  // The comment text is stored as the token value and XML_PI_TO_END is
  // returned, as before; the caller in ParseDefault ignores the result.
  // Nested comments are not handled.
  //
  // Throws when the input ends before ":)" is found.
  private int ParseXQueryComment ()
  {
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              // Fixed: the message used to name "XML processing
              // instruction content" (copied from ParseXmlPIContent).
              throw Error ("Unexpected end of query text inside XQuery comment");
          if (c != ':') {
              AddValueChar ((char) c);
              continue;
          }
          if (PeekChar () == ')') {
              ReadChar ();
              tokenValue = CreateValueString ();
              return Token.XML_PI_TO_END;
          }
          // A ':' not followed by ')' is ordinary comment text.
          AddValueChar (':');
      }
  }
  // Reads processing-instruction content up to and including the closing
  // "?>". The content (without the terminator) becomes the token value.
  private int ParseXmlPIContent ()
  {
      for (;;) {
          int ch = ReadChar ();
          if (ch < 0)
              throw Error ("Unexpected end of query text inside XML processing instruction content");

          if (ch != '?') {
              AddValueChar ((char) ch);
              continue;
          }
          if (PeekChar () != '>') {
              // A '?' that does not start "?>" is part of the content.
              AddValueChar ('?');
              continue;
          }
          ReadChar ();
          tokenValue = CreateValueString ();
          return Token.XML_PI_TO_END;
      }
  }
  // Reads XML comment content until "--" is followed by '>'. The two
  // dashes are consumed; the '>' is only peeked and stays in the input.
  // The content read so far becomes the token value.
  private int ParseXmlCommentContent ()
  {
      // FIXME: handle ---> correctly
      for (;;) {
          int ch = ReadChar ();
          if (ch < 0)
              throw Error ("Unexpected end of query text inside XML comment content");

          if (ch != '-') {
              AddValueChar ((char) ch);
              continue;
          }
          if (PeekChar () != '-') {
              AddValueChar ('-');
              continue;
          }
          ReadChar ();
          if (PeekChar () == '>') {
              tokenValue = CreateValueString ();
              return Token.XML_COMMENT_TO_END;
          }
          // "--" not followed by '>' is kept as content.
          AddValueChar ('-');
          AddValueChar ('-');
      }
  }
  // Reads CDATA section content until "]]" is followed by '>'. The two
  // brackets are consumed; the '>' is only peeked and stays in the input
  // (same convention as ParseXmlCommentContent). The content becomes the
  // token value.
  //
  // Fixes over the previous version:
  //  - an extra ReadChar() after the first ']' discarded the following
  //    character, so "]]>" was never recognised and content was lost;
  //  - runs such as "]]]>" now yield the surplus ']' as content.
  //
  // Throws when the input ends inside the section.
  private int ParseXmlCDataContent ()
  {
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML CDATA section content");

          if (c != ']') {
              AddValueChar ((char) c);
              continue;
          }
          if (PeekChar () != ']') {
              AddValueChar (']');
              continue;
          }
          ReadChar ();
          // Two ']' are pending. Every further ']' pushes the oldest
          // pending one into the content.
          while (PeekChar () == ']') {
              AddValueChar (']');
              ReadChar ();
          }
          if (PeekChar () == '>') {
              tokenValue = CreateValueString ();
              return Token.XML_CDATA_TO_END;
          }
          // "]]" not followed by '>' is ordinary content.
          AddValueChar (']');
          AddValueChar (']');
      }
  }
  // Scans direct element content.
  //
  //  - A leading '<' is delegated to ParseDefault (tags, comments, CDATA, PI).
  //  - "{{" is an escaped '{' and is added to the literal text.
  //  - A single '{' starts an enclosed expression: it is returned as
  //    OPEN_CURLY, either directly or, when literal text precedes it,
  //    queued as the look-ahead token behind ELEM_CONTENT_LITERAL (the
  //    same scheme ParseAttributeContent uses).
  //  - "&name;" is expanded through ReadPredefinedEntity.
  //  - Otherwise text is accumulated up to the next '<', which is left
  //    in the input.
  //
  // Fixes over the previous version: '{' inside text used to be swallowed
  // as literal content, and the end-of-input messages named "XML
  // processing instruction content".
  private int ParseElementContent ()
  {
      tokenValue = null;
      int c = PeekChar ();
      if (c < 0)
          throw Error ("Unexpected end of query text inside XML element content");
      if (c == '<')
          return ParseDefault ();

      while (true) {
          c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML element content");
          switch (c) {
          case '&':
              ReadChar ();
              ReadPredefinedEntity ();
              continue;
          case '<':
              tokenValue = CreateValueString ();
              return Token.ELEM_CONTENT_LITERAL;
          case '{':
              ReadChar ();
              if (PeekChar () == '{') {
                  ReadChar ();
                  AddValueChar ('{');
                  continue;
              }
              if (bufferIndex == 0)
                  return Token.OPEN_CURLY;
              // Emit the pending text first; '{' follows on the next advance().
              lookAheadToken = Token.OPEN_CURLY;
              tokenValue = CreateValueString ();
              return Token.ELEM_CONTENT_LITERAL;
          default:
              AddValueChar ((char) c);
              ReadChar ();
              continue;
          }
      }
  }
  // Reads an entity name followed by ';' (the '&' has already been read)
  // and appends the character it stands for to the value buffer.
  // Only lt, gt, amp, quot and apos are accepted.
  private void ReadPredefinedEntity ()
  {
      string entity = ReadOneToken ();
      Expect (";");

      char resolved;
      switch (entity) {
      case "lt":
          resolved = '<';
          break;
      case "gt":
          resolved = '>';
          break;
      case "amp":
          resolved = '&';
          break;
      case "quot":
          resolved = '"';
          break;
      case "apos":
          resolved = '\'';
          break;
      default:
          throw Error (String.Format ("Unexpected general entity name: {0} .", entity));
      }
      AddValueChar (resolved);
  }
  342. // FIXME: not used as yet
  // Reads extension content until "::" is followed by ')'. The two colons
  // are consumed; the ')' is only peeked and stays in the input. The
  // content becomes the token value.
  //
  // Fixes over the previous version:
  //  - characters other than ':' were peeked but never consumed, so the
  //    loop never advanced past them;
  //  - runs such as ":::)" now yield the surplus ':' as content.
  //
  // Throws when the input ends before the terminator.
  private int ParseExtContent ()
  {
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside external content");

          if (c != ':') {
              AddValueChar ((char) c);
              continue;
          }
          if (PeekChar () != ':') {
              AddValueChar (':');
              continue;
          }
          ReadChar ();
          // Two ':' are pending. Every further ':' pushes the oldest
          // pending one into the content.
          while (PeekChar () == ':') {
              AddValueChar (':');
              ReadChar ();
          }
          if (PeekChar () == ')') {
              tokenValue = CreateValueString ();
              return Token.EXT_CONTENT;
          }
          // "::" not followed by ')' is ordinary content.
          AddValueChar (':');
          AddValueChar (':');
      }
  }
  // Switches to the Operator state, then returns '?', '*' or '+' as an
  // occurrence indicator token if one is next; anything else is scanned
  // by ParseOperator.
  private int ParseOccurenceIndicator ()
  {
      state = ParseState.Operator;

      int indicator;
      switch (PeekChar ()) {
      case '?':
          indicator = Token.QUESTION;
          break;
      case '*':
          indicator = Token.ASTERISK;
          break;
      case '+':
          indicator = Token.PLUS;
          break;
      default:
          return ParseOperator ();
      }
      ReadChar ();
      return indicator;
  }
  // Scans a token inside a start tag: quote characters, '>', "/>", or
  // (after skipping whitespace) whatever ParseDefault produces.
  private int ParseStartTag ()
  {
      int next = PeekChar ();
      if (next == '\'') {
          ReadChar ();
          return Token.APOS;
      }
      if (next == '"') {
          ReadChar ();
          return Token.QUOT;
      }
      if (next == '>') {
          ReadChar ();
          return Token.GREATER;
      }
      if (next == '/') {
          ReadChar ();
          Expect (">");
          return Token.EMPTY_TAG_CLOSE;
      }

      // FIXME: there seems a bug in the spec that StartTag
      // state must accept QName without heading space for
      // start tag name, so whitespace is not required here.
      SkipWhitespaces ();
      return ParseDefault (); // only QName is allowed here.
  }
  // Scans attribute value content delimited by closeChar.
  //
  // A doubled closeChar or a doubled '{' is an escape and contributes one
  // character to the literal. A single closeChar yields QUOT (for '"') or
  // APOS; a single '{' yields OPEN_CURLY. When literal text was collected
  // before such a delimiter, ATT_VALUE_LITERAL is returned first and the
  // delimiter is queued as the look-ahead token.
  private int ParseAttributeContent (char closeChar)
  {
      for (;;) {
          int next = PeekChar ();
          if (next < 0)
              throw Error ("Unexpected end of attribute value content.");

          bool isClose = next == closeChar;
          if (!isClose && next != '{') {
              AddValueChar ((char) ReadChar ());
              continue;
          }

          ReadChar ();
          if (PeekChar () == next) {
              // Doubled delimiter: an escaped literal character.
              ReadChar ();
              AddValueChar ((char) next);
              continue;
          }

          int delimiter;
          if (isClose)
              delimiter = closeChar == '"' ? Token.QUOT : Token.APOS;
          else
              delimiter = Token.OPEN_CURLY;

          if (bufferIndex == 0)
              return delimiter;
          lookAheadToken = delimiter;
          tokenValue = CreateValueString ();
          return Token.ATT_VALUE_LITERAL;
      }
  }
  // Operator-state scanning. No dedicated logic exists yet (TODO), so
  // this simply forwards to ParseDefault.
  private int ParseOperator ()
  {
      int scanned = ParseDefault ();
      return scanned;
  }
  // Scans one token using the general rules: punctuation and operators
  // first, then names (state-dependent keywords, NCNames and QNames).
  //
  // Fixes over the previous version:
  //  - '.' followed by a digit read the decimal but returned DOT; it now
  //    returns DECIMAL_LITERAL;
  //  - the Operator keyword list contained "typwswitch" instead of
  //    "typeswitch".
  // The keyword tables live in the private helpers that follow.
  //
  // Throws when a prefix is not mapped, or on malformed "<!" sequences.
  private int ParseDefault ()
  {
      int c = ReadChar ();
      switch (c) {
      case '.':
          if (PeekChar () == '.') {
              ReadChar ();
              return Token.DOT2;
          }
          int afterDot = PeekChar ();
          if (afterDot >= 0 && Char.IsNumber ((char) afterDot)) {
              tokenValue = ReadDecimal (true);
              return Token.DECIMAL_LITERAL;
          }
          return Token.DOT;
      case ',':
          return Token.COMMA;
      case ';':
          return Token.SEMICOLON;
      case '(':
          if (PeekChar () == ':') {
              ReadChar ();
              if (PeekChar () == ':') {
                  ReadChar ();
                  return Token.PRAGMA_OPEN;
              }
              // "(:" opens a comment: skip it and scan again.
              ParseXQueryComment ();
              return ParseToken ();
          }
          return Token.OPEN_PAREN;
      case ')':
          return Token.CLOSE_PAREN;
      case ':':
          switch (PeekChar ()) {
          case ':':
              ReadChar ();
              if (PeekChar () == ')') {
                  ReadChar ();
                  return Token.PRAGMA_CLOSE;
              }
              return Token.COLON2;
          case ')':
              ReadChar ();
              return Token.CLOSE_PAREN_COLON;
          case '=':
              ReadChar ();
              return Token.COLON_EQUAL;
          }
          return Token.COLON;
      case '[':
          return Token.OPEN_BRACKET;
      case ']':
          return Token.CLOSE_BRACKET;
      case '{':
          return Token.OPEN_CURLY;
      case '}':
          return Token.CLOSE_CURLY;
      case '$':
          return Token.DOLLAR;
      case '\'':
          tokenValue = ReadQuoted ('\'');
          return Token.STRING_LITERAL;
      case '"':
          tokenValue = ReadQuoted ('"');
          return Token.STRING_LITERAL;
      case '=':
          return Token.EQUAL;
      case '<':
          // Markup openers are only recognised in ElementContent
          // (otherwise it might be "/foo</bar").
          if (state == ParseState.ElementContent) {
              switch (PeekChar ()) {
              case '/':
                  ReadChar ();
                  return Token.END_TAG_START;
              case '!':
                  ReadChar ();
                  switch (PeekChar ()) {
                  case '-':
                      ReadChar ();
                      if (ReadChar () != '-')
                          throw Error ("Invalid sequence of characters '<!-'.");
                      return Token.XML_COMMENT_START;
                  case '[':
                      ReadChar ();
                      Expect ("CDATA[");
                      return Token.XML_CDATA_START;
                  }
                  throw Error ("Invalid sequence of characters '<!'.");
              case '?':
                  ReadChar ();
                  return Token.XML_PI_START;
              default:
                  return Token.LESSER;
              }
          }
          switch (PeekChar ()) {
          case '<':
              ReadChar ();
              return Token.LESSER2;
          case '=':
              ReadChar ();
              return Token.LESSER_EQUAL;
          }
          return Token.LESSER;
      case '>':
          switch (PeekChar ()) {
          case '>':
              ReadChar ();
              return Token.GREATER2;
          case '=':
              ReadChar ();
              return Token.GREATER_EQUAL;
          }
          return Token.GREATER;
      case '|':
          return Token.BAR;
      case '*':
          if (PeekChar () == ':') {
              ReadChar ();
              // FIXME: more check
              tokenValue = new XmlQualifiedName (ReadOneToken (), "*");
              return Token.WILD_PREFIX;
          }
          return Token.ASTERISK;
      case '+':
          return Token.PLUS;
      case '-':
          return Token.MINUS;
      case '/':
          // "/>" is only recognised in StartTag
          // (otherwise it might be "/>$extvar").
          if (state == ParseState.StartTag && PeekChar () == '>') {
              ReadChar ();
              return Token.EMPTY_TAG_CLOSE;
          }
          if (PeekChar () == '/') {
              ReadChar ();
              return Token.SLASH2;
          }
          return Token.SLASH;
      case '?':
          return Token.QUESTION;
      case '@':
          return Token.AT;
      }

      // Not punctuation: push the character back and read a name.
      peekChar = c;
      prefixName = null;
      string name = ReadOneToken ();
      tokenValue = name;

      int keyword = GetStateKeywordToken (name);
      if (keyword >= 0)
          return keyword;

      // In these states a plain name is always an NCName.
      switch (state) {
      case ParseState.NamespaceDecl:
      case ParseState.NamespaceKeyword:
      case ParseState.XmlSpaceDecl:
      case ParseState.KindTestForPI:
      case ParseState.XmlPI:
          return Token.NCNAME;
      }

      if (PeekChar () != ':') {
          tokenValue = new XmlQualifiedName (name);
          return Token.QNAME;
      }

      ReadChar ();
      prefixName = name;
      switch (PeekChar ()) {
      case '*':
          ReadChar ();
          name = "*";
          break;
      case '=': // ex. let foo:= ...
          ReadChar ();
          tokenValue = new XmlQualifiedName (name, nsResolver.DefaultNamespace);
          lookAheadToken = Token.COLON_EQUAL;
          return Token.QNAME;
      default:
          name = ReadOneToken ();
          break;
      }
      string ns = nsResolver.LookupNamespace (prefixName);
      if (ns == null)
          throw Error (String.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName));
      tokenValue = new XmlQualifiedName (name, ns);
      prefixName = null;
      return name == "*" ? Token.WILD_LOCALNAME : Token.QNAME;
  }

  // Returns the keyword token for 'name' in the current state, or -1
  // when 'name' is not a keyword in this state.
  private int GetStateKeywordToken (string name)
  {
      switch (state) {
      case ParseState.XmlSpaceDecl:
          switch (name) {
          case "preserve": return Token.PRESERVE;
          case "strip": return Token.STRIP;
          }
          return -1;
      case ParseState.CloseKindTest:
          return name == "nillable" ? Token.NILLABLE : -1;
      case ParseState.ExtKey:
          switch (name) {
          case "pragma": return Token.PRAGMA;
          case "extension": return Token.EXTENSION;
          }
          return -1;
      case ParseState.KindTest:
          switch (name) {
          case "context": return Token.CONTEXT;
          case "element": return Token.ELEMENT;
          case "global": return Token.GLOBAL;
          case "type": return Token.TYPE;
          }
          return -1;
      case ParseState.ItemType:
          switch (name) {
          case "attribute": return Token.ATTRIBUTE;
          case "comment": return Token.COMMENT;
          case "document-node": return Token.DOCUMENT_NODE;
          case "element": return Token.ELEMENT;
          case "empty": return Token.EMPTY;
          case "item": return Token.ITEM;
          case "node": return Token.NODE;
          case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
          case "text": return Token.TEXT;
          }
          return -1;
      case ParseState.NamespaceKeyword:
          switch (name) {
          case "declare": return Token.DECLARE;
          case "default": return Token.DEFAULT;
          case "element": return Token.ELEMENT;
          case "function": return Token.FUNCTION;
          case "namespace": return Token.NAMESPACE;
          }
          return -1;
      case ParseState.OccurenceIndicator:
      case ParseState.Operator:
          return IsOperatorStateKeyword (name) ? GetKeywordToken (name) : -1;
      case ParseState.Default:
          return IsDefaultStateKeyword (name) ? GetKeywordToken (name) : -1;
      }
      return -1;
  }

  // True when 'name' is a keyword in the Operator / OccurenceIndicator states.
  private static bool IsOperatorStateKeyword (string name)
  {
      switch (name) {
      case "and": case "as": case "ascending": case "at":
      case "base-uri": case "by":
      case "case": case "cast": case "castable": case "collation":
      case "declare": case "default": case "descending": case "div":
      case "element": case "else": case "empty": case "eq":
      case "every": case "except": case "external":
      case "for": case "function":
      case "ge": case "global": case "greatest": case "gt":
      case "idiv": case "import": case "in": case "instance":
      case "intersect": case "is":
      case "lax": case "le": case "least": case "let": case "lt":
      case "mod": case "module":
      case "namespace": case "ne":
      case "of": case "or": case "order": case "ordered": case "ordering":
      case "return":
      case "satisfies": case "schema": case "skip": case "some":
      case "stable": case "strict":
      case "then": case "to": case "treat": case "typeswitch":
      case "union": case "unordered":
      case "variable":
      case "where":
      case "xmlspace":
          return true;
      }
      return false;
  }

  // True when 'name' is a keyword in the Default state.
  private static bool IsDefaultStateKeyword (string name)
  {
      switch (name) {
      case "ancestor": case "ancestor-or-self": case "as": case "attribute":
      case "base-uri":
      case "child": case "collation": case "comment": case "construction":
      case "declare": case "default": case "descendant":
      case "descendant-or-self": case "document": case "document-node":
      case "element": case "every":
      case "following": case "following-sibling": case "for": case "function":
      case "global":
      case "if": case "import":
      case "lax": case "let":
      case "module":
      case "namespace": case "node":
      case "ordered":
      case "parent": case "preceding": case "preceding-sibling":
      case "processing-instruction":
      case "schema": case "self": case "some": case "strict": case "strip":
      case "text": case "typeswitch":
      case "unordered":
      case "validate": case "validation": case "version":
      case "xmlspace": case "xquery":
          return true;
      }
      return false;
  }

  // Maps a keyword spelling to its token code, or -1 when unknown.
  // NOTE(review): "at" maps to Token.AT, the same code ParseDefault
  // returns for '@' - confirm this is intended by the grammar.
  private static int GetKeywordToken (string name)
  {
      switch (name) {
      case "xquery": return Token.XQUERY;
      case "version": return Token.VERSION;
      case "pragma": return Token.PRAGMA;
      case "extension": return Token.EXTENSION;
      case "module": return Token.MODULE;
      case "namespace": return Token.NAMESPACE;
      case "declare": return Token.DECLARE;
      case "xmlspace": return Token.XMLSPACE;
      case "preserve": return Token.PRESERVE;
      case "strip": return Token.STRIP;
      case "default": return Token.DEFAULT;
      case "construction": return Token.CONSTRUCTION;
      case "ordering": return Token.ORDERING;
      case "ordered": return Token.ORDERED;
      case "unordered": return Token.UNORDERED;
      case "document-node": return Token.DOCUMENT_NODE;
      case "document": return Token.DOCUMENT;
      case "element": return Token.ELEMENT;
      case "attribute": return Token.ATTRIBUTE;
      case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
      case "comment": return Token.COMMENT;
      case "text": return Token.TEXT;
      case "node": return Token.NODE;
      case "function": return Token.FUNCTION;
      case "collation": return Token.COLLATION;
      case "base-uri": return Token.BASEURI;
      case "import": return Token.IMPORT;
      case "schema": return Token.SCHEMA;
      case "at": return Token.AT;
      case "variable": return Token.VARIABLE;
      case "as": return Token.AS;
      case "external": return Token.EXTERNAL;
      case "validation": return Token.VALIDATION;
      case "lax": return Token.LAX;
      case "strict": return Token.STRICT;
      case "skip": return Token.SKIP;
      case "return": return Token.RETURN;
      case "for": return Token.FOR;
      case "let": return Token.LET;
      case "in": return Token.IN;
      case "where": return Token.WHERE;
      case "order": return Token.ORDER;
      case "by": return Token.BY;
      case "stable": return Token.STABLE;
      case "ascending": return Token.ASCENDING;
      case "descending": return Token.DESCENDING;
      case "empty": return Token.EMPTY;
      case "greatest": return Token.GREATEST;
      case "least": return Token.LEAST;
      case "some": return Token.SOME;
      case "every": return Token.EVERY;
      case "satisfies": return Token.SATISFIES;
      case "is": return Token.IS;
      case "to": return Token.TO;
      case "eq": return Token.EQ;
      case "ne": return Token.NE;
      case "lt": return Token.LT;
      case "le": return Token.LE;
      case "gt": return Token.GT;
      case "ge": return Token.GE;
      case "and": return Token.AND;
      case "or": return Token.OR;
      case "instance": return Token.INSTANCE;
      case "of": return Token.OF;
      case "if": return Token.IF;
      case "then": return Token.THEN;
      case "else": return Token.ELSE;
      case "typeswitch": return Token.TYPESWITCH;
      case "case": return Token.CASE;
      case "treat": return Token.TREAT;
      case "castable": return Token.CASTABLE;
      case "cast": return Token.CAST;
      case "div": return Token.DIV;
      case "idiv": return Token.IDIV;
      case "mod": return Token.MOD;
      case "union": return Token.UNION;
      case "intersect": return Token.INTERSECT;
      case "except": return Token.EXCEPT;
      case "validate": return Token.VALIDATE;
      case "context": return Token.CONTEXT;
      case "nillable": return Token.NILLABLE;
      case "item": return Token.ITEM;
      case "global": return Token.GLOBAL;
      case "type": return Token.TYPE;
      case "child": return Token.CHILD;
      case "descendant": return Token.DESCENDANT;
      case "self": return Token.SELF;
      case "descendant-or-self": return Token.DESCENDANT_OR_SELF;
      case "following-sibling": return Token.FOLLOWING_SIBLING;
      case "following": return Token.FOLLOWING;
      case "parent": return Token.PARENT;
      case "ancestor": return Token.ANCESTOR;
      case "preceding": return Token.PRECEDING;
      case "preceding-sibling": return Token.PRECEDING_SIBLING;
      case "ancestor-or-self": return Token.ANCESTOR_OR_SELF;
      }
      return -1;
  }
  // Returns the next character without consuming it, reading from the
  // source into the pushback slot when the slot is empty.
  private int PeekChar ()
  {
      if (peekChar != -1)
          return peekChar;
      peekChar = source.Read ();
      return peekChar;
  }
  // Consumes and returns one character, taking the cached look-ahead
  // value first and falling back to source.Read () when none is cached.
  // Also maintains the line/column counters: the line counter is bumped
  // lazily, on the first read *after* a '\n' was consumed, so the '\n'
  // itself is still counted on the line it terminates.
  private int ReadChar ()
  {
      int ch = peekChar;
      if (ch != -1)
          peekChar = -1;          // the cached character is now consumed
      else
          ch = source.Read ();

      // The previous call consumed a '\n': start a new line now.
      if (nextIncrementLine) {
          nextIncrementLine = false;
          line++;
          column = 0;
      }
      column++;

      // Only '\n' schedules a line increment; '\r' is deliberately ignored.
      if (ch == '\n')
          nextIncrementLine = true;

      return ch;
  }
  // Consumes consecutive space, tab, CR and LF characters and stops in
  // front of the first character that is none of these (the value
  // returned by PeekChar () is left unconsumed).
  private void SkipWhitespaces ()
  {
      for (;;) {
          int next = PeekChar ();
          if (next != ' ' && next != '\t' && next != '\r' && next != '\n')
              return;
          ReadChar ();
      }
  }
  // Appends one character to the shared value buffer, doubling the
  // buffer's capacity first when it is completely filled.
  private void AddValueChar (char c)
  {
      bool isFull = bufferIndex == buffer.Length;
      if (isFull) {
          // bufferIndex equals buffer.Length here, so this doubles the size.
          char [] grown = new char [buffer.Length * 2];
          buffer.CopyTo (grown, 0);
          buffer = grown;
      }

      buffer [bufferIndex] = c;
      bufferIndex++;
  }
  // Builds a string from the first bufferIndex characters collected in
  // the value buffer. The buffer itself is not reset here.
  private string CreateValueString ()
  {
      string value = new string (buffer, 0, bufferIndex);
      return value;
  }
  // Consumes characters from the input and verifies that they spell out
  // `expected`, one character at a time. Raises the tokenizer error as
  // soon as a consumed character differs (that character stays consumed).
  private void Expect (string expected)
  {
      foreach (char wanted in expected) {
          if (ReadChar () == wanted)
              continue;
          throw Error (String.Format ("Expected token '{0}' did not appear.", expected));
      }
  }
  // TODO: parse three quoted
  //
  // Reads the body of a quoted literal and returns it without the
  // closing delimiter. The caller is expected to have consumed the
  // opening quote already (this method starts collecting immediately).
  //
  //   quoteChar - the delimiter that terminates the literal.
  //
  // Characters are consumed up to and including the closing quoteChar.
  // A quote character that is NOT the delimiter (e.g. an apostrophe in a
  // "..." literal) is ordinary content and is kept in the result.
  //
  // Raises the tokenizer error when the input ends before the closing
  // delimiter is seen; previously this case either looped forever
  // (apostrophe-delimited) or silently accepted the truncated literal
  // (double-quote-delimited).
  private string ReadQuoted (char quoteChar)
  {
      bufferIndex = 0;
      while (true) {
          int c = ReadChar ();
          if (c == -1)
              throw Error ("Unterminated quoted literal: closing quote did not appear.");
          if (c == quoteChar)
              break;
          AddValueChar ((char) c);
      }
      return CreateValueString ();
  }
  // Reads a run of number characters and '.' from the input and parses
  // it as a decimal.
  //
  //   floatingPoint - when true, a "." is prepended to the collected text
  //                   before parsing (presumably because the caller has
  //                   already consumed a leading '.'; confirm at call sites).
  //
  // The first character that is neither a number character nor '.' is
  // left unconsumed. Parsing always uses the invariant culture: query
  // text uses '.' as the decimal separator regardless of the machine's
  // regional settings, whereas the culture-sensitive overload would
  // treat '.' as a group separator (or reject it) under some cultures.
  //
  // decimal.Parse throws FormatException for malformed text such as
  // "1.2.3" or a lone ".".
  private decimal ReadDecimal (bool floatingPoint)
  {
      bufferIndex = 0;
      while (true) {
          int c = PeekChar ();
          // FIXME: more complex
          if (c < 0 || !(Char.IsNumber ((char) c) || c == '.'))
              break;
          ReadChar ();
          AddValueChar ((char) c);
      }
      string s = (floatingPoint ? "." : "") + CreateValueString ();
      return decimal.Parse (s, System.Globalization.CultureInfo.InvariantCulture);
  }
  // Collects consecutive name characters (as decided by
  // IsTokenContinuable) from the input and returns them as a string.
  //
  // Reading stops, without consuming the stopping character, at end of
  // input, at whitespace, or at any character that cannot continue a
  // token. When the stopping character is ':' the text collected so far
  // is recorded in prefixName; if prefixName is already set, a second
  // colon is rejected with XmlQueryCompileException.
  private string ReadOneToken ()
  {
      bufferIndex = 0;
      while (true) {
          int next = PeekChar ();

          bool atEndOrWhitespace = next == -1 || next == ' ' || next == '\t'
              || next == '\r' || next == '\n';
          if (atEndOrWhitespace)
              break;

          if (!IsTokenContinuable (next)) {
              if (next == ':') {
                  if (prefixName != null)
                      throw new XmlQueryCompileException ("Invalid colon was found.");
                  prefixName = CreateValueString ();
              }
              break;
          }

          ReadChar ();
          AddValueChar ((char) next);
      }
      return CreateValueString ();
  }
  // Tells whether `c` may appear inside a token being read by
  // ReadOneToken: '-', '_' and '.' are accepted outright, anything else
  // is delegated to XmlChar.IsNCNameChar.
  private bool IsTokenContinuable (int c)
  {
      if (c == '-' || c == '_' || c == '.')
          return true;
      return XmlChar.IsNCNameChar (c);
  }
  1180. }
  // Whitespace handling modes. The numeric values are the ones the
  // compiler assigned implicitly, now written out.
  public enum WhitespaceHandling
  {
      Arbitrary = 0,
      Explicit = 1,
      Significant = 2
  }
  // Lexical states of the tokenizer. The numeric values are the ones
  // the compiler assigned implicitly, now written out; do not renumber.
  public enum ParseState
  {
      Default = 0,
      Operator = 1,
      NamespaceDecl = 2,
      NamespaceKeyword = 3,
      XmlSpaceDecl = 4,
      ItemType = 5,
      KindTest = 6,
      KindTestForPI = 7,
      CloseKindTest = 8,
      OccurenceIndicator = 9,
      SchemaContextStep = 10,
      VarName = 11,
      StartTag = 12,
      ElementContent = 13,
      EndTag = 14,
      XmlComment = 15,
      ExprComment = 16,
      ExtKey = 17,
      XmlPI = 18,
      XmlPIContent = 19,
      CDataSection = 20,
      QuotAttributeContent = 21,
      AposAttributeContent = 22,
  }
  1211. }
  1212. #endif