XQueryTokenizer.cs 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076
  1. //
  2. // XQueryTokenizer.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  8. //
  9. // Permission is hereby granted, free of charge, to any person obtaining
  10. // a copy of this software and associated documentation files (the
  11. // "Software"), to deal in the Software without restriction, including
  12. // without limitation the rights to use, copy, modify, merge, publish,
  13. // distribute, sublicense, and/or sell copies of the Software, and to
  14. // permit persons to whom the Software is furnished to do so, subject to
  15. // the following conditions:
  16. //
  17. // The above copyright notice and this permission notice shall be
  18. // included in all copies or substantial portions of the Software.
  19. //
  20. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  24. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  25. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27. //
  28. #if NET_2_0
  29. using System;
  30. using System.Collections;
  31. using System.Collections.Generic;
  32. using System.IO;
  33. using System.Security.Policy;
  34. using System.Xml;
  35. using System.Xml.Query;
  36. using System.Xml.Schema;
  37. using System.Xml.XPath;
  38. using Mono.Xml.XQuery.SyntaxTree;
  39. using Mono.Xml.XPath2;
  40. namespace Mono.Xml.XQuery.Parser
  41. {
  42. // FIXME: make internal in the future
  43. public class XQueryTokenizer
  44. : Mono.Xml.XQuery.Parser.yyParser.yyInput, IXmlLineInfo
  45. {
  // Current position in the query text, exposed through LineNumber and
  // LinePosition.
  private int line = 1;
  private int column = 0;
  // Set once a line feed has been consumed, so that the line counter is
  // advanced when the following character is read.
  private bool nextIncrementLine;

  // Resolves QName prefixes to namespace URIs.
  private XmlNamespaceManager nsResolver;
  // Stored by the constructor; this class does not read it afterwards.
  private Evidence evidence;

  // Input source and its one-character lookahead (-1 = nothing peeked).
  private TextReader source;
  private int peekChar = -1;

  // Token information.
  private int currentToken;
  private string prefixName;
  private object tokenValue;
  // Token queued for the next advance () call (-1 = none) and its value.
  private int lookAheadToken = -1;
  private object lookAheadTokenValue;

  // Lexical state information.
  private WhitespaceHandling ws = WhitespaceHandling.Arbitrary;
  private ParseState state = ParseState.Default;
  private Stack stateStack;
  // Creates a tokenizer that reads query text from the given reader.
  //
  // reader:   source of the query text; must not be null.
  // evidence: stored as-is; this class never dereferences it.
  //
  // Throws ArgumentNullException when reader is null (previously the
  // failure surfaced later as a NullReferenceException on the first read).
  public XQueryTokenizer (TextReader reader, Evidence evidence)
  {
      if (reader == null)
          throw new ArgumentNullException ("reader");

      this.source = reader;
      this.evidence = evidence;

      stateStack = new Stack ();

      // Prefixes that are available without an explicit declaration.
      nsResolver = new XmlNamespaceManager (new NameTable ());
      nsResolver.AddNamespace ("xs", XmlSchema.Namespace);
      nsResolver.AddNamespace ("xdt", XmlSchema.XdtNamespace);
      // FIXME: Are they really predefined?
      nsResolver.AddNamespace ("xsi", XmlSchema.InstanceNamespace);
      nsResolver.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
      nsResolver.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
  }
  // Registers a prefix-to-namespace mapping with the resolver that is
  // consulted when a prefixed QName token is read.
  public void AddNamespace (string prefix, string ns)
  {
      this.nsResolver.AddNamespace (prefix, ns);
  }
  // Moves to the next token. Returns false once the end of the input has
  // been reached (the current token is then negative) and keeps returning
  // false afterwards.
  public bool advance ()
  {
      if (currentToken < 0) {
          return false;
      }

      bool hasQueuedToken = lookAheadToken >= 0;
      if (!hasQueuedToken) {
          currentToken = ParseToken ();
          return currentToken >= 0;
      }

      // A token was queued by the previous scan; hand it out first.
      tokenValue = lookAheadTokenValue;
      currentToken = lookAheadToken;
      lookAheadToken = -1;
      return currentToken >= 0;
  }
  // Current token code, as set by the last advance () call.
  public int token ()
  {
      return this.currentToken;
  }

  // Value attached to the current token (may be null).
  public object value ()
  {
      return this.tokenValue;
  }

  // Line information is always tracked by this tokenizer.
  public bool HasLineInfo ()
  {
      return true;
  }

  // Line of the most recently consumed character (starts at 1).
  public int LineNumber {
      get { return this.line; }
  }

  // Column of the most recently consumed character within its line.
  public int LinePosition {
      get { return this.column; }
  }
  // Whitespace policy applied before each token is scanned.
  internal WhitespaceHandling Space {
      get { return this.ws; }
      set { this.ws = value; }
  }

  // Current lexical state. Setting it does not touch the state stack.
  internal ParseState State {
      get { return this.state; }
      set { this.state = value; }
  }
  // Saves the given state on the state stack. The current state itself is
  // left unchanged.
  internal void PushState (ParseState newState)
  {
      this.stateStack.Push (newState);
  }

  // Makes the most recently pushed state the current one.
  // Throws a compile error when the stack is empty.
  internal void PopState ()
  {
      if (this.stateStack.Count > 0) {
          this.state = (ParseState) this.stateStack.Pop ();
          return;
      }
      throw Error ("Internal state transition error. State stack is empty.");
  }
  // Builds (does not throw) a compile exception that carries this
  // tokenizer as its line-info provider.
  private XmlQueryCompileException Error (string message)
  {
      XmlQueryCompileException failure =
          new XmlQueryCompileException (message, this, null, null);
      return failure;
  }
  // Scans the next token from the input and returns its code, or -1 at
  // the end of the input. The token value, when there is one, is stored
  // in tokenValue.
  private int ParseToken ()
  {
      // Apply the current whitespace policy. WhitespaceHandling.Significant
      // has no case here, so nothing is skipped in that mode.
      switch (ws) {
      case WhitespaceHandling.Explicit:
          if (!XmlChar.IsWhitespace (PeekChar ()))
              throw Error ("Whitespace is required.");
          SkipWhitespaces ();
          break;
      case WhitespaceHandling.Arbitrary:
          SkipWhitespaces ();
          break;
      }

      int c = PeekChar ();
      if (c < 0)
          return -1;

      // Processing-instruction content is raw text; it has to be dispatched
      // before the numeric check, otherwise content that starts with a
      // digit would be returned as a numeric literal.
      if (state == ParseState.XmlPIContent)
          return ParseXmlPIContent ();

      if (Char.IsNumber ((char) c)) {
          tokenValue = ReadDecimal (false);
          return Token.NUMERIC_LITERAL;
      }

      if (state == ParseState.OccurenceIndicator)
          return ParseOccurenceIndicator ();
      return ParseDefault ();
  }
  // Reads processing-instruction content up to and including the closing
  // "?>". The content (without the delimiter) becomes the token value and
  // Token.XML_PI_TO_END is returned.
  // Throws a compile error when the input ends before "?>".
  private int ParseXmlPIContent ()
  {
      // Start from an empty buffer; otherwise characters left over from
      // the previously read token would be prepended to the content.
      bufferIndex = 0;
      while (true) {
          int c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML processing instruction content");
          // Always consume the character. The original only peeked at
          // ordinary characters and therefore never made progress.
          ReadChar ();
          if (c == '?' && PeekChar () == '>') {
              ReadChar ();
              tokenValue = CreateValueString ();
              return Token.XML_PI_TO_END;
          }
          // A '?' that is not followed by '>' is plain content, so "??>"
          // yields "?" followed by the terminator.
          AddValueChar ((char) c);
      }
  }
  // Reads XML comment content up to and including the closing "-->".
  // The content (without the delimiter) becomes the token value and
  // Token.XML_COMMENT_TO_END is returned.
  // Throws a compile error when the input ends before "-->".
  private int ParseXmlCommentContent ()
  {
      // FIXME: handle ---> correctly
      // Start from an empty buffer; otherwise characters left over from
      // the previously read token would be prepended to the content.
      bufferIndex = 0;
      while (true) {
          int c = PeekChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside XML comment content");
          // Always consume the character. The original only peeked at
          // ordinary characters and therefore never made progress.
          ReadChar ();
          if (c != '-') {
              AddValueChar ((char) c);
              continue;
          }
          if (PeekChar () != '-') {
              // a lone '-' is ordinary content
              AddValueChar ('-');
              continue;
          }
          ReadChar ();
          if (PeekChar () == '>') {
              // Consume the '>' as well, mirroring how the PI content
              // reader consumes its whole "?>" terminator.
              ReadChar ();
              tokenValue = CreateValueString ();
              return Token.XML_COMMENT_TO_END;
          }
          AddValueChar ('-');
          AddValueChar ('-');
      }
  }
  // Reads an optional occurrence indicator ('?', '*' or '+'). The state
  // is switched to Operator first; when no indicator is present the token
  // is scanned as an operator instead.
  private int ParseOccurenceIndicator ()
  {
      state = ParseState.Operator;

      int indicator;
      switch (PeekChar ()) {
      case '?':
          indicator = Token.QUESTION;
          break;
      case '*':
          indicator = Token.ASTERISK;
          break;
      case '+':
          indicator = Token.PLUS;
          break;
      default:
          return ParseOperator ();
      }

      // consume the indicator character that was only peeked so far
      ReadChar ();
      return indicator;
  }
  // Scans a token in operator position.
  // TODO: implement; for now this simply delegates to the default scanner.
  private int ParseOperator ()
  {
      int scanned = ParseDefault ();
      return scanned;
  }
  // Sentinel used by the helpers below for "no token recognized".
  // Negative codes never denote a real token: advance () treats them as
  // the end of the input.
  private const int NoToken = -1;

  // Scans one token in the general lexical states: first punctuation and
  // operators, then keywords that are valid in the current state, and
  // finally NCName / QName tokens.
  //
  // Changes from the earlier revision:
  // - "typeswitch" was misspelled as "typwswitch" in the Operator keyword
  //   list and could never match;
  // - ".5" now yields NUMERIC_LITERAL (it used to read the digits and then
  //   return DOT);
  // - a character that starts neither a symbol nor a name now raises a
  //   compile error; it used to produce an empty QNAME without consuming
  //   any input, so the tokenizer never advanced;
  // - the unreachable "if (false)" quote handling was removed.
  private int ParseDefault ()
  {
      int c = ReadChar ();

      int symbol = ParseSymbol (c);
      if (symbol != NoToken)
          return symbol;

      // Not a symbol: push the character back and read a name.
      peekChar = c;
      prefixName = null;
      string name = ReadOneToken ();
      if (name.Length == 0) {
          if (c < 0)
              throw Error ("Unexpected end of query text.");
          throw Error (String.Format ("Unexpected character '{0}' was found.", (char) c));
      }
      tokenValue = name;

      int keyword = LookupStateKeyword (name);
      if (keyword == NoToken && IsGeneralKeyword (name))
          keyword = LookupGeneralKeyword (name);
      if (keyword != NoToken)
          return keyword;

      // In these states a name is always a plain NCName.
      switch (state) {
      case ParseState.NamespaceDecl:
      case ParseState.NamespaceKeyword:
      case ParseState.XmlSpaceDecl:
      case ParseState.KindTestForPI:
      case ParseState.XmlPI:
          return Token.NCNAME;
      }

      return ParseQName (name);
  }

  // Recognizes punctuation, operators and string literals that start with
  // the already consumed character c. Returns NoToken when c does not
  // start any of them.
  private int ParseSymbol (int c)
  {
      switch (c) {
      case '.':
          if (PeekChar () == '.') {
              ReadChar ();
              return Token.DOT2;
          }
          if (Char.IsNumber ((char) PeekChar ())) {
              // a literal such as ".5"
              tokenValue = ReadDecimal (true);
              return Token.NUMERIC_LITERAL;
          }
          return Token.DOT;
      case ',':
          return Token.COMMA;
      case ';':
          return Token.SEMICOLON;
      case '(':
          if (PeekChar () == ':') {
              ReadChar ();
              if (PeekChar () == ':') {
                  ReadChar ();
                  return Token.PRAGMA_OPEN;
              }
              return Token.OPEN_PAREN_COLON;
          }
          return Token.OPEN_PAREN;
      case ')':
          return Token.CLOSE_PAREN;
      case ':':
          switch (PeekChar ()) {
          case ':':
              ReadChar ();
              if (PeekChar () == ')') {
                  ReadChar ();
                  return Token.PRAGMA_CLOSE;
              }
              return Token.COLON2;
          case ')':
              ReadChar ();
              return Token.CLOSE_PAREN_COLON;
          case '=':
              ReadChar ();
              return Token.COLON_EQUAL;
          }
          return Token.COLON;
      case '[':
          return Token.OPEN_BRACKET;
      case ']':
          return Token.CLOSE_BRACKET;
      case '{':
          return Token.OPEN_CURLY;
      case '}':
          return Token.CLOSE_CURLY;
      case '$':
          return Token.DOLLAR;
      case '\'':
          tokenValue = ReadQuoted ('\'');
          return Token.STRING_LITERAL;
      case '"':
          tokenValue = ReadQuoted ('"');
          return Token.STRING_LITERAL;
      case '=':
          return Token.EQUAL;
      case '<':
          // Markup is only recognized when state is ElementContent
          // (otherwise it might be "/foo</bar").
          if (state == ParseState.ElementContent)
              return ParseMarkupStart ();
          switch (PeekChar ()) {
          case '<':
              ReadChar ();
              return Token.LESSER2;
          case '=':
              ReadChar ();
              return Token.LESSER_EQUAL;
          }
          return Token.LESSER;
      case '>':
          switch (PeekChar ()) {
          case '>':
              ReadChar ();
              return Token.GREATER2;
          case '=':
              ReadChar ();
              return Token.GREATER_EQUAL;
          }
          return Token.GREATER;
      case '|':
          return Token.BAR;
      case '*':
          if (PeekChar () == ':') {
              ReadChar ();
              // FIXME: more check
              tokenValue = new XmlQualifiedName (ReadOneToken (), "*");
              return Token.WILD_PREFIX;
          }
          return Token.ASTERISK;
      case '+':
          return Token.PLUS;
      case '-':
          return Token.MINUS;
      case '/':
          // "/>" is only recognized when state is StartTag
          // (otherwise it might be "/>$extvar").
          if (state == ParseState.StartTag && PeekChar () == '>') {
              ReadChar ();
              return Token.EMPTY_TAG_CLOSE;
          }
          if (PeekChar () == '/') {
              ReadChar ();
              return Token.SLASH2;
          }
          return Token.SLASH;
      case '?':
          return Token.QUESTION;
      case '@':
          return Token.AT;
      }
      return NoToken;
  }

  // Handles what follows an already consumed '<' inside element content:
  // "</", "<!--", "<![CDATA[" and "<?". Anything else is a plain '<'.
  private int ParseMarkupStart ()
  {
      switch ((char) PeekChar ()) {
      case '/':
          ReadChar ();
          return Token.END_TAG_START;
      case '!':
          ReadChar ();
          switch (PeekChar ()) {
          case '-':
              ReadChar ();
              if (ReadChar () != '-')
                  throw Error ("Invalid sequence of characters '<!-'.");
              return Token.XML_COMMENT_START;
          case '[':
              ReadChar ();
              Expect ("CDATA[");
              return Token.XML_CDATA_START;
          }
          throw Error ("Invalid sequence of characters '<!'.");
      case '?':
          ReadChar ();
          return Token.XML_PI_START;
      default:
          return Token.LESSER;
      }
  }

  // Keywords that are recognized only in one specific lexical state.
  // Returns NoToken when name is not such a keyword in the current state.
  private int LookupStateKeyword (string name)
  {
      switch (state) {
      case ParseState.XmlSpaceDecl:
          switch (name) {
          case "preserve": return Token.PRESERVE;
          case "strip": return Token.STRIP;
          }
          break;
      case ParseState.CloseKindTest:
          if (name == "nillable")
              return Token.NILLABLE;
          break;
      case ParseState.ExtKey:
          switch (name) {
          case "pragma": return Token.PRAGMA;
          case "extension": return Token.EXTENSION;
          }
          break;
      case ParseState.KindTest:
          switch (name) {
          case "context": return Token.CONTEXT;
          case "element": return Token.ELEMENT;
          case "global": return Token.GLOBAL;
          case "type": return Token.TYPE;
          }
          break;
      case ParseState.ItemType:
          switch (name) {
          case "attribute": return Token.ATTRIBUTE;
          case "comment": return Token.COMMENT;
          case "document-node": return Token.DOCUMENT_NODE;
          case "element": return Token.ELEMENT;
          case "empty": return Token.EMPTY;
          case "item": return Token.ITEM;
          case "node": return Token.NODE;
          case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
          case "text": return Token.TEXT;
          }
          break;
      case ParseState.NamespaceKeyword:
          switch (name) {
          case "declare": return Token.DECLARE;
          case "default": return Token.DEFAULT;
          case "element": return Token.ELEMENT;
          case "function": return Token.FUNCTION;
          case "namespace": return Token.NAMESPACE;
          }
          break;
      }
      return NoToken;
  }

  // Tells whether name is treated as a keyword (rather than as a name) in
  // the Operator / OccurenceIndicator / Default states. In every other
  // state the answer is false.
  private bool IsGeneralKeyword (string name)
  {
      switch (state) {
      case ParseState.OccurenceIndicator:
      case ParseState.Operator:
          switch (name) {
          case "and": case "as": case "ascending": case "at":
          case "base-uri": case "by":
          case "case": case "cast": case "castable": case "collation":
          case "declare": case "default": case "descending": case "div":
          case "element": case "else": case "empty": case "eq":
          case "every": case "except": case "external":
          case "for": case "function":
          case "ge": case "global": case "greatest": case "gt":
          case "idiv": case "import": case "in": case "instance":
          case "intersect": case "is":
          case "lax": case "le": case "least": case "let": case "lt":
          case "mod": case "module":
          case "namespace": case "ne":
          case "of": case "or": case "order": case "ordered": case "ordering":
          case "return":
          case "satisfies": case "schema": case "skip": case "some":
          case "stable": case "strict":
          case "then": case "to": case "treat": case "typeswitch":
          case "union": case "unordered":
          case "variable":
          case "where":
          case "xmlspace":
              return true;
          }
          return false;
      case ParseState.Default:
          switch (name) {
          case "ancestor": case "ancestor-or-self": case "as": case "attribute":
          case "base-uri":
          case "child": case "collation": case "comment": case "construction":
          case "declare": case "default": case "descendant":
          case "descendant-or-self": case "document": case "document-node":
          case "element": case "every":
          case "following": case "following-sibling": case "for": case "function":
          case "global":
          case "if": case "import":
          case "lax": case "let":
          case "module":
          case "namespace": case "node":
          case "ordered":
          case "parent": case "preceding": case "preceding-sibling":
          case "processing-instruction":
          case "schema": case "self": case "some": case "strict": case "strip":
          case "text": case "typeswitch":
          case "unordered":
          case "validate": case "validation": case "version":
          case "xmlspace": case "xquery":
              return true;
          }
          return false;
      }
      return false;
  }

  // Maps a keyword to its token code. Returns NoToken for unknown names.
  private static int LookupGeneralKeyword (string name)
  {
      switch (name) {
      case "xquery": return Token.XQUERY;
      case "version": return Token.VERSION;
      case "pragma": return Token.PRAGMA;
      case "extension": return Token.EXTENSION;
      case "module": return Token.MODULE;
      case "namespace": return Token.NAMESPACE;
      case "declare": return Token.DECLARE;
      case "xmlspace": return Token.XMLSPACE;
      case "preserve": return Token.PRESERVE;
      case "strip": return Token.STRIP;
      case "default": return Token.DEFAULT;
      case "construction": return Token.CONSTRUCTION;
      case "ordering": return Token.ORDERING;
      case "ordered": return Token.ORDERED;
      case "unordered": return Token.UNORDERED;
      case "document-node": return Token.DOCUMENT_NODE;
      case "document": return Token.DOCUMENT;
      case "element": return Token.ELEMENT;
      case "attribute": return Token.ATTRIBUTE;
      case "processing-instruction": return Token.PROCESSING_INSTRUCTION;
      case "comment": return Token.COMMENT;
      case "text": return Token.TEXT;
      case "node": return Token.NODE;
      case "function": return Token.FUNCTION;
      case "collation": return Token.COLLATION;
      case "base-uri": return Token.BASEURI;
      case "import": return Token.IMPORT;
      case "schema": return Token.SCHEMA;
      case "at": return Token.AT;
      case "variable": return Token.VARIABLE;
      case "as": return Token.AS;
      case "external": return Token.EXTERNAL;
      case "validation": return Token.VALIDATION;
      case "lax": return Token.LAX;
      case "strict": return Token.STRICT;
      case "skip": return Token.SKIP;
      case "return": return Token.RETURN;
      case "for": return Token.FOR;
      case "let": return Token.LET;
      case "in": return Token.IN;
      case "where": return Token.WHERE;
      case "order": return Token.ORDER;
      case "by": return Token.BY;
      case "stable": return Token.STABLE;
      case "ascending": return Token.ASCENDING;
      case "descending": return Token.DESCENDING;
      case "empty": return Token.EMPTY;
      case "greatest": return Token.GREATEST;
      case "least": return Token.LEAST;
      case "some": return Token.SOME;
      case "every": return Token.EVERY;
      case "satisfies": return Token.SATISFIES;
      case "is": return Token.IS;
      case "to": return Token.TO;
      case "eq": return Token.EQ;
      case "ne": return Token.NE;
      case "lt": return Token.LT;
      case "le": return Token.LE;
      case "gt": return Token.GT;
      case "ge": return Token.GE;
      case "and": return Token.AND;
      case "or": return Token.OR;
      case "instance": return Token.INSTANCE;
      case "of": return Token.OF;
      case "if": return Token.IF;
      case "then": return Token.THEN;
      case "else": return Token.ELSE;
      case "typeswitch": return Token.TYPESWITCH;
      case "case": return Token.CASE;
      case "treat": return Token.TREAT;
      case "castable": return Token.CASTABLE;
      case "cast": return Token.CAST;
      case "div": return Token.DIV;
      case "idiv": return Token.IDIV;
      case "mod": return Token.MOD;
      case "union": return Token.UNION;
      case "intersect": return Token.INTERSECT;
      case "except": return Token.EXCEPT;
      case "validate": return Token.VALIDATE;
      case "context": return Token.CONTEXT;
      case "nillable": return Token.NILLABLE;
      case "item": return Token.ITEM;
      case "global": return Token.GLOBAL;
      case "type": return Token.TYPE;
      case "child": return Token.CHILD;
      case "descendant": return Token.DESCENDANT;
      case "self": return Token.SELF;
      case "descendant-or-self": return Token.DESCENDANT_OR_SELF;
      case "following-sibling": return Token.FOLLOWING_SIBLING;
      case "following": return Token.FOLLOWING;
      case "parent": return Token.PARENT;
      case "ancestor": return Token.ANCESTOR;
      case "preceding": return Token.PRECEDING;
      case "preceding-sibling": return Token.PRECEDING_SIBLING;
      case "ancestor-or-self": return Token.ANCESTOR_OR_SELF;
      }
      return NoToken;
  }

  // Completes a QName whose first part (name) has already been read.
  // Handles "prefix:local", "prefix:*" and a name directly followed by
  // ":=" (ex. let foo:= ...), in which case COLON_EQUAL is queued as the
  // next token.
  // Throws a compile error when the prefix is not mapped to a namespace.
  private int ParseQName (string name)
  {
      if (PeekChar () != ':') {
          tokenValue = new XmlQualifiedName (name);
          return Token.QNAME;
      }

      ReadChar ();
      prefixName = name;
      switch (PeekChar ()) {
      case '*':
          ReadChar ();
          name = "*";
          break;
      case '=': // ex. let foo:= ...
          ReadChar ();
          tokenValue = new XmlQualifiedName (name, nsResolver.DefaultNamespace);
          lookAheadToken = Token.COLON_EQUAL;
          return Token.QNAME;
      default:
          name = ReadOneToken ();
          break;
      }

      string ns = nsResolver.LookupNamespace (prefixName);
      if (ns == null)
          throw Error (String.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName));
      tokenValue = new XmlQualifiedName (name, ns);
      prefixName = null;
      return name == "*" ? Token.WILD_LOCALNAME : Token.QNAME;
  }
  // Returns the next character without consuming it (-1 at the end of
  // the input). The character is cached in peekChar until ReadChar ()
  // takes it.
  private int PeekChar ()
  {
      if (peekChar != -1) {
          return peekChar;
      }
      peekChar = source.Read ();
      return peekChar;
  }
  // Consumes and returns the next character (-1 at the end of the input),
  // updating the line / column counters. The line counter is advanced
  // lazily: a line feed only sets a flag, and the new line starts when
  // the following character is read.
  private int ReadChar ()
  {
      int ch = peekChar;
      if (ch == -1) {
          ch = source.Read ();
      } else {
          peekChar = -1;
      }

      if (nextIncrementLine) {
          nextIncrementLine = false;
          column = 0;
          line++;
      }
      column++;

      // Only '\n' starts a new line; '\r' is counted as an ordinary column.
      if (ch == '\n') {
          nextIncrementLine = true;
      }
      return ch;
  }
  // Consumes any run of space, tab, carriage return and line feed
  // characters at the current position.
  private void SkipWhitespaces ()
  {
      for (;;) {
          int next = PeekChar ();
          bool isSpace = next == ' ' || next == '\t' || next == '\r' || next == '\n';
          if (!isSpace) {
              return;
          }
          ReadChar ();
      }
  }
  // Scratch buffer in which the text of the token being read is collected,
  // and the number of characters currently stored in it.
  private char [] buffer = new char [30];
  private int bufferIndex;

  // Appends one character to the scratch buffer, doubling its capacity
  // when it is full.
  private void AddValueChar (char c)
  {
      int used = bufferIndex;
      if (used == buffer.Length) {
          char [] grown = new char [used * 2];
          Array.Copy (buffer, grown, used);
          buffer = grown;
      }
      buffer [used] = c;
      bufferIndex = used + 1;
  }
  // Returns the characters collected so far as a string. The buffer is
  // not cleared by this call.
  private string CreateValueString ()
  {
      int length = bufferIndex;
      return new string (buffer, 0, length);
  }
  // Consumes the given text from the input, character by character.
  // Throws a compile error at the first character that does not match
  // (the characters read up to that point stay consumed).
  private void Expect (string expected)
  {
      foreach (char wanted in expected) {
          if (ReadChar () != wanted) {
              throw Error (String.Format ("Expected token '{0}' did not appear.", expected));
          }
      }
  }
  // TODO: parse three quoted
  //
  // Reads the body of a string literal whose opening quote has already
  // been consumed, up to and including the matching closing quote, and
  // returns the text between the quotes.
  //
  // quoteChar: the quote character that opened the literal.
  //
  // Throws a compile error when the input ends before the closing quote.
  // The earlier revision looped forever on that input for '\'' literals
  // and silently accepted it for '"' literals; it also dropped a '"'
  // that appeared inside a '\'' literal (and vice versa).
  private string ReadQuoted (char quoteChar)
  {
      bufferIndex = 0;
      while (true) {
          int c = ReadChar ();
          if (c < 0)
              throw Error ("Unexpected end of query text inside string literal.");
          if (c == quoteChar)
              break;
          // Everything else, including the other kind of quote, is content.
          AddValueChar ((char) c);
      }
      return CreateValueString ();
  }
  // Reads a run of digits and returns it as a decimal.
  //
  // floatingPoint: true when the caller has already consumed a leading
  //                '.', so that the digits form the fraction part
  //                (".5" -> 0.5); false when the digits are an integer
  //                ("123" -> 123).
  //
  // The earlier revision had the prefix condition inverted ("123" became
  // 0.123 and ".5" became 5) and parsed with the current culture.
  // Throws a compile error when the collected text is not a valid number.
  private decimal ReadDecimal (bool floatingPoint)
  {
      bufferIndex = 0;
      if (floatingPoint)
          AddValueChar ('.');

      // FIXME: more complex
      while (true) {
          int c = PeekChar ();
          if (c < 0 || !Char.IsNumber ((char) c))
              break;
          ReadChar ();
          AddValueChar ((char) c);
      }

      string s = CreateValueString ();
      decimal result;
      // Query text is culture-independent, so always parse invariantly.
      // Char.IsNumber also accepts non-ASCII numerals, which are
      // rejected here with a proper compile error.
      if (!decimal.TryParse (s,
          System.Globalization.NumberStyles.AllowDecimalPoint,
          System.Globalization.CultureInfo.InvariantCulture,
          out result))
          throw Error (String.Format ("Invalid numeric literal '{0}'.", s));
      return result;
  }
  // Reads a run of name characters (see IsTokenContinuable) and returns
  // it; the result is empty when the next character cannot be part of a
  // name. Reading stops, without consuming the character, at whitespace,
  // at the end of the input, or at any non-name character.
  //
  // When the stopping character is ':' the text read so far is recorded
  // in prefixName. A ':' found while a prefix is already recorded is a
  // compile error, now raised through Error () so that it carries line
  // information like the other errors in this class.
  private string ReadOneToken ()
  {
      bufferIndex = 0;
      while (true) {
          int c = PeekChar ();
          if (c == -1 || c == ' ' || c == '\t' || c == '\r' || c == '\n')
              break;
          if (!IsTokenContinuable (c)) {
              if (c == ':') {
                  if (prefixName != null)
                      throw Error ("Invalid colon was found.");
                  prefixName = CreateValueString ();
              }
              break;
          }
          ReadChar ();
          AddValueChar ((char) c);
      }
      return CreateValueString ();
  }
  // Tells whether the character may appear inside a name token:
  // '-', '_', '.', or anything XmlChar accepts as an NCName character.
  private bool IsTokenContinuable (int c)
  {
      if (c == '-' || c == '_' || c == '.') {
          return true;
      }
      return XmlChar.IsNCNameChar (c);
  }
  983. }
  // Whitespace policy that the tokenizer applies before scanning a token.
  public enum WhitespaceHandling
  {
      // Leading whitespace, if any, is skipped.
      Arbitrary,
      // At least one whitespace character must be present; it is then skipped.
      Explicit,
      // Nothing is skipped by the tokenizer.
      Significant
  }
  // Lexical states of the tokenizer. The state decides which names are
  // treated as keywords and how a few characters ('<', '/', '?', '*',
  // '+') are tokenized. Member order (and thus numeric value) is kept.
  public enum ParseState
  {
      // Initial state.
      Default,
      Operator,
      // In the next three states a non-keyword name is returned as NCNAME.
      NamespaceDecl,
      NamespaceKeyword,
      XmlSpaceDecl,
      ItemType,
      KindTest,
      // A name is returned as NCNAME in this state.
      KindTestForPI,
      CloseKindTest,
      // '?', '*' and '+' are read as occurrence indicators; the tokenizer
      // then switches itself to Operator.
      OccurenceIndicator,
      SchemaContextStep,
      VarName,
      // "/>" is recognized as an empty-tag close only in this state.
      StartTag,
      // '<' starts markup ("</", "<!--", "<![CDATA[", "<?") only in this state.
      ElementContent,
      EndTag,
      XmlComment,
      ExprComment,
      ExtKey,
      // A name is returned as NCNAME in this state.
      XmlPI,
      // Raw processing-instruction content is read up to "?>".
      XmlPIContent,
      CDataSection,
      QuotAttributeContent,
      AposAttributeContent
  }
  1014. }
  1015. #endif