Browse Source

XPath, use a perfect hash to recognize all possible keywords.

git-svn-id: trunk@15638 -
sergei 15 years ago
parent
commit
0f5795baaf
3 changed files with 222 additions and 65 deletions
  1. 1 0
      .gitattributes
  2. 59 65
      packages/fcl-xml/src/xpath.pp
  3. 162 0
      packages/fcl-xml/src/xpathkw.inc

+ 1 - 0
.gitattributes

@@ -2323,6 +2323,7 @@ packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
 packages/fcl-xml/src/xpath.pp svneol=native#text/plain
+packages/fcl-xml/src/xpathkw.inc svneol=native#text/plain
 packages/fcl-xml/tests/README.txt svneol=native#text/plain
 packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
 packages/fcl-xml/tests/api.xml svneol=native#text/plain

+ 59 - 65
packages/fcl-xml/src/xpath.pp

@@ -95,6 +95,23 @@ type
     tkPipe                      // "|"
   );
 
+  TXPathKeyword = (   // declaration order matters: XPathKeywords (xpathkw.inc) is indexed by this type
+    // xkNone = "not a keyword"; then the axis names (xkAncestor..xkSelf is the index range of AxisNameMap)
+    xkNone, xkAncestor,  xkAncestorOrSelf,  xkAttribute,  xkChild,
+    xkDescendant, xkDescendantOrSelf, xkFollowing, xkFollowingSibling,
+    xkNamespace, xkParent, xkPreceding, xkPrecedingSibling, xkSelf,
+    // node tests (xkComment..xkNode is the index range of NodeTestMap)
+    xkComment, xkText, xkProcessingInstruction, xkNode,
+    // operator names
+    xkAnd, xkOr, xkDiv, xkMod,
+    // standard functions (xkLast..xkRound make up the FunctionKeywords set)
+    xkLast, xkPosition, xkCount, xkId, xkLocalName, xkNamespaceUri,
+    xkName, xkString, xkConcat, xkStartsWith, xkContains,
+    xkSubstringBefore, xkSubstringAfter, xkSubstring,
+    xkStringLength, xkNormalizeSpace, xkTranslate, xkBoolean,
+    xkNot, xkTrue, xkFalse, xkLang, xkNumber, xkSum, xkFloor,
+    xkCeiling, xkRound
+  );
 
 { XPath expression parse tree }
 
@@ -347,6 +364,7 @@ type
     FTokenStart: DOMPChar;
     FTokenLength: Integer;
     FPrefixLength: Integer;
+    FTokenId: TXPathKeyword;
     FResolver: TXPathNSResolver;
     procedure Error(const Msg: String);
     procedure ParsePredicates(var Dest: TXPathNodeArray);
@@ -485,6 +503,23 @@ implementation
 
 uses Math, xmlutils;
 
+{$i xpathkw.inc}
+
+const
+  AxisNameKeywords = [xkAncestor..xkSelf];   // keywords accepted as an AxisName in front of '::'
+  AxisNameMap: array[xkAncestor..xkSelf] of TAxis = (   // keyword -> axis, listed in TXPathKeyword order
+    axisAncestor, axisAncestorOrSelf, axisAttribute, axisChild,
+    axisDescendant, axisDescendantOrSelf, axisFollowing,
+    axisFollowingSibling, axisNamespace, axisParent, axisPreceding,
+    axisPrecedingSibling, axisSelf
+  );
+  NodeTestKeywords = [xkComment..xkNode];   // identifiers treated as a NodeType test when followed by '('
+  NodeTestMap: array[xkComment..xkNode] of TNodeTestType = (   // keyword -> node test type, in TXPathKeyword order
+    ntCommentNode, ntTextNode, ntPINode, ntAnyNode
+  );
+
+  FunctionKeywords = [xkLast..xkRound];   // the "standard functions" group of TXPathKeyword
+
 { Helper functions }
 
 function NodeToText(Node: TDOMNode): DOMString;
@@ -1593,6 +1628,10 @@ begin
   FCurToken := Result;
   if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then
     SetString(FCurTokenString, FTokenStart, FTokenLength);
+  if Result = tkIdentifier then
+    FTokenId := LookupXPathKeyword(FTokenStart, FTokenLength)
+  else
+    FTokenId := xkNone;
 end;
 
 function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? }
@@ -1832,36 +1871,10 @@ begin
     end
     else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then  // [5] AxisName '::'
     begin
-      // Check for [6] AxisName
-      if CurTokenString = 'ancestor' then
-        Axis := axisAncestor
-      else if CurTokenString = 'ancestor-or-self' then
-        Axis := axisAncestorOrSelf
-      else if CurTokenString = 'attribute' then
-        Axis := axisAttribute
-      else if CurTokenString = 'child' then
-        Axis := axisChild
-      else if CurTokenString = 'descendant' then
-        Axis := axisDescendant
-      else if CurTokenString = 'descendant-or-self' then
-        Axis := axisDescendantOrSelf
-      else if CurTokenString = 'following' then
-        Axis := axisFollowing
-      else if CurTokenString = 'following-sibling' then
-        Axis := axisFollowingSibling
-      else if CurTokenString = 'namespace' then
-        Axis := axisNamespace
-      else if CurTokenString = 'parent' then
-        Axis := axisParent
-      else if CurTokenString = 'preceding' then
-        Axis := axisPreceding
-      else if CurTokenString = 'preceding-sibling' then
-        Axis := axisPrecedingSibling
-      else if CurTokenString = 'self' then
-        Axis := axisSelf
+      if FTokenId in AxisNameKeywords then
+        Axis := AxisNameMap[FTokenId]
       else
         Error(SParserBadAxisName);
-
       NextToken;  // skip identifier and the '::'
       NextToken;
     end
@@ -1874,15 +1887,6 @@ begin
 end;
 
 function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7]
-
-  procedure NeedBrackets;
-  begin
-    NextToken;
-    if NextToken <> tkRightBracket then
-       Error(SParserExpectedRightBracket);
-    NextToken;
-  end;
-
 var
   nodeType: TNodeTestType;
   nodeName: DOMString;
@@ -1910,33 +1914,26 @@ begin
     // Check for case [38] NodeType
     if PeekToken = tkLeftBracket then
     begin
-      if CurTokenString = 'comment' then
-      begin
-        NeedBrackets;
-        nodeType := ntCommentNode;
-      end
-      else if CurTokenString = 'text' then
+      if FTokenId in NodeTestKeywords then
       begin
-        NeedBrackets;
-        nodeType := ntTextNode;
-      end
-      else if CurTokenString = 'processing-instruction' then
-      begin
-        NextToken;   { skip '('; we know it's there }
-        if NextToken = tkString then
+        nodeType := NodeTestMap[FTokenId];
+        if FTokenId = xkProcessingInstruction then
+        begin
+          NextToken;
+          if NextToken = tkString then
+          begin
+            nodeName := CurTokenString;
+            NextToken;
+          end;
+        end
+        else
         begin
-          nodeName := CurTokenString;
+          NextToken;
           NextToken;
         end;
         if CurToken <> tkRightBracket then
           Error(SParserExpectedRightBracket);
         NextToken;
-        nodeType := ntPINode;
-      end
-      else if CurTokenString = 'node' then
-      begin
-        NeedBrackets;
-        nodeType := ntAnyNode;
       end
       else
         Error(SParserBadNodeType);
@@ -2029,10 +2026,7 @@ begin
   Result := nil;
   // Try to detect whether a LocationPath [1] or a FilterExpr [20] follows
   if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and
-    (CurTokenString <> 'comment') and
-    (CurTokenString <> 'text') and
-    (CurTokenString <> 'processing-instruction') and
-    (CurTokenString <> 'node')) or
+    not (FTokenId in NodeTestKeywords)) or
     (CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then
   begin
     // second, third or fourth case of [19]
@@ -2083,7 +2077,7 @@ end;
 function TXPathScanner.ParseOrExpr: TXPathExprNode;  // [21]
 begin
   Result := ParseAndExpr;
-  while (CurToken = tkIdentifier) and (CurTokenString = 'or') do
+  while FTokenId = xkOr do
   begin
     NextToken;
     Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr);
@@ -2093,7 +2087,7 @@ end;
 function TXPathScanner.ParseAndExpr: TXPathExprNode;  // [22]
 begin
   Result := ParseEqualityExpr;
-  while (CurToken = tkIdentifier) and (CurTokenString = 'and') do
+  while FTokenId = xkAnd do
   begin
     NextToken;
     Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr);
@@ -2163,9 +2157,9 @@ begin
       tkAsterisk:
         op := opMultiply;
       tkIdentifier:
-        if CurTokenString = 'div' then
+        if FTokenId = xkDiv then
           op := opDivide
-        else if CurTokenString = 'mod' then
+        else if FTokenId = xkMod then
           op := opMod
         else
           break;

+ 162 - 0
packages/fcl-xml/src/xpathkw.inc

@@ -0,0 +1,162 @@
+{
+    This file is part of the Free Component Library
+
+    A perfect hash for XPath keywords
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+const
+  XPathKeywords: array [TXPathKeyword] of PWideChar = (   // each entry: one character whose code is the keyword length, then the keyword text
+    '',   // xkNone
+    #08'ancestor',
+    #16'ancestor-or-self',
+    #09'attribute',
+    #05'child',
+    #10'descendant',
+    #18'descendant-or-self',
+    #09'following',
+    #17'following-sibling',
+    #09'namespace',
+    #06'parent',
+    #09'preceding',
+    #17'preceding-sibling',
+    #04'self',
+    #07'comment',
+    #04'text',
+    #22'processing-instruction',
+    #04'node',
+    #03'and',
+    #02'or',
+    #03'div',
+    #03'mod',
+    #04'last',
+    #08'position',
+    #05'count',
+    #02'id',
+    #10'local-name',
+    #13'namespace-uri',
+    #04'name',
+    #06'string',
+    #06'concat',
+    #11'starts-with',
+    #08'contains',
+    #16'substring-before',
+    #15'substring-after',
+    #09'substring',
+    #13'string-length',
+    #15'normalize-space',
+    #09'translate',
+    #07'boolean',
+    #03'not',
+    #04'true',
+    #05'false',
+    #04'lang',
+    #06'number',
+    #03'sum',
+    #05'floor',
+    #07'ceiling',
+    #05'round'
+  );   // entries must stay in TXPathKeyword declaration order
+
+{ The following code is not very maintainable because it was hand-ported from 
+  C code generated by gperf. Unless a tool like gperf is ported or modified to
+  generate Pascal, modifying it will be painful.
+  The good side is that one shouldn't ever need to modify it. }
+
+  MaxHash = 55;   // number of slots in KeywordIndex; valid slot numbers are 0..MaxHash-1
+
+  KeywordIndex: array[0..MaxHash-1] of TXPathKeyword = (   // hash value -> keyword; xkNone marks a slot no keyword hashes to
+    xkNone, xkNone,
+    xkId,
+    xkNone, xkNone, xkNone,
+    xkString,
+    xkSum,
+    xkParent,
+    xkSubstring,
+    xkNone,
+    xkComment,
+    xkName,
+    xkStringLength,
+    xkNumber,
+    xkSubstringAfter,
+    xkSubstringBefore,
+    xkNamespace,
+    xkFloor,
+    xkNormalizeSpace,
+    xkSelf,
+    xkNamespaceUri,
+    xkPreceding,
+    xkOr,
+    xkPosition,
+    xkText,
+    xkProcessingInstruction,
+    xkConcat,
+    xkLast,
+    xkContains,
+    xkPrecedingSibling,
+    xkAncestor,
+    xkFalse,
+    xkLocalName,
+    xkCount,
+    xkLang,
+    xkFollowing,
+    xkDescendant,
+    xkNode,
+    xkAncestorOrSelf,
+    xkBoolean,
+    xkNot,
+    xkStartsWith,
+    xkAnd,
+    xkFollowingSibling,
+    xkDescendantOrSelf,
+    xkChild,
+    xkTrue,
+    xkCeiling,
+    xkMod,
+    xkDiv,
+    xkRound,
+    xkNone,
+    xkAttribute,
+    xkTranslate
+  );
+
+  AssoValues: array[97..122] of Byte = (   // per-letter hash weights, indexed by character code ord('a')..ord('z')
+    10, 31,  0, 13, 30, 11, 55, 55, 0, 41,   // 'a'..'j'
+    55, 10, 16,  4, 21,  2, 55, 17, 0, 14,   // 'k'..'t'
+    34, 29, 34, 55,  7, 55                   // 'u'..'z'; a weight of 55 (= MaxHash) always pushes the hash out of range
+  );
+
+function LookupXPathKeyword(p: PWideChar; Len: Integer): TXPathKeyword;
+var  // Returns the keyword spelled by the Len WideChars at p, or xkNone when they spell no keyword.
+  hash: Integer;   // Len + AssoValues weight of p[0] (+ weight looked up for p[2] when Len > 2)
+  p1: PWideChar;   // candidate entry of XPathKeywords: length character followed by the text
+begin
+  result := xkNone;
+  hash := Len;
+  if Len >= 1 then
+  begin
+    if (p^ >= 'a') and (p^ <= 'y') then
+      Inc(hash, AssoValues[ord(p^)])
+    else
+      Exit;   // first character outside 'a'..'y': cannot be a keyword
+    if Len > 2 then
+      if (p[2] >= 'a') and (p[2] <= 'y') then   // 'y' is the limit: the +1 below must stay within AssoValues
+        Inc(hash, AssoValues[ord(p[2])+1])
+      else
+        Exit;
+  end;
+  if (hash >= 0) and (hash <= High(KeywordIndex)) then   // KeywordIndex ends at MaxHash-1; hash = MaxHash must be rejected
+  begin
+    p1 := XPathKeywords[KeywordIndex[hash]];
+    if (ord(p1^) = Len) and   // compare the stored length first, then the characters
+      CompareMem(p, p1+1, Len*sizeof(WideChar)) then
+        Result := KeywordIndex[hash];
+  end;
+end;