|
@@ -40,6 +40,7 @@ type
|
|
FEndOfStream: Boolean;
|
|
FEndOfStream: Boolean;
|
|
FScannerContext: TXMLScannerContext;
|
|
FScannerContext: TXMLScannerContext;
|
|
FTokenText: SAXString;
|
|
FTokenText: SAXString;
|
|
|
|
+ FRawTokenText: string;
|
|
FCurStringValueDelimiter: Char;
|
|
FCurStringValueDelimiter: Char;
|
|
FAttrNameRead: Boolean;
|
|
FAttrNameRead: Boolean;
|
|
protected
|
|
protected
|
|
@@ -103,7 +104,9 @@ procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
|
|
|
|
|
|
implementation
|
|
implementation
|
|
|
|
|
|
-uses htmldefs; // for entities...
|
|
|
|
|
|
+uses
|
|
|
|
+ xmlutils,
|
|
|
|
+ htmldefs; // for entities...
|
|
|
|
|
|
const
|
|
const
|
|
WhitespaceChars = [#9, #10, #13, ' '];
|
|
WhitespaceChars = [#9, #10, #13, ' '];
|
|
@@ -154,6 +157,7 @@ begin
|
|
|
|
|
|
BufferPos := 0;
|
|
BufferPos := 0;
|
|
while (BufferPos < BufferSize) and not FStopFlag do
|
|
while (BufferPos < BufferSize) and not FStopFlag do
|
|
|
|
+ begin
|
|
case ScannerContext of
|
|
case ScannerContext of
|
|
scUnknown:
|
|
scUnknown:
|
|
case Buffer[BufferPos] of
|
|
case Buffer[BufferPos] of
|
|
@@ -176,7 +180,7 @@ begin
|
|
case Buffer[BufferPos] of
|
|
case Buffer[BufferPos] of
|
|
#9, #10, #13, ' ':
|
|
#9, #10, #13, ' ':
|
|
begin
|
|
begin
|
|
- FTokenText := FTokenText + Buffer[BufferPos];
|
|
|
|
|
|
+ FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
|
Inc(BufferPos);
|
|
Inc(BufferPos);
|
|
end;
|
|
end;
|
|
'&':
|
|
'&':
|
|
@@ -190,7 +194,7 @@ begin
|
|
EnterNewScannerContext(scTag);
|
|
EnterNewScannerContext(scTag);
|
|
end;
|
|
end;
|
|
else
|
|
else
|
|
- FScannerContext := scText
|
|
|
|
|
|
+ FScannerContext := scText;
|
|
end;
|
|
end;
|
|
scText:
|
|
scText:
|
|
case Buffer[BufferPos] of
|
|
case Buffer[BufferPos] of
|
|
@@ -206,7 +210,7 @@ begin
|
|
end;
|
|
end;
|
|
else
|
|
else
|
|
begin
|
|
begin
|
|
- FTokenText := FTokenText + Buffer[BufferPos];
|
|
|
|
|
|
+ FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
|
Inc(BufferPos);
|
|
Inc(BufferPos);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
@@ -220,7 +224,7 @@ begin
|
|
EnterNewScannerContext(scUnknown)
|
|
EnterNewScannerContext(scUnknown)
|
|
else
|
|
else
|
|
begin
|
|
begin
|
|
- FTokenText := FTokenText + Buffer[BufferPos];
|
|
|
|
|
|
+ FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
|
Inc(BufferPos);
|
|
Inc(BufferPos);
|
|
end;
|
|
end;
|
|
scTag:
|
|
scTag:
|
|
@@ -237,13 +241,13 @@ begin
|
|
FAttrNameRead := False;
|
|
FAttrNameRead := False;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
- FTokenText := FTokenText + Buffer[BufferPos];
|
|
|
|
|
|
+ FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
|
Inc(BufferPos);
|
|
Inc(BufferPos);
|
|
end;
|
|
end;
|
|
'=':
|
|
'=':
|
|
begin
|
|
begin
|
|
FAttrNameRead := True;
|
|
FAttrNameRead := True;
|
|
- FTokenText := FTokenText + Buffer[BufferPos];
|
|
|
|
|
|
+ FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
|
Inc(BufferPos);
|
|
Inc(BufferPos);
|
|
end;
|
|
end;
|
|
'>':
|
|
'>':
|
|
@@ -254,99 +258,101 @@ begin
|
|
end;
|
|
end;
|
|
else
|
|
else
|
|
begin
|
|
begin
|
|
- FTokenText := FTokenText + Buffer[BufferPos];
|
|
|
|
|
|
+ FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
|
Inc(BufferPos);
|
|
Inc(BufferPos);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
- end;
|
|
|
|
|
|
+ end; // case ScannerContext of
|
|
|
|
+ end; // while not endOfBuffer
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
-procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
|
|
|
-
|
|
|
|
- function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
|
|
|
|
- var
|
|
|
|
- i, j: Integer;
|
|
|
|
- AttrName: String;
|
|
|
|
- ValueDelimiter: Char;
|
|
|
|
- DoIncJ: Boolean;
|
|
|
|
|
|
// SplitTagString (version added by this change; the '-' lines interleaved
// below belong to the nested routine of the same name that it replaces).
//
// Splits s at its first XML whitespace character (as decided by
// IsXMLWhitespace). The text before that character is returned unchanged as
// the result - presumably the element name, judging by the routine's name;
// confirm against the caller. The rest of s is scanned for attributes.
//
// Attr (out): set to nil first. A TSAXAttributes object is created only when
// s contains whitespace, so Attr stays nil when s has no whitespace at all.
// NOTE(review): nothing here frees Attr; presumably the caller owns it - confirm.
//
// Attribute scanning:
//   name=value                      value ends at the next whitespace or end of s
//   name="value"  or  name='value'  value ends at the matching delimiter or end of s
//   name (followed by whitespace)   stored with an empty value
// Names and values are copied from s as-is. An attribute is stored only if
// IsXMLName accepts its name.
// NOTE(review): a trailing name with neither '=' nor whitespace after it
// reaches the end of the outer loop without being added - confirm intended.
+function SplitTagString(const s: SAXString; var Attr: TSAXAttributes): SAXString;
|
|
|
|
+var
|
|
|
|
+ i, j: Integer;
|
|
|
|
+ AttrName: SAXString;
|
|
|
|
+ ValueDelimiter: WideChar;
|
|
|
|
+ DoIncJ: Boolean;
|
|
|
|
+begin
|
|
|
|
+ Attr := nil;
|
|
|
|
+ i := 0;
|
|
|
|
// advance i to the first whitespace character, or past the end of s
+ repeat
|
|
|
|
+ Inc(i)
|
|
|
|
+ until (i > Length(s)) or IsXMLWhitespace(s[i]);
|
|
|
|
+
|
|
|
|
+ if i > Length(s) then
|
|
|
|
+ Result := s
|
|
|
|
+ else
|
|
begin
|
|
begin
|
|
- Attr := nil;
|
|
|
|
- i := 0;
|
|
|
|
- repeat
|
|
|
|
- Inc(i)
|
|
|
|
- until (i > Length(s)) or (s[i] in WhitespaceChars);
|
|
|
|
-
|
|
|
|
- if i > Length(s) then
|
|
|
|
- Result := LowerCase(s)
|
|
|
|
- else
|
|
|
|
- begin
|
|
|
|
- Result := LowerCase(Copy(s, 1, i - 1));
|
|
|
|
- Attr := TSAXAttributes.Create;
|
|
|
|
|
|
+ Result := Copy(s, 1, i - 1);
|
|
|
|
+ Attr := TSAXAttributes.Create;
|
|
|
|
+ Inc(i);
|
|
|
|
|
|
|
|
+ while (i <= Length(s)) and IsXMLWhitespace(s[i]) do
|
|
Inc(i);
|
|
Inc(i);
|
|
|
|
|
|
- while (i <= Length(s)) and (s[i] in WhitespaceChars) do
|
|
|
|
- Inc(i);
|
|
|
|
-
|
|
|
|
- SetLength(AttrName, 0);
|
|
|
|
- j := i;
|
|
|
|
|
|
+ SetLength(AttrName, 0);
|
|
|
|
+ j := i;
|
|
|
|
|
|
- while j <= Length(s) do
|
|
|
|
- if s[j] = '=' then
|
|
|
|
|
|
+ while j <= Length(s) do
|
|
|
|
+ if s[j] = '=' then
|
|
|
|
+ begin
|
|
|
|
+ AttrName := Copy(s, i, j - i);
|
|
|
|
+ Inc(j);
|
|
|
|
// NOTE(review): strict '<' means a quote that is the last character of s is
// not taken as a delimiter and becomes the (unquoted) value - confirm intended.
+ if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
|
begin
|
|
begin
|
|
- AttrName := LowerCase(Copy(s, i, j - i));
|
|
|
|
|
|
+ ValueDelimiter := s[j];
|
|
Inc(j);
|
|
Inc(j);
|
|
- if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
|
|
|
|
|
+ end else
|
|
|
|
+ ValueDelimiter := #0;
|
|
|
|
+ i := j;
|
|
|
|
+ DoIncJ := False;
|
|
|
|
+ while j <= Length(s) do
|
|
|
|
+ if ValueDelimiter = #0 then
|
|
|
|
+ if IsXMLWhitespace(s[j]) then
|
|
|
|
+ break
|
|
|
|
+ else
|
|
|
|
+ Inc(j)
|
|
|
|
+ else if s[j] = ValueDelimiter then
|
|
begin
|
|
begin
|
|
- ValueDelimiter := s[j];
|
|
|
|
- Inc(j);
|
|
|
|
|
|
+ DoIncJ := True;
|
|
|
|
+ break
|
|
end else
|
|
end else
|
|
- ValueDelimiter := #0;
|
|
|
|
- i := j;
|
|
|
|
- DoIncJ := False;
|
|
|
|
- while j <= Length(s) do
|
|
|
|
- if ValueDelimiter = #0 then
|
|
|
|
- if s[j] in WhitespaceChars then
|
|
|
|
- break
|
|
|
|
- else
|
|
|
|
- Inc(j)
|
|
|
|
- else if s[j] = ValueDelimiter then
|
|
|
|
- begin
|
|
|
|
- DoIncJ := True;
|
|
|
|
- break
|
|
|
|
- end else
|
|
|
|
- Inc(j);
|
|
|
|
|
|
+ Inc(j);
|
|
|
|
|
|
|
|
// store the attribute only when IsXMLName accepts its name
+ if IsXMLName(AttrName) then
|
|
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
|
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
|
|
|
|
|
- if DoIncJ then
|
|
|
|
- Inc(j);
|
|
|
|
|
|
// step over the closing delimiter found by the value scan above
+ if DoIncJ then
|
|
|
|
+ Inc(j);
|
|
|
|
|
|
- while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
|
|
|
- Inc(j);
|
|
|
|
- i := j;
|
|
|
|
- end
|
|
|
|
- else if s[j] in WhitespaceChars then
|
|
|
|
- begin
|
|
|
|
- Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
|
|
|
|
|
+ while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
|
|
Inc(j);
|
|
Inc(j);
|
|
- while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
|
|
|
- Inc(j);
|
|
|
|
- i := j;
|
|
|
|
- end else
|
|
|
|
|
|
+ i := j;
|
|
|
|
+ end
|
|
|
|
// whitespace reached without a preceding '=': name-only attribute, empty value
+ else if IsXMLWhitespace(s[j]) then
|
|
|
|
+ begin
|
|
|
|
+ if IsXMLName(@s[i], j-i) then
|
|
|
|
+ Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
|
|
|
+ Inc(j);
|
|
|
|
+ while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
|
|
Inc(j);
|
|
Inc(j);
|
|
- end;
|
|
|
|
|
|
+ i := j;
|
|
|
|
+ end else
|
|
|
|
+ Inc(j);
|
|
end;
|
|
end;
|
|
|
|
+end;
|
|
|
|
|
|
|
|
+procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
|
var
|
|
var
|
|
Attr: TSAXAttributes;
|
|
Attr: TSAXAttributes;
|
|
- TagName: String;
|
|
|
|
|
|
+ TagName: SAXString;
|
|
Ent: SAXChar;
|
|
Ent: SAXChar;
|
|
begin
|
|
begin
|
|
|
|
+ FTokenText := FRawTokenText; // this is where conversion takes place
|
|
case ScannerContext of
|
|
case ScannerContext of
|
|
scWhitespace:
|
|
scWhitespace:
|
|
- DoIgnorableWhitespace(PSAXChar(TokenText), 1, Length(TokenText));
|
|
|
|
|
|
+ DoIgnorableWhitespace(PSAXChar(TokenText), 0, Length(TokenText));
|
|
scText:
|
|
scText:
|
|
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
|
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
|
scEntityReference:
|
|
scEntityReference:
|
|
@@ -397,7 +403,8 @@ begin
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
FScannerContext := NewContext;
|
|
FScannerContext := NewContext;
|
|
- SetLength(FTokenText, 0);
|
|
|
|
|
|
+ FTokenText := '';
|
|
|
|
+ FRawTokenText := '';
|
|
FCurStringValueDelimiter := #0;
|
|
FCurStringValueDelimiter := #0;
|
|
FAttrNameRead := False;
|
|
FAttrNameRead := False;
|
|
end;
|
|
end;
|