Browse Source

* Patch from Sergei Gorelkin:
* Changed the design of input decoders so they process data by chunks
instead of char-by-char. It is much faster, and allows supporting
external pluggable decoders.
+ Interface for external decoders.
* ResolvePredefined() is rewritten so it doesn't call CompareMem five
times to determine a single char.
* ParseCharRef renamed to ParseRef, because it parses entity refs as
well.
* Added guard conditions to prevent integer overflows in ParseRef.
* ContextPush(TXMLCharSource) merged into Initialize().

xmliconv.pas is a new unit, containing a libiconv-based decoder. It depends on the existing iconvenc package, and
thus supports all platforms that are supported by iconvenc.

xmliconv_windows.pas is the variation that allows using libiconv functionality on Windows (it would require
the iconv.dll to be distributed with the application, but since I haven't succeeded yet in writing a native
Windows decoder, this is better than nothing).

git-svn-id: trunk@12582 -

michael 16 years ago
parent
commit
aecbef27b2

+ 2 - 0
.gitattributes

@@ -1661,6 +1661,8 @@ packages/fcl-xml/src/wtagsintf.inc svneol=native#text/plain
 packages/fcl-xml/src/xhtml.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlcfg.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlconf.pp svneol=native#text/plain
+packages/fcl-xml/src/xmliconv.pas svneol=native#text/plain
+packages/fcl-xml/src/xmliconv_windows.pas svneol=native#text/plain
 packages/fcl-xml/src/xmlread.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain

+ 98 - 19
packages/fcl-xml/Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/10/22]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/11/14]
 #
 default: all
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded
@@ -265,19 +265,19 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F
 override PACKAGE_NAME=fcl-xml
 override PACKAGE_VERSION=2.2.2
 ifeq ($(FULL_TARGET),i386-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),i386-go32v2)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),i386-win32)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv_windows
 endif
 ifeq ($(FULL_TARGET),i386-os2)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),i386-freebsd)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),i386-beos)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -304,7 +304,7 @@ ifeq ($(FULL_TARGET),i386-wdosx)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),i386-darwin)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),i386-emx)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -325,10 +325,10 @@ ifeq ($(FULL_TARGET),i386-symbian)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),m68k-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),m68k-freebsd)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),m68k-netbsd)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -349,7 +349,7 @@ ifeq ($(FULL_TARGET),m68k-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),powerpc-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),powerpc-netbsd)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -361,7 +361,7 @@ ifeq ($(FULL_TARGET),powerpc-macos)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),powerpc-morphos)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -370,7 +370,7 @@ ifeq ($(FULL_TARGET),powerpc-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),sparc-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),sparc-netbsd)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -382,28 +382,28 @@ ifeq ($(FULL_TARGET),sparc-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),x86_64-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),x86_64-freebsd)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),x86_64-darwin)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),x86_64-win64)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv_windows
 endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),arm-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),arm-darwin)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),arm-wince)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -421,10 +421,10 @@ ifeq ($(FULL_TARGET),arm-symbian)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),powerpc64-darwin)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -433,7 +433,7 @@ ifeq ($(FULL_TARGET),avr-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
 endif
 ifeq ($(FULL_TARGET),armeb-linux)
-override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml  xmliconv
 endif
 ifeq ($(FULL_TARGET),armeb-embedded)
 override TARGET_UNITS+=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
@@ -1157,6 +1157,37 @@ endif
 else
 CROSSBINDIR=
 endif
+ifeq ($(OS_SOURCE),linux)
+ifndef GCCLIBDIR
+ifeq ($(CPU_TARGET),i386)
+ifneq ($(findstring x86_64,$(shell uname -a)),)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m32 -print-libgcc-file-name`)
+endif
+endif
+endif
+ifeq ($(CPU_TARGET),powerpc64)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m64 -print-libgcc-file-name`)
+endif
+endif
+endif
+ifndef GCCLIBDIR
+CROSSGCC=$(strip $(wildcard $(addsuffix /$(BINUTILSPREFIX)gcc$(SRCEXEEXT),$(SEARCHPATH))))
+ifneq ($(CROSSGCC),)
+GCCLIBDIR:=$(shell dirname `$(CROSSGCC) -print-libgcc-file-name`)
+endif
+endif
+ifndef OTHERLIBDIR
+OTHERLIBDIR:=$(shell grep -v "^\#" /etc/ld.so.conf | awk '{ ORS=" "; print $1 }')
+endif
+endif
+ifdef inUnix
+ifeq ($(OS_SOURCE),netbsd)
+OTHERLIBDIR+=/usr/pkg/lib
+endif
+export GCCLIBDIR OTHERLIB
+endif
 BATCHEXT=.bat
 LOADEREXT=.as
 EXEEXT=.exe
@@ -1717,6 +1748,7 @@ override REQUIRE_PACKAGES=rtl fcl-base
 ifeq ($(FULL_TARGET),i386-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),i386-go32v2)
 REQUIRE_PACKAGES_RTL=1
@@ -1735,6 +1767,7 @@ endif
 ifeq ($(FULL_TARGET),i386-freebsd)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),i386-beos)
 REQUIRE_PACKAGES_RTL=1
@@ -1771,6 +1804,7 @@ endif
 ifeq ($(FULL_TARGET),i386-darwin)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),i386-emx)
 REQUIRE_PACKAGES_RTL=1
@@ -1799,10 +1833,12 @@ endif
 ifeq ($(FULL_TARGET),m68k-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),m68k-freebsd)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),m68k-netbsd)
 REQUIRE_PACKAGES_RTL=1
@@ -1831,6 +1867,7 @@ endif
 ifeq ($(FULL_TARGET),powerpc-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),powerpc-netbsd)
 REQUIRE_PACKAGES_RTL=1
@@ -1847,6 +1884,7 @@ endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),powerpc-morphos)
 REQUIRE_PACKAGES_RTL=1
@@ -1859,6 +1897,7 @@ endif
 ifeq ($(FULL_TARGET),sparc-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),sparc-netbsd)
 REQUIRE_PACKAGES_RTL=1
@@ -1875,14 +1914,17 @@ endif
 ifeq ($(FULL_TARGET),x86_64-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),x86_64-freebsd)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),x86_64-darwin)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),x86_64-win64)
 REQUIRE_PACKAGES_RTL=1
@@ -1897,6 +1939,7 @@ endif
 ifeq ($(FULL_TARGET),arm-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),arm-palmos)
 REQUIRE_PACKAGES_RTL=1
@@ -1905,6 +1948,7 @@ endif
 ifeq ($(FULL_TARGET),arm-darwin)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),arm-wince)
 REQUIRE_PACKAGES_RTL=1
@@ -1929,10 +1973,12 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),powerpc64-darwin)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 REQUIRE_PACKAGES_RTL=1
@@ -1945,6 +1991,7 @@ endif
 ifeq ($(FULL_TARGET),armeb-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_FCL-BASE=1
+REQUIRE_PACKAGES_ICONVENC=1
 endif
 ifeq ($(FULL_TARGET),armeb-embedded)
 REQUIRE_PACKAGES_RTL=1
@@ -2002,6 +2049,32 @@ ifdef UNITDIR_FCL-BASE
 override COMPILER_UNITDIR+=$(UNITDIR_FCL-BASE)
 endif
 endif
+ifdef REQUIRE_PACKAGES_ICONVENC
+PACKAGEDIR_ICONVENC:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /iconvenc/Makefile.fpc,$(PACKAGESDIR))))))
+ifneq ($(PACKAGEDIR_ICONVENC),)
+ifneq ($(wildcard $(PACKAGEDIR_ICONVENC)/units/$(TARGETSUFFIX)),)
+UNITDIR_ICONVENC=$(PACKAGEDIR_ICONVENC)/units/$(TARGETSUFFIX)
+else
+UNITDIR_ICONVENC=$(PACKAGEDIR_ICONVENC)
+endif
+ifdef CHECKDEPEND
+$(PACKAGEDIR_ICONVENC)/$(FPCMADE):
+	$(MAKE) -C $(PACKAGEDIR_ICONVENC) $(FPCMADE)
+override ALLDEPENDENCIES+=$(PACKAGEDIR_ICONVENC)/$(FPCMADE)
+endif
+else
+PACKAGEDIR_ICONVENC=
+UNITDIR_ICONVENC:=$(subst /Package.fpc,,$(strip $(wildcard $(addsuffix /iconvenc/Package.fpc,$(UNITSDIR)))))
+ifneq ($(UNITDIR_ICONVENC),)
+UNITDIR_ICONVENC:=$(firstword $(UNITDIR_ICONVENC))
+else
+UNITDIR_ICONVENC=
+endif
+endif
+ifdef UNITDIR_ICONVENC
+override COMPILER_UNITDIR+=$(UNITDIR_ICONVENC)
+endif
+endif
 ifdef REQUIRE_PACKAGES_WINUNITS-BASE
 PACKAGEDIR_WINUNITS-BASE:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /winunits-base/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_WINUNITS-BASE),)
@@ -2173,6 +2246,12 @@ endif
 endif
 ifdef LINKSHARED
 endif
+ifdef GCCLIBDIR
+override FPCOPT+=-Fl$(GCCLIBDIR)
+endif
+ifdef OTHERLIBDIR
+override FPCOPT+=$(addprefix -Fl,$(OTHERLIBDIR))
+endif
 ifdef OPT
 override FPCOPT+=$(OPT)
 endif

+ 8 - 0
packages/fcl-xml/Makefile.fpc

@@ -8,10 +8,18 @@ version=2.2.2
 
 [target]
 units=htmldefs sax xmlutils dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmwrite xpath htmlelements htmlwriter xmlconf sax_xml
+units_linux=xmliconv
+units_freebsd=xmliconv
+units_darwin=xmliconv
+units_win64=xmliconv_windows
+units_win32=xmliconv_windows
 rsts=sax xpath htmlwriter xmlconf
 
 [require]
 packages=fcl-base
+packages_linux=iconvenc
+packages_darwin=iconvenc
+packages_freebsd=iconvenc
 
 [compiler]
 options=-S2h

+ 78 - 0
packages/fcl-xml/src/xmliconv.pas

@@ -0,0 +1,78 @@
+{
+    This file is part of the Free Component Library
+
+    libiconv-based XML decoder.
+    Copyright (c) 2009 by Sergei Gorelkin, [email protected]
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+unit xmliconv;
+
+interface
+
+implementation
+
+uses
+  xmlread, iconvenc, unixtype, baseunix, initc;
+
+const
+{$ifdef FPC_LITTLE_ENDIAN}
+  utf16_encoding = 'UTF-16LE';
+{$else  FPC_LITTLE_ENDIAN}
+  utf16_encoding = 'UTF-16BE';
+{$endif  FPC_LITTLE_ENDIAN}
+
+{ TDecoder.Decode callback that converts a chunk of input bytes with iconv().
+
+  Context - the iconv_t handle stored in TDecoder.Context by GetIconvDecoder.
+  InBuf   - start of the input bytes.
+  InCnt   - in: number of input bytes available;
+            out: number of input bytes iconv left unconsumed.
+  OutBuf  - start of the output buffer.
+  OutCnt  - in: capacity of OutBuf in WideChars;
+            out: number of WideChars of that capacity still unused.
+  Result  - number of WideChars written to OutBuf (the same convention the
+            built-in decoders in xmlread follow), or -errno when iconv fails
+            for a reason other than E2BIG/EINVAL. }
+function Iconv_Decode(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  OutChars: size_t;
+  InChars: size_t;
+  Capacity: Cardinal;
+begin
+  // Remember the original capacity: OutCnt is overwritten below, and the
+  // produced count can only be derived from the value it had on entry.
+  Capacity := OutCnt;
+  OutChars := OutCnt * sizeof(WideChar);
+  InChars := InCnt;
+  Result := iconv(Context, @InBuf, @InChars, @OutBuf, @OutChars);
+  InCnt := InChars;
+  OutCnt := OutChars div sizeof(WideChar);
+  if Result = -1 then
+  begin
+    case cerrno of
+// when iconv reports insufficient input or output space, still return
+// a positive number of converted chars
+      ESysE2BIG, ESysEINVAL:
+        Result := Capacity - OutCnt;
+    else
+      Result := -cerrno;
+    end;
+  end
+  else
+    // On success iconv() returns the count of non-reversible conversions,
+    // not the amount of output; report the number of WideChars produced.
+    Result := Capacity - OutCnt;
+end;
+
+procedure Iconv_Cleanup(Context: Pointer); stdcall;  // installed as TDecoder.Cleanup by GetIconvDecoder
+begin
+  iconv_close(Context);  // Context is the handle returned by iconv_open; the result of iconv_close is ignored
+end;
+
+{ Decoder factory registered with xmlread through RegisterDecoder.
+  Asks iconv for a converter from AEncoding to native-endian UTF-16.
+  Returns True and fills Decoder when iconv_open succeeds; returns False
+  and leaves Decoder untouched otherwise. }
+function GetIconvDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
+var
+  Handle: iconv_t;
+begin
+  Handle := iconv_open(utf16_encoding, PChar(AEncoding));
+  // iconv_open reports failure by returning iconv_t(-1)
+  Result := Handle <> Pointer(-1);
+  if not Result then
+    Exit;
+  Decoder.Context := Handle;
+  Decoder.Decode := @Iconv_Decode;
+  Decoder.Cleanup := @Iconv_Cleanup;
+end;
+
+initialization
+  RegisterDecoder(@GetIconvDecoder);
+
+end.

+ 81 - 0
packages/fcl-xml/src/xmliconv_windows.pas

@@ -0,0 +1,81 @@
+{
+    This file is part of the Free Component Library
+
+    libiconv-based XML decoder (Windows version).
+    Binds to the native (not Cygwin or Mingw) build of libiconv.
+    Copyright (c) 2009 by Sergei Gorelkin, [email protected]
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+unit xmliconv_windows;
+
+interface
+
+implementation
+
+uses
+  xmlread;
+
+type
+  iconv_t = Pointer;
+
+const
+  iconvlib = 'iconv.dll';
+
+function iconv_open(ToCode, FromCode: PChar): iconv_t; cdecl; external iconvlib name 'libiconv_open';
+function iconv(__cd: iconv_t; __inbuf: PPChar; var __inbytesleft: size_t; __outbuf:ppchar; var __outbytesleft: size_t): size_t; cdecl; external iconvlib name 'libiconv';
+function iconv_close(cd: iconv_t): Integer; cdecl; external iconvlib name 'libiconv_close';
+
+function errno_location: PInteger; cdecl; external 'msvcrt.dll' name '_errno';
+
+{ TDecoder.Decode callback that converts a chunk of input bytes with libiconv.
+
+  Context - the iconv_t handle stored in TDecoder.Context by GetIconvDecoder.
+  InBuf   - start of the input bytes.
+  InCnt   - in: number of input bytes available;
+            out: number of input bytes iconv left unconsumed.
+  OutBuf  - start of the output buffer.
+  OutCnt  - in: capacity of OutBuf in WideChars;
+            out: number of WideChars of that capacity still unused.
+  Result  - number of WideChars written to OutBuf (the same convention the
+            built-in decoders in xmlread follow), or -errno when iconv fails
+            for a reason other than E2BIG/EINVAL. }
+function Iconv_Decode(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  OutChars: size_t;
+  InChars: size_t;
+  Capacity: Cardinal;
+  Err: Integer;
+begin
+  // Remember the original capacity: OutCnt is overwritten below, and the
+  // produced count can only be derived from the value it had on entry.
+  Capacity := OutCnt;
+  OutChars := OutCnt * sizeof(WideChar);
+  // iconv takes "var size_t" byte counters; go through a size_t local instead
+  // of passing the Cardinal InCnt directly, as the Unix unit (xmliconv) does.
+  InChars := InCnt;
+  Result := iconv(Context, @InBuf, InChars, @OutBuf, OutChars);
+  InCnt := InChars;
+  OutCnt := OutChars div sizeof(WideChar);
+  if Result = -1 then
+  begin
+    Err := errno_location^;
+    case Err of
+// when iconv reports insufficient input or output space, still return
+// a positive number of converted chars
+      7, 22:  Result := Capacity - OutCnt;   // 7 = E2BIG, 22 = EINVAL
+    else
+      Result := -Err;
+    end;
+  end
+  else
+    // On success iconv() returns the count of non-reversible conversions,
+    // not the amount of output; report the number of WideChars produced.
+    Result := Capacity - OutCnt;
+end;
+
+procedure Iconv_Cleanup(Context: Pointer); stdcall;  // installed as TDecoder.Cleanup by GetIconvDecoder
+begin
+  iconv_close(Context);  // Context is the handle returned by iconv_open; the result of iconv_close is ignored
+end;
+
+{ Decoder factory registered with xmlread through RegisterDecoder.
+  Asks libiconv for a converter from AEncoding to 'UCS-2-INTERNAL'.
+  Returns True and fills Decoder when iconv_open succeeds; returns False
+  and leaves Decoder untouched otherwise. }
+function GetIconvDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
+var
+  Handle: iconv_t;
+begin
+  Handle := iconv_open('UCS-2-INTERNAL', PChar(AEncoding));
+  // iconv_open reports failure by returning iconv_t(-1)
+  Result := Handle <> Pointer(-1);
+  if not Result then
+    Exit;
+  Decoder.Context := Handle;
+  Decoder.Decode := @Iconv_Decode;
+  Decoder.Cleanup := @Iconv_Cleanup;
+end;
+
+initialization
+  RegisterDecoder(@GetIconvDecoder);
+
+end.

+ 229 - 131
packages/fcl-xml/src/xmlread.pp

@@ -120,6 +120,15 @@ type
     property OnError: TXMLErrorEvent read FOnError write FOnError;
   end;
 
+  TDecoder = record
+    Context: Pointer;
+    Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+    Cleanup: procedure(Context: Pointer); stdcall;
+  end;
+
+  TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
+
+procedure RegisterDecoder(Proc: TGetDecoderProc);
 
 // =======================================================
 
@@ -196,17 +205,15 @@ type
     property PublicID: WideString read GetPublicID write FPublicID;
   end;
 
-  TXMLDecodingSource = class;
-  TDecoder = function(Src: TXMLDecodingSource): WideChar;
   TXMLDecodingSource = class(TXMLCharSource)
   private
     FCharBuf: PChar;
     FCharBufEnd: PChar;
     FBufStart: PWideChar;
     FDecoder: TDecoder;
+    FHasBOM: Boolean;
     FFixedUCS2: string;
     FBufSize: Integer;
-    FSurrogate: WideChar;
     procedure DecodingError(const Msg: string);
   protected
     function Reload: Boolean; override;
@@ -338,8 +345,7 @@ type
     procedure Initialize(ASource: TXMLCharSource);
     function DoParseAttValue(Delim: WideChar): Boolean;
     procedure DoParseFragment;
-    function ContextPush(AEntity: TDOMEntityEx): Boolean; overload;
-    procedure ContextPush(ASrc: TXMLCharSource); overload;
+    function ContextPush(AEntity: TDOMEntityEx): Boolean;
     function ContextPop: Boolean;
     procedure XML11_BuildTables;
     procedure ParseQuantity(CP: TContentParticle);
@@ -391,7 +397,7 @@ type
     function  ResolvePredefined: Boolean;
     procedure IncludeEntity(InAttr: Boolean);
     procedure StartPE;
-    function  ParseCharRef(var ToFill: TWideCharBuf): Boolean;        // [66]
+    function  ParseRef(var ToFill: TWideCharBuf): Boolean;              // [67]
     function  ParseExternalID(out SysID, PubID: WideString;             // [75]
       SysIdOptional: Boolean): Boolean;
 
@@ -411,7 +417,7 @@ type
     procedure ValidateDTD;
     procedure ValidateRoot;
     procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
-    procedure DoAttrText(ch: PWideChar; Count: Integer);    
+    procedure DoAttrText(ch: PWideChar; Count: Integer);
     procedure DTDReloadHook;
     procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
     // Some SAX-alike stuff (at a very early stage)
@@ -459,87 +465,163 @@ type
 const
   NullLocation: TLocation = (Line: 0; LinePos: 0);
 
-function Decode_UCS2(Src: TXMLDecodingSource): WideChar;
+{ Decoders }
+
+var
+  Decoders: array of TGetDecoderProc;
+
+procedure RegisterDecoder(Proc: TGetDecoderProc);
+var
+  L: Integer;
 begin
-  Result := PWideChar(Src.FCharBuf)^;
-  Inc(Src.FCharBuf, sizeof(WideChar));
+  L := Length(Decoders);
+  SetLength(Decoders, L+1);
+  Decoders[L] := Proc;
 end;
 
-function Decode_UCS2_Swapped(Src: TXMLDecodingSource): WideChar;
+function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
+var
+  I: Integer;
 begin
-  Result := WideChar((ord(Src.FCharBuf^) shl 8) or ord(Src.FCharBuf[1]));
-  Inc(Src.FCharBuf, sizeof(WideChar));
+  Result := False;
+  for I := 0 to High(Decoders) do
+    if Decoders[I](AEncoding, Decoder) then
+    begin
+      Result := True;
+      Exit;
+    end;
 end;
 
+function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  cnt: Cardinal;
+begin
+  cnt := OutCnt;         // num of widechars
+  if cnt > InCnt div sizeof(WideChar) then
+    cnt := InCnt div sizeof(WideChar);
+  Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
+  Dec(InCnt, cnt*sizeof(WideChar));
+  Dec(OutCnt, cnt);
+  Result := cnt;
+end;
 
-function Decode_UTF8_mb(Src: TXMLDecodingSource; First: WideChar): WideChar;
-const
-  MaxCode: array[0..3] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
+function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
 var
-  Value: Cardinal;
-  I, bc: Integer;
+  I: Integer;
+  cnt: Cardinal;
+  InPtr: PChar;
 begin
-  if ord(First) and $40 = 0 then
-    Src.DecodingError('Invalid UTF-8 sequence start byte');
-  bc := 1;
-  if ord(First) and $20 <> 0 then
+  cnt := OutCnt;         // num of widechars
+  if cnt > InCnt div sizeof(WideChar) then
+    cnt := InCnt div sizeof(WideChar);
+  InPtr := InBuf;
+  for I := 0 to cnt-1 do
   begin
-    Inc(bc);
-    if ord(First) and $10 <> 0 then
-    begin
-      Inc(bc);
-      if ord(First) and $8 <> 0 then
-        Src.DecodingError('UCS4 character out of supported range');
-    end;
+    OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
+    Inc(InPtr, 2);
   end;
-  // DONE: (?) check that bc bytes available
-  if Src.FCharBufEnd-Src.FCharBuf < bc then
-    Src.FetchData;
+  Dec(InCnt, cnt*sizeof(WideChar));
+  Dec(OutCnt, cnt);
+  Result := cnt;
+end;
 
-  Value := ord(First);
-  I := bc;  // note: I is never zero
-  while bc > 0 do
+function Decode_88591(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+var
+  I: Integer;
+  cnt: Cardinal;
+begin
+  cnt := OutCnt;         // num of widechars
+  if cnt > InCnt then
+    cnt := InCnt;
+  for I := 0 to cnt-1 do
+    OutBuf[I] := WideChar(ord(InBuf[I]));
+  Dec(InCnt, cnt);
+  Dec(OutCnt, cnt);
+  Result := cnt;
+end;
+
+function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
+const
+  MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
+var
+  i, j, bc: Cardinal;
+  Value: Cardinal;
+begin
+  result := 0;
+  i := OutCnt;
+  while (i > 0) and (InCnt > 0) do
   begin
-    if Src.FCharBuf^ in [#$80..#$BF] then
-      Value := (Value shl 6) or (Cardinal(Src.FCharBuf^) and $3F)
+    bc := 1;
+    Value := ord(InBuf^);
+    if Value < $80 then
+      OutBuf^ := WideChar(Value)
     else
-      Src.DecodingError('Invalid byte in UTF-8 sequence');
-    Inc(Src.FCharBuf);
-    Dec(bc);
-  end;
-  Value := Value and MaxCode[I];
-  // RFC2279 check
-  if Value <= MaxCode[I-1] then
-    Src.DecodingError('Invalid UTF-8 sequence');
-  case Value of
-    0..$D7FF, $E000..$FFFF:
+    begin
+      if Value < $C2 then
       begin
-        Result := WideChar(Value);
-        Exit;
+        Result := -1;
+        Break;
       end;
-    $10000..$10FFFF:
+      Inc(bc);
+      if Value > $DF then
       begin
-        Result := WideChar($D7C0 + (Value shr 10));
-        Src.FSurrogate := WideChar($DC00 xor (Value and $3FF));
-        Exit;
+        Inc(bc);
+        if Value > $EF then
+        begin
+          Inc(bc);
+          if Value > $F7 then  // never encountered in the tests.
+          begin
+            Result := -1;
+            Break;
+          end;
+        end;
+      end;
+      if InCnt < bc then
+        Break;
+      j := 1;
+      while j < bc do
+      begin
+        if InBuf[j] in [#$80..#$BF] then
+          Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
+        else
+        begin
+          Result := -1;
+          Break;
+        end;
+        Inc(j);
       end;
+      Value := Value and MaxCode[bc];
+      // RFC2279 check
+      if Value <= MaxCode[bc-1] then
+      begin
+        Result := -1;
+        Break;
+      end;
+      case Value of
+        0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
+        $10000..$10FFFF:
+        begin
+          if i < 2 then Break;
+          OutBuf^ := WideChar($D7C0 + (Value shr 10));
+          OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
+          Inc(OutBuf); // once here
+          Dec(i);
+        end
+        else
+        begin
+          Result := -1;
+          Break;
+        end;
+      end;
+    end;
+    Inc(OutBuf);
+    Inc(InBuf, bc);
+    Dec(InCnt, bc);
+    Dec(i);
   end;
-  Src.DecodingError('UCS4 character out of supported range');
-  Result := #0; // supress warning
-end;
-
-function Decode_UTF8(Src: TXMLDecodingSource): WideChar;
-begin
-  Result := WideChar(byte(Src.FCharBuf^));
-  Inc(Src.FCharBuf);
-  if Result >= #$80 then
-    Result := Decode_UTF8_mb(Src, Result);
-end;
-
-function Decode_8859_1(Src: TXMLDecodingSource): WideChar;
-begin
-  Result := WideChar(ord(Src.FCharBuf^));
-  Inc(Src.FCharBuf);
+  if Result >= 0 then
+    Result := OutCnt-i;
+  OutCnt := i;
 end;
 
 function Is_8859_1(const AEncoding: string): Boolean;
@@ -556,16 +638,6 @@ begin
             SameText(AEncoding, 'ISO8859-1');
 end;
 
-// TODO: List of registered/supported decoders
-function FindDecoder(const Encoding: string): TDecoder;
-begin
-  if Is_8859_1(Encoding) then
-    Result := @Decode_8859_1
-  else
-    Result := nil;
-end;
-
-
 procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
 begin
   ABuffer.MaxLength := ALength;
@@ -678,7 +750,7 @@ begin
     node := Context.ParentNode
   else
     node := Context;
-  // TODO: replacing document isn't yet supported  
+  // TODO: replacing document isn't yet supported
   if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
     raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
 
@@ -711,9 +783,7 @@ begin
   end;
 end;
 
-// TODO: These classes still cannot be considered as the final solution...
-
-{ TXMLInputSource }
+{ TXMLCharSource }
 
 constructor TXMLCharSource.Create(const AData: WideString);
 begin
@@ -810,6 +880,8 @@ end;
 destructor TXMLDecodingSource.Destroy;
 begin
   FreeMem(FBufStart);
+  if Assigned(FDecoder.Cleanup) then
+    FDecoder.Cleanup(FDecoder.Context);
   inherited Destroy;
 end;
 
@@ -834,8 +906,8 @@ end;
 
 function TXMLDecodingSource.Reload: Boolean;
 var
-  c: WideChar;
-  r: Integer;
+  r, inLeft: Cardinal;
+  rslt: Integer;
 begin
   if DTDSubsetType = dsInternal then
     FReader.DTDReloadHook;
@@ -846,25 +918,35 @@ begin
   FBuf := FBufStart;
   FBufEnd := FBufStart + r;
 
-  while FBufEnd < FBufStart + FBufSize do
-  begin
-    if FCharBufEnd <= FCharBuf then
+  repeat
+    inLeft := FCharBufEnd - FCharBuf;
+    if inLeft < 4 then                      // may contain an incomplete char
     begin
       FetchData;
-      if FCharBufEnd <= FCharBuf then
+      inLeft := FCharBufEnd - FCharBuf;
+      if inLeft <= 0 then
         Break;
     end;
-    if FSurrogate <> #0 then
-    begin
-      c := FSurrogate;
-      FSurrogate := #0;
-    end
+    r := FBufStart + FBufSize - FBufEnd;
+    if r = 0 then
+      Break;
+    rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
+    { Sanity checks: r and inLeft must not increase. }
+    if inLeft + FCharBuf <= FCharBufEnd then
+      FCharBuf := FCharBufEnd - inLeft
+    else
+      DecodingError('Decoder error: input byte count out of bounds');
+    if r + FBufEnd <= FBufStart + FBufSize then
+      FBufEnd := FBufStart + FBufSize - r
     else
-      c := FDecoder(Self);
+      DecodingError('Decoder error: output char count out of bounds');
+
+    if rslt = 0 then
+      Break
+    else if rslt < 0 then
+      DecodingError('Invalid character in input stream');
+  until False;
 
-    FBufEnd^ := c;
-    Inc(FBufEnd);
-  end;
   FBufEnd^ := #0;
   Result := FBuf < FBufEnd;
 end;
@@ -877,25 +959,30 @@ begin
   inherited;
   FLineNo := 1;
   FXml11Rules := FReader.FXML11;
-  FDecoder := @Decode_UTF8;
+
+  FDecoder.Decode := @Decode_UTF8;
+
   FFixedUCS2 := '';
   if FCharBufEnd-FCharBuf > 1 then
   begin
     if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
     begin
       FFixedUCS2 := 'UTF-16BE';
-      FDecoder := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
+      FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
     end
     else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
     begin
       FFixedUCS2 := 'UTF-16LE';
-      FDecoder := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
+      FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
     end;
   end;
   FBufSize := 6;             //  possible BOM and '<?xml'
   Reload;
   if FBuf^ = #$FEFF then
+  begin
+    FHasBOM := True;
     Inc(FBuf);
+  end;
   LFPos := FBuf-1;
   if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
   begin
@@ -920,8 +1007,12 @@ begin
        SameText(AEncoding, 'unicode');
     Exit;
   end;
-  NewDecoder := FindDecoder(AEncoding);
-  if Assigned(NewDecoder) then
+// TODO: this must fail when a byte-based stream is labeled as word-based.
+// See rmt-e2e-61: it currently fails, but for a completely different reason.
+  FillChar(NewDecoder, sizeof(TDecoder), 0);
+  if Is_8859_1(AEncoding) then
+    FDecoder.Decode := @Decode_88591
+  else if FindDecoder(AEncoding, NewDecoder) then
     FDecoder := NewDecoder
   else
     Result := False;
@@ -1094,6 +1185,7 @@ end;
 
 procedure TXMLReader.Initialize(ASource: TXMLCharSource);
 begin
+  ASource.FParent := FSource;
   FSource := ASource;
   FSource.FReader := Self;
   FSource.Initialize;
@@ -1446,23 +1538,36 @@ var
   wc: WideChar;
 begin
   Result := False;
-  if BufEquals(FName, 'amp') then
-    wc := '&'
-  else if BufEquals(FName, 'apos') then
-    wc := ''''
-  else if BufEquals(FName, 'gt') then
-    wc := '>'
-  else if BufEquals(FName, 'lt') then
-    wc := '<'
-  else if BufEquals(FName, 'quot') then
-    wc := '"'
-  else
-    Exit;
+  with FName do
+  begin
+    if (Length = 2) and (Buffer[1] = 't') then
+    begin
+      if Buffer[0] = 'l' then
+        wc := '<'
+      else if Buffer[0] = 'g' then
+        wc := '>'
+      else Exit;
+    end
+    else if Buffer[0] = 'a' then
+    begin
+      if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
+        wc := '&'
+      else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
+       (Buffer[3] = 's') then
+        wc := ''''
+      else Exit;  
+    end
+    else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
+      (Buffer[2] = 'o') and (Buffer[3] ='t') then
+      wc := '"'
+    else
+      Exit;
+  end; // with
   BufAppend(FValue, wc);
   Result := True;
 end;
 
-function TXMLReader.ParseCharRef(var ToFill: TWideCharBuf): Boolean;           // [66]
+function TXMLReader.ParseRef(var ToFill: TWideCharBuf): Boolean;  // [67]
 var
   Value: Integer;
 begin
@@ -1481,7 +1586,7 @@ begin
         Break;
       end;
       GetChar;
-    until False
+    until Value > $10FFFF
     else
     repeat
       case FSource.FBuf^ of
@@ -1490,7 +1595,7 @@ begin
         Break;
       end;
       GetChar;
-    until False;
+    until Value > $10FFFF;
 
     case Value of
       $01..$08, $0B..$0C, $0E..$1F:
@@ -1531,7 +1636,7 @@ begin
       FatalError('Character ''<'' is not allowed in attribute value')
     else if wc = '&' then
     begin
-      if ParseCharRef(FValue) or ResolvePredefined then
+      if ParseRef(FValue) or ResolvePredefined then
         Continue;
       // have to insert entity or reference
       if FValue.Length > 0 then
@@ -1591,16 +1696,10 @@ begin
   AEntity.FOnStack := True;
   Src.FEntity := AEntity;
 
-  ContextPush(Src);
+  Initialize(Src);
   Result := True;
 end;
 
-procedure TXMLReader.ContextPush(ASrc: TXMLCharSource);
-begin
-  ASrc.FParent := FSource;
-  Initialize(ASrc);
-end;
-
 function TXMLReader.ContextPop: Boolean;
 var
   Src: TXMLCharSource;
@@ -1996,7 +2095,7 @@ begin
   begin
     if ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then
     begin
-      ContextPush(Src);
+      Initialize(Src);
       try
         Src.DTDSubsetType := dsExternal;
         ParseMarkupDecl;
@@ -2086,7 +2185,6 @@ begin
       CurrentCP.Def := FindOrCreateElDef;
 
     ParseQuantity(CurrentCP);
-
     SkipWhitespace;
     if FSource.FBuf^ = ')' then
       Break;
@@ -2367,7 +2465,7 @@ begin
     else if wc = '&' then
     begin
 // expand CharRefs, bypass (but check for well-formedness) EntityRefs
-      if not ParseCharRef(FEntityValue) then
+      if not ParseRef(FEntityValue) then
       begin
         BufAppend(FEntityValue, '&');
         BufAppendChunk(FEntityValue, FName.Buffer, FName.Buffer + FName.Length);
@@ -2695,7 +2793,7 @@ begin
           if FCurrContentType = ctEmpty then
             ValidationError('References are illegal in EMPTY elements', []);
 
-          if ParseCharRef(FValue) or ResolvePredefined then
+          if ParseRef(FValue) or ResolvePredefined then
             nonWs := True // CharRef to whitespace is not considered whitespace
           else
           begin
@@ -2791,8 +2889,8 @@ begin
   ExpectChar('>');
 
   ProcessDefaultAttributes(NewElem, ElDef);
+  PushVC(ElDef);  // this increases FNesting
 
-  PushVC(ElDef);
   // SAX: ContentHandler.StartElement(...)
   // SAX: ContentHandler.StartPrefixMapping(...)