// HtmlAgilityPack V1.0 - Simon Mourier
using System;
using System.IO;
using System.Text;
using System.Collections;
namespace HtmlAgilityPack
{
/// <summary>
/// Identifies the kind of content held by a fragment of a mixed code document.
/// </summary>
public enum MixedCodeDocumentFragmentType
{
    /// <summary>
    /// The fragment contains code.
    /// </summary>
    Code = 0,

    /// <summary>
    /// The fragment contains text.
    /// </summary>
    Text = 1
}
/// <summary>
/// Represents a fragment of code in a mixed code document.
/// </summary>
public class MixedCodeDocumentCodeFragment : MixedCodeDocumentFragment
{
    // Cached code text. Stays null until the Code getter computes it or the setter assigns it.
    internal string _code;

    internal MixedCodeDocumentCodeFragment(MixedCodeDocument doc)
        : base(doc, MixedCodeDocumentFragmentType.Code)
    {
    }

    /// <summary>
    /// Gets or sets the fragment code text.
    /// </summary>
    /// <remarks>
    /// When read for the first time, the value is derived from <see cref="MixedCodeDocumentFragment.FragmentText"/>:
    /// the document's code start token is removed from the front, everything from the code end token onward
    /// is removed from the back, and the remainder is trimmed. If the result begins with '=', that character
    /// is replaced by the document's response write token. The computed value is cached.
    /// </remarks>
    public string Code
    {
        get
        {
            if (_code == null)
            {
                _code = ExtractCode(
                    FragmentText,
                    _doc.TokenCodeStart,
                    _doc.TokenCodeEnd,
                    _doc.TokenResponseWrite);
            }
            return _code;
        }
        set
        {
            _code = value;
        }
    }

    // Strips the delimiters from a raw code fragment and expands the "=" shorthand.
    // The end token is searched for rather than assumed to sit at a fixed distance from the end:
    // the fragment text may carry one character after the end token, may end exactly on it,
    // or may have no end token at all when the code block is not terminated.
    private static string ExtractCode(string fragmentText, string startToken, string endToken, string responseWrite)
    {
        int start = 0;
        if (!string.IsNullOrEmpty(startToken) && fragmentText.StartsWith(startToken, StringComparison.Ordinal))
        {
            start = startToken.Length;
        }

        int end = -1;
        if (!string.IsNullOrEmpty(endToken))
        {
            end = fragmentText.IndexOf(endToken, start, StringComparison.Ordinal);
        }
        if (end < 0)
        {
            // Unterminated block: everything after the start token is code.
            end = fragmentText.Length;
        }

        string code = fragmentText.Substring(start, end - start).Trim();
        if (code.Length > 0 && code[0] == '=')
        {
            code = responseWrite + code.Substring(1);
        }
        return code;
    }
}
/// <summary>
/// Represents a fragment of text in a mixed code document.
/// </summary>
public class MixedCodeDocumentTextFragment : MixedCodeDocumentFragment
{
    internal MixedCodeDocumentTextFragment(MixedCodeDocument doc)
        : base(doc, MixedCodeDocumentFragmentType.Text)
    {
    }

    /// <summary>
    /// Gets or sets the fragment text.
    /// </summary>
    /// <remarks>
    /// Reading returns <see cref="MixedCodeDocumentFragment.FragmentText"/>; writing replaces the
    /// cached fragment text directly.
    /// </remarks>
    public string Text
    {
        get { return FragmentText; }
        set { _fragmenttext = value; }
    }
}
/// <summary>
/// Represents a base class for fragments in a mixed code document.
/// </summary>
public abstract class MixedCodeDocumentFragment
{
    internal MixedCodeDocumentFragmentType _type;
    internal MixedCodeDocument _doc;

    // Offset and length of the fragment inside the owning document's text.
    internal int _index;
    internal int _length;

    // Line and column recorded for the fragment.
    internal int _line;
    internal int _lineposition;

    // Cached fragment text; null until first requested or explicitly assigned.
    internal string _fragmenttext;

    /// <summary>
    /// Initializes the fragment and registers it with the owning document: first in the
    /// type-specific list (text or code), then in the list of all fragments.
    /// </summary>
    /// <param name="doc">The document that owns the fragment.</param>
    /// <param name="type">The kind of fragment being created.</param>
    internal MixedCodeDocumentFragment(MixedCodeDocument doc, MixedCodeDocumentFragmentType type)
    {
        _doc = doc;
        _type = type;

        if (type == MixedCodeDocumentFragmentType.Text)
        {
            _doc._textfragments.Append(this);
        }
        else if (type == MixedCodeDocumentFragmentType.Code)
        {
            _doc._codefragments.Append(this);
        }

        _doc._fragments.Append(this);
    }

    /// <summary>
    /// Gets the type of fragment.
    /// </summary>
    public MixedCodeDocumentFragmentType FragmentType
    {
        get { return _type; }
    }

    /// <summary>
    /// Gets the fragment position in the document's stream.
    /// </summary>
    public int StreamPosition
    {
        get { return _index; }
    }

    /// <summary>
    /// Gets the line number of the fragment.
    /// </summary>
    public int Line
    {
        get { return _line; }
    }

    /// <summary>
    /// Gets the line position (column) of the fragment.
    /// </summary>
    public int LinePosition
    {
        get { return _lineposition; }
    }

    /// <summary>
    /// Gets the fragment text.
    /// </summary>
    /// <remarks>
    /// The text is cut out of the owning document's text on first access, using the fragment's
    /// recorded offset and length, and is cached afterwards.
    /// </remarks>
    public string FragmentText
    {
        get
        {
            if (_fragmenttext != null)
            {
                return _fragmenttext;
            }

            _fragmenttext = _doc._text.Substring(_index, _length);
            return _fragmenttext;
        }
    }
}
/// <summary>
/// Represents a list of mixed code fragments.
/// </summary>
public class MixedCodeDocumentFragmentList : IEnumerable
{
    private readonly ArrayList _items = new ArrayList();

    /// <summary>
    /// Creates an empty fragment list.
    /// </summary>
    /// <param name="doc">The document creating the list. The list does not keep a reference to it.</param>
    internal MixedCodeDocumentFragmentList(MixedCodeDocument doc)
    {
    }

    /// <summary>
    /// Appends a fragment to the list of fragments.
    /// </summary>
    /// <param name="newFragment">The fragment to append. May not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="newFragment"/> is null.</exception>
    public void Append(MixedCodeDocumentFragment newFragment)
    {
        if (newFragment == null)
        {
            throw new ArgumentNullException("newFragment");
        }
        _items.Add(newFragment);
    }

    /// <summary>
    /// Prepends a fragment to the list of fragments.
    /// </summary>
    /// <param name="newFragment">The fragment to prepend. May not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="newFragment"/> is null.</exception>
    public void Prepend(MixedCodeDocumentFragment newFragment)
    {
        if (newFragment == null)
        {
            throw new ArgumentNullException("newFragment");
        }
        _items.Insert(0, newFragment);
    }

    /// <summary>
    /// Removes a fragment from the list of fragments.
    /// </summary>
    /// <param name="fragment">The fragment to remove. May not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fragment"/> is null.</exception>
    /// <exception cref="IndexOutOfRangeException">The fragment is not in the list.</exception>
    public void Remove(MixedCodeDocumentFragment fragment)
    {
        if (fragment == null)
        {
            throw new ArgumentNullException("fragment");
        }

        int index = GetFragmentIndex(fragment);
        if (index == -1)
        {
            // Exception type kept as-is so existing callers that catch it keep working.
            throw new IndexOutOfRangeException();
        }
        RemoveAt(index);
    }

    /// <summary>
    /// Removes a fragment from the list of fragments, using its index in the list.
    /// </summary>
    /// <param name="index">The index of the fragment to remove.</param>
    public void RemoveAt(int index)
    {
        _items.RemoveAt(index);
    }

    /// <summary>
    /// Removes all fragments from the list.
    /// </summary>
    public void RemoveAll()
    {
        _items.Clear();
    }

    /// <summary>
    /// Gets the number of fragments contained in the list.
    /// </summary>
    public int Count
    {
        get
        {
            return _items.Count;
        }
    }

    /// <summary>
    /// Finds the position of a fragment in the list, comparing by reference.
    /// </summary>
    /// <param name="fragment">The fragment to look for. May not be null.</param>
    /// <returns>The zero-based index of the fragment, or -1 if it is not in the list.</returns>
    internal int GetFragmentIndex(MixedCodeDocumentFragment fragment)
    {
        if (fragment == null)
        {
            throw new ArgumentNullException("fragment");
        }

        for (int i = 0; i < _items.Count; i++)
        {
            if (object.ReferenceEquals(_items[i], fragment))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Gets a fragment from the list using its index.
    /// </summary>
    public MixedCodeDocumentFragment this[int index]
    {
        get
        {
            // Only fragments can be added (see Append/Prepend), so a direct cast is safe.
            return (MixedCodeDocumentFragment)_items[index];
        }
    }

    // Internal equivalent of RemoveAll.
    internal void Clear()
    {
        _items.Clear();
    }

    /// <summary>
    /// Gets an enumerator that can iterate through the fragment list.
    /// </summary>
    public MixedCodeDocumentFragmentEnumerator GetEnumerator()
    {
        return new MixedCodeDocumentFragmentEnumerator(_items);
    }

    /// <summary>
    /// Gets an enumerator that can iterate through the fragment list.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Represents a fragment enumerator.
    /// </summary>
    public class MixedCodeDocumentFragmentEnumerator : IEnumerator
    {
        int _index;
        ArrayList _items;

        internal MixedCodeDocumentFragmentEnumerator(ArrayList items)
        {
            _items = items;
            _index = -1;
        }

        /// <summary>
        /// Sets the enumerator to its initial position, which is before the first element in the collection.
        /// </summary>
        public void Reset()
        {
            _index = -1;
        }

        /// <summary>
        /// Advances the enumerator to the next element of the collection.
        /// </summary>
        /// <returns>true if the enumerator was successfully advanced to the next element; false if the enumerator has passed the end of the collection.</returns>
        public bool MoveNext()
        {
            _index++;
            return (_index < _items.Count);
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        public MixedCodeDocumentFragment Current
        {
            get
            {
                return (MixedCodeDocumentFragment)(_items[_index]);
            }
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        object IEnumerator.Current
        {
            get
            {
                return (Current);
            }
        }
    }
}
/// <summary>
/// Represents a document with mixed code and text. ASP, ASPX and JSP pages are good examples of such documents.
/// </summary>
public class MixedCodeDocument
{
    private System.Text.Encoding _streamencoding = null;
    internal string _text;
    internal MixedCodeDocumentFragmentList _fragments;
    internal MixedCodeDocumentFragmentList _codefragments;
    internal MixedCodeDocumentFragmentList _textfragments;

    // Parser working state, used by Parse() and its helpers.
    private ParseState _state;
    private int _index;
    private int _c;
    private int _line;
    private int _lineposition;
    private MixedCodeDocumentFragment _currentfragment;

    /// <summary>
    /// Gets or sets the token representing code start.
    /// </summary>
    public string TokenCodeStart = "<%";

    /// <summary>
    /// Gets or sets the token representing code end.
    /// </summary>
    public string TokenCodeEnd = "%>";

    /// <summary>
    /// Gets or sets the token representing code directive.
    /// </summary>
    public string TokenDirective = "@";

    /// <summary>
    /// Gets or sets the token representing response write directive.
    /// </summary>
    public string TokenResponseWrite = "Response.Write ";

    // Format string used by the Code property to stand in for the n-th text fragment.
    private const string TokenTextBlock = "TextBlock({0})";

    /// <summary>
    /// Creates a mixed code document instance.
    /// </summary>
    public MixedCodeDocument()
    {
        _codefragments = new MixedCodeDocumentFragmentList(this);
        _textfragments = new MixedCodeDocumentFragmentList(this);
        _fragments = new MixedCodeDocumentFragmentList(this);
    }

    /// <summary>
    /// Loads a mixed code document from a stream.
    /// </summary>
    /// <param name="stream">The input stream.</param>
    public void Load(Stream stream)
    {
        Load(new StreamReader(stream));
    }

    /// <summary>
    /// Loads a mixed code document from a stream.
    /// </summary>
    /// <param name="stream">The input stream.</param>
    /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
    public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
    {
        Load(new StreamReader(stream, detectEncodingFromByteOrderMarks));
    }

    /// <summary>
    /// Loads a mixed code document from a stream.
    /// </summary>
    /// <param name="stream">The input stream.</param>
    /// <param name="encoding">The character encoding to use.</param>
    public void Load(Stream stream, Encoding encoding)
    {
        Load(new StreamReader(stream, encoding));
    }

    /// <summary>
    /// Loads a mixed code document from a stream.
    /// </summary>
    /// <param name="stream">The input stream.</param>
    /// <param name="encoding">The character encoding to use.</param>
    /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
    public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
    {
        Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks));
    }

    /// <summary>
    /// Loads a mixed code document from a stream.
    /// </summary>
    /// <param name="stream">The input stream.</param>
    /// <param name="encoding">The character encoding to use.</param>
    /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
    /// <param name="buffersize">The minimum buffer size.</param>
    public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
    {
        Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize));
    }

    /// <summary>
    /// Loads a mixed code document from a file.
    /// </summary>
    /// <param name="path">The complete file path to be read.</param>
    public void Load(string path)
    {
        Load(new StreamReader(path));
    }

    /// <summary>
    /// Loads a mixed code document from a file.
    /// </summary>
    /// <param name="path">The complete file path to be read.</param>
    /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
    public void Load(string path, bool detectEncodingFromByteOrderMarks)
    {
        Load(new StreamReader(path, detectEncodingFromByteOrderMarks));
    }

    /// <summary>
    /// Loads a mixed code document from a file.
    /// </summary>
    /// <param name="path">The complete file path to be read.</param>
    /// <param name="encoding">The character encoding to use.</param>
    public void Load(string path, Encoding encoding)
    {
        Load(new StreamReader(path, encoding));
    }

    /// <summary>
    /// Loads a mixed code document from a file.
    /// </summary>
    /// <param name="path">The complete file path to be read.</param>
    /// <param name="encoding">The character encoding to use.</param>
    /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
    public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
    {
        Load(new StreamReader(path, encoding, detectEncodingFromByteOrderMarks));
    }

    /// <summary>
    /// Loads a mixed code document from a file.
    /// </summary>
    /// <param name="path">The complete file path to be read.</param>
    /// <param name="encoding">The character encoding to use.</param>
    /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
    /// <param name="buffersize">The minimum buffer size.</param>
    public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
    {
        Load(new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize));
    }

    /// <summary>
    /// Loads a mixed document from a text.
    /// </summary>
    /// <param name="html">The text to load.</param>
    public void LoadHtml(string html)
    {
        Load(new StringReader(html));
    }

    /// <summary>
    /// Loads the mixed code document from the specified TextReader.
    /// </summary>
    /// <remarks>
    /// All other Load overloads end up here. Fragments from a previous load are discarded,
    /// the reader is read to its end and then closed, and the text is parsed into fragments.
    /// </remarks>
    /// <param name="reader">The TextReader used to feed the data into the document. May not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public void Load(TextReader reader)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        // Every fragment registers itself in _fragments as well as in its typed list,
        // so all three lists must be emptied or a reload leaves stale fragments behind.
        _codefragments.Clear();
        _textfragments.Clear();
        _fragments.Clear();

        try
        {
            _text = reader.ReadToEnd();

            // Sampled after reading: a StreamReader only performs encoding detection
            // on its first read, so CurrentEncoding is not final before that.
            StreamReader sr = reader as StreamReader;
            if (sr != null)
            {
                _streamencoding = sr.CurrentEncoding;
            }
        }
        finally
        {
            // Close the reader even if reading failed.
            reader.Close();
        }

        Parse();
    }

    // Encoding used by the Save overloads that take no explicit encoding:
    // the encoding of the last StreamReader loaded from, or the system default.
    internal System.Text.Encoding GetOutEncoding()
    {
        if (_streamencoding != null)
        {
            return _streamencoding;
        }
        return System.Text.Encoding.Default;
    }

    /// <summary>
    /// Gets the encoding of the stream used to read the document.
    /// </summary>
    public System.Text.Encoding StreamEncoding
    {
        get
        {
            return _streamencoding;
        }
    }

    /// <summary>
    /// Gets the list of code fragments in the document.
    /// </summary>
    public MixedCodeDocumentFragmentList CodeFragments
    {
        get
        {
            return _codefragments;
        }
    }

    /// <summary>
    /// Gets the list of text fragments in the document.
    /// </summary>
    public MixedCodeDocumentFragmentList TextFragments
    {
        get
        {
            return _textfragments;
        }
    }

    /// <summary>
    /// Gets the list of all fragments in the document.
    /// </summary>
    public MixedCodeDocumentFragmentList Fragments
    {
        get
        {
            return _fragments;
        }
    }

    /// <summary>
    /// Saves the mixed document to the specified stream.
    /// </summary>
    /// <param name="outStream">The stream to which you want to save.</param>
    public void Save(Stream outStream)
    {
        // The writer is deliberately not closed: closing it would also close the caller's stream.
        StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
        Save(sw);
    }

    /// <summary>
    /// Saves the mixed document to the specified stream.
    /// </summary>
    /// <param name="outStream">The stream to which you want to save.</param>
    /// <param name="encoding">The character encoding to use.</param>
    public void Save(Stream outStream, System.Text.Encoding encoding)
    {
        // The writer is deliberately not closed: closing it would also close the caller's stream.
        StreamWriter sw = new StreamWriter(outStream, encoding);
        Save(sw);
    }

    /// <summary>
    /// Saves the mixed document to the specified file.
    /// </summary>
    /// <param name="filename">The location of the file where you want to save the document.</param>
    public void Save(string filename)
    {
        // The writer (and the file handle behind it) is owned here, so release it when done.
        using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
        {
            Save(sw);
        }
    }

    /// <summary>
    /// Saves the mixed document to the specified file.
    /// </summary>
    /// <param name="filename">The location of the file where you want to save the document.</param>
    /// <param name="encoding">The character encoding to use.</param>
    public void Save(string filename, System.Text.Encoding encoding)
    {
        // The writer (and the file handle behind it) is owned here, so release it when done.
        using (StreamWriter sw = new StreamWriter(filename, false, encoding))
        {
            Save(sw);
        }
    }

    /// <summary>
    /// Saves the mixed document to the specified StreamWriter.
    /// </summary>
    /// <param name="writer">The StreamWriter to which you want to save.</param>
    public void Save(StreamWriter writer)
    {
        Save((TextWriter)writer);
    }

    /// <summary>
    /// Saves the mixed document to the specified TextWriter.
    /// </summary>
    /// <param name="writer">The TextWriter to which you want to save.</param>
    public void Save(TextWriter writer)
    {
        // NOTE(review): this only flushes the writer; no fragment content is written,
        // so every Save overload currently produces empty output. Confirm the intended
        // serialization of fragments before implementing it.
        writer.Flush();
    }

    /// <summary>
    /// Gets the code represented by the mixed code document seen as a template.
    /// </summary>
    /// <remarks>
    /// Fragments are visited in document order. Each text fragment contributes the response
    /// write token followed by a numbered text block placeholder; each code fragment contributes
    /// its code. Every contribution is terminated by a line feed.
    /// </remarks>
    public string Code
    {
        get
        {
            StringBuilder code = new StringBuilder();
            int textBlockIndex = 0;
            foreach (MixedCodeDocumentFragment frag in _fragments)
            {
                switch (frag._type)
                {
                    case MixedCodeDocumentFragmentType.Text:
                        code.Append(TokenResponseWrite);
                        code.Append(string.Format(TokenTextBlock, textBlockIndex));
                        code.Append("\n");
                        textBlockIndex++;
                        break;

                    case MixedCodeDocumentFragmentType.Code:
                        code.Append(((MixedCodeDocumentCodeFragment)frag).Code);
                        code.Append("\n");
                        break;
                }
            }
            return code.ToString();
        }
    }

    /// <summary>
    /// Creates a text fragment instance.
    /// </summary>
    /// <returns>The newly created text fragment instance.</returns>
    public MixedCodeDocumentTextFragment CreateTextFragment()
    {
        return (MixedCodeDocumentTextFragment)CreateFragment(MixedCodeDocumentFragmentType.Text);
    }

    /// <summary>
    /// Creates a code fragment instance.
    /// </summary>
    /// <returns>The newly created code fragment instance.</returns>
    public MixedCodeDocumentCodeFragment CreateCodeFragment()
    {
        return (MixedCodeDocumentCodeFragment)CreateFragment(MixedCodeDocumentFragmentType.Code);
    }

    // Creates a fragment of the requested type; the fragment's constructor registers it with this document.
    internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
    {
        switch (type)
        {
            case MixedCodeDocumentFragmentType.Text:
                return new MixedCodeDocumentTextFragment(this);

            case MixedCodeDocumentFragmentType.Code:
                return new MixedCodeDocumentCodeFragment(this);

            default:
                throw new NotSupportedException();
        }
    }

    // Stamps the current fragment with the parser position. _index has already been
    // advanced past the current character, hence the "- 1".
    private void SetPosition()
    {
        _currentfragment._line = _line;
        _currentfragment._lineposition = _lineposition;
        _currentfragment._index = _index - 1;
        _currentfragment._length = 0;
    }

    // Advances past the current character (_c), tracking line and column; 10 is the line feed character.
    private void IncrementPosition()
    {
        _index++;
        if (_c == 10)
        {
            _lineposition = 1;
            _line++;
        }
        else
        {
            _lineposition++;
        }
    }

    private enum ParseState
    {
        Text,
        Code
    }

    // Splits _text into alternating text and code fragments by scanning for the code start and end tokens.
    private void Parse()
    {
        _state = ParseState.Text;
        _index = 0;

        // Start every parse from the same position so that line information
        // does not accumulate across successive loads.
        _line = 0;
        _lineposition = 0;

        _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
        while (_index < _text.Length)
        {
            _c = _text[_index];
            IncrementPosition();
            switch (_state)
            {
                case ParseState.Text:
                    if (_index + TokenCodeStart.Length < _text.Length)
                    {
                        // Ordinal comparison in place; equivalent to comparing a substring
                        // with the token, without allocating a string per character.
                        if (string.CompareOrdinal(_text, _index - 1, TokenCodeStart, 0, TokenCodeStart.Length) == 0)
                        {
                            _state = ParseState.Code;
                            _currentfragment._length = _index - 1 - _currentfragment._index;
                            _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Code);
                            SetPosition();
                            continue;
                        }
                    }
                    break;

                case ParseState.Code:
                    if (_index + TokenCodeEnd.Length < _text.Length)
                    {
                        if (string.CompareOrdinal(_text, _index - 1, TokenCodeEnd, 0, TokenCodeEnd.Length) == 0)
                        {
                            _state = ParseState.Text;
                            // NOTE(review): the end token starts at _index - 1, so this length
                            // reaches one character past the end token, and that character is
                            // also the first one of the following text fragment. Left unchanged
                            // because consumers of the fragment text depend on it.
                            _currentfragment._length = _index + TokenCodeEnd.Length - _currentfragment._index;
                            _index += TokenCodeEnd.Length;
                            _lineposition += TokenCodeEnd.Length;
                            _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
                            SetPosition();
                            continue;
                        }
                    }
                    break;
            }
        }

        // The last fragment runs to the end of the text.
        _currentfragment._length = _index - _currentfragment._index;
    }
}
}