XmlNormalizer.cs 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. using System;
  2. using System.IO;
  3. using System.Xml;
  4. using System.Xml.XPath;
  5. using System.Collections;
  6. using System.Reflection;
  7. using System.ComponentModel;
  8. using System.Text;
  9. namespace XmlNormalizer {
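  /// <summary>
  /// Builds a normalized copy of an XML document. Single-letter options select
  /// what is dropped or rearranged during the copy (white space, attributes,
  /// namespaces and prefixes, text nodes, attribute order, inserted newlines).
  /// </summary>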
  13. class XmlNormalizer {
  /// <summary>
  /// Attribute that tags an option property with its one-character option letter.
  /// </summary>
  class OptionLetterAttribute : Attribute {
      char _letter;

      /// <param name="c">The option letter.</param>
      public OptionLetterAttribute (char c) : base () {
          this._letter = c;
      }

      /// <summary>Returns the option letter as a one-character string.</summary>
      public override string ToString () {
          return new string (_letter, 1);
      }
  }
  // Document produced by the most recent Process call; Output writes it.
  private XmlDocument doc;

  // Backing fields for the option properties; ParseOptions resets them all to false.
  private bool _removeWhiteSpace;
  private bool _sortAttributes;
  private bool _removeAttributes;
  private bool _removeNamespacesAndPrefixes;
  private bool _removeText;
  private bool _removeAll;
  private bool _newLines;
  /// <summary>
  /// Option 'w'. When set, CopyNode skips whitespace, significant-whitespace
  /// and comment nodes.
  /// </summary>
  [Description("remove white space")]
  [OptionLetter('w')]
  public bool RemoveWhiteSpace {
      get {
          return _removeWhiteSpace;
      }
      set {
          _removeWhiteSpace = value;
      }
  }
  /// <summary>
  /// Option 's'. When set, CopyNodes sorts attribute names before copying
  /// the attributes.
  /// </summary>
  [Description("sort attributes")]
  [OptionLetter('s')]
  public bool SortAttributes {
      get {
          return _sortAttributes;
      }
      set {
          _sortAttributes = value;
      }
  }
  /// <summary>
  /// Option 'a'. When set, CopyNode does not copy attribute nodes.
  /// </summary>
  [Description("remove attributes")]
  [OptionLetter('a')]
  public bool RemoveAttributes {
      get {
          return _removeAttributes;
      }
      set {
          _removeAttributes = value;
      }
  }
  /// <summary>
  /// Option 'p'. When set, CopyNode recreates elements and attributes from
  /// their local names only. Process also switches it on when RemoveAll is set.
  /// </summary>
  [Description("remove namespaces and prefixes")]
  [OptionLetter('p')]
  public bool RemoveNamespacesAndPrefixes {
      get {
          return _removeNamespacesAndPrefixes;
      }
      set {
          _removeNamespacesAndPrefixes = value;
      }
  }
  /// <summary>
  /// Option 't'. When set, CopyNode skips text nodes; Process also switches
  /// RemoveWhiteSpace on.
  /// </summary>
  [Description("remove text nodes")]
  [OptionLetter('t')]
  public bool RemoveText {
      get {
          return _removeText;
      }
      set {
          _removeText = value;
      }
  }
  /// <summary>
  /// Option 'n'. When set, CopyNode skips every node that is not an element;
  /// Process also switches RemoveNamespacesAndPrefixes on.
  /// </summary>
  [Description("remove all except element nodes")]
  [OptionLetter('n')]
  public bool RemoveAll {
      get {
          return _removeAll;
      }
      set {
          _removeAll = value;
      }
  }
  /// <summary>
  /// Option 'x'. When set, CopyNode inserts a "\r\n" significant-whitespace
  /// node before each copied element, CDATA section, comment, processing
  /// instruction and text node whose parent is not an attribute.
  /// </summary>
  [Description("insert newlines before elements")]
  [OptionLetter('x')]
  public bool NewLines {
      get {
          return _newLines;
      }
      set {
          _newLines = value;
      }
  }
  /// <summary>
  /// Option 'm'. Read-only: there is no setter and the value is always false.
  /// </summary>
  [Description("minimal normalizing")]
  [OptionLetter('m')]
  public bool MinimalNormalizing {
      get {
          return false;
      }
  }
  /// <summary>
  /// Creates a normalizer from an empty option string, i.e. with no option selected.
  /// </summary>
  public XmlNormalizer () : this (string.Empty) {
  }
  /// <summary>
  /// Creates a normalizer configured from a string of option letters.
  /// </summary>
  /// <param name="options">Option letters; handed to ParseOptions unchanged.</param>
  public XmlNormalizer (string options) {
      this.ParseOptions (options);
  }
  /// <summary>
  /// Reads all text from <paramref name="rd"/>, parses it as XML and stores a
  /// normalized copy (built by CopyNodes under the current options) for Output.
  /// </summary>
  /// <remarks>
  /// Text that does not parse as a document is retried wrapped in a
  /// &lt;NormalizerRoot&gt; element. RemoveText switches RemoveWhiteSpace on and
  /// RemoveAll switches RemoveNamespacesAndPrefixes on; both stay on after the call.
  /// The stored document is replaced only when the whole copy succeeds.
  /// </remarks>
  /// <param name="rd">Reader supplying the XML text.</param>
  /// <exception cref="ArgumentNullException"><paramref name="rd"/> is null.</exception>
  /// <exception cref="XmlException">The text cannot be parsed even when wrapped.</exception>
  public void Process(TextReader rd) {
      if (rd == null)
          throw new ArgumentNullException ("rd");

      string fileContents = rd.ReadToEnd();

      // Parse into a local so a failure never leaves a raw, un-normalized
      // document behind in the 'doc' field.
      XmlDocument source = new XmlDocument();
      source.PreserveWhitespace = true;
      try {
          source.LoadXml (fileContents);
      }
      catch (XmlException) {
          // Not a well-formed document on its own: retry under a synthetic root.
          StringBuilder sb = new StringBuilder ();
          sb.Append ("<NormalizerRoot>");
          sb.Append (fileContents);
          sb.Append ("</NormalizerRoot>");
          source.LoadXml (sb.ToString ());
      }

      // Options that imply other options.
      if (RemoveText)
          RemoveWhiteSpace = true;
      if (RemoveAll)
          RemoveNamespacesAndPrefixes = true;

      XmlDocument newDoc = new XmlDocument();
      CopyNodes(newDoc, source, newDoc);
      doc = newDoc;
  }
  /// <summary>
  /// Copies the children of <paramref name="fromParent"/> and then its attributes
  /// (if it has an attribute collection) to <paramref name="toParent"/>, one
  /// CopyNode call per node.
  /// </summary>
  /// <param name="newDoc">Document passed on to CopyNode to create the copies.</param>
  /// <param name="fromParent">Source node whose children and attributes are copied.</param>
  /// <param name="toParent">Node that receives the copies.</param>
  void CopyNodes (XmlDocument newDoc, XmlNode fromParent, XmlNode toParent) {
      if (fromParent.HasChildNodes) {
          foreach (XmlNode childNode in fromParent.ChildNodes) {
              CopyNode (newDoc, childNode, toParent);
          }
      }

      XmlAttributeCollection attributes = fromParent.Attributes;
      if (attributes == null) {
          return;
      }

      // Collect the attribute names first so they can be reordered before copying.
      string [] names = new string [attributes.Count];
      int next = 0;
      foreach (XmlAttribute attribute in attributes) {
          names[next++] = attribute.Name;
      }

      if (SortAttributes) {
          // NOTE(review): Array.Sort on strings uses the default comparer, which
          // is culture-sensitive — confirm whether ordinal ordering is wanted.
          Array.Sort (names);
      }

      foreach (string name in names) {
          CopyNode (newDoc, attributes[name], toParent);
      }
  }
  /// <summary>
  /// Copies one source node into <paramref name="newDoc"/> under
  /// <paramref name="toParent"/>, applying the current options, and then copies
  /// the node's own children and attributes through CopyNodes.
  /// </summary>
  /// <remarks>
  /// Nodes filtered out by an option are skipped. Attributes, the document type
  /// and the XML declaration are attached directly and return early; every other
  /// copied node is appended to <paramref name="toParent"/>, preceded by a "\r\n"
  /// significant-whitespace node when NewLines applies.
  /// </remarks>
  /// <param name="newDoc">Document that creates (and owns) every copied node.</param>
  /// <param name="from">Source node to copy.</param>
  /// <param name="toParent">Node in <paramref name="newDoc"/> that receives the copy.</param>
  void CopyNode (XmlDocument newDoc, XmlNode from, XmlNode toParent) {
      if (RemoveAll && from.NodeType != XmlNodeType.Element)
          return;

      XmlNode child = null;
      bool newLineNode = false;

      switch (from.NodeType) {
          case XmlNodeType.Element: {
              newLineNode = true;
              if (RemoveNamespacesAndPrefixes)
                  child = newDoc.CreateElement (from.LocalName);
              else {
                  XmlElement e = (XmlElement) from;
                  child = newDoc.CreateElement (e.Prefix, e.LocalName, e.NamespaceURI);
              }
              break;
          }
          case XmlNodeType.Attribute: {
              if (RemoveAttributes)
                  return;
              XmlAttribute fromAttr = (XmlAttribute) from;
              if (!fromAttr.Specified)
                  return;
              XmlAttribute a;
              if (RemoveNamespacesAndPrefixes) {
                  // Namespace declarations (xmlns="..." / xmlns:p="...") are part of
                  // what this option removes; copying them by local name would leave
                  // a stray "p" attribute or an "xmlns" attribute behind.
                  bool isNamespaceDeclaration =
                      fromAttr.Prefix == "xmlns" ||
                      (fromAttr.Prefix.Length == 0 && fromAttr.LocalName == "xmlns");
                  if (isNamespaceDeclaration)
                      return;
                  a = newDoc.CreateAttribute (fromAttr.LocalName);
              }
              else
                  a = newDoc.CreateAttribute (fromAttr.Prefix, fromAttr.LocalName, fromAttr.NamespaceURI);
              toParent.Attributes.Append(a);
              // The attribute value is held in child nodes; copy those too.
              CopyNodes (newDoc, from, a);
              return;
          }
          case XmlNodeType.CDATA:
              newLineNode = true;
              child = newDoc.CreateCDataSection (((XmlCDataSection) from).Data);
              break;
          case XmlNodeType.Comment:
              // Comments are dropped together with white space.
              if (RemoveWhiteSpace)
                  return;
              newLineNode = true;
              child = newDoc.CreateComment (((XmlComment) from).Data);
              break;
          case XmlNodeType.ProcessingInstruction: {
              newLineNode = true;
              XmlProcessingInstruction pi = (XmlProcessingInstruction) from;
              child = newDoc.CreateProcessingInstruction (pi.Target, pi.Data);
              break;
          }
          case XmlNodeType.DocumentType: {
              // Create the node through newDoc: a clone of 'from' would still
              // belong to the source document and AppendChild rejects nodes that
              // were created by a different document.
              XmlDocumentType dt = (XmlDocumentType) from;
              toParent.AppendChild (
                  newDoc.CreateDocumentType (dt.Name, dt.PublicId, dt.SystemId, dt.InternalSubset));
              return;
          }
          case XmlNodeType.EntityReference:
              child = newDoc.CreateEntityReference (((XmlEntityReference) from).Name);
              break;
          case XmlNodeType.SignificantWhitespace:
              if (RemoveWhiteSpace)
                  return;
              child = newDoc.CreateSignificantWhitespace (from.Value);
              break;
          case XmlNodeType.Text:
              if (RemoveText)
                  return;
              newLineNode = true;
              child = newDoc.CreateTextNode (from.Value);
              break;
          case XmlNodeType.Whitespace:
              if (RemoveWhiteSpace)
                  return;
              child = newDoc.CreateWhitespace (from.Value);
              break;
          case XmlNodeType.XmlDeclaration: {
              XmlDeclaration d = (XmlDeclaration) from;
              XmlDeclaration d1 = newDoc.CreateXmlDeclaration (d.Version, d.Encoding, d.Standalone);
              newDoc.InsertBefore(d1, newDoc.DocumentElement);
              return;
          }
          default:
              // No copy rule for this node type; nothing to append.
              return;
      }

      if (NewLines && newLineNode && toParent.NodeType != XmlNodeType.Attribute) {
          XmlSignificantWhitespace s = newDoc.CreateSignificantWhitespace("\r\n");
          toParent.AppendChild (s);
      }
      toParent.AppendChild(child);
      CopyNodes (newDoc, from, child);
  }
  /// <summary>
  /// Resets every option to false and then switches on each option whose
  /// letter (from its OptionLetter attribute) occurs in <paramref name="options"/>.
  /// </summary>
  /// <remarks>
  /// Letters that match no property are ignored. Properties without a public
  /// setter (MinimalNormalizing, letter 'm') are accepted but change nothing.
  /// </remarks>
  /// <param name="options">Option letters in any order; null or empty selects none.</param>
  public void ParseOptions (string options) {
      _removeWhiteSpace = false;
      _sortAttributes = false;
      _removeAttributes = false;
      _removeNamespacesAndPrefixes = false;
      _removeText = false;
      _removeAll = false;
      _newLines = false;

      if (options == null || options.Length == 0)
          return;

      foreach (PropertyInfo pi in typeof (XmlNormalizer).GetProperties()) {
          object [] letters = pi.GetCustomAttributes(typeof(OptionLetterAttribute), true);
          if (letters.Length == 0)
              continue;   // not an option property

          string option = letters[0].ToString();
          if (options.IndexOf(option, StringComparison.Ordinal) == -1)
              continue;

          // GetSetMethod returns null for a read-only property; invoking it
          // unconditionally made the 'm' option crash.
          MethodInfo setter = pi.GetSetMethod();
          if (setter == null)
              continue;

          setter.Invoke (this, new object [] {true});
      }
  }
  /// <summary>
  /// Builds a table that maps each option letter (as a string) to the text of
  /// the Description attribute on the same property.
  /// </summary>
  /// <returns>A new Hashtable with one entry per public property of XmlNormalizer.</returns>
  public static Hashtable GetOptions() {
      Hashtable table = new Hashtable();
      PropertyInfo [] properties = typeof (XmlNormalizer).GetProperties();
      for (int i = 0; i < properties.Length; i++) {
          PropertyInfo property = properties[i];
          object letter = property.GetCustomAttributes(typeof(OptionLetterAttribute), true)[0];
          object described = property.GetCustomAttributes(typeof(DescriptionAttribute), true)[0];
          table[letter.ToString()] = (described as DescriptionAttribute).Description;
      }
      return table;
  }
  /// <summary>
  /// Writes the document stored by Process to <paramref name="wr"/>.
  /// </summary>
  /// <param name="wr">XML writer that receives the document.</param>
  public void Output(XmlWriter wr) {
      this.doc.WriteTo (wr);
  }
  /// <summary>
  /// Writes the document stored by Process to <paramref name="wr"/> through a
  /// new XmlTextWriter.
  /// </summary>
  /// <param name="wr">Text writer that receives the XML text.</param>
  public void Output(TextWriter wr) {
      XmlTextWriter xmlWriter = new XmlTextWriter (wr);
      Output (xmlWriter);
  }
  /// <summary>
  /// Normalizes <paramref name="inputfile"/> into <paramref name="outputfile"/>.
  /// If reading, normalizing or writing fails for any reason, the input file is
  /// copied over the output file unchanged instead (best effort).
  /// </summary>
  /// <param name="inputfile">Path of the file to read.</param>
  /// <param name="outputfile">Path of the file to create or overwrite.</param>
  void ProcessFile (string inputfile, string outputfile) {
      try {
          // Read and normalize completely before the output file is opened:
          // opening it truncates it, which would wipe the input when both
          // paths name the same file.
          using (StreamReader rd = new StreamReader (inputfile)) {
              Process (rd);
          }
          using (StreamWriter wr = new StreamWriter (outputfile)) {
              Output (wr);
          }
      } catch (Exception) {
          // Deliberate fallback: keep the file, un-normalized. Both streams
          // are already closed here because the using blocks have been left.
          File.Copy (inputfile, outputfile, true);
      }
  }
  /// <summary>
  /// Normalizes the XML read from <paramref name="input"/> and writes the result
  /// to <paramref name="output"/>. Neither stream is closed here.
  /// </summary>
  /// <param name="input">Reader supplying the XML text.</param>
  /// <param name="output">Writer that receives the normalized XML.</param>
  void ProcessFile (TextReader input, TextWriter output) {
      XmlTextWriter xmlOut = new XmlTextWriter (output);
      this.Process (input);
      this.Output (xmlOut);
  }
  /// <summary>
  /// Runs ProcessFile on every file in <paramref name="inputdir"/>, writing to a
  /// file of the same name in <paramref name="outputdir"/>, then recurses into
  /// each subdirectory. The output directory is created when it does not exist.
  /// </summary>
  /// <param name="inputdir">Directory to read from.</param>
  /// <param name="outputdir">Directory to write to.</param>
  void ProcessDirectory (string inputdir, string outputdir) {
      if (!Directory.Exists (outputdir)) {
          Directory.CreateDirectory (outputdir);
      }

      DirectoryInfo source = new DirectoryInfo (inputdir);

      FileInfo [] files = source.GetFiles();
      for (int i = 0; i < files.Length; i++) {
          string target = Path.Combine (outputdir, files[i].Name);
          ProcessFile (files[i].FullName, target);
      }

      // NOTE(review): the subdirectory list is read after outputdir was created,
      // so an outputdir located inside inputdir is itself visited — confirm
      // callers never nest the two.
      DirectoryInfo [] subdirectories = source.GetDirectories();
      for (int i = 0; i < subdirectories.Length; i++) {
          DirectoryInfo sub = subdirectories[i];
          ProcessDirectory (sub.FullName, Path.Combine (outputdir, sub.Name));
      }
  }
  273. #if !XML_NORMALIZER_NO_MAIN
  /// <summary>
  /// The main entry point for the application.
  /// </summary>
  /// <remarks>
  /// Expects "-flags inputfile" (result is written to standard output) or
  /// "-flags inputdir outputdir".
  /// </remarks>
  /// <param name="args">Command-line arguments.</param>
  /// <returns>
  /// 0 on success, 1 after printing usage for bad arguments, 2 when the input
  /// path is neither an existing file nor an existing directory.
  /// </returns>
  [STAThread]
  static int Main(string[] args) {
      if (args.Length < 2 || args[0].Length < 2 || args[0][0] != '-') {
          PrintUsage();
          return 1;
      }

      // Everything after the leading '-' is the option-letter string.
      XmlNormalizer norm = new XmlNormalizer (args[0].Substring(1));
      string inputPath = args[1];

      if (File.Exists(inputPath)) {
          if (args.Length != 2) {
              PrintUsage();
              return 1;
          }
          // Dispose the reader so the input file handle is released.
          using (StreamReader input = new StreamReader (inputPath)) {
              norm.ProcessFile(input, Console.Out);
          }
      }
      else if (Directory.Exists (inputPath)) {
          if (args.Length != 3) {
              PrintUsage();
              return 1;
          }
          norm.ProcessDirectory (inputPath, args[2]);
      }
      else {
          Console.Error.WriteLine("Path not found: {0}", inputPath);
          return 2;
      }
      return 0;
  }
  /// <summary>
  /// Writes the command-line usage text, followed by one line per entry of
  /// GetOptions (letter and description), to standard error.
  /// </summary>
  static void PrintUsage () {
      TextWriter err = Console.Error;
      err.WriteLine("Usage: xmlnorm -<flags> <inputfile>");
      err.WriteLine("Or: xmlnorm -<flags> <inputdir> <outputdir>");
      err.WriteLine("\tFlags:");

      Hashtable options = XmlNormalizer.GetOptions();
      foreach (DictionaryEntry entry in options) {
          err.WriteLine ("\t{0}\t{1}", entry.Key, entry.Value);
      }
  }
  311. #endif
  312. }
  313. }