CapabilitiesResult.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. #if NET_2_0
  2. /*
  3. Used to determine Browser Capabilities by the Browsers UserAgent String and related
  4. Browser supplied Headers.
  5. Copyright (C) 2002-Present Owen Brady (Ocean at xvision.com)
  6. Permission is hereby granted, free of charge, to any person obtaining a copy
  7. of this software and associated documentation files (the "Software"), to deal
  8. in the Software without restriction, including without limitation the rights
  9. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. copies of the Software, and to permit persons to whom the Software is furnished
  11. to do so, subject to the following conditions:
  12. The above copyright notice and this permission notice shall be included in all
  13. copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  15. INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  16. PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  17. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  18. OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  19. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. */
  21. namespace System.Web.Configuration
  22. {
  23. using System;
  24. using System.Collections.Generic;
  25. using System.Text;
  26. using System.Reflection;
  27. using System.IO;
  28. internal class CapabilitiesResult : System.Web.HttpBrowserCapabilities
  29. {
  30. private static string[] RandomRoboBotKeywords;
  /// <summary>
  /// Initializes a new instance of the <see cref="CapabilitiesResult"/> class
  /// around an existing capability dictionary.
  /// </summary>
  /// <param name="items">
  /// Dictionary of capability names and values. It is assigned, as-is, to the
  /// inherited <c>Capabilities</c> property; no copy is made here.
  /// </param>
  internal CapabilitiesResult(System.Collections.IDictionary items)
  {
      // The parameterless base constructor is invoked implicitly.
      base.Capabilities = items;
  }
  /// <summary>
  /// Loads the keyword list consulted by <c>IsRandomRobobotUserAgent</c> from
  /// the embedded resource "RandomRoboBotKeywords.txt" of the executing assembly.
  /// </summary>
  /// <remarks>
  /// The resource is read as one keyword per line. Lines are split on both
  /// carriage-return and line-feed characters so the result does not depend on
  /// the line-ending convention of the platform the code runs on, and empty
  /// lines are discarded (an empty keyword is contained in every string).
  /// When the resource is absent the keyword list is left empty instead of
  /// letting the type initializer throw.
  /// </remarks>
  static CapabilitiesResult()
  {
      string[] keywords = new string[0];

      Assembly asm = Assembly.GetExecutingAssembly();
      // GetManifestResourceStream returns null when the resource cannot be found.
      Stream resource = asm.GetManifestResourceStream("RandomRoboBotKeywords.txt");
      if (resource != null)
      {
          // Disposing the reader also closes the underlying resource stream.
          using (StreamReader reader = new StreamReader(resource, System.Text.Encoding.Default))
          {
              keywords = reader.ReadToEnd().Split(
                  new char[] { '\r', '\n' },
                  StringSplitOptions.RemoveEmptyEntries);
          }
      }

      RandomRoboBotKeywords = keywords;
  }
  /// <summary>
  /// Stores <paramref name="value"/> in the capability dictionary under
  /// <paramref name="name"/> using the dictionary's indexer.
  /// </summary>
  /// <param name="name">Key of the capability entry to set.</param>
  /// <param name="value">Value to store for that key.</param>
  internal void AddCapabilities(string name, string value)
  {
      System.Collections.IDictionary capabilities = this.Capabilities;
      capabilities[name] = value;
  }
  /// <summary>
  /// Expands capability placeholders of the form <c>${name}</c> and
  /// <c>%{name}</c> found in <paramref name="item"/>, substituting the value
  /// this instance's indexer returns for <c>name</c>.
  /// </summary>
  /// <param name="item">Text that may contain placeholders; may be null or empty.</param>
  /// <returns>
  /// The text with every recognised placeholder substituted. Null or empty
  /// input is returned unchanged. Text without placeholders is returned as-is.
  /// </returns>
  /// <remarks>
  /// Both placeholder styles are always processed: a <c>$</c> that is not part
  /// of a <c>${name}</c> placeholder no longer prevents <c>%{name}</c>
  /// placeholders in the same text from being expanded.
  /// </remarks>
  internal virtual string Replace(string item)
  {
      if (string.IsNullOrEmpty(item))
      {
          return item;
      }

      item = ExpandPlaceholders(item, '$', @"\$\{(?'Capability'\w*)\}");
      item = ExpandPlaceholders(item, '%', @"\%\{(?'Capability'\w*)\}");
      return item;
  }

  /// <summary>
  /// Replaces every placeholder matched by <paramref name="pattern"/> (marker
  /// character followed by <c>{name}</c>) with the capability value for <c>name</c>.
  /// </summary>
  private string ExpandPlaceholders(string item, char marker, string pattern)
  {
      // Cheap pre-check so the regular expression only runs when it can match.
      if (item.IndexOf(marker) < 0)
      {
          return item;
      }

      // The matches are computed against the text as it was on entry; each
      // substitution then rewrites all occurrences of that placeholder.
      System.Text.RegularExpressions.MatchCollection matches =
          System.Text.RegularExpressions.Regex.Matches(item, pattern);
      foreach (System.Text.RegularExpressions.Match match in matches)
      {
          string capability = match.Groups["Capability"].Value;
          // String.Replace treats a null replacement as "remove the placeholder".
          item = item.Replace(marker + "{" + capability + "}", this[capability]);
      }

      return item;
  }
  /// <summary>
  /// Gets the operating system the browser is running on, read from the
  /// "os" capability entry.
  /// </summary>
  public string OS
  {
      get
      {
          string operatingSystem = this["os"];
          return operatingSystem;
      }
  }
  /// <summary>
  /// Gets the browser's build, read from the "BrowserBuild" capability entry.
  /// </summary>
  public string BrowserBuild
  {
      get
      {
          string build = this["BrowserBuild"];
          return build;
      }
  }
  /// <summary>
  /// Gets the name of the browser's rendering engine, when known. The value
  /// is read from the "HtmlEngine" capability entry.
  /// </summary>
  public string BrowserRenderingEngine
  {
      get
      {
          string engine = this["HtmlEngine"];
          return engine;
      }
  }
  /// <summary>
  /// Gets whether the client was identified as a robot, largely by
  /// elimination of the other possibilities currently available.
  /// </summary>
  /// <remarks>
  /// Checks are applied in order and the first that applies decides:
  /// a client flagged "IsMobileDevice" is never a robot; a client flagged
  /// "IsBot", "crawler" or "Unknown", or whose browser name is "Unknown", is
  /// a robot; a client claiming to be "IE" is a robot when its platform is
  /// missing or "Unknown", or when the value stored under the empty capability
  /// key equals "....../1.0". Anything else is not a robot.
  /// All comparisons are ordinal and case-insensitive because the values are
  /// fixed tokens, not text subject to the current culture's casing rules.
  /// </remarks>
  public bool IsRobot
  {
      get
      {
          StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

          if (string.Equals(this["IsMobileDevice"], "true", ignoreCase))
          {
              return false;
          }

          if (string.Equals(this["IsBot"], "true", ignoreCase)
              || string.Equals(this["crawler"], "true", ignoreCase)
              || string.Equals(this["Unknown"], "true", ignoreCase))
          {
              return true;
          }

          string browser = this.Browser;
          if (string.Equals(browser, "Unknown", ignoreCase))
          {
              return true;
          }

          if (string.Equals(browser, "IE", ignoreCase))
          {
              // Too many fake IEs are out there; these checks remove some of
              // the low-hanging fruit.
              string platform = this.Platform;
              if (string.IsNullOrEmpty(platform)
                  || string.Equals(platform, "Unknown", ignoreCase))
              {
                  return true;
              }

              // A specific scraper signature seen in the wild.
              if (string.Equals(this[""], "....../1.0", ignoreCase))
              {
                  return true;
              }
          }

          return false;
      }
  }
  /// <summary>
  /// Gets whether the "IsSyndicationReader" capability entry is set to
  /// "true".
  /// </summary>
  /// <remarks>
  /// The comparison is ordinal and case-insensitive: the flag is a fixed
  /// token, so the current culture's casing rules must not influence it.
  /// </remarks>
  public bool IsSyndicationReader
  {
      get
      {
          return string.Equals(this["IsSyndicationReader"], "true", StringComparison.OrdinalIgnoreCase);
      }
  }
  /// <summary>
  /// Gets whether the "Unknown" capability entry is set to "true".
  /// </summary>
  /// <remarks>
  /// The comparison is ordinal and case-insensitive: the flag is a fixed
  /// token, so the current culture's casing rules must not influence it.
  /// </remarks>
  public bool IsUnknown
  {
      get
      {
          return string.Equals(this["Unknown"], "true", StringComparison.OrdinalIgnoreCase);
      }
  }
  /// <summary>
  /// Gets whether the client looks like a robot that sends a randomly
  /// generated, nonsensical user-agent string.
  /// </summary>
  /// <remarks>
  /// The implementation works by elimination of traits most real user agents
  /// have. The result is false as soon as any of the following holds: the
  /// client is not a robot, it is a syndication reader, its browser or platform
  /// is anything other than "Unknown", its user agent is missing, shorter than
  /// eight characters, contains a period, or contains one of the known
  /// keywords. Only a user agent that passes every check is reported as random.
  /// Empty keywords are ignored, since an empty string is contained in every
  /// user agent and would otherwise force the result to false.
  /// </remarks>
  public bool IsRandomRobobotUserAgent
  {
      get
      {
          StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

          // Not a robot at all: enough was known about the client to rule
          // out a random user agent.
          if (!this.IsRobot)
          {
              return false;
          }

          // Identified as an RSS/Atom feed reader, so the user agent carried
          // enough detail to be recognised and is not random.
          if (this.IsSyndicationReader)
          {
              return false;
          }

          // A browser name could be determined, so the user agent was
          // detailed enough.
          if (!string.Equals(this.Browser, "Unknown", ignoreCase))
          {
              return false;
          }

          // A platform could be determined, so the user agent is most likely
          // not a randomized name.
          if (!string.Equals(this.Platform, "Unknown", ignoreCase))
          {
              return false;
          }

          // Read the user agent once; every remaining check inspects it.
          string userAgent = this.UserAgent;

          // No user agent at all does not count as a randomized one.
          if (string.IsNullOrEmpty(userAgent))
          {
              return false;
          }

          // Names under eight characters are assumed to be deliberately
          // short rather than randomly generated.
          if (userAgent.Length < 8)
          {
              return false;
          }

          // No randomly generated user agent with a period in it has been
          // observed so far.
          if (userAgent.IndexOf('.') > -1)
          {
              return false;
          }

          // A keyword commonly found in real user agents means the string is
          // assumed not to be random.
          string[] keywords = RandomRoboBotKeywords;
          if (keywords != null)
          {
              foreach (string keyword in keywords)
              {
                  if (string.IsNullOrEmpty(keyword))
                  {
                      continue;
                  }

                  if (userAgent.IndexOf(keyword, ignoreCase) != -1)
                  {
                      return false;
                  }
              }
          }

          // Passed every check: the user agent matches no known format and is
          // labelled as randomized (spam, scraper and similar bots).
          return true;
      }
  }
  /// <summary>
  /// Gets the names of all entries in the capability dictionary, sorted with
  /// the default ordering of <c>System.Array.Sort</c>.
  /// </summary>
  /// <remarks>
  /// A new collection is built on every access.
  /// </remarks>
  public System.Collections.Specialized.StringCollection Keys
  {
      get
      {
          string[] sortedKeys = new string[this.Capabilities.Keys.Count];
          this.Capabilities.Keys.CopyTo(sortedKeys, 0);
          System.Array.Sort(sortedKeys);

          System.Collections.Specialized.StringCollection result =
              new System.Collections.Specialized.StringCollection();
          result.AddRange(sortedKeys);
          return result;
      }
  }
  /// <summary>
  /// Gets the value stored under the empty-string capability key.
  /// </summary>
  /// <remarks>
  /// NOTE(review): presumably that entry holds the raw user-agent string of
  /// the request; confirm against the code that fills the dictionary.
  /// </remarks>
  public string UserAgent
  {
      get
      {
          string agent = this[""];
          return agent;
      }
  }
  330. }
  331. }
  332. #endif