|
@@ -6,7 +6,24 @@ from difflib import unified_diff
|
|
|
class FortuneHTMLParser(HTMLParser):
|
|
|
body = []
|
|
|
|
|
|
- valid = '<!doctype html><html><head><title>Fortunes</title></head><body><table><tr><th>id</th><th>message</th></tr><tr><td>11</td><td><script>alert("This should not be displayed in a browser alert box.");</script></td></tr><tr><td>4</td><td>A bad random number generator: 1, 1, 1, 1, 1, 4.33e+67, 1, 1, 1</td></tr><tr><td>5</td><td>A computer program does what you tell it to do, not what you want it to do.</td></tr><tr><td>2</td><td>A computer scientist is someone who fixes things that aren't broken.</td></tr><tr><td>8</td><td>A list is only as strong as its weakest link. — Donald Knuth</td></tr><tr><td>0</td><td>Additional fortune added at request time.</td></tr><tr><td>3</td><td>After enough decimal places, nobody gives a damn.</td></tr><tr><td>7</td><td>Any program that runs right is obsolete.</td></tr><tr><td>10</td><td>Computers make very fast, very accurate mistakes.</td></tr><tr><td>6</td><td>Emacs is a nice operating system, but I prefer UNIX. — Tom Christaensen</td></tr><tr><td>9</td><td>Feature: A bug with seniority.</td></tr><tr><td>1</td><td>fortune: No such file or directory</td></tr><tr><td>12</td><td>フレームワークのベンチマーク</td></tr></table></body></html>'
|
|
|
+ valid = '''<!doctype html><html>
|
|
|
+<head><title>Fortunes</title></head>
|
|
|
+<body><table>
|
|
|
+<tr><th>id</th><th>message</th></tr>
|
|
|
+<tr><td>11</td><td><script>alert("This should not be displayed in a browser alert box.");</script></td></tr>
|
|
|
+<tr><td>4</td><td>A bad random number generator: 1, 1, 1, 1, 1, 4.33e+67, 1, 1, 1</td></tr>
|
|
|
+<tr><td>5</td><td>A computer program does what you tell it to do, not what you want it to do.</td></tr>
|
|
|
+<tr><td>2</td><td>A computer scientist is someone who fixes things that aren't broken.</td></tr>
|
|
|
+<tr><td>8</td><td>A list is only as strong as its weakest link. — Donald Knuth</td></tr>
|
|
|
+<tr><td>0</td><td>Additional fortune added at request time.</td></tr>
|
|
|
+<tr><td>3</td><td>After enough decimal places, nobody gives a damn.</td></tr>
|
|
|
+<tr><td>7</td><td>Any program that runs right is obsolete.</td></tr>
|
|
|
+<tr><td>10</td><td>Computers make very fast, very accurate mistakes.</td></tr>
|
|
|
+<tr><td>6</td><td>Emacs is a nice operating system, but I prefer UNIX. — Tom Christaensen</td></tr>
|
|
|
+<tr><td>9</td><td>Feature: A bug with seniority.</td></tr>
|
|
|
+<tr><td>1</td><td>fortune: No such file or directory</td></tr>
|
|
|
+<tr><td>12</td><td>フレームワークのベンチマーク</td></tr>
|
|
|
+</table></body></html>'''
|
|
|
|
|
|
# Is called when a doctype or other such tag is read in.
|
|
|
# For our purposes, we assume this is only going to be
|
|
@@ -70,6 +87,10 @@ class FortuneHTMLParser(HTMLParser):
|
|
|
# each one wrapped in "<" and ">".
|
|
|
def handle_starttag(self, tag, attrs):
    # Record the opening tag as "<tag>". Only the tag name is emitted;
    # 'attrs' is not used, so attributes never show up in 'body'.
    self.body.append("<{t}>".format(t=tag))

    # Append a newline after <table> and <html> so the accumulated text
    # breaks into lines at the same places as the multi-line 'valid'
    # spec, which lets a mismatch be diffed line by line.
    if tag.lower() in ('table', 'html'):
        self.body.append("\n")
|
|
|
|
|
|
# This is called whenever data is presented inside of a
|
|
|
# start and end tag. Generally, this will only ever be
|
|
@@ -106,15 +127,26 @@ class FortuneHTMLParser(HTMLParser):
|
|
|
def handle_endtag(self, tag):
    # Record the closing tag as "</tag>".
    self.body.append("</{t}>".format(t=tag))

    # Append a newline after each </tr> and </head> so the accumulated
    # text breaks into lines at the same places as the multi-line
    # 'valid' spec (one table row per line).
    if tag.lower() in ('tr', 'head'):
        self.body.append("\n")
|
|
|
+
|
|
|
# Checks the HTML input parsed by this parser against our known
# "fortune" spec.
# The parsed data in 'body' is joined on empty strings and checked
# for equality against 'valid'. On a mismatch, both documents and a
# line-based unified diff (no context lines) are written to 'out'.
#
# Returns a (same, diff_lines) tuple: 'same' is True when the parsed
# output equals the spec exactly; 'diff_lines' holds the lines yielded
# by unified_diff, untouched, and is empty when the documents match.
def isValidFortune(self, out):
    body = ''.join(self.body)
    same = self.valid == body
    diff_lines = []
    if not same:
        # NOTE(review): this dumps both full documents and has no trailing
        # newline; it looks like leftover debug output -- confirm whether
        # anything depends on it before removing.
        out.write("Oh no! I compared %s\n\n\nto.....%s" % (self.valid, body))
        out.write("Fortune invalid. Diff following:\n")
        for line in unified_diff(self.valid.split('\n'), body.split('\n'),
                                 fromfile='Valid', tofile='Response', n=0):
            diff_lines.append(line)
            out.write(line)
            # unified_diff terminates its own header lines ('---', '+++',
            # '@@ ... @@') with a newline, while content lines come from
            # split('\n') and carry none. Terminate only the lines that
            # need it, so every hunk header is followed by its content
            # rather than by a blank line.
            if not line.endswith('\n'):
                out.write('\n')
    return (same, diff_lines)
|