#!/usr/bin/env python3
# hb_test_tools.py
import difflib
import errno
import html
import itertools
import os
import re
import sys
import unicodedata
from itertools import *

try:
    # The cgi module was deprecated by PEP 594 and removed in Python 3.13.
    import cgi
except ImportError:
    cgi = None
  4. diff_symbols = "-+=*&^%$#@!~/"
  5. diff_colors = ['red', 'green', 'blue']
  6. def codepoints(s):
  7. return (ord (u) for u in s)
  8. class ColorFormatter:
  9. class Null:
  10. @staticmethod
  11. def start_color (c): return ''
  12. @staticmethod
  13. def end_color (): return ''
  14. @staticmethod
  15. def escape (s): return s
  16. @staticmethod
  17. def newline (): return '\n'
  18. class ANSI:
  19. @staticmethod
  20. def start_color (c):
  21. return {
  22. 'red': '\033[41;37;1m',
  23. 'green': '\033[42;37;1m',
  24. 'blue': '\033[44;37;1m',
  25. }[c]
  26. @staticmethod
  27. def end_color ():
  28. return '\033[m'
  29. @staticmethod
  30. def escape (s): return s
  31. @staticmethod
  32. def newline (): return '\n'
  33. class HTML:
  34. @staticmethod
  35. def start_color (c):
  36. return '<span style="background:%s">' % c
  37. @staticmethod
  38. def end_color ():
  39. return '</span>'
  40. @staticmethod
  41. def escape (s): return cgi.escape (s)
  42. @staticmethod
  43. def newline (): return '<br/>\n'
  44. @staticmethod
  45. def Auto (argv = [], out = sys.stdout):
  46. format = ColorFormatter.ANSI
  47. if "--format" in argv:
  48. argv.remove ("--format")
  49. format = ColorFormatter.ANSI
  50. if "--format=ansi" in argv:
  51. argv.remove ("--format=ansi")
  52. format = ColorFormatter.ANSI
  53. if "--format=html" in argv:
  54. argv.remove ("--format=html")
  55. format = ColorFormatter.HTML
  56. if "--no-format" in argv:
  57. argv.remove ("--no-format")
  58. format = ColorFormatter.Null
  59. return format
  60. class DiffColorizer:
  61. diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
  62. def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
  63. self.formatter = formatter
  64. self.colors = colors
  65. self.symbols = symbols
  66. def colorize_lines (self, lines):
  67. lines = (l if l else '' for l in lines)
  68. ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
  69. oo = ["",""]
  70. st = [False, False]
  71. for l in difflib.Differ().compare (*ss):
  72. if l[0] == '?':
  73. continue
  74. if l[0] == ' ':
  75. for i in range(2):
  76. if st[i]:
  77. oo[i] += self.formatter.end_color ()
  78. st[i] = False
  79. oo = [o + self.formatter.escape (l[2:]) for o in oo]
  80. continue
  81. if l[0] in self.symbols:
  82. i = self.symbols.index (l[0])
  83. if not st[i]:
  84. oo[i] += self.formatter.start_color (self.colors[i])
  85. st[i] = True
  86. oo[i] += self.formatter.escape (l[2:])
  87. continue
  88. for i in range(2):
  89. if st[i]:
  90. oo[i] += self.formatter.end_color ()
  91. st[i] = False
  92. oo = [o.replace ('\n', '') for o in oo]
  93. return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
  94. def colorize_diff (self, f):
  95. lines = [None, None]
  96. for l in f:
  97. if l[0] not in self.symbols:
  98. yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
  99. continue
  100. i = self.symbols.index (l[0])
  101. if lines[i]:
  102. # Flush
  103. for line in self.colorize_lines (lines):
  104. yield line
  105. lines = [None, None]
  106. lines[i] = l[1:]
  107. if (all (lines)):
  108. # Flush
  109. for line in self.colorize_lines (lines):
  110. yield line
  111. lines = [None, None]
  112. if (any (lines)):
  113. # Flush
  114. for line in self.colorize_lines (lines):
  115. yield line
  116. class ZipDiffer:
  117. @staticmethod
  118. def diff_files (files, symbols=diff_symbols):
  119. files = tuple (files) # in case it's a generator, copy it
  120. try:
  121. for lines in itertools.zip_longest (*files):
  122. if all (lines[0] == line for line in lines[1:]):
  123. sys.stdout.writelines ([" ", lines[0]])
  124. continue
  125. for i, l in enumerate (lines):
  126. if l:
  127. sys.stdout.writelines ([symbols[i], l])
  128. except IOError as e:
  129. if e.errno != errno.EPIPE:
  130. sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
  131. class DiffFilters:
  132. @staticmethod
  133. def filter_failures (f):
  134. for key, lines in DiffHelpers.separate_test_cases (f):
  135. lines = list (lines)
  136. if not DiffHelpers.test_passed (lines):
  137. for l in lines: yield l
  138. class Stat:
  139. def __init__ (self):
  140. self.count = 0
  141. self.freq = 0
  142. def add (self, test):
  143. self.count += 1
  144. self.freq += test.freq
  145. class Stats:
  146. def __init__ (self):
  147. self.passed = Stat ()
  148. self.failed = Stat ()
  149. self.total = Stat ()
  150. def add (self, test):
  151. self.total.add (test)
  152. if test.passed:
  153. self.passed.add (test)
  154. else:
  155. self.failed.add (test)
  156. def mean (self):
  157. return float (self.passed.count) / self.total.count
  158. def variance (self):
  159. return (float (self.passed.count) / self.total.count) * \
  160. (float (self.failed.count) / self.total.count)
  161. def stddev (self):
  162. return self.variance () ** .5
  163. def zscore (self, population):
  164. """Calculate the standard score.
  165. Population is the Stats for population.
  166. Self is Stats for sample.
  167. Returns larger absolute value if sample is highly unlikely to be random.
  168. Anything outside of -3..+3 is very unlikely to be random.
  169. See: https://en.wikipedia.org/wiki/Standard_score"""
  170. return (self.mean () - population.mean ()) / population.stddev ()
  171. class DiffSinks:
  172. @staticmethod
  173. def print_stat (f):
  174. passed = 0
  175. failed = 0
  176. # XXX port to Stats, but that would really slow us down here
  177. for key, lines in DiffHelpers.separate_test_cases (f):
  178. if DiffHelpers.test_passed (lines):
  179. passed += 1
  180. else:
  181. failed += 1
  182. total = passed + failed
  183. print ("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
  184. class Test:
  185. def __init__ (self, lines):
  186. self.freq = 1
  187. self.passed = True
  188. self.identifier = None
  189. self.text = None
  190. self.unicodes = None
  191. self.glyphs = None
  192. for l in lines:
  193. symbol = l[0]
  194. if symbol != ' ':
  195. self.passed = False
  196. i = 1
  197. if ':' in l:
  198. i = l.index (':')
  199. if not self.identifier:
  200. self.identifier = l[1:i]
  201. i = i + 2 # Skip colon and space
  202. j = -1
  203. if l[j] == '\n':
  204. j -= 1
  205. brackets = l[i] + l[j]
  206. l = l[i+1:-2]
  207. if brackets == '()':
  208. self.text = l
  209. elif brackets == '<>':
  210. self.unicodes = Unicode.parse (l)
  211. elif brackets == '[]':
  212. # XXX we don't handle failed tests here
  213. self.glyphs = l
  214. class DiffHelpers:
  215. @staticmethod
  216. def separate_test_cases (f):
  217. '''Reads lines from f, and if the lines have identifiers, ie.
  218. have a colon character, groups them by identifier,
  219. yielding lists of all lines with the same identifier.'''
  220. def identifier (l):
  221. if ':' in l[1:]:
  222. return l[1:l.index (':')]
  223. return l
  224. return groupby (f, key=identifier)
  225. @staticmethod
  226. def test_passed (lines):
  227. lines = list (lines)
  228. # XXX This is a hack, but does the job for now.
  229. if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
  230. if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
  231. if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
  232. if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
  233. if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
  234. if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
  235. return all (l[0] == ' ' for l in lines)
  236. class FilterHelpers:
  237. @staticmethod
  238. def filter_printer_function (filter_callback):
  239. def printer (f):
  240. for line in filter_callback (f):
  241. print (line)
  242. return printer
  243. @staticmethod
  244. def filter_printer_function_no_newline (filter_callback):
  245. def printer (f):
  246. for line in filter_callback (f):
  247. sys.stdout.writelines ([line])
  248. return printer
  249. class Ngram:
  250. @staticmethod
  251. def generator (n):
  252. def gen (f):
  253. l = []
  254. for x in f:
  255. l.append (x)
  256. if len (l) == n:
  257. yield tuple (l)
  258. l[:1] = []
  259. gen.n = n
  260. return gen
  261. class UtilMains:
  262. @staticmethod
  263. def process_multiple_files (callback, mnemonic = "FILE"):
  264. if "--help" in sys.argv:
  265. sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
  266. try:
  267. files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
  268. for s in files:
  269. callback (FileHelpers.open_file_or_stdin (s))
  270. except IOError as e:
  271. if e.errno != errno.EPIPE:
  272. sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
  273. @staticmethod
  274. def process_multiple_args (callback, mnemonic):
  275. if len (sys.argv) == 1 or "--help" in sys.argv:
  276. sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
  277. try:
  278. for s in sys.argv[1:]:
  279. callback (s)
  280. except IOError as e:
  281. if e.errno != errno.EPIPE:
  282. sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
  283. @staticmethod
  284. def filter_multiple_strings_or_stdin (callback, mnemonic, \
  285. separator = " ", \
  286. concat_separator = False):
  287. if "--help" in sys.argv:
  288. sys.exit ("""Usage:
  289. %s %s...
  290. or:
  291. %s
  292. When called with no arguments, input is read from standard input.
  293. """ % (sys.argv[0], mnemonic, sys.argv[0]))
  294. try:
  295. if len (sys.argv) == 1:
  296. while (1):
  297. line = sys.stdin.readline ()
  298. if not len (line):
  299. break
  300. if line[-1] == '\n':
  301. line = line[:-1]
  302. print (callback (line))
  303. else:
  304. args = sys.argv[1:]
  305. if concat_separator != False:
  306. args = [concat_separator.join (args)]
  307. print (separator.join (callback (x) for x in (args)))
  308. except IOError as e:
  309. if e.errno != errno.EPIPE:
  310. sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
  311. class Unicode:
  312. @staticmethod
  313. def decode (s):
  314. return ','.join ("U+%04X" % cp for cp in codepoints (s))
  315. @staticmethod
  316. def parse (s):
  317. s = re.sub (r"0[xX]", " ", s)
  318. s = re.sub (r"[<+\->{},;&#\\xXuUnNiI\n\t]", " ", s)
  319. return [int (x, 16) for x in s.split ()]
  320. @staticmethod
  321. def encode (s):
  322. return ''.join (chr (x) for x in Unicode.parse (s))
  323. shorthands = {
  324. "ZERO WIDTH NON-JOINER": "ZWNJ",
  325. "ZERO WIDTH JOINER": "ZWJ",
  326. "NARROW NO-BREAK SPACE": "NNBSP",
  327. "COMBINING GRAPHEME JOINER": "CGJ",
  328. "LEFT-TO-RIGHT MARK": "LRM",
  329. "RIGHT-TO-LEFT MARK": "RLM",
  330. "LEFT-TO-RIGHT EMBEDDING": "LRE",
  331. "RIGHT-TO-LEFT EMBEDDING": "RLE",
  332. "POP DIRECTIONAL FORMATTING": "PDF",
  333. "LEFT-TO-RIGHT OVERRIDE": "LRO",
  334. "RIGHT-TO-LEFT OVERRIDE": "RLO",
  335. }
  336. @staticmethod
  337. def pretty_name (u):
  338. try:
  339. s = unicodedata.name (u)
  340. except ValueError:
  341. return "XXX"
  342. s = re.sub (".* LETTER ", "", s)
  343. s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
  344. s = re.sub (".* SIGN ", "", s)
  345. s = re.sub (".* COMBINING ", "", s)
  346. if re.match (".* VIRAMA", s):
  347. s = "HALANT"
  348. if s in Unicode.shorthands:
  349. s = Unicode.shorthands[s]
  350. return s
  351. @staticmethod
  352. def pretty_names (s):
  353. s = re.sub (r"[<+>\\uU]", " ", s)
  354. s = re.sub (r"0[xX]", " ", s)
  355. s = [chr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
  356. return ' + '.join (Unicode.pretty_name (x) for x in s)
  357. class FileHelpers:
  358. @staticmethod
  359. def open_file_or_stdin (f):
  360. if f == '-':
  361. return sys.stdin
  362. return open (f)
  363. class Manifest:
  364. @staticmethod
  365. def read (s, strict = True):
  366. if not os.path.exists (s):
  367. if strict:
  368. sys.exit ("%s: %s does not exist" % (sys.argv[0], s))
  369. return
  370. s = os.path.normpath (s)
  371. if os.path.isdir (s):
  372. try:
  373. m = open (os.path.join (s, "MANIFEST"))
  374. items = [x.strip () for x in m.readlines ()]
  375. for f in items:
  376. for p in Manifest.read (os.path.join (s, f)):
  377. yield p
  378. except IOError:
  379. if strict:
  380. sys.exit ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")))
  381. return
  382. else:
  383. yield s
  384. @staticmethod
  385. def update_recursive (s):
  386. for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
  387. for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
  388. if f in dirnames:
  389. dirnames.remove (f)
  390. if f in filenames:
  391. filenames.remove (f)
  392. dirnames.sort ()
  393. filenames.sort ()
  394. ms = os.path.join (dirpath, "MANIFEST")
  395. print (" GEN %s" % ms)
  396. m = open (ms, "w")
  397. for f in filenames:
  398. print (f, file=m)
  399. for f in dirnames:
  400. print (f, file=m)
  401. for f in dirnames:
  402. Manifest.update_recursive (os.path.join (dirpath, f))
  403. if __name__ == '__main__':
  404. pass