convert_css_test_suite_to_rml.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. # This source file is part of RmlUi, the HTML/CSS Interface Middleware
  2. #
  3. # For the latest information, see http://github.com/mikke89/RmlUi
  4. #
  5. # Copyright (c) 2008-2014 CodePoint Ltd, Shift Technology Ltd, and contributors
  6. # Copyright (c) 2019-2023 The RmlUi Team, and contributors
  7. #
  8. # Permission is hereby granted, free of charge, to any person obtaining a copy
  9. # of this software and associated documentation files (the "Software"), to deal
  10. # in the Software without restriction, including without limitation the rights
  11. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. # copies of the Software, and to permit persons to whom the Software is
  13. # furnished to do so, subject to the following conditions:
  14. #
  15. # The above copyright notice and this permission notice shall be included in all
  16. # copies or substantial portions of the Software.
  17. #
  18. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  24. # SOFTWARE.
  25. import os
  26. import re
  27. import sys
  28. import argparse
  29. parser = argparse.ArgumentParser(description=\
  30. '''Convert the W3C CSS 2.1 test suite to RML documents for testing in RmlUi.
  31. Instructions:
  32. 1. Fetch the CSS tests archive from here: https://www.w3.org/Style/CSS/Test/CSS2.1/
  33. 2. Extract the 'xhtml1' folder, and point the 'in_dir' argument to this directory.
  34. 3. Call this script with the 'out_dir' argument pointing to a directory of your choosing.
  35. The resulting tests can be opened in the `Visual tests` application. Set the environment variable
  36. `RMLUI_VISUAL_TESTS_RML_DIRECTORIES` to the 'out_dir' used above. After opening the application, use
  37. the arrow keys Up/Down to change the test suite directory.
  38. This script can also be used with the CSS3 test suites, such as the one for flexbox:
  39. https://test.csswg.org/suites/css-flexbox-1_dev/nightly-unstable/
  40. ''',
  41. formatter_class=argparse.RawTextHelpFormatter)
  42. parser.add_argument('in_dir',
  43. help="Input directory which contains the 'xhtml1' (.xht) files to be converted.")
  44. parser.add_argument('out_dir',
  45. help="Output directory for the converted RML files.")
  46. parser.add_argument('--clean', action='store_true',
  47. help='Will *delete* all existing *.rml files in the output directory.')
  48. parser.add_argument('--match',
  49. help="Only process file names containing the given string.")
  50. args = parser.parse_args()
  51. in_dir = args.in_dir
  52. out_dir = args.out_dir
  53. out_ref_dir = os.path.join(out_dir, r'reference')
  54. match_files = args.match
  55. if not os.path.isdir(in_dir):
  56. print("Error: Specified input directory '{}' does not exist.".format(out_dir))
  57. exit()
  58. if not os.path.exists(out_dir):
  59. try:
  60. os.mkdir(out_dir)
  61. except Exception as e:
  62. print('Error: Failed to create output directory {}'.format(out_dir))
  63. if not os.path.exists(out_ref_dir):
  64. try:
  65. os.mkdir(out_ref_dir)
  66. except Exception as e:
  67. print('Error: Failed to create reference output directory {}'.format(out_ref_dir))
  68. if not os.path.isdir(out_dir) or not os.path.isdir(out_ref_dir):
  69. print("Error: Specified output directory '{}' or reference '{}' are not directories.".format(out_dir, out_ref_dir))
  70. exit()
  71. if args.clean:
  72. print("Deleting all *.rml files in output directory '{}' and reference directory '{}'".format(out_dir, out_ref_dir))
  73. for del_dir in [out_dir, out_ref_dir]:
  74. for file in os.listdir(del_dir):
  75. path = os.path.join(del_dir, file)
  76. try:
  77. if os.path.isfile(path) and file.endswith('.rml'):
  78. os.unlink(path)
  79. except Exception as e:
  80. print('Failed to delete {}. Reason: {}'.format(path, e))
  81. html_color_mapping = {
  82. "lightblue": "#add8e6",
  83. "lightgrey": "#d3d3d3",
  84. "lightgray": "#d3d3d3",
  85. "lightgreen": "#90ee90",
  86. "pink": "#ffc0cb",
  87. "coral": "#ff7f50",
  88. "slateblue": "#6a5acd",
  89. "steelblue": "#4682b4",
  90. "tan": "#d2b48c",
  91. "violet": "#ee82ee",
  92. }
  93. def border_format(side: str, type: str, content: str):
  94. # Side: (empty)/-top/-right/-bottom/-left
  95. # Type: (empty)/-width/-style/-color
  96. content = content.replace("thick", "5px")
  97. content = content.replace("medium", "3px")
  98. content = content.replace("thin", "1px")
  99. if type == "-width":
  100. return "border" + side + type + ": " + content
  101. if type == "-color":
  102. color = content.strip()
  103. if color in html_color_mapping:
  104. color = html_color_mapping[color]
  105. return "border" + side + type + ": " + color
  106. # Convert style to width. This is not perfect, but seems to be the most used case.
  107. if type == "-style":
  108. content = content.replace("none", "0px").replace("hidden", "0px")
  109. # We may want to only match "solid" here, and cancel the test if it contains any other styles which are unsupported.
  110. content = re.sub(r'\b[a-z]+\b', '3px', content, flags = re.IGNORECASE)
  111. return "border" + side + "-width: " + content
  112. # Next are the shorthand properties, they should contain max a single size, a single style, and a single color.
  113. width = re.search(r'\b([0-9]+(\.[0-9]+)?[a-z]+|0)\b', content, flags = re.IGNORECASE)
  114. if width:
  115. width = width.group(1)
  116. style_pattern = r'none|solid|hidden|dotted|dashed|double|groove|ridge|inset|outset|sold'
  117. style = re.search(style_pattern, content, flags = re.IGNORECASE)
  118. if style:
  119. style = style.group(0)
  120. if style == "none" or style == "hidden":
  121. width = "0px"
  122. content = re.sub(style_pattern, "", content)
  123. color = re.search(r'\b([a-z]+|#[0-9a-f]+)\b', content)
  124. if color:
  125. color = color.group(1)
  126. if color in html_color_mapping:
  127. color = html_color_mapping[color]
  128. else:
  129. color = "black"
  130. width = width or "3px"
  131. return "border" + side + ": " + width + " " + color
  132. def border_find_replace(line: str):
  133. new_line = ""
  134. prev_end = 0
  135. pattern = r"border(-(top|right|bottom|left))?(-(width|style|color))?:([^;}\"]+)([;}\"])"
  136. for match in re.finditer(pattern, line, flags = re.IGNORECASE):
  137. side = match.group(1) or ""
  138. type = match.group(3) or ""
  139. content = match.group(5)
  140. suffix = match.group(6)
  141. replacement = border_format(side, type, content) + suffix
  142. new_line += line[prev_end:match.start()] + replacement
  143. prev_end = match.end()
  144. new_line += line[prev_end:]
  145. return new_line
  146. assert( border_find_replace("margin:10px; border:20px solid black; padding:30px;") == 'margin:10px; border: 20px black; padding:30px;' )
  147. assert( border_find_replace(" border-left: 7px solid navy; border-right: 17px solid navy; } ") == ' border-left: 7px navy; border-right: 17px navy; } ' )
  148. assert( border_find_replace(" border: blue solid 3px; ") == ' border: 3px blue; ' )
  149. assert( border_find_replace(" border: solid lime; ") == ' border: 3px lime; ' )
  150. assert( border_find_replace(" border: 1px pink; ") == ' border: 1px #ffc0cb; ' )
  151. assert( border_find_replace(" border-color: pink; ") == ' border-color: #ffc0cb; ' )
  152. assert( border_find_replace(" border: 0; ") == ' border: 0 black; ' )
  153. assert( border_find_replace(" border-bottom: 0.25em solid green; ") == ' border-bottom: 0.25em green; ' )
  154. assert( border_find_replace(" border-width: 0; ") == ' border-width: 0; ' )
  155. assert( border_find_replace(" border-left: orange solid 1em; ") == ' border-left: 1em orange; ' )
  156. assert( border_find_replace(" border-style: solid none solid solid; ") == ' border-width: 3px 0px 3px 3px; ' )
  157. assert( border_find_replace(" border: solid; border-style: solid none solid solid; border-style: solid solid solid none; ") == ' border: 3px black; border-width: 3px 0px 3px 3px; border-width: 3px 3px 3px 0px; ' )
  158. assert( border_find_replace(" p + .set {border-top: solid orange} ") == ' p + .set {border-top: 3px orange} ' )
  159. assert( border_find_replace(r'<span style="border-right: none; border-left: none" class="outer">') == '<span style="border-right: 0px black; border-left: 0px black" class="outer">' )
  160. reference_links = []
  161. def process_file(in_file):
  162. in_path = os.path.join(in_dir, in_file)
  163. out_file = os.path.splitext(in_file)[0] + '.rml'
  164. out_path = os.path.join(out_dir, out_file)
  165. f = open(in_path, 'r', encoding="utf8")
  166. lines = f.readlines()
  167. f.close()
  168. data = ''
  169. reference_link = ''
  170. in_style = False
  171. for line in lines:
  172. if re.search(r'<style', line, flags = re.IGNORECASE):
  173. in_style = True
  174. if re.search(r'</style', line, flags = re.IGNORECASE):
  175. in_style = False
  176. if in_style:
  177. line = re.sub(r'(^|[^<])html', r'\1body', line, flags = re.IGNORECASE)
  178. line = re.sub(r'<!--', r'/*', line, flags = re.IGNORECASE)
  179. line = re.sub(r'-->', r'*/', line, flags = re.IGNORECASE)
  180. reference_link_search_candidates = [
  181. r'(<link href="(reference/[^"]+))\.xht(" rel="match" ?/>)',
  182. r'(<link rel="match" href="(reference/[^"]+))\.xht(" ?/>)',
  183. ]
  184. for reference_link_search in reference_link_search_candidates:
  185. reference_link_match = re.search(reference_link_search, line, flags = re.IGNORECASE)
  186. if reference_link_match:
  187. reference_link = reference_link_match[2] + '.xht'
  188. line = re.sub(reference_link_search, r'\1.rml\3', line, flags = re.IGNORECASE)
  189. break
  190. line = re.sub(r'<!DOCTYPE[^>]*>\s*', '', line, flags = re.IGNORECASE)
  191. line = re.sub(r' xmlns="[^"]+"', '', line, flags = re.IGNORECASE)
  192. line = re.sub(r'<(/?)html[^>]*>', r'<\1rml>', line, flags = re.IGNORECASE)
  193. line = re.sub(r'^(\s*)(.*<head[^>]*>)', r'\1\2\n\1\1<link type="text/rcss" href="/../Tests/Data/style.rcss" />', line, flags = re.IGNORECASE)
  194. line = re.sub(r'<style[^>]*><!\[CDATA\[\s*', '<style>\n', line, flags = re.IGNORECASE)
  195. line = re.sub(r'\]\]></style>', '</style>', line, flags = re.IGNORECASE)
  196. line = re.sub(r'direction:\s*ltr\s*;?', r'', line, flags = re.IGNORECASE)
  197. line = re.sub(r'list-style(-type)?:\s*none\s*;?', r'', line, flags = re.IGNORECASE)
  198. line = re.sub(r'(font(-size):[^;}\"]*)xxx-large', r'\1 2.0em', line, flags = re.IGNORECASE)
  199. line = re.sub(r'(font(-size):[^;}\"]*)xx-large', r'\1 1.7em', line, flags = re.IGNORECASE)
  200. line = re.sub(r'(font(-size):[^;}\"]*)x-large', r'\1 1.3em', line, flags = re.IGNORECASE)
  201. line = re.sub(r'(font(-size):[^;}\"]*)large', r'\1 1.15em', line, flags = re.IGNORECASE)
  202. line = re.sub(r'(font(-size):[^;}\"]*)medium', r'\1 1.0em', line, flags = re.IGNORECASE)
  203. line = re.sub(r'(font(-size):[^;}\"]*)small', r'\1 0.9em', line, flags = re.IGNORECASE)
  204. line = re.sub(r'(font(-size):[^;}\"]*)x-small', r'\1 0.7em', line, flags = re.IGNORECASE)
  205. line = re.sub(r'(font(-size):[^;}\"]*)xx-small', r'\1 0.5em', line, flags = re.IGNORECASE)
  206. line = re.sub(r'font:[^;}]*\b([0-9]+[a-z]+)\b[^;}]*([;}])', r'font-size: \1 \2', line, flags = re.IGNORECASE)
  207. line = re.sub(r'font-family:[^;}]*[;}]', r'', line, flags = re.IGNORECASE)
  208. line = re.sub(r'(line-height:)\s*normal', r'\1 1.2em', line, flags = re.IGNORECASE)
  209. line = re.sub(r'-moz-box-sizing', r'box-sizing', line, flags = re.IGNORECASE)
  210. line = re.sub(r'cyan', r'aqua', line, flags = re.IGNORECASE)
  211. line = re.sub(r'flex: none;', r'flex: 0 0 auto;', line, flags = re.IGNORECASE)
  212. line = re.sub(r'align-content:\s*(start|end)', r'align-content: flex-\1', line, flags = re.IGNORECASE)
  213. line = re.sub(r'justify-content:\s*left', r'justify-content: flex-start', line, flags = re.IGNORECASE)
  214. line = re.sub(r'justify-content:\s*right', r'justify-content: flex-end', line, flags = re.IGNORECASE)
  215. line = re.sub(r'table-layout:[^;}]*[;}]', r'', line, flags = re.IGNORECASE)
  216. if re.search(r'background:[^;}\"]*fixed', line, flags = re.IGNORECASE):
  217. print("File '{}' skipped since it uses unsupported background.".format(in_file))
  218. return False
  219. line = re.sub(r'background:(\s*([a-z]+|#[0-9a-f]+)\s*[;}\"])', r'background-color:\1', line, flags = re.IGNORECASE)
  220. prev_end = 0
  221. new_line = ""
  222. for match in re.finditer(r'background-color:([^;]*)([;"])', line, flags = re.IGNORECASE):
  223. color = match.group(1).strip()
  224. delimiter = match.group(2)
  225. if color in html_color_mapping:
  226. color = html_color_mapping[color]
  227. new_line += line[prev_end:match.start()] + 'background-color: ' + color + delimiter
  228. prev_end = match.end()
  229. new_line += line[prev_end:]
  230. line = new_line
  231. prev_end = 0
  232. new_line = ""
  233. for match in re.finditer(r'calc\(\s*(\d+)(\w{1,3})\s*/\s*(\d)\s*\)', line, flags = re.IGNORECASE):
  234. num = match.group(1)
  235. unit = match.group(2)
  236. den = match.group(3)
  237. calc_result = "{}{}".format(float(num) / float(den), unit)
  238. new_line += line[prev_end:match.start()] + calc_result
  239. prev_end = match.end()
  240. new_line += line[prev_end:]
  241. line = new_line
  242. line = border_find_replace(line)
  243. if in_style and not '<' in line:
  244. line = line.replace('&gt;', '>')
  245. flags_match = re.search(r'<meta.*name="flags" content="([^"]*)" ?/>', line, flags = re.IGNORECASE) or re.search(r'<meta.*content="([^"]*)".*name="flags".*?/>', line, flags = re.IGNORECASE)
  246. if flags_match and flags_match[1] != '' and flags_match[1] != 'interactive':
  247. print("File '{}' skipped due to flags '{}'".format(in_file, flags_match[1]))
  248. return False
  249. if re.search(r'display:[^;]*(table-caption|table-header-group|table-footer-group|run-in|list-item)', line, flags = re.IGNORECASE):
  250. print("File '{}' skipped since it uses unsupported display modes.".format(in_file))
  251. return False
  252. if re.search(r'visibility:[^;]*collapse|z-index:\s*[0-9\.]+%', line, flags = re.IGNORECASE):
  253. print("File '{}' skipped since it uses unsupported visibility.".format(in_file))
  254. return False
  255. if re.search(r'data:|support/|<img|<iframe', line, flags = re.IGNORECASE):
  256. print("File '{}' skipped since it uses data or images.".format(in_file))
  257. return False
  258. if re.search(r'<script>', line, flags = re.IGNORECASE):
  259. print("File '{}' skipped since it uses scripts.".format(in_file))
  260. return False
  261. if in_style and re.search(r':before|:after|@media|\s\+\s', line, flags = re.IGNORECASE):
  262. print("File '{}' skipped since it uses unsupported CSS selectors.".format(in_file))
  263. return False
  264. if re.search(r'(: ?inherit ?;)|(!\s*important)|[0-9\.]+(ch|ex)[\s;}]', line, flags = re.IGNORECASE):
  265. print("File '{}' skipped since it uses unsupported CSS values.".format(in_file))
  266. return False
  267. if re.search(r'@font-face|font:|ahem', line, flags = re.IGNORECASE):
  268. print("File '{}' skipped since it uses special fonts.".format(in_file))
  269. return False
  270. if re.search(r'\b((direction:[^;]*[;"])|(content:[^;]*[;"])|(outline:[^;]*[;"])|(quote:[^;]*[;"])|(border-spacing:[^;]*[;"])|(border-collapse:[^;]*[;"])|(background:[^;]*[;"]))', line, flags = re.IGNORECASE)\
  271. or re.search(r'\b((font-variant:[^;]*[;"])|(font-kerning:[^;]*[;"])|(font-feature-settings:[^;]*[;"])|(background-image:[^;]*[;"])|(caption-side:[^;]*[;"])|(clip:[^;]*[;"])|(page-break-inside:[^;]*[;"])|(word-spacing:[^;]*[;"]))', line, flags = re.IGNORECASE)\
  272. or re.search(r'\b((writing-mode:[^;]*[;"])|(text-orientation:[^;]*[;"])|(text-indent:[^;]*[;"])|(page-break-after:[^;]*[;"])|(page-break-before:[^;]*[;"])|(column[^:]*:[^;]*[;"])|(empty-cells:[^;]*[;"]))', line, flags = re.IGNORECASE)\
  273. or re.search(r'\b((aspect-ratio:[^;]*[;"])|(flex-basis:[^;]*[;"])|(order:[^;]*[;"]))', line, flags = re.IGNORECASE):
  274. print("File '{}' skipped since it uses unsupported CSS properties.".format(in_file))
  275. return False
  276. data += line
  277. f = open(out_path, 'w', encoding="utf8")
  278. f.write(data)
  279. f.close()
  280. if reference_link:
  281. reference_links.append(reference_link)
  282. print("File '{}' processed successfully!".format(in_file))
  283. return True
  284. file_block_filters = ['charset','font','list','text-decoration','text-indent','text-transform','bidi','cursor',
  285. 'uri','stylesheet','word-spacing','table-anonymous','outline','at-rule','at-import','attribute',
  286. 'style','quote','rtl','ltr','first-line','first-letter','first-page','import','border','toc',
  287. 'chapter','character-encoding','escape','media','contain-','grid','case-insensitive',
  288. 'containing-block-initial','multicol','system-colors']
  289. def should_block(name):
  290. for file_block_filter in file_block_filters:
  291. if file_block_filter in name:
  292. print("File '{}' skipped due to unsupported feature '{}'".format(name, file_block_filter))
  293. return True
  294. return False
  295. in_dir_list = os.listdir(in_dir)
  296. if match_files:
  297. in_dir_list = [ name for name in in_dir_list if match_files in name ]
  298. total_files = len(in_dir_list)
  299. in_dir_list = [ name for name in in_dir_list if name.endswith(".xht") and not should_block(name) ]
  300. processed_files = 0
  301. processed_reference_files = 0
  302. for in_file in in_dir_list:
  303. if process_file(in_file):
  304. processed_files += 1
  305. final_reference_links = reference_links[:]
  306. total_reference_files = len(final_reference_links)
  307. reference_links.clear()
  308. for in_ref_file in final_reference_links:
  309. if process_file(in_ref_file):
  310. processed_reference_files += 1
  311. print(f"""
  312. Done!
  313. Total test files: {total_files}
  314. Skipped test files: {total_files - processed_files}
  315. Parsed test files: {processed_files}
  316. Total reference files: {total_reference_files}
  317. Skipped reference files: {total_reference_files - processed_reference_files}
  318. Ignored alternate references: {len(reference_links)}
  319. Parsed reference files: {processed_reference_files}""")