convert_css_test_suite_to_rml.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. import os
  2. import re
  3. import sys
  4. import argparse
  5. parser = argparse.ArgumentParser(description=\
  6. '''Convert the W3C CSS 2.1 test suite to RML documents for testing in RmlUi.
  7. Fetch the CSS tests archive from here: https://www.w3.org/Style/CSS/Test/CSS2.1/
  8. Extract the 'xhtml1' folder and point the 'in_dir' argument to this directory.''')
  9. parser.add_argument('in_dir',
  10. help="Input directory which contains the 'xhtml1' (.xht) files to be converted.")
  11. parser.add_argument('out_dir',
  12. help="Output directory for the converted RML files.")
  13. parser.add_argument('--clean', action='store_true',
  14. help='Will *delete* all existing *.rml files in the output directory.')
  15. parser.add_argument('--match',
  16. help="Only process file names containing the given string.")
  17. args = parser.parse_args()
  18. in_dir = args.in_dir
  19. out_dir = args.out_dir
  20. out_ref_dir = os.path.join(out_dir, r'reference')
  21. match_files = args.match
  22. if not os.path.isdir(in_dir):
  23. print("Error: Specified input directory '{}' does not exist.".format(out_dir))
  24. exit()
  25. if not os.path.exists(out_dir):
  26. try:
  27. os.mkdir(out_dir)
  28. except Exception as e:
  29. print('Error: Failed to create output directory {}'.format(out_dir))
  30. if not os.path.exists(out_ref_dir):
  31. try:
  32. os.mkdir(out_ref_dir)
  33. except Exception as e:
  34. print('Error: Failed to create reference output directory {}'.format(out_ref_dir))
  35. if not os.path.isdir(out_dir) or not os.path.isdir(out_ref_dir):
  36. print("Error: Specified output directory '{}' or reference '{}' are not directories.".format(out_dir, out_ref_dir))
  37. exit()
  38. if args.clean:
  39. print("Deleting all *.rml files in output directory '{}' and reference directory '{}'".format(out_dir, out_ref_dir))
  40. for del_dir in [out_dir, out_ref_dir]:
  41. for file in os.listdir(del_dir):
  42. path = os.path.join(del_dir, file)
  43. try:
  44. if os.path.isfile(path) and file.endswith('.rml'):
  45. os.unlink(path)
  46. except Exception as e:
  47. print('Failed to delete {}. Reason: {}'.format(path, e))
  48. reference_links = []
  49. def process_file(in_file):
  50. in_path = os.path.join(in_dir, in_file)
  51. out_file = os.path.splitext(in_file)[0] + '.rml'
  52. out_path = os.path.join(out_dir, out_file)
  53. f = open(in_path, 'r', encoding="utf8")
  54. lines = f.readlines()
  55. f.close()
  56. data = ''
  57. reference_link = ''
  58. in_style = False
  59. for line in lines:
  60. if re.search(r'<style', line, flags = re.IGNORECASE):
  61. in_style = True
  62. if re.search(r'</style', line, flags = re.IGNORECASE):
  63. in_style = False
  64. if in_style:
  65. line = re.sub(r'(^|[^<])html', r'\1body', line, flags = re.IGNORECASE)
  66. reference_link_search_candidates = [
  67. r'(<link href="(reference/[^"]+))\.xht(" rel="match" ?/>)',
  68. r'(<link rel="match" href="(reference/[^"]+))\.xht(" ?/>)',
  69. ]
  70. for reference_link_search in reference_link_search_candidates:
  71. reference_link_match = re.search(reference_link_search, line, flags = re.IGNORECASE)
  72. if reference_link_match:
  73. reference_link = reference_link_match[2] + '.xht'
  74. line = re.sub(reference_link_search, r'\1.rml\3', line, flags = re.IGNORECASE)
  75. break
  76. line = re.sub(r'<!DOCTYPE[^>]*>\s*', '', line, flags = re.IGNORECASE)
  77. line = re.sub(r' xmlns="[^"]+"', '', line, flags = re.IGNORECASE)
  78. line = re.sub(r'<(/?)html[^>]*>', r'<\1rml>', line, flags = re.IGNORECASE)
  79. line = re.sub(r'^(\s*)(.*<head[^>]*>)', r'\1\2\n\1\1<link type="text/rcss" href="/../Tests/Data/style.rcss" />', line, flags = re.IGNORECASE)
  80. line = re.sub(r'direction:\s*ltr\s*;?', r'', line, flags = re.IGNORECASE)
  81. line = re.sub(r'list-style(-type)?:\s*none\s*;?', r'', line, flags = re.IGNORECASE)
  82. line = re.sub(r'max-height:\s*none;', r'max-height: -1px;', line, flags = re.IGNORECASE)
  83. line = re.sub(r'max-width:\s*none;', r'max-width: -1px;', line, flags = re.IGNORECASE)
  84. line = re.sub(r'(font-size:)\s*xxx-large', r'\1 2.0em', line, flags = re.IGNORECASE)
  85. line = re.sub(r'(font-size:)\s*xx-large', r'\1 1.7em', line, flags = re.IGNORECASE)
  86. line = re.sub(r'(font-size:)\s*x-large', r'\1 1.3em', line, flags = re.IGNORECASE)
  87. line = re.sub(r'(font-size:)\s*large', r'\1 1.15em', line, flags = re.IGNORECASE)
  88. line = re.sub(r'(font-size:)\s*medium', r'\1 1.0em', line, flags = re.IGNORECASE)
  89. line = re.sub(r'(font-size:)\s*small', r'\1 0.9em', line, flags = re.IGNORECASE)
  90. line = re.sub(r'(font-size:)\s*x-small', r'\1 0.7em', line, flags = re.IGNORECASE)
  91. line = re.sub(r'(font-size:)\s*xx-small', r'\1 0.5em', line, flags = re.IGNORECASE)
  92. line = re.sub(r'(line-height:)\s*normal', r'\1 1.2em', line, flags = re.IGNORECASE)
  93. line = re.sub(r'cyan', r'aqua', line, flags = re.IGNORECASE)
  94. if re.search(r'background:[^;}\"]*fixed', line, flags = re.IGNORECASE):
  95. print("File '{}' skipped since it uses unsupported background.".format(in_file))
  96. return False
  97. line = re.sub(r'background:(\s*([a-z]+|#[0-9a-f]+)\s*[;}\"])', r'background-color:\1', line, flags = re.IGNORECASE)
  98. # Try to fix up borders to match the RmlUi syntax. This conversion might ruin some tests.
  99. line = re.sub(r'(border(-(top|right|bottom|left))?)-style:\s*solid(\s*[;}"])', r'\1-width: 3px\4', line, flags = re.IGNORECASE)
  100. line = re.sub(r'(border(-(top|right|bottom|left))?):\s*none(\s*[;}"])', r'\1-width: 0px\4', line, flags = re.IGNORECASE)
  101. line = re.sub(r'(border[^:]*:[^;]*)thin', r'\1 1px', line, flags = re.IGNORECASE)
  102. line = re.sub(r'(border[^:]*:[^;]*)medium', r'\1 3px', line, flags = re.IGNORECASE)
  103. line = re.sub(r'(border[^:]*:[^;]*)thick', r'\1 5px', line, flags = re.IGNORECASE)
  104. line = re.sub(r'(border[^:]*:[^;]*)none', r'\1 0px', line, flags = re.IGNORECASE)
  105. line = re.sub(r'(border[^:]*:\s*[0-9][^\s;}]*)\s+soli?d', r'\1 ', line, flags = re.IGNORECASE)
  106. line = re.sub(r'(border[^:]*:\s*[^0-9;}]*)soli?d', r'\1 3px', line, flags = re.IGNORECASE)
  107. if re.search(r'border[^;]*(hidden|dotted|dashed|double|groove|ridge|inset|outset)', line, flags = re.IGNORECASE) \
  108. or re.search(r'border[^:]*-style:', line, flags = re.IGNORECASE):
  109. print("File '{}' skipped since it uses unsupported border styles.".format(in_file))
  110. return False
  111. line = re.sub(r'(border(-(top|right|bottom|left))?:\s*)[0-9][^\s;}]*(\s+[0-9][^\s;}]*[;}])', r'\1 \4', line, flags = re.IGNORECASE)
  112. line = re.sub(r'(border(-(top|right|bottom|left))?:\s*[0-9\.]+[a-z]+\s+)[0-9\.]+[a-z]+([^;}]*[;}])', r'\1 \4', line, flags = re.IGNORECASE)
  113. line = re.sub(r'(border(-(top|right|bottom|left))?:\s*[0-9\.]+[a-z]+\s+)[0-9\.]+[a-z]+([^;}]*[;}])', r'\1 \4', line, flags = re.IGNORECASE)
  114. line = re.sub(r'(border:)[^;]*none([^;]*;)', r'\1 0px \2', line, flags = re.IGNORECASE)
  115. if in_style and not '<' in line:
  116. line = line.replace('&gt;', '>')
  117. flags_match = re.search(r'<meta.*name="flags" content="([^"]*)" ?/>', line, flags = re.IGNORECASE)
  118. if flags_match and flags_match[1] != '' and flags_match[1] != 'interactive':
  119. print("File '{}' skipped due to flags '{}'".format(in_file, flags_match[1]))
  120. return False
  121. if re.search(r'(display:[^;]*(table|run-in|list-item))|(<table)', line, flags = re.IGNORECASE):
  122. print("File '{}' skipped since it uses tables.".format(in_file))
  123. return False
  124. if re.search(r'visibility:[^;]*collapse|z-index:\s*[0-9\.]+%', line, flags = re.IGNORECASE):
  125. print("File '{}' skipped since it uses unsupported visibility.".format(in_file))
  126. return False
  127. if re.search(r'data:|support/|<img|<iframe', line, flags = re.IGNORECASE):
  128. print("File '{}' skipped since it uses data or images.".format(in_file))
  129. return False
  130. if re.search(r'<script>', line, flags = re.IGNORECASE):
  131. print("File '{}' skipped since it uses scripts.".format(in_file))
  132. return False
  133. if in_style and re.search(r':before|:after|@media|\s\+\s', line, flags = re.IGNORECASE):
  134. print("File '{}' skipped since it uses unsupported CSS selectors.".format(in_file))
  135. return False
  136. if re.search(r'(: ?inherit ?;)|(!\s*important)|[0-9\.]+(ch|ex)[\s;}]', line, flags = re.IGNORECASE):
  137. print("File '{}' skipped since it uses unsupported CSS values.".format(in_file))
  138. return False
  139. if re.search(r'font(-family)?:', line, flags = re.IGNORECASE):
  140. print("File '{}' skipped since it modifies fonts.".format(in_file))
  141. return False
  142. if re.search(r'(direction:[^;]*[;"])|(content:[^;]*[;"])|(outline:[^;]*[;"])|(quote:[^;]*[;"])|(border-spacing:[^;]*[;"])|(border-collapse:[^;]*[;"])|(background:[^;]*[;"])|(box-sizing:[^;]*[;"])', line, flags = re.IGNORECASE)\
  143. or re.search(r'(font-variant:[^;]*[;"])|(font-kerning:[^;]*[;"])|(font-feature-settings:[^;]*[;"])|(background-image:[^;]*[;"])|(caption-side:[^;]*[;"])|(clip:[^;]*[;"])|(page-break-inside:[^;]*[;"])|(word-spacing:[^;]*[;"])', line, flags = re.IGNORECASE)\
  144. or re.search(r'(writing-mode:[^;]*[;"])|(text-orientation:[^;]*[;"])|(text-indent:[^;]*[;"])|(page-break-after:[^;]*[;"])|(column[^:]*:[^;]*[;"])|(empty-cells:[^;]*[;"])', line, flags = re.IGNORECASE):
  145. print("File '{}' skipped since it uses unsupported CSS properties.".format(in_file))
  146. return False
  147. data += line
  148. f = open(out_path, 'w', encoding="utf8")
  149. f.write(data)
  150. f.close()
  151. if reference_link:
  152. reference_links.append(reference_link)
  153. print("File '{}' processed successfully!".format(in_file))
  154. return True
  155. file_block_filters = ['charset','font','list','text-decoration','text-indent','text-transform','bidi','cursor',
  156. 'uri','stylesheet','word-spacing','table','outline','at-rule','at-import','attribute',
  157. 'style','quote','rtl','ltr','first-line','first-letter','first-page','import','border',
  158. 'chapter','character-encoding','escape','media','contain-','grid','case-insensitive',
  159. 'containing-block-initial','multicol','system-colors']
  160. def should_block(name):
  161. for file_block_filter in file_block_filters:
  162. if file_block_filter in name:
  163. print("File '{}' skipped due to unsupported feature '{}'".format(name, file_block_filter))
  164. return True
  165. return False
  166. in_dir_list = os.listdir(in_dir)
  167. if match_files:
  168. in_dir_list = [ name for name in in_dir_list if match_files in name ]
  169. total_files = len(in_dir_list)
  170. in_dir_list = [ name for name in in_dir_list if name.endswith(".xht") and not should_block(name) ]
  171. processed_files = 0
  172. processed_reference_files = 0
  173. for in_file in in_dir_list:
  174. if process_file(in_file):
  175. processed_files += 1
  176. final_reference_links = reference_links[:]
  177. total_reference_files = len(final_reference_links)
  178. reference_links.clear()
  179. for in_ref_file in final_reference_links:
  180. if process_file(in_ref_file):
  181. processed_reference_files += 1
  182. print('\nDone!\n\nTotal test files: {}\nSkipped test files: {}\nParsed test files: {}\n\nTotal reference files: {}\nSkipped reference files: {}\nIgnored alternate references: {}\nParsed reference files: {}'\
  183. .format(total_files, total_files - processed_files, processed_files, total_reference_files, total_reference_files - processed_reference_files, len(reference_links), processed_reference_files ))