-- dasm_x86.lua (58 KB)
  1. ------------------------------------------------------------------------------
  2. -- DynASM x86/x64 module.
  3. --
  4. -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
  5. -- See dynasm.lua for full copyright notice.
  6. ------------------------------------------------------------------------------
  -- Target selector: the global `x64` picks x64 mode, otherwise x86 mode.
  local x64 = x64

  -- Module information, exported through _M._info.
  local _info = {}
  _info.arch = x64 and "x64" or "x86"
  _info.description = "DynASM x86/x64 module"
  _info.version = "1.3.0"
  _info.vernum = 10300
  _info.release = "2011-05-05"
  _info.author = "Mike Pall"
  _info.license = "MIT"

  -- Exported glue functions for the arch-specific module.
  local _M = { _info = _info }

  -- Cache library functions in locals, one per line.
  local type = type
  local tonumber = tonumber
  local pairs = pairs
  local ipairs = ipairs
  local assert = assert
  local unpack = unpack or table.unpack
  local setmetatable = setmetatable
  local _s = string
  local sub = _s.sub
  local format = _s.format
  local byte = _s.byte
  local char = _s.char
  local find = _s.find
  local match = _s.match
  local gmatch = _s.gmatch
  local gsub = _s.gsub
  local concat = table.concat
  local sort = table.sort
  local bit = bit or require("bit")
  local band = bit.band
  local shl = bit.lshift
  local shr = bit.rshift

  -- Inherited tables and callbacks (declared here, not yet assigned).
  local g_opt, g_arch
  local wline, werror, wfatal, wwarn
  -- Action name list. The position in this list determines the action number.
  -- CHECK: Keep this in sync with the C code!
  local action_names = {
    -- int arg, 1 buffer pos:
    "DISP",
    "IMM_S", "IMM_B", "IMM_W", "IMM_D",
    "IMM_WB", "IMM_DB",
    -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
    "VREG", "SPACE", -- !x64: VREG support NYI.
    -- ptrdiff_t arg, 1 buffer pos (address): !x64
    "SETLABEL", "REL_A",
    -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
    "REL_LG", "REL_PC",
    -- action arg (1 byte) or int arg, 1 buffer pos (link):
    "IMM_LG", "IMM_PC",
    -- action arg (1 byte) or int arg, 1 buffer pos (offset):
    "LABEL_LG", "LABEL_PC",
    -- action arg (1 byte), 1 buffer pos (offset):
    "ALIGN",
    -- action args (2 bytes), no buffer pos.
    "EXTERN",
    -- action arg (1 byte), no buffer pos.
    "ESC",
    -- no action arg, no buffer pos.
    "MARK",
    -- action arg (1 byte), no buffer pos, terminal action:
    "SECTION",
    -- no args, no buffer pos, terminal action:
    "STOP"
  }

  -- Maximum number of section buffer positions for dasm_put().
  -- CHECK: Keep this in sync with the C code!
  local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

  -- Action name -> action number (filled in by the loop below).
  local map_action = {}
  -- First action number. Byte values below it never need escaping.
  local actfirst = 256 - #action_names

  -- Action list buffer, plus a string copy used only to find duplicates.
  local actlist = {}
  local actstr = ""

  -- Argument list for the next dasm_put(); slot 1 is the action list offset.
  local actargs = { 0 }

  -- Current number of section buffer positions for dasm_put().
  local secpos = 1

  ------------------------------------------------------------------------------

  -- Number the actions so that the last one gets the value 255.
  for idx, actname in ipairs(action_names) do
    map_action[actname] = actfirst + (idx - 1)
  end
  -- Dump action names and numbers (hex and decimal) to `out`.
  local function dumpactions(out)
    out:write("DynASM encoding engine action codes:\n")
    for idx = 1, #action_names do
      local actname = action_names[idx]
      local code = map_action[actname]
      local row = format(" %-10s %02X %d\n", actname, code, code)
      out:write(row)
    end
    out:write("\n")
  end
  -- Write the action list buffer as a huge static C array.
  --   out:  output object with a :write(...) method.
  --   name: C identifier used for the array.
  -- The array is declared with #actlist elements (1 if the list is empty, in
  -- which case the single element is 255). Lines are broken once they reach
  -- 75 characters. The last byte is written without a trailing comma.
  -- Unlike the previous implementation this does not remove the last byte
  -- from the shared actlist, so it can safely be called more than once.
  local function writeactions(out, name)
    local nn = #actlist
    local last = actlist[nn] or 255 -- Empty list: emit a lone 255.
    local count = nn == 0 and 1 or nn
    out:write("static const unsigned char ", name, "[", count, "] = {\n")
    local s = " "
    -- All bytes but the last one get a trailing comma.
    for n = 1, nn - 1 do
      s = s..actlist[n]..","
      if #s >= 75 then
        assert(out:write(s, "\n"))
        s = " "
      end
    end
    out:write(s, last, "\n};\n\n") -- Final byte closes the initializer.
  end
  106. ------------------------------------------------------------------------------
  -- Append one raw byte (an integer in 0..255) to the action list.
  local function wputxb(n)
    local isbyte = n >= 0 and n <= 255 and n % 1 == 0
    assert(isbyte, "byte out of range")
    actlist[#actlist+1] = n
  end
  -- Append the named action to the action list.
  -- If `a` is given it is queued as an argument for the next dasm_put().
  -- The buffer position counter advances by `num` (default 1) whenever
  -- either `a` or `num` is given.
  local function waction(action, a, num)
    local errmsg = "bad action name `"..action.."'"
    local code = assert(map_action[action], errmsg)
    wputxb(code)
    if a then
      actargs[#actargs+1] = a
      secpos = secpos + (num or 1)
    elseif num then
      secpos = secpos + num
    end
  end
  -- Emit a line calling dasm_<func>(Dst, <args...>) in the generated C code.
  local function wcall(func, args)
    local arglist = concat(args, ", ")
    local stmt = format("dasm_%s(Dst, %s);", func, arglist)
    wline(stmt, true)
  end
  -- Delete duplicate action list chunks. A tad slow, but so what.
  --   offset: number of action list entries that precede the new chunk.
  -- The chunk actlist[offset+1 .. #actlist] is converted to a string and
  -- searched for (as plain text) in actstr. If found, actargs[1] is pointed
  -- at the earlier copy and the new bytes are removed from actlist; otherwise
  -- the chunk is appended to actstr.
  -- The string is built in bounded slices: passing the whole chunk through a
  -- single unpack() call can exceed the Lua stack limit for very long chunks.
  local function dedupechunk(offset)
    local al, as = actlist, actstr
    local n = #al
    local slice = 4096 -- Max. number of values per unpack() call.
    local parts = {}
    for lo = offset+1, n, slice do
      local hi = lo + slice - 1
      if hi > n then hi = n end
      parts[#parts+1] = char(unpack(al, lo, hi))
    end
    local chunk = concat(parts)
    local orig = find(as, chunk, 1, true)
    if orig then
      actargs[1] = orig-1 -- Replace with original offset.
      for i=offset+1,n do al[i] = nil end -- Kill dupe.
    else
      actstr = as..chunk
    end
  end
  -- Flush the pending action list chunk as a dasm_put() call.
  -- Used for intervening C code or when buffer positions overflow.
  -- Unless `term` is set, a STOP action terminates the chunk first.
  local function wflush(term)
    local start = actargs[1]
    if #actlist ~= start then -- Otherwise there is nothing to flush.
      if not term then
        waction("STOP") -- Terminate action list.
      end
      dedupechunk(start)
      wcall("put", actargs) -- Add call to dasm_put().
      -- The action list offset is the 1st arg of the next dasm_put() ...
      actargs = { #actlist }
      -- ... and it occupies a buffer position, too.
      secpos = 1
    end
  end
  -- Put a byte, preceded by an ESC action if it collides with an action code.
  local function wputb(n)
    local needesc = n >= actfirst
    if needesc then
      waction("ESC")
    end
    wputxb(n)
  end
  149. ------------------------------------------------------------------------------
  -- Global label name -> global label number.
  -- Numbers are handed out automatically, starting at 10, on first lookup.
  local next_global = 10
  local map_global
  do
    -- __index handler: validate the name, assign and cache the next number.
    local function assign(t, name)
      local valid = match(name, "^[%a_][%w_@]*$")
      if not valid then werror("bad global label") end
      local n = next_global
      if n > 246 then werror("too many global labels") end
      next_global = n + 1
      t[name] = n
      return n
    end
    map_global = setmetatable({}, { __index = assign })
  end
  -- Dump the global label names to `out`, in order of assignment.
  local function dumpglobals(out, lvl)
    local names = {}
    for label, idx in pairs(map_global) do
      names[idx] = label
    end
    out:write("Global labels:\n")
    local i = 10
    while i < next_global do
      out:write(format(" %s\n", names[i]))
      i = i + 1
    end
    out:write("\n")
  end
  -- Write the global labels as a C enum, each name prefixed with `prefix`.
  -- Anything from the first "@" onwards is dropped from a label name.
  local function writeglobals(out, prefix)
    local names = {}
    for label, idx in pairs(map_global) do
      names[idx] = label
    end
    out:write("enum {\n")
    for i = 10, next_global-1 do
      local ident = gsub(names[i], "@.*", "")
      out:write(" ", prefix, ident, ",\n")
    end
    out:write(" ", prefix, "_MAX\n};\n")
  end
  -- Write the global label names as a NULL-terminated C string array `name`.
  local function writeglobalnames(out, name)
    local labels = {}
    for label, idx in pairs(map_global) do
      labels[idx] = label
    end
    out:write("static const char *const ", name, "[] = {\n")
    local i = 10
    while i < next_global do
      out:write(" \"", labels[i], "\",\n")
      i = i + 1
    end
    out:write(" (const char *)0\n};\n")
  end
  190. ------------------------------------------------------------------------------
  -- Extern label name -> extern label number.
  -- Numbers are negative, handed out from -1 downwards on first lookup.
  local next_extern = -1
  local map_extern
  do
    -- __index handler. No restrictions on the name for now.
    local function assign(t, name)
      local n = next_extern
      if n < -256 then werror("too many extern labels") end
      next_extern = n - 1
      t[name] = n
      return n
    end
    map_extern = setmetatable({}, { __index = assign })
  end
  -- Dump the extern label names to `out`, in order of assignment.
  local function dumpexterns(out, lvl)
    local names = {}
    for label, idx in pairs(map_extern) do
      names[-idx] = label
    end
    out:write("Extern labels:\n")
    local count = -next_extern-1
    for i = 1, count do
      out:write(format(" %s\n", names[i]))
    end
    out:write("\n")
  end
  -- Write the extern label names as a NULL-terminated C string array `name`.
  local function writeexternnames(out, name)
    local labels = {}
    for label, idx in pairs(map_extern) do
      labels[-idx] = label
    end
    out:write("static const char *const ", name, "[] = {\n")
    local count = -next_extern-1
    for i = 1, count do
      out:write(" \"", labels[i], "\",\n")
    end
    out:write(" (const char *)0\n};\n")
  end
  221. ------------------------------------------------------------------------------
  -- Arch-specific maps.
  -- Ext. register name -> int. name.
  local map_archdef = {}
  -- Int. register name -> ext. name.
  local map_reg_rev = {}
  -- Int. register name -> register number.
  local map_reg_num = {}
  -- Int. register name -> operand size.
  local map_reg_opsize = {}
  -- Int. register name -> valid base register?
  local map_reg_valid_base = {}
  -- Int. register name -> valid index register?
  local map_reg_valid_index = {}
  -- Int. register name -> need rex vs. no rex.
  local map_reg_needrex = {}
  -- Canonical list of int. register names.
  local reg_list = {}
  -- Type name -> { ctype, reg }
  local map_type = {}
  -- Type number (for _PTx macros).
  local ctypenum = 0
  -- Size for address operands.
  local addrsize = x64 and "q" or "d"
  -- Helper to fill the register maps for one register class.
  --   sz:    operand size code of the class.
  --   cl:    external name of the variable-register class (number -1).
  --   names: optional list of external names for registers 0..#names-1.
  -- Generic names (rN<sz>, xmmN, stN) are added for registers 0..7, or 0..15
  -- in x64 mode for everything but the "f" class. An empty string is appended
  -- to reg_list as a group separator.
  local function mkrmap(sz, cl, names)
    local addressable = (sz == addrsize or sz == "d")

    -- Record one internal register name in the list and the reverse maps.
    local function define(iname, extname, num)
      reg_list[#reg_list+1] = iname
      map_reg_rev[iname] = extname
      map_reg_num[iname] = num
      map_reg_opsize[iname] = sz
      if addressable then
        map_reg_valid_base[iname] = true
        map_reg_valid_index[iname] = true
      end
    end

    -- Variable register class.
    local cname = format("@%s", sz)
    map_archdef[cl] = cname
    define(cname, cl, -1)

    -- Explicitly named registers.
    if names then
      for n, extname in ipairs(names) do
        local iname = format("@%s%x", sz, n-1)
        map_archdef[extname] = iname
        define(iname, extname, n-1)
        if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
      end
    end

    -- Generic register names.
    local top = (x64 and sz ~= "f") and 15 or 7
    for i = 0, top do
      local needrex = sz == "b" and i > 3
      local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
      if needrex then map_reg_needrex[iname] = true end
      local extname
      if sz == "o" then
        extname = format("xmm%d", i)
      elseif sz == "f" then
        extname = format("st%d", i)
      else
        extname = format("r%d%s", i, sz == addrsize and "" or sz)
      end
      map_archdef[extname] = iname
      -- Keep the explicit name as the canonical one if there is any.
      if not map_reg_rev[iname] then
        define(iname, extname, i)
      end
    end

    reg_list[#reg_list+1] = ""
  end
  -- Integer registers (qword, dword, word and byte sized).
  if x64 then
    mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
  end
  do
    local int_classes = {
      { "d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"} },
      { "w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"} },
      { "b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"} },
    }
    for _, class in ipairs(int_classes) do
      mkrmap(class[1], class[2], class[3])
    end
  end
  -- The stack pointer is never accepted as an index register.
  map_reg_valid_index[map_archdef.esp] = false
  if x64 then
    map_reg_valid_index[map_archdef.rsp] = false
  end
  -- "Ra" is the variable register class of address size.
  map_archdef.Ra = "@"..addrsize

  -- FP registers (internally tword sized, but use "f" as operand size).
  mkrmap("f", "Rf")

  -- SSE registers (oword sized, but qword and dword accessible).
  mkrmap("o", "xmm")
  -- Operand size prefixes to codes.
  local map_opsize = {
    byte = "b",
    word = "w",
    dword = "d",
    qword = "q",
    oword = "o",
    tword = "t",
    aword = addrsize,
  }

  -- Operand size code to number.
  local map_opsizenum = {
    b = 1,
    w = 2,
    d = 4,
    q = 8,
    t = 10,
    o = 16,
  }

  -- Operand size code to name.
  local map_opsizename = {
    b = "byte",
    w = "word",
    d = "dword",
    q = "qword",
    o = "oword",
    t = "tword",
    f = "fpword",
  }

  -- Valid index register scale factors.
  local map_xsc = {
    ["1"] = 0,
    ["2"] = 1,
    ["4"] = 2,
    ["8"] = 3,
  }

  -- Condition codes, grouped by code number (aliases share a line).
  local map_cc = {
    o = 0,
    no = 1,
    b = 2, c = 2, nae = 2,
    nb = 3, nc = 3, ae = 3,
    e = 4, z = 4,
    ne = 5, nz = 5,
    be = 6, na = 6,
    nbe = 7, a = 7,
    s = 8,
    ns = 9,
    p = 10, pe = 10,
    np = 11, po = 11,
    l = 12, nge = 12,
    nl = 13, ge = 13,
    le = 14, ng = 14,
    nle = 15, g = 15,
  }
  -- Reverse defines for registers: replace every "@name" token that has an
  -- entry in map_reg_rev. Returns the new string and the match count.
  function _M.revdef(s)
    local text, nmatch = gsub(s, "@%w+", map_reg_rev)
    return text, nmatch
  end
  -- Dump register names, sizes and numbers to `out`.
  -- Empty entries in reg_list become blank separator lines.
  local function dumpregs(out)
    out:write("Register names, sizes and internal numbers:\n")
    for _, reg in ipairs(reg_list) do
      if reg ~= "" then
        local num = map_reg_num[reg]
        local shown
        if num < 0 then shown = "(variable)" else shown = num end
        local sizename = map_opsizename[map_reg_opsize[reg]]
        out:write(format(" %-5s %-8s %s\n", map_reg_rev[reg], sizename, shown))
      else
        out:write("\n")
      end
    end
  end
  341. ------------------------------------------------------------------------------
  -- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
  --   aprefix: "IMM_" or "REL_".
  --   imm:     label number (negative for extern labels) or a pc label
  --            expression (any non-number).
  --   num:     number of buffer positions passed on to waction().
  local function wputlabel(aprefix, imm, num)
    if type(imm) ~= "number" then
      -- Dynamic pc label: the expression becomes a dasm_put() argument.
      waction(aprefix.."PC", imm, num)
      return
    end
    if imm < 0 then
      -- Extern label: a kind byte (0 for IMM_, 1 otherwise), then the index.
      waction("EXTERN")
      wputxb(aprefix == "IMM_" and 0 or 1)
      imm = -imm-1
    else
      waction(aprefix.."LG", nil, num)
    end
    wputxb(imm)
  end
  -- Put a signed byte (-128..127), or an IMM_S action for a non-number arg.
  local function wputsbarg(n)
    if type(n) ~= "number" then
      waction("IMM_S", n)
      return
    end
    if n < -128 or n > 127 then
      werror("signed immediate byte out of range")
    end
    -- Map negative values to their two's complement byte.
    if n < 0 then n = n + 256 end
    wputb(n)
  end
  -- Put an unsigned byte (0..255), or an IMM_B action for a non-number arg.
  local function wputbarg(n)
    if type(n) ~= "number" then
      waction("IMM_B", n)
      return
    end
    if n < 0 or n > 255 then
      werror("unsigned immediate byte out of range")
    end
    wputb(n)
  end
  -- Put an unsigned word (low byte first), or an IMM_W action for a
  -- non-number arg.
  local function wputwarg(n)
    if type(n) ~= "number" then
      waction("IMM_W", n)
      return
    end
    if shr(n, 16) ~= 0 then
      werror("unsigned immediate word out of range")
    end
    local lo, hi = band(n, 255), shr(n, 8)
    wputb(lo)
    wputb(hi)
  end
  -- Put a signed or unsigned dword or arg.
  -- Numbers are emitted as four bytes, low byte first. A table is treated as
  -- a label reference (its first element is the label). Anything else
  -- becomes an IMM_D action argument.
  local function wputdarg(n)
    local tn = type(n)
    if tn == "table" then
      wputlabel("IMM_", n[1], 1)
    elseif tn ~= "number" then
      waction("IMM_D", n)
    else
      for shift = 0, 16, 8 do
        wputb(band(shr(n, shift), 255))
      end
      wputb(shr(n, 24))
    end
  end
  -- Put operand-size dependent number or arg.
  -- A missing size, "d" and "q" all emit a dword; "s" emits a signed byte.
  local function wputszarg(sz, n)
    if sz == "w" then
      wputwarg(n)
    elseif sz == "b" then
      wputbarg(n)
    elseif sz == "s" then
      wputsbarg(n)
    elseif not sz or sz == "d" or sz == "q" then
      wputdarg(n)
    else
      werror("bad operand size")
    end
  end
  -- wputop(sz, op, rex): emit an opcode of one or more bytes via wputb().
  --   sz:  operand size code. "w" emits byte 102 (0x66) first; "b" lowers the
  --        final opcode byte by one. Other values do not alter the opcode.
  --   op:  opcode as a number. Its bytes are emitted most significant first;
  --        the value may exceed 32 bits (see the crc32 note below).
  --   rex: when non-zero, one byte 64 + (rex & 15) is emitted exactly once.
  --        A non-zero rex outside x64 mode is reported through werror().
  -- NOTE(review): 0x66 and 0x40+n look like the x86 operand-size and REX
  -- prefixes -- confirm against the instruction set reference.
  407. -- Put multi-byte opcode with operand-size dependent modifications.
  408. local function wputop(sz, op, rex)
  409. local r
  410. if rex ~= 0 and not x64 then werror("bad operand size") end
  411. if sz == "w" then wputb(102) end
  412. -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  -- Plain arithmetic is used for the part above bit 31 (the bit library is
  -- not used on this line).
  413. if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  414. if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  415. if op >= 65536 then
  -- For opcodes whose upper two bytes are 0x0f3a or 0x0f38 the rex byte goes
  -- in front of the 0x0f; rex is then cleared so it is not emitted again.
  416. if rex ~= 0 then
  417. local opc3 = band(op, 0xffff00)
  418. if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
  419. wputb(64 + band(rex, 15)); rex = 0
  420. end
  421. end
  422. wputb(shr(op, 16)); op = band(op, 0xffff)
  423. end
  424. if op >= 256 then
  425. local b = shr(op, 8)
  -- Same rule for a leading byte 15 (0x0f): rex byte first, then cleared.
  426. if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
  427. wputb(b)
  428. op = band(op, 255)
  429. end
  -- A rex byte not emitted yet goes directly before the final opcode byte.
  430. if rex ~= 0 then wputb(64 + band(rex, 15)) end
  431. if sz == "b" then op = op - 1 end
  432. wputb(op)
  433. end
  -- Put ModRM or SIB formatted byte: m in bits 6-7, the low three bits of s
  -- in bits 3-5 and the low three bits of rm in bits 0-2.
  -- The vs and vrm parameters are accepted but not used here.
  local function wputmodrm(m, s, rm, vs, vrm)
    local inrange = m < 4 and s < 16 and rm < 16
    assert(inrange, "bad modrm operands")
    local hibits = shl(m, 6)
    local midbits = shl(band(s, 7), 3)
    local lobits = band(rm, 7)
    wputb(hibits + midbits + lobits)
  end
  -- wputmrmsib(t, imark, s, vsreg): emit the ModRM byte, an optional SIB byte
  -- and an optional displacement for the parsed operand t.
  --   t:     operand table; reads t.mode, t.reg, t.xreg, t.xsc, t.disp,
  --          t.vreg and t.vxreg.
  --   imark: string; the value "I" requests a MARK action after the ModRM
  --          byte of memory operands.
  --   s:     value for the middle 3-bit field of the ModRM byte; negative
  --          values are treated as 0.
  --   vsreg: optional expression for a variable register in the s field.
  -- Negative register numbers denote variable registers: a 0 is encoded and a
  -- VREG action follows, with a selector byte of 0 (rm in register mode),
  -- 1 (base register), 2 (s field) or 3 (index register).
  -- NOTE(review): the meaning of the selector bytes is defined by the C
  -- encoding engine, which is not visible here -- confirm there.
  439. -- Put ModRM/SIB plus optional displacement.
  440. local function wputmrmsib(t, imark, s, vsreg)
  441. local vreg, vxreg
  442. local reg, xreg = t.reg, t.xreg
  443. if reg and reg < 0 then reg = 0; vreg = t.vreg end
  444. if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  445. if s < 0 then s = 0 end
  446. -- Register mode.
  -- Selected when t.mode starts with "r": a single ModRM byte with m = 3.
  447. if sub(t.mode, 1, 1) == "r" then
  448. wputmodrm(3, s, reg)
  449. if vsreg then waction("VREG", vsreg); wputxb(2) end
  450. if vreg then waction("VREG", vreg); wputxb(0) end
  451. return
  452. end
  453. local disp = t.disp
  454. local tdisp = type(disp)
  455. -- No base register?
  456. if not reg then
  457. local riprel = false
  458. if xreg then
  459. -- Indexed mode with index register only.
  460. -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
  461. wputmodrm(0, s, 4)
  462. if imark == "I" then waction("MARK") end
  463. if vsreg then waction("VREG", vsreg); wputxb(2) end
  464. wputmodrm(t.xsc, xreg, 5)
  465. if vxreg then waction("VREG", vxreg); wputxb(3) end
  466. else
  467. -- Pure 32 bit displacement.
  -- In x64 mode a displacement that is not a label table is encoded with an
  -- extra SIB byte; everything else uses rm = 5, which is flagged as
  -- rip-relative when in x64 mode.
  468. if x64 and tdisp ~= "table" then
  469. wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
  470. if imark == "I" then waction("MARK") end
  471. wputmodrm(0, 4, 5)
  472. else
  473. riprel = x64
  474. wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
  475. if imark == "I" then waction("MARK") end
  476. end
  477. if vsreg then waction("VREG", vsreg); wputxb(2) end
  478. end
  479. if riprel then -- Emit rip-relative displacement.
  -- NOTE(review): imark is used as a Lua pattern matched against "UWSiI";
  -- this presumably expects a single plain character -- confirm with callers.
  480. if match("UWSiI", imark) then
  481. werror("NYI: rip-relative displacement followed by immediate")
  482. end
  483. -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
  484. wputlabel("REL_", disp[1], 2)
  485. else
  486. wputdarg(disp)
  487. end
  488. return
  489. end
  -- Base register present: pick the m field from the displacement.
  --   m == 0: no displacement, m == 1: signed byte, m == 2: dword,
  --   m == nil: size unknown here; m = 2 is encoded and a DISP action is
  --   emitted with the displacement as its argument.
  490. local m
  491. if tdisp == "number" then -- Check displacement size at assembly time.
  492. if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
  493. if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
  494. elseif disp >= -128 and disp <= 127 then m = 1
  495. else m = 2 end
  496. elseif tdisp == "table" then
  497. m = 2
  498. end
  499. -- Index register present or esp as base register: need SIB encoding.
  500. if xreg or band(reg, 7) == 4 then
  501. wputmodrm(m or 2, s, 4) -- ModRM.
  502. if m == nil or imark == "I" then waction("MARK") end
  503. if vsreg then waction("VREG", vsreg); wputxb(2) end
  504. wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
  505. if vxreg then waction("VREG", vxreg); wputxb(3) end
  506. if vreg then waction("VREG", vreg); wputxb(1) end
  507. else
  508. wputmodrm(m or 2, s, reg) -- ModRM.
  -- Without a SIB byte a MARK is emitted if imark is "I" and a displacement
  -- follows, or if the displacement size is unknown and a variable register
  -- is involved.
  509. if (imark == "I" and (m == 1 or m == 2)) or
  510. (m == nil and (vsreg or vreg)) then waction("MARK") end
  511. if vsreg then waction("VREG", vsreg); wputxb(2) end
  512. if vreg then waction("VREG", vreg); wputxb(1) end
  513. end
  514. -- Put displacement.
  515. if m == 1 then wputsbarg(disp)
  516. elseif m == 2 then wputdarg(disp)
  517. elseif m == nil then waction("DISP", disp) end
  518. end
  519. ------------------------------------------------------------------------------
  -- Return a human-readable operand mode string: the op name followed by,
  -- for each operand, the first mode character plus its size (or "?").
  local function opmodestr(op, args)
    local parts = {}
    for i = 1, #args do
      local arg = args[i]
      local kind = sub(arg.mode, 1, 1)
      parts[i] = kind..(arg.opsize or "?")
    end
    return op.." "..concat(parts, ",")
  end
  -- Convert an expression to a number. Returns nothing if it is not numeric.
  -- Fractional values and values outside -2^31 .. 2^32-1 are reported
  -- through werror().
  local function toint(expr)
    local n = tonumber(expr)
    if not n then return end
    local isint = n % 1 == 0
    if not isint or n < -2147483648 or n > 4294967295 then
      werror("bad integer number `"..expr.."'")
    end
    return n
  end
  -- Parse an immediate expression. Returns a mode string and a value:
  --   &expr        -> "iPJ", expr wrapped in a (ptrdiff_t) cast
  --   =>expr       -> "iJ", expr (pc label reference)
  --   ->name       -> "iJ", global label number
  --   <N or >N     -> "iJ", local label number (N in 1..9)
  --   extern name  -> "iJ", extern label number
  --   anything else-> "iI", the expression itself
  local function immexpr(expr)
    local first = sub(expr, 1, 1)
    if first == "&" then
      local target = sub(expr, 2)
      return "iPJ", format("(ptrdiff_t)(%s)", target)
    end

    local prefix = sub(expr, 1, 2)
    if prefix == "=>" then
      return "iJ", sub(expr, 3)
    elseif prefix == "->" then
      return "iJ", map_global[sub(expr, 3)]
    end

    -- [<>][1-9] (local label reference). Fwd: 247-255, Bkwd: 1-9.
    local dir, lnum = match(expr, "^([<>])([1-9])$")
    if dir then
      local base = 0
      if dir == ">" then base = 246 end
      return "iJ", lnum + base
    end

    local extname = match(expr, "^extern%s+(%S+)$")
    if extname then
      return "iJ", map_extern[extname]
    end

    return "iI", expr
  end
  -- Parse displacement expression: +-num, +-expr, +-opsize*num
  -- Returns a number if the displacement is known, a one-element table
  -- holding the label for label references, or the expression string.
  local function dispexpr(expr)
    if expr == "" then return 0 end
    local known = toint(expr)
    if known then return known end

    local sign, body = match(expr, "^([+-])%s*(.+)$")
    if sign == "+" then
      expr = body
    elseif not sign then
      werror("bad displacement expression `"..expr.."'")
    end

    -- +-opsize*num: scale the number by the operand size.
    local opsize, tailops = match(body, "^(%w+)%s*%*%s*(.+)$")
    local ops = map_opsize[opsize]
    local imm = toint(tailops)
    if ops and imm then
      if sign == "-" then imm = -imm end
      return imm*map_opsizenum[ops]
    end

    local mode, iexpr = immexpr(body)
    if mode == "iJ" then
      if sign == "-" then werror("cannot invert label reference") end
      return { iexpr }
    end

    -- A leading "-" must stay part of the returned expression.
    return expr
  end
  -- Parse register or type expression.
  -- For a type name (optionally "type:@reg" with a register override) this
  -- returns the register, its number and the type entry. For anything else
  -- it returns the expression and its register number (nil if unknown).
  -- Returns nothing for a missing expression.
  local function rtexpr(expr)
    if not expr then return end
    local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
    local key = tname or expr
    local tp = map_type[key]
    if not tp then
      return expr, map_reg_num[expr]
    end
    local reg = ovreg or tp.reg
    local rnum = map_reg_num[reg]
    if not rnum then
      werror("type `"..key.."' needs a register override")
    end
    if not map_reg_valid_base[reg] then
      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
    end
    return reg, rnum, tp
  end
  607. -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
  608. local function parseoperand(param)
  609. local t = {}
  610. local expr = param
  611. local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  612. if opsize then
  613. t.opsize = map_opsize[opsize]
  614. if t.opsize then expr = tailops end
  615. end
  616. local br = match(expr, "^%[%s*(.-)%s*%]$")
  617. repeat
  618. if br then
  619. t.mode = "xm"
  620. -- [disp]
  621. t.disp = toint(br)
  622. if t.disp then
  623. t.mode = x64 and "xm" or "xmO"
  624. break
  625. end
  626. -- [reg...]
  627. local tp
  628. local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
  629. reg, t.reg, tp = rtexpr(reg)
  630. if not t.reg then
  631. -- [expr]
  632. t.mode = x64 and "xm" or "xmO"
  633. t.disp = dispexpr("+"..br)
  634. break
  635. end
  636. if t.reg == -1 then
  637. t.vreg, tailr = match(tailr, "^(%b())(.*)$")
  638. if not t.vreg then werror("bad variable register expression") end
  639. end
  640. -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
  641. local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
  642. if xsc then
  643. if not map_reg_valid_index[reg] then
  644. werror("bad index register `"..map_reg_rev[reg].."'")
  645. end
  646. t.xsc = map_xsc[xsc]
  647. t.xreg = t.reg
  648. t.vxreg = t.vreg
  649. t.reg = nil
  650. t.vreg = nil
  651. t.disp = dispexpr(tailsc)
  652. break
  653. end
  654. if not map_reg_valid_base[reg] then
  655. werror("bad base register `"..map_reg_rev[reg].."'")
  656. end
  657. -- [reg] or [reg+-disp]
  658. t.disp = toint(tailr) or (tailr == "" and 0)
  659. if t.disp then break end
  660. -- [reg+xreg...]
  661. local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
  662. xreg, t.xreg, tp = rtexpr(xreg)
  663. if not t.xreg then
  664. -- [reg+-expr]
  665. t.disp = dispexpr(tailr)
  666. break
  667. end
  668. if not map_reg_valid_index[xreg] then
  669. werror("bad index register `"..map_reg_rev[xreg].."'")
  670. end
  671. if t.xreg == -1 then
  672. t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
  673. if not t.vxreg then werror("bad variable register expression") end
  674. end
  675. -- [reg+xreg*xsc...]
  676. local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
  677. if xsc then
  678. t.xsc = map_xsc[xsc]
  679. tailx = tailsc
  680. end
  681. -- [...] or [...+-disp] or [...+-expr]
  682. t.disp = dispexpr(tailx)
  683. else
  684. -- imm or opsize*imm
  685. local imm = toint(expr)
  686. if not imm and sub(expr, 1, 1) == "*" and t.opsize then
  687. imm = toint(sub(expr, 2))
  688. if imm then
  689. imm = imm * map_opsizenum[t.opsize]
  690. t.opsize = nil
  691. end
  692. end
  693. if imm then
  694. if t.opsize then werror("bad operand size override") end
  695. local m = "i"
  696. if imm == 1 then m = m.."1" end
  697. if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
  698. if imm >= -128 and imm <= 127 then m = m.."S" end
  699. t.imm = imm
  700. t.mode = m
  701. break
  702. end
  703. local tp
  704. local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
  705. reg, t.reg, tp = rtexpr(reg)
  706. if t.reg then
  707. if t.reg == -1 then
  708. t.vreg, tailr = match(tailr, "^(%b())(.*)$")
  709. if not t.vreg then werror("bad variable register expression") end
  710. end
  711. -- reg
  712. if tailr == "" then
  713. if t.opsize then werror("bad operand size override") end
  714. t.opsize = map_reg_opsize[reg]
  715. if t.opsize == "f" then
  716. t.mode = t.reg == 0 and "fF" or "f"
  717. else
  718. if reg == "@w4" or (x64 and reg == "@d4") then
  719. wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
  720. end
  721. t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
  722. end
  723. t.needrex = map_reg_needrex[reg]
  724. break
  725. end
  726. -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
  727. if not tp then werror("bad operand `"..param.."'") end
  728. t.mode = "xm"
  729. t.disp = format(tp.ctypefmt, tailr)
  730. else
  731. t.mode, t.imm = immexpr(expr)
  732. if sub(t.mode, -1) == "J" then
  733. if t.opsize and t.opsize ~= addrsize then
  734. werror("bad operand size override")
  735. end
  736. t.opsize = addrsize
  737. end
  738. end
  739. end
  740. until true
  741. return t
  742. end
  743. ------------------------------------------------------------------------------
  744. -- x86 Template String Description
  745. -- ===============================
  746. --
  747. -- Each template string is a list of [match:]pattern pairs,
  748. -- separated by "|". The first match wins. No match means a
  749. -- bad or unsupported combination of operand modes or sizes.
  750. --
  751. -- The match part and the ":" is omitted if the operation has
  752. -- no operands. Otherwise the first N characters are matched
  753. -- against the mode strings of each of the N operands.
  754. --
  755. -- The mode string for each operand type is (see parseoperand()):
  756. -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
  757. -- FP register: "f", +"F" for st0
  758. -- Index operand: "xm", +"O" for [disp] (pure offset)
  759. -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
  760. -- +"I" for arg, +"P" for pointer
  761. -- Any: +"J" for valid jump targets
  762. --
  763. -- So a match character "m" (mixed) matches both an integer register
  764. -- and an index operand (to be encoded with the ModRM/SIB scheme).
  765. -- But "r" matches only a register and "x" only an index operand
  766. -- (e.g. for FP memory access operations).
  767. --
  768. -- The operand size match string starts right after the mode match
  769. -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
  770. -- The effective data size of the operation is matched against this list.
  771. --
  772. -- If only the regular "b", "w", "d", "q", "t" operand sizes are
  773. -- present, then all operands must be the same size. Unspecified sizes
  774. -- are ignored, but at least one operand must have a size or the pattern
  775. -- won't match (use the "byte", "word", "dword", "qword", "tword"
  776. -- operand size overrides. E.g.: mov dword [eax], 1).
  777. --
  778. -- If the list has a "1" or "2" prefix, the operand size is taken
  779. -- from the respective operand and any other operand sizes are ignored.
  780. -- If the list contains only ".", all operand sizes are ignored.
  781. -- If the list has a "/" prefix, the concatenated (mixed) operand sizes
  782. -- are compared to the match.
  783. --
  784. -- E.g. "rrdw" matches for either two dword registers or two word
  785. -- registers. "Fx2dq" matches an st0 operand plus an index operand
  786. -- pointing to a dword (float) or qword (double).
  787. --
  788. -- Every character after the ":" is part of the pattern string:
  789. -- Hex chars are accumulated to form the opcode (left to right).
  790. -- "n" disables the standard opcode mods
  791. -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
  792. -- "X" Force REX.W.
  793. -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
  794. -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
  795. -- The spare 3 bits are either filled with the last hex digit or
  796. -- the result from a previous "r"/"R". The opcode is restored.
  797. --
  798. -- All of the following characters force a flush of the opcode:
  799. -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
  800. -- "S" stores a signed 8 bit immediate from the last operand.
  801. -- "U" stores an unsigned 8 bit immediate from the last operand.
  802. -- "W" stores an unsigned 16 bit immediate from the last operand.
  803. -- "i" stores an operand sized immediate from the last operand.
-- "I" ditto, but generates an action code to optionally modify
-- the opcode (+2) for a signed 8 bit immediate.
  806. -- "J" generates one of the REL action codes from the last operand.
  807. --
  808. ------------------------------------------------------------------------------
  809. -- Template strings for x86 instructions. Ordered by first opcode byte.
  810. -- Unimplemented opcodes (deliberate omissions) are marked with *.
  811. local map_op = {
  812. -- 00-05: add...
  813. -- 06: *push es
  814. -- 07: *pop es
  815. -- 08-0D: or...
  816. -- 0E: *push cs
  817. -- 0F: two byte opcode prefix
  818. -- 10-15: adc...
  819. -- 16: *push ss
  820. -- 17: *pop ss
  821. -- 18-1D: sbb...
  822. -- 1E: *push ds
  823. -- 1F: *pop ds
  824. -- 20-25: and...
  825. es_0 = "26",
  826. -- 27: *daa
  827. -- 28-2D: sub...
  828. cs_0 = "2E",
  829. -- 2F: *das
  830. -- 30-35: xor...
  831. ss_0 = "36",
  832. -- 37: *aaa
  833. -- 38-3D: cmp...
  834. ds_0 = "3E",
  835. -- 3F: *aas
  836. inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  837. dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  838. push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
  839. "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  840. pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  841. -- 60: *pusha, *pushad, *pushaw
  842. -- 61: *popa, *popad, *popaw
  843. -- 62: *bound rdw,x
  844. -- 63: x86: *arpl mw,rw
  845. movsxd_2 = x64 and "rm/qd:63rM",
  846. fs_0 = "64",
  847. gs_0 = "65",
  848. o16_0 = "66",
  849. a16_0 = not x64 and "67" or nil,
  850. a32_0 = x64 and "67",
  851. -- 68: push idw
  852. -- 69: imul rdw,mdw,idw
  853. -- 6A: push ib
  854. -- 6B: imul rdw,mdw,S
  855. -- 6C: *insb
  856. -- 6D: *insd, *insw
  857. -- 6E: *outsb
  858. -- 6F: *outsd, *outsw
  859. -- 70-7F: jcc lb
  860. -- 80: add... mb,i
  861. -- 81: add... mdw,i
  862. -- 82: *undefined
  863. -- 83: add... mdw,S
  864. test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  865. -- 86: xchg rb,mb
  866. -- 87: xchg rdw,mdw
  867. -- 88: mov mb,r
  868. -- 89: mov mdw,r
  869. -- 8A: mov r,mb
  870. -- 8B: mov r,mdw
  871. -- 8C: *mov mdw,seg
  872. lea_2 = "rx1dq:8DrM",
  873. -- 8E: *mov seg,mdw
  874. -- 8F: pop mdw
  875. nop_0 = "90",
  876. xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  877. cbw_0 = "6698",
  878. cwde_0 = "98",
  879. cdqe_0 = "4898",
  880. cwd_0 = "6699",
  881. cdq_0 = "99",
  882. cqo_0 = "4899",
  883. -- 9A: *call iw:idw
  884. wait_0 = "9B",
  885. fwait_0 = "9B",
  886. pushf_0 = "9C",
  887. pushfd_0 = not x64 and "9C",
  888. pushfq_0 = x64 and "9C",
  889. popf_0 = "9D",
  890. popfd_0 = not x64 and "9D",
  891. popfq_0 = x64 and "9D",
  892. sahf_0 = "9E",
  893. lahf_0 = "9F",
  894. mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  895. movsb_0 = "A4",
  896. movsw_0 = "66A5",
  897. movsd_0 = "A5",
  898. cmpsb_0 = "A6",
  899. cmpsw_0 = "66A7",
  900. cmpsd_0 = "A7",
  901. -- A8: test Rb,i
  902. -- A9: test Rdw,i
  903. stosb_0 = "AA",
  904. stosw_0 = "66AB",
  905. stosd_0 = "AB",
  906. lodsb_0 = "AC",
  907. lodsw_0 = "66AD",
  908. lodsd_0 = "AD",
  909. scasb_0 = "AE",
  910. scasw_0 = "66AF",
  911. scasd_0 = "AF",
  912. -- B0-B7: mov rb,i
  913. -- B8-BF: mov rdw,i
  914. -- C0: rol... mb,i
  915. -- C1: rol... mdw,i
  916. ret_1 = "i.:nC2W",
  917. ret_0 = "C3",
  918. -- C4: *les rdw,mq
  919. -- C5: *lds rdw,mq
  920. -- C6: mov mb,i
  921. -- C7: mov mdw,i
  922. -- C8: *enter iw,ib
  923. leave_0 = "C9",
  924. -- CA: *retf iw
  925. -- CB: *retf
  926. int3_0 = "CC",
  927. int_1 = "i.:nCDU",
  928. into_0 = "CE",
  929. -- CF: *iret
  930. -- D0: rol... mb,1
  931. -- D1: rol... mdw,1
  932. -- D2: rol... mb,cl
  933. -- D3: rol... mb,cl
  934. -- D4: *aam ib
  935. -- D5: *aad ib
  936. -- D6: *salc
  937. -- D7: *xlat
  938. -- D8-DF: floating point ops
  939. -- E0: *loopne
  940. -- E1: *loope
  941. -- E2: *loop
  942. -- E3: *jcxz, *jecxz
  943. -- E4: *in Rb,ib
  944. -- E5: *in Rdw,ib
  945. -- E6: *out ib,Rb
  946. -- E7: *out ib,Rdw
  947. call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  948. jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  949. -- EA: *jmp iw:idw
  950. -- EB: jmp ib
  951. -- EC: *in Rb,dx
  952. -- ED: *in Rdw,dx
  953. -- EE: *out dx,Rb
  954. -- EF: *out dx,Rdw
  955. lock_0 = "F0",
  956. int1_0 = "F1",
  957. repne_0 = "F2",
  958. repnz_0 = "F2",
  959. rep_0 = "F3",
  960. repe_0 = "F3",
  961. repz_0 = "F3",
  962. -- F4: *hlt
  963. cmc_0 = "F5",
  964. -- F6: test... mb,i; div... mb
  965. -- F7: test... mdw,i; div... mdw
  966. clc_0 = "F8",
  967. stc_0 = "F9",
  968. -- FA: *cli
  969. cld_0 = "FC",
  970. std_0 = "FD",
  971. -- FE: inc... mb
  972. -- FF: inc... mdw
  973. -- misc ops
  974. not_1 = "m:F72m",
  975. neg_1 = "m:F73m",
  976. mul_1 = "m:F74m",
  977. imul_1 = "m:F75m",
  978. div_1 = "m:F76m",
  979. idiv_1 = "m:F77m",
  980. imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  981. imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
  982. movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  983. movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
  984. bswap_1 = "rqd:0FC8r",
  985. bsf_2 = "rmqdw:0FBCrM",
  986. bsr_2 = "rmqdw:0FBDrM",
  987. bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
  988. btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
  989. btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
  990. bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
  991. shld_3 = "mriqdw:0FA4RmU|mrCqdw:0FA5Rm",
  992. shrd_3 = "mriqdw:0FACRmU|mrCqdw:0FADRm",
  993. rdtsc_0 = "0F31", -- P1+
  994. cpuid_0 = "0FA2", -- P1+
  995. -- floating point ops
  996. fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
  997. fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  998. fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
  999. fpop_0 = "DDD8", -- Alias for fstp st0.
  1000. fist_1 = "xw:nDF2m|xd:DB2m",
  1001. fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
  1002. fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",
  1003. fxch_0 = "D9C9",
  1004. fxch_1 = "ff:D9C8r",
  1005. fxch_2 = "fFf:D9C8r|Fff:D9C8R",
  1006. fucom_1 = "ff:DDE0r",
  1007. fucom_2 = "Fff:DDE0R",
  1008. fucomp_1 = "ff:DDE8r",
  1009. fucomp_2 = "Fff:DDE8R",
  1010. fucomi_1 = "ff:DBE8r", -- P6+
  1011. fucomi_2 = "Fff:DBE8R", -- P6+
  1012. fucomip_1 = "ff:DFE8r", -- P6+
  1013. fucomip_2 = "Fff:DFE8R", -- P6+
  1014. fcomi_1 = "ff:DBF0r", -- P6+
  1015. fcomi_2 = "Fff:DBF0R", -- P6+
  1016. fcomip_1 = "ff:DFF0r", -- P6+
  1017. fcomip_2 = "Fff:DFF0R", -- P6+
  1018. fucompp_0 = "DAE9",
  1019. fcompp_0 = "DED9",
  1020. fldenv_1 = "x.:D94m",
  1021. fnstenv_1 = "x.:D96m",
  1022. fstenv_1 = "x.:9BD96m",
  1023. fldcw_1 = "xw:nD95m",
  1024. fstcw_1 = "xw:n9BD97m",
  1025. fnstcw_1 = "xw:nD97m",
  1026. fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
  1027. fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
  1028. fclex_0 = "9BDBE2",
  1029. fnclex_0 = "DBE2",
  1030. fnop_0 = "D9D0",
  1031. -- D9D1-D9DF: unassigned
  1032. fchs_0 = "D9E0",
  1033. fabs_0 = "D9E1",
  1034. -- D9E2: unassigned
  1035. -- D9E3: unassigned
  1036. ftst_0 = "D9E4",
  1037. fxam_0 = "D9E5",
  1038. -- D9E6: unassigned
  1039. -- D9E7: unassigned
  1040. fld1_0 = "D9E8",
  1041. fldl2t_0 = "D9E9",
  1042. fldl2e_0 = "D9EA",
  1043. fldpi_0 = "D9EB",
  1044. fldlg2_0 = "D9EC",
  1045. fldln2_0 = "D9ED",
  1046. fldz_0 = "D9EE",
  1047. -- D9EF: unassigned
  1048. f2xm1_0 = "D9F0",
  1049. fyl2x_0 = "D9F1",
  1050. fptan_0 = "D9F2",
  1051. fpatan_0 = "D9F3",
  1052. fxtract_0 = "D9F4",
  1053. fprem1_0 = "D9F5",
  1054. fdecstp_0 = "D9F6",
  1055. fincstp_0 = "D9F7",
  1056. fprem_0 = "D9F8",
  1057. fyl2xp1_0 = "D9F9",
  1058. fsqrt_0 = "D9FA",
  1059. fsincos_0 = "D9FB",
  1060. frndint_0 = "D9FC",
  1061. fscale_0 = "D9FD",
  1062. fsin_0 = "D9FE",
  1063. fcos_0 = "D9FF",
  1064. -- SSE, SSE2
  1065. andnpd_2 = "rmo:660F55rM",
  1066. andnps_2 = "rmo:0F55rM",
  1067. andpd_2 = "rmo:660F54rM",
  1068. andps_2 = "rmo:0F54rM",
  1069. clflush_1 = "x.:0FAE7m",
  1070. cmppd_3 = "rmio:660FC2rMU",
  1071. cmpps_3 = "rmio:0FC2rMU",
  1072. cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
  1073. cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
  1074. comisd_2 = "rro:660F2FrM|rx/oq:",
  1075. comiss_2 = "rro:0F2FrM|rx/od:",
  1076. cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
  1077. cvtdq2ps_2 = "rmo:0F5BrM",
  1078. cvtpd2dq_2 = "rmo:F20FE6rM",
  1079. cvtpd2ps_2 = "rmo:660F5ArM",
  1080. cvtpi2pd_2 = "rx/oq:660F2ArM",
  1081. cvtpi2ps_2 = "rx/oq:0F2ArM",
  1082. cvtps2dq_2 = "rmo:660F5BrM",
  1083. cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
  1084. cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  1085. cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
  1086. cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  1087. cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  1088. cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
  1089. cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
  1090. cvttpd2dq_2 = "rmo:660FE6rM",
  1091. cvttps2dq_2 = "rmo:F30F5BrM",
  1092. cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  1093. cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  1094. fxsave_1 = "x.:0FAE0m",
  1095. fxrstor_1 = "x.:0FAE1m",
  1096. ldmxcsr_1 = "xd:0FAE2m",
  1097. lfence_0 = "0FAEE8",
  1098. maskmovdqu_2 = "rro:660FF7rM",
  1099. mfence_0 = "0FAEF0",
  1100. movapd_2 = "rmo:660F28rM|mro:660F29Rm",
  1101. movaps_2 = "rmo:0F28rM|mro:0F29Rm",
  1102. movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  1103. movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
  1104. movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
  1105. movhlps_2 = "rro:0F12rM",
  1106. movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
  1107. movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
  1108. movlhps_2 = "rro:0F16rM",
  1109. movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
  1110. movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
  1111. movmskpd_2 = "rr/do:660F50rM",
  1112. movmskps_2 = "rr/do:0F50rM",
  1113. movntdq_2 = "xro:660FE7Rm",
  1114. movnti_2 = "xrqd:0FC3Rm",
  1115. movntpd_2 = "xro:660F2BRm",
  1116. movntps_2 = "xro:0F2BRm",
  1117. movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  1118. movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  1119. movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  1120. movupd_2 = "rmo:660F10rM|mro:660F11Rm",
  1121. movups_2 = "rmo:0F10rM|mro:0F11Rm",
  1122. orpd_2 = "rmo:660F56rM",
  1123. orps_2 = "rmo:0F56rM",
  1124. packssdw_2 = "rmo:660F6BrM",
  1125. packsswb_2 = "rmo:660F63rM",
  1126. packuswb_2 = "rmo:660F67rM",
  1127. paddb_2 = "rmo:660FFCrM",
  1128. paddd_2 = "rmo:660FFErM",
  1129. paddq_2 = "rmo:660FD4rM",
  1130. paddsb_2 = "rmo:660FECrM",
  1131. paddsw_2 = "rmo:660FEDrM",
  1132. paddusb_2 = "rmo:660FDCrM",
  1133. paddusw_2 = "rmo:660FDDrM",
  1134. paddw_2 = "rmo:660FFDrM",
  1135. pand_2 = "rmo:660FDBrM",
  1136. pandn_2 = "rmo:660FDFrM",
  1137. pause_0 = "F390",
  1138. pavgb_2 = "rmo:660FE0rM",
  1139. pavgw_2 = "rmo:660FE3rM",
  1140. pcmpeqb_2 = "rmo:660F74rM",
  1141. pcmpeqd_2 = "rmo:660F76rM",
  1142. pcmpeqw_2 = "rmo:660F75rM",
  1143. pcmpgtb_2 = "rmo:660F64rM",
  1144. pcmpgtd_2 = "rmo:660F66rM",
  1145. pcmpgtw_2 = "rmo:660F65rM",
  1146. pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
  1147. pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
  1148. pmaddwd_2 = "rmo:660FF5rM",
  1149. pmaxsw_2 = "rmo:660FEErM",
  1150. pmaxub_2 = "rmo:660FDErM",
  1151. pminsw_2 = "rmo:660FEArM",
  1152. pminub_2 = "rmo:660FDArM",
  1153. pmovmskb_2 = "rr/do:660FD7rM",
  1154. pmulhuw_2 = "rmo:660FE4rM",
  1155. pmulhw_2 = "rmo:660FE5rM",
  1156. pmullw_2 = "rmo:660FD5rM",
  1157. pmuludq_2 = "rmo:660FF4rM",
  1158. por_2 = "rmo:660FEBrM",
  1159. prefetchnta_1 = "xb:n0F180m",
  1160. prefetcht0_1 = "xb:n0F181m",
  1161. prefetcht1_1 = "xb:n0F182m",
  1162. prefetcht2_1 = "xb:n0F183m",
  1163. psadbw_2 = "rmo:660FF6rM",
  1164. pshufd_3 = "rmio:660F70rMU",
  1165. pshufhw_3 = "rmio:F30F70rMU",
  1166. pshuflw_3 = "rmio:F20F70rMU",
  1167. pslld_2 = "rmo:660FF2rM|rio:660F726mU",
  1168. pslldq_2 = "rio:660F737mU",
  1169. psllq_2 = "rmo:660FF3rM|rio:660F736mU",
  1170. psllw_2 = "rmo:660FF1rM|rio:660F716mU",
  1171. psrad_2 = "rmo:660FE2rM|rio:660F724mU",
  1172. psraw_2 = "rmo:660FE1rM|rio:660F714mU",
  1173. psrld_2 = "rmo:660FD2rM|rio:660F722mU",
  1174. psrldq_2 = "rio:660F733mU",
  1175. psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
  1176. psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
  1177. psubb_2 = "rmo:660FF8rM",
  1178. psubd_2 = "rmo:660FFArM",
  1179. psubq_2 = "rmo:660FFBrM",
  1180. psubsb_2 = "rmo:660FE8rM",
  1181. psubsw_2 = "rmo:660FE9rM",
  1182. psubusb_2 = "rmo:660FD8rM",
  1183. psubusw_2 = "rmo:660FD9rM",
  1184. psubw_2 = "rmo:660FF9rM",
  1185. punpckhbw_2 = "rmo:660F68rM",
  1186. punpckhdq_2 = "rmo:660F6ArM",
  1187. punpckhqdq_2 = "rmo:660F6DrM",
  1188. punpckhwd_2 = "rmo:660F69rM",
  1189. punpcklbw_2 = "rmo:660F60rM",
  1190. punpckldq_2 = "rmo:660F62rM",
  1191. punpcklqdq_2 = "rmo:660F6CrM",
  1192. punpcklwd_2 = "rmo:660F61rM",
  1193. pxor_2 = "rmo:660FEFrM",
  1194. rcpps_2 = "rmo:0F53rM",
  1195. rcpss_2 = "rro:F30F53rM|rx/od:",
  1196. rsqrtps_2 = "rmo:0F52rM",
  1197. rsqrtss_2 = "rmo:F30F52rM",
  1198. sfence_0 = "0FAEF8",
  1199. shufpd_3 = "rmio:660FC6rMU",
  1200. shufps_3 = "rmio:0FC6rMU",
  1201. stmxcsr_1 = "xd:0FAE3m",
  1202. ucomisd_2 = "rro:660F2ErM|rx/oq:",
  1203. ucomiss_2 = "rro:0F2ErM|rx/od:",
  1204. unpckhpd_2 = "rmo:660F15rM",
  1205. unpckhps_2 = "rmo:0F15rM",
  1206. unpcklpd_2 = "rmo:660F14rM",
  1207. unpcklps_2 = "rmo:0F14rM",
  1208. xorpd_2 = "rmo:660F57rM",
  1209. xorps_2 = "rmo:0F57rM",
  1210. -- SSE3 ops
  1211. fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
  1212. addsubpd_2 = "rmo:660FD0rM",
  1213. addsubps_2 = "rmo:F20FD0rM",
  1214. haddpd_2 = "rmo:660F7CrM",
  1215. haddps_2 = "rmo:F20F7CrM",
  1216. hsubpd_2 = "rmo:660F7DrM",
  1217. hsubps_2 = "rmo:F20F7DrM",
  1218. lddqu_2 = "rxo:F20FF0rM",
  1219. movddup_2 = "rmo:F20F12rM",
  1220. movshdup_2 = "rmo:F30F16rM",
  1221. movsldup_2 = "rmo:F30F12rM",
  1222. -- SSSE3 ops
  1223. pabsb_2 = "rmo:660F381CrM",
  1224. pabsd_2 = "rmo:660F381ErM",
  1225. pabsw_2 = "rmo:660F381DrM",
  1226. palignr_3 = "rmio:660F3A0FrMU",
  1227. phaddd_2 = "rmo:660F3802rM",
  1228. phaddsw_2 = "rmo:660F3803rM",
  1229. phaddw_2 = "rmo:660F3801rM",
  1230. phsubd_2 = "rmo:660F3806rM",
  1231. phsubsw_2 = "rmo:660F3807rM",
  1232. phsubw_2 = "rmo:660F3805rM",
  1233. pmaddubsw_2 = "rmo:660F3804rM",
  1234. pmulhrsw_2 = "rmo:660F380BrM",
  1235. pshufb_2 = "rmo:660F3800rM",
  1236. psignb_2 = "rmo:660F3808rM",
  1237. psignd_2 = "rmo:660F380ArM",
  1238. psignw_2 = "rmo:660F3809rM",
  1239. -- SSE4.1 ops
  1240. blendpd_3 = "rmio:660F3A0DrMU",
  1241. blendps_3 = "rmio:660F3A0CrMU",
  1242. blendvpd_3 = "rmRo:660F3815rM",
  1243. blendvps_3 = "rmRo:660F3814rM",
  1244. dppd_3 = "rmio:660F3A41rMU",
  1245. dpps_3 = "rmio:660F3A40rMU",
  1246. extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  1247. insertps_3 = "rrio:660F3A41rMU|rxi/od:",
  1248. movntdqa_2 = "rmo:660F382ArM",
  1249. mpsadbw_3 = "rmio:660F3A42rMU",
  1250. packusdw_2 = "rmo:660F382BrM",
  1251. pblendvb_3 = "rmRo:660F3810rM",
  1252. pblendw_3 = "rmio:660F3A0ErMU",
  1253. pcmpeqq_2 = "rmo:660F3829rM",
  1254. pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  1255. pextrd_3 = "mri/do:660F3A16RmU",
  1256. pextrq_3 = "mri/qo:660F3A16RmU",
  1257. -- pextrw is SSE2, mem operand is SSE4.1 only
  1258. phminposuw_2 = "rmo:660F3841rM",
  1259. pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
  1260. pinsrd_3 = "rmi/od:660F3A22rMU",
  1261. pinsrq_3 = "rmi/oq:660F3A22rXMU",
  1262. pmaxsb_2 = "rmo:660F383CrM",
  1263. pmaxsd_2 = "rmo:660F383DrM",
  1264. pmaxud_2 = "rmo:660F383FrM",
  1265. pmaxuw_2 = "rmo:660F383ErM",
  1266. pminsb_2 = "rmo:660F3838rM",
  1267. pminsd_2 = "rmo:660F3839rM",
  1268. pminud_2 = "rmo:660F383BrM",
  1269. pminuw_2 = "rmo:660F383ArM",
  1270. pmovsxbd_2 = "rro:660F3821rM|rx/od:",
  1271. pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
  1272. pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
  1273. pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
  1274. pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
  1275. pmovsxwq_2 = "rro:660F3824rM|rx/od:",
  1276. pmovzxbd_2 = "rro:660F3831rM|rx/od:",
  1277. pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
  1278. pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
  1279. pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
  1280. pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
  1281. pmovzxwq_2 = "rro:660F3834rM|rx/od:",
  1282. pmuldq_2 = "rmo:660F3828rM",
  1283. pmulld_2 = "rmo:660F3840rM",
  1284. ptest_2 = "rmo:660F3817rM",
  1285. roundpd_3 = "rmio:660F3A09rMU",
  1286. roundps_3 = "rmio:660F3A08rMU",
  1287. roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
  1288. roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
  1289. -- SSE4.2 ops
  1290. crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  1291. pcmpestri_3 = "rmio:660F3A61rMU",
  1292. pcmpestrm_3 = "rmio:660F3A60rMU",
  1293. pcmpgtq_2 = "rmo:660F3837rM",
  1294. pcmpistri_3 = "rmio:660F3A63rMU",
  1295. pcmpistrm_3 = "rmio:660F3A62rMU",
  1296. popcnt_2 = "rmqdw:F30FB8rM",
  1297. -- SSE4a
  1298. extrq_2 = "rro:660F79rM",
  1299. extrq_3 = "riio:660F780mUU",
  1300. insertq_2 = "rro:F20F79rM",
  1301. insertq_4 = "rriio:F20F78rMUU",
  1302. lzcnt_2 = "rmqdw:F30FBDrM",
  1303. movntsd_2 = "xr/qo:nF20F2BRm",
  1304. movntss_2 = "xr/do:F30F2BRm",
  1305. -- popcnt is also in SSE4.2
  1306. }
  1307. ------------------------------------------------------------------------------
  1308. -- Arithmetic ops.
  1309. for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
  1310. ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
  1311. local n8 = shl(n, 3)
  1312. map_op[name.."_2"] = format(
  1313. "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
  1314. 1+n8, 3+n8, n, n, 5+n8, n)
  1315. end
  1316. -- Shift ops.
  1317. for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
  1318. shl = 4, shr = 5, sar = 7, sal = 4 } do
  1319. map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
  1320. end
  1321. -- Conditional ops.
  1322. for cc,n in pairs(map_cc) do
  1323. map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
  1324. map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
  1325. map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
  1326. end
  1327. -- FP arithmetic ops.
  1328. for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
  1329. sub = 4, subr = 5, div = 6, divr = 7 } do
  1330. local nc = 0xc0 + shl(n, 3)
  1331. local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
  1332. local fn = "f"..name
  1333. map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  1334. if n == 2 or n == 3 then
  1335. map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  1336. else
  1337. map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
  1338. map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
  1339. map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  1340. end
  1341. map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
  1342. end
  1343. -- FP conditional moves.
  1344. for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  1345. local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6)
  1346. map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
  1347. map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
  1348. end
  1349. -- SSE FP arithmetic ops.
  1350. for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
  1351. sub = 12, min = 13, div = 14, max = 15 } do
  1352. map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
  1353. map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
  1354. map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
  1355. map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
  1356. end
  1357. ------------------------------------------------------------------------------
  1358. -- Process pattern string.
  1359. local function dopattern(pat, args, sz, op, needrex)
  1360. local digit, addin
  1361. local opcode = 0
  1362. local szov = sz
  1363. local narg = 1
  1364. local rex = 0
  1365. -- Limit number of section buffer positions used by a single dasm_put().
  1366. -- A single opcode needs a maximum of 5 positions.
  1367. if secpos+5 > maxsecpos then wflush() end
  1368. -- Process each character.
  1369. for c in gmatch(pat.."|", ".") do
  1370. if match(c, "%x") then -- Hex digit.
  1371. digit = byte(c) - 48
  1372. if digit > 48 then digit = digit - 39
  1373. elseif digit > 16 then digit = digit - 7 end
  1374. opcode = opcode*16 + digit
  1375. addin = nil
  1376. elseif c == "n" then -- Disable operand size mods for opcode.
  1377. szov = nil
  1378. elseif c == "X" then -- Force REX.W.
  1379. rex = 8
  1380. elseif c == "r" then -- Merge 1st operand regno. into opcode.
  1381. addin = args[1]; opcode = opcode + (addin.reg % 8)
  1382. if narg < 2 then narg = 2 end
  1383. elseif c == "R" then -- Merge 2nd operand regno. into opcode.
  1384. addin = args[2]; opcode = opcode + (addin.reg % 8)
  1385. narg = 3
  1386. elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
  1387. local s
  1388. if addin then
  1389. s = addin.reg
  1390. opcode = opcode - band(s, 7) -- Undo regno opcode merge.
  1391. else
  1392. s = band(opcode, 15) -- Undo last digit.
  1393. opcode = shr(opcode, 4)
  1394. end
  1395. local nn = c == "m" and 1 or 2
  1396. local t = args[nn]
  1397. if narg <= nn then narg = nn + 1 end
  1398. if szov == "q" and rex == 0 then rex = rex + 8 end
  1399. if t.reg and t.reg > 7 then rex = rex + 1 end
  1400. if t.xreg and t.xreg > 7 then rex = rex + 2 end
  1401. if s > 7 then rex = rex + 4 end
  1402. if needrex then rex = rex + 16 end
  1403. wputop(szov, opcode, rex); opcode = nil
  1404. local imark = sub(pat, -1) -- Force a mark (ugly).
  1405. -- Put ModRM/SIB with regno/last digit as spare.
  1406. wputmrmsib(t, imark, s, addin and addin.vreg)
  1407. addin = nil
  1408. else
  1409. if opcode then -- Flush opcode.
  1410. if szov == "q" and rex == 0 then rex = rex + 8 end
  1411. if needrex then rex = rex + 16 end
  1412. if addin and addin.reg == -1 then
  1413. wputop(szov, opcode - 7, rex)
  1414. waction("VREG", addin.vreg); wputxb(0)
  1415. else
  1416. if addin and addin.reg > 7 then rex = rex + 1 end
  1417. wputop(szov, opcode, rex)
  1418. end
  1419. opcode = nil
  1420. end
  1421. if c == "|" then break end
  1422. if c == "o" then -- Offset (pure 32 bit displacement).
  1423. wputdarg(args[1].disp); if narg < 2 then narg = 2 end
  1424. elseif c == "O" then
  1425. wputdarg(args[2].disp); narg = 3
  1426. else
  1427. -- Anything else is an immediate operand.
  1428. local a = args[narg]
  1429. narg = narg + 1
  1430. local mode, imm = a.mode, a.imm
  1431. if mode == "iJ" and not match("iIJ", c) then
  1432. werror("bad operand size for label")
  1433. end
  1434. if c == "S" then
  1435. wputsbarg(imm)
  1436. elseif c == "U" then
  1437. wputbarg(imm)
  1438. elseif c == "W" then
  1439. wputwarg(imm)
  1440. elseif c == "i" or c == "I" then
  1441. if mode == "iJ" then
  1442. wputlabel("IMM_", imm, 1)
  1443. elseif mode == "iI" and c == "I" then
  1444. waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
  1445. else
  1446. wputszarg(sz, imm)
  1447. end
  1448. elseif c == "J" then
  1449. if mode == "iPJ" then
  1450. waction("REL_A", imm) -- !x64 (secpos)
  1451. else
  1452. wputlabel("REL_", imm, 2)
  1453. end
  1454. else
  1455. werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
  1456. end
  1457. end
  1458. end
  1459. end
  1460. end
  1461. ------------------------------------------------------------------------------
  1462. -- Mapping of operand modes to short names. Suppress output with '#'.
  1463. local map_modename = {
  1464. r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  1465. f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  1466. I = "#", S = "#", O = "#",
  1467. }
  1468. -- Return a table/string showing all possible operand modes.
  1469. local function templatehelp(template, nparams)
  1470. if nparams == 0 then return "" end
  1471. local t = {}
  1472. for tm in gmatch(template, "[^%|]+") do
  1473. local s = map_modename[sub(tm, 1, 1)]
  1474. s = s..gsub(sub(tm, 2, nparams), ".", function(c)
  1475. return ", "..map_modename[c]
  1476. end)
  1477. if not match(s, "#") then t[#t+1] = s end
  1478. end
  1479. return t
  1480. end
  1481. -- Match operand modes against mode match part of template.
  1482. local function matchtm(tm, args)
  1483. for i=1,#args do
  1484. if not match(args[i].mode, sub(tm, i, i)) then return end
  1485. end
  1486. return true
  1487. end
  1488. -- Handle opcodes defined with template strings.
  1489. map_op[".template__"] = function(params, template, nparams)
  1490. if not params then return templatehelp(template, nparams) end
  1491. local args = {}
  1492. -- Zero-operand opcodes have no match part.
  1493. if #params == 0 then
  1494. dopattern(template, args, "d", params.op, nil)
  1495. return
  1496. end
  1497. -- Determine common operand size (coerce undefined size) or flag as mixed.
  1498. local sz, szmix, needrex
  1499. for i,p in ipairs(params) do
  1500. args[i] = parseoperand(p)
  1501. local nsz = args[i].opsize
  1502. if nsz then
  1503. if sz and sz ~= nsz then szmix = true else sz = nsz end
  1504. end
  1505. local nrex = args[i].needrex
  1506. if nrex ~= nil then
  1507. if needrex == nil then
  1508. needrex = nrex
  1509. elseif needrex ~= nrex then
  1510. werror("bad mix of byte-addressable registers")
  1511. end
  1512. end
  1513. end
  1514. -- Try all match:pattern pairs (separated by '|').
  1515. local gotmatch, lastpat
  1516. for tm in gmatch(template, "[^%|]+") do
  1517. -- Split off size match (starts after mode match) and pattern string.
  1518. local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
  1519. if pat == "" then pat = lastpat else lastpat = pat end
  1520. if matchtm(tm, args) then
  1521. local prefix = sub(szm, 1, 1)
  1522. if prefix == "/" then -- Match both operand sizes.
  1523. if args[1].opsize == sub(szm, 2, 2) and
  1524. args[2].opsize == sub(szm, 3, 3) then
  1525. dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
  1526. return
  1527. end
  1528. else -- Match common operand size.
  1529. local szp = sz
  1530. if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
  1531. if prefix == "1" then szp = args[1].opsize; szmix = nil
  1532. elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
  1533. if not szmix and (prefix == "." or match(szm, szp or "#")) then
  1534. dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
  1535. return
  1536. end
  1537. end
  1538. gotmatch = true
  1539. end
  1540. end
  1541. local msg = "bad operand mode"
  1542. if gotmatch then
  1543. if szmix then
  1544. msg = "mixed operand size"
  1545. else
  1546. msg = sz and "bad operand size" or "missing operand size"
  1547. end
  1548. end
  1549. werror(msg.." in `"..opmodestr(params.op, args).."'")
  1550. end
  1551. ------------------------------------------------------------------------------
  1552. -- x64-specific opcode for 64 bit immediates and displacements.
  1553. if x64 then
  1554. function map_op.mov64_2(params)
  1555. if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
  1556. if secpos+2 > maxsecpos then wflush() end
  1557. local opcode, op64, sz, rex, vreg
  1558. local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
  1559. if op64 then
  1560. local a = parseoperand(params[2])
  1561. if a.mode ~= "rmR" then werror("bad operand mode") end
  1562. sz = a.opsize
  1563. rex = sz == "q" and 8 or 0
  1564. opcode = 0xa3
  1565. else
  1566. op64 = match(params[2], "^%[%s*(.-)%s*%]$")
  1567. local a = parseoperand(params[1])
  1568. if op64 then
  1569. if a.mode ~= "rmR" then werror("bad operand mode") end
  1570. sz = a.opsize
  1571. rex = sz == "q" and 8 or 0
  1572. opcode = 0xa1
  1573. else
  1574. if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
  1575. werror("bad operand mode")
  1576. end
  1577. op64 = params[2]
  1578. if a.reg == -1 then
  1579. vreg = a.vreg
  1580. opcode = 0xb8
  1581. else
  1582. opcode = 0xb8 + band(a.reg, 7)
  1583. end
  1584. rex = a.reg > 7 and 9 or 8
  1585. end
  1586. end
  1587. wputop(sz, opcode, rex)
  1588. if vreg then waction("VREG", vreg); wputxb(0) end
  1589. waction("IMM_D", format("(unsigned int)(%s)", op64))
  1590. waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  1591. end
  1592. end
  1593. ------------------------------------------------------------------------------
  1594. -- Pseudo-opcodes for data storage.
  1595. local function op_data(params)
  1596. if not params then return "imm..." end
  1597. local sz = sub(params.op, 2, 2)
  1598. if sz == "a" then sz = addrsize end
  1599. for _,p in ipairs(params) do
  1600. local a = parseoperand(p)
  1601. if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
  1602. werror("bad mode or size in `"..p.."'")
  1603. end
  1604. if a.mode == "iJ" then
  1605. wputlabel("IMM_", a.imm, 1)
  1606. else
  1607. wputszarg(sz, a.imm)
  1608. end
  1609. if secpos+2 > maxsecpos then wflush() end
  1610. end
  1611. end
  1612. map_op[".byte_*"] = op_data
  1613. map_op[".sbyte_*"] = op_data
  1614. map_op[".word_*"] = op_data
  1615. map_op[".dword_*"] = op_data
  1616. map_op[".aword_*"] = op_data
  1617. ------------------------------------------------------------------------------
  1618. -- Pseudo-opcode to mark the position where the action list is to be emitted.
  1619. map_op[".actionlist_1"] = function(params)
  1620. if not params then return "cvar" end
  1621. local name = params[1] -- No syntax check. You get to keep the pieces.
  1622. wline(function(out) writeactions(out, name) end)
  1623. end
  1624. -- Pseudo-opcode to mark the position where the global enum is to be emitted.
  1625. map_op[".globals_1"] = function(params)
  1626. if not params then return "prefix" end
  1627. local prefix = params[1] -- No syntax check. You get to keep the pieces.
  1628. wline(function(out) writeglobals(out, prefix) end)
  1629. end
  1630. -- Pseudo-opcode to mark the position where the global names are to be emitted.
  1631. map_op[".globalnames_1"] = function(params)
  1632. if not params then return "cvar" end
  1633. local name = params[1] -- No syntax check. You get to keep the pieces.
  1634. wline(function(out) writeglobalnames(out, name) end)
  1635. end
  1636. -- Pseudo-opcode to mark the position where the extern names are to be emitted.
  1637. map_op[".externnames_1"] = function(params)
  1638. if not params then return "cvar" end
  1639. local name = params[1] -- No syntax check. You get to keep the pieces.
  1640. wline(function(out) writeexternnames(out, name) end)
  1641. end
  1642. ------------------------------------------------------------------------------
  1643. -- Label pseudo-opcode (converted from trailing colon form).
  1644. map_op[".label_2"] = function(params)
  1645. if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  1646. if secpos+2 > maxsecpos then wflush() end
  1647. local a = parseoperand(params[1])
  1648. local mode, imm = a.mode, a.imm
  1649. if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
  1650. -- Local label (1: ... 9:) or global label (->global:).
  1651. waction("LABEL_LG", nil, 1)
  1652. wputxb(imm)
  1653. elseif mode == "iJ" then
  1654. -- PC label (=>pcexpr:).
  1655. waction("LABEL_PC", imm)
  1656. else
  1657. werror("bad label definition")
  1658. end
  1659. -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  1660. local addr = params[2]
  1661. if addr then
  1662. local a = parseoperand(addr)
  1663. if a.mode == "iPJ" then
  1664. waction("SETLABEL", a.imm)
  1665. else
  1666. werror("bad label assignment")
  1667. end
  1668. end
  1669. end
  1670. map_op[".label_1"] = map_op[".label_2"]
  1671. ------------------------------------------------------------------------------
  1672. -- Alignment pseudo-opcode.
  1673. map_op[".align_1"] = function(params)
  1674. if not params then return "numpow2" end
  1675. if secpos+1 > maxsecpos then wflush() end
  1676. local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  1677. if align then
  1678. local x = align
  1679. -- Must be a power of 2 in the range (2 ... 256).
  1680. for i=1,8 do
  1681. x = x / 2
  1682. if x == 1 then
  1683. waction("ALIGN", nil, 1)
  1684. wputxb(align-1) -- Action byte is 2**n-1.
  1685. return
  1686. end
  1687. end
  1688. end
  1689. werror("bad alignment")
  1690. end
  1691. -- Spacing pseudo-opcode.
  1692. map_op[".space_2"] = function(params)
  1693. if not params then return "num [, filler]" end
  1694. if secpos+1 > maxsecpos then wflush() end
  1695. waction("SPACE", params[1])
  1696. local fill = params[2]
  1697. if fill then
  1698. fill = tonumber(fill)
  1699. if not fill or fill < 0 or fill > 255 then werror("bad filler") end
  1700. end
  1701. wputxb(fill or 0)
  1702. end
  1703. map_op[".space_1"] = map_op[".space_2"]
  1704. ------------------------------------------------------------------------------
  1705. -- Pseudo-opcode for (primitive) type definitions (map to C types).
  1706. map_op[".type_3"] = function(params, nparams)
  1707. if not params then
  1708. return nparams == 2 and "name, ctype" or "name, ctype, reg"
  1709. end
  1710. local name, ctype, reg = params[1], params[2], params[3]
  1711. if not match(name, "^[%a_][%w_]*$") then
  1712. werror("bad type name `"..name.."'")
  1713. end
  1714. local tp = map_type[name]
  1715. if tp then
  1716. werror("duplicate type `"..name.."'")
  1717. end
  1718. if reg and not map_reg_valid_base[reg] then
  1719. werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  1720. end
  1721. -- Add #type to defines. A bit unclean to put it in map_archdef.
  1722. map_archdef["#"..name] = "sizeof("..ctype..")"
  1723. -- Add new type and emit shortcut define.
  1724. local num = ctypenum + 1
  1725. map_type[name] = {
  1726. ctype = ctype,
  1727. ctypefmt = format("Dt%X(%%s)", num),
  1728. reg = reg,
  1729. }
  1730. wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  1731. ctypenum = num
  1732. end
  1733. map_op[".type_2"] = map_op[".type_3"]
  1734. -- Dump type definitions.
  1735. local function dumptypes(out, lvl)
  1736. local t = {}
  1737. for name in pairs(map_type) do t[#t+1] = name end
  1738. sort(t)
  1739. out:write("Type definitions:\n")
  1740. for _,name in ipairs(t) do
  1741. local tp = map_type[name]
  1742. local reg = tp.reg and map_reg_rev[tp.reg] or ""
  1743. out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
  1744. end
  1745. out:write("\n")
  1746. end
  1747. ------------------------------------------------------------------------------
  1748. -- Set the current section.
  1749. function _M.section(num)
  1750. waction("SECTION")
  1751. wputxb(num)
  1752. wflush(true) -- SECTION is a terminal action.
  1753. end
  1754. ------------------------------------------------------------------------------
  1755. -- Dump architecture description.
  1756. function _M.dumparch(out)
  1757. out:write(format("DynASM %s version %s, released %s\n\n",
  1758. _info.arch, _info.version, _info.release))
  1759. dumpregs(out)
  1760. dumpactions(out)
  1761. end
  1762. -- Dump all user defined elements.
  1763. function _M.dumpdef(out, lvl)
  1764. dumptypes(out, lvl)
  1765. dumpglobals(out, lvl)
  1766. dumpexterns(out, lvl)
  1767. end
  1768. ------------------------------------------------------------------------------
  1769. -- Pass callbacks from/to the DynASM core.
  1770. function _M.passcb(wl, we, wf, ww)
  1771. wline, werror, wfatal, wwarn = wl, we, wf, ww
  1772. return wflush
  1773. end
  1774. -- Setup the arch-specific module.
  1775. function _M.setup(arch, opt)
  1776. g_arch, g_opt = arch, opt
  1777. end
  1778. -- Merge the core maps and the arch-specific maps.
  1779. function _M.mergemaps(map_coreop, map_def)
  1780. setmetatable(map_op, { __index = map_coreop })
  1781. setmetatable(map_def, { __index = map_archdef })
  1782. return map_op, map_def
  1783. end
  1784. return _M
  1785. ------------------------------------------------------------------------------