il.txt 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195
  1. ===========================
  2. QBE Intermediate Language
  3. ===========================
  4. - Table of Contents
  5. -------------------
  6. 1. <@ Basic Concepts >
  7. * <@ Input Files >
  8. * <@ BNF Notation >
  9. * <@ Sigils >
  10. * <@ Spacing >
  11. 2. <@ Types >
  12. * <@ Simple Types >
  13. * <@ Subtyping >
  14. 3. <@ Constants and Vals >
  15. 4. <@ Linkage >
  16. 5. <@ Definitions >
  17. * <@ Aggregate Types >
  18. * <@ Data >
  19. * <@ Functions >
  20. 6. <@ Control >
  21. * <@ Blocks >
  22. * <@ Jumps >
  23. 7. <@ Instructions >
  24. * <@ Arithmetic and Bits >
  25. * <@ Memory >
  26. * <@ Comparisons >
  27. * <@ Conversions >
  28. * <@ Cast and Copy >
  29. * <@ Call >
  30. * <@ Variadic >
  31. * <@ Phi >
  32. 8. <@ Instructions Index >
  33. - 1. Basic Concepts
  34. -------------------
  35. The intermediate language (IL) is a higher-level language
  36. than the machine's assembly language. It smoothes most
  37. of the irregularities of the underlying hardware and
  38. allows an infinite number of temporaries to be used.
  39. This higher abstraction level lets frontend programmers
  40. focus on language design issues.
  41. ~ Input Files
  42. ~~~~~~~~~~~~~
  43. The intermediate language is provided to QBE as text.
  44. Usually, one file is generated per each compilation unit from
  45. the frontend input language. An IL file is a sequence of
  46. <@ Definitions > for data, functions, and types. Once
  47. processed by QBE, the resulting file can be assembled and
  48. linked using a standard toolchain (e.g., GNU binutils).
  49. Here is a complete "Hello World" IL file which defines a
  50. function that prints to the screen. Since the string is
  51. not a first class object (only the pointer is) it is
  52. defined outside the function's body. Comments start with
  53. a # character and finish with the end of the line.
  54. # Define the string constant.
  55. data $str = { b "hello world", b 0 }
  56. export function w $main() {
  57. @start
  58. # Call the puts function with $str as argument.
  59. %r =w call $puts(l $str)
  60. ret 0
  61. }
  62. If you have read the LLVM language reference, you might
  63. recognize the example above. In comparison, QBE makes a
  64. much lighter use of types and the syntax is terser.
  65. ~ BNF Notation
  66. ~~~~~~~~~~~~~~
  67. The language syntax is vaporously described in the sections
  68. below using BNF syntax. The different BNF constructs used
  69. are listed below.
  70. * Keywords are enclosed between quotes;
  71. * `... | ...` expresses alternatives;
  72. * `( ... )` groups syntax;
  73. * `[ ... ]` marks the nested syntax as optional;
  74. * `( ... ),` designates a comma-separated list of the
  75. enclosed syntax;
  76. * `...*` and `...+` are used for arbitrary and
  77. at-least-once repetition respectively.
  78. ~ Sigils
  79. ~~~~~~~~
  80. The intermediate language makes heavy use of sigils, all
  81. user-defined names are prefixed with a sigil. This is
  82. to avoid keyword conflicts, and also to quickly spot the
  83. scope and nature of identifiers.
  84. * `:` is for user-defined <@ Aggregate Types>
  85. * `$` is for globals (represented by a pointer)
  86. * `%` is for function-scope temporaries
  87. * `@` is for block labels
  88. In this BNF syntax, we use `?IDENT` to designate an identifier
  89. starting with the sigil `?`.
  90. ~ Spacing
  91. ~~~~~~~~~
  92. `bnf
  93. NL := '\n'+
  94. Individual tokens in IL files must be separated by one or
  95. more spacing characters. Both spaces and tabs are recognized
  96. as spacing characters. In data and type definitions, newlines
  97. may also be used as spaces to prevent overly long lines. When
  98. exactly one of two consecutive tokens is a symbol (for example
  99. `,` or `=` or `{`), spacing may be omitted.
  100. - 2. Types
  101. ----------
  102. ~ Simple Types
  103. ~~~~~~~~~~~~~~
  104. `bnf
  105. BASETY := 'w' | 'l' | 's' | 'd' # Base types
  106. EXTTY := BASETY | 'b' | 'h' # Extended types
  107. The IL makes minimal use of types. By design, the types
  108. used are restricted to what is necessary for unambiguous
  109. compilation to machine code and C interfacing. Unlike LLVM,
  110. QBE is not using types as a means to safety; they are only
  111. here for semantic purposes.
  112. The four base types are `w` (word), `l` (long), `s` (single),
  113. and `d` (double), they stand respectively for 32-bit and
  114. 64-bit integers, and 32-bit and 64-bit floating-point numbers.
  115. There are no pointer types available; pointers are typed
  116. by an integer type sufficiently wide to represent all memory
  117. addresses (e.g., `l` on 64-bit architectures). Temporaries
  118. in the IL can only have a base type.
  119. Extended types contain base types plus `b` (byte) and `h`
  120. (half word), respectively for 8-bit and 16-bit integers.
  121. They are used in <@ Aggregate Types> and <@ Data> definitions.
  122. For C interfacing, the IL also provides user-defined aggregate
  123. types as well as signed and unsigned variants of the sub-word
  124. extended types. Read more about these types in the
  125. <@ Aggregate Types > and <@ Functions > sections.
  126. ~ Subtyping
  127. ~~~~~~~~~~~
  128. The IL has a minimal subtyping feature, for integer types only.
  129. Any value of type `l` can be used in a `w` context. In that
  130. case, only the 32 least significant bits of the word value
  131. are used.
  132. Make note that it is the opposite of the usual subtyping on
  133. integers (in C, we can safely use an `int` where a `long`
  134. is expected). A long value cannot be used in word context.
  135. The rationale is that a word can be signed or unsigned, so
  136. extending it to a long could be done in two ways, either
  137. by zero-extension, or by sign-extension.
  138. - 3. Constants and Vals
  139. -----------------------
  140. `bnf
  141. CONST :=
  142. ['-'] NUMBER # Decimal integer
  143. | 's_' FP # Single-precision float
  144. | 'd_' FP # Double-precision float
  145. | $IDENT # Global symbol
  146. DYNCONST :=
  147. CONST
  148. | 'thread' $IDENT # Thread-local symbol
  149. VAL :=
  150. DYNCONST
  151. | %IDENT
  152. Constants come in two kinds: compile-time constants and
  153. dynamic constants. Dynamic constants include compile-time
  154. constants and other symbol variants that are only known at
  155. program-load time or execution time. Consequently, dynamic
  156. constants can only occur in function bodies.
  157. The representation of integers is two's complement.
  158. Floating-point numbers are represented using the
  159. single-precision and double-precision formats of the
  160. IEEE 754 standard.
  161. Constants specify a sequence of bits and are untyped.
  162. They are always parsed as 64-bit blobs. Depending on
  163. the context surrounding a constant, only some of its
  164. bits are used. For example, in the program below, the
  165. two variables defined have the same value since the first
  166. operand of the subtraction is a word (32-bit) context.
  167. %x =w sub -1, 0
  168. %y =w sub 4294967295, 0
  169. Because specifying floating-point constants by their bits
  170. makes the code less readable, syntactic sugar is provided
  171. to express them. Standard scientific notation is prefixed
  172. with `s_` and `d_` for single and double precision numbers
  173. respectively. Once again, the following example defines twice
  174. the same double-precision constant.
  175. %x =d add d_0, d_-1
  176. %y =d add d_0, -4616189618054758400
  177. Global symbols can also be used directly as constants;
  178. they will be resolved and turned into actual numeric
  179. constants by the linker.
  180. When the `thread` keyword prefixes a symbol name, the
  181. symbol's numeric value is resolved at runtime in the
  182. thread-local storage.
  183. Vals are used as arguments in regular, phi, and jump
  184. instructions within function definitions. They are
  185. either constants or function-scope temporaries.
  186. - 4. Linkage
  187. ------------
  188. `bnf
  189. LINKAGE :=
  190. 'export' [NL]
  191. | 'thread' [NL]
  192. | 'section' SECNAME [NL]
  193. | 'section' SECNAME SECFLAGS [NL]
  194. SECNAME := '"' .... '"'
  195. SECFLAGS := '"' .... '"'
  196. Function and data definitions (see below) can specify
  197. linkage information to be passed to the assembler and
  198. eventually to the linker.
  199. The `export` linkage flag marks the defined item as
  200. visible outside the current file's scope. If absent,
  201. the symbol can only be referred to locally. Functions
  202. compiled by QBE and called from C need to be exported.
  203. The `thread` linkage flag can only qualify data
  204. definitions. It mandates that the object defined is
  205. stored in thread-local storage. Each time a runtime
  206. thread starts, the supporting platform runtime is in
  207. charge of making a new copy of the object for the
  208. fresh thread. Objects in thread-local storage must
  209. be accessed using the `thread $IDENT` syntax, as
  210. specified in the <@ Constants and Vals > section.
  211. A `section` flag can be specified to tell the linker to
  212. put the defined item in a certain section. The use of
  213. the section flag is platform dependent and we refer the
  214. user to the documentation of their assembler and linker
  215. for relevant information.
  216. section ".init_array"
  217. data $.init.f = { l $f }
  218. The section flag can be used to add function pointers to
  219. a global initialization list, as depicted above. Note
  220. that some platforms provide a BSS section that can be
  221. used to minimize the footprint of uniformly zeroed data.
  222. When this section is available, QBE will automatically
  223. make use of it and no section flag is required.
  224. The section and export linkage flags should each appear
  225. at most once in a definition. If multiple occurrences
  226. are present, QBE is free to use any.
  227. - 5. Definitions
  228. ----------------
  229. Definitions are the essential components of an IL file.
  230. They can define three types of objects: aggregate types,
  231. data, and functions. Aggregate types are never exported
  232. and do not compile to any code. Data and function
  233. definitions have file scope and are mutually recursive
  234. (even across IL files). Their visibility can be controlled
  235. using linkage flags.
  236. ~ Aggregate Types
  237. ~~~~~~~~~~~~~~~~~
  238. `bnf
  239. TYPEDEF :=
  240. # Regular type
  241. 'type' :IDENT '=' ['align' NUMBER]
  242. '{'
  243. ( SUBTY [NUMBER] ),
  244. '}'
  245. | # Union type
  246. 'type' :IDENT '=' ['align' NUMBER]
  247. '{'
  248. (
  249. '{'
  250. ( SUBTY [NUMBER] ),
  251. '}'
  252. )+
  253. '}'
  254. | # Opaque type
  255. 'type' :IDENT '=' 'align' NUMBER '{' NUMBER '}'
  256. SUBTY := EXTTY | :IDENT
  257. Aggregate type definitions start with the `type` keyword.
  258. They have file scope, but types must be defined before being
  259. referenced. The inner structure of a type is expressed by a
  260. comma-separated list of types enclosed in curly braces.
  261. type :fourfloats = { s, s, d, d }
  262. For ease of IL generation, a trailing comma is tolerated by
  263. the parser. In case many items of the same type are
  264. sequenced (like in a C array), the shorter array syntax
  265. can be used.
  266. type :abyteandmanywords = { b, w 100 }
  267. By default, the alignment of an aggregate type is the
  268. maximum alignment of its members. The alignment can be
  269. explicitly specified by the programmer.
  270. type :cryptovector = align 16 { w 4 }
  271. Union types allow the same chunk of memory to be used with
  272. different layouts. They are defined by enclosing multiple
  273. regular aggregate type bodies in a pair of curly braces.
  274. Size and alignment of union types are set to the maximum size
  275. and alignment of each variation or, in the case of alignment,
  276. can be explicitly specified.
  277. type :un9 = { { b } { s } }
  278. Opaque types are used when the inner structure of an
  279. aggregate cannot be specified; the alignment for opaque
  280. types is mandatory. They are defined simply by enclosing
  281. their size between curly braces.
  282. type :opaque = align 16 { 32 }
  283. ~ Data
  284. ~~~~~~
  285. `bnf
  286. DATADEF :=
  287. LINKAGE*
  288. 'data' $IDENT '=' ['align' NUMBER]
  289. '{'
  290. ( EXTTY DATAITEM+
  291. | 'z' NUMBER ),
  292. '}'
  293. DATAITEM :=
  294. $IDENT ['+' NUMBER] # Symbol and offset
  295. | '"' ... '"' # String
  296. | CONST # Constant
  297. Data definitions express objects that will be emitted in the
  298. compiled file. Their visibility and location in the compiled
  299. artifact are controlled with linkage flags described in the
  300. <@ Linkage > section.
  301. They define a global identifier (starting with the sigil
  302. `$`), that will contain a pointer to the object specified
  303. by the definition.
  304. Objects are described by a sequence of fields that start with
  305. a type letter. This letter can either be an extended type,
  306. or the `z` letter. If the letter used is an extended type,
  307. the data item following specifies the bits to be stored in
  308. the field. When several data items follow a letter, they
  309. initialize multiple fields of the same size.
  310. The members of a struct will be packed. This means that
  311. padding has to be emitted by the frontend when necessary.
  312. Alignment of the whole data objects can be manually specified,
  313. and when no alignment is provided, the maximum alignment from
  314. the platform is used.
  315. When the `z` letter is used the number following indicates
  316. the size of the field; the contents of the field are zero
  317. initialized. It can be used to add padding between fields
  318. or zero-initialize big arrays.
  319. Here are various examples of data definitions.
  320. # Three 32-bit values 1, 2, and 3
  321. # followed by a 0 byte.
  322. data $a = { w 1 2 3, b 0 }
  323. # A thousand bytes 0 initialized.
  324. data $b = { z 1000 }
  325. # An object containing two 64-bit
  326. # fields, one with all bits sets and the
  327. # other containing a pointer to the
  328. # object itself.
  329. data $c = { l -1, l $c }
  330. ~ Functions
  331. ~~~~~~~~~~~
  332. `bnf
  333. FUNCDEF :=
  334. LINKAGE*
  335. 'function' [ABITY] $IDENT '(' (PARAM), ')' [NL]
  336. '{' NL
  337. BLOCK+
  338. '}'
  339. PARAM :=
  340. ABITY %IDENT # Regular parameter
  341. | 'env' %IDENT # Environment parameter (first)
  342. | '...' # Variadic marker (last)
  343. SUBWTY := 'sb' | 'ub' | 'sh' | 'uh' # Sub-word types
  344. ABITY := BASETY | SUBWTY | :IDENT
  345. Function definitions contain the actual code to emit in
  346. the compiled file. They define a global symbol that
  347. contains a pointer to the function code. This pointer
  348. can be used in `call` instructions or stored in memory.
  349. The type given right before the function name is the
  350. return type of the function. All return values of this
  351. function must have this return type. If the return
  352. type is missing, the function must not return any value.
  353. The parameter list is a comma separated list of distinct
  354. temporary names prefixed by types. The types are used
  355. to correctly implement C compatibility. When an argument
  356. has an aggregate type, a pointer to the aggregate is passed
  357. by the caller. In the example below, we have to use a load
  358. instruction to get the value of the first (and only)
  359. member of the struct.
  360. type :one = { w }
  361. function w $getone(:one %p) {
  362. @start
  363. %val =w loadw %p
  364. ret %val
  365. }
  366. If a function accepts or returns values that are smaller
  367. than a word, such as `signed char` or `unsigned short` in C,
  368. one of the sub-word type must be used. The sub-word types
  369. `sb`, `ub`, `sh`, and `uh` stand, respectively, for signed
  370. and unsigned 8-bit values, and signed and unsigned 16-bit
  371. values. Parameters associated with a sub-word type of bit
  372. width N only have their N least significant bits set and
  373. have base type `w`. For example, the function
  374. function w $addbyte(w %a, sb %b) {
  375. @start
  376. %bw =w extsb %b
  377. %val =w add %a, %bw
  378. ret %val
  379. }
  380. needs to sign-extend its second argument before the
  381. addition. Dually, return values with sub-word types do
  382. not need to be sign or zero extended.
  383. If the parameter list ends with `...`, the function is
  384. a variadic function: it can accept a variable number of
  385. arguments. To access the extra arguments provided by
  386. the caller, use the `vastart` and `vaarg` instructions
  387. described in the <@ Variadic > section.
  388. Optionally, the parameter list can start with an
  389. environment parameter `env %e`. This special parameter is
  390. a 64-bit integer temporary (i.e., of type `l`). If the
  391. function does not use its environment parameter, callers
  392. can safely omit it. This parameter is invisible to a C
  393. caller: for example, the function
  394. export function w $add(env %e, w %a, w %b) {
  395. @start
  396. %c =w add %a, %b
  397. ret %c
  398. }
  399. must be given the C prototype `int add(int, int)`.
  400. The intended use of this feature is to pass the
  401. environment pointer of closures while retaining a
  402. very good compatibility with C. The <@ Call > section
  403. explains how to pass an environment parameter.
  404. Since global symbols are defined mutually recursive,
  405. there is no need for function declarations: a function
  406. can be referenced before its definition.
  407. Similarly, functions from other modules can be used
  408. without previous declaration. All the type information
  409. necessary to compile a call is in the instruction itself.
  410. The syntax and semantics for the body of functions
  411. are described in the <@ Control > section.
  412. - 6. Control
  413. ------------
  414. The IL represents programs as textual transcriptions of
  415. control flow graphs. The control flow is serialized as
  416. a sequence of blocks of straight-line code which are
  417. connected using jump instructions.
  418. ~ Blocks
  419. ~~~~~~~~
  420. `bnf
  421. BLOCK :=
  422. @IDENT NL # Block label
  423. ( PHI NL )* # Phi instructions
  424. ( INST NL )* # Regular instructions
  425. JUMP NL # Jump or return
  426. All blocks have a name that is specified by a label at
  427. their beginning. Then follows a sequence of instructions
  428. that have "fall-through" flow. Finally one jump terminates
  429. the block. The jump can either transfer control to another
  430. block of the same function or return; jumps are described
  431. further below.
  432. The first block in a function must not be the target of
  433. any jump in the program. If a jump to the function start
  434. is needed, the frontend must insert an empty prelude block
  435. at the beginning of the function.
  436. When one block jumps to the next block in the IL file,
  437. it is not necessary to write the jump instruction, it
  438. will be automatically added by the parser. For example
  439. the start block in the example below jumps directly
  440. to the loop block.
  441. function $loop() {
  442. @start
  443. @loop
  444. %x =w phi @start 100, @loop %x1
  445. %x1 =w sub %x, 1
  446. jnz %x1, @loop, @end
  447. @end
  448. ret
  449. }
  450. ~ Jumps
  451. ~~~~~~~
  452. `bnf
  453. JUMP :=
  454. 'jmp' @IDENT # Unconditional
  455. | 'jnz' VAL, @IDENT, @IDENT # Conditional
  456. | 'ret' [VAL] # Return
  457. | 'hlt' # Termination
  458. A jump instruction ends every block and transfers the
  459. control to another program location. The target of
  460. a jump must never be the first block in a function.
  461. The three kinds of jumps available are described in
  462. the following list.
  463. 1. Unconditional jump.
  464. Simply jumps to another block of the same function.
  465. 2. Conditional jump.
  466. When its word argument is non-zero, it jumps to its
  467. first label argument; otherwise it jumps to the other
  468. label. The argument must be of word type; because of
  469. subtyping a long argument can be passed, but only its
  470. least significant 32 bits will be compared to 0.
  471. 3. Function return.
  472. Terminates the execution of the current function,
  473. optionally returning a value to the caller. The value
  474. returned must be of the type given in the function
  475. prototype. If the function prototype does not specify
  476. a return type, no return value can be used.
  477. 4. Program termination.
  478. Terminates the execution of the program with a
  479. target-dependent error. This instruction can be used
  480. when it is expected that the execution never reaches
  481. the end of the block it closes; for example, after
  482. having called a function such as `exit()`.
  483. - 7. Instructions
  484. -----------------
  485. Instructions are the smallest piece of code in the IL, they
  486. form the body of <@ Blocks >. The IL uses a three-address
  487. code, which means that one instruction computes an operation
  488. between two operands and assigns the result to a third one.
  489. An instruction has both a name and a return type, this
  490. return type is a base type that defines the size of the
  491. instruction's result. The type of the arguments can be
  492. unambiguously inferred using the instruction name and the
  493. return type. For example, for all arithmetic instructions,
  494. the type of the arguments is the same as the return type.
  495. The two additions below are valid if `%y` is a word or a long
  496. (because of <@ Subtyping >).
  497. %x =w add 0, %y
  498. %z =w add %x, %x
  499. Some instructions, like comparisons and memory loads
  500. have operand types that differ from their return types.
  501. For instance, two floating points can be compared to give a
  502. word result (0 if the comparison succeeds, 1 if it fails).
  503. %c =w cgts %a, %b
  504. In the example above, both operands have to have single type.
  505. This is made explicit by the instruction suffix.
  506. The types of instructions are described below using a short
  507. type string. A type string specifies all the valid return
  508. types an instruction can have, its arity, and the type of
  509. its arguments depending on its return type.
  510. Type strings begin with acceptable return types, then
  511. follows, in parentheses, the possible types for the arguments.
  512. If the N-th return type of the type string is used for an
  513. instruction, the arguments must use the N-th type listed for
  514. them in the type string. When an instruction does not have a
  515. return type, the type string only contains the types of the
  516. arguments.
  517. The following abbreviations are used.
  518. * `T` stands for `wlsd`
  519. * `I` stands for `wl`
  520. * `F` stands for `sd`
  521. * `m` stands for the type of pointers on the target; on
  522. 64-bit architectures it is the same as `l`
  523. For example, consider the type string `wl(F)`, it mentions
  524. that the instruction has only one argument and that if the
  525. return type used is long, the argument must be of type double.
  526. ~ Arithmetic and Bits
  527. ~~~~~~~~~~~~~~~~~~~~~
  528. * `add`, `sub`, `div`, `mul` -- `T(T,T)`
  529. * `neg` -- `T(T)`
  530. * `udiv`, `rem`, `urem` -- `I(I,I)`
  531. * `or`, `xor`, `and` -- `I(I,I)`
  532. * `sar`, `shr`, `shl` -- `I(I,ww)`
  533. The base arithmetic instructions in the first bullet are
  534. available for all types, integers and floating points.
  535. When `div` is used with word or long return type, the
  536. arguments are treated as signed. The unsigned integral
  537. division is available as `udiv` instruction. When the
  538. result of a division is not an integer, it is truncated
  539. towards zero.
  540. The signed and unsigned remainder operations are available
  541. as `rem` and `urem`. The sign of the remainder is the same
  542. as the one of the dividend. Its magnitude is smaller than
  543. the divisor one. These two instructions and `udiv` are only
  544. available with integer arguments and result.
  545. Bitwise OR, AND, and XOR operations are available for both
  546. integer types. Logical operations of typical programming
  547. languages can be implemented using <@ Comparisons > and
  548. <@ Jumps >.
  549. Shift instructions `sar`, `shr`, and `shl`, shift right or
  550. left their first operand by the amount from the second
  551. operand. The shifting amount is taken modulo the size of
  552. the result type. Shifting right can either preserve the
  553. sign of the value (using `sar`), or fill the newly freed
  554. bits with zeroes (using `shr`). Shifting left always
  555. fills the freed bits with zeroes.
  556. Remark that an arithmetic shift right (`sar`) is only
  557. equivalent to a division by a power of two for non-negative
  558. numbers. This is because the shift right "truncates"
  559. towards minus infinity, while the division truncates
  560. towards zero.
  561. ~ Memory
  562. ~~~~~~~~
  563. * Store instructions.
  564. * `stored` -- `(d,m)`
  565. * `stores` -- `(s,m)`
  566. * `storel` -- `(l,m)`
  567. * `storew` -- `(w,m)`
  568. * `storeh` -- `(w,m)`
  569. * `storeb` -- `(w,m)`
  570. Store instructions exist to store a value of any base type
  571. and any extended type. Since halfwords and bytes are not
  572. first class in the IL, `storeh` and `storeb` take a word
  573. as argument. Only the first 16 or 8 bits of this word will
  574. be stored in memory at the address specified in the second
  575. argument.
  576. * Load instructions.
  577. * `loadd` -- `d(m)`
  578. * `loads` -- `s(m)`
  579. * `loadl` -- `l(m)`
  580. * `loadsw`, `loaduw` -- `I(mm)`
  581. * `loadsh`, `loaduh` -- `I(mm)`
  582. * `loadsb`, `loadub` -- `I(mm)`
  583. For types smaller than long, two variants of the load
  584. instruction are available: one will sign extend the loaded
  585. value, while the other will zero extend it. Note that
  586. all loads smaller than long can load to either a long or
  587. a word.
  588. The two instructions `loadsw` and `loaduw` have the same
  589. effect when they are used to define a word temporary.
  590. A `loadw` instruction is provided as syntactic sugar for
  591. `loadsw` to make explicit that the extension mechanism
  592. used is irrelevant.
  593. * Blits.
  594. * `blit` -- `(m,m,w)`
  595. The blit instruction copies in-memory data from its
  596. first address argument to its second address argument.
  597. The third argument is the number of bytes to copy. The
  598. source and destination spans are required to be either
  599. non-overlapping, or fully overlapping (source address
  600. identical to the destination address). The byte count
  601. argument must be a nonnegative numeric constant; it
  602. cannot be a temporary.
  603. One blit instruction may generate a number of
  604. instructions proportional to its byte count argument,
  605. consequently, it is recommended to keep this argument
  606. relatively small. If large copies are necessary, it is
  607. preferable that frontends generate calls to a supporting
  608. `memcpy` function.
  609. * Stack allocation.
  610. * `alloc4` -- `m(l)`
  611. * `alloc8` -- `m(l)`
  612. * `alloc16` -- `m(l)`
  613. These instructions allocate a chunk of memory on the
  614. stack. The number ending the instruction name is the
  615. alignment required for the allocated slot. QBE will
  616. make sure that the returned address is a multiple of
  617. that alignment value.
  618. Stack allocation instructions are used, for example,
  619. when compiling the C local variables, because their
  620. address can be taken. When compiling Fortran,
  621. temporaries can be used directly instead, because
  622. it is illegal to take the address of a variable.
  623. The following example makes use of some of the memory
  624. instructions. Pointers are stored in long temporaries.
  625. %A0 =l alloc4 8 # stack allocate an array A of 2 words
  626. %A1 =l add %A0, 4
  627. storew 43, %A0 # A[0] <- 43
  628. storew 255, %A1 # A[1] <- 255
  629. %v1 =w loadw %A0 # %v1 <- A[0] as word
  630. %v2 =w loadsb %A1 # %v2 <- A[1] as signed byte
  631. %v3 =w add %v1, %v2 # %v3 is 42 here
  632. ~ Comparisons
  633. ~~~~~~~~~~~~~
  634. Comparison instructions return an integer value (either a word
  635. or a long), and compare values of arbitrary types. The returned
  636. value is 1 if the two operands satisfy the comparison
  637. relation, or 0 otherwise. The names of comparisons respect
  638. a standard naming scheme in three parts.
  639. 1. All comparisons start with the letter `c`.
  640. 2. Then comes a comparison type. The following
  641. types are available for integer comparisons:
  642. * `eq` for equality
  643. * `ne` for inequality
  644. * `sle` for signed lower or equal
  645. * `slt` for signed lower
  646. * `sge` for signed greater or equal
  647. * `sgt` for signed greater
  648. * `ule` for unsigned lower or equal
  649. * `ult` for unsigned lower
  650. * `uge` for unsigned greater or equal
  651. * `ugt` for unsigned greater
  652. Floating point comparisons use one of these types:
  653. * `eq` for equality
  654. * `ne` for inequality
  655. * `le` for lower or equal
  656. * `lt` for lower
  657. * `ge` for greater or equal
  658. * `gt` for greater
  659. * `o` for ordered (no operand is a NaN)
  660. * `uo` for unordered (at least one operand is a NaN)
  661. Because floating point types always have a sign bit,
  662. all the comparisons available are signed.
  663. 3. Finally, the instruction name is terminated with a
  664. basic type suffix precising the type of the operands
  665. to be compared.
  666. For example, `cod` (`I(dd,dd)`) compares two double-precision
  667. floating point numbers and returns 1 if the two floating points
  668. are not NaNs, or 0 otherwise. The `csltw` (`I(ww,ww)`)
  669. instruction compares two words representing signed numbers and
  670. returns 1 when the first argument is smaller than the second one.
  671. ~ Conversions
  672. ~~~~~~~~~~~~~
  673. Conversion operations change the representation of a value,
  674. possibly modifying it if the target type cannot hold the value
  675. of the source type. Conversions can extend the precision of a
  676. temporary (e.g., from signed 8-bit to 32-bit), or convert a
  677. floating point into an integer and vice versa.
  678. * `extsw`, `extuw` -- `l(w)`
  679. * `extsh`, `extuh` -- `I(ww)`
  680. * `extsb`, `extub` -- `I(ww)`
  681. * `exts` -- `d(s)`
  682. * `truncd` -- `s(d)`
  683. * `stosi` -- `I(ss)`
  684. * `stoui` -- `I(ss)`
  685. * `dtosi` -- `I(dd)`
  686. * `dtoui` -- `I(dd)`
  687. * `swtof` -- `F(ww)`
  688. * `uwtof` -- `F(ww)`
  689. * `sltof` -- `F(ll)`
  690. * `ultof` -- `F(ll)`
  691. Extending the precision of a temporary is done using the
  692. `ext` family of instructions. Because QBE types do not
  693. specify the signedness (like in LLVM), extension instructions
  694. exist to sign-extend and zero-extend a value. For example,
  695. `extsb` takes a word argument and sign-extends the 8
  696. least-significant bits to a full word or long, depending on
  697. the return type.
  698. The instructions `exts` (extend single) and `truncd` (truncate
  699. double) are provided to change the precision of a floating
  700. point value. When the double argument of `truncd` cannot
  701. be represented as a single-precision floating point, it is
  702. truncated towards zero.
  703. Converting between signed integers and floating points is done
  704. using `stosi` (single to signed integer), `stoui` (single to
  705. unsigned integer, `dtosi` (double to signed integer), `dtoui`
  706. (double to unsigned integer), `swtof` (signed word to float),
  707. `uwtof` (unsigned word to float), `sltof` (signed long to
  708. float) and `ultof` (unsigned long to float).
  709. Because of <@ Subtyping >, there is no need to have an
  710. instruction to lower the precision of an integer temporary.
  711. ~ Cast and Copy
  712. ~~~~~~~~~~~~~~~
  713. The `cast` and `copy` instructions return the bits of their
  714. argument verbatim. However a `cast` will change an integer
  715. into a floating point of the same width and vice versa.
  716. * `cast` -- `wlsd(sdwl)`
  717. * `copy` -- `T(T)`
  718. Casts can be used to make bitwise operations on the
  719. representation of floating point numbers. For example
  720. the following program will compute the opposite of the
  721. single-precision floating point number `%f` into `%rs`.
  722. %b0 =w cast %f
  723. %b1 =w xor 2147483648, %b0 # flip the msb
  724. %rs =s cast %b1
  725. ~ Call
  726. ~~~~~~
  727. `bnf
  728. CALL := [%IDENT '=' ABITY] 'call' VAL '(' (ARG), ')'
  729. ARG :=
  730. ABITY VAL # Regular argument
  731. | 'env' VAL # Environment argument (first)
  732. | '...' # Variadic marker
  733. SUBWTY := 'sb' | 'ub' | 'sh' | 'uh' # Sub-word types
  734. ABITY := BASETY | SUBWTY | :IDENT
  735. The call instruction is special in several ways. It is not
  736. a three-address instruction and requires the type of all
  737. its arguments to be given. Also, the return type can be
  738. either a base type or an aggregate type. These specifics
  739. are required to compile calls with C compatibility (i.e.,
  740. to respect the ABI).
  741. When an aggregate type is used as argument type or return
  742. type, the value respectively passed or returned needs to be
  743. a pointer to a memory location holding the value. This is
  744. because aggregate types are not first-class citizens of
  745. the IL.
  746. Sub-word types are used for arguments and return values
  747. of width less than a word. Details on these types are
  748. presented in the <@ Functions > section. Arguments with
  749. sub-word types need not be sign or zero extended according
  750. to their type. Calls with a sub-word return type define
  751. a temporary of base type `w` with its most significant bits
  752. unspecified.
  753. Unless the called function does not return a value, a
  754. return temporary must be specified, even if it is never
  755. used afterwards.
  756. An environment parameter can be passed as first argument
  757. using the `env` keyword. The passed value must be a 64-bit
  758. integer. If the called function does not expect an environment
  759. parameter, it will be safely discarded. See the <@ Functions >
  760. section for more information about environment parameters.
  761. When the called function is variadic, there must be a `...`
  762. marker separating the named and variadic arguments.
  763. ~ Variadic
  764. ~~~~~~~~~~
  765. The `vastart` and `vaarg` instructions provide a portable
  766. way to access the extra parameters of a variadic function.
  767. * `vastart` -- `(m)`
  768. * `vaarg` -- `T(mmmm)`
  769. The `vastart` instruction initializes a *variable argument
  770. list* used to access the extra parameters of the enclosing
  771. variadic function. It is safe to call it multiple times.
  772. The `vaarg` instruction fetches the next argument from
  773. a variable argument list. It is currently limited to
  774. fetching arguments that have a base type. This instruction
  775. is essentially effectful: calling it twice in a row will
  776. return two consecutive arguments from the argument list.
  777. Both instructions take a pointer to a variable argument
  778. list as sole argument. The size and alignment of variable
  779. argument lists depend on the target used. However, it
  780. is possible to conservatively use the maximum size and
  781. alignment required by all the targets.
  782. type :valist = align 8 { 24 } # For amd64_sysv
  783. type :valist = align 8 { 32 } # For arm64
  784. type :valist = align 8 { 8 } # For rv64
  785. The following example defines a variadic function adding
  786. its first three arguments.
  787. function s $add3(s %a, ...) {
  788. @start
  789. %ap =l alloc8 32
  790. vastart %ap
  791. %r =s call $vadd(s %a, l %ap)
  792. ret %r
  793. }
  794. function s $vadd(s %a, l %ap) {
  795. @start
  796. %b =s vaarg %ap
  797. %c =s vaarg %ap
  798. %d =s add %a, %b
  799. %e =s add %d, %c
  800. ret %e
  801. }
  802. ~ Phi
  803. ~~~~~
  804. `bnf
  805. PHI := %IDENT '=' BASETY 'phi' ( @IDENT VAL ),
  806. First and foremost, phi instructions are NOT necessary when
  807. writing a frontend to QBE. One solution to avoid having to
  808. deal with SSA form is to use stack allocated variables for
  809. all source program variables and perform assignments and
  810. lookups using <@ Memory > operations. This is what LLVM
  811. users typically do.
  812. Another solution is to simply emit code that is not in SSA
  813. form! Contrary to LLVM, QBE is able to fixup programs not
  814. in SSA form without requiring the boilerplate of loading
  815. and storing in memory. For example, the following program
  816. will be correctly compiled by QBE.
  817. @start
  818. %x =w copy 100
  819. %s =w copy 0
  820. @loop
  821. %s =w add %s, %x
  822. %x =w sub %x, 1
  823. jnz %x, @loop, @end
  824. @end
  825. ret %s
  826. Now, if you want to know what phi instructions are and how
  827. to use them in QBE, you can read the following.
  828. Phi instructions are specific to SSA form. In SSA form
  829. values can only be assigned once, without phi instructions,
  830. this requirement is too strong to represent many programs.
  831. For example consider the following C program.
  832. int f(int x) {
  833. int y;
  834. if (x)
  835. y = 1;
  836. else
  837. y = 2;
  838. return y;
  839. }
  840. The variable `y` is assigned twice, the solution to
  841. translate it in SSA form is to insert a phi instruction.
  842. @ifstmt
  843. jnz %x, @ift, @iff
  844. @ift
  845. jmp @retstmt
  846. @iff
  847. jmp @retstmt
  848. @retstmt
  849. %y =w phi @ift 1, @iff 2
  850. ret %y
  851. Phi instructions return one of their arguments depending
  852. on where the control came from. In the example, `%y` is
  853. set to 1 if the `@ift` branch is taken, or it is set to
  854. 2 otherwise.
  855. An important remark about phi instructions is that QBE
  856. assumes that if a variable is defined by a phi it respects
  857. all the SSA invariants. So it is critical to not use phi
  858. instructions unless you know exactly what you are doing.
  859. - 8. Instructions Index
  860. -----------------------
  861. * <@ Arithmetic and Bits >:
  862. * `add`
  863. * `and`
  864. * `div`
  865. * `mul`
  866. * `neg`
  867. * `or`
  868. * `rem`
  869. * `sar`
  870. * `shl`
  871. * `shr`
  872. * `sub`
  873. * `udiv`
  874. * `urem`
  875. * `xor`
  876. * <@ Memory >:
  877. * `alloc16`
  878. * `alloc4`
  879. * `alloc8`
  880. * `blit`
  881. * `loadd`
  882. * `loadl`
  883. * `loads`
  884. * `loadsb`
  885. * `loadsh`
  886. * `loadsw`
  887. * `loadub`
  888. * `loaduh`
  889. * `loaduw`
  890. * `loadw`
  891. * `storeb`
  892. * `stored`
  893. * `storeh`
  894. * `storel`
  895. * `stores`
  896. * `storew`
  897. * <@ Comparisons >:
  898. * `ceqd`
  899. * `ceql`
  900. * `ceqs`
  901. * `ceqw`
  902. * `cged`
  903. * `cges`
  904. * `cgtd`
  905. * `cgts`
  906. * `cled`
  907. * `cles`
  908. * `cltd`
  909. * `clts`
  910. * `cned`
  911. * `cnel`
  912. * `cnes`
  913. * `cnew`
  914. * `cod`
  915. * `cos`
  916. * `csgel`
  917. * `csgew`
  918. * `csgtl`
  919. * `csgtw`
  920. * `cslel`
  921. * `cslew`
  922. * `csltl`
  923. * `csltw`
  924. * `cugel`
  925. * `cugew`
  926. * `cugtl`
  927. * `cugtw`
  928. * `culel`
  929. * `culew`
  930. * `cultl`
  931. * `cultw`
  932. * `cuod`
  933. * `cuos`
  934. * <@ Conversions >:
  935. * `dtosi`
  936. * `dtoui`
  937. * `exts`
  938. * `extsb`
  939. * `extsh`
  940. * `extsw`
  941. * `extub`
  942. * `extuh`
  943. * `extuw`
  944. * `sltof`
  945. * `ultof`
  946. * `stosi`
  947. * `stoui`
  948. * `swtof`
  949. * `uwtof`
  950. * `truncd`
  951. * <@ Cast and Copy > :
  952. * `cast`
  953. * `copy`
  954. * <@ Call >:
  955. * `call`
  956. * <@ Variadic >:
  957. * `vastart`
  958. * `vaarg`
  959. * <@ Phi >:
  960. * `phi`
  961. * <@ Jumps >:
  962. * `hlt`
  963. * `jmp`
  964. * `jnz`
  965. * `ret`