|
@@ -1,3 +1,12 @@
|
|
|
+// Glossary (because i don't know where else to put it)
|
|
|
+// IR - intermediate representation
|
|
|
+// SoN - sea of nodes (https://www.oracle.com/technetwork/java/javase/tech/c2-ir95-150110.pdf)
|
|
|
+// SSA - single static assignment
|
|
|
+// GVN - global value numbering
|
|
|
+// CSE - common subexpression elimination
|
|
|
+// DSE - dead store elimination
|
|
|
+// GCM - global code motion
|
|
|
+// SROA - scalar replacement of aggregates
|
|
|
#ifndef TB_CORE_H
|
|
|
#define TB_CORE_H
|
|
|
|
|
@@ -150,18 +159,19 @@ typedef enum TB_ISelMode {
|
|
|
|
|
|
typedef enum TB_DataTypeEnum {
|
|
|
// Integers, note void is an i0 and bool is an i1
|
|
|
- // i(0-2047)
|
|
|
+ // i(0-64)
|
|
|
TB_INT,
|
|
|
// Floating point numbers
|
|
|
// f{32,64}
|
|
|
TB_FLOAT,
|
|
|
// Pointers
|
|
|
- // ptr(0-2047)
|
|
|
TB_PTR,
|
|
|
// Tuples, these cannot be used in memory ops, just accessed via projections
|
|
|
TB_TUPLE,
|
|
|
- // represents control flow as a kind of data
|
|
|
+ // represents control flow for REGION, BRANCH
|
|
|
TB_CONTROL,
|
|
|
+ // represents memory (and I/O)
|
|
|
+ TB_MEMORY,
|
|
|
} TB_DataTypeEnum;
|
|
|
|
|
|
typedef enum TB_FloatFormat {
|
|
@@ -179,6 +189,7 @@ typedef union TB_DataType {
|
|
|
};
|
|
|
uint32_t raw;
|
|
|
} TB_DataType;
|
|
|
+static_assert(sizeof(TB_DataType) == 4, "im expecting this to be a uint32_t");
|
|
|
|
|
|
// classify data types
|
|
|
#define TB_IS_VOID_TYPE(x) ((x).type == TB_INT && (x).data == 0)
|
|
@@ -192,75 +203,136 @@ typedef union TB_DataType {
|
|
|
#define TB_GET_FLOAT_FORMAT(x) ((x).data)
|
|
|
#define TB_GET_PTR_ADDRSPACE(x) ((x).data)
|
|
|
|
|
|
+////////////////////////////////
|
|
|
+// ANNOTATIONS
|
|
|
+////////////////////////////////
|
|
|
+//
|
|
|
+// (A, B) -> (C, D)
|
|
|
+//
|
|
|
+// node takes A and B, produces C, D. if there's multiple
|
|
|
+// results we need to use projections and the indices are
|
|
|
+// based on the order seen here, proj0 is C, proj1 is D.
|
|
|
+//
|
|
|
+// (A, B) & C -> Int
|
|
|
+//
|
|
|
+// nodes takes A and B along with C in it's extra data. this is
|
|
|
+// where non-node inputs fit.
|
|
|
+//
|
|
|
typedef enum TB_NodeTypeEnum {
|
|
|
TB_NULL = 0,
|
|
|
|
|
|
- // Immediates
|
|
|
+ ////////////////////////////////
|
|
|
+ // CONSTANTS
|
|
|
+ ////////////////////////////////
|
|
|
TB_INTEGER_CONST,
|
|
|
TB_FLOAT32_CONST,
|
|
|
TB_FLOAT64_CONST,
|
|
|
|
|
|
- // only one per function
|
|
|
- TB_START, // fn()
|
|
|
-
|
|
|
- // regions represent the begining of BBs
|
|
|
- TB_REGION, // fn(preds: []region)
|
|
|
-
|
|
|
- // projection
|
|
|
- TB_PROJ,
|
|
|
-
|
|
|
- TB_CALL, // normal call
|
|
|
- TB_SYSCALL, // system call
|
|
|
-
|
|
|
- // Managed ops
|
|
|
- TB_SAFEPOINT,
|
|
|
-
|
|
|
- // Memory operations
|
|
|
- TB_STORE, // fn(r: control, addr: data, src: data)
|
|
|
- TB_MEMCPY,
|
|
|
- TB_MEMSET,
|
|
|
-
|
|
|
- // Atomics
|
|
|
- TB_ATOMIC_TEST_AND_SET,
|
|
|
- TB_ATOMIC_CLEAR,
|
|
|
-
|
|
|
- TB_ATOMIC_LOAD,
|
|
|
- TB_ATOMIC_XCHG,
|
|
|
- TB_ATOMIC_ADD,
|
|
|
- TB_ATOMIC_SUB,
|
|
|
- TB_ATOMIC_AND,
|
|
|
- TB_ATOMIC_XOR,
|
|
|
- TB_ATOMIC_OR,
|
|
|
-
|
|
|
- TB_ATOMIC_CMPXCHG,
|
|
|
- TB_DEBUGBREAK,
|
|
|
-
|
|
|
- // Terminators
|
|
|
- TB_BRANCH,
|
|
|
- TB_RET,
|
|
|
- TB_UNREACHABLE,
|
|
|
- TB_TRAP,
|
|
|
-
|
|
|
- TB_POISON,
|
|
|
-
|
|
|
- // Load
|
|
|
- TB_LOAD,
|
|
|
-
|
|
|
- // Pointers
|
|
|
- TB_LOCAL,
|
|
|
-
|
|
|
- TB_GET_SYMBOL_ADDRESS,
|
|
|
-
|
|
|
- TB_MEMBER_ACCESS,
|
|
|
- TB_ARRAY_ACCESS,
|
|
|
+ ////////////////////////////////
|
|
|
+ // MISCELLANEOUS
|
|
|
+ ////////////////////////////////
|
|
|
+ // this is an unspecified value, usually generated by the optimizer
|
|
|
+ // when malformed input is folded into an operation.
|
|
|
+ TB_POISON, // () -> Any
|
|
|
+ // projections just extract a single field of a tuple
|
|
|
+ TB_PROJ, // Tuple & Int -> Any
|
|
|
+ // this is a simple way to embed machine code into the code
|
|
|
+ TB_MACHINE_OP, // (Control, Memory) & Buffer -> (Control, Memory)
|
|
|
+ // reads the TSC on x64
|
|
|
+ TB_CYCLE_COUNTER, // (Control) -> Int64
|
|
|
+
|
|
|
+ ////////////////////////////////
|
|
|
+ // CONTROL
|
|
|
+ ////////////////////////////////
|
|
|
+ // there's only one START and STOP per function
|
|
|
+ TB_START, // () -> (Control, Memory, Data...)
|
|
|
+ TB_END, // (Control, Memory, Data?) -> ()
|
|
|
+ // regions are used to represent paths which have multiple entries.
|
|
|
+ // each input is a predecessor.
|
|
|
+ TB_REGION, // (Control...) -> (Control)
|
|
|
+ // phi nodes work the same as in SSA CFG, the value is based on which predecessor was taken.
|
|
|
+ // each input lines up with the regions such that region.in[i] will use phi.in[i+1] as the
|
|
|
+ // subsequent data.
|
|
|
+ TB_PHI, // (Control, Data...) -> Data
|
|
|
+ // branch is used to implement most control flow, it acts like a switch
|
|
|
+ // statement in C usually. they take a key and match against some cases,
|
|
|
+ // if they match, it'll jump to that successor, if none match it'll take
|
|
|
+ // the default successor.
|
|
|
+ //
|
|
|
+ // if (cond) { A; } else { B; } is just switch (cond) { case 0: B; default: A; }
|
|
|
+ //
|
|
|
+ // it's possible to not pass a key and the default successor is always called, this is
|
|
|
+ // a GOTO. tb_inst_goto, tb_inst_if can handle common cases for you.
|
|
|
+ TB_BRANCH, // (Control, Data?) -> (Control...)
|
|
|
+ // debugbreak will trap in a continuable manner.
|
|
|
+ TB_DEBUGBREAK, // (Control, Memory) -> (Control)
|
|
|
+ // trap will not be continuable but will stop execution.
|
|
|
+ TB_TRAP, // (Control) -> (Control)
|
|
|
+ // unreachable means it won't trap or be continuable.
|
|
|
+ TB_UNREACHABLE, // (Control) -> (Control)
|
|
|
+
|
|
|
+ ////////////////////////////////
|
|
|
+ // CONTROL + MEMORY
|
|
|
+ ////////////////////////////////
|
|
|
+ // nothing special, it's just a function call, 3rd argument here is the
|
|
|
+ // target pointer (or syscall number) and the rest are just data args.
|
|
|
+ TB_CALL, // (Control, Memory, Data, Data...) -> (Control, Memory, Data)
|
|
|
+ TB_SYSCALL, // (Control, Memory, Data, Data...) -> (Control, Memory, Data)
|
|
|
+ // safepoint polls are the same except they only trigger if the poll site
|
|
|
+ // says to (platform specific but almost always just the page being made
|
|
|
+ // unmapped/guard), 3rd argument is the poll site.
|
|
|
+ TB_SAFEPOINT_POLL, // (Control, Memory, Ptr, Data...) -> (Control)
|
|
|
+
|
|
|
+ ////////////////////////////////
|
|
|
+ // MEMORY
|
|
|
+ ////////////////////////////////
|
|
|
+ // LOAD and STORE are standard memory accesses, they can be folded away.
|
|
|
+ TB_LOAD, // (Memory, Ptr) -> Data
|
|
|
+ TB_STORE, // (Memory, Ptr, Data) -> Memory
|
|
|
+ // bulk memory ops.
|
|
|
+ TB_MEMCPY, // (Memory, Ptr, Ptr, Size) -> Memory
|
|
|
+ TB_MEMSET, // (Memory, Ptr, Int8, Size) -> Memory
|
|
|
+ // these memory accesses represent "volatile" which means
|
|
|
+ // they may produce side effects and thus cannot be eliminated.
|
|
|
+ TB_READ, // (Memory, Ptr) -> (Memory, Data)
|
|
|
+ TB_WRITE, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ // atomics have multiple observers (if not they wouldn't need to
|
|
|
+ // be atomic) and thus produce side effects everywhere just like
|
|
|
+ // volatiles except they have synchronization guarentees. the atomic
|
|
|
+ // data ops will return the value before the operation is performed.
|
|
|
+ // Atomic CAS return the old value and a boolean for success (true if
|
|
|
+ // the value was changed)
|
|
|
+ TB_ATOMIC_LOAD, // (Memory, Ptr) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_XCHG, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_ADD, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_SUB, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_AND, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_XOR, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_OR, // (Memory, Ptr, Data) -> (Memory, Data)
|
|
|
+ TB_ATOMIC_CAS, // (Memory, Data, Data) -> (Memory, Data, Bool)
|
|
|
+
|
|
|
+ ////////////////////////////////
|
|
|
+ // POINTERS
|
|
|
+ ////////////////////////////////
|
|
|
+ // LOCAL will statically allocate stack space
|
|
|
+ TB_LOCAL, // () & (Int, Int) -> Ptr
|
|
|
+ // SYMBOL will return a pointer to a TB_Symbol
|
|
|
+ TB_SYMBOL, // () & TB_Symbol* -> Ptr
|
|
|
+ // offsets pointer by constant value
|
|
|
+ TB_MEMBER_ACCESS, // Ptr & Int -> Ptr
|
|
|
+ // arguments represent base, index, and stride respectively
|
|
|
+ // and will perform `base + index*stride`
|
|
|
+ TB_ARRAY_ACCESS, // (Ptr, Int) & Int -> Ptr
|
|
|
+ // converts an integer to a pointer
|
|
|
+ TB_INT2PTR, // Int -> Ptr
|
|
|
+ // converts a pointer to an integer
|
|
|
+ TB_PTR2INT, // Ptr -> Int
|
|
|
|
|
|
// Conversions
|
|
|
TB_TRUNCATE,
|
|
|
TB_FLOAT_EXT,
|
|
|
TB_SIGN_EXT,
|
|
|
TB_ZERO_EXT,
|
|
|
- TB_INT2PTR,
|
|
|
- TB_PTR2INT,
|
|
|
TB_UINT2FLOAT,
|
|
|
TB_FLOAT2UINT,
|
|
|
TB_INT2FLOAT,
|
|
@@ -315,18 +387,17 @@ typedef enum TB_NodeTypeEnum {
|
|
|
TB_CMP_FLE,
|
|
|
|
|
|
// Special ops
|
|
|
- // does full multiplication (64x64=128 and so on) returning
|
|
|
- // the low and high values in separate projections
|
|
|
+ // adds two paired integers to two other paired integers and returns
|
|
|
+ // a low and high value
|
|
|
+ TB_ADDPAIR,
|
|
|
+ // does full multiplication (64x64=128 and so on) returning
|
|
|
+ // the low and high values in separate projections
|
|
|
TB_MULPAIR,
|
|
|
|
|
|
- // PHI
|
|
|
- TB_PHI, // fn(r: region, x: []data)
|
|
|
-
|
|
|
// variadic
|
|
|
TB_VA_START,
|
|
|
|
|
|
// x86 intrinsics
|
|
|
- TB_X86INTRIN_RDTSC,
|
|
|
TB_X86INTRIN_LDMXCSR,
|
|
|
TB_X86INTRIN_STMXCSR,
|
|
|
TB_X86INTRIN_SQRT,
|
|
@@ -372,6 +443,9 @@ typedef struct TB_FunctionPrototype TB_FunctionPrototype;
|
|
|
|
|
|
typedef struct TB_Attrib TB_Attrib;
|
|
|
|
|
|
+// target-specific, just a unique ID for the registers
|
|
|
+typedef int TB_PhysicalReg;
|
|
|
+
|
|
|
// Refers generically to objects within a module
|
|
|
//
|
|
|
// TB_Function, TB_Global, and TB_External are all subtypes of TB_Symbol
|
|
@@ -412,12 +486,11 @@ typedef struct TB_Symbol {
|
|
|
typedef struct TB_Node TB_Node;
|
|
|
struct TB_Node {
|
|
|
TB_NodeType type;
|
|
|
+ uint16_t input_count; // number of node inputs.
|
|
|
TB_DataType dt;
|
|
|
- uint16_t input_count; // number of node inputs
|
|
|
- uint16_t extra_count; // number of bytes for extra operand data
|
|
|
|
|
|
- // local to the TB_Passes
|
|
|
- uint32_t lattice_id;
|
|
|
+ // makes it easier to track in graph walks
|
|
|
+ size_t gvn;
|
|
|
|
|
|
TB_Attrib* attribs;
|
|
|
TB_Node** inputs;
|
|
@@ -442,9 +515,8 @@ typedef struct { // TB_PROJ
|
|
|
int index;
|
|
|
} TB_NodeProj;
|
|
|
|
|
|
-typedef struct { // TB_INT
|
|
|
- uint64_t num_words;
|
|
|
- uint64_t words[];
|
|
|
+typedef struct { // TB_INTEGER_CONST
|
|
|
+ uint64_t value;
|
|
|
} TB_NodeInt;
|
|
|
|
|
|
typedef struct { // any compare operator
|
|
@@ -457,17 +529,26 @@ typedef struct { // any integer binary operator
|
|
|
|
|
|
typedef struct { // TB_MULPAIR
|
|
|
TB_Node *lo, *hi;
|
|
|
-} TB_NodeMulPair;
|
|
|
+} TB_NodeArithPair;
|
|
|
|
|
|
typedef struct {
|
|
|
TB_CharUnits align;
|
|
|
- bool is_volatile;
|
|
|
} TB_NodeMemAccess;
|
|
|
|
|
|
typedef struct {
|
|
|
TB_CharUnits size, align;
|
|
|
} TB_NodeLocal;
|
|
|
|
|
|
+typedef struct {
|
|
|
+ // this is the raw buffer
|
|
|
+ size_t length;
|
|
|
+ const uint8_t* data;
|
|
|
+
|
|
|
+ // represents the outputs, inputs and temporaries in that order
|
|
|
+ size_t outs, ins, tmps;
|
|
|
+ TB_PhysicalReg regs[];
|
|
|
+} TB_NodeMachineOp;
|
|
|
+
|
|
|
typedef struct {
|
|
|
float value;
|
|
|
} TB_NodeFloat32;
|
|
@@ -491,6 +572,8 @@ typedef struct {
|
|
|
typedef struct {
|
|
|
TB_MemoryOrder order;
|
|
|
TB_MemoryOrder order2;
|
|
|
+ TB_Node* proj0;
|
|
|
+ TB_Node* proj1;
|
|
|
} TB_NodeAtomic;
|
|
|
|
|
|
typedef struct {
|
|
@@ -506,9 +589,18 @@ typedef struct {
|
|
|
TB_Node* end;
|
|
|
const char* tag;
|
|
|
|
|
|
+ // position in a postorder walk
|
|
|
+ int postorder_id;
|
|
|
// immediate dominator (can be approximate)
|
|
|
int dom_depth;
|
|
|
TB_Node* dom;
|
|
|
+
|
|
|
+ // used for IR building only, stale after that.
|
|
|
+ //
|
|
|
+ // this represents the first and last memory values within a region,
|
|
|
+ // if a region ever has multiple predecessors we apply a join on these
|
|
|
+ // memory.
|
|
|
+ TB_Node *mem_in, *mem_out;
|
|
|
} TB_NodeRegion;
|
|
|
|
|
|
typedef struct TB_MultiOutput {
|
|
@@ -558,7 +650,7 @@ typedef struct {
|
|
|
#define TB_TYPE_F64 TB_DataType{ { TB_FLOAT, 0, TB_FLT_64 } }
|
|
|
#define TB_TYPE_BOOL TB_DataType{ { TB_INT, 0, 1 } }
|
|
|
#define TB_TYPE_PTR TB_DataType{ { TB_PTR, 0, 0 } }
|
|
|
-
|
|
|
+#define TB_TYPE_MEMORY TB_DataType{ { TB_MEMORY,0, 0 } }
|
|
|
#define TB_TYPE_INTN(N) TB_DataType{ { TB_INT, 0, (N) } }
|
|
|
#define TB_TYPE_PTRN(N) TB_DataType{ { TB_PTR, 0, (N) } }
|
|
|
|
|
@@ -575,8 +667,9 @@ typedef struct {
|
|
|
#define TB_TYPE_F64 (TB_DataType){ { TB_FLOAT, 0, TB_FLT_64 } }
|
|
|
#define TB_TYPE_BOOL (TB_DataType){ { TB_INT, 0, 1 } }
|
|
|
#define TB_TYPE_PTR (TB_DataType){ { TB_PTR, 0, 0 } }
|
|
|
-#define TB_TYPE_INTN(N) (TB_DataType){ { TB_INT, 0, (N) } }
|
|
|
-#define TB_TYPE_PTRN(N) (TB_DataType){ { TB_PTR, 0, (N) } }
|
|
|
+#define TB_TYPE_MEMORY (TB_DataType){ { TB_MEMORY,0, 0 } }
|
|
|
+#define TB_TYPE_INTN(N) (TB_DataType){ { TB_INT, 0, (N) } }
|
|
|
+#define TB_TYPE_PTRN(N) (TB_DataType){ { TB_PTR, 0, (N) } }
|
|
|
|
|
|
#endif
|
|
|
|
|
@@ -737,7 +830,7 @@ TB_API TB_External* tb_next_external(TB_External* e);
|
|
|
|
|
|
// this is used JIT scenarios to tell the compiler what externals map to
|
|
|
TB_API TB_ExternalType tb_extern_get_type(TB_External* e);
|
|
|
-TB_Global* tb_extern_transmute(TB_External* e, TB_DebugType* dbg_type, TB_Linkage linkage);
|
|
|
+TB_API TB_Global* tb_extern_transmute(TB_External* e, TB_DebugType* dbg_type, TB_Linkage linkage);
|
|
|
|
|
|
TB_API TB_External* tb_extern_create(TB_Module* m, ptrdiff_t len, const char* name, TB_ExternalType type);
|
|
|
|
|
@@ -884,6 +977,11 @@ TB_API void tb_default_print_callback(void* user_data, const char* fmt, ...);
|
|
|
TB_API void tb_inst_set_location(TB_Function* f, TB_SourceFile* file, int line, int column);
|
|
|
TB_API void tb_inst_reset_location(TB_Function* f);
|
|
|
|
|
|
+// this is where the STOP will be
|
|
|
+TB_API void tb_inst_set_exit_location(TB_Function* f, TB_SourceFile* file, int line, int column);
|
|
|
+
|
|
|
+TB_API bool tb_has_effects(TB_Node* n);
|
|
|
+
|
|
|
// if section is NULL, default to .text
|
|
|
TB_API TB_Function* tb_function_create(TB_Module* m, ptrdiff_t len, const char* name, TB_Linkage linkage, TB_ComdatType comdat);
|
|
|
|
|
@@ -927,9 +1025,12 @@ TB_API TB_Node* tb_inst_float2int(TB_Function* f, TB_Node* src, TB_DataType dt,
|
|
|
TB_API TB_Node* tb_inst_bitcast(TB_Function* f, TB_Node* src, TB_DataType dt);
|
|
|
|
|
|
TB_API TB_Node* tb_inst_local(TB_Function* f, TB_CharUnits size, TB_CharUnits align);
|
|
|
+
|
|
|
TB_API TB_Node* tb_inst_load(TB_Function* f, TB_DataType dt, TB_Node* addr, TB_CharUnits align, bool is_volatile);
|
|
|
TB_API void tb_inst_store(TB_Function* f, TB_DataType dt, TB_Node* addr, TB_Node* val, TB_CharUnits align, bool is_volatile);
|
|
|
|
|
|
+TB_API void tb_inst_safepoint_poll(TB_Function* f, TB_Node* addr, int input_count, TB_Node** inputs);
|
|
|
+
|
|
|
TB_API TB_Node* tb_inst_bool(TB_Function* f, bool imm);
|
|
|
TB_API TB_Node* tb_inst_sint(TB_Function* f, TB_DataType dt, int64_t imm);
|
|
|
TB_API TB_Node* tb_inst_uint(TB_Function* f, TB_DataType dt, uint64_t imm);
|
|
@@ -939,14 +1040,14 @@ TB_API TB_Node* tb_inst_cstring(TB_Function* f, const char* str);
|
|
|
TB_API TB_Node* tb_inst_string(TB_Function* f, size_t len, const char* str);
|
|
|
|
|
|
// write 'val' over 'count' bytes on 'dst'
|
|
|
-TB_API void tb_inst_memset(TB_Function* f, TB_Node* dst, TB_Node* val, TB_Node* count, TB_CharUnits align, bool is_volatile);
|
|
|
+TB_API void tb_inst_memset(TB_Function* f, TB_Node* dst, TB_Node* val, TB_Node* count, TB_CharUnits align);
|
|
|
|
|
|
// zero 'count' bytes on 'dst'
|
|
|
-TB_API void tb_inst_memzero(TB_Function* f, TB_Node* dst, TB_Node* count, TB_CharUnits align, bool is_volatile);
|
|
|
+TB_API void tb_inst_memzero(TB_Function* f, TB_Node* dst, TB_Node* count, TB_CharUnits align);
|
|
|
|
|
|
// performs a copy of 'count' elements from one memory location to another
|
|
|
// both locations cannot overlap.
|
|
|
-TB_API void tb_inst_memcpy(TB_Function* f, TB_Node* dst, TB_Node* src, TB_Node* count, TB_CharUnits align, bool is_volatile);
|
|
|
+TB_API void tb_inst_memcpy(TB_Function* f, TB_Node* dst, TB_Node* src, TB_Node* count, TB_CharUnits align);
|
|
|
|
|
|
// result = base + (index * stride)
|
|
|
TB_API TB_Node* tb_inst_array_access(TB_Function* f, TB_Node* base, TB_Node* index, int64_t stride);
|
|
@@ -1033,9 +1134,9 @@ TB_API TB_Node* tb_inst_cmp_fge(TB_Function* f, TB_Node* a, TB_Node* b);
|
|
|
|
|
|
// General intrinsics
|
|
|
TB_API TB_Node* tb_inst_va_start(TB_Function* f, TB_Node* a);
|
|
|
+TB_API TB_Node* tb_inst_cycle_counter(TB_Function* f);
|
|
|
|
|
|
// x86 Intrinsics
|
|
|
-TB_API TB_Node* tb_inst_x86_rdtsc(TB_Function* f);
|
|
|
TB_API TB_Node* tb_inst_x86_ldmxcsr(TB_Function* f, TB_Node* a);
|
|
|
TB_API TB_Node* tb_inst_x86_stmxcsr(TB_Function* f);
|
|
|
TB_API TB_Node* tb_inst_x86_sqrt(TB_Function* f, TB_Node* a);
|
|
@@ -1061,6 +1162,19 @@ TB_API void tb_inst_ret(TB_Function* f, size_t count, TB_Node** values);
|
|
|
////////////////////////////////
|
|
|
// Passes
|
|
|
////////////////////////////////
|
|
|
+typedef enum {
|
|
|
+ // allowed to remove PHIs nodes, this is
|
|
|
+ // helpful because the default IR building
|
|
|
+ // will produce tons of useless memory PHIs.
|
|
|
+ TB_PEEPHOLE_PHI = 1,
|
|
|
+
|
|
|
+ // it's allowed to fold memory operations (store or load elimination)
|
|
|
+ TB_PEEPHOLE_MEMORY = 2,
|
|
|
+
|
|
|
+ // just do every reduction rule i can provide you
|
|
|
+ TB_PEEPHOLE_ALL = 7,
|
|
|
+} TB_PeepholeFlags;
|
|
|
+
|
|
|
// Function analysis, optimizations, and codegen are all part of this
|
|
|
typedef struct TB_Passes TB_Passes;
|
|
|
|
|
@@ -1069,35 +1183,32 @@ TB_API TB_Passes* tb_pass_enter(TB_Function* f, TB_Arena* arena);
|
|
|
TB_API void tb_pass_exit(TB_Passes* opt);
|
|
|
|
|
|
// transformation passes:
|
|
|
-// peephole: runs most simple reductions on the code,
|
|
|
-// should be run after any bigger passes (it's incremental
|
|
|
-// so it's not that bad)
|
|
|
+// peephole: 99% of the optimizer, i'm sea of nodes pilled so i
|
|
|
+// break down most optimizations into local rewrites, it's
|
|
|
+// incremental and recommended to run after any non-peephole
|
|
|
+// pass.
|
|
|
//
|
|
|
-// mem2reg: lowers TB_LOCALs into SSA values, this makes more
|
|
|
+// mem2reg: lowers TB_LOCALs into SoN values, this makes more
|
|
|
// data flow analysis possible on the code and allows to codegen
|
|
|
// to place variables into registers.
|
|
|
//
|
|
|
-// cfg: performs simplifications on the CFG like `a && b => select(a, b, 0)`
|
|
|
-// or removing redundant branches.
|
|
|
-//
|
|
|
-// loop: NOT READY
|
|
|
-//
|
|
|
-TB_API bool tb_pass_peephole(TB_Passes* opt);
|
|
|
+// SROA: splits LOCALs into multiple to allow for more dataflow
|
|
|
+// analysis later on.
|
|
|
+TB_API void tb_pass_peephole(TB_Passes* opt, TB_PeepholeFlags flags);
|
|
|
+TB_API void tb_pass_sroa(TB_Passes* opt);
|
|
|
TB_API bool tb_pass_mem2reg(TB_Passes* opt);
|
|
|
-TB_API bool tb_pass_loop(TB_Passes* opt);
|
|
|
-TB_API bool tb_pass_cfg(TB_Passes* opt);
|
|
|
+
|
|
|
+TB_API void tb_pass_schedule(TB_Passes* opt);
|
|
|
|
|
|
// analysis
|
|
|
// print: prints IR in a flattened text form.
|
|
|
TB_API bool tb_pass_print(TB_Passes* opt);
|
|
|
|
|
|
-TB_API void tb_pass_schedule(TB_Passes* opt);
|
|
|
-
|
|
|
// codegen
|
|
|
TB_API TB_FunctionOutput* tb_pass_codegen(TB_Passes* opt, bool emit_asm);
|
|
|
|
|
|
TB_API void tb_pass_kill_node(TB_Passes* opt, TB_Node* n);
|
|
|
-TB_API bool tb_pass_mark(TB_Passes* opt, TB_Node* n);
|
|
|
+TB_API void tb_pass_mark(TB_Passes* opt, TB_Node* n);
|
|
|
TB_API void tb_pass_mark_users(TB_Passes* opt, TB_Node* n);
|
|
|
|
|
|
////////////////////////////////
|