Browse Source

Merge pull request #2905 from odin-lang/llvm-17-custom-passes

Explicit Optimization Passes for LLVM-17 Passes
gingerBill 1 year ago
parent
commit
51caa930ca
4 changed files with 413 additions and 16 deletions
  1. 393 3
      src/llvm_backend.cpp
  2. 5 1
      src/llvm_backend_general.cpp
  3. 5 2
      src/llvm_backend_opt.cpp
  4. 10 10
      src/tilde_stmt.cpp

+ 393 - 3
src/llvm_backend.cpp

@@ -1491,13 +1491,390 @@ gb_internal WORKER_TASK_PROC(lb_llvm_module_pass_worker_proc) {
 		array_add(&passes, "function(annotation-remarks)");
 		array_add(&passes, "function(annotation-remarks)");
 		break;
 		break;
 	case 1:
 	case 1:
-		array_add(&passes, "default<Os>");
+// default<Os>
+// Passes removed: coro, openmp, sroa
+		array_add(&passes, u8R"(
+annotation2metadata,
+forceattrs,
+inferattrs,
+function<eager-inv>(
+	lower-expect,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+	early-cse<>
+),
+ipsccp,
+called-value-propagation,
+globalopt,
+function<eager-inv>(
+	mem2reg,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>),
+	require<globals-aa>,
+	function(
+		invalidate<aa>
+	),
+	require<profile-summary>,
+	cgscc(
+	devirt<4>(
+		inline<only-mandatory>,
+		inline,
+		function-attrs<skip-non-recursive>,
+		function<eager-inv;no-rerun>(
+			early-cse<memssa>,
+			speculative-execution,
+			jump-threading,
+			correlated-propagation,
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			aggressive-instcombine,
+			constraint-elimination,
+			tailcallelim,
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			reassociate,
+			loop-mssa(
+				loop-instsimplify,
+				loop-simplifycfg,
+				licm<no-allowspeculation>,
+				loop-rotate<header-duplication;no-prepare-for-lto>,
+				licm<allowspeculation>,
+				simple-loop-unswitch<no-nontrivial;trivial>
+			),
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			loop(
+				loop-idiom,
+				indvars,
+				loop-deletion,
+				loop-unroll-full
+			),
+			vector-combine,
+			mldst-motion<no-split-footer-bb>,
+			gvn<>,
+			sccp,
+			bdce,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			jump-threading,
+			correlated-propagation,
+			adce,
+			memcpyopt,
+			dse,
+			move-auto-init,
+			loop-mssa(
+				licm<allowspeculation>
+			),
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>
+		),
+		function-attrs,
+		function(
+			require<should-not-run-function-passes>
+		)
+	)
+),
+deadargelim,
+globalopt,
+globaldce,
+elim-avail-extern,
+rpo-function-attrs,
+recompute-globalsaa,
+function<eager-inv>(
+	float2int,
+	lower-constant-intrinsics,
+	loop(
+		loop-rotate<header-duplication;no-prepare-for-lto>,
+		loop-deletion
+	),
+	loop-distribute,
+	inject-tli-mappings,
+	loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,
+	loop-load-elim,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,
+	slp-vectorizer,
+	vector-combine,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	loop-unroll<O2>,
+	transform-warning,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	loop-mssa(
+		licm<allowspeculation>
+	),
+	alignment-from-assumptions,
+	loop-sink,
+	instsimplify,
+	div-rem-pairs,
+	tailcallelim,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>
+),
+globaldce,
+constmerge,
+cg-profile,
+rel-lookup-table-converter,
+function(
+	annotation-remarks
+),
+verify
+)");
 		break;
 		break;
+// default<O2>
+// Passes removed: coro, openmp, sroa
 	case 2:
 	case 2:
-		array_add(&passes, "default<O2>");
+		array_add(&passes, u8R"(
+annotation2metadata,
+forceattrs,
+inferattrs,
+function<eager-inv>(
+	lower-expect,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+	early-cse<>
+),
+ipsccp,
+called-value-propagation,
+globalopt,
+function<eager-inv>(
+	mem2reg,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>
+),
+require<globals-aa>,
+function(
+	invalidate<aa>
+),
+require<profile-summary>,
+cgscc(
+	devirt<4>(
+		inline<only-mandatory>,
+		inline,
+		function-attrs<skip-non-recursive>,
+		function<eager-inv;no-rerun>(
+			early-cse<memssa>,
+			speculative-execution,
+			jump-threading,
+			correlated-propagation,
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			aggressive-instcombine,
+			constraint-elimination,
+			libcalls-shrinkwrap,
+			tailcallelim,
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			reassociate,
+			loop-mssa(
+				loop-instsimplify,
+				loop-simplifycfg,
+				licm<no-allowspeculation>,
+				loop-rotate<header-duplication;no-prepare-for-lto>,
+				licm<allowspeculation>,
+				simple-loop-unswitch<no-nontrivial;trivial>
+			),
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			loop(
+				loop-idiom,
+				indvars,
+				loop-deletion,
+				loop-unroll-full
+			),
+			vector-combine,
+			mldst-motion<no-split-footer-bb>,
+			gvn<>,
+			sccp,
+			bdce,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			jump-threading,
+			correlated-propagation,
+			adce,
+			memcpyopt,
+			dse,
+			move-auto-init,
+			loop-mssa(
+				licm<allowspeculation>
+			),
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>
+		),
+		function-attrs,
+		function(
+			require<should-not-run-function-passes>
+		)
+	)
+),
+deadargelim,
+globalopt,
+globaldce,
+elim-avail-extern,
+rpo-function-attrs,
+recompute-globalsaa,
+function<eager-inv>(
+	float2int,
+	lower-constant-intrinsics,
+	loop(
+		loop-rotate<header-duplication;no-prepare-for-lto>,
+		loop-deletion
+	),
+	loop-distribute,
+	inject-tli-mappings,
+	loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,
+	loop-load-elim,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,
+	slp-vectorizer,
+	vector-combine,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	loop-unroll<O2>,
+	transform-warning,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	loop-mssa(
+		licm<allowspeculation>
+	),
+	alignment-from-assumptions,
+	loop-sink,
+	instsimplify,
+	div-rem-pairs,
+	tailcallelim,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>
+),
+globaldce,
+constmerge,
+cg-profile,
+rel-lookup-table-converter,
+function(
+	annotation-remarks
+),
+verify
+)");
 		break;
 		break;
+
 	case 3:
 	case 3:
-		array_add(&passes, "default<O3>");
+// default<O3>
+// Passes removed: coro, openmp, sroa
+		array_add(&passes, u8R"(
+annotation2metadata,
+forceattrs,
+inferattrs,
+function<eager-inv>(
+	lower-expect,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+	early-cse<>,
+	callsite-splitting
+),
+ipsccp,
+called-value-propagation,
+globalopt,
+function<eager-inv>(
+	mem2reg,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>
+),
+require<globals-aa>,
+function(
+	invalidate<aa>
+),
+require<profile-summary>,
+cgscc(
+	devirt<4>(
+		inline<only-mandatory>,
+		inline,
+		function-attrs<skip-non-recursive>,
+		argpromotion,
+		function<eager-inv;no-rerun>(
+			early-cse<memssa>,
+			speculative-execution,
+			jump-threading,
+			correlated-propagation,
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			aggressive-instcombine,
+			constraint-elimination,
+			libcalls-shrinkwrap,
+			tailcallelim,
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			reassociate,
+			loop-mssa(
+				loop-instsimplify,
+				loop-simplifycfg,
+				licm<no-allowspeculation>,
+				loop-rotate<header-duplication;no-prepare-for-lto>,
+				licm<allowspeculation>,
+				simple-loop-unswitch<nontrivial;trivial>
+			),
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			loop(
+				loop-idiom,
+				indvars,
+				loop-deletion,
+				loop-unroll-full
+			),
+			vector-combine,
+			mldst-motion<no-split-footer-bb>,
+			gvn<>,
+			sccp,
+			bdce,
+			instcombine<max-iterations=1000;no-use-loop-info>,
+			jump-threading,
+			correlated-propagation,
+			adce,
+			memcpyopt,
+			dse,
+			move-auto-init,
+			loop-mssa(
+				licm<allowspeculation>
+			),
+			simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,
+			instcombine<max-iterations=1000;no-use-loop-info>
+		),
+		function-attrs,
+		function(
+			require<should-not-run-function-passes>
+		),
+	)
+),
+deadargelim,
+globalopt,
+globaldce,
+elim-avail-extern,
+rpo-function-attrs,
+recompute-globalsaa,
+function<eager-inv>(
+	float2int,
+	lower-constant-intrinsics,
+	chr,
+	loop(
+		loop-rotate<header-duplication;no-prepare-for-lto>,
+		loop-deletion
+	),
+	loop-distribute,
+	inject-tli-mappings,
+	loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,
+	loop-load-elim,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,
+	slp-vectorizer,
+	vector-combine,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	loop-unroll<O3>,
+	transform-warning,
+	instcombine<max-iterations=1000;no-use-loop-info>,
+	loop-mssa(
+		licm<allowspeculation>
+	),
+	alignment-from-assumptions,
+	loop-sink,
+	instsimplify,
+	div-rem-pairs,
+	tailcallelim,
+	simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts;speculate-blocks;simplify-cond-branch>
+),
+globaldce,
+constmerge,
+cg-profile,
+rel-lookup-table-converter,
+function(
+	annotation-remarks
+),
+verify
+)");
 		break;
 		break;
 	}
 	}
 
 
@@ -1528,6 +1905,19 @@ gb_internal WORKER_TASK_PROC(lb_llvm_module_pass_worker_proc) {
 		}
 		}
 		passes_str = gb_string_appendc(passes_str, passes[i]);
 		passes_str = gb_string_appendc(passes_str, passes[i]);
 	}
 	}
+	for (isize i = 0; i < gb_string_length(passes_str); /**/) {
+		switch (passes_str[i]) {
+		case ' ':
+		case '\n':
+		case '\t':
+			gb_memmove(&passes_str[i], &passes_str[i+1], gb_string_length(passes_str)-i);
+			GB_STRING_HEADER(passes_str)->length -= 1;
+			continue;
+		default:
+			i += 1;
+			break;
+		}
+	}
 
 
 	LLVMErrorRef llvm_err = LLVMRunPasses(wd->m->mod, passes_str, wd->target_machine, pb_options);
 	LLVMErrorRef llvm_err = LLVMRunPasses(wd->m->mod, passes_str, wd->target_machine, pb_options);
 
 

+ 5 - 1
src/llvm_backend_general.cpp

@@ -962,8 +962,12 @@ gb_internal bool lb_is_type_proc_recursive(Type *t) {
 
 
 gb_internal void lb_emit_store(lbProcedure *p, lbValue ptr, lbValue value) {
 gb_internal void lb_emit_store(lbProcedure *p, lbValue ptr, lbValue value) {
 	GB_ASSERT(value.value != nullptr);
 	GB_ASSERT(value.value != nullptr);
-	Type *a = type_deref(ptr.type);
 
 
+	if (LLVMIsUndef(value.value)) {
+		return;
+	}
+
+	Type *a = type_deref(ptr.type);
 	if (LLVMIsNull(value.value)) {
 	if (LLVMIsNull(value.value)) {
 		LLVMTypeRef src_t = llvm_addr_type(p->module, ptr);
 		LLVMTypeRef src_t = llvm_addr_type(p->module, ptr);
 		if (is_type_proc(a)) {
 		if (is_type_proc(a)) {

+ 5 - 2
src/llvm_backend_opt.cpp

@@ -322,7 +322,11 @@ gb_internal void lb_run_remove_dead_instruction_pass(lbProcedure *p) {
 				// NOTE(bill): Explicit instructions are set here because some instructions could have side effects
 				// NOTE(bill): Explicit instructions are set here because some instructions could have side effects
 				switch (LLVMGetInstructionOpcode(curr_instr)) {
 				switch (LLVMGetInstructionOpcode(curr_instr)) {
 				// case LLVMAlloca:
 				// case LLVMAlloca:
-
+				case LLVMLoad:
+					if (LLVMGetVolatile(curr_instr)) {
+						break;
+					}
+					/*fallthrough*/
 				case LLVMFNeg:
 				case LLVMFNeg:
 				case LLVMAdd:
 				case LLVMAdd:
 				case LLVMFAdd:
 				case LLVMFAdd:
@@ -342,7 +346,6 @@ gb_internal void lb_run_remove_dead_instruction_pass(lbProcedure *p) {
 				case LLVMAnd:
 				case LLVMAnd:
 				case LLVMOr:
 				case LLVMOr:
 				case LLVMXor:
 				case LLVMXor:
-				case LLVMLoad:
 				case LLVMGetElementPtr:
 				case LLVMGetElementPtr:
 				case LLVMTrunc:
 				case LLVMTrunc:
 				case LLVMZExt:
 				case LLVMZExt:

+ 10 - 10
src/tilde_stmt.cpp

@@ -1032,22 +1032,22 @@ gb_internal void cg_build_assignment(cgProcedure *p, Array<cgAddr> const &lvals,
 			continue;
 			continue;
 		}
 		}
 
 
-	    	Type *type = cg_addr_type(lval);
+		Type *type = cg_addr_type(lval);
 		if (!cg_addr_is_empty(lval)) {
 		if (!cg_addr_is_empty(lval)) {
 			GB_ASSERT_MSG(are_types_identical(init.type, type), "%s = %s", type_to_string(init.type), type_to_string(type));
 			GB_ASSERT_MSG(are_types_identical(init.type, type), "%s = %s", type_to_string(init.type), type_to_string(type));
 		}
 		}
 
 
 		if (init.kind == cgValue_Addr &&
 		if (init.kind == cgValue_Addr &&
 		    !cg_addr_is_empty(lval)) {
 		    !cg_addr_is_empty(lval)) {
-		    	// NOTE(bill): This is needed for certain constructs such as this:
-		    	// a, b = b, a
-		    	// NOTE(bill): This is a bodge and not necessarily a good way of doing things whatsoever
-		    	TB_CharUnits size  = cast(TB_CharUnits)type_size_of(type);
-		    	TB_CharUnits align = cast(TB_CharUnits)type_align_of(type);
-		    	TB_Node *copy = tb_inst_local(p->func, size, align);
-		    	tb_inst_memcpy(p->func, copy, init.node, tb_inst_uint(p->func, TB_TYPE_INT, size), align);
-		    	// use the copy instead
-		    	init.node = copy;
+			// NOTE(bill): This is needed for certain constructs such as this:
+			// a, b = b, a
+			// NOTE(bill): This is a bodge and not necessarily a good way of doing things whatsoever
+			TB_CharUnits size  = cast(TB_CharUnits)type_size_of(type);
+			TB_CharUnits align = cast(TB_CharUnits)type_align_of(type);
+			TB_Node *copy = tb_inst_local(p->func, size, align);
+			tb_inst_memcpy(p->func, copy, init.node, tb_inst_uint(p->func, TB_TYPE_INT, size), align);
+			// use the copy instead
+			init.node = copy;
 		}
 		}
 		inits[i] = init;
 		inits[i] = init;
 	}
 	}