Browse Source

Merge pull request #4893 from Barinzaya/for-bitset-ctz

Use CTZ in `for in bit_set` codegen
gingerBill 5 months ago
parent
commit
8214892b5b
1 changed files with 30 additions and 49 deletions
  1. 30 49
      src/llvm_backend_stmt.cpp

+ 30 - 49
src/llvm_backend_stmt.cpp

@@ -1114,62 +1114,43 @@ gb_internal void lb_build_range_stmt(lbProcedure *p, AstRangeStmt *rs, Scope *sc
 			}
 
 			Type *elem = et->BitSet.elem;
-			if (is_type_enum(elem)) {
-				i64 enum_count = 0;
-				lbValue values      = lb_enum_values_slice(p, elem, &enum_count);
-				lbValue values_data = lb_slice_elem(p, values);
-				lbValue max_count   = lb_const_int(m, t_int, enum_count);
-
-				lbAddr offset_ = lb_add_local_generated(p, t_int, false);
-				lb_addr_store(p, offset_, lb_const_int(m, t_int, 0));
-
-				loop = lb_create_block(p, "for.bit_set.enum.loop");
-				lb_emit_jump(p, loop);
-				lb_start_block(p, loop);
-
-				lbBlock *body_check = lb_create_block(p, "for.bit_set.enum.body-check");
-				lbBlock *body = lb_create_block(p, "for.bit_set.enum.body");
-				done = lb_create_block(p, "for.bit_set.enum.done");
-
-				lbValue offset = lb_addr_load(p, offset_);
-				lbValue cond = lb_emit_comp(p, Token_Lt, offset, max_count);
-				lb_emit_if(p, cond, body_check, done);
-				lb_start_block(p, body_check);
-
-				lbValue val_ptr = lb_emit_ptr_offset(p, values_data, offset);
-				lb_emit_increment(p, offset_.addr);
-				val = lb_emit_load(p, val_ptr);
-				val = lb_emit_conv(p, val, elem);
-
-				lbValue check = lb_build_binary_in(p, val, the_set, Token_in);
-				lb_emit_if(p, check, body, loop);
-				lb_start_block(p, body);
-			} else {
-				lbAddr offset_ = lb_add_local_generated(p, t_int, false);
-				lb_addr_store(p, offset_, lb_const_int(m, t_int, et->BitSet.lower));
+			Type *mask = bit_set_to_int(et);
+
+			lbValue all_mask = lb_const_value(p->module, mask, exact_bit_set_all_set_mask(et));
+			lbValue initial_mask = lb_emit_arith(p, Token_And, the_set, all_mask, mask);
+
+			if (rs->reverse) {
+				initial_mask = lb_emit_reverse_bits(p, initial_mask, mask);
+			}
 
-				lbValue max_count = lb_const_int(m, t_int, et->BitSet.upper);
+			lbAddr remaining = lb_add_local_generated(p, mask, false);
+			lb_addr_store(p, remaining, initial_mask);
 
-				loop = lb_create_block(p, "for.bit_set.range.loop");
-				lb_emit_jump(p, loop);
-				lb_start_block(p, loop);
+			loop = lb_create_block(p, "for.bit_set.loop");
+			lbBlock *body = lb_create_block(p, "for.bit_set.body");
+			done = lb_create_block(p, "for.bit_set.done");
 
-				lbBlock *body_check = lb_create_block(p, "for.bit_set.range.body-check");
-				lbBlock *body = lb_create_block(p, "for.bit_set.range.body");
-				done = lb_create_block(p, "for.bit_set.range.done");
+			lb_emit_jump(p, loop);
+			lb_start_block(p, loop);
 
-				lbValue offset = lb_addr_load(p, offset_);
-				lbValue cond = lb_emit_comp(p, Token_LtEq, offset, max_count);
-				lb_emit_if(p, cond, body_check, done);
-				lb_start_block(p, body_check);
+			lbValue remaining_val = lb_addr_load(p, remaining);
+			lbValue cond = lb_emit_comp(p, Token_NotEq, remaining_val, lb_zero(m, mask));
+			lb_emit_if(p, cond, body, done);
 
-				val = lb_emit_conv(p, offset, elem);
-				lb_emit_increment(p, offset_.addr);
+			lb_start_block(p, body);
+			val = lb_emit_count_trailing_zeros(p, remaining_val, mask);
+			val = lb_emit_conv(p, val, elem);
 
-				lbValue check = lb_build_binary_in(p, val, the_set, Token_in);
-				lb_emit_if(p, check, body, loop);
-				lb_start_block(p, body);
+			if (rs->reverse) {
+				val = lb_emit_arith(p, Token_Sub, lb_const_int(m, elem, et->BitSet.lower + 8*type_size_of(mask) - 1), val, elem);
+			} else {
+				val = lb_emit_arith(p, Token_Add, val, lb_const_int(m, elem, et->BitSet.lower), elem);
 			}
+
+			lbValue reduce_val = lb_emit_arith(p, Token_Sub, remaining_val, lb_const_int(m, mask, 1), mask);
+			remaining_val = lb_emit_arith(p, Token_And, remaining_val, reduce_val, mask);
+			lb_addr_store(p, remaining, remaining_val);
+
 			break;
 		}
 		default: