Browse Source

Merge pull request #5765 from odin-lang/bill/new-slice-sort

New `slice.sort` implementation
gingerBill 4 days ago
parent
commit
cadac43f14
5 changed files with 373 additions and 352 deletions
  1. 5 5
      core/slice/ptr.odin
  2. 210 33
      core/slice/sort.odin
  3. 152 314
      core/slice/sort_private.odin
  4. 4 0
      src/check_builtin.cpp
  5. 2 0
      src/checker_builtin_procs.hpp

+ 5 - 5
core/slice/ptr.odin

@@ -3,14 +3,14 @@ package slice
 import "base:builtin"
 import "base:builtin"
 import "base:runtime"
 import "base:runtime"
 
 
-ptr_add :: proc(p: $P/^$T, x: int) -> ^T {
+ptr_add :: proc "contextless" (p: $P/^$T, x: int) -> ^T {
 	return ([^]T)(p)[x:]
 	return ([^]T)(p)[x:]
 }
 }
-ptr_sub :: proc(p: $P/^$T, x: int) -> ^T {
+ptr_sub :: proc "contextless" (p: $P/^$T, x: int) -> ^T {
 	return ([^]T)(p)[-x:]
 	return ([^]T)(p)[-x:]
 }
 }
 
 
-ptr_swap_non_overlapping :: proc(x, y: rawptr, len: int) {
+ptr_swap_non_overlapping :: proc "contextless" (x, y: rawptr, len: int) {
 	if len <= 0 {
 	if len <= 0 {
 		return
 		return
 	}
 	}
@@ -44,7 +44,7 @@ ptr_swap_non_overlapping :: proc(x, y: rawptr, len: int) {
 	}
 	}
 }
 }
 
 
-ptr_swap_overlapping :: proc(x, y: rawptr, len: int) {
+ptr_swap_overlapping :: proc "contextless" (x, y: rawptr, len: int) {
 	if len <= 0 {
 	if len <= 0 {
 		return
 		return
 	}
 	}
@@ -68,7 +68,7 @@ ptr_swap_overlapping :: proc(x, y: rawptr, len: int) {
 }
 }
 
 
 
 
-ptr_rotate :: proc(left: int, mid: ^$T, right: int) {
+ptr_rotate :: proc "contextless" (left: int, mid: ^$T, right: int) {
 	when size_of(T) != 0 {
 	when size_of(T) != 0 {
 		left, mid, right := left, mid, right
 		left, mid, right := left, mid, right
 
 

+ 210 - 33
core/slice/sort.odin

@@ -6,6 +6,8 @@ Ordering :: enum {
 	Greater = +1,
 	Greater = +1,
 }
 }
 
 
+// Generic_Cmp is a type-erased comparison procedure.
+// `lhs` and `rhs` are pointers to the two elements being compared and
+// `user_data` is an opaque pointer forwarded unchanged from the sort call.
+Generic_Cmp :: #type proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering
+
 @(require_results)
 @(require_results)
 cmp :: proc(a, b: $E) -> Ordering where ORD(E) {
 cmp :: proc(a, b: $E) -> Ordering where ORD(E) {
 	switch {
 	switch {
@@ -35,7 +37,16 @@ cmp_proc :: proc($E: typeid) -> (proc(E, E) -> Ordering) where ORD(E) {
 sort :: proc(data: $T/[]$E) where ORD(E) {
 sort :: proc(data: $T/[]$E) where ORD(E) {
 	when size_of(E) != 0 {
 	when size_of(E) != 0 {
 		if n := len(data); n > 1 {
 		if n := len(data); n > 1 {
-			_quick_sort_general(data, 0, n, _max_depth(n), struct{}{}, .Ordered)
+			raw := ([^]byte)(raw_data(data))
+			_smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				x, y := (^E)(lhs)^, (^E)(rhs)^
+				if x < y {
+					return .Less
+				} else if x > y {
+					return .Greater
+				}
+				return .Equal
+			}, nil)
 		}
 		}
 	}
 	}
 }
 }
@@ -70,6 +81,23 @@ sort_by_indices_overwrite :: proc(data: $T/[]$E, indices: []int) {
 	swap_with_slice(data, temp)
 	swap_with_slice(data, temp)
 }
 }
 
 
+// sort_from_permutation_indices reorders `data` in place so that position `i`
+// ends up holding the element that was originally at `indices[i]`.
+// `indices` is only read, and must have the same length as `data` (asserted).
+// NOTE(review): the chain-following loop assumes `indices` is a permutation of
+// 0..<len(data) - confirm for any caller other than the `*_with_indices` sorts.
+sort_from_permutation_indices :: proc(data: $T/[]$E, indices: []int) {
+	assert(len(data) == len(indices))
+	if len(indices) <= 1 {
+		return
+	}
+
+	for i in 0..<len(indices) {
+		index_to_swap := indices[i]
+
+		// An index below `i` names a slot that was already finalised, so its
+		// original element has been swapped away; follow the chain to find
+		// where that element currently lives.
+		for index_to_swap < i {
+			index_to_swap = indices[index_to_swap]
+		}
+
+		ptr_swap_non_overlapping(&data[i], &data[index_to_swap], size_of(E))
+	}
+}
+
 // sort sorts a slice and returns a slice of the original indices
 // sort sorts a slice and returns a slice of the original indices
 // This sort is not guaranteed to be stable
 // This sort is not guaranteed to be stable
 sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (indices: []int) where ORD(E) {
 sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (indices: []int) where ORD(E) {
@@ -79,7 +107,22 @@ sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (ind
 			for _, idx in indices {
 			for _, idx in indices {
 				indices[idx] = idx
 				indices[idx] = idx
 			}
 			}
-			_quick_sort_general_with_indices(data, indices, 0, n, _max_depth(n), struct{}{}, .Ordered)
+
+			raw := ([^]byte)(raw_data(indices))
+			_smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				data := ([^]E)(user_data)
+
+				xi, yi := (^int)(lhs)^, (^int)(rhs)^
+				#no_bounds_check x, y := data[xi], data[yi]
+				if x < y {
+					return .Less
+				} else if x > y {
+					return .Greater
+				}
+				return .Equal
+			}, raw_data(data))
+
+			sort_from_permutation_indices(data, indices)
 		}
 		}
 		return indices
 		return indices
 	}
 	}
@@ -91,7 +134,39 @@ sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (ind
 sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
 sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
 	when size_of(E) != 0 {
 	when size_of(E) != 0 {
 		if n := len(data); n > 1 {
 		if n := len(data); n > 1 {
-			_quick_sort_general(data, 0, n, _max_depth(n), less, .Less)
+			raw := ([^]byte)(raw_data(data))
+			_smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				x, y := (^E)(lhs)^, (^E)(rhs)^
+				less := (proc(E, E) -> bool)(user_data)
+				switch {
+				case less(x, y): return .Less
+				case less(y, x): return .Greater
+				}
+				return .Equal
+			}, rawptr(less))
+		}
+	}
+}
+
+sort_by_with_data :: proc(data: $T/[]$E, less: proc(i, j: E, user_data: rawptr) -> bool, user_data: rawptr) {
+	when size_of(E) != 0 {
+		if n := len(data); n > 1 {
+			Context :: struct {
+				less:      proc(i, j: E, user_data: rawptr) -> bool,
+				user_data: rawptr,
+			}
+			ctx := &Context{less, user_data}
+
+			raw := ([^]byte)(raw_data(data))
+			_smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				x, y := (^E)(lhs)^, (^E)(rhs)^
+				ctx := (^Context)(user_data)
+				switch {
+				case ctx.less(x, y, ctx.user_data): return .Less
+				case ctx.less(y, x, ctx.user_data): return .Greater
+				}
+				return .Equal
+			}, ctx)
 		}
 		}
 	}
 	}
 }
 }
@@ -105,8 +180,59 @@ sort_by_with_indices :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool, allocat
 			for _, idx in indices {
 			for _, idx in indices {
 				indices[idx] = idx
 				indices[idx] = idx
 			}
 			}
-			_quick_sort_general_with_indices(data, indices, 0, n, _max_depth(n), less, .Less)
-			return indices
+
+			Context :: struct{
+				less: proc(i, j: E) -> bool,
+				data: T,
+			}
+			ctx := &Context{less, data}
+
+			raw := ([^]byte)(raw_data(indices))
+			_smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				ctx := (^Context)(user_data)
+				xi, yi := (^int)(lhs)^, (^int)(rhs)^
+				x, y := ctx.data[xi], ctx.data[yi]
+				switch {
+				case ctx.less(x, y): return .Less
+				case ctx.less(y, x): return .Greater
+				}
+				return .Equal
+			}, ctx)
+
+			sort_from_permutation_indices(data, indices)
+		}
+	}
+	return indices
+}
+
+sort_by_with_indices_with_data :: proc(data: $T/[]$E, less: proc(i, j: E, user_data: rawptr) -> bool, user_data: rawptr, allocator := context.allocator) -> (indices : []int) {
+	indices = make([]int, len(data), allocator)
+	when size_of(E) != 0 {
+		if n := len(data); n > 1 {
+			for _, idx in indices {
+				indices[idx] = idx
+			}
+
+			Context :: struct{
+				less: proc(i, j: E, user_data: rawptr) -> bool,
+				data: T,
+				user_data: rawptr,
+			}
+			ctx := &Context{less, data, user_data}
+
+			raw := ([^]byte)(raw_data(indices))
+			_smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				ctx := (^Context)(user_data)
+				xi, yi := (^int)(lhs)^, (^int)(rhs)^
+				x, y := ctx.data[xi], ctx.data[yi]
+				switch {
+				case ctx.less(x, y, ctx.user_data): return .Less
+				case ctx.less(y, x, ctx.user_data): return .Greater
+				}
+				return .Equal
+			}, ctx)
+
+			sort_from_permutation_indices(data, indices)
 		}
 		}
 	}
 	}
 	return indices
 	return indices
@@ -115,11 +241,47 @@ sort_by_with_indices :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool, allocat
 sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
 sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
 	when size_of(E) != 0 {
 	when size_of(E) != 0 {
 		if n := len(data); n > 1 {
 		if n := len(data); n > 1 {
-			_quick_sort_general(data, 0, n, _max_depth(n), cmp, .Cmp)
+			raw := ([^]byte)(raw_data(data))
+			_smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				x, y := (^E)(lhs)^, (^E)(rhs)^
+				cmp := cast(proc(E, E) -> Ordering)(user_data)
+				return cmp(x, y)
+			}, rawptr(cmp))
 		}
 		}
 	}
 	}
 }
 }
 
 
+
+// sort_by_cmp_with_data sorts `data` in place using `cmp`, which receives
+// `user_data` as its third argument on every comparison.
+// `cmp` and `user_data` are bundled into a local `Context` value whose address
+// is passed to `_smoothsort` as the opaque argument; the wrapper procedure
+// loads the two elements by value and forwards them to `cmp`.
+// Nothing is done for zero-sized element types or slices shorter than two elements.
+sort_by_cmp_with_data :: proc(data: $T/[]$E, cmp: proc(i, j: E, user_data: rawptr) -> Ordering, user_data: rawptr) {
+	when size_of(E) != 0 {
+		if n := len(data); n > 1 {
+			Context :: struct{
+				cmp: proc(i, j: E, user_data: rawptr) -> Ordering,
+				user_data: rawptr,
+			}
+			ctx := &Context{cmp, user_data}
+
+			raw := ([^]byte)(raw_data(data))
+			_smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+				x, y := (^E)(lhs)^, (^E)(rhs)^
+				// Here `user_data` is the `Context` built above, not the caller's pointer.
+				ctx := (^Context)(user_data)
+				return ctx.cmp(x, y, ctx.user_data)
+			}, ctx)
+		}
+	}
+}
+
+
+// sort_by_generic_cmp sorts `data` in place using a type-erased comparison procedure.
+// `cmp` and `user_data` are handed to `_smoothsort` unchanged, and the sort works
+// on the slice's raw bytes with an element width of `size_of(E)`, so `cmp` is
+// called with pointers to elements rather than element values.
+// Nothing is done for zero-sized element types or slices shorter than two elements.
+sort_by_generic_cmp :: proc(data: $T/[]$E, cmp: Generic_Cmp, user_data: rawptr) {
+	when size_of(E) != 0 {
+		if n := len(data); n > 1 {
+			raw := ([^]byte)(raw_data(data))
+			_smoothsort(raw, uint(len(data)), size_of(E), cmp, user_data)
+		}
+	}
+}
+
+
 // stable_sort sorts a slice
 // stable_sort sorts a slice
 stable_sort :: proc(data: $T/[]$E) where ORD(E) {
 stable_sort :: proc(data: $T/[]$E) where ORD(E) {
 	when size_of(E) != 0 {
 	when size_of(E) != 0 {
@@ -188,37 +350,60 @@ reverse_sort :: proc(data: $T/[]$E) where ORD(E) {
 
 
 
 
 reverse_sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
 reverse_sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
-	context._internal = rawptr(less)
-	sort_by(data, proc(i, j: E) -> bool {
-		k := (proc(i, j: E) -> bool)(context._internal)
-		return k(j, i)
-	})
+	sort_by_with_data(data, proc(i, j: E, user_data: rawptr) -> bool {
+		less := (proc(E, E) -> bool)(user_data)
+		return less(j, i)
+	}, rawptr(less))
 }
 }
 
 
 reverse_sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
 reverse_sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
-	context._internal = rawptr(cmp)
-	sort_by_cmp(data, proc(i, j: E) -> Ordering {
-		k := (proc(i, j: E) -> Ordering)(context._internal)
+	// Sorts `data` in the reverse of the order defined by `cmp`: the caller's
+	// procedure travels through `user_data` and is invoked with its arguments swapped.
+	sort_by_cmp_with_data(data, proc(i, j: E, user_data: rawptr) -> Ordering {
+		// `user_data` must be the comparison procedure itself, not the slice.
+		k := (proc(i, j: E) -> Ordering)(user_data)
 		return k(j, i)
 		return k(j, i)
-	})
+	}, rawptr(cmp))
 }
 }
 
 
 
 
 // TODO(bill): Should `sort_by_key` exist or is `sort_by` more than enough?
 // TODO(bill): Should `sort_by_key` exist or is `sort_by` more than enough?
 sort_by_key :: proc(data: $T/[]$E, key: proc(E) -> $K) where ORD(K) {
 sort_by_key :: proc(data: $T/[]$E, key: proc(E) -> $K) where ORD(K) {
-	context._internal = rawptr(key)
-	sort_by(data, proc(i, j: E) -> bool {
-		k := (proc(E) -> K)(context._internal)
-		return k(i) < k(j)
-	})
+	Context :: struct {
+		key: proc(E) -> K,
+	}
+	ctx := &Context{key}
+
+	sort_by_generic_cmp(data, proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+		i, j := (^E)(lhs)^, (^E)(rhs)^
+
+		ctx := (^Context)(user_data)
+		a := ctx.key(i)
+		b := ctx.key(j)
+		switch {
+		case a < b: return .Less
+		case a > b: return .Greater
+		}
+		return .Equal
+	}, ctx)
 }
 }
 
 
 reverse_sort_by_key :: proc(data: $T/[]$E, key: proc(E) -> $K) where ORD(K) {
 reverse_sort_by_key :: proc(data: $T/[]$E, key: proc(E) -> $K) where ORD(K) {
-	context._internal = rawptr(key)
-	sort_by(data, proc(i, j: E) -> bool {
-		k := (proc(E) -> K)(context._internal)
-		return k(j) < k(i)
-	})
+	Context :: struct {
+		key: proc(E) -> K,
+	}
+	ctx := &Context{key}
+
+	sort_by_generic_cmp(data, proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering {
+		i, j := (^E)(lhs)^, (^E)(rhs)^
+
+		ctx := (^Context)(user_data)
+		a := ctx.key(i)
+		b := ctx.key(j)
+		switch {
+		case a < b: return .Greater
+		case a > b: return .Less
+		}
+		return .Equal
+	}, ctx)
 }
 }
 
 
 @(require_results)
 @(require_results)
@@ -229,12 +414,4 @@ is_sorted_by_key :: proc(array: $T/[]$E, key: proc(E) -> $K) -> bool where ORD(K
 		}
 		}
 	}
 	}
 	return true
 	return true
-}
-
-@(private, require_results)
-_max_depth :: proc(n: int) -> (depth: int) { // 2*ceil(log2(n+1))
-	for i := n; i > 0; i >>= 1 {
-		depth += 1
-	}
-	return depth * 2
-}
+}

+ 152 - 314
core/slice/sort_private.odin

@@ -1,6 +1,7 @@
 #+private
 #+private
 package slice
 package slice
 
 
+import "base:builtin"
 import "base:intrinsics"
 import "base:intrinsics"
 _ :: intrinsics
 _ :: intrinsics
 
 
@@ -12,171 +13,6 @@ Sort_Kind :: enum {
 	Cmp,
 	Cmp,
 }
 }
 
 
-_quick_sort_general :: proc(data: $T/[]$E, a, b, max_depth: int, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
-	less :: #force_inline proc(a, b: E, call: P) -> bool {
-		when KIND == .Ordered {
-			return a < b
-		} else when KIND == .Less {
-			return call(a, b)
-		} else when KIND == .Cmp {
-			return call(a, b) == .Less
-		} else {
-			#panic("unhandled Sort_Kind")
-		}
-	}
-
-	insertion_sort :: proc(data: $T/[]$E, a, b: int, call: P) #no_bounds_check {
-		for i in a+1..<b {
-			for j := i; j > a && less(data[j], data[j-1], call); j -= 1 {
-				swap(data, j, j-1)
-			}
-		}
-	}
-
-	heap_sort :: proc(data: $T/[]$E, a, b: int, call: P) #no_bounds_check {
-		sift_down :: proc(data: T, lo, hi, first: int, call: P) #no_bounds_check {
-			root := lo
-			for {
-				child := 2*root + 1
-				if child >= hi {
-					break
-				}
-				if child+1 < hi && less(data[first+child], data[first+child+1], call) {
-					child += 1
-				}
-				if !less(data[first+root], data[first+child], call) {
-					return
-				}
-				swap(data, first+root, first+child)
-				root = child
-			}
-		}
-
-
-		first, lo, hi := a, 0, b-a
-
-		for i := (hi-1)/2; i >= 0; i -= 1 {
-			sift_down(data, i, hi, first, call)
-		}
-
-		for i := hi-1; i >= 0; i -= 1 {
-			swap(data, first, first+i)
-			sift_down(data, lo, i, first, call)
-		}
-	}
-
-	median3 :: proc(data: T, m1, m0, m2: int, call: P) #no_bounds_check {
-		if less(data[m1], data[m0], call) {
-			swap(data, m1, m0)
-		}
-		if less(data[m2], data[m1], call) {
-			swap(data, m2, m1)
-			if less(data[m1], data[m0], call) {
-				swap(data, m1, m0)
-			}
-		}
-	}
-
-	do_pivot :: proc(data: T, lo, hi: int, call: P) -> (midlo, midhi: int) #no_bounds_check {
-		m := int(uint(lo+hi)>>1)
-		if hi-lo > 40 {
-			s := (hi-lo)/8
-			median3(data, lo, lo+s, lo+s*2, call)
-			median3(data, m, m-s, m+s, call)
-			median3(data, hi-1, hi-1-s, hi-1-s*2, call)
-		}
-		median3(data, lo, m, hi-1, call)
-
-		pivot := lo
-		a, c := lo+1, hi-1
-
-
-		for ; a < c && less(data[a], data[pivot], call); a += 1 {
-		}
-		b := a
-
-		for {
-			for ; b < c && !less(data[pivot], data[b], call); b += 1 { // data[b] <= pivot
-			}
-			for ; b < c && less(data[pivot], data[c-1], call); c -=1 { // data[c-1] > pivot
-			}
-			if b >= c {
-				break
-			}
-
-			swap(data, b, c-1)
-			b += 1
-			c -= 1
-		}
-
-		protect := hi-c < 5
-		if !protect && hi-c < (hi-lo)/4 {
-			dups := 0
-			if !less(data[pivot], data[hi-1], call) {
-				swap(data, c, hi-1)
-				c += 1
-				dups += 1
-			}
-			if !less(data[b-1], data[pivot], call) {
-				b -= 1
-				dups += 1
-			}
-
-			if !less(data[m], data[pivot], call) {
-				swap(data, m, b-1)
-				b -= 1
-				dups += 1
-			}
-			protect = dups > 1
-		}
-		if protect {
-			for {
-				for ; a < b && !less(data[b-1], data[pivot], call); b -= 1 {
-				}
-				for ; a < b && less(data[a], data[pivot], call); a += 1 {
-				}
-				if a >= b {
-					break
-				}
-				swap(data, a, b-1)
-				a += 1
-				b -= 1
-			}
-		}
-		swap(data, pivot, b-1)
-		return b-1, c
-	}
-
-
-	a, b, max_depth := a, b, max_depth
-
-	for b-a > 12 { // only use shell sort for lengths <= 12
-		if max_depth == 0 {
-			heap_sort(data, a, b, call)
-			return
-		}
-		max_depth -= 1
-		mlo, mhi := do_pivot(data, a, b, call)
-		if mlo-a < b-mhi {
-			_quick_sort_general(data, a, mlo, max_depth, call, KIND)
-			a = mhi
-		} else {
-			_quick_sort_general(data, mhi, b, max_depth, call, KIND)
-			b = mlo
-		}
-	}
-	if b-a > 1 {
-		// Shell short with gap 6
-		for i in a+6..<b {
-			if less(data[i], data[i-6], call) {
-				swap(data, i, i-6)
-			}
-		}
-		insertion_sort(data, a, b, call)
-	}
-}
-
-
 _stable_sort_general :: proc(data: $T/[]$E, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
 _stable_sort_general :: proc(data: $T/[]$E, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
 	less :: #force_inline proc(a, b: E, call: P) -> bool {
 	less :: #force_inline proc(a, b: E, call: P) -> bool {
 		when KIND == .Ordered {
 		when KIND == .Ordered {
@@ -200,179 +36,181 @@ _stable_sort_general :: proc(data: $T/[]$E, call: $P, $KIND: Sort_Kind) where (O
 	}
 	}
 }
 }
 
 
-_quick_sort_general_with_indices :: proc(data: $T/[]$E, indices: []int, a, b, max_depth: int, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check {
-	less :: #force_inline proc(a, b: E, call: P) -> bool {
-		when KIND == .Ordered {
-			return a < b
-		} else when KIND == .Less {
-			return call(a, b)
-		} else when KIND == .Cmp {
-			return call(a, b) == .Less
-		} else {
-			#panic("unhandled Sort_Kind")
+@(private)
+_smoothsort :: proc(base: [^]byte, nel: uint, width: uint, cmp: Generic_Cmp, arg: rawptr) {
+	pntz :: proc "contextless" (p: [2]uint) -> int {
+		r := intrinsics.count_trailing_zeros(p[0] - 1)
+		if r != 0 {
+			return int(r)
 		}
 		}
-	}
-
-	insertion_sort :: proc(data: $T/[]$E, indices: []int, a, b: int, call: P) #no_bounds_check {
-		for i in a+1..<b {
-			for j := i; j > a && less(data[j], data[j-1], call); j -= 1 {
-				swap(data, j, j-1)
-				swap(indices, j, j-1)
-			}
+		r = (8*size_of(uint) + intrinsics.count_trailing_zeros(p[1]))
+		if r != 8*size_of(uint) {
+			return int(r)
 		}
 		}
+		return 0
 	}
 	}
 
 
-	heap_sort :: proc(data: $T/[]$E, indices: []int, a, b: int, call: P) #no_bounds_check {
-		sift_down :: proc(data: T, indices: []int, lo, hi, first: int, call: P) #no_bounds_check {
-			root := lo
-			for {
-				child := 2*root + 1
-				if child >= hi {
-					break
-				}
-				if child+1 < hi && less(data[first+child], data[first+child+1], call) {
-					child += 1
-				}
-				if !less(data[first+root], data[first+child], call) {
-					return
-				}
-				swap(data, first+root, first+child)
-				swap(indices, first+root, first+child)
-				root = child
-			}
-		}
-
-
-		first, lo, hi := a, 0, b-a
-
-		for i := (hi-1)/2; i >= 0; i -= 1 {
-			sift_down(data, indices, i, hi, first, call)
+	shl :: proc "contextless" (p: []uint, n: int) {
+		// Shifts the two-word value held in `p` (p[0] low word, p[1] high word)
+		// left by `n` bits in place.
+		n := n
+		if n >= 8*size_of(uint) {
+			// A shift of a whole word or more moves the low word into the high word.
+			n -= 8*size_of(uint)
+			p[1] = p[0]
+			p[0] = 0
 		}
 		}
-
-		for i := hi-1; i >= 0; i -= 1 {
-			swap(data, first, first+i)
-			swap(indices, first, first+i)
-			sift_down(data, indices, lo, i, first, call)
+		p[1] <<= uint(n)
+		// Bits shifted out of the top of the low word carry into the high word
+		// (the mirror image of what `shr` does in the other direction).
+		p[1] |= p[0] >> uint(8*size_of(uint) - n)
+		p[0] <<= uint(n)
+	}
+	shr :: proc "contextless" (p: []uint, n: int) {
+		n := n
+		if n >= 8*size_of(uint) {
+			n -= 8*size_of(uint)
+			p[0] = p[1]
+			p[1] = 0
 		}
 		}
+		p[0] >>= uint(n)
+		p[0] |= p[1] << uint(8*size_of(uint) - n)
+		p[1] >>= uint(n)
 	}
 	}
 
 
-	median3 :: proc(data: T, indices: []int, m1, m0, m2: int, call: P) #no_bounds_check {
-		if less(data[m1], data[m0], call) {
-			swap(data, m1, m0)
-			swap(indices, m1, m0)
+	cycle :: proc "contextless" (width: uint, data: [][^]byte, n: int) {
+		if len(data) < 2 {
+			return
 		}
 		}
-		if less(data[m2], data[m1], call) {
-			swap(data, m2, m1)
-			swap(indices, m2, m1)
-			if less(data[m1], data[m0], call) {
-				swap(data, m1, m0)
-				swap(indices, m1, m0)
+		buf: [256]u8 = ---
+		data[n] = raw_data(buf[:])
+		width := width
+		for width != 0 {
+			l := builtin.min(size_of(buf), int(width))
+			copy(data[n][:l], data[0][:l])
+			for i in 0..<n {
+				copy(data[i][:l], data[i+1][:l])
+				data[i] = data[i][l:]
+			}
+			width -= uint(l)
+		}
+	}
+
+	sift :: proc(head: [^]byte, width: uint, cmp: Generic_Cmp, arg: rawptr, pshift: int, lp: []uint) {
+		head := head
+		buf: [14*size_of(uint)+1][^]byte = ---
+		buf[0] = head
+		i := 1
+		pshift := pshift
+		for pshift > 1 {
+			rt := head[-width:]
+			lf := head[-width:][-lp[pshift - 2]:]
+			if cmp(buf[0], lf, arg) >= .Equal && cmp(buf[0], rt, arg) >= .Equal {
+				break
 			}
 			}
+			if cmp(lf, rt, arg) >= .Equal {
+				buf[i], head = lf, lf
+				pshift -= 1
+			} else {
+				buf[i], head = rt, rt
+				pshift -= 2
+			}
+			i += 1
 		}
 		}
+		cycle(width, buf[:], i)
 	}
 	}
 
 
-	do_pivot :: proc(data: T, indices: []int, lo, hi: int, call: P) -> (midlo, midhi: int) #no_bounds_check {
-		m := int(uint(lo+hi)>>1)
-		if hi-lo > 40 {
-			s := (hi-lo)/8
-			median3(data, indices, lo, lo+s, lo+s*2, call)
-			median3(data, indices, m, m-s, m+s, call)
-			median3(data, indices, hi-1, hi-1-s, hi-1-s*2, call)
-		}
-		median3(data, indices, lo, m, hi-1, call)
-
-		pivot := lo
-		a, c := lo+1, hi-1
+	trinkle :: proc(head: [^]byte, width: uint, cmp: Generic_Cmp, arg: rawptr, pp: []uint, pshift: int, trusty: bool, lp: []uint) {
+		head := head
 
 
+		p := [2]uint{pp[0], pp[1]}
 
 
-		for ; a < c && less(data[a], data[pivot], call); a += 1 {
-		}
-		b := a
+		buf: [14*size_of(uint)+1][^]byte = ---
+		buf[0] = head
 
 
-		for {
-			for ; b < c && !less(data[pivot], data[b], call); b += 1 { // data[b] <= pivot
-			}
-			for ; b < c && less(data[pivot], data[c-1], call); c -=1 { // data[c-1] > pivot
-			}
-			if b >= c {
+		i := 1
+		trail := 0
+		pshift := pshift
+		trusty := trusty
+		for p[0] != 1 || p[1] != 0 {
+			stepson := head[-lp[pshift]:]
+			if cmp(stepson, buf[0], arg) <= .Equal {
 				break
 				break
 			}
 			}
-
-			swap(data, b, c-1)
-			swap(indices, b, c-1)
-			b += 1
-			c -= 1
-		}
-
-		protect := hi-c < 5
-		if !protect && hi-c < (hi-lo)/4 {
-			dups := 0
-			if !less(data[pivot], data[hi-1], call) {
-				swap(data, c, hi-1)
-				swap(indices, c, hi-1)
-				c += 1
-				dups += 1
-			}
-			if !less(data[b-1], data[pivot], call) {
-				b -= 1
-				dups += 1
-			}
-
-			if !less(data[m], data[pivot], call) {
-				swap(data, m, b-1)
-				swap(indices, m, b-1)
-				b -= 1
-				dups += 1
-			}
-			protect = dups > 1
-		}
-		if protect {
-			for {
-				for ; a < b && !less(data[b-1], data[pivot], call); b -= 1 {
-				}
-				for ; a < b && less(data[a], data[pivot], call); a += 1 {
-				}
-				if a >= b {
+			if !trusty && pshift > 1 {
+				rt := head[-width:]
+				lf := head[-width:][-lp[pshift-2]:]
+				if cmp(rt, stepson, arg) >= .Equal || cmp(lf, stepson, arg) >= .Equal {
 					break
 					break
 				}
 				}
-				swap(data, a, b-1)
-				swap(indices, a, b-1)
-				a += 1
-				b -= 1
 			}
 			}
+			buf[i] = stepson
+			head = stepson
+			trail = pntz(p)
+			shr(p[:], trail)
+			pshift += trail
+			trusty = false
+			i += 1
 		}
 		}
-		swap(data, pivot, b-1)
-		swap(indices, pivot, b-1)
-		return b-1, c
-	}
-
-	assert(len(data) == len(indices))
-
-	a, b, max_depth := a, b, max_depth
-
-	for b-a > 12 { // only use shell sort for lengths <= 12
-		if max_depth == 0 {
-			heap_sort(data, indices, a, b, call)
+		if trusty {
 			return
 			return
 		}
 		}
-		max_depth -= 1
-		mlo, mhi := do_pivot(data, indices, a, b, call)
-		if mlo-a < b-mhi {
-			_quick_sort_general_with_indices(data, indices, a, mlo, max_depth, call, KIND)
-			a = mhi
-		} else {
-			_quick_sort_general_with_indices(data, indices, mhi, b, max_depth, call, KIND)
-			b = mlo
-		}
+		cycle(width, buf[:], i)
+		sift(head, width, cmp, arg, pshift, lp)
 	}
 	}
-	if b-a > 1 {
-		// Shell short with gap 6
-		for i in a+6..<b {
-			if less(data[i], data[i-6], call) {
-				swap(data, i, i-6)
-				swap(indices, i, i-6)
-			}
+
+	size := nel * width
+	if size == 0 {
+		return
+	}
+
+	lp: [12*size_of(uint)]uint = ---
+	lp[1] = width
+	lp[0] = lp[1]
+	for i := 2; true; i += 1 {
+		lp[i] = lp[i-2] + lp[i-1] + width
+		if lp[i] >= size {
+			break
 		}
 		}
-		insertion_sort(data, indices, a, b, call)
 	}
 	}
-}
+
+	head := base
+	high := head[size - width:]
+	p := [2]uint{1, 0}
+	pshift := 1
+	for head < high {
+		if (p[0] & 3) == 3 {
+			sift(head, width, cmp, arg, pshift, lp[:])
+			shr(p[:], 2)
+			pshift += 2
+		} else {
+			if lp[pshift - 1] >= uint(uintptr(high) - uintptr(head)) {
+				trinkle(head, width, cmp, arg, p[:], pshift, false, lp[:])
+			} else {
+				sift(head, width, cmp, arg, pshift, lp[:])
+			}
+			if pshift == 1 {
+				shl(p[:], 1)
+				pshift = 0
+			} else {
+				shl(p[:], pshift - 1)
+				pshift = 1
+			}
+		}
+		p[0] |= 1
+		head = head[width:]
+	}
+	trinkle(head, width, cmp, arg, p[:], pshift, false, lp[:])
+	for pshift != 1 || p[0] != 1 || p[1] != 0 {
+		if pshift <= 1 {
+			trail := pntz(p)
+			shr(p[:], trail)
+			pshift += trail
+		} else {
+			shl(p[:], 2)
+			pshift -= 2
+			p[0] ~= 7
+			shr(p[:], 1)
+			trinkle(head[-width:][-lp[pshift]:], width, cmp, arg, p[:], pshift + 1, true, lp[:])
+			shl(p[:], 1)
+			p[0] |= 1
+			trinkle(head[-width:], width, cmp, arg, p[:], pshift, true, lp[:])
+		}
+		head = head[-width:]
+	}
+}

+ 4 - 0
src/check_builtin.cpp

@@ -20,6 +20,8 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool
 	is_type_quaternion,
 	is_type_quaternion,
 	is_type_string,
 	is_type_string,
 	is_type_string16,
 	is_type_string16,
+	is_type_cstring,
+	is_type_cstring16,
 	is_type_typeid,
 	is_type_typeid,
 	is_type_any,
 	is_type_any,
 	is_type_endian_platform,
 	is_type_endian_platform,
@@ -6424,6 +6426,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 	case BuiltinProc_type_is_quaternion:
 	case BuiltinProc_type_is_quaternion:
 	case BuiltinProc_type_is_string:
 	case BuiltinProc_type_is_string:
 	case BuiltinProc_type_is_string16:
 	case BuiltinProc_type_is_string16:
+	case BuiltinProc_type_is_cstring:
+	case BuiltinProc_type_is_cstring16:
 	case BuiltinProc_type_is_typeid:
 	case BuiltinProc_type_is_typeid:
 	case BuiltinProc_type_is_any:
 	case BuiltinProc_type_is_any:
 	case BuiltinProc_type_is_endian_platform:
 	case BuiltinProc_type_is_endian_platform:

+ 2 - 0
src/checker_builtin_procs.hpp

@@ -251,6 +251,8 @@ BuiltinProc__type_simple_boolean_begin,
 	BuiltinProc_type_is_quaternion,
 	BuiltinProc_type_is_quaternion,
 	BuiltinProc_type_is_string,
 	BuiltinProc_type_is_string,
 	BuiltinProc_type_is_string16,
 	BuiltinProc_type_is_string16,
+	BuiltinProc_type_is_cstring,
+	BuiltinProc_type_is_cstring16,
 	BuiltinProc_type_is_typeid,
 	BuiltinProc_type_is_typeid,
 	BuiltinProc_type_is_any,
 	BuiltinProc_type_is_any,