
Merge pull request #1005 from Kelimion/maths

Add `abs_f16` support + endian versions of maths routines.
gingerBill committed 4 years ago
commit 599d0cf6ac
2 changed files with 538 additions and 162 deletions:
  1. core/math/math.odin (+537, -162)
  2. src/check_builtin.cpp (+1, -0)
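
For orientation before the diff itself: the pattern of the change is that each math routine gains `*_f16le`/`*_f16be`/`*_f32le`/`*_f32be`/`*_f64le`/`*_f64be` wrappers that convert to the native width, call the existing implementation, and convert back, and the overload sets (`sqrt`, `sin`, `log`, ...) are extended to include them. A minimal usage sketch, not part of the commit (the `core:math` import path is assumed and the example names are invented):

package endian_math_example

import "core:fmt"
import "core:math"

main :: proc() {
	// f32be keeps its bytes big-endian in memory but behaves like f32 in expressions.
	x: f32be = 16.0;
	r := math.sqrt(x);   // the sqrt overload set now resolves this to sqrt_f32be
	fmt.println(f32(r)); // 4

	θ: f16le = 0.0;
	fmt.println(f32(math.cos(θ))); // resolves to cos_f16le; prints 1
}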

core/math/math.odin (+537, -162)

@@ -96,45 +96,186 @@ foreign _ {
 	ldexp_f64 :: proc(val: f64, exp: i32) -> f64 ---;
 }
 
-sqrt      :: proc{sqrt_f16,    sqrt_f32,    sqrt_f64};
-sin       :: proc{sin_f16,     sin_f32,     sin_f64};
-cos       :: proc{cos_f16,     cos_f32,     cos_f64};
-pow       :: proc{pow_f16,     pow_f32,     pow_f64};
-fmuladd   :: proc{fmuladd_f16, fmuladd_f32, fmuladd_f64};
-ln        :: proc{ln_f16,      ln_f32,      ln_f64};
-exp       :: proc{exp_f16,     exp_f32,     exp_f64};
+sqrt_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(sqrt_f16(f16(x))); }
+sqrt_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(sqrt_f16(f16(x))); }
+sqrt_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(sqrt_f32(f32(x))); }
+sqrt_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(sqrt_f32(f32(x))); }
+sqrt_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(sqrt_f64(f64(x))); }
+sqrt_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(sqrt_f64(f64(x))); }
+sqrt       :: proc{
+	sqrt_f16, sqrt_f16le, sqrt_f16be,
+	sqrt_f32, sqrt_f32le, sqrt_f32be,
+	sqrt_f64, sqrt_f64le, sqrt_f64be,
+};
 
-ldexp :: proc{ldexp_f16, ldexp_f32, ldexp_f64};
+sin_f16le :: proc(θ: f16le) -> f16le { return #force_inline f16le(sin_f16(f16(θ))); }
+sin_f16be :: proc(θ: f16be) -> f16be { return #force_inline f16be(sin_f16(f16(θ))); }
+sin_f32le :: proc(θ: f32le) -> f32le { return #force_inline f32le(sin_f32(f32(θ))); }
+sin_f32be :: proc(θ: f32be) -> f32be { return #force_inline f32be(sin_f32(f32(θ))); }
+sin_f64le :: proc(θ: f64le) -> f64le { return #force_inline f64le(sin_f64(f64(θ))); }
+sin_f64be :: proc(θ: f64be) -> f64be { return #force_inline f64be(sin_f64(f64(θ))); }
+sin       :: proc{
+	sin_f16, sin_f16le, sin_f16be,
+	sin_f32, sin_f32le, sin_f32be,
+	sin_f64, sin_f64le, sin_f64be,
+};
 
-log_f16 :: proc(x, base: f16) -> f16 { return ln(x) / ln(base); }
-log_f32 :: proc(x, base: f32) -> f32 { return ln(x) / ln(base); }
-log_f64 :: proc(x, base: f64) -> f64 { return ln(x) / ln(base); }
-log     :: proc{log_f16, log_f32, log_f64};
+cos_f16le :: proc(θ: f16le) -> f16le { return #force_inline f16le(cos_f16(f16(θ))); }
+cos_f16be :: proc(θ: f16be) -> f16be { return #force_inline f16be(cos_f16(f16(θ))); }
+cos_f32le :: proc(θ: f32le) -> f32le { return #force_inline f32le(cos_f32(f32(θ))); }
+cos_f32be :: proc(θ: f32be) -> f32be { return #force_inline f32be(cos_f32(f32(θ))); }
+cos_f64le :: proc(θ: f64le) -> f64le { return #force_inline f64le(cos_f64(f64(θ))); }
+cos_f64be :: proc(θ: f64be) -> f64be { return #force_inline f64be(cos_f64(f64(θ))); }
+cos       :: proc{
+	cos_f16, cos_f16le, cos_f16be,
+	cos_f32, cos_f32le, cos_f32be,
+	cos_f64, cos_f64le, cos_f64be,
+};
+
+pow_f16le :: proc(x, power: f16le) -> f16le { return #force_inline f16le(pow_f16(f16(x), f16(power))); }
+pow_f16be :: proc(x, power: f16be) -> f16be { return #force_inline f16be(pow_f16(f16(x), f16(power))); }
+pow_f32le :: proc(x, power: f32le) -> f32le { return #force_inline f32le(pow_f32(f32(x), f32(power))); }
+pow_f32be :: proc(x, power: f32be) -> f32be { return #force_inline f32be(pow_f32(f32(x), f32(power))); }
+pow_f64le :: proc(x, power: f64le) -> f64le { return #force_inline f64le(pow_f64(f64(x), f64(power))); }
+pow_f64be :: proc(x, power: f64be) -> f64be { return #force_inline f64be(pow_f64(f64(x), f64(power))); }
+pow       :: proc{
+	pow_f16, pow_f16le, pow_f16be,
+	pow_f32, pow_f32le, pow_f32be,
+	pow_f64, pow_f64le, pow_f64be,
+};
+
+fmuladd_f16le :: proc(a, b, c: f16le) -> f16le { return #force_inline f16le(fmuladd_f16(f16(a), f16(b), f16(c))); }
+fmuladd_f16be :: proc(a, b, c: f16be) -> f16be { return #force_inline f16be(fmuladd_f16(f16(a), f16(b), f16(c))); }
+fmuladd_f32le :: proc(a, b, c: f32le) -> f32le { return #force_inline f32le(fmuladd_f32(f32(a), f32(b), f32(c))); }
+fmuladd_f32be :: proc(a, b, c: f32be) -> f32be { return #force_inline f32be(fmuladd_f32(f32(a), f32(b), f32(c))); }
+fmuladd_f64le :: proc(a, b, c: f64le) -> f64le { return #force_inline f64le(fmuladd_f64(f64(a), f64(b), f64(c))); }
+fmuladd_f64be :: proc(a, b, c: f64be) -> f64be { return #force_inline f64be(fmuladd_f64(f64(a), f64(b), f64(c))); }
+fmuladd       :: proc{
+	fmuladd_f16, fmuladd_f16le, fmuladd_f16be,
+	fmuladd_f32, fmuladd_f32le, fmuladd_f32be,
+	fmuladd_f64, fmuladd_f64le, fmuladd_f64be,
+};
+
+ln_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(ln_f16(f16(x))); }
+ln_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(ln_f16(f16(x))); }
+ln_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(ln_f32(f32(x))); }
+ln_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(ln_f32(f32(x))); }
+ln_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))); }
+ln_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))); }
+ln       :: proc{
+	ln_f16, ln_f16le, ln_f16be,
+	ln_f32, ln_f32le, ln_f32be,
+	ln_f64, ln_f64le, ln_f64be,
+};
 
-log2_f16 :: proc(x: f16) -> f16 { return ln(x)/LN2; }
-log2_f32 :: proc(x: f32) -> f32 { return ln(x)/LN2; }
-log2_f64 :: proc(x: f64) -> f64 { return ln(x)/LN2; }
-log2     :: proc{log2_f16, log2_f32, log2_f64};
+exp_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(exp_f16(f16(x))); }
+exp_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(exp_f16(f16(x))); }
+exp_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(exp_f32(f32(x))); }
+exp_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(exp_f32(f32(x))); }
+exp_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(exp_f64(f64(x))); }
+exp_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(exp_f64(f64(x))); }
+exp       :: proc{
+	exp_f16, exp_f16le, exp_f16be,
+	exp_f32, exp_f32le, exp_f32be,
+	exp_f64, exp_f64le, exp_f64be,
+};
+
+ldexp_f16le :: proc(val: f16le, exp: i32) -> f16le { return #force_inline f16le(ldexp_f16(f16(val), exp)); }
+ldexp_f16be :: proc(val: f16be, exp: i32) -> f16be { return #force_inline f16be(ldexp_f16(f16(val), exp)); }
+ldexp_f32le :: proc(val: f32le, exp: i32) -> f32le { return #force_inline f32le(ldexp_f32(f32(val), exp)); }
+ldexp_f32be :: proc(val: f32be, exp: i32) -> f32be { return #force_inline f32be(ldexp_f32(f32(val), exp)); }
+ldexp_f64le :: proc(val: f64le, exp: i32) -> f64le { return #force_inline f64le(ldexp_f64(f64(val), exp)); }
+ldexp_f64be :: proc(val: f64be, exp: i32) -> f64be { return #force_inline f64be(ldexp_f64(f64(val), exp)); }
+ldexp       :: proc{
+	ldexp_f16, ldexp_f16le, ldexp_f16be,
+	ldexp_f32, ldexp_f32le, ldexp_f32be,
+	ldexp_f64, ldexp_f64le, ldexp_f64be,
+};
+
+
+log_f16   :: proc(x, base: f16)   -> f16   { return ln(x) / ln(base); }
+log_f16le :: proc(x, base: f16le) -> f16le { return f16le(log_f16(f16(x), f16(base))); }
+log_f16be :: proc(x, base: f16be) -> f16be { return f16be(log_f16(f16(x), f16(base))); }
+
+log_f32   :: proc(x, base: f32)   -> f32   { return ln(x) / ln(base); }
+log_f32le :: proc(x, base: f32le) -> f32le { return f32le(log_f32(f32(x), f32(base))); }
+log_f32be :: proc(x, base: f32be) -> f32be { return f32be(log_f32(f32(x), f32(base))); }
+
+log_f64   :: proc(x, base: f64)   -> f64   { return ln(x) / ln(base); }
+log_f64le :: proc(x, base: f64le) -> f64le { return f64le(log_f64(f64(x), f64(base))); }
+log_f64be :: proc(x, base: f64be) -> f64be { return f64be(log_f64(f64(x), f64(base))); }
+log       :: proc{
+	log_f16, log_f16le, log_f16be,
+	log_f32, log_f32le, log_f32be,
+	log_f64, log_f64le, log_f64be,
+};
 
-log10_f16 :: proc(x: f16) -> f16 { return ln(x)/LN10; }
-log10_f32 :: proc(x: f32) -> f32 { return ln(x)/LN10; }
-log10_f64 :: proc(x: f64) -> f64 { return ln(x)/LN10; }
-log10     :: proc{log10_f16, log10_f32, log10_f64};
+log2_f16   :: proc(x: f16)   -> f16   { return ln(x)/LN2; }
+log2_f16le :: proc(x: f16le) -> f16le { return f16le(log2_f16(f16(x))); }
+log2_f16be :: proc(x: f16be) -> f16be { return f16be(log2_f16(f16(x))); }
+
+log2_f32   :: proc(x: f32)   -> f32   { return ln(x)/LN2; }
+log2_f32le :: proc(x: f32le) -> f32le { return f32le(log2_f32(f32(x))); }
+log2_f32be :: proc(x: f32be) -> f32be { return f32be(log2_f32(f32(x))); }
+
+log2_f64   :: proc(x: f64)   -> f64   { return ln(x)/LN2; }
+log2_f64le :: proc(x: f64le) -> f64le { return f64le(log2_f64(f64(x))); }
+log2_f64be :: proc(x: f64be) -> f64be { return f64be(log2_f64(f64(x))); }
+log2       :: proc{
+	log2_f16, log2_f16le, log2_f16be,
+	log2_f32, log2_f32le, log2_f32be,
+	log2_f64, log2_f64le, log2_f64be,
+};
 
+log10_f16   :: proc(x: f16)   -> f16   { return ln(x)/LN10; }
+log10_f16le :: proc(x: f16le) -> f16le { return f16le(log10_f16(f16(x))); }
+log10_f16be :: proc(x: f16be) -> f16be { return f16be(log10_f16(f16(x))); }
+
+log10_f32   :: proc(x: f32)   -> f32   { return ln(x)/LN10; }
+log10_f32le :: proc(x: f32le) -> f32le { return f32le(log10_f32(f32(x))); }
+log10_f32be :: proc(x: f32be) -> f32be { return f32be(log10_f32(f32(x))); }
+
+log10_f64   :: proc(x: f64)   -> f64   { return ln(x)/LN10; }
+log10_f64le :: proc(x: f64le) -> f64le { return f64le(log10_f64(f64(x))); }
+log10_f64be :: proc(x: f64be) -> f64be { return f64be(log10_f64(f64(x))); }
+log10       :: proc{
+	log10_f16, log10_f16le, log10_f16be,
+	log10_f32, log10_f32le, log10_f32be,
+	log10_f64, log10_f64le, log10_f64be,
+};
 
-tan_f16 :: proc(θ: f16) -> f16 { return sin(θ)/cos(θ); }
-tan_f32 :: proc(θ: f32) -> f32 { return sin(θ)/cos(θ); }
-tan_f64 :: proc(θ: f64) -> f64 { return sin(θ)/cos(θ); }
-tan     :: proc{tan_f16, tan_f32, tan_f64};
+tan_f16   :: proc(θ: f16)   -> f16   { return sin(θ)/cos(θ); }
+tan_f16le :: proc(θ: f16le) -> f16le { return f16le(tan_f16(f16(θ))); }
+tan_f16be :: proc(θ: f16be) -> f16be { return f16be(tan_f16(f16(θ))); }
+
+tan_f32   :: proc(θ: f32)   -> f32   { return sin(θ)/cos(θ); }
+tan_f32le :: proc(θ: f32le) -> f32le { return f32le(tan_f32(f32(θ))); }
+tan_f32be :: proc(θ: f32be) -> f32be { return f32be(tan_f32(f32(θ))); }
+
+tan_f64   :: proc(θ: f64)   -> f64   { return sin(θ)/cos(θ); }
+tan_f64le :: proc(θ: f64le) -> f64le { return f64le(tan_f64(f64(θ))); }
+tan_f64be :: proc(θ: f64be) -> f64be { return f64be(tan_f64(f64(θ))); }
+tan       :: proc{
+	tan_f16, tan_f16le, tan_f16be,
+	tan_f32, tan_f32le, tan_f32be,
+	tan_f64, tan_f64le, tan_f64be,
+};
 
 lerp :: proc(a, b: $T, t: $E) -> (x: T) { return a*(1-t) + b*t; }
 saturate :: proc(a: $T) -> (x: T) { return clamp(a, 0, 1); };
 
-unlerp_f16 :: proc(a, b, x: f16) -> (t: f16) { return (x-a)/(b-a); }
-unlerp_f32 :: proc(a, b, x: f32) -> (t: f32) { return (x-a)/(b-a); }
-unlerp_f64 :: proc(a, b, x: f64) -> (t: f64) { return (x-a)/(b-a); }
-unlerp     :: proc{unlerp_f16, unlerp_f32, unlerp_f64};
+unlerp :: proc(a, b, x: $T) -> (t: T)  where intrinsics.type_is_float(T), !intrinsics.type_is_array(T) {
+	return (x-a)/(b-a);
+}
 
+remap :: proc(old_value, old_min, old_max, new_min, new_max: $T) -> (x: T) where intrinsics.type_is_numeric(T), !intrinsics.type_is_array(T) {
+	old_range := old_max - old_min;
+	new_range := new_max - new_min;
+	if old_range == 0 {
+		return new_range / 2;
+	}
+	return ((old_value - old_min) / old_range) * new_range + new_min;
+}
 
 wrap :: proc(x, y: $T) -> T where intrinsics.type_is_numeric(T), !intrinsics.type_is_array(T) {
 	tmp := mod(x, y);
@@ -170,57 +311,105 @@ gain :: proc(t, g: $T) -> T where intrinsics.type_is_numeric(T) {
 }
 
 
-sign_f16 :: proc(x: f16) -> f16 { return f16(int(0 < x) - int(x < 0)); }
-sign_f32 :: proc(x: f32) -> f32 { return f32(int(0 < x) - int(x < 0)); }
-sign_f64 :: proc(x: f64) -> f64 { return f64(int(0 < x) - int(x < 0)); }
-sign     :: proc{sign_f16, sign_f32, sign_f64};
-
+sign_f16   :: proc(x: f16)   -> f16   { return f16(int(0 < x) - int(x < 0)); }
+sign_f16le :: proc(x: f16le) -> f16le { return f16le(int(0 < x) - int(x < 0)); }
+sign_f16be :: proc(x: f16be) -> f16be { return f16be(int(0 < x) - int(x < 0)); }
+sign_f32   :: proc(x: f32)   -> f32   { return f32(int(0 < x) - int(x < 0)); }
+sign_f32le :: proc(x: f32le) -> f32le { return f32le(int(0 < x) - int(x < 0)); }
+sign_f32be :: proc(x: f32be) -> f32be { return f32be(int(0 < x) - int(x < 0)); }
+sign_f64   :: proc(x: f64)   -> f64   { return f64(int(0 < x) - int(x < 0)); }
+sign_f64le :: proc(x: f64le) -> f64le { return f64le(int(0 < x) - int(x < 0)); }
+sign_f64be :: proc(x: f64be) -> f64be { return f64be(int(0 < x) - int(x < 0)); }
+sign       :: proc{
+	sign_f16, sign_f16le, sign_f16be,
+	sign_f32, sign_f32le, sign_f32be,
+	sign_f64, sign_f64le, sign_f64be,
+};
 
-sign_bit_f16 :: proc(x: f16) -> bool {
+sign_bit_f16 :: proc(x: f16)     -> bool {
 	return (transmute(u16)x) & (1<<15) != 0;
 }
-sign_bit_f32 :: proc(x: f32) -> bool {
+sign_bit_f16le :: proc(x: f16le) -> bool { return #force_inline sign_bit_f16(f16(x)); }
+sign_bit_f16be :: proc(x: f16be) -> bool { return #force_inline sign_bit_f16(f16(x)); }
+sign_bit_f32   :: proc(x: f32)   -> bool {
 	return (transmute(u32)x) & (1<<31) != 0;
 }
-sign_bit_f64 :: proc(x: f64) -> bool {
+sign_bit_f32le :: proc(x: f32le) -> bool { return #force_inline sign_bit_f32(f32(x)); }
+sign_bit_f32be :: proc(x: f32be) -> bool { return #force_inline sign_bit_f32(f32(x)); }
+sign_bit_f64   :: proc(x: f64)   -> bool {
 	return (transmute(u64)x) & (1<<63) != 0;
 }
-sign_bit :: proc{sign_bit_f16, sign_bit_f32, sign_bit_f64};
+sign_bit_f64le :: proc(x: f64le) -> bool { return #force_inline sign_bit_f64(f64(x)); }
+sign_bit_f64be :: proc(x: f64be) -> bool { return #force_inline sign_bit_f64(f64(x)); }
+sign_bit       :: proc{
+	sign_bit_f16, sign_bit_f16le, sign_bit_f16be,
+	sign_bit_f32, sign_bit_f32le, sign_bit_f32be,
+	sign_bit_f64, sign_bit_f64le, sign_bit_f64be,
+};
 
-copy_sign_f16 :: proc(x, y: f16) -> f16 {
+copy_sign_f16   :: proc(x, y: f16) -> f16 {
 	ix := transmute(u16)x;
 	iy := transmute(u16)y;
 	ix &= 0x7fff;
 	ix |= iy & 0x8000;
 	return transmute(f16)ix;
 }
-copy_sign_f32 :: proc(x, y: f32) -> f32 {
+copy_sign_f16le :: proc(x, y: f16le) -> f16le { return #force_inline f16le(copy_sign_f16(f16(x), f16(y))); }
+copy_sign_f16be :: proc(x, y: f16be) -> f16be { return #force_inline f16be(copy_sign_f16(f16(x), f16(y))); }
+copy_sign_f32   :: proc(x, y: f32) -> f32 {
 	ix := transmute(u32)x;
 	iy := transmute(u32)y;
 	ix &= 0x7fff_ffff;
 	ix |= iy & 0x8000_0000;
 	return transmute(f32)ix;
 }
-copy_sign_f64 :: proc(x, y: f64) -> f64 {
+copy_sign_f32le :: proc(x, y: f32le) -> f32le { return #force_inline f32le(copy_sign_f32(f32(x), f32(y))); }
+copy_sign_f32be :: proc(x, y: f32be) -> f32be { return #force_inline f32be(copy_sign_f32(f32(x), f32(y))); }
+copy_sign_f64   :: proc(x, y: f64) -> f64 {
 	ix := transmute(u64)x;
 	iy := transmute(u64)y;
 	ix &= 0x7fff_ffff_ffff_ffff;
 	ix |= iy & 0x8000_0000_0000_0000;
 	return transmute(f64)ix;
 }
-copy_sign :: proc{copy_sign_f16, copy_sign_f32, copy_sign_f64};
-
+copy_sign_f64le :: proc(x, y: f64le) -> f64le { return #force_inline f64le(copy_sign_f64(f64(x), f64(y))); }
+copy_sign_f64be :: proc(x, y: f64be) -> f64be { return #force_inline f64be(copy_sign_f64(f64(x), f64(y))); }
+copy_sign       :: proc{
+	copy_sign_f16, copy_sign_f16le, copy_sign_f16be,
+	copy_sign_f32, copy_sign_f32le, copy_sign_f32be,
+	copy_sign_f64, copy_sign_f64le, copy_sign_f64be,
+};
 
-to_radians_f16 :: proc(degrees: f16) -> f16 { return degrees * RAD_PER_DEG; }
-to_radians_f32 :: proc(degrees: f32) -> f32 { return degrees * RAD_PER_DEG; }
-to_radians_f64 :: proc(degrees: f64) -> f64 { return degrees * RAD_PER_DEG; }
-to_degrees_f16 :: proc(radians: f16) -> f16 { return radians * DEG_PER_RAD; }
-to_degrees_f32 :: proc(radians: f32) -> f32 { return radians * DEG_PER_RAD; }
-to_degrees_f64 :: proc(radians: f64) -> f64 { return radians * DEG_PER_RAD; }
-to_radians     :: proc{to_radians_f16, to_radians_f32, to_radians_f64};
-to_degrees     :: proc{to_degrees_f16, to_degrees_f32, to_degrees_f64};
+to_radians_f16   :: proc(degrees: f16)   -> f16   { return degrees * RAD_PER_DEG; }
+to_radians_f16le :: proc(degrees: f16le) -> f16le { return degrees * RAD_PER_DEG; }
+to_radians_f16be :: proc(degrees: f16be) -> f16be { return degrees * RAD_PER_DEG; }
+to_radians_f32   :: proc(degrees: f32)   -> f32   { return degrees * RAD_PER_DEG; }
+to_radians_f32le :: proc(degrees: f32le) -> f32le { return degrees * RAD_PER_DEG; }
+to_radians_f32be :: proc(degrees: f32be) -> f32be { return degrees * RAD_PER_DEG; }
+to_radians_f64   :: proc(degrees: f64)   -> f64   { return degrees * RAD_PER_DEG; }
+to_radians_f64le :: proc(degrees: f64le) -> f64le { return degrees * RAD_PER_DEG; }
+to_radians_f64be :: proc(degrees: f64be) -> f64be { return degrees * RAD_PER_DEG; }
+to_degrees_f16   :: proc(radians: f16)   -> f16   { return radians * DEG_PER_RAD; }
+to_degrees_f16le :: proc(radians: f16le) -> f16le { return radians * DEG_PER_RAD; }
+to_degrees_f16be :: proc(radians: f16be) -> f16be { return radians * DEG_PER_RAD; }
+to_degrees_f32   :: proc(radians: f32)   -> f32   { return radians * DEG_PER_RAD; }
+to_degrees_f32le :: proc(radians: f32le) -> f32le { return radians * DEG_PER_RAD; }
+to_degrees_f32be :: proc(radians: f32be) -> f32be { return radians * DEG_PER_RAD; }
+to_degrees_f64   :: proc(radians: f64)   -> f64   { return radians * DEG_PER_RAD; }
+to_degrees_f64le :: proc(radians: f64le) -> f64le { return radians * DEG_PER_RAD; }
+to_degrees_f64be :: proc(radians: f64be) -> f64be { return radians * DEG_PER_RAD; }
+to_radians       :: proc{
+	to_radians_f16, to_radians_f16le, to_radians_f16be,
+	to_radians_f32, to_radians_f32le, to_radians_f32be,
+	to_radians_f64, to_radians_f64le, to_radians_f64be,
+};
+to_degrees       :: proc{
+	to_degrees_f16, to_degrees_f16le, to_degrees_f16be,
+	to_degrees_f32, to_degrees_f32le, to_degrees_f32be,
+	to_degrees_f64, to_degrees_f64le, to_degrees_f64be,
+};
 
-trunc_f16 :: proc(x: f16) -> f16 {
+trunc_f16   :: proc(x: f16) -> f16 {
 	trunc_internal :: proc(f: f16) -> f16 {
 		mask :: 0x1f;
 		shift :: 16 - 6;
@@ -249,8 +438,10 @@ trunc_f16 :: proc(x: f16) -> f16 {
 	}
 	return trunc_internal(x);
 }
+trunc_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(trunc_f16(f16(x))); }
+trunc_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(trunc_f16(f16(x))); }
 
-trunc_f32 :: proc(x: f32) -> f32 {
+trunc_f32   :: proc(x: f32) -> f32 {
 	trunc_internal :: proc(f: f32) -> f32 {
 		mask :: 0xff;
 		shift :: 32 - 9;
@@ -279,8 +470,10 @@ trunc_f32 :: proc(x: f32) -> f32 {
 	}
 	return trunc_internal(x);
 }
+trunc_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(trunc_f32(f32(x))); }
+trunc_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(trunc_f32(f32(x))); }
 
-trunc_f64 :: proc(x: f64) -> f64 {
+trunc_f64   :: proc(x: f64) -> f64 {
 	trunc_internal :: proc(f: f64) -> f64 {
 		mask :: 0x7ff;
 		shift :: 64 - 12;
@@ -309,27 +502,68 @@ trunc_f64 :: proc(x: f64) -> f64 {
 	}
 	return trunc_internal(x);
 }
+trunc_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(trunc_f64(f64(x))); }
+trunc_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(trunc_f64(f64(x))); }
+trunc       :: proc{
+	trunc_f16, trunc_f16le, trunc_f16be,
+	trunc_f32, trunc_f32le, trunc_f32be, 
+	trunc_f64, trunc_f64le, trunc_f64be, 
+};
 
-trunc :: proc{trunc_f16, trunc_f32, trunc_f64};
+round_f16   :: proc(x: f16)   -> f16 {
+	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
+}
+round_f16le :: proc(x: f16le) -> f16le {
+	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
+}
+round_f16be :: proc(x: f16be) -> f16be {
+	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
+}
 
-round_f16 :: proc(x: f16) -> f16 {
+round_f32   :: proc(x: f32)   -> f32 {
+	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
+}
+round_f32le :: proc(x: f32le) -> f32le {
+	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
+}
+round_f32be :: proc(x: f32be) -> f32be {
+	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
+}
+round_f64   :: proc(x: f64)   -> f64 {
 	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
 }
-round_f32 :: proc(x: f32) -> f32 {
+round_f64le :: proc(x: f64le) -> f64le {
 	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
 }
-round_f64 :: proc(x: f64) -> f64 {
+round_f64be :: proc(x: f64be) -> f64be {
 	return ceil(x - 0.5) if x < 0 else floor(x + 0.5);
 }
-round :: proc{round_f16, round_f32, round_f64};
+round       :: proc{
+	round_f16, round_f16le, round_f16be,
+	round_f32, round_f32le, round_f32be,
+	round_f64, round_f64le, round_f64be,
+};
+
+
+ceil_f16   :: proc(x: f16)   -> f16   { return -floor(-x); }
+ceil_f16le :: proc(x: f16le) -> f16le { return -floor(-x); }
+ceil_f16be :: proc(x: f16be) -> f16be { return -floor(-x); }
+
+ceil_f32   :: proc(x: f32)   -> f32   { return -floor(-x); }
+ceil_f32le :: proc(x: f32le) -> f32le { return -floor(-x); }
+ceil_f32be :: proc(x: f32be) -> f32be { return -floor(-x); }
 
+ceil_f64   :: proc(x: f64)   -> f64   { return -floor(-x); }
+ceil_f64le :: proc(x: f64le) -> f64le { return -floor(-x); }
+ceil_f64be :: proc(x: f64be) -> f64be { return -floor(-x); }
 
-ceil_f16 :: proc(x: f16) -> f16 { return -floor(-x); }
-ceil_f32 :: proc(x: f32) -> f32 { return -floor(-x); }
-ceil_f64 :: proc(x: f64) -> f64 { return -floor(-x); }
-ceil :: proc{ceil_f16, ceil_f32, ceil_f64};
+ceil       :: proc{
+	ceil_f16, ceil_f16le, ceil_f16be,
+	ceil_f32, ceil_f32le, ceil_f32be,
+	ceil_f64, ceil_f64le, ceil_f64be,
+};
 
-floor_f16 :: proc(x: f16) -> f16 {
+floor_f16   :: proc(x: f16)   -> f16 {
 	if x == 0 || is_nan(x) || is_inf(x) {
 		return x;
 	}
@@ -343,7 +577,9 @@ floor_f16 :: proc(x: f16) -> f16 {
 	d, _ := modf(x);
 	return d;
 }
-floor_f32 :: proc(x: f32) -> f32 {
+floor_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(floor_f16(f16(x))); }
+floor_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(floor_f16(f16(x))); }
+floor_f32   :: proc(x: f32)   -> f32 {
 	if x == 0 || is_nan(x) || is_inf(x) {
 		return x;
 	}
@@ -357,7 +593,9 @@ floor_f32 :: proc(x: f32) -> f32 {
 	d, _ := modf(x);
 	return d;
 }
-floor_f64 :: proc(x: f64) -> f64 {
+floor_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(floor_f32(f32(x))); }
+floor_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(floor_f32(f32(x))); }
+floor_f64   :: proc(x: f64)   -> f64 {
 	if x == 0 || is_nan(x) || is_inf(x) {
 		return x;
 	}
@@ -371,7 +609,13 @@ floor_f64 :: proc(x: f64) -> f64 {
 	d, _ := modf(x);
 	return d;
 }
-floor :: proc{floor_f16, floor_f32, floor_f64};
+floor_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(floor_f64(f64(x))); }
+floor_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(floor_f64(f64(x))); }
+floor       :: proc{
+	floor_f16, floor_f16le, floor_f16be,
+	floor_f32, floor_f32le, floor_f32be,
+	floor_f64, floor_f64le, floor_f64be,
+};
 
 
 floor_div :: proc(x, y: $T) -> T
@@ -393,7 +637,7 @@ floor_mod :: proc(x, y: $T) -> T
 	return r;
 }
 
-modf_f16 :: proc(x: f16) -> (int: f16, frac: f16) {
+modf_f16   :: proc(x: f16) -> (int: f16, frac: f16) {
 	shift :: 16 - 5 - 1;
 	mask  :: 0x1f;
 	bias  :: 15;
@@ -419,7 +663,15 @@ modf_f16 :: proc(x: f16) -> (int: f16, frac: f16) {
 	frac = x - int;
 	return;
 }
-modf_f32 :: proc(x: f32) -> (int: f32, frac: f32) {
+modf_f16le :: proc(x: f16le) -> (int: f16le, frac: f16le) {
+	i, f := #force_inline modf_f16(f16(x));
+	return f16le(i), f16le(f);
+}
+modf_f16be :: proc(x: f16be) -> (int: f16be, frac: f16be) {
+	i, f := #force_inline modf_f16(f16(x));
+	return f16be(i), f16be(f);
+}
+modf_f32   :: proc(x: f32) -> (int: f32, frac: f32) {
 	shift :: 32 - 8 - 1;
 	mask  :: 0xff;
 	bias  :: 127;
@@ -445,7 +697,15 @@ modf_f32 :: proc(x: f32) -> (int: f32, frac: f32) {
 	frac = x - int;
 	return;
 }
-modf_f64 :: proc(x: f64) -> (int: f64, frac: f64) {
+modf_f32le :: proc(x: f32le) -> (int: f32le, frac: f32le) {
+	i, f := #force_inline modf_f32(f32(x));
+	return f32le(i), f32le(f);
+}
+modf_f32be :: proc(x: f32be) -> (int: f32be, frac: f32be) {
+	i, f := #force_inline modf_f32(f32(x));
+	return f32be(i), f32be(f);
+}
+modf_f64   :: proc(x: f64) -> (int: f64, frac: f64) {
 	shift :: 64 - 11 - 1;
 	mask  :: 0x7ff;
 	bias  :: 1023;
@@ -471,10 +731,22 @@ modf_f64 :: proc(x: f64) -> (int: f64, frac: f64) {
 	frac = x - int;
 	return;
 }
-modf :: proc{modf_f16, modf_f32, modf_f64};
+modf_f64le :: proc(x: f64le) -> (int: f64le, frac: f64le) {
+	i, f := #force_inline modf_f64(f64(x));
+	return f64le(i), f64le(f);
+}
+modf_f64be :: proc(x: f64be) -> (int: f64be, frac: f64be) {
+	i, f := #force_inline modf_f64(f64(x));
+	return f64be(i), f64be(f);
+}
+modf       :: proc{
+	modf_f16, modf_f16le, modf_f16be,
+	modf_f32, modf_f32le, modf_f32be,
+	modf_f64, modf_f64le, modf_f64be,
+};
 split_decimal :: modf;
 
-mod_f16 :: proc(x, y: f16) -> (n: f16) {
+mod_f16   :: proc(x, y: f16) -> (n: f16) {
 	z := abs(y);
 	n = remainder(abs(x), z);
 	if sign(n) < 0 {
@@ -482,7 +754,9 @@ mod_f16 :: proc(x, y: f16) -> (n: f16) {
 	}
 	return copy_sign(n, x);
 }
-mod_f32 :: proc(x, y: f32) -> (n: f32) {
+mod_f16le :: proc(x, y: f16le) -> (n: f16le) { return #force_inline f16le(mod_f16(f16(x), f16(y))); }
+mod_f16be :: proc(x, y: f16be) -> (n: f16be) { return #force_inline f16be(mod_f16(f16(x), f16(y))); }
+mod_f32   :: proc(x, y: f32)   -> (n: f32) {
 	z := abs(y);
 	n = remainder(abs(x), z);
 	if sign(n) < 0 {
@@ -490,7 +764,9 @@ mod_f32 :: proc(x, y: f32) -> (n: f32) {
 	}
 	return copy_sign(n, x);
 }
-mod_f64 :: proc(x, y: f64) -> (n: f64) {
+mod_f32le :: proc(x, y: f32le) -> (n: f32le) { return #force_inline f32le(mod_f32(f32(x), f32(y))); }
+mod_f32be :: proc(x, y: f32be) -> (n: f32be) { return #force_inline f32be(mod_f32(f32(x), f32(y))); }
+mod_f64   :: proc(x, y: f64)   -> (n: f64) {
 	z := abs(y);
 	n = remainder(abs(x), z);
 	if sign(n) < 0 {
@@ -498,14 +774,28 @@ mod_f64 :: proc(x, y: f64) -> (n: f64) {
 	}
 	return copy_sign(n, x);
 }
-mod :: proc{mod_f16, mod_f32, mod_f64};
-
-remainder_f16 :: proc(x, y: f16) -> f16 { return x - round(x/y) * y; }
-remainder_f32 :: proc(x, y: f32) -> f32 { return x - round(x/y) * y; }
-remainder_f64 :: proc(x, y: f64) -> f64 { return x - round(x/y) * y; }
-remainder :: proc{remainder_f16, remainder_f32, remainder_f64};
-
+mod_f64le :: proc(x, y: f64le) -> (n: f64le) { return #force_inline f64le(mod_f64(f64(x), f64(y))); }
+mod_f64be :: proc(x, y: f64be) -> (n: f64be) { return #force_inline f64be(mod_f64(f64(x), f64(y))); }
+mod       :: proc{
+	mod_f16, mod_f16le, mod_f16be,
+	mod_f32, mod_f32le, mod_f32be,
+	mod_f64, mod_f64le, mod_f64be,
+};
 
+remainder_f16   :: proc(x, y: f16  ) -> f16   { return x - round(x/y) * y; }
+remainder_f16le :: proc(x, y: f16le) -> f16le { return x - round(x/y) * y; }
+remainder_f16be :: proc(x, y: f16be) -> f16be { return x - round(x/y) * y; }
+remainder_f32   :: proc(x, y: f32  ) -> f32   { return x - round(x/y) * y; }
+remainder_f32le :: proc(x, y: f32le) -> f32le { return x - round(x/y) * y; }
+remainder_f32be :: proc(x, y: f32be) -> f32be { return x - round(x/y) * y; }
+remainder_f64   :: proc(x, y: f64  ) -> f64   { return x - round(x/y) * y; }
+remainder_f64le :: proc(x, y: f64le) -> f64le { return x - round(x/y) * y; }
+remainder_f64be :: proc(x, y: f64be) -> f64be { return x - round(x/y) * y; }
+remainder       :: proc{
+	remainder_f16, remainder_f16le, remainder_f16be,
+	remainder_f32, remainder_f32le, remainder_f32be,
+	remainder_f64, remainder_f64le, remainder_f64be,
+};
 
 gcd :: proc(x, y: $T) -> T
 	where intrinsics.type_is_ordered_numeric(T) {
@@ -522,14 +812,30 @@ lcm :: proc(x, y: $T) -> T
 	return x / gcd(x, y) * y;
 }
 
-frexp_f16 :: proc(x: f16) -> (significand: f16, exponent: int) {
+frexp_f16   :: proc(x: f16)   -> (significand: f16,   exponent: int) {
 	f, e := frexp_f64(f64(x));
 	return f16(f), e;
 }
-frexp_f32 :: proc(x: f32) -> (significand: f32, exponent: int) {
+frexp_f16le :: proc(x: f16le) -> (significand: f16le, exponent: int) {
+	f, e := frexp_f64(f64(x));
+	return f16le(f), e;
+}
+frexp_f16be :: proc(x: f16be) -> (significand: f16be, exponent: int) {
+	f, e := frexp_f64(f64(x));
+	return f16be(f), e;
+}
+frexp_f32 :: proc(x: f32)     -> (significand: f32,   exponent: int) {
 	f, e := frexp_f64(f64(x));
 	return f32(f), e;
 }
+frexp_f32le :: proc(x: f32le) -> (significand: f32le, exponent: int) {
+	f, e := frexp_f64(f64(x));
+	return f32le(f), e;
+}
+frexp_f32be :: proc(x: f32be) -> (significand: f32be, exponent: int) {
+	f, e := frexp_f64(f64(x));
+	return f32be(f), e;
+}
 frexp_f64 :: proc(x: f64) -> (significand: f64, exponent: int) {
 	switch {
 	case x == 0:
@@ -550,7 +856,19 @@ frexp_f64 :: proc(x: f64) -> (significand: f64, exponent: int) {
 	}
 	return;
 }
-frexp :: proc{frexp_f16, frexp_f32, frexp_f64};
+frexp_f64le :: proc(x: f64le) -> (significand: f64le, exponent: int) {
+	f, e := frexp_f64(f64(x));
+	return f64le(f), e;
+}
+frexp_f64be :: proc(x: f64be) -> (significand: f64be, exponent: int) {
+	f, e := frexp_f64(f64(x));
+	return f64be(f), e;
+}
+frexp       :: proc{
+	frexp_f16, frexp_f16le, frexp_f16be,
+	frexp_f32, frexp_f32le, frexp_f32be,
+	frexp_f64, frexp_f64le, frexp_f64be, 
+};
 
 
 
@@ -616,7 +934,7 @@ factorial :: proc(n: int) -> int {
 	return table[n];
 }
 
-classify_f16 :: proc(x: f16) -> Float_Class {
+classify_f16   :: proc(x: f16)   -> Float_Class {
 	switch {
 	case x == 0:
 		i := transmute(i16)x;
@@ -640,7 +958,9 @@ classify_f16 :: proc(x: f16) -> Float_Class {
 	}
 	return .Normal;
 }
-classify_f32 :: proc(x: f32) -> Float_Class {
+classify_f16le :: proc(x: f16le) -> Float_Class { return #force_inline classify_f16(f16(x)); }
+classify_f16be :: proc(x: f16be) -> Float_Class { return #force_inline classify_f16(f16(x)); }
+classify_f32   :: proc(x: f32)   -> Float_Class {
 	switch {
 	case x == 0:
 		i := transmute(i32)x;
@@ -664,7 +984,9 @@ classify_f32 :: proc(x: f32) -> Float_Class {
 	}
 	return .Normal;
 }
-classify_f64 :: proc(x: f64) -> Float_Class {
+classify_f32le :: proc(x: f32le) -> Float_Class { return #force_inline classify_f32(f32(x)); }
+classify_f32be :: proc(x: f32be) -> Float_Class { return #force_inline classify_f32(f32(x)); }
+classify_f64   :: proc(x: f64)   -> Float_Class {
 	switch {
 	case x == 0:
 		i := transmute(i64)x;
@@ -687,13 +1009,28 @@ classify_f64 :: proc(x: f64) -> Float_Class {
 	}
 	return .Normal;
 }
-classify :: proc{classify_f16, classify_f32, classify_f64};
-
-is_nan_f16 :: proc(x: f16) -> bool { return classify(x) == .NaN; }
-is_nan_f32 :: proc(x: f32) -> bool { return classify(x) == .NaN; }
-is_nan_f64 :: proc(x: f64) -> bool { return classify(x) == .NaN; }
-is_nan :: proc{is_nan_f16, is_nan_f32, is_nan_f64};
+classify_f64le :: proc(x: f64le) -> Float_Class { return #force_inline classify_f64(f64(x)); }
+classify_f64be :: proc(x: f64be) -> Float_Class { return #force_inline classify_f64(f64(x)); }
+classify       :: proc{
+	classify_f16, classify_f16le, classify_f16be,
+	classify_f32, classify_f32le, classify_f32be,
+	classify_f64, classify_f64le, classify_f64be,
+};
 
+is_nan_f16   :: proc(x: f16)   -> bool { return classify(x) == .NaN; }
+is_nan_f16le :: proc(x: f16le) -> bool { return classify(x) == .NaN; }
+is_nan_f16be :: proc(x: f16be) -> bool { return classify(x) == .NaN; }
+is_nan_f32   :: proc(x: f32)   -> bool { return classify(x) == .NaN; }
+is_nan_f32le :: proc(x: f32le) -> bool { return classify(x) == .NaN; }
+is_nan_f32be :: proc(x: f32be) -> bool { return classify(x) == .NaN; }
+is_nan_f64   :: proc(x: f64)   -> bool { return classify(x) == .NaN; }
+is_nan_f64le :: proc(x: f64le) -> bool { return classify(x) == .NaN; }
+is_nan_f64be :: proc(x: f64be) -> bool { return classify(x) == .NaN; }
+is_nan       :: proc{
+	is_nan_f16, is_nan_f16le, is_nan_f16be,
+	is_nan_f32, is_nan_f32le, is_nan_f32be,
+	is_nan_f64, is_nan_f64le, is_nan_f64be,
+};
 
 // is_inf reports whether f is an infinity, according to sign.
 // If sign > 0, is_inf reports whether f is positive infinity.
@@ -709,6 +1046,13 @@ is_inf_f16 :: proc(x: f16, sign: int = 0) -> bool {
 	}
 	return class == .Inf || class == .Neg_Inf;
 }
+is_inf_f16le :: proc(x: f16le, sign: int = 0) -> bool {
+	return #force_inline is_inf_f16(f16(x), sign);
+}
+is_inf_f16be :: proc(x: f16be, sign: int = 0) -> bool {
+	return #force_inline is_inf_f16(f16(x), sign);
+}
+
 is_inf_f32 :: proc(x: f32, sign: int = 0) -> bool {
 	class := classify(abs(x));
 	switch {
@@ -719,6 +1063,13 @@ is_inf_f32 :: proc(x: f32, sign: int = 0) -> bool {
 	}
 	return class == .Inf || class == .Neg_Inf;
 }
+is_inf_f32le :: proc(x: f32le, sign: int = 0) -> bool {
+	return #force_inline is_inf_f32(f32(x), sign);
+}
+is_inf_f32be :: proc(x: f32be, sign: int = 0) -> bool {
+	return #force_inline is_inf_f32(f32(x), sign);
+}
+
 is_inf_f64 :: proc(x: f64, sign: int = 0) -> bool {
 	class := classify(abs(x));
 	switch {
@@ -729,16 +1080,37 @@ is_inf_f64 :: proc(x: f64, sign: int = 0) -> bool {
 	}
 	return class == .Inf || class == .Neg_Inf;
 }
-is_inf :: proc{is_inf_f16, is_inf_f32, is_inf_f64};
-
+is_inf_f64le :: proc(x: f64le, sign: int = 0) -> bool {
+	return #force_inline is_inf_f64(f64(x), sign);
+}
+is_inf_f64be :: proc(x: f64be, sign: int = 0) -> bool {
+	return #force_inline is_inf_f64(f64(x), sign);
+}
+is_inf :: proc{
+	is_inf_f16, is_inf_f16le, is_inf_f16be,
+	is_inf_f32, is_inf_f32le, is_inf_f32be,
+	is_inf_f64, is_inf_f64le, is_inf_f64be,
+};
 
-inf_f16 :: proc(sign: int) -> f16 {
-	return f16(inf_f16(sign));
+inf_f16   :: proc(sign: int) -> f16 {
+	return f16(inf_f64(sign));
+}
+inf_f16le :: proc(sign: int) -> f16le {
+	return f16le(inf_f64(sign));
+}
+inf_f16be :: proc(sign: int) -> f16be {
+	return f16be(inf_f64(sign));
 }
-inf_f32 :: proc(sign: int) -> f32 {
+inf_f32   :: proc(sign: int) -> f32 {
 	return f32(inf_f64(sign));
 }
-inf_f64 :: proc(sign: int) -> f64 {
+inf_f32le :: proc(sign: int) -> f32le {
+	return f32le(inf_f64(sign));
+}
+inf_f32be :: proc(sign: int) -> f32be {
+	return f32be(inf_f64(sign));
+}
+inf_f64   :: proc(sign: int) -> f64 {
 	v: u64;
 	if sign >= 0 {
 		v = 0x7ff00000_00000000;
@@ -747,19 +1119,41 @@ inf_f64 :: proc(sign: int) -> f64 {
 	}
 	return transmute(f64)v;
 }
+inf_f64le :: proc(sign: int) -> f64le {
+	return f64le(inf_f64(sign));
+}
+inf_f64be :: proc(sign: int) -> f64be {
+	return f64be(inf_f64(sign));
+}
 
-nan_f16 :: proc() -> f16 {
+nan_f16   :: proc() -> f16 {
 	return f16(nan_f64());
 }
-nan_f32 :: proc() -> f32 {
+nan_f16le :: proc() -> f16le {
+	return f16le(nan_f64());
+}
+nan_f16be :: proc() -> f16be {
+	return f16be(nan_f64());
+}
+nan_f32   :: proc() -> f32 {
 	return f32(nan_f64());
 }
-nan_f64 :: proc() -> f64 {
+nan_f32le :: proc() -> f32le {
+	return f32le(nan_f64());
+}
+nan_f32be :: proc() -> f32be {
+	return f32be(nan_f64());
+}
+nan_f64   :: proc() -> f64 {
 	v: u64 = 0x7ff80000_00000001;
 	return transmute(f64)v;
 }
-
-
+nan_f64le :: proc() -> f64le {
+	return f64le(nan_f64());
+}
+nan_f64be :: proc() -> f64be {
+	return f64be(nan_f64());
+}
 
 is_power_of_two :: proc(x: int) -> bool {
 	return x > 0 && (x & (x-1)) == 0;
@@ -816,14 +1210,30 @@ cumsum :: proc(dst, src: $T/[]$E) -> T
 }
 
 
-atan2_f16 :: proc(y, x: f16) -> f16 {
+atan2_f16   :: proc(y, x: f16)   -> f16 {
 	// TODO(bill): Better atan2_f16
 	return f16(atan2_f64(f64(y), f64(x)));
 }
-atan2_f32 :: proc(y, x: f32) -> f32 {
+atan2_f16le :: proc(y, x: f16le) -> f16le {
+	// TODO(bill): Better atan2_f16
+	return f16le(atan2_f64(f64(y), f64(x)));
+}
+atan2_f16be :: proc(y, x: f16be) -> f16be {
+	// TODO(bill): Better atan2_f16
+	return f16be(atan2_f64(f64(y), f64(x)));
+}
+atan2_f32 :: proc(y, x: f32)     -> f32 {
 	// TODO(bill): Better atan2_f32
 	return f32(atan2_f64(f64(y), f64(x)));
 }
+atan2_f32le :: proc(y, x: f32le) -> f32le {
+	// TODO(bill): Better atan2_f32
+	return f32le(atan2_f64(f64(y), f64(x)));
+}
+atan2_f32be :: proc(y, x: f32be) -> f32be {
+	// TODO(bill): Better atan2_f32
+	return f32be(atan2_f64(f64(y), f64(x)));
+}
 
 atan2_f64 :: proc(y, x: f64) -> f64 {
 	// TODO(bill): Faster atan2_f64 if possible
@@ -910,80 +1320,45 @@ atan2_f64 :: proc(y, x: f64) -> f64 {
 	}
 	return q;
 }
-
-
-atan2 :: proc{atan2_f16, atan2_f32, atan2_f64};
-
-atan_f16 :: proc(x: f16) -> f16 {
-	return atan2_f16(x, 1);
-}
-atan_f32 :: proc(x: f32) -> f32 {
-	return atan2_f32(x, 1);
+atan2_f64le :: proc(y, x: f64le) -> f64le {
+	// TODO(bill): Better atan2_f64
+	return f64le(atan2_f64(f64(y), f64(x)));
 }
-atan_f64 :: proc(x: f64) -> f64 {
-	return atan2_f64(x, 1);
+atan2_f64be :: proc(y, x: f64be) -> f64be {
+	// TODO(bill): Better atan2_f64
+	return f64be(atan2_f64(f64(y), f64(x)));
 }
-atan :: proc{atan_f16, atan_f32, atan_f64};
 
-asin_f16 :: proc(x: f16) -> f16 {
-	return atan2_f16(x, 1 + sqrt_f16(1 - x*x));
-}
-asin_f32 :: proc(x: f32) -> f32 {
-	return atan2_f32(x, 1 + sqrt_f32(1 - x*x));
-}
-asin_f64 :: proc(x: f64) -> f64 {
-	return atan2_f64(x, 1 + sqrt_f64(1 - x*x));
-}
-asin :: proc{asin_f16, asin_f32, asin_f64};
+atan2 :: proc{
+	atan2_f16, atan2_f16le, atan2_f16be,
+	atan2_f32, atan2_f32le, atan2_f32be,
+	atan2_f64, atan2_f64le, atan2_f64be,
+};
 
-acos_f16 :: proc(x: f16) -> f16 {
-	return 2 * atan2_f16(sqrt_f16(1 - x), sqrt_f16(1 + x));
-}
-acos_f32 :: proc(x: f32) -> f32 {
-	return 2 * atan2_f32(sqrt_f32(1 - x), sqrt_f32(1 + x));
+atan :: proc(x: $T) -> T where intrinsics.type_is_float(T) {
+	return atan2(x, 1);
 }
-acos_f64 :: proc(x: f64) -> f64 {
-	return 2 * atan2_f64(sqrt_f64(1 - x), sqrt_f64(1 + x));
-}
-acos :: proc{acos_f16, acos_f32, acos_f64};
-
 
-sinh_f16 :: proc(x: f16) -> f16 {
-	return (exp(x) - exp(-x))*0.5;
+asin :: proc(x: $T) -> T where intrinsics.type_is_float(T) {
+	return atan2(x, 1 + sqrt(1 - x*x));
 }
-sinh_f32 :: proc(x: f32) -> f32 {
-	return (exp(x) - exp(-x))*0.5;
+
+acos :: proc(x: $T) -> T where intrinsics.type_is_float(T) {
+	return 2 * atan2(sqrt(1 - x), sqrt(1 + x));
 }
-sinh_f64 :: proc(x: f64) -> f64 {
+
+sinh :: proc(x: $T) -> T where intrinsics.type_is_float(T) {
 	return (exp(x) - exp(-x))*0.5;
 }
-sinh :: proc{sinh_f16, sinh_f32, sinh_f64};
 
-cosh_f16 :: proc(x: f16) -> f16 {
-	return (exp(x) + exp(-x))*0.5;
-}
-cosh_f32 :: proc(x: f32) -> f32 {
+cosh :: proc(x: $T) -> T where intrinsics.type_is_float(T) {
 	return (exp(x) + exp(-x))*0.5;
 }
-cosh_f64 :: proc(x: f64) -> f64 {
-	return (exp(x) + exp(-x))*0.5;
-}
-cosh :: proc{cosh_f16, cosh_f32, cosh_f64};
 
-tanh_f16 :: proc(x: f16) -> f16 {
-	t := exp(2*x);
-	return (t - 1) / (t + 1);
-}
-tanh_f32 :: proc(x: f32) -> f32 {
+tanh :: proc(x: $T) -> T where intrinsics.type_is_float(T) {
 	t := exp(2*x);
 	return (t - 1) / (t + 1);
 }
-tanh_f64 :: proc(x: f64) -> f64 {
-	t := exp(2*x);
-	return (t - 1) / (t + 1);
-}
-tanh :: proc{tanh_f16, tanh_f32, tanh_f64};
-
 
 F16_DIG        :: 3;
 F16_EPSILON    :: 0.00097656;
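
Beyond the endian wrappers, the hunks above also collapse the per-width `atan`, `asin`, `acos`, `sinh`, `cosh`, and `tanh` procedures into single parametric ones constrained by `intrinsics.type_is_float`, and add generic `unlerp` and `remap`. A hedged sketch of calling the new generic forms (illustrative only, not from the commit):

package generic_math_example

import "core:fmt"
import "core:math"

main :: proc() {
	// One parametric definition now covers every float width.
	fmt.println(math.atan(f64(1.0))); // ≈ 0.785 (π/4)
	fmt.println(math.tanh(f32(0.5))); // ≈ 0.462

	// remap is new: rescale a value from [old_min, old_max] into [new_min, new_max].
	fmt.println(math.remap(f64(5), 0, 10, 0, 100)); // 50
	fmt.println(math.unlerp(f64(0), 10, 2.5));      // 0.25
}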

src/check_builtin.cpp (+1, -0)

@@ -1406,6 +1406,7 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 
 			{
 				Type *bt = base_type(operand->type);
+				if (are_types_identical(bt, t_f16))        add_package_dependency(c, "runtime", "abs_f16");
 				if (are_types_identical(bt, t_f32))        add_package_dependency(c, "runtime", "abs_f32");
 				if (are_types_identical(bt, t_f64))        add_package_dependency(c, "runtime", "abs_f64");
 				if (are_types_identical(bt, t_complex64))  add_package_dependency(c, "runtime", "abs_complex64");
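
On the compiler side, the single added line teaches the `abs` built-in check to register the `runtime.abs_f16` package dependency when the operand is `f16`, matching the existing `f32`/`f64`/complex cases. A small, illustrative sketch of what this enables (the result is converted to `f32` only so it prints cleanly; that conversion is this example's assumption, not part of the commit):

package abs_f16_example

import "core:fmt"

main :: proc() {
	h: f16 = -1.5;
	// The built-in abs on an f16 operand now pulls in runtime.abs_f16,
	// so this compiles and links like the f32/f64 cases.
	fmt.println(f32(abs(h))); // 1.5
}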