hex_float_test.cpp 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625
  1. // Copyright (c) 2015-2016 The Khronos Group Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <cfloat>
  15. #include <cmath>
  16. #include <cstdio>
  17. #include <limits>
  18. #include <sstream>
  19. #include <string>
  20. #include <tuple>
  21. #include <utility>
  22. #include <vector>
  23. #include "gmock/gmock.h"
  24. #include "source/util/hex_float.h"
  25. #include "test/unit_spirv.h"
  26. namespace spvtools {
  27. namespace utils {
  28. namespace {
  29. using ::testing::Eq;
  30. // In this file "encode" means converting a number into a string,
  31. // and "decode" means converting a string into a number.
  // Fixture parameterized on (float proxy, text) pairs.
  32. using HexFloatTest =
  33. ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
  // Fixture parameterized on (text, float proxy) pairs.
  34. using DecodeHexFloatTest =
  35. ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
  // Fixture parameterized on (double proxy, text) pairs.
  36. using HexDoubleTest =
  37. ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
  // Fixture parameterized on (text, double proxy) pairs.
  38. using DecodeHexDoubleTest =
  39. ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
  // Fixtures parameterized on a bare float / double value.
  40. using RoundTripFloatTest = ::testing::TestWithParam<float>;
  41. using RoundTripDoubleTest = ::testing::TestWithParam<double>;
  42. // Hex-encodes a float value.
  43. template <typename T>
  44. std::string EncodeViaHexFloat(const T& value) {
  45. std::stringstream ss;
  46. ss << HexFloat<T>(value);
  47. return ss.str();
  48. }
  49. // The following two tests can't be DRY because they take different parameter
  50. // types.
  51. TEST_P(HexFloatTest, EncodeCorrectly) {
  52. EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
  53. }
  54. TEST_P(HexDoubleTest, EncodeCorrectly) {
  55. EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
  56. }
  57. // Decodes a hex-float string.
  58. template <typename T>
  59. FloatProxy<T> Decode(const std::string& str) {
  60. HexFloat<FloatProxy<T>> decoded(0.f);
  61. EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
  62. return decoded.value();
  63. }
  64. TEST_P(HexFloatTest, DecodeCorrectly) {
  65. EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
  66. }
  67. TEST_P(HexDoubleTest, DecodeCorrectly) {
  68. EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
  69. }
  // (value, text) pairs for HexFloatTest. The fixture's EncodeCorrectly test
  // expects the value to hex-encode to the text, and its DecodeCorrectly test
  // expects the text to decode back to the value.
  70. INSTANTIATE_TEST_SUITE_P(
  71. Float32Tests, HexFloatTest,
  72. ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
  73. {0.f, "0x0p+0"},
  74. {1.f, "0x1p+0"},
  75. {2.f, "0x1p+1"},
  76. {3.f, "0x1.8p+1"},
  77. {0.5f, "0x1p-1"},
  78. {0.25f, "0x1p-2"},
  79. {0.75f, "0x1.8p-1"},
  80. {-0.f, "-0x0p+0"},
  81. {-1.f, "-0x1p+0"},
  82. {-0.5f, "-0x1p-1"},
  83. {-0.25f, "-0x1p-2"},
  84. {-0.75f, "-0x1.8p-1"},
  85. // Larger numbers
  86. {512.f, "0x1p+9"},
  87. {-512.f, "-0x1p+9"},
  88. {1024.f, "0x1p+10"},
  89. {-1024.f, "-0x1p+10"},
  90. {1024.f + 8.f, "0x1.02p+10"},
  91. {-1024.f - 8.f, "-0x1.02p+10"},
  92. // Small numbers
  93. {1.0f / 512.f, "0x1p-9"},
  94. {1.0f / -512.f, "-0x1p-9"},
  95. {1.0f / 1024.f, "0x1p-10"},
  96. {1.0f / -1024.f, "-0x1p-10"},
  97. {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
  98. {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
  99. // lowest non-denorm
  100. {float(ldexp(1.0f, -126)), "0x1p-126"},
  101. {float(ldexp(-1.0f, -126)), "-0x1p-126"},
  102. // Denormalized values
  103. {float(ldexp(1.0f, -127)), "0x1p-127"},
  104. {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
  105. {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
  106. {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
  107. {float(ldexp(-1.0f, -127)), "-0x1p-127"},
  108. {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
  109. {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
  110. {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
  // Denormals with more than one significand bit set.
  111. {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
  112. {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
  113. "0x1.8p-128"},
  114. })));
  // Infinity and NaN inputs for HexFloatTest, given as raw uint32_t bit
  // patterns and paired with the hex-float text expected for each pattern.
  115. INSTANTIATE_TEST_SUITE_P(
  116. Float32NanTests, HexFloatTest,
  117. ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
  118. // Various NAN and INF cases
  119. {uint32_t(0xFF800000), "-0x1p+128"}, // -inf
  120. {uint32_t(0x7F800000), "0x1p+128"}, // inf
  121. {uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan
  122. {uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan
  123. {uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan
  124. {uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan
  125. {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan
  126. {uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan
  127. {uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan
  128. {uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan
  129. {uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan
  130. {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan
  131. })));
  // (value, text) pairs for HexDoubleTest. The fixture's EncodeCorrectly test
  // expects the value to hex-encode to the text, and its DecodeCorrectly test
  // expects the text to decode back to the value.
  132. INSTANTIATE_TEST_SUITE_P(
  133. Float64Tests, HexDoubleTest,
  134. ::testing::ValuesIn(
  135. std::vector<std::pair<FloatProxy<double>, std::string>>({
  136. {0., "0x0p+0"},
  137. {1., "0x1p+0"},
  138. {2., "0x1p+1"},
  139. {3., "0x1.8p+1"},
  140. {0.5, "0x1p-1"},
  141. {0.25, "0x1p-2"},
  142. {0.75, "0x1.8p-1"},
  143. {-0., "-0x0p+0"},
  144. {-1., "-0x1p+0"},
  145. {-0.5, "-0x1p-1"},
  146. {-0.25, "-0x1p-2"},
  147. {-0.75, "-0x1.8p-1"},
  148. // Larger numbers
  149. {512., "0x1p+9"},
  150. {-512., "-0x1p+9"},
  151. {1024., "0x1p+10"},
  152. {-1024., "-0x1p+10"},
  153. {1024. + 8., "0x1.02p+10"},
  154. {-1024. - 8., "-0x1.02p+10"},
  155. // Large outside the range of normal floats
  156. {ldexp(1.0, 128), "0x1p+128"},
  157. {ldexp(1.0, 129), "0x1p+129"},
  158. {ldexp(-1.0, 128), "-0x1p+128"},
  159. {ldexp(-1.0, 129), "-0x1p+129"},
  160. {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
  161. {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
  162. {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
  163. {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
  164. // Small numbers
  165. {1.0 / 512., "0x1p-9"},
  166. {1.0 / -512., "-0x1p-9"},
  167. {1.0 / 1024., "0x1p-10"},
  168. {1.0 / -1024., "-0x1p-10"},
  169. {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
  170. {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
  171. // Small outside the range of normal floats
  172. {ldexp(1.0, -128), "0x1p-128"},
  173. {ldexp(1.0, -129), "0x1p-129"},
  174. {ldexp(-1.0, -128), "-0x1p-128"},
  175. {ldexp(-1.0, -129), "-0x1p-129"},
  // In the next four rows the larger-magnitude term is the positive one, so
  // the sums (and their expected strings) are positive.
  176. {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
  177. {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
  178. {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
  179. {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
  180. // lowest non-denorm
  181. {ldexp(1.0, -1022), "0x1p-1022"},
  182. {ldexp(-1.0, -1022), "-0x1p-1022"},
  183. // Denormalized values
  184. {ldexp(1.0, -1023), "0x1p-1023"},
  185. {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
  186. {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
  187. {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
  188. {ldexp(-1.0, -1024), "-0x1p-1024"},
  189. {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
  190. {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
  191. {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
  192. {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
  193. {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
  194. "0x1.8p-1024"},
  195. })));
  // Infinity and NaN inputs for HexDoubleTest, given as raw uint64_t bit
  // patterns and paired with the hex-float text expected for each pattern.
  196. INSTANTIATE_TEST_SUITE_P(
  197. Float64NanTests, HexDoubleTest,
  198. ::testing::ValuesIn(std::vector<
  199. std::pair<FloatProxy<double>, std::string>>({
  200. // Various NAN and INF cases
  201. {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, // -inf
  202. {uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, // +inf
  203. {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan
  204. {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan
  205. {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan
  206. {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan
  207. {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan
  208. {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan
  209. {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan
  210. {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // +nan
  211. {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // +nan
  212. {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // +nan
  213. })));
  214. // Tests that encoding a value and decoding it again restores
  215. // the same value.
  216. TEST_P(RoundTripFloatTest, CanStoreAccurately) {
  217. std::stringstream ss;
  218. ss << FloatProxy<float>(GetParam());
  219. ss.seekg(0);
  220. FloatProxy<float> res;
  221. ss >> res;
  222. EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
  223. }
  224. TEST_P(RoundTripDoubleTest, CanStoreAccurately) {
  225. std::stringstream ss;
  226. ss << FloatProxy<double>(GetParam());
  227. ss.seekg(0);
  228. FloatProxy<double> res;
  229. ss >> res;
  230. EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
  231. }
  232. INSTANTIATE_TEST_SUITE_P(
  233. Float32StoreTests, RoundTripFloatTest,
  234. ::testing::ValuesIn(std::vector<float>(
  235. {// Value requiring more than 6 digits of precision to be
  236. // represented accurately.
  237. 3.0000002f})));
  238. INSTANTIATE_TEST_SUITE_P(
  239. Float64StoreTests, RoundTripDoubleTest,
  240. ::testing::ValuesIn(std::vector<double>(
  241. {// Value requiring more than 15 digits of precision to be
  242. // represented accurately.
  243. 1.5000000000000002})));
  244. TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  245. std::stringstream s;
  246. s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
  247. << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
  248. EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
  249. }
  250. TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  251. std::stringstream s;
  252. s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
  253. << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
  254. << 9;
  255. EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
  256. }
  257. TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
  258. EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
  259. }
  260. TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
  261. EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
  262. }
  // Decode-only cases for 32-bit floats: (text, expected value) pairs that are
  // checked by DecodeHexFloatTest in the string-to-number direction only.
  263. INSTANTIATE_TEST_SUITE_P(
  264. Float32DecodeTests, DecodeHexFloatTest,
  265. ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
  // Alternative spellings of a zero exponent.
  266. {"0x0p+000", 0.f},
  267. {"0x0p0", 0.f},
  268. {"0x0p-0", 0.f},
  269. // flush to zero cases
  270. {"0x1p-500", 0.f}, // Exponent underflows.
  271. {"-0x1p-500", -0.f},
  272. {"0x0.00000000001p-126", 0.f}, // Fraction causes underflow.
  273. {"-0x0.0000000001p-127", -0.f},
  274. {"-0x0.01p-142", -0.f}, // Fraction causes additional underflow.
  275. {"0x0.01p-142", 0.f},
  276. // Some floats that do not encode the same way as they decode.
  277. {"0x2p+0", 2.f},
  278. {"0xFFp+0", 255.f},
  279. {"0x0.8p+0", 0.5f},
  280. {"0x0.4p+0", 0.25f},
  281. })));
  // Decode-only cases whose expected result is the 32-bit infinity bit pattern
  // of the matching sign.
  282. INSTANTIATE_TEST_SUITE_P(
  283. Float32DecodeInfTests, DecodeHexFloatTest,
  284. ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
  285. // inf cases
  286. {"-0x1p+128", uint32_t(0xFF800000)}, // -inf
  287. {"0x32p+127", uint32_t(0x7F800000)}, // inf
  288. {"0x32p+500", uint32_t(0x7F800000)}, // inf
  289. {"-0x32p+127", uint32_t(0xFF800000)}, // -inf
  290. })));
  291. INSTANTIATE_TEST_SUITE_P(
  292. Float64DecodeTests, DecodeHexDoubleTest,
  293. ::testing::ValuesIn(
  294. std::vector<std::pair<std::string, FloatProxy<double>>>({
  295. {"0x0p+000", 0.},
  296. {"0x0p0", 0.},
  297. {"0x0p-0", 0.},
  298. // flush to zero cases
  299. {"0x1p-5000", 0.}, // Exponent underflows.
  300. {"-0x1p-5000", -0.},
  301. {"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow.
  302. {"-0x0.000000000000001p-1024", -0.},
  303. {"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow.
  304. {"0x0.01p-1090", 0.},
  305. // Some floats that do not encode the same way as they decode.
  306. {"0x2p+0", 2.},
  307. {"0xFFp+0", 255.},
  308. {"0x0.8p+0", 0.5},
  309. {"0x0.4p+0", 0.25},
  310. })));
  // Decode-only cases whose expected result is the 64-bit infinity bit pattern
  // of the matching sign.
  311. INSTANTIATE_TEST_SUITE_P(
  312. Float64DecodeInfTests, DecodeHexDoubleTest,
  313. ::testing::ValuesIn(
  314. std::vector<std::pair<std::string, FloatProxy<double>>>({
  315. // inf cases
  316. {"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf
  317. {"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf
  318. {"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf
  319. {"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf
  320. })));
  321. TEST(FloatProxy, ValidConversion) {
  322. EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
  323. EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
  324. EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
  325. EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
  326. EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
  327. EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
  328. EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
  329. EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
  330. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
  331. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
  332. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
  333. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
  334. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
  335. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
  336. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
  337. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
  338. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
  339. EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
  340. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
  341. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
  342. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
  343. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
  344. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
  345. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
  346. EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
  347. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
  348. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
  349. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
  350. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
  351. EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
  352. }
  353. TEST(FloatProxy, Nan) {
  354. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
  355. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
  356. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
  357. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
  358. EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
  359. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
  360. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
  361. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
  362. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
  363. EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
  364. }
  365. TEST(FloatProxy, Negation) {
  366. EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
  367. EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
  368. EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
  369. EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
  370. EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
  371. EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
  372. EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
  373. EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
  374. EXPECT_THAT(
  375. (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
  376. Eq(-std::numeric_limits<float>::infinity()));
  377. EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
  378. .getAsFloat(),
  379. Eq(std::numeric_limits<float>::infinity()));
  380. }
  381. // Test conversion of FloatProxy values to strings.
  382. //
  383. // In previous cases, we always wrapped the FloatProxy value in a HexFloat
  384. // before conversion to a string. In the following cases, the FloatProxy
  385. // decides for itself whether to print as a regular number or as a hex float.
  // Fixture parameterized on (float proxy, expected printed text) pairs.
  386. using FloatProxyFloatTest =
  387. ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
  // Fixture parameterized on (double proxy, expected printed text) pairs.
  388. using FloatProxyDoubleTest =
  389. ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
  // Returns the text produced by streaming `value` with operator<<. The tests
  // call this with FloatProxy values, so the proxy's own stream operator
  // decides the format.
  template <typename T>
  std::string EncodeViaFloatProxy(const T& value) {
    std::ostringstream printed;
    printed << value;
    return printed.str();
  }
  // Rewrites a decimal floating point string so that its exponent marker is a
  // lowercase 'e' and the exponent digits carry no leading zeros. The
  // Microsoft runtime library likes to write things like "2.5E+010"; this
  // turns that into "2.5e+10".
  //
  // Input that does not scan as <digits/sign/dot><e|E><+|-><integer> is
  // returned unchanged; we don't care what happens to strings that are not
  // floating point strings.
  std::string NormalizeExponentInFloatString(std::string in) {
    // One extra element so there is room for the terminating null, even when
    // the sscanf fails.
    std::vector<char> mantissa(in.size() + 1);
    char marker;
    char sign;
    int exponent;  // in base 10
    const int fields =
        std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", mantissa.data(),
                    &marker, &sign, &exponent);

    // The outputs are only meaningful when all four conversions succeeded.
    if (fields != 4) return in;
    if (marker != 'e' && marker != 'E') return in;
    if (sign != '-' && sign != '+') return in;

    // It looks like a floating point value with exponent: re-emit it in the
    // canonical form.
    std::stringstream out;
    out << mantissa.data() << 'e' << sign << exponent;
    return out.str();
  }
  // Spot checks for NormalizeExponentInFloatString.
  423. TEST(NormalizeFloat, Sample) {
  // The empty string comes back unchanged.
  424. EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
  // Already in the normalized form.
  425. EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
  // Uppercase exponent marker becomes lowercase.
  426. EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
  // Leading zeros in the exponent are dropped.
  427. EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
  // Both rewrites at once, with a fractional part present.
  428. EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
  429. }
  430. // The following two tests can't be DRY because they take different parameter
  431. // types.
  432. TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
  433. EXPECT_THAT(
  434. NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
  435. Eq(GetParam().second));
  436. }
  437. TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
  438. EXPECT_THAT(
  439. NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
  440. Eq(GetParam().second));
  441. }
  // (proxy, expected text) pairs for FloatProxyFloatTest, where the proxy is
  // streamed directly rather than through a HexFloat wrapper. The expected
  // text is decimal for zero and normal values, and hex-float for denormals,
  // NaNs and infinities.
  442. INSTANTIATE_TEST_SUITE_P(
  443. Float32Tests, FloatProxyFloatTest,
  444. ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
  445. // Zero
  446. {0.f, "0"},
  447. // Normal numbers
  448. {1.f, "1"},
  449. {-0.25f, "-0.25"},
  450. {1000.0f, "1000"},
  451. // Still normal numbers, but with large magnitude exponents.
  452. {float(ldexp(1.f, 126)), "8.50705917e+37"},
  453. {float(ldexp(-1.f, -126)), "-1.17549435e-38"},
  454. // denormalized values are printed as hex floats.
  455. {float(ldexp(1.0f, -127)), "0x1p-127"},
  456. {float(ldexp(1.5f, -128)), "0x1.8p-128"},
  457. {float(ldexp(1.25, -129)), "0x1.4p-129"},
  458. {float(ldexp(1.125, -130)), "0x1.2p-130"},
  459. {float(ldexp(-1.0f, -127)), "-0x1p-127"},
  460. {float(ldexp(-1.0f, -128)), "-0x1p-128"},
  461. {float(ldexp(-1.0f, -129)), "-0x1p-129"},
  462. {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
  463. // NaNs
  464. {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
  465. {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
  // Infinity
  466. {std::numeric_limits<float>::infinity(), "0x1p+128"},
  467. {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
  468. })));
  // (proxy, expected text) pairs for FloatProxyDoubleTest, where the proxy is
  // streamed directly rather than through a HexFloat wrapper. The expected
  // text is decimal for zero and normal values, and hex-float for denormals,
  // NaNs and infinities.
  469. INSTANTIATE_TEST_SUITE_P(
  470. Float64Tests, FloatProxyDoubleTest,
  471. ::testing::ValuesIn(
  472. std::vector<std::pair<FloatProxy<double>, std::string>>({
  473. {0., "0"},
  474. {1., "1"},
  475. {-0.25, "-0.25"},
  476. {1000.0, "1000"},
  477. // Large outside the range of normal floats
  478. {ldexp(1.0, 128), "3.4028236692093846e+38"},
  479. {ldexp(1.5, 129), "1.0208471007628154e+39"},
  480. {ldexp(-1.0, 128), "-3.4028236692093846e+38"},
  481. {ldexp(-1.5, 129), "-1.0208471007628154e+39"},
  482. // Small outside the range of normal floats
  483. {ldexp(1.5, -129), "2.2040519077917891e-39"},
  484. {ldexp(-1.5, -129), "-2.2040519077917891e-39"},
  485. // lowest non-denorm
  486. {ldexp(1.0, -1022), "2.2250738585072014e-308"},
  487. {ldexp(-1.0, -1022), "-2.2250738585072014e-308"},
  488. // Denormalized values
  489. {ldexp(1.125, -1023), "0x1.2p-1023"},
  490. {ldexp(-1.375, -1024), "-0x1.6p-1024"},
  491. // NaNs
  492. {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
  493. {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
  494. // Infinity
  495. {std::numeric_limits<double>::infinity(), "0x1p+1024"},
  496. {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
  497. })));
  498. // double is used so that unbiased_exponent can be used with the output
  499. // of ldexp directly.
  500. int32_t unbiased_exponent(double f) {
  501. return HexFloat<FloatProxy<float>>(static_cast<float>(f))
  502. .getUnbiasedNormalizedExponent();
  503. }
  504. int16_t unbiased_half_exponent(uint16_t f) {
  505. return HexFloat<FloatProxy<Float16>>(f).getUnbiasedNormalizedExponent();
  506. }
  // Checks getUnbiasedNormalizedExponent() (via the helpers unbiased_exponent
  // and unbiased_half_exponent) for normal values, infinity, denormals and
  // the smallest representable values of both float and Float16.
  507. TEST(HexFloatOperationTest, UnbiasedExponent) {
  508. // Float cases
  509. EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
  510. EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
  511. EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
  512. EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
  // Infinity is expected to report exponent 128.
  513. EXPECT_EQ(128,
  514. HexFloat<FloatProxy<float>>(std::numeric_limits<float>::infinity())
  515. .getUnbiasedNormalizedExponent());
  516. EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
  517. EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm
  518. EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
  519. EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
  520. EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
  521. // Smallest representable number
  522. EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
  523. // Should get rounded to 0 first.
  524. EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
  525. // Float16 cases
  526. // The exponent is represented in the bits 0x7C00
  527. // The offset is -15
  528. EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
  529. EXPECT_EQ(3, unbiased_half_exponent(0x4800));
  530. EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
  531. EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
  // All exponent bits set.
  532. EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
  533. EXPECT_EQ(10, unbiased_half_exponent(0x6400));
  534. // Smallest representable number
  535. EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
  536. }
  // Creates a float that is the sum of 1/(2 ^ fractions[i]) for every entry of
  // fractions. An empty list gives 0.
  float float_fractions(const std::vector<uint32_t>& fractions) {
    float sum = 0.0f;
    for (auto it = fractions.begin(); it != fractions.end(); ++it) {
      // Each entry is used as a signed power so that it can be negated.
      const int32_t power = static_cast<int32_t>(*it);
      sum += std::ldexp(1.0f, -power);
    }
    return sum;
  }
  545. // Returns the normalized significand of a HexFloat<FloatProxy<float>>
  546. // that was created by calling float_fractions with the input fractions,
  547. // raised to the power of exp.
  548. uint32_t normalized_significand(const std::vector<uint32_t>& fractions,
  549. uint32_t exp) {
  550. return HexFloat<FloatProxy<float>>(
  551. static_cast<float>(ldexp(float_fractions(fractions), exp)))
  552. .getNormalizedSignificand();
  553. }
  // Builds a 32-bit mask from a list of bit offsets counted downwards from the
  // top of a float's significand field: offset 0 maps to 1u << 22, offset 1 to
  // 1u << 21, and so on. An empty list gives 0.
  uint32_t bits_set(const std::vector<uint32_t>& bits) {
    const uint32_t significand_msb = 1u << 22u;
    uint32_t mask = 0;
    for (auto it = bits.begin(); it != bits.end(); ++it) {
      mask |= significand_msb >> *it;
    }
    return mask;
  }
  // The Float16 counterpart of bits_set: offsets are counted downwards from
  // 1u << 9, and the resulting mask is returned as a uint16_t.
  uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
    const uint32_t significand_msb = 1u << 9u;
    uint32_t mask = 0;
    for (auto it = bits.begin(); it != bits.end(); ++it) {
      mask |= significand_msb >> *it;
    }
    return static_cast<uint16_t>(mask);
  }
  // Checks getNormalizedSignificand() through the normalized_significand
  // helper. Expected values are built with bits_set, whose offsets count down
  // from the top significand bit.
  575. TEST(HexFloatOperationTest, NormalizedSignificand) {
  576. // For normalized numbers (the following) it should be a simple matter
  577. // of getting rid of the top implicit bit
  578. EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
  579. EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
  580. EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
  581. EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
  // The same fraction at larger exponents must give the same significand.
  582. EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
  583. EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
  584. // For denormalized numbers we expect the normalized significand to
  585. // shift as if it were normalized. This means, in practice that the
  586. // top_most set bit will be cut off. Looks very similar to above (on purpose)
  // Negative exponents are cast to uint32_t to match the helper's parameter.
  587. EXPECT_EQ(bits_set({}),
  588. normalized_significand({0}, static_cast<uint32_t>(-127)));
  589. EXPECT_EQ(bits_set({3}),
  590. normalized_significand({0, 4}, static_cast<uint32_t>(-128)));
  591. EXPECT_EQ(bits_set({3}),
  592. normalized_significand({0, 4}, static_cast<uint32_t>(-127)));
  593. EXPECT_EQ(bits_set({}),
  594. normalized_significand({22}, static_cast<uint32_t>(-127)));
  595. EXPECT_EQ(bits_set({0}),
  596. normalized_significand({21, 22}, static_cast<uint32_t>(-127)));
  597. }
  598. // Returns the 32-bit floating point value created by
  599. // calling setFromSignUnbiasedExponentAndNormalizedSignificand
  600. // on a HexFloat<FloatProxy<float>>
  601. float set_from_sign(bool negative, int32_t unbiased_exponent,
  602. uint32_t significand, bool round_denorm_up) {
  603. HexFloat<FloatProxy<float>> f(0.f);
  604. f.setFromSignUnbiasedExponentAndNormalizedSignificand(
  605. negative, unbiased_exponent, significand, round_denorm_up);
  606. return f.value().getAsFloat();
  607. }
  608. TEST(HexFloatOperationTests,
  609. SetFromSignUnbiasedExponentAndNormalizedSignificand) {
  610. EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
  611. // Tests insertion of various denormalized numbers with and without round up.
  612. EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
  613. set_from_sign(false, -149, 0, false));
  614. EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
  615. set_from_sign(false, -149, 0, true));
  616. EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
  617. EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
  618. set_from_sign(false, -150, 1, true));
  619. EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
  620. EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
  621. EXPECT_EQ(float_fractions({0, 1, 2, 5}),
  622. set_from_sign(false, 0, bits_set({0, 1, 4}), false));
  623. EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
  624. set_from_sign(false, -32, bits_set({0, 1, 4}), false));
  625. EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
  626. set_from_sign(false, -128, bits_set({0, 1, 4}), false));
  627. // The negative cases from above.
  628. EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
  629. EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
  630. EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
  631. EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
  632. set_from_sign(true, 0, bits_set({0, 1, 4}), false));
  633. EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
  634. set_from_sign(true, -32, bits_set({0, 1, 4}), false));
  635. EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
  636. set_from_sign(true, -128, bits_set({0, 1, 4}), false));
  637. }
  638. TEST(HexFloatOperationTests, NonRounding) {
  639. // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
  640. // except in the denorm case which is a bit more complex.
  641. using HF = HexFloat<FloatProxy<float>>;
  642. bool carry_bit = false;
  643. round_direction rounding[] = {round_direction::kToZero,
  644. round_direction::kToNearestEven,
  645. round_direction::kToPositiveInfinity,
  646. round_direction::kToNegativeInfinity};
  647. // Everything fits, so this should be straight-forward
  648. for (round_direction round : rounding) {
  649. EXPECT_EQ(bits_set({}),
  650. HF(0.f).getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  651. EXPECT_FALSE(carry_bit);
  652. EXPECT_EQ(bits_set({0}),
  653. HF(float_fractions({0, 1}))
  654. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  655. EXPECT_FALSE(carry_bit);
  656. EXPECT_EQ(bits_set({1, 3}),
  657. HF(float_fractions({0, 2, 4}))
  658. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  659. EXPECT_FALSE(carry_bit);
  660. EXPECT_EQ(
  661. bits_set({0, 1, 4}),
  662. HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
  663. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  664. EXPECT_FALSE(carry_bit);
  665. EXPECT_EQ(bits_set({0, 1, 4, 22}),
  666. HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
  667. .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
  668. EXPECT_FALSE(carry_bit);
  669. }
  670. }
  671. using RD = round_direction;
  672. struct RoundSignificandCase {
  673. float source_float;
  674. std::pair<int16_t, bool> expected_results;
  675. round_direction round;
  676. };
  677. using HexFloatRoundTest = ::testing::TestWithParam<RoundSignificandCase>;
  678. TEST_P(HexFloatRoundTest, RoundDownToFP16) {
  679. using HF = HexFloat<FloatProxy<float>>;
  680. using HF16 = HexFloat<FloatProxy<Float16>>;
  681. HF input_value(GetParam().source_float);
  682. bool carry_bit = false;
  683. EXPECT_EQ(GetParam().expected_results.first,
  684. input_value.getRoundedNormalizedSignificand<HF16>(GetParam().round,
  685. &carry_bit));
  686. EXPECT_EQ(carry_bit, GetParam().expected_results.second);
  687. }
  688. // clang-format off
  689. INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest,
  690. ::testing::ValuesIn(std::vector<RoundSignificandCase>(
  691. {
  692. {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
  693. {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
  694. {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
  695. {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
  696. {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
  697. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
  698. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
  699. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
  700. {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},
  701. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
  702. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
  703. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
  704. {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},
  705. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
  706. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
  707. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
  708. {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  709. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
  710. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
  711. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
  712. {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  713. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
  714. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
  715. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
  716. {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  717. // Carries
  718. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
  719. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
  720. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
  721. {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},
  722. // Cases where original number was denorm. Note: this should have no effect
  723. // the number is pre-normalized.
  724. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
  725. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
  726. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
  727. {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  728. })));
  729. // clang-format on
  // One parameter set for HexFloatRoundUpSignificandTest.
  struct UpCastSignificandCase {
    // Bit pattern of the 16-bit source value.
    uint16_t source_half;
    // Significand expected after rounding to the 32-bit format.
    uint32_t expected_result;
  };
  734. using HexFloatRoundUpSignificandTest =
  735. ::testing::TestWithParam<UpCastSignificandCase>;
  736. TEST_P(HexFloatRoundUpSignificandTest, Widening) {
  737. using HF = HexFloat<FloatProxy<float>>;
  738. using HF16 = HexFloat<FloatProxy<Float16>>;
  739. bool carry_bit = false;
  740. round_direction rounding[] = {round_direction::kToZero,
  741. round_direction::kToNearestEven,
  742. round_direction::kToPositiveInfinity,
  743. round_direction::kToNegativeInfinity};
  744. // Everything fits, so everything should just be bit-shifts.
  745. for (round_direction round : rounding) {
  746. carry_bit = false;
  747. HF16 input_value(GetParam().source_half);
  748. EXPECT_EQ(
  749. GetParam().expected_result,
  750. input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
  751. << std::hex << "0x"
  752. << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
  753. << " 0x" << GetParam().expected_result;
  754. EXPECT_FALSE(carry_bit);
  755. }
  756. }
  757. INSTANTIATE_TEST_SUITE_P(
  758. F16toF32, HexFloatRoundUpSignificandTest,
  759. // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
  760. // They are ignored for this test.
  761. ::testing::ValuesIn(std::vector<UpCastSignificandCase>({
  762. {0x3F00, 0x600000},
  763. {0x0F00, 0x600000},
  764. {0x0F01, 0x602000},
  765. {0x0FFF, 0x7FE000},
  766. })));
  767. struct DownCastTest {
  768. float source_float;
  769. uint16_t expected_half;
  770. std::vector<round_direction> directions;
  771. };
  772. std::string get_round_text(round_direction direction) {
  773. #define CASE(round_direction) \
  774. case round_direction: \
  775. return #round_direction
  776. switch (direction) {
  777. CASE(round_direction::kToZero);
  778. CASE(round_direction::kToPositiveInfinity);
  779. CASE(round_direction::kToNegativeInfinity);
  780. CASE(round_direction::kToNearestEven);
  781. }
  782. #undef CASE
  783. return "";
  784. }
  785. using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
  786. TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
  787. using HF = HexFloat<FloatProxy<float>>;
  788. using HF16 = HexFloat<FloatProxy<Float16>>;
  789. HF f(GetParam().source_float);
  790. for (auto round : GetParam().directions) {
  791. HF16 half(0);
  792. f.castTo(half, round);
  793. EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
  794. << get_round_text(round) << " " << std::hex
  795. << BitwiseCast<uint32_t>(GetParam().source_float)
  796. << " cast to: " << half.value().getAsFloat().get_value();
  797. }
  798. }
  799. const uint16_t positive_infinity = 0x7C00;
  800. const uint16_t negative_infinity = 0xFC00;
  801. INSTANTIATE_TEST_SUITE_P(
  802. F32ToF16, HexFloatFP32To16Tests,
  803. ::testing::ValuesIn(std::vector<DownCastTest>({
  804. // Exactly representable as half.
  805. {0.f,
  806. 0x0,
  807. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  808. RD::kToNearestEven}},
  809. {-0.f,
  810. 0x8000,
  811. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  812. RD::kToNearestEven}},
  813. {1.0f,
  814. 0x3C00,
  815. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  816. RD::kToNearestEven}},
  817. {-1.0f,
  818. 0xBC00,
  819. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  820. RD::kToNearestEven}},
  821. {float_fractions({0, 1, 10}),
  822. 0x3E01,
  823. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  824. RD::kToNearestEven}},
  825. {-float_fractions({0, 1, 10}),
  826. 0xBE01,
  827. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  828. RD::kToNearestEven}},
  829. {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)),
  830. 0x4A01,
  831. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  832. RD::kToNearestEven}},
  833. {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)),
  834. 0xCA01,
  835. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  836. RD::kToNearestEven}},
  837. // Underflow
  838. {static_cast<float>(ldexp(1.0f, -25)),
  839. 0x0,
  840. {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
  841. {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
  842. {static_cast<float>(-ldexp(1.0f, -25)),
  843. 0x8000,
  844. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
  845. {static_cast<float>(-ldexp(1.0f, -25)),
  846. 0x8001,
  847. {RD::kToNegativeInfinity}},
  848. {static_cast<float>(ldexp(1.0f, -24)),
  849. 0x1,
  850. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  851. RD::kToNearestEven}},
  852. // Overflow
  853. {static_cast<float>(ldexp(1.0f, 16)),
  854. positive_infinity,
  855. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  856. RD::kToNearestEven}},
  857. {static_cast<float>(ldexp(1.0f, 18)),
  858. positive_infinity,
  859. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  860. RD::kToNearestEven}},
  861. {static_cast<float>(ldexp(1.3f, 16)),
  862. positive_infinity,
  863. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  864. RD::kToNearestEven}},
  865. {static_cast<float>(-ldexp(1.0f, 16)),
  866. negative_infinity,
  867. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  868. RD::kToNearestEven}},
  869. {static_cast<float>(-ldexp(1.0f, 18)),
  870. negative_infinity,
  871. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  872. RD::kToNearestEven}},
  873. {static_cast<float>(-ldexp(1.3f, 16)),
  874. negative_infinity,
  875. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  876. RD::kToNearestEven}},
  877. // Transfer of Infinities
  878. {std::numeric_limits<float>::infinity(),
  879. positive_infinity,
  880. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  881. RD::kToNearestEven}},
  882. {-std::numeric_limits<float>::infinity(),
  883. negative_infinity,
  884. {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
  885. RD::kToNearestEven}},
  886. // Nans are below because we cannot test for equality.
  887. })));
  // One parameter set for HexFloatFP16To32Tests.
  struct UpCastCase {
    // Bit pattern of the 16-bit source value.
    uint16_t source_half;
    // The 32-bit float expected after the cast.
    float expected_float;
  };
  892. using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
  893. TEST_P(HexFloatFP16To32Tests, WideningCasts) {
  894. using HF = HexFloat<FloatProxy<float>>;
  895. using HF16 = HexFloat<FloatProxy<Float16>>;
  896. HF16 f(GetParam().source_half);
  897. round_direction rounding[] = {round_direction::kToZero,
  898. round_direction::kToNearestEven,
  899. round_direction::kToPositiveInfinity,
  900. round_direction::kToNegativeInfinity};
  901. // Everything fits, so everything should just be bit-shifts.
  902. for (round_direction round : rounding) {
  903. HF flt(0.f);
  904. f.castTo(flt, round);
  905. EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
  906. << get_round_text(round) << " " << std::hex
  907. << BitwiseCast<uint16_t>(GetParam().source_half)
  908. << " cast to: " << flt.value().getAsFloat();
  909. }
  910. }
  911. INSTANTIATE_TEST_SUITE_P(
  912. F16ToF32, HexFloatFP16To32Tests,
  913. ::testing::ValuesIn(std::vector<UpCastCase>({
  914. {0x0000, 0.f},
  915. {0x8000, -0.f},
  916. {0x3C00, 1.0f},
  917. {0xBC00, -1.0f},
  918. {0x3F00, float_fractions({0, 1, 2})},
  919. {0xBF00, -float_fractions({0, 1, 2})},
  920. {0x3F01, float_fractions({0, 1, 2, 10})},
  921. {0xBF01, -float_fractions({0, 1, 2, 10})},
  922. // denorm
  923. {0x0001, static_cast<float>(ldexp(1.0, -24))},
  924. {0x0002, static_cast<float>(ldexp(1.0, -23))},
  925. {0x8001, static_cast<float>(-ldexp(1.0, -24))},
  926. {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
  927. // inf
  928. {0x7C00, std::numeric_limits<float>::infinity()},
  929. {0xFC00, -std::numeric_limits<float>::infinity()},
  930. })));
  931. TEST(HexFloatOperationTests, NanTests) {
  932. using HF = HexFloat<FloatProxy<float>>;
  933. using HF16 = HexFloat<FloatProxy<Float16>>;
  934. round_direction rounding[] = {round_direction::kToZero,
  935. round_direction::kToNearestEven,
  936. round_direction::kToPositiveInfinity,
  937. round_direction::kToNegativeInfinity};
  938. // Everything fits, so everything should just be bit-shifts.
  939. for (round_direction round : rounding) {
  940. HF16 f16(0);
  941. HF f(0.f);
  942. HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
  943. EXPECT_TRUE(f16.value().isNan());
  944. HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
  945. EXPECT_TRUE(f16.value().isNan());
  946. HF16(0x7C01).castTo(f, round);
  947. EXPECT_TRUE(f.value().isNan());
  948. HF16(0x7C11).castTo(f, round);
  949. EXPECT_TRUE(f.value().isNan());
  950. HF16(0xFC01).castTo(f, round);
  951. EXPECT_TRUE(f.value().isNan());
  952. HF16(0x7C10).castTo(f, round);
  953. EXPECT_TRUE(f.value().isNan());
  954. HF16(0xFF00).castTo(f, round);
  955. EXPECT_TRUE(f.value().isNan());
  956. }
  957. }
  958. // A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
  959. template <typename T>
  960. struct FloatParseCase {
  961. std::string literal;
  962. bool negate_value;
  963. bool expect_success;
  964. HexFloat<FloatProxy<T>> expected_value;
  965. };
  966. using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
  967. TEST_P(ParseNormalFloatTest, Samples) {
  968. std::stringstream input(GetParam().literal);
  969. HexFloat<FloatProxy<float>> parsed_value(0.0f);
  970. ParseNormalFloat(input, GetParam().negate_value, parsed_value);
  971. EXPECT_NE(GetParam().expect_success, input.fail())
  972. << " literal: " << GetParam().literal
  973. << " negate: " << GetParam().negate_value;
  974. if (GetParam().expect_success) {
  975. EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
  976. << " literal: " << GetParam().literal
  977. << " negate: " << GetParam().negate_value;
  978. }
  979. }
  980. // Returns a FloatParseCase with expected failure.
  981. template <typename T>
  982. FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
  983. T expected_value) {
  984. HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
  985. return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
  986. }
  987. // Returns a FloatParseCase that should successfully parse to a given value.
  988. template <typename T>
  989. FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
  990. T expected_value) {
  991. HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
  992. return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
  993. }
  994. INSTANTIATE_TEST_SUITE_P(
  995. FloatParse, ParseNormalFloatTest,
  996. ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
  997. // Failing cases due to trivially incorrect syntax.
  998. BadFloatParseCase("abc", false, 0.0f),
  999. BadFloatParseCase("abc", true, 0.0f),
  1000. // Valid cases.
  1001. GoodFloatParseCase("0", false, 0.0f),
  1002. GoodFloatParseCase("0.0", false, 0.0f),
  1003. GoodFloatParseCase("-0.0", false, -0.0f),
  1004. GoodFloatParseCase("2.0", false, 2.0f),
  1005. GoodFloatParseCase("-2.0", false, -2.0f),
  1006. GoodFloatParseCase("+2.0", false, 2.0f),
  1007. // Cases with negate_value being true.
  1008. GoodFloatParseCase("0.0", true, -0.0f),
  1009. GoodFloatParseCase("2.0", true, -2.0f),
  1010. // When negate_value is true, we should not accept a
  1011. // leading minus or plus.
  1012. BadFloatParseCase("-0.0", true, 0.0f),
  1013. BadFloatParseCase("-2.0", true, 0.0f),
  1014. BadFloatParseCase("+0.0", true, 0.0f),
  1015. BadFloatParseCase("+2.0", true, 0.0f),
  1016. // Overflow is an error for 32-bit float parsing.
  1017. BadFloatParseCase("1e40", false, FLT_MAX),
  1018. BadFloatParseCase("1e40", true, -FLT_MAX),
  1019. BadFloatParseCase("-1e40", false, -FLT_MAX),
  1020. // We can't have -1e40 and negate_value == true since
  1021. // that represents an original case of "--1e40" which
  1022. // is invalid.
  1023. }));
  1024. using ParseNormalFloat16Test =
  1025. ::testing::TestWithParam<FloatParseCase<Float16>>;
  1026. TEST_P(ParseNormalFloat16Test, Samples) {
  1027. std::stringstream input(GetParam().literal);
  1028. HexFloat<FloatProxy<Float16>> parsed_value(0);
  1029. ParseNormalFloat(input, GetParam().negate_value, parsed_value);
  1030. EXPECT_NE(GetParam().expect_success, input.fail())
  1031. << " literal: " << GetParam().literal
  1032. << " negate: " << GetParam().negate_value;
  1033. if (GetParam().expect_success) {
  1034. EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
  1035. << " literal: " << GetParam().literal
  1036. << " negate: " << GetParam().negate_value;
  1037. }
  1038. }
  1039. INSTANTIATE_TEST_SUITE_P(
  1040. Float16Parse, ParseNormalFloat16Test,
  1041. ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
  1042. // Failing cases due to trivially incorrect syntax.
  1043. BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
  1044. BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
  1045. // Valid cases.
  1046. GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
  1047. GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
  1048. GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
  1049. GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
  1050. GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
  1051. GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
  1052. // Cases with negate_value being true.
  1053. GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
  1054. GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
  1055. // When negate_value is true, we should not accept a leading minus or
  1056. // plus.
  1057. BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
  1058. BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
  1059. BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
  1060. BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
  1061. }));
  // A test case for the overflow-parsing tests: the text to parse, whether
  // the stream is expected to be left in a non-failed state, and the value
  // expected when it is.
  template <typename T>
  struct OverflowParseCase {
    // The text fed to the stream extraction operator.
    std::string input;
    // Whether extraction is expected to succeed.
    bool expect_success;
    // Value compared against the parsed result; only checked when
    // |expect_success| is true.
    T expected_value;
  };
  1069. using FloatProxyParseOverflowFloatTest =
  1070. ::testing::TestWithParam<OverflowParseCase<float>>;
  1071. TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
  1072. std::istringstream input(GetParam().input);
  1073. HexFloat<FloatProxy<float>> value(0.0f);
  1074. input >> value;
  1075. EXPECT_NE(GetParam().expect_success, input.fail());
  1076. if (GetParam().expect_success) {
  1077. EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
  1078. }
  1079. }
  1080. INSTANTIATE_TEST_SUITE_P(
  1081. FloatOverflow, FloatProxyParseOverflowFloatTest,
  1082. ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
  1083. {"0", true, 0.0f},
  1084. {"0.0", true, 0.0f},
  1085. {"1.0", true, 1.0f},
  1086. {"1e38", true, 1e38f},
  1087. {"-1e38", true, -1e38f},
  1088. {"1e40", false, FLT_MAX},
  1089. {"-1e40", false, -FLT_MAX},
  1090. {"1e400", false, FLT_MAX},
  1091. {"-1e400", false, -FLT_MAX},
  1092. })));
  1093. using FloatProxyParseOverflowDoubleTest =
  1094. ::testing::TestWithParam<OverflowParseCase<double>>;
  1095. TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
  1096. std::istringstream input(GetParam().input);
  1097. HexFloat<FloatProxy<double>> value(0.0);
  1098. input >> value;
  1099. EXPECT_NE(GetParam().expect_success, input.fail());
  1100. if (GetParam().expect_success) {
  1101. EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
  1102. }
  1103. }
  1104. INSTANTIATE_TEST_SUITE_P(
  1105. DoubleOverflow, FloatProxyParseOverflowDoubleTest,
  1106. ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
  1107. {"0", true, 0.0},
  1108. {"0.0", true, 0.0},
  1109. {"1.0", true, 1.0},
  1110. {"1e38", true, 1e38},
  1111. {"-1e38", true, -1e38},
  1112. {"1e40", true, 1e40},
  1113. {"-1e40", true, -1e40},
  1114. {"1e400", false, DBL_MAX},
  1115. {"-1e400", false, -DBL_MAX},
  1116. })));
  1117. using FloatProxyParseOverflowFloat16Test =
  1118. ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
  1119. TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
  1120. std::istringstream input(GetParam().input);
  1121. HexFloat<FloatProxy<Float16>> value(0);
  1122. input >> value;
  1123. EXPECT_NE(GetParam().expect_success, input.fail())
  1124. << " literal: " << GetParam().input;
  1125. if (GetParam().expect_success) {
  1126. EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
  1127. << " literal: " << GetParam().input;
  1128. }
  1129. }
  1130. INSTANTIATE_TEST_SUITE_P(
  1131. Float16Overflow, FloatProxyParseOverflowFloat16Test,
  1132. ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
  1133. {"0", true, uint16_t{0}},
  1134. {"0.0", true, uint16_t{0}},
  1135. {"1.0", true, uint16_t{0x3c00}},
  1136. // Overflow for 16-bit float is an error, and returns max or
  1137. // lowest value.
  1138. {"1e38", false, uint16_t{0x7bff}},
  1139. {"1e40", false, uint16_t{0x7bff}},
  1140. {"1e400", false, uint16_t{0x7bff}},
  1141. {"-1e38", false, uint16_t{0xfbff}},
  1142. {"-1e40", false, uint16_t{0xfbff}},
  1143. {"-1e400", false, uint16_t{0xfbff}},
  1144. })));
  1145. TEST(FloatProxy, Max) {
  1146. EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
  1147. Eq(uint16_t{0x7bff}));
  1148. EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
  1149. Eq(std::numeric_limits<float>::max()));
  1150. EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
  1151. Eq(std::numeric_limits<double>::max()));
  1152. }
  1153. TEST(FloatProxy, Lowest) {
  1154. EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
  1155. Eq(uint16_t{0xfbff}));
  1156. EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
  1157. Eq(std::numeric_limits<float>::lowest()));
  1158. EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
  1159. Eq(std::numeric_limits<double>::lowest()));
  1160. }
  1161. template <typename T>
  1162. struct StreamParseCase {
  1163. StreamParseCase(const std::string& lit, bool succ, const std::string& suffix,
  1164. T value)
  1165. : literal(lit),
  1166. expect_success(succ),
  1167. expected_suffix(suffix),
  1168. expected_value(HexFloat<FloatProxy<T>>(value)) {}
  1169. std::string literal;
  1170. bool expect_success;
  1171. std::string expected_suffix;
  1172. HexFloat<FloatProxy<T>> expected_value;
  1173. };
  1174. template <typename T>
  1175. std::ostream& operator<<(std::ostream& os, const StreamParseCase<T>& fspc) {
  1176. os << "StreamParseCase(" << fspc.literal
  1177. << ", expect_success:" << int(fspc.expect_success) << ","
  1178. << fspc.expected_suffix << "," << fspc.expected_value << ")";
  1179. return os;
  1180. }
  1181. using Float32StreamParseTest = ::testing::TestWithParam<StreamParseCase<float>>;
  1182. using Float16StreamParseTest =
  1183. ::testing::TestWithParam<StreamParseCase<Float16>>;
  1184. TEST_P(Float32StreamParseTest, Samples) {
  1185. std::stringstream input(GetParam().literal);
  1186. HexFloat<FloatProxy<float>> parsed_value(0.0f);
  1187. // Hex floats must be read with the stream input operator.
  1188. input >> parsed_value;
  1189. if (GetParam().expect_success) {
  1190. EXPECT_FALSE(input.fail());
  1191. std::string suffix;
  1192. input >> suffix;
  1193. // EXPECT_EQ(suffix, GetParam().expected_suffix);
  1194. EXPECT_EQ(parsed_value.value().getAsFloat(),
  1195. GetParam().expected_value.value().getAsFloat());
  1196. } else {
  1197. EXPECT_TRUE(input.fail());
  1198. }
  1199. }
  1200. // Returns a Float16 constructed from its sign bit, unbiased exponent, and
  1201. // mantissa.
  1202. Float16 makeF16(int sign_bit, int unbiased_exp, int mantissa) {
  1203. EXPECT_LE(0, sign_bit);
  1204. EXPECT_LE(sign_bit, 1);
  1205. // Exponent is 5 bits, with bias of 15.
  1206. EXPECT_LE(-15, unbiased_exp); // -15 means zero or subnormal
  1207. EXPECT_LE(unbiased_exp, 16); // 16 means infinity or NaN
  1208. EXPECT_LE(0, mantissa);
  1209. EXPECT_LE(mantissa, 0x3ff);
  1210. const unsigned biased_exp = 15 + unbiased_exp;
  1211. const uint32_t as_bits = sign_bit << 15 | (biased_exp << 10) | mantissa;
  1212. EXPECT_LE(as_bits, 0xffffu);
  1213. return Float16(static_cast<uint16_t>(as_bits));
  1214. }
  1215. TEST_P(Float16StreamParseTest, Samples) {
  1216. std::stringstream input(GetParam().literal);
  1217. HexFloat<FloatProxy<Float16>> parsed_value(makeF16(0, 0, 0));
  1218. // Hex floats must be read with the stream input operator.
  1219. input >> parsed_value;
  1220. if (GetParam().expect_success) {
  1221. EXPECT_FALSE(input.fail());
  1222. std::string suffix;
  1223. input >> suffix;
  1224. const auto got = parsed_value.value();
  1225. const auto expected = GetParam().expected_value.value();
  1226. EXPECT_EQ(got.data(), expected.data())
  1227. << "got: " << got << " expected: " << expected;
  1228. } else {
  1229. EXPECT_TRUE(input.fail());
  1230. }
  1231. }
  1232. INSTANTIATE_TEST_SUITE_P(
  1233. HexFloat32FillSignificantDigits, Float32StreamParseTest,
  1234. ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
  1235. {"0x123456p0", true, "", ldexpf(0x123456, 0)},
  1236. // Patterns that fill all mantissa bits
  1237. {"0x1.fffffep+23", true, "", ldexpf(0x1fffffe, -1)},
  1238. {"0x1f.ffffep+19", true, "", ldexpf(0x1fffffe, -1)},
  1239. {"0x1ff.fffep+15", true, "", ldexpf(0x1fffffe, -1)},
  1240. {"0x1fff.ffep+11", true, "", ldexpf(0x1fffffe, -1)},
  1241. {"0x1ffff.fep+7", true, "", ldexpf(0x1fffffe, -1)},
  1242. {"0x1fffff.ep+3", true, "", ldexpf(0x1fffffe, -1)},
  1243. {"0x1fffffe.p-1", true, "", ldexpf(0x1fffffe, -1)},
  1244. {"0xffffff.p+0", true, "", ldexpf(0x1fffffe, -1)},
  1245. {"0xffffff.p+0", true, "", ldexpf(0xffffff, 0)},
  1246. // Now drop some bits in the middle
  1247. {"0xa5a5a5.p+0", true, "", ldexpf(0xa5a5a5, 0)},
  1248. {"0x5a5a5a.p+0", true, "", ldexpf(0x5a5a5a, 0)}}));
  1249. INSTANTIATE_TEST_SUITE_P(
  1250. HexFloat32ExcessSignificantDigits, Float32StreamParseTest,
  1251. ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
  1252. // Base cases
  1253. {"0x1.fffffep0", true, "", ldexpf(0xffffff, -23)},
  1254. {"0xa5a5a5p0", true, "", ldexpf(0xa5a5a5, 0)},
  1255. {"0xa.5a5a5p+9", true, "", ldexpf(0xa5a5a5, -11)},
  1256. {"0x5a5a5ap0", true, "", ldexpf(0x5a5a5a, 0)},
  1257. {"0x5.a5a5ap+9", true, "", ldexpf(0x5a5a5a, -11)},
  1258. // Truncate extra bits: zeroes
  1259. {"0x1.fffffe0p0", true, "", ldexpf(0xffffff, -23)},
  1260. {"0xa5a5a5000p0", true, "", ldexpf(0xa5a5a5, 12)},
  1261. {"0xa.5a5a5000p+9", true, "", ldexpf(0xa5a5a5, -11)},
  1262. {"0x5a5a5a000p0", true, "", ldexpf(0x5a5a5a, 12)},
  1263. {"0x5.a5a5a000p+9", true, "", ldexpf(0x5a5a5a, -11)},
  1264. // Truncate extra bits: ones
  1265. {"0x1.ffffffp0", // Extra bits in the last nibble
  1266. true, "", ldexpf(0xffffff, -23)},
  1267. {"0x1.fffffffp0", true, "", ldexpf(0xffffff, -23)},
  1268. {"0xa5a5a5fffp0", true, "", ldexpf(0xa5a5a5, 12)},
  1269. {"0xa.5a5a5fffp+9", true, "", ldexpf(0xa5a5a5, -11)},
  1270. {"0x5a5a5afffp0",
  1271. // The 5 nibble (0101), leads with 0, so the result can fit a leading
  1272. // 1 bit , yielding 8 (1000).
  1273. true, "", ldexpf(0x5a5a5a8, 8)},
  1274. {"0x5.a5a5afffp+9", true, "", ldexpf(0x5a5a5a8, 8 - 32 + 9)}}));
  1275. INSTANTIATE_TEST_SUITE_P(
  1276. HexFloat32ExponentMissingDigits, Float32StreamParseTest,
  1277. ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
  1278. {"0x1.0p1", true, "", 2.0f},
  1279. {"0x1.0p1a", true, "a", 2.0f},
  1280. {"-0x1.0p1f", true, "f", -2.0f},
  1281. {"0x1.0p", false, "", 0.0f},
  1282. {"0x1.0pa", false, "", 0.0f},
  1283. {"0x1.0p!", false, "", 0.0f},
  1284. {"0x1.0p+", false, "", 0.0f},
  1285. {"0x1.0p+a", false, "", 0.0f},
  1286. {"0x1.0p+!", false, "", 0.0f},
  1287. {"0x1.0p-", false, "", 0.0f},
  1288. {"0x1.0p-a", false, "", 0.0f},
  1289. {"0x1.0p-!", false, "", 0.0f},
  1290. {"0x1.0p++", false, "", 0.0f},
  1291. {"0x1.0p+-", false, "", 0.0f},
  1292. {"0x1.0p-+", false, "", 0.0f},
  1293. {"0x1.0p--", false, "", 0.0f}}));
  1294. INSTANTIATE_TEST_SUITE_P(
  1295. HexFloat32ExponentTrailingSign, Float32StreamParseTest,
  1296. ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
  1297. // Don't consume a sign after the binary exponent digits.
  1298. {"0x1.0p1", true, "", 2.0f},
  1299. {"0x1.0p1+", true, "+", 2.0f},
  1300. {"0x1.0p1-", true, "-", 2.0f}}));
  1301. INSTANTIATE_TEST_SUITE_P(
  1302. HexFloat32PositiveExponentOverflow, Float32StreamParseTest,
  1303. ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
  1304. // Positive exponents
  1305. {"0x1.0p1", true, "", 2.0f}, // fine, a normal number
  1306. {"0x1.0p15", true, "", 32768.0f}, // fine, a normal number
  1307. {"0x1.0p127", true, "", float(ldexp(1.0f, 127))}, // good large number
  1308. {"0x0.8p128", true, "", float(ldexp(1.0f, 127))}, // good large number
  1309. {"0x0.1p131", true, "", float(ldexp(1.0f, 127))}, // good large number
  1310. {"0x0.01p135", true, "", float(ldexp(1.0f, 127))}, // good large number
  1311. {"0x1.0p128", true, "", float(ldexp(1.0f, 128))}, // infinity
  1312. {"0x1.0p4294967295", true, "", float(ldexp(1.0f, 128))}, // infinity
  1313. {"0x1.0p5000000000", true, "", float(ldexp(1.0f, 128))}, // infinity
  1314. {"0x0.0p5000000000", true, "", 0.0f}, // zero mantissa, zero result
  1315. }));
  1316. INSTANTIATE_TEST_SUITE_P(
  1317. HexFloat32NegativeExponentOverflow, Float32StreamParseTest,
  1318. ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
  1319. // Positive results, digits before '.'
  1320. {"0x1.0p-126", true, "",
  1321. float(ldexp(1.0f, -126))}, // fine, a small normal number
  1322. {"0x1.0p-127", true, "", float(ldexp(1.0f, -127))}, // denorm number
  1323. {"0x1.0p-149", true, "",
  1324. float(ldexp(1.0f, -149))}, // smallest positive denormal
  1325. {"0x0.8p-148", true, "",
  1326. float(ldexp(1.0f, -149))}, // smallest positive denormal
  1327. {"0x0.1p-145", true, "",
  1328. float(ldexp(1.0f, -149))}, // smallest positive denormal
  1329. {"0x0.01p-141", true, "",
  1330. float(ldexp(1.0f, -149))}, // smallest positive denormal
  1331. // underflow rounds down to zero
  1332. {"0x1.0p-150", true, "", 0.0f},
  1333. {"0x1.0p-4294967296", true, "",
  1334. 0.0f}, // avoid exponent overflow in parser
  1335. {"0x1.0p-5000000000", true, "",
  1336. 0.0f}, // avoid exponent overflow in parser
  1337. {"0x0.0p-5000000000", true, "", 0.0f}, // zero mantissa, zero result
  1338. }));
  1339. INSTANTIATE_TEST_SUITE_P(
  1340. HexFloat16ExcessSignificantDigits, Float16StreamParseTest,
  1341. ::testing::ValuesIn(std::vector<StreamParseCase<Float16>>{
  1342. // Zero
  1343. {"0x1.c00p0", true, "", makeF16(0, 0, 0x300)},
  1344. {"0x0p0", true, "", makeF16(0, -15, 0x0)},
  1345. {"0x000.0000p0", true, "", makeF16(0, -15, 0x0)},
  1346. // All leading 1s
  1347. {"0x1p0", true, "", makeF16(0, 0, 0x0)},
  1348. {"0x1.8p0", true, "", makeF16(0, 0, 0x200)},
  1349. {"0x1.cp0", true, "", makeF16(0, 0, 0x300)},
  1350. {"0x1.ep0", true, "", makeF16(0, 0, 0x380)},
  1351. {"0x1.fp0", true, "", makeF16(0, 0, 0x3c0)},
  1352. {"0x1.f8p0", true, "", makeF16(0, 0, 0x3e0)},
  1353. {"0x1.fcp0", true, "", makeF16(0, 0, 0x3f0)},
  1354. {"0x1.fep0", true, "", makeF16(0, 0, 0x3f8)},
  1355. {"0x1.ffp0", true, "", makeF16(0, 0, 0x3fc)},
  1356. // Fill trailing zeros to all significant places
  1357. // that might be used for significant digits.
  1358. {"0x1.ff8p0", true, "", makeF16(0, 0, 0x3fe)},
  1359. {"0x1.ffcp0", true, "", makeF16(0, 0, 0x3ff)},
  1360. {"0x1.800p0", true, "", makeF16(0, 0, 0x200)},
  1361. {"0x1.c00p0", true, "", makeF16(0, 0, 0x300)},
  1362. {"0x1.e00p0", true, "", makeF16(0, 0, 0x380)},
  1363. {"0x1.f00p0", true, "", makeF16(0, 0, 0x3c0)},
  1364. {"0x1.f80p0", true, "", makeF16(0, 0, 0x3e0)},
  1365. {"0x1.fc0p0", true, "", makeF16(0, 0, 0x3f0)},
  1366. {"0x1.fe0p0", true, "", makeF16(0, 0, 0x3f8)},
  1367. {"0x1.ff0p0", true, "", makeF16(0, 0, 0x3fc)},
  1368. {"0x1.ff8p0", true, "", makeF16(0, 0, 0x3fe)},
  1369. {"0x1.ffcp0", true, "", makeF16(0, 0, 0x3ff)},
  1370. // Add several trailing zeros
  1371. {"0x1.c00000p0", true, "", makeF16(0, 0, 0x300)},
  1372. {"0x1.e00000p0", true, "", makeF16(0, 0, 0x380)},
  1373. {"0x1.f00000p0", true, "", makeF16(0, 0, 0x3c0)},
  1374. {"0x1.f80000p0", true, "", makeF16(0, 0, 0x3e0)},
  1375. {"0x1.fc0000p0", true, "", makeF16(0, 0, 0x3f0)},
  1376. {"0x1.fe0000p0", true, "", makeF16(0, 0, 0x3f8)},
  1377. {"0x1.ff0000p0", true, "", makeF16(0, 0, 0x3fc)},
  1378. {"0x1.ff8000p0", true, "", makeF16(0, 0, 0x3fe)},
  1379. {"0x1.ffcp0000", true, "", makeF16(0, 0, 0x3ff)},
  1380. // Samples that drop out bits in the middle.
  1381. // 5 = 0101 4 = 0100
  1382. // a = 1010 8 = 1000
  1383. {"0x1.5a4p0", true, "", makeF16(0, 0, 0x169)},
  1384. {"0x1.a58p0", true, "", makeF16(0, 0, 0x296)},
  1385. // Samples that drop out bits *and* truncate significant bits
  1386. // that can't be represented.
  1387. {"0x1.5a40000p0", true, "", makeF16(0, 0, 0x169)},
  1388. {"0x1.5a7ffffp0", true, "", makeF16(0, 0, 0x169)},
  1389. {"0x1.a580000p0", true, "", makeF16(0, 0, 0x296)},
  1390. {"0x1.a5bffffp0", true, "", makeF16(0, 0, 0x296)},
  1391. // Try some negations.
  1392. {"-0x0p0", true, "", makeF16(1, -15, 0x0)},
  1393. {"-0x000.0000p0", true, "", makeF16(1, -15, 0x0)},
  1394. {"-0x1.5a40000p0", true, "", makeF16(1, 0, 0x169)},
  1395. {"-0x1.5a7ffffp0", true, "", makeF16(1, 0, 0x169)},
  1396. {"-0x1.a580000p0", true, "", makeF16(1, 0, 0x296)},
  1397. {"-0x1.a5bffffp0", true, "", makeF16(1, 0, 0x296)}}));
  1398. INSTANTIATE_TEST_SUITE_P(
  1399. HexFloat16IncreasingExponentsAndMantissa, Float16StreamParseTest,
  1400. ::testing::ValuesIn(std::vector<StreamParseCase<Float16>>{
  1401. // Zero
  1402. {"0x0p0", true, "", makeF16(0, -15, 0x0)},
  1403. {"0x0p5000000000000", true, "", makeF16(0, -15, 0x0)},
  1404. {"-0x0p5000000000000", true, "", makeF16(1, -15, 0x0)},
  1405. // Leading 1
  1406. {"0x1p0", true, "", makeF16(0, 0, 0x0)},
  1407. {"0x1p1", true, "", makeF16(0, 1, 0x0)},
  1408. {"0x1p16", true, "", makeF16(0, 16, 0x0)},
  1409. {"0x1p-1", true, "", makeF16(0, -1, 0x0)},
  1410. {"0x1p-14", true, "", makeF16(0, -14, 0x0)},
  1411. // Leading 2
  1412. {"0x2p0", true, "", makeF16(0, 1, 0x0)},
  1413. {"0x2p1", true, "", makeF16(0, 2, 0x0)},
  1414. {"0x2p15", true, "", makeF16(0, 16, 0x0)},
  1415. {"0x2p-1", true, "", makeF16(0, 0, 0x0)},
  1416. {"0x2p-15", true, "", makeF16(0, -14, 0x0)},
  1417. // Leading 8
  1418. {"0x8p0", true, "", makeF16(0, 3, 0x0)},
  1419. {"0x8p1", true, "", makeF16(0, 4, 0x0)},
  1420. {"0x8p13", true, "", makeF16(0, 16, 0x0)},
  1421. {"0x8p-3", true, "", makeF16(0, 0, 0x0)},
  1422. {"0x8p-17", true, "", makeF16(0, -14, 0x0)},
  1423. // Leading 10
  1424. {"0x10.0p0", true, "", makeF16(0, 4, 0x0)},
  1425. {"0x10.0p1", true, "", makeF16(0, 5, 0x0)},
  1426. {"0x10.0p12", true, "", makeF16(0, 16, 0x0)},
  1427. {"0x10.0p-5", true, "", makeF16(0, -1, 0x0)},
  1428. {"0x10.0p-18", true, "", makeF16(0, -14, 0x0)},
  1429. // Samples that drop out bits *and* truncate significant bits
  1430. // that can't be represented.
  1431. // Progressively increase the leading digit.
  1432. {"0x1.5a40000p0", true, "", makeF16(0, 0, 0x169)},
  1433. {"0x1.5a7ffffp0", true, "", makeF16(0, 0, 0x169)},
  1434. {"0x2.5a40000p0", true, "", makeF16(0, 1, 0x0b4)},
  1435. {"0x2.5a7ffffp0", true, "", makeF16(0, 1, 0x0b4)},
  1436. {"0x4.5a40000p0", true, "", makeF16(0, 2, 0x05a)},
  1437. {"0x4.5a7ffffp0", true, "", makeF16(0, 2, 0x05a)},
  1438. {"0x8.5a40000p0", true, "", makeF16(0, 3, 0x02d)},
  1439. {"0x8.5a7ffffp0", true, "", makeF16(0, 3, 0x02d)}}));
  1440. } // namespace
  1441. } // namespace utils
  1442. } // namespace spvtools