mojoshader.c 318 KB


  1. /**
  2. * MojoShader; generate shader programs from bytecode of compiled
  3. * Direct3D shaders.
  4. *
  5. * Please see the file LICENSE.txt in the source's root directory.
  6. *
  7. * This file written by Ryan C. Gordon.
  8. */
  9. // !!! FIXME: this file really needs to be split up.
  10. // !!! FIXME: I keep changing coding styles for symbols and typedefs.
  11. // !!! FIXME: rules from MSDN about temp registers we probably don't check.
  12. // - There are limited temporaries: vs_1_1 has 12 (ps_1_1 has _2_!).
  13. // - SM2 apparently was variable, between 12 and 32. Shader Model 3 has 32.
  14. // - A maximum of three temp registers can be used in a single instruction.
  15. #define __MOJOSHADER_INTERNAL__ 1
  16. #include "mojoshader_internal.h"
  17. typedef struct ConstantsList
  18. {
  19. MOJOSHADER_constant constant;
  20. struct ConstantsList *next;
  21. } ConstantsList;
  22. typedef struct VariableList
  23. {
  24. MOJOSHADER_uniformType type;
  25. int index;
  26. int count;
  27. ConstantsList *constant;
  28. int used;
  29. int emit_position; // used in some profiles.
  30. struct VariableList *next;
  31. } VariableList;
  32. typedef struct RegisterList
  33. {
  34. RegisterType regtype;
  35. int regnum;
  36. MOJOSHADER_usage usage;
  37. unsigned int index;
  38. int writemask;
  39. int misc;
  40. int written;
  41. const VariableList *array;
  42. struct RegisterList *next;
  43. } RegisterList;
  44. typedef struct
  45. {
  46. const uint32 *token; // this is the unmolested token in the stream.
  47. int regnum;
  48. int swizzle; // xyzw (all four, not split out).
  49. int swizzle_x;
  50. int swizzle_y;
  51. int swizzle_z;
  52. int swizzle_w;
  53. SourceMod src_mod;
  54. RegisterType regtype;
  55. int relative;
  56. RegisterType relative_regtype;
  57. int relative_regnum;
  58. int relative_component;
  59. const VariableList *relative_array;
  60. } SourceArgInfo;
  61. struct Profile; // predeclare.
  62. typedef struct CtabData
  63. {
  64. int have_ctab;
  65. int symbol_count;
  66. MOJOSHADER_symbol *symbols;
  67. } CtabData;
  68. // Context...this is state that changes as we parse through a shader...
  69. typedef struct Context
  70. {
  71. int isfail;
  72. int out_of_memory;
  73. MOJOSHADER_malloc malloc;
  74. MOJOSHADER_free free;
  75. void *malloc_data;
  76. int current_position;
  77. const uint32 *orig_tokens;
  78. const uint32 *tokens;
  79. uint32 tokencount;
  80. const MOJOSHADER_swizzle *swizzles;
  81. unsigned int swizzles_count;
  82. const MOJOSHADER_samplerMap *samplermap;
  83. unsigned int samplermap_count;
  84. Buffer *output;
  85. Buffer *preflight;
  86. Buffer *globals;
  87. Buffer *helpers;
  88. Buffer *subroutines;
  89. Buffer *mainline_intro;
  90. Buffer *mainline;
  91. Buffer *ignore;
  92. Buffer *output_stack[2];
  93. int indent_stack[2];
  94. int output_stack_len;
  95. int indent;
  96. const char *shader_type_str;
  97. const char *endline;
  98. int endline_len;
  99. int profileid;
  100. const struct Profile *profile;
  101. MOJOSHADER_shaderType shader_type;
  102. uint8 major_ver;
  103. uint8 minor_ver;
  104. DestArgInfo dest_arg;
  105. SourceArgInfo source_args[5];
  106. SourceArgInfo predicate_arg; // for predicated instructions.
  107. uint32 dwords[4];
  108. uint32 version_token;
  109. int instruction_count;
  110. uint32 instruction_controls;
  111. uint32 previous_opcode;
  112. int coissue;
  113. int loops;
  114. int reps;
  115. int max_reps;
  116. int cmps;
  117. int scratch_registers;
  118. int max_scratch_registers;
  119. int branch_labels_stack_index;
  120. int branch_labels_stack[32];
  121. int assigned_branch_labels;
  122. int assigned_vertex_attributes;
  123. int last_address_reg_component;
  124. RegisterList used_registers;
  125. RegisterList defined_registers;
  126. ErrorList *errors;
  127. int constant_count;
  128. ConstantsList *constants;
  129. int uniform_count;
  130. int uniform_float4_count;
  131. int uniform_int4_count;
  132. int uniform_bool_count;
  133. RegisterList uniforms;
  134. int attribute_count;
  135. RegisterList attributes;
  136. int sampler_count;
  137. RegisterList samplers;
  138. VariableList *variables; // variables to register mapping.
  139. int centroid_allowed;
  140. CtabData ctab;
  141. int have_relative_input_registers;
  142. int have_multi_color_outputs;
  143. int determined_constants_arrays;
  144. int predicated;
  145. int uses_pointsize;
  146. int uses_fog;
  147. int glsl_generated_lit_helper;
  148. int glsl_generated_texldd_setup;
  149. int glsl_generated_texm3x3spec_helper;
  150. int arb1_wrote_position;
  151. int have_preshader;
  152. int ignores_ctab;
  153. int reset_texmpad;
  154. int texm3x2pad_dst0;
  155. int texm3x2pad_src0;
  156. int texm3x3pad_dst0;
  157. int texm3x3pad_src0;
  158. int texm3x3pad_dst1;
  159. int texm3x3pad_src1;
  160. MOJOSHADER_preshader *preshader;
  161. #if SUPPORT_PROFILE_ARB1_NV
  162. int profile_supports_nv2;
  163. int profile_supports_nv3;
  164. int profile_supports_nv4;
  165. #endif
  166. #if SUPPORT_PROFILE_GLSL120
  167. int profile_supports_glsl120;
  168. #endif
  169. } Context;
  170. // Use these macros so we can remove all bits of these profiles from the build.
  171. #if SUPPORT_PROFILE_ARB1_NV
  172. #define support_nv2(ctx) ((ctx)->profile_supports_nv2)
  173. #define support_nv3(ctx) ((ctx)->profile_supports_nv3)
  174. #define support_nv4(ctx) ((ctx)->profile_supports_nv4)
  175. #else
  176. #define support_nv2(ctx) (0)
  177. #define support_nv3(ctx) (0)
  178. #define support_nv4(ctx) (0)
  179. #endif
  180. #if SUPPORT_PROFILE_GLSL120
  181. #define support_glsl120(ctx) ((ctx)->profile_supports_glsl120)
  182. #else
  183. #define support_glsl120(ctx) (0)
  184. #endif
  185. // Profile entry points...
  186. // one emit function for each opcode in each profile.
  187. typedef void (*emit_function)(Context *ctx);
  188. // one emit function for starting output in each profile.
  189. typedef void (*emit_start)(Context *ctx, const char *profilestr);
  190. // one emit function for ending output in each profile.
  191. typedef void (*emit_end)(Context *ctx);
  192. // one emit function for phase opcode output in each profile.
  193. typedef void (*emit_phase)(Context *ctx);
  194. // one emit function for finalizing output in each profile.
  195. typedef void (*emit_finalize)(Context *ctx);
  196. // one emit function for global definitions in each profile.
  197. typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);
  198. // one emit function for relative uniform arrays in each profile.
  199. typedef void (*emit_array)(Context *ctx, VariableList *var);
  200. // one emit function for relative constants arrays in each profile.
  201. typedef void (*emit_const_array)(Context *ctx,
  202. const struct ConstantsList *constslist,
  203. int base, int size);
  204. // one emit function for uniforms in each profile.
  205. typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
  206. const VariableList *var);
  207. // one emit function for samplers in each profile.
  208. typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
  209. int texbem);
  210. // one emit function for attributes in each profile.
  211. typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
  212. MOJOSHADER_usage usage, int index, int wmask,
  213. int flags);
  214. // one args function for each possible sequence of opcode arguments.
  215. typedef int (*args_function)(Context *ctx);
  216. // one state function for each opcode where we have state machine updates.
  217. typedef void (*state_function)(Context *ctx);
  218. // one function for varnames in each profile.
  219. typedef const char *(*varname_function)(Context *c, RegisterType t, int num);
  220. // one function for const var array in each profile.
  221. typedef const char *(*const_array_varname_function)(Context *c, int base, int size);
  222. typedef struct Profile
  223. {
  224. const char *name;
  225. emit_start start_emitter;
  226. emit_end end_emitter;
  227. emit_phase phase_emitter;
  228. emit_global global_emitter;
  229. emit_array array_emitter;
  230. emit_const_array const_array_emitter;
  231. emit_uniform uniform_emitter;
  232. emit_sampler sampler_emitter;
  233. emit_attribute attribute_emitter;
  234. emit_finalize finalize_emitter;
  235. varname_function get_varname;
  236. const_array_varname_function get_const_array_varname;
  237. } Profile;
  238. // Convenience functions for allocators...
  239. #if !MOJOSHADER_FORCE_ALLOCATOR
  240. void *MOJOSHADER_internal_malloc(int bytes, void *d) { return malloc(bytes); }
  241. void MOJOSHADER_internal_free(void *ptr, void *d) { free(ptr); }
  242. #endif
  243. MOJOSHADER_error MOJOSHADER_out_of_mem_error = {
  244. "Out of memory", NULL, MOJOSHADER_POSITION_NONE
  245. };
  246. MOJOSHADER_parseData MOJOSHADER_out_of_mem_data = {
  247. 1, &MOJOSHADER_out_of_mem_error, 0, 0, 0, 0,
  248. MOJOSHADER_TYPE_UNKNOWN, 0, 0, 0, 0
  249. };
  250. // !!! FIXME: cut and paste between every damned source file follows...
  251. // !!! FIXME: We need to make some sort of ContextBase that applies to all
  252. // !!! FIXME: files and move this stuff to mojoshader_common.c ...
  253. static inline void out_of_memory(Context *ctx)
  254. {
  255. ctx->isfail = ctx->out_of_memory = 1;
  256. } // out_of_memory
  257. static inline void *Malloc(Context *ctx, const size_t len)
  258. {
  259. void *retval = ctx->malloc((int) len, ctx->malloc_data);
  260. if (retval == NULL)
  261. out_of_memory(ctx);
  262. return retval;
  263. } // Malloc
  264. static inline char *StrDup(Context *ctx, const char *str)
  265. {
  266. char *retval = (char *) Malloc(ctx, strlen(str) + 1);
  267. if (retval != NULL)
  268. strcpy(retval, str);
  269. return retval;
  270. } // StrDup
  271. static inline void Free(Context *ctx, void *ptr)
  272. {
  273. ctx->free(ptr, ctx->malloc_data);
  274. } // Free
  275. static void *MallocBridge(int bytes, void *data)
  276. {
  277. return Malloc((Context *) data, (size_t) bytes);
  278. } // MallocBridge
  279. static void FreeBridge(void *ptr, void *data)
  280. {
  281. Free((Context *) data, ptr);
  282. } // FreeBridge
  283. // jump between output sections in the context...
  284. static int set_output(Context *ctx, Buffer **section)
  285. {
  286. // only create output sections on first use.
  287. if (*section == NULL)
  288. {
  289. *section = buffer_create(256, MallocBridge, FreeBridge, ctx);
  290. if (*section == NULL)
  291. return 0;
  292. } // if
  293. ctx->output = *section;
  294. return 1;
  295. } // set_output
  296. static void push_output(Context *ctx, Buffer **section)
  297. {
  298. assert(ctx->output_stack_len < (int) (STATICARRAYLEN(ctx->output_stack)));
  299. ctx->output_stack[ctx->output_stack_len] = ctx->output;
  300. ctx->indent_stack[ctx->output_stack_len] = ctx->indent;
  301. ctx->output_stack_len++;
  302. if (!set_output(ctx, section))
  303. return;
  304. ctx->indent = 0;
  305. } // push_output
  306. static inline void pop_output(Context *ctx)
  307. {
  308. assert(ctx->output_stack_len > 0);
  309. ctx->output_stack_len--;
  310. ctx->output = ctx->output_stack[ctx->output_stack_len];
  311. ctx->indent = ctx->indent_stack[ctx->output_stack_len];
  312. } // pop_output
  313. // Shader model version magic...
  314. static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
  315. {
  316. return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) );
  317. } // version_ui32
  318. static inline int shader_version_supported(const uint8 maj, const uint8 min)
  319. {
  320. return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
  321. } // shader_version_supported
  322. static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
  323. const uint8 min)
  324. {
  325. return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
  326. } // shader_version_atleast
  327. static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
  328. const uint8 min)
  329. {
  330. return ((ctx->major_ver == maj) && (ctx->minor_ver == min));
  331. } // shader_version_exactly
  332. static inline int shader_is_pixel(const Context *ctx)
  333. {
  334. return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
  335. } // shader_is_pixel
  336. static inline int shader_is_vertex(const Context *ctx)
  337. {
  338. return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
  339. } // shader_is_vertex
  340. static inline int isfail(const Context *ctx)
  341. {
  342. return ctx->isfail;
  343. } // isfail
  344. static void failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
  345. static void failf(Context *ctx, const char *fmt, ...)
  346. {
  347. ctx->isfail = 1;
  348. if (ctx->out_of_memory)
  349. return;
  350. // no filename at this level (we pass a NULL to errorlist_add_va()...)
  351. va_list ap;
  352. va_start(ap, fmt);
  353. errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, ap);
  354. va_end(ap);
  355. } // failf
  356. static inline void fail(Context *ctx, const char *reason)
  357. {
  358. failf(ctx, "%s", reason);
  359. } // fail
  360. static void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
  361. static void output_line(Context *ctx, const char *fmt, ...)
  362. {
  363. assert(ctx->output != NULL);
  364. if (isfail(ctx))
  365. return; // we failed previously, don't go on...
  366. const int indent = ctx->indent;
  367. if (indent > 0)
  368. {
  369. char *indentbuf = (char *) alloca(indent);
  370. memset(indentbuf, '\t', indent);
  371. buffer_append(ctx->output, indentbuf, indent);
  372. } // if
  373. va_list ap;
  374. va_start(ap, fmt);
  375. buffer_append_va(ctx->output, fmt, ap);
  376. va_end(ap);
  377. buffer_append(ctx->output, ctx->endline, ctx->endline_len);
  378. } // output_line
  379. static inline void output_blank_line(Context *ctx)
  380. {
  381. assert(ctx->output != NULL);
  382. if (!isfail(ctx))
  383. buffer_append(ctx->output, ctx->endline, ctx->endline_len);
  384. } // output_blank_line
  385. // !!! FIXME: this is sort of nasty.
  386. static void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
  387. int leavedecimal)
  388. {
  389. const size_t len = snprintf(buf, bufsize, "%f", f);
  390. if ((len+2) >= bufsize)
  391. fail(ctx, "BUG: internal buffer is too small");
  392. else
  393. {
  394. char *end = buf + len;
  395. char *ptr = strchr(buf, '.');
  396. if (ptr == NULL)
  397. {
  398. if (leavedecimal)
  399. strcat(buf, ".0");
  400. return; // done.
  401. } // if
  402. while (--end != ptr)
  403. {
  404. if (*end != '0')
  405. {
  406. end++;
  407. break;
  408. } // if
  409. } // while
  410. if ((leavedecimal) && (end == ptr))
  411. end += 2;
  412. *end = '\0'; // chop extra '0' or all decimal places off.
  413. } // else
  414. } // floatstr
  415. static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)
  416. {
  417. return (TextureType) (((int) type) + 2);
  418. } // cvtMojoToD3DSamplerType
  419. static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type)
  420. {
  421. return (MOJOSHADER_samplerType) (((int) type) - 2);
  422. } // cvtD3DToMojoSamplerType
  423. // Deal with register lists... !!! FIXME: I sort of hate this.
  424. static void free_reglist(MOJOSHADER_free f, void *d, RegisterList *item)
  425. {
  426. while (item != NULL)
  427. {
  428. RegisterList *next = item->next;
  429. f(item, d);
  430. item = next;
  431. } // while
  432. } // free_reglist
  433. static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum)
  434. {
  435. return ( ((uint32) regtype) | (((uint32) regnum) << 16) );
  436. } // reg_to_uint32
  437. // !!! FIXME: ditch this for a hash table.
  438. static RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
  439. const RegisterType regtype,
  440. const int regnum)
  441. {
  442. const uint32 newval = reg_to_ui32(regtype, regnum);
  443. RegisterList *item = prev->next;
  444. while (item != NULL)
  445. {
  446. const uint32 val = reg_to_ui32(item->regtype, item->regnum);
  447. if (newval == val)
  448. return item; // already set, so we're done.
  449. else if (newval < val) // insert it here.
  450. break;
  451. else // if (newval > val)
  452. {
  453. // keep going, we're not to the insertion point yet.
  454. prev = item;
  455. item = item->next;
  456. } // else
  457. } // while
  458. // we need to insert an entry after (prev).
  459. item = (RegisterList *) Malloc(ctx, sizeof (RegisterList));
  460. if (item != NULL)
  461. {
  462. item->regtype = regtype;
  463. item->regnum = regnum;
  464. item->usage = MOJOSHADER_USAGE_UNKNOWN;
  465. item->index = 0;
  466. item->writemask = 0;
  467. item->misc = 0;
  468. item->array = NULL;
  469. item->next = prev->next;
  470. prev->next = item;
  471. } // if
  472. return item;
  473. } // reglist_insert
  474. static RegisterList *reglist_find(const RegisterList *prev,
  475. const RegisterType rtype, const int regnum)
  476. {
  477. const uint32 newval = reg_to_ui32(rtype, regnum);
  478. RegisterList *item = prev->next;
  479. while (item != NULL)
  480. {
  481. const uint32 val = reg_to_ui32(item->regtype, item->regnum);
  482. if (newval == val)
  483. return item; // here it is.
  484. else if (newval < val) // should have been here if it existed.
  485. return NULL;
  486. else // if (newval > val)
  487. item = item->next;
  488. } // while
  489. return NULL; // wasn't in the list.
  490. } // reglist_find
  491. static inline const RegisterList *reglist_exists(RegisterList *prev,
  492. const RegisterType regtype,
  493. const int regnum)
  494. {
  495. return (reglist_find(prev, regtype, regnum));
  496. } // reglist_exists
  497. static inline int register_was_written(Context *ctx, const RegisterType rtype,
  498. const int regnum)
  499. {
  500. RegisterList *reg = reglist_find(&ctx->used_registers, rtype, regnum);
  501. return (reg && reg->written);
  502. } // register_was_written
  503. static inline RegisterList *set_used_register(Context *ctx,
  504. const RegisterType regtype,
  505. const int regnum,
  506. const int written)
  507. {
  508. RegisterList *reg = NULL;
  509. if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0))
  510. ctx->have_multi_color_outputs = 1;
  511. reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum);
  512. if (reg && written)
  513. reg->written = 1;
  514. return reg;
  515. } // set_used_register
  516. static inline int get_used_register(Context *ctx, const RegisterType regtype,
  517. const int regnum)
  518. {
  519. return (reglist_exists(&ctx->used_registers, regtype, regnum) != NULL);
  520. } // get_used_register
  521. static inline void set_defined_register(Context *ctx, const RegisterType rtype,
  522. const int regnum)
  523. {
  524. reglist_insert(ctx, &ctx->defined_registers, rtype, regnum);
  525. } // set_defined_register
  526. static inline int get_defined_register(Context *ctx, const RegisterType rtype,
  527. const int regnum)
  528. {
  529. return (reglist_exists(&ctx->defined_registers, rtype, regnum) != NULL);
  530. } // get_defined_register
  531. static void add_attribute_register(Context *ctx, const RegisterType rtype,
  532. const int regnum, const MOJOSHADER_usage usage,
  533. const int index, const int writemask, int flags)
  534. {
  535. RegisterList *item = reglist_insert(ctx, &ctx->attributes, rtype, regnum);
  536. item->usage = usage;
  537. item->index = index;
  538. item->writemask = writemask;
  539. item->misc = flags;
  540. if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_POINTSIZE))
  541. ctx->uses_pointsize = 1; // note that we have to check this later.
  542. else if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_FOG))
  543. ctx->uses_fog = 1; // note that we have to check this later.
  544. } // add_attribute_register
  545. static inline void add_sampler(Context *ctx, const int regnum,
  546. TextureType ttype, const int texbem)
  547. {
  548. const RegisterType rtype = REG_TYPE_SAMPLER;
  549. // !!! FIXME: make sure it doesn't exist?
  550. // !!! FIXME: (ps_1_1 assume we can add it multiple times...)
  551. RegisterList *item = reglist_insert(ctx, &ctx->samplers, rtype, regnum);
  552. if (ctx->samplermap != NULL)
  553. {
  554. unsigned int i;
  555. for (i = 0; i < ctx->samplermap_count; i++)
  556. {
  557. if (ctx->samplermap[i].index == regnum)
  558. {
  559. ttype = cvtMojoToD3DSamplerType(ctx->samplermap[i].type);
  560. break;
  561. } // if
  562. } // for
  563. } // if
  564. item->index = (int) ttype;
  565. item->misc |= texbem;
  566. } // add_sampler
  567. static inline int writemask_xyzw(const int writemask)
  568. {
  569. return (writemask == 0xF); // 0xF == 1111. No explicit mask (full!).
  570. } // writemask_xyzw
  571. static inline int writemask_xyz(const int writemask)
  572. {
  573. return (writemask == 0x7); // 0x7 == 0111. (that is: xyz)
  574. } // writemask_xyz
  575. static inline int writemask_xy(const int writemask)
  576. {
  577. return (writemask == 0x3); // 0x3 == 0011. (that is: xy)
  578. } // writemask_xy
  579. static inline int writemask_x(const int writemask)
  580. {
  581. return (writemask == 0x1); // 0x1 == 0001. (that is: x)
  582. } // writemask_x
  583. static inline int writemask_y(const int writemask)
  584. {
  585. return (writemask == 0x2); // 0x1 == 0010. (that is: y)
  586. } // writemask_y
  587. static inline int replicate_swizzle(const int swizzle)
  588. {
  589. return ( (((swizzle >> 0) & 0x3) == ((swizzle >> 2) & 0x3)) &&
  590. (((swizzle >> 2) & 0x3) == ((swizzle >> 4) & 0x3)) &&
  591. (((swizzle >> 4) & 0x3) == ((swizzle >> 6) & 0x3)) );
  592. } // replicate_swizzle
  593. static inline int no_swizzle(const int swizzle)
  594. {
  595. return (swizzle == 0xE4); // 0xE4 == 11100100 ... 0 1 2 3. No swizzle.
  596. } // no_swizzle
  597. static inline int vecsize_from_writemask(const int m)
  598. {
  599. return (m & 1) + ((m >> 1) & 1) + ((m >> 2) & 1) + ((m >> 3) & 1);
  600. } // vecsize_from_writemask
  601. static inline void set_dstarg_writemask(DestArgInfo *dst, const int mask)
  602. {
  603. dst->writemask = mask;
  604. dst->writemask0 = ((mask >> 0) & 1);
  605. dst->writemask1 = ((mask >> 1) & 1);
  606. dst->writemask2 = ((mask >> 2) & 1);
  607. dst->writemask3 = ((mask >> 3) & 1);
  608. } // set_dstarg_writemask
  609. static int allocate_scratch_register(Context *ctx)
  610. {
  611. const int retval = ctx->scratch_registers++;
  612. if (retval >= ctx->max_scratch_registers)
  613. ctx->max_scratch_registers = retval + 1;
  614. return retval;
  615. } // allocate_scratch_register
  616. static int allocate_branch_label(Context *ctx)
  617. {
  618. return ctx->assigned_branch_labels++;
  619. } // allocate_branch_label
  620. static inline void adjust_token_position(Context *ctx, const int incr)
  621. {
  622. ctx->tokens += incr;
  623. ctx->tokencount -= incr;
  624. ctx->current_position += incr * sizeof (uint32);
  625. } // adjust_token_position
  626. // D3D stuff that's used in more than just the d3d profile...
  627. static int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type,
  628. const RegisterType rtype, const int rnum)
  629. {
  630. const int uses_psize = ctx->uses_pointsize;
  631. const int uses_fog = ctx->uses_fog;
  632. if ( (rtype == REG_TYPE_OUTPUT) && ((uses_psize) || (uses_fog)) )
  633. {
  634. const RegisterList *reg = reglist_find(&ctx->attributes, rtype, rnum);
  635. if (reg != NULL)
  636. {
  637. const MOJOSHADER_usage usage = reg->usage;
  638. return ( (uses_psize && (usage == MOJOSHADER_USAGE_POINTSIZE)) ||
  639. (uses_fog && (usage == MOJOSHADER_USAGE_FOG)) );
  640. } // if
  641. } // if
  642. return scalar_register(shader_type, rtype, rnum);
  643. } // isscalar
  644. static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' };
  645. static const char *usagestrs[] = {
  646. "_position", "_blendweight", "_blendindices", "_normal", "_psize",
  647. "_texcoord", "_tangent", "_binormal", "_tessfactor", "_positiont",
  648. "_color", "_fog", "_depth", "_sample"
  649. };
  650. static const char *get_D3D_register_string(Context *ctx,
  651. RegisterType regtype,
  652. int regnum, char *regnum_str,
  653. size_t regnum_size)
  654. {
  655. const char *retval = NULL;
  656. int has_number = 1;
  657. switch (regtype)
  658. {
  659. case REG_TYPE_TEMP:
  660. retval = "r";
  661. break;
  662. case REG_TYPE_INPUT:
  663. retval = "v";
  664. break;
  665. case REG_TYPE_CONST:
  666. retval = "c";
  667. break;
  668. case REG_TYPE_ADDRESS: // (or REG_TYPE_TEXTURE, same value.)
  669. retval = shader_is_vertex(ctx) ? "a" : "t";
  670. break;
  671. case REG_TYPE_RASTOUT:
  672. switch ((RastOutType) regnum)
  673. {
  674. case RASTOUT_TYPE_POSITION: retval = "oPos"; break;
  675. case RASTOUT_TYPE_FOG: retval = "oFog"; break;
  676. case RASTOUT_TYPE_POINT_SIZE: retval = "oPts"; break;
  677. } // switch
  678. has_number = 0;
  679. break;
  680. case REG_TYPE_ATTROUT:
  681. retval = "oD";
  682. break;
  683. case REG_TYPE_OUTPUT: // (or REG_TYPE_TEXCRDOUT, same value.)
  684. if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0))
  685. retval = "o";
  686. else
  687. retval = "oT";
  688. break;
  689. case REG_TYPE_CONSTINT:
  690. retval = "i";
  691. break;
  692. case REG_TYPE_COLOROUT:
  693. retval = "oC";
  694. break;
  695. case REG_TYPE_DEPTHOUT:
  696. retval = "oDepth";
  697. has_number = 0;
  698. break;
  699. case REG_TYPE_SAMPLER:
  700. retval = "s";
  701. break;
  702. case REG_TYPE_CONSTBOOL:
  703. retval = "b";
  704. break;
  705. case REG_TYPE_LOOP:
  706. retval = "aL";
  707. has_number = 0;
  708. break;
  709. case REG_TYPE_MISCTYPE:
  710. switch ((const MiscTypeType) regnum)
  711. {
  712. case MISCTYPE_TYPE_POSITION: retval = "vPos"; break;
  713. case MISCTYPE_TYPE_FACE: retval = "vFace"; break;
  714. } // switch
  715. has_number = 0;
  716. break;
  717. case REG_TYPE_LABEL:
  718. retval = "l";
  719. break;
  720. case REG_TYPE_PREDICATE:
  721. retval = "p";
  722. break;
  723. //case REG_TYPE_TEMPFLOAT16: // !!! FIXME: don't know this asm string
  724. default:
  725. fail(ctx, "unknown register type");
  726. retval = "???";
  727. has_number = 0;
  728. break;
  729. } // switch
  730. if (has_number)
  731. snprintf(regnum_str, regnum_size, "%u", (uint) regnum);
  732. else
  733. regnum_str[0] = '\0';
  734. return retval;
  735. } // get_D3D_register_string
  736. // !!! FIXME: can we split the profile code out to separate source files?
  737. #define AT_LEAST_ONE_PROFILE 0
  738. #if !SUPPORT_PROFILE_D3D
  739. #define PROFILE_EMITTER_D3D(op)
  740. #else
  741. #undef AT_LEAST_ONE_PROFILE
  742. #define AT_LEAST_ONE_PROFILE 1
  743. #define PROFILE_EMITTER_D3D(op) emit_D3D_##op,
  744. static const char *make_D3D_srcarg_string_in_buf(Context *ctx,
  745. const SourceArgInfo *arg,
  746. char *buf, size_t buflen)
  747. {
  748. const char *premod_str = "";
  749. const char *postmod_str = "";
  750. switch (arg->src_mod)
  751. {
  752. case SRCMOD_NEGATE:
  753. premod_str = "-";
  754. break;
  755. case SRCMOD_BIASNEGATE:
  756. premod_str = "-";
  757. // fall through.
  758. case SRCMOD_BIAS:
  759. postmod_str = "_bias";
  760. break;
  761. case SRCMOD_SIGNNEGATE:
  762. premod_str = "-";
  763. // fall through.
  764. case SRCMOD_SIGN:
  765. postmod_str = "_bx2";
  766. break;
  767. case SRCMOD_COMPLEMENT:
  768. premod_str = "1-";
  769. break;
  770. case SRCMOD_X2NEGATE:
  771. premod_str = "-";
  772. // fall through.
  773. case SRCMOD_X2:
  774. postmod_str = "_x2";
  775. break;
  776. case SRCMOD_DZ:
  777. postmod_str = "_dz";
  778. break;
  779. case SRCMOD_DW:
  780. postmod_str = "_dw";
  781. break;
  782. case SRCMOD_ABSNEGATE:
  783. premod_str = "-";
  784. // fall through.
  785. case SRCMOD_ABS:
  786. postmod_str = "_abs";
  787. break;
  788. case SRCMOD_NOT:
  789. premod_str = "!";
  790. break;
  791. case SRCMOD_NONE:
  792. case SRCMOD_TOTAL:
  793. break; // stop compiler whining.
  794. } // switch
  795. char regnum_str[16];
  796. const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
  797. arg->regnum, regnum_str,
  798. sizeof (regnum_str));
  799. if (regtype_str == NULL)
  800. {
  801. fail(ctx, "Unknown source register type.");
  802. *buf = '\0';
  803. return buf;
  804. } // if
  805. const char *rel_lbracket = "";
  806. const char *rel_rbracket = "";
  807. char rel_swizzle[4] = { '\0' };
  808. char rel_regnum_str[16] = { '\0' };
  809. const char *rel_regtype_str = "";
  810. if (arg->relative)
  811. {
  812. rel_swizzle[0] = '.';
  813. rel_swizzle[1] = swizzle_channels[arg->relative_component];
  814. rel_swizzle[2] = '\0';
  815. rel_lbracket = "[";
  816. rel_rbracket = "]";
  817. rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype,
  818. arg->relative_regnum,
  819. rel_regnum_str,
  820. sizeof (rel_regnum_str));
  821. if (regtype_str == NULL)
  822. {
  823. fail(ctx, "Unknown relative source register type.");
  824. *buf = '\0';
  825. return buf;
  826. } // if
  827. } // if
  828. char swizzle_str[6];
  829. size_t i = 0;
  830. const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
  831. if (!scalar && !no_swizzle(arg->swizzle))
  832. {
  833. swizzle_str[i++] = '.';
  834. swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
  835. swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
  836. swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
  837. swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
  838. // .xyzz is the same as .xyz, .z is the same as .zzzz, etc.
  839. while (swizzle_str[i-1] == swizzle_str[i-2])
  840. i--;
  841. } // if
  842. swizzle_str[i] = '\0';
  843. assert(i < sizeof (swizzle_str));
  844. // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12]
  845. snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s",
  846. premod_str, regtype_str, regnum_str, postmod_str,
  847. rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle,
  848. rel_rbracket, swizzle_str);
  849. // !!! FIXME: make sure the scratch buffer was large enough.
  850. return buf;
  851. } // make_D3D_srcarg_string_in_buf
  852. static const char *make_D3D_destarg_string(Context *ctx, char *buf,
  853. const size_t buflen)
  854. {
  855. const DestArgInfo *arg = &ctx->dest_arg;
  856. const char *result_shift_str = "";
  857. switch (arg->result_shift)
  858. {
  859. case 0x1: result_shift_str = "_x2"; break;
  860. case 0x2: result_shift_str = "_x4"; break;
  861. case 0x3: result_shift_str = "_x8"; break;
  862. case 0xD: result_shift_str = "_d8"; break;
  863. case 0xE: result_shift_str = "_d4"; break;
  864. case 0xF: result_shift_str = "_d2"; break;
  865. } // switch
  866. const char *sat_str = (arg->result_mod & MOD_SATURATE) ? "_sat" : "";
  867. const char *pp_str = (arg->result_mod & MOD_PP) ? "_pp" : "";
  868. const char *cent_str = (arg->result_mod & MOD_CENTROID) ? "_centroid" : "";
  869. char regnum_str[16];
  870. const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
  871. arg->regnum, regnum_str,
  872. sizeof (regnum_str));
  873. if (regtype_str == NULL)
  874. {
  875. fail(ctx, "Unknown destination register type.");
  876. *buf = '\0';
  877. return buf;
  878. } // if
  879. char writemask_str[6];
  880. size_t i = 0;
  881. const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
  882. if (!scalar && !writemask_xyzw(arg->writemask))
  883. {
  884. writemask_str[i++] = '.';
  885. if (arg->writemask0) writemask_str[i++] = 'x';
  886. if (arg->writemask1) writemask_str[i++] = 'y';
  887. if (arg->writemask2) writemask_str[i++] = 'z';
  888. if (arg->writemask3) writemask_str[i++] = 'w';
  889. } // if
  890. writemask_str[i] = '\0';
  891. assert(i < sizeof (writemask_str));
  892. const char *pred_left = "";
  893. const char *pred_right = "";
  894. char pred[32] = { '\0' };
  895. if (ctx->predicated)
  896. {
  897. pred_left = "(";
  898. pred_right = ") ";
  899. make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
  900. pred, sizeof (pred));
  901. } // if
  902. // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ...
  903. snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s",
  904. result_shift_str, sat_str, pp_str, cent_str,
  905. pred_left, pred, pred_right,
  906. regtype_str, regnum_str, writemask_str);
  907. // !!! FIXME: make sure the scratch buffer was large enough.
  908. return buf;
  909. } // make_D3D_destarg_string
  910. static const char *make_D3D_srcarg_string(Context *ctx, const size_t idx,
  911. char *buf, size_t buflen)
  912. {
  913. if (idx >= STATICARRAYLEN(ctx->source_args))
  914. {
  915. fail(ctx, "Too many source args");
  916. *buf = '\0';
  917. return buf;
  918. } // if
  919. const SourceArgInfo *arg = &ctx->source_args[idx];
  920. return make_D3D_srcarg_string_in_buf(ctx, arg, buf, buflen);
  921. } // make_D3D_srcarg_string
  922. static const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt,
  923. int regnum, char *buf,
  924. const size_t len)
  925. {
  926. char regnum_str[16];
  927. const char *regtype_str = get_D3D_register_string(ctx, rt, regnum,
  928. regnum_str, sizeof (regnum_str));
  929. snprintf(buf,len,"%s%s", regtype_str, regnum_str);
  930. return buf;
  931. } // get_D3D_varname_in_buf
  932. static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum)
  933. {
  934. char buf[64];
  935. get_D3D_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
  936. return StrDup(ctx, buf);
  937. } // get_D3D_varname
  938. static const char *get_D3D_const_array_varname(Context *ctx, int base, int size)
  939. {
  940. char buf[64];
  941. snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size);
  942. return StrDup(ctx, buf);
  943. } // get_D3D_const_array_varname
  944. static void emit_D3D_start(Context *ctx, const char *profilestr)
  945. {
  946. const uint major = (uint) ctx->major_ver;
  947. const uint minor = (uint) ctx->minor_ver;
  948. char minor_str[16];
  949. ctx->ignores_ctab = 1;
  950. if (minor == 0xFF)
  951. strcpy(minor_str, "sw");
  952. else if ((major > 1) && (minor == 1))
  953. strcpy(minor_str, "x"); // for >= SM2, apparently this is "x". Weird.
  954. else
  955. snprintf(minor_str, sizeof (minor_str), "%u", (uint) minor);
  956. output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_str);
  957. } // emit_D3D_start
  958. static void emit_D3D_end(Context *ctx)
  959. {
  960. output_line(ctx, "end");
  961. } // emit_D3D_end
  962. static void emit_D3D_phase(Context *ctx)
  963. {
  964. output_line(ctx, "phase");
  965. } // emit_D3D_phase
  966. static void emit_D3D_finalize(Context *ctx)
  967. {
  968. // no-op.
  969. } // emit_D3D_finalize
  970. static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum)
  971. {
  972. // no-op.
  973. } // emit_D3D_global
  974. static void emit_D3D_array(Context *ctx, VariableList *var)
  975. {
  976. // no-op.
  977. } // emit_D3D_array
  978. static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist,
  979. int base, int size)
  980. {
  981. // no-op.
  982. } // emit_D3D_const_array
  983. static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum,
  984. const VariableList *var)
  985. {
  986. // no-op.
  987. } // emit_D3D_uniform
  988. static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb)
  989. {
  990. // no-op.
  991. } // emit_D3D_sampler
  992. static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
  993. MOJOSHADER_usage usage, int index, int wmask,
  994. int flags)
  995. {
  996. // no-op.
  997. } // emit_D3D_attribute
  998. static void emit_D3D_RESERVED(Context *ctx)
  999. {
  1000. // do nothing; fails in the state machine.
  1001. } // emit_D3D_RESERVED
  1002. // Generic D3D opcode emitters. A list of macros generate all the entry points
  1003. // that call into these...
  // Copy src into dst, converting 'A'..'Z' to 'a'..'z' and leaving every
  // other character alone. The terminating '\0' is copied too, so dst must
  // have room for strlen(src) + 1 bytes. Returns dst.
  static char *lowercase(char *dst, const char *src)
  {
      char *out = dst;

      for (;;)
      {
          const char ch = *src++;
          const int is_upper = ((ch >= 'A') && (ch <= 'Z'));

          *out++ = is_upper ? (char) (ch + ('a' - 'A')) : ch;
          if (ch == '\0')
              break;  // terminator has just been copied.
      } // for

      return dst;
  } // lowercase
  1014. static void emit_D3D_opcode_d(Context *ctx, const char *opcode)
  1015. {
  1016. char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1017. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1018. output_line(ctx, "%s%s%s", ctx->coissue ? "+" : "", opcode, dst);
  1019. } // emit_D3D_opcode_d
  1020. static void emit_D3D_opcode_s(Context *ctx, const char *opcode)
  1021. {
  1022. char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
  1023. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1024. output_line(ctx, "%s%s %s", ctx->coissue ? "+" : "", opcode, src0);
  1025. } // emit_D3D_opcode_s
  1026. static void emit_D3D_opcode_ss(Context *ctx, const char *opcode)
  1027. {
  1028. char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
  1029. char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
  1030. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1031. output_line(ctx, "%s%s %s, %s", ctx->coissue ? "+" : "", opcode, src0, src1);
  1032. } // emit_D3D_opcode_ss
  1033. static void emit_D3D_opcode_ds(Context *ctx, const char *opcode)
  1034. {
  1035. char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1036. char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
  1037. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1038. output_line(ctx, "%s%s%s, %s", ctx->coissue ? "+" : "", opcode, dst, src0);
  1039. } // emit_D3D_opcode_ds
  1040. static void emit_D3D_opcode_dss(Context *ctx, const char *opcode)
  1041. {
  1042. char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1043. char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
  1044. char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
  1045. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1046. output_line(ctx, "%s%s%s, %s, %s", ctx->coissue ? "+" : "",
  1047. opcode, dst, src0, src1);
  1048. } // emit_D3D_opcode_dss
  1049. static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode)
  1050. {
  1051. char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1052. char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
  1053. char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
  1054. char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
  1055. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1056. output_line(ctx, "%s%s%s, %s, %s, %s", ctx->coissue ? "+" : "",
  1057. opcode, dst, src0, src1, src2);
  1058. } // emit_D3D_opcode_dsss
  1059. static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode)
  1060. {
  1061. char dst[64]; make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1062. char src0[64]; make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
  1063. char src1[64]; make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
  1064. char src2[64]; make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
  1065. char src3[64]; make_D3D_srcarg_string(ctx, 3, src3, sizeof (src3));
  1066. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1067. output_line(ctx,"%s%s%s, %s, %s, %s, %s", ctx->coissue ? "+" : "",
  1068. opcode, dst, src0, src1, src2, src3);
  1069. } // emit_D3D_opcode_dssss
  1070. static void emit_D3D_opcode(Context *ctx, const char *opcode)
  1071. {
  1072. opcode = lowercase((char *) alloca(strlen(opcode) + 1), opcode);
  1073. output_line(ctx, "%s%s", ctx->coissue ? "+" : "", opcode);
  1074. } // emit_D3D_opcode
  1075. #define EMIT_D3D_OPCODE_FUNC(op) \
  1076. static void emit_D3D_##op(Context *ctx) { \
  1077. emit_D3D_opcode(ctx, #op); \
  1078. }
  1079. #define EMIT_D3D_OPCODE_D_FUNC(op) \
  1080. static void emit_D3D_##op(Context *ctx) { \
  1081. emit_D3D_opcode_d(ctx, #op); \
  1082. }
  1083. #define EMIT_D3D_OPCODE_S_FUNC(op) \
  1084. static void emit_D3D_##op(Context *ctx) { \
  1085. emit_D3D_opcode_s(ctx, #op); \
  1086. }
  1087. #define EMIT_D3D_OPCODE_SS_FUNC(op) \
  1088. static void emit_D3D_##op(Context *ctx) { \
  1089. emit_D3D_opcode_ss(ctx, #op); \
  1090. }
  1091. #define EMIT_D3D_OPCODE_DS_FUNC(op) \
  1092. static void emit_D3D_##op(Context *ctx) { \
  1093. emit_D3D_opcode_ds(ctx, #op); \
  1094. }
  1095. #define EMIT_D3D_OPCODE_DSS_FUNC(op) \
  1096. static void emit_D3D_##op(Context *ctx) { \
  1097. emit_D3D_opcode_dss(ctx, #op); \
  1098. }
  1099. #define EMIT_D3D_OPCODE_DSSS_FUNC(op) \
  1100. static void emit_D3D_##op(Context *ctx) { \
  1101. emit_D3D_opcode_dsss(ctx, #op); \
  1102. }
  1103. #define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \
  1104. static void emit_D3D_##op(Context *ctx) { \
  1105. emit_D3D_opcode_dssss(ctx, #op); \
  1106. }
  1107. EMIT_D3D_OPCODE_FUNC(NOP)
  1108. EMIT_D3D_OPCODE_DS_FUNC(MOV)
  1109. EMIT_D3D_OPCODE_DSS_FUNC(ADD)
  1110. EMIT_D3D_OPCODE_DSS_FUNC(SUB)
  1111. EMIT_D3D_OPCODE_DSSS_FUNC(MAD)
  1112. EMIT_D3D_OPCODE_DSS_FUNC(MUL)
  1113. EMIT_D3D_OPCODE_DS_FUNC(RCP)
  1114. EMIT_D3D_OPCODE_DS_FUNC(RSQ)
  1115. EMIT_D3D_OPCODE_DSS_FUNC(DP3)
  1116. EMIT_D3D_OPCODE_DSS_FUNC(DP4)
  1117. EMIT_D3D_OPCODE_DSS_FUNC(MIN)
  1118. EMIT_D3D_OPCODE_DSS_FUNC(MAX)
  1119. EMIT_D3D_OPCODE_DSS_FUNC(SLT)
  1120. EMIT_D3D_OPCODE_DSS_FUNC(SGE)
  1121. EMIT_D3D_OPCODE_DS_FUNC(EXP)
  1122. EMIT_D3D_OPCODE_DS_FUNC(LOG)
  1123. EMIT_D3D_OPCODE_DS_FUNC(LIT)
  1124. EMIT_D3D_OPCODE_DSS_FUNC(DST)
  1125. EMIT_D3D_OPCODE_DSSS_FUNC(LRP)
  1126. EMIT_D3D_OPCODE_DS_FUNC(FRC)
  1127. EMIT_D3D_OPCODE_DSS_FUNC(M4X4)
  1128. EMIT_D3D_OPCODE_DSS_FUNC(M4X3)
  1129. EMIT_D3D_OPCODE_DSS_FUNC(M3X4)
  1130. EMIT_D3D_OPCODE_DSS_FUNC(M3X3)
  1131. EMIT_D3D_OPCODE_DSS_FUNC(M3X2)
  1132. EMIT_D3D_OPCODE_S_FUNC(CALL)
  1133. EMIT_D3D_OPCODE_SS_FUNC(CALLNZ)
  1134. EMIT_D3D_OPCODE_SS_FUNC(LOOP)
  1135. EMIT_D3D_OPCODE_FUNC(RET)
  1136. EMIT_D3D_OPCODE_FUNC(ENDLOOP)
  1137. EMIT_D3D_OPCODE_S_FUNC(LABEL)
  1138. EMIT_D3D_OPCODE_DSS_FUNC(POW)
  1139. EMIT_D3D_OPCODE_DSS_FUNC(CRS)
  1140. EMIT_D3D_OPCODE_DSSS_FUNC(SGN)
  1141. EMIT_D3D_OPCODE_DS_FUNC(ABS)
  1142. EMIT_D3D_OPCODE_DS_FUNC(NRM)
  1143. EMIT_D3D_OPCODE_S_FUNC(REP)
  1144. EMIT_D3D_OPCODE_FUNC(ENDREP)
  1145. EMIT_D3D_OPCODE_S_FUNC(IF)
  1146. EMIT_D3D_OPCODE_FUNC(ELSE)
  1147. EMIT_D3D_OPCODE_FUNC(ENDIF)
  1148. EMIT_D3D_OPCODE_FUNC(BREAK)
  1149. EMIT_D3D_OPCODE_DS_FUNC(MOVA)
  1150. EMIT_D3D_OPCODE_D_FUNC(TEXKILL)
  1151. EMIT_D3D_OPCODE_DS_FUNC(TEXBEM)
  1152. EMIT_D3D_OPCODE_DS_FUNC(TEXBEML)
  1153. EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR)
  1154. EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB)
  1155. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD)
  1156. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX)
  1157. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD)
  1158. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX)
  1159. EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC)
  1160. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC)
  1161. EMIT_D3D_OPCODE_DS_FUNC(EXPP)
  1162. EMIT_D3D_OPCODE_DS_FUNC(LOGP)
  1163. EMIT_D3D_OPCODE_DSSS_FUNC(CND)
  1164. EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB)
  1165. EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX)
  1166. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH)
  1167. EMIT_D3D_OPCODE_DS_FUNC(TEXDP3)
  1168. EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3)
  1169. EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH)
  1170. EMIT_D3D_OPCODE_DSSS_FUNC(CMP)
  1171. EMIT_D3D_OPCODE_DSS_FUNC(BEM)
  1172. EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD)
  1173. EMIT_D3D_OPCODE_DS_FUNC(DSX)
  1174. EMIT_D3D_OPCODE_DS_FUNC(DSY)
  1175. EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD)
  1176. EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL)
  1177. EMIT_D3D_OPCODE_S_FUNC(BREAKP)
  1178. // special cases for comparison opcodes...
  1179. static const char *get_D3D_comparison_string(Context *ctx)
  1180. {
  1181. static const char *comps[] = {
  1182. "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le"
  1183. };
  1184. if (ctx->instruction_controls >= STATICARRAYLEN(comps))
  1185. {
  1186. fail(ctx, "unknown comparison control");
  1187. return "";
  1188. } // if
  1189. return comps[ctx->instruction_controls];
  1190. } // get_D3D_comparison_string
  1191. static void emit_D3D_BREAKC(Context *ctx)
  1192. {
  1193. char op[16];
  1194. snprintf(op, sizeof (op), "break%s", get_D3D_comparison_string(ctx));
  1195. emit_D3D_opcode_ss(ctx, op);
  1196. } // emit_D3D_BREAKC
  1197. static void emit_D3D_IFC(Context *ctx)
  1198. {
  1199. char op[16];
  1200. snprintf(op, sizeof (op), "if%s", get_D3D_comparison_string(ctx));
  1201. emit_D3D_opcode_ss(ctx, op);
  1202. } // emit_D3D_IFC
  1203. static void emit_D3D_SETP(Context *ctx)
  1204. {
  1205. char op[16];
  1206. snprintf(op, sizeof (op), "setp%s", get_D3D_comparison_string(ctx));
  1207. emit_D3D_opcode_dss(ctx, op);
  1208. } // emit_D3D_SETP
  1209. static void emit_D3D_DEF(Context *ctx)
  1210. {
  1211. char dst[64];
  1212. make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1213. const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
  1214. char val0[32];
  1215. char val1[32];
  1216. char val2[32];
  1217. char val3[32];
  1218. floatstr(ctx, val0, sizeof (val0), val[0], 0);
  1219. floatstr(ctx, val1, sizeof (val1), val[1], 0);
  1220. floatstr(ctx, val2, sizeof (val2), val[2], 0);
  1221. floatstr(ctx, val3, sizeof (val3), val[3], 0);
  1222. output_line(ctx, "def%s, %s, %s, %s, %s", dst, val0, val1, val2, val3);
  1223. } // emit_D3D_DEF
  1224. static void emit_D3D_DEFI(Context *ctx)
  1225. {
  1226. char dst[64];
  1227. make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1228. const int32 *x = (const int32 *) ctx->dwords;
  1229. output_line(ctx, "defi%s, %d, %d, %d, %d", dst,
  1230. (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
  1231. } // emit_D3D_DEFI
  1232. static void emit_D3D_DEFB(Context *ctx)
  1233. {
  1234. char dst[64];
  1235. make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1236. output_line(ctx, "defb%s, %s", dst, ctx->dwords[0] ? "true" : "false");
  1237. } // emit_D3D_DEFB
  1238. static void emit_D3D_DCL(Context *ctx)
  1239. {
  1240. char dst[64];
  1241. make_D3D_destarg_string(ctx, dst, sizeof (dst));
  1242. const DestArgInfo *arg = &ctx->dest_arg;
  1243. const char *usage_str = "";
  1244. char index_str[16] = { '\0' };
  1245. if (arg->regtype == REG_TYPE_SAMPLER)
  1246. {
  1247. switch ((const TextureType) ctx->dwords[0])
  1248. {
  1249. case TEXTURE_TYPE_2D: usage_str = "_2d"; break;
  1250. case TEXTURE_TYPE_CUBE: usage_str = "_cube"; break;
  1251. case TEXTURE_TYPE_VOLUME: usage_str = "_volume"; break;
  1252. default: fail(ctx, "unknown sampler texture type"); return;
  1253. } // switch
  1254. } // if
  1255. else if (arg->regtype == REG_TYPE_MISCTYPE)
  1256. {
  1257. switch ((const MiscTypeType) arg->regnum)
  1258. {
  1259. case MISCTYPE_TYPE_POSITION:
  1260. case MISCTYPE_TYPE_FACE:
  1261. usage_str = ""; // just become "dcl vFace" or whatever.
  1262. break;
  1263. default: fail(ctx, "unknown misc register type"); return;
  1264. } // switch
  1265. } // else if
  1266. else
  1267. {
  1268. const uint32 usage = ctx->dwords[0];
  1269. const uint32 index = ctx->dwords[1];
  1270. usage_str = usagestrs[usage];
  1271. if (index != 0)
  1272. snprintf(index_str, sizeof (index_str), "%u", (uint) index);
  1273. } // else
  1274. output_line(ctx, "dcl%s%s%s", usage_str, index_str, dst);
  1275. } // emit_D3D_DCL
  1276. static void emit_D3D_TEXCRD(Context *ctx)
  1277. {
  1278. // this opcode looks and acts differently depending on the shader model.
  1279. if (shader_version_atleast(ctx, 1, 4))
  1280. emit_D3D_opcode_ds(ctx, "texcrd");
  1281. else
  1282. emit_D3D_opcode_d(ctx, "texcoord");
  1283. } // emit_D3D_TEXCOORD
  1284. static void emit_D3D_TEXLD(Context *ctx)
  1285. {
  1286. // this opcode looks and acts differently depending on the shader model.
  1287. if (shader_version_atleast(ctx, 2, 0))
  1288. {
  1289. if (ctx->instruction_controls == CONTROL_TEXLD)
  1290. emit_D3D_opcode_dss(ctx, "texld");
  1291. else if (ctx->instruction_controls == CONTROL_TEXLDP)
  1292. emit_D3D_opcode_dss(ctx, "texldp");
  1293. else if (ctx->instruction_controls == CONTROL_TEXLDB)
  1294. emit_D3D_opcode_dss(ctx, "texldb");
  1295. } // if
  1296. else if (shader_version_atleast(ctx, 1, 4))
  1297. {
  1298. emit_D3D_opcode_ds(ctx, "texld");
  1299. } // else if
  1300. else
  1301. {
  1302. emit_D3D_opcode_d(ctx, "tex");
  1303. } // else
  1304. } // emit_D3D_TEXLD
  1305. static void emit_D3D_SINCOS(Context *ctx)
  1306. {
  1307. // this opcode needs extra registers for sm2 and lower.
  1308. if (!shader_version_atleast(ctx, 3, 0))
  1309. emit_D3D_opcode_dsss(ctx, "sincos");
  1310. else
  1311. emit_D3D_opcode_ds(ctx, "sincos");
  1312. } // emit_D3D_SINCOS
  1313. #undef EMIT_D3D_OPCODE_FUNC
  1314. #undef EMIT_D3D_OPCODE_D_FUNC
  1315. #undef EMIT_D3D_OPCODE_S_FUNC
  1316. #undef EMIT_D3D_OPCODE_SS_FUNC
  1317. #undef EMIT_D3D_OPCODE_DS_FUNC
  1318. #undef EMIT_D3D_OPCODE_DSS_FUNC
  1319. #undef EMIT_D3D_OPCODE_DSSS_FUNC
  1320. #undef EMIT_D3D_OPCODE_DSSSS_FUNC
  1321. #endif // SUPPORT_PROFILE_D3D
  1322. #if !SUPPORT_PROFILE_BYTECODE
  1323. #define PROFILE_EMITTER_BYTECODE(op)
  1324. #else
  1325. #undef AT_LEAST_ONE_PROFILE
  1326. #define AT_LEAST_ONE_PROFILE 1
  1327. #define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op,
  1328. static void emit_BYTECODE_start(Context *ctx, const char *profilestr)
  1329. {
  1330. ctx->ignores_ctab = 1;
  1331. // just copy the whole token stream and make all other emitters no-ops.
  1332. if (set_output(ctx, &ctx->mainline))
  1333. {
  1334. const size_t len = ctx->tokencount * sizeof (uint32);
  1335. buffer_append(ctx->mainline, (const char *) ctx->tokens, len);
  1336. } // if
  1337. } // emit_BYTECODE_start
  1338. static void emit_BYTECODE_end(Context *ctx) {}
  1339. static void emit_BYTECODE_phase(Context *ctx) {}
  1340. static void emit_BYTECODE_finalize(Context *ctx) {}
  1341. static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {}
  1342. static void emit_BYTECODE_array(Context *ctx, VariableList *var) {}
  1343. static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {}
  1344. static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c,
  1345. int base, int size) {}
  1346. static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n,
  1347. const VariableList *var) {}
  1348. static void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n,
  1349. MOJOSHADER_usage u, int i, int w,
  1350. int f) {}
  1351. static const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum)
  1352. {
  1353. char regnum_str[16];
  1354. const char *regtype_str = get_D3D_register_string(ctx, rt, regnum,
  1355. regnum_str, sizeof (regnum_str));
  1356. char buf[64];
  1357. snprintf(buf, sizeof (buf), "%s%s", regtype_str, regnum_str);
  1358. return StrDup(ctx, buf);
  1359. } // get_BYTECODE_varname
  1360. static const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size)
  1361. {
  1362. char buf[64];
  1363. snprintf(buf, sizeof (buf), "c_array_%d_%d", base, size);
  1364. return StrDup(ctx, buf);
  1365. } // get_BYTECODE_const_array_varname
  1366. #define EMIT_BYTECODE_OPCODE_FUNC(op) \
  1367. static void emit_BYTECODE_##op(Context *ctx) {}
  1368. EMIT_BYTECODE_OPCODE_FUNC(RESERVED)
  1369. EMIT_BYTECODE_OPCODE_FUNC(NOP)
  1370. EMIT_BYTECODE_OPCODE_FUNC(MOV)
  1371. EMIT_BYTECODE_OPCODE_FUNC(ADD)
  1372. EMIT_BYTECODE_OPCODE_FUNC(SUB)
  1373. EMIT_BYTECODE_OPCODE_FUNC(MAD)
  1374. EMIT_BYTECODE_OPCODE_FUNC(MUL)
  1375. EMIT_BYTECODE_OPCODE_FUNC(RCP)
  1376. EMIT_BYTECODE_OPCODE_FUNC(RSQ)
  1377. EMIT_BYTECODE_OPCODE_FUNC(DP3)
  1378. EMIT_BYTECODE_OPCODE_FUNC(DP4)
  1379. EMIT_BYTECODE_OPCODE_FUNC(MIN)
  1380. EMIT_BYTECODE_OPCODE_FUNC(MAX)
  1381. EMIT_BYTECODE_OPCODE_FUNC(SLT)
  1382. EMIT_BYTECODE_OPCODE_FUNC(SGE)
  1383. EMIT_BYTECODE_OPCODE_FUNC(EXP)
  1384. EMIT_BYTECODE_OPCODE_FUNC(LOG)
  1385. EMIT_BYTECODE_OPCODE_FUNC(LIT)
  1386. EMIT_BYTECODE_OPCODE_FUNC(DST)
  1387. EMIT_BYTECODE_OPCODE_FUNC(LRP)
  1388. EMIT_BYTECODE_OPCODE_FUNC(FRC)
  1389. EMIT_BYTECODE_OPCODE_FUNC(M4X4)
  1390. EMIT_BYTECODE_OPCODE_FUNC(M4X3)
  1391. EMIT_BYTECODE_OPCODE_FUNC(M3X4)
  1392. EMIT_BYTECODE_OPCODE_FUNC(M3X3)
  1393. EMIT_BYTECODE_OPCODE_FUNC(M3X2)
  1394. EMIT_BYTECODE_OPCODE_FUNC(CALL)
  1395. EMIT_BYTECODE_OPCODE_FUNC(CALLNZ)
  1396. EMIT_BYTECODE_OPCODE_FUNC(LOOP)
  1397. EMIT_BYTECODE_OPCODE_FUNC(RET)
  1398. EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP)
  1399. EMIT_BYTECODE_OPCODE_FUNC(LABEL)
  1400. EMIT_BYTECODE_OPCODE_FUNC(POW)
  1401. EMIT_BYTECODE_OPCODE_FUNC(CRS)
  1402. EMIT_BYTECODE_OPCODE_FUNC(SGN)
  1403. EMIT_BYTECODE_OPCODE_FUNC(ABS)
  1404. EMIT_BYTECODE_OPCODE_FUNC(NRM)
  1405. EMIT_BYTECODE_OPCODE_FUNC(SINCOS)
  1406. EMIT_BYTECODE_OPCODE_FUNC(REP)
  1407. EMIT_BYTECODE_OPCODE_FUNC(ENDREP)
  1408. EMIT_BYTECODE_OPCODE_FUNC(IF)
  1409. EMIT_BYTECODE_OPCODE_FUNC(ELSE)
  1410. EMIT_BYTECODE_OPCODE_FUNC(ENDIF)
  1411. EMIT_BYTECODE_OPCODE_FUNC(BREAK)
  1412. EMIT_BYTECODE_OPCODE_FUNC(MOVA)
  1413. EMIT_BYTECODE_OPCODE_FUNC(TEXKILL)
  1414. EMIT_BYTECODE_OPCODE_FUNC(TEXBEM)
  1415. EMIT_BYTECODE_OPCODE_FUNC(TEXBEML)
  1416. EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR)
  1417. EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB)
  1418. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD)
  1419. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX)
  1420. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD)
  1421. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX)
  1422. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC)
  1423. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC)
  1424. EMIT_BYTECODE_OPCODE_FUNC(EXPP)
  1425. EMIT_BYTECODE_OPCODE_FUNC(LOGP)
  1426. EMIT_BYTECODE_OPCODE_FUNC(CND)
  1427. EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB)
  1428. EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX)
  1429. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH)
  1430. EMIT_BYTECODE_OPCODE_FUNC(TEXDP3)
  1431. EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3)
  1432. EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH)
  1433. EMIT_BYTECODE_OPCODE_FUNC(CMP)
  1434. EMIT_BYTECODE_OPCODE_FUNC(BEM)
  1435. EMIT_BYTECODE_OPCODE_FUNC(DP2ADD)
  1436. EMIT_BYTECODE_OPCODE_FUNC(DSX)
  1437. EMIT_BYTECODE_OPCODE_FUNC(DSY)
  1438. EMIT_BYTECODE_OPCODE_FUNC(TEXLDD)
  1439. EMIT_BYTECODE_OPCODE_FUNC(TEXLDL)
  1440. EMIT_BYTECODE_OPCODE_FUNC(BREAKP)
  1441. EMIT_BYTECODE_OPCODE_FUNC(BREAKC)
  1442. EMIT_BYTECODE_OPCODE_FUNC(IFC)
  1443. EMIT_BYTECODE_OPCODE_FUNC(SETP)
  1444. EMIT_BYTECODE_OPCODE_FUNC(DEF)
  1445. EMIT_BYTECODE_OPCODE_FUNC(DEFI)
  1446. EMIT_BYTECODE_OPCODE_FUNC(DEFB)
  1447. EMIT_BYTECODE_OPCODE_FUNC(DCL)
  1448. EMIT_BYTECODE_OPCODE_FUNC(TEXCRD)
  1449. EMIT_BYTECODE_OPCODE_FUNC(TEXLD)
  1450. #undef EMIT_BYTECODE_OPCODE_FUNC
  1451. #endif // SUPPORT_PROFILE_BYTECODE
  1452. #if !SUPPORT_PROFILE_GLSL
  1453. #define PROFILE_EMITTER_GLSL(op)
  1454. #else
  1455. #undef AT_LEAST_ONE_PROFILE
  1456. #define AT_LEAST_ONE_PROFILE 1
  1457. #define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op,
  // Defines emit_GLSL_<op>() as a stub that reports, through fail(), that the
  // opcode is not implemented in the GLSL profile.
  #define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \
      static void emit_GLSL_##op(Context *ctx) \
      { \
          fail(ctx, #op " unimplemented in glsl profile"); \
      }
  1462. static inline const char *get_GLSL_register_string(Context *ctx,
  1463. const RegisterType regtype, const int regnum,
  1464. char *regnum_str, const size_t regnum_size)
  1465. {
  1466. // turns out these are identical at the moment.
  1467. return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
  1468. } // get_GLSL_register_string
  1469. static const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype)
  1470. {
  1471. switch (rtype)
  1472. {
  1473. case REG_TYPE_CONST: return "vec4";
  1474. case REG_TYPE_CONSTINT: return "ivec4";
  1475. case REG_TYPE_CONSTBOOL: return "bool";
  1476. default: fail(ctx, "BUG: used a uniform we don't know how to define.");
  1477. } // switch
  1478. return NULL;
  1479. } // get_GLSL_uniform_type
  1480. static const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt,
  1481. int regnum, char *buf,
  1482. const size_t len)
  1483. {
  1484. char regnum_str[16];
  1485. const char *regtype_str = get_GLSL_register_string(ctx, rt, regnum,
  1486. regnum_str, sizeof (regnum_str));
  1487. snprintf(buf,len,"%s_%s%s", ctx->shader_type_str, regtype_str, regnum_str);
  1488. return buf;
  1489. } // get_GLSL_varname_in_buf
  1490. static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum)
  1491. {
  1492. char buf[64];
  1493. get_GLSL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
  1494. return StrDup(ctx, buf);
  1495. } // get_GLSL_varname
  1496. static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx,
  1497. const int base, const int size,
  1498. char *buf, const size_t buflen)
  1499. {
  1500. const char *type = ctx->shader_type_str;
  1501. snprintf(buf, buflen, "%s_const_array_%d_%d", type, base, size);
  1502. return buf;
  1503. } // get_GLSL_const_array_varname_in_buf
  1504. static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size)
  1505. {
  1506. char buf[64];
  1507. get_GLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
  1508. return StrDup(ctx, buf);
  1509. } // get_GLSL_const_array_varname
  1510. static inline const char *get_GLSL_input_array_varname(Context *ctx,
  1511. char *buf, const size_t buflen)
  1512. {
  1513. snprintf(buf, buflen, "%s", "vertex_input_array");
  1514. return buf;
  1515. } // get_GLSL_input_array_varname
  1516. static const char *get_GLSL_uniform_array_varname(Context *ctx,
  1517. const RegisterType regtype,
  1518. char *buf, const size_t len)
  1519. {
  1520. const char *shadertype = ctx->shader_type_str;
  1521. const char *type = get_GLSL_uniform_type(ctx, regtype);
  1522. snprintf(buf, len, "%s_uniforms_%s", shadertype, type);
  1523. return buf;
  1524. } // get_GLSL_uniform_array_varname
  1525. static const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len)
  1526. {
  1527. const DestArgInfo *arg = &ctx->dest_arg;
  1528. return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
  1529. } // get_GLSL_destarg_varname
  1530. static const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx,
  1531. char *buf, size_t len)
  1532. {
  1533. if (idx >= STATICARRAYLEN(ctx->source_args))
  1534. {
  1535. fail(ctx, "Too many source args");
  1536. *buf = '\0';
  1537. return buf;
  1538. } // if
  1539. const SourceArgInfo *arg = &ctx->source_args[idx];
  1540. return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
  1541. } // get_GLSL_srcarg_varname
  1542. static const char *make_GLSL_destarg_assign(Context *, char *, const size_t,
  1543. const char *, ...) ISPRINTF(4,5);
  1544. static const char *make_GLSL_destarg_assign(Context *ctx, char *buf,
  1545. const size_t buflen,
  1546. const char *fmt, ...)
  1547. {
  1548. int need_parens = 0;
  1549. const DestArgInfo *arg = &ctx->dest_arg;
  1550. if (arg->writemask == 0)
  1551. {
  1552. *buf = '\0';
  1553. return buf; // no writemask? It's a no-op.
  1554. } // if
  1555. char clampbuf[32] = { '\0' };
  1556. const char *clampleft = "";
  1557. const char *clampright = "";
  1558. if (arg->result_mod & MOD_SATURATE)
  1559. {
  1560. const int vecsize = vecsize_from_writemask(arg->writemask);
  1561. clampleft = "clamp(";
  1562. if (vecsize == 1)
  1563. clampright = ", 0.0, 1.0)";
  1564. else
  1565. {
  1566. snprintf(clampbuf, sizeof (clampbuf),
  1567. ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize);
  1568. clampright = clampbuf;
  1569. } // else
  1570. } // if
  1571. // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
  1572. // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
  1573. assert((arg->result_mod & MOD_CENTROID) == 0);
  1574. if (ctx->predicated)
  1575. {
  1576. fail(ctx, "predicated destinations unsupported"); // !!! FIXME
  1577. *buf = '\0';
  1578. return buf;
  1579. } // if
  1580. char operation[256];
  1581. va_list ap;
  1582. va_start(ap, fmt);
  1583. const int len = vsnprintf(operation, sizeof (operation), fmt, ap);
  1584. va_end(ap);
  1585. if (len >= sizeof (operation))
  1586. {
  1587. fail(ctx, "operation string too large"); // I'm lazy. :P
  1588. *buf = '\0';
  1589. return buf;
  1590. } // if
  1591. const char *result_shift_str = "";
  1592. switch (arg->result_shift)
  1593. {
  1594. case 0x1: result_shift_str = " * 2.0"; break;
  1595. case 0x2: result_shift_str = " * 4.0"; break;
  1596. case 0x3: result_shift_str = " * 8.0"; break;
  1597. case 0xD: result_shift_str = " / 8.0"; break;
  1598. case 0xE: result_shift_str = " / 4.0"; break;
  1599. case 0xF: result_shift_str = " / 2.0"; break;
  1600. } // switch
  1601. need_parens |= (result_shift_str[0] != '\0');
  1602. char regnum_str[16];
  1603. const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype,
  1604. arg->regnum, regnum_str,
  1605. sizeof (regnum_str));
  1606. char writemask_str[6];
  1607. size_t i = 0;
  1608. const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
  1609. if (!scalar && !writemask_xyzw(arg->writemask))
  1610. {
  1611. writemask_str[i++] = '.';
  1612. if (arg->writemask0) writemask_str[i++] = 'x';
  1613. if (arg->writemask1) writemask_str[i++] = 'y';
  1614. if (arg->writemask2) writemask_str[i++] = 'z';
  1615. if (arg->writemask3) writemask_str[i++] = 'w';
  1616. } // if
  1617. writemask_str[i] = '\0';
  1618. assert(i < sizeof (writemask_str));
  1619. const char *leftparen = (need_parens) ? "(" : "";
  1620. const char *rightparen = (need_parens) ? ")" : "";
  1621. snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;",
  1622. ctx->shader_type_str, regtype_str, regnum_str, writemask_str,
  1623. clampleft, leftparen, operation, rightparen, result_shift_str,
  1624. clampright);
  1625. // !!! FIXME: make sure the scratch buffer was large enough.
  1626. return buf;
  1627. } // make_GLSL_destarg_assign
  1628. static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize,
  1629. const int swizzle, const int writemask)
  1630. {
  1631. size_t i = 0;
  1632. if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
  1633. {
  1634. const int writemask0 = (writemask >> 0) & 0x1;
  1635. const int writemask1 = (writemask >> 1) & 0x1;
  1636. const int writemask2 = (writemask >> 2) & 0x1;
  1637. const int writemask3 = (writemask >> 3) & 0x1;
  1638. const int swizzle_x = (swizzle >> 0) & 0x3;
  1639. const int swizzle_y = (swizzle >> 2) & 0x3;
  1640. const int swizzle_z = (swizzle >> 4) & 0x3;
  1641. const int swizzle_w = (swizzle >> 6) & 0x3;
  1642. swiz_str[i++] = '.';
  1643. if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x];
  1644. if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y];
  1645. if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z];
  1646. if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w];
  1647. } // if
  1648. assert(i < strsize);
  1649. swiz_str[i] = '\0';
  1650. return swiz_str;
  1651. } // make_GLSL_swizzle_string
  1652. static const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx,
  1653. const int writemask, char *buf,
  1654. const size_t buflen)
  1655. {
  1656. *buf = '\0';
  1657. if (idx >= STATICARRAYLEN(ctx->source_args))
  1658. {
  1659. fail(ctx, "Too many source args");
  1660. return buf;
  1661. } // if
  1662. const SourceArgInfo *arg = &ctx->source_args[idx];
  1663. const char *premod_str = "";
  1664. const char *postmod_str = "";
  1665. switch (arg->src_mod)
  1666. {
  1667. case SRCMOD_NEGATE:
  1668. premod_str = "-";
  1669. break;
  1670. case SRCMOD_BIASNEGATE:
  1671. premod_str = "-(";
  1672. postmod_str = " - 0.5)";
  1673. break;
  1674. case SRCMOD_BIAS:
  1675. premod_str = "(";
  1676. postmod_str = " - 0.5)";
  1677. break;
  1678. case SRCMOD_SIGNNEGATE:
  1679. premod_str = "-((";
  1680. postmod_str = " - 0.5) * 2.0)";
  1681. break;
  1682. case SRCMOD_SIGN:
  1683. premod_str = "((";
  1684. postmod_str = " - 0.5) * 2.0)";
  1685. break;
  1686. case SRCMOD_COMPLEMENT:
  1687. premod_str = "(1.0 - ";
  1688. postmod_str = ")";
  1689. break;
  1690. case SRCMOD_X2NEGATE:
  1691. premod_str = "-(";
  1692. postmod_str = " * 2.0)";
  1693. break;
  1694. case SRCMOD_X2:
  1695. premod_str = "(";
  1696. postmod_str = " * 2.0)";
  1697. break;
  1698. case SRCMOD_DZ:
  1699. fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME
  1700. postmod_str = "_dz";
  1701. break;
  1702. case SRCMOD_DW:
  1703. fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME
  1704. postmod_str = "_dw";
  1705. break;
  1706. case SRCMOD_ABSNEGATE:
  1707. premod_str = "-abs(";
  1708. postmod_str = ")";
  1709. break;
  1710. case SRCMOD_ABS:
  1711. premod_str = "abs(";
  1712. postmod_str = ")";
  1713. break;
  1714. case SRCMOD_NOT:
  1715. premod_str = "!";
  1716. break;
  1717. case SRCMOD_NONE:
  1718. case SRCMOD_TOTAL:
  1719. break; // stop compiler whining.
  1720. } // switch
  1721. const char *regtype_str = NULL;
  1722. if (!arg->relative)
  1723. {
  1724. regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum,
  1725. (char *) alloca(64), 64);
  1726. } // if
  1727. const char *rel_lbracket = "";
  1728. char rel_offset[32] = { '\0' };
  1729. const char *rel_rbracket = "";
  1730. char rel_swizzle[4] = { '\0' };
  1731. const char *rel_regtype_str = "";
  1732. if (arg->relative)
  1733. {
  1734. if (arg->regtype == REG_TYPE_INPUT)
  1735. regtype_str=get_GLSL_input_array_varname(ctx,(char*)alloca(64),64);
  1736. else
  1737. {
  1738. assert(arg->regtype == REG_TYPE_CONST);
  1739. const int arrayidx = arg->relative_array->index;
  1740. const int offset = arg->regnum - arrayidx;
  1741. assert(offset >= 0);
  1742. if (arg->relative_array->constant)
  1743. {
  1744. const int arraysize = arg->relative_array->count;
  1745. regtype_str = get_GLSL_const_array_varname_in_buf(ctx,
  1746. arrayidx, arraysize, (char *) alloca(64), 64);
  1747. if (offset != 0)
  1748. snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
  1749. } // if
  1750. else
  1751. {
  1752. regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype,
  1753. (char *) alloca(64), 64);
  1754. if (offset == 0)
  1755. {
  1756. snprintf(rel_offset, sizeof (rel_offset),
  1757. "ARRAYBASE_%d + ", arrayidx);
  1758. } // if
  1759. else
  1760. {
  1761. snprintf(rel_offset, sizeof (rel_offset),
  1762. "(ARRAYBASE_%d + %d) + ", arrayidx, offset);
  1763. } // else
  1764. } // else
  1765. } // else
  1766. rel_lbracket = "[";
  1767. rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype,
  1768. arg->relative_regnum,
  1769. (char *) alloca(64), 64);
  1770. rel_swizzle[0] = '.';
  1771. rel_swizzle[1] = swizzle_channels[arg->relative_component];
  1772. rel_swizzle[2] = '\0';
  1773. rel_rbracket = "]";
  1774. } // if
  1775. char swiz_str[6] = { '\0' };
  1776. if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
  1777. {
  1778. make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
  1779. arg->swizzle, writemask);
  1780. } // if
  1781. if (regtype_str == NULL)
  1782. {
  1783. fail(ctx, "Unknown source register type.");
  1784. return buf;
  1785. } // if
  1786. snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s",
  1787. premod_str, regtype_str, rel_lbracket, rel_offset,
  1788. rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str,
  1789. postmod_str);
  1790. // !!! FIXME: make sure the scratch buffer was large enough.
  1791. return buf;
  1792. } // make_GLSL_srcarg_string
  1793. // generate some convenience functions.
  1794. #define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \
  1795. static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \
  1796. const size_t idx, char *buf, \
  1797. const size_t buflen) { \
  1798. return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \
  1799. }
  1800. MAKE_GLSL_SRCARG_STRING_(x, (1 << 0))
  1801. MAKE_GLSL_SRCARG_STRING_(y, (1 << 1))
  1802. MAKE_GLSL_SRCARG_STRING_(z, (1 << 2))
  1803. MAKE_GLSL_SRCARG_STRING_(w, (1 << 3))
  1804. MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0))
  1805. MAKE_GLSL_SRCARG_STRING_(full, 0xF)
  1806. MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask)
  1807. MAKE_GLSL_SRCARG_STRING_(vec3, 0x7)
  1808. MAKE_GLSL_SRCARG_STRING_(vec2, 0x3)
  1809. #undef MAKE_GLSL_SRCARG_STRING_
  1810. // special cases for comparison opcodes...
  1811. static const char *get_GLSL_comparison_string_scalar(Context *ctx)
  1812. {
  1813. static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" };
  1814. if (ctx->instruction_controls >= STATICARRAYLEN(comps))
  1815. {
  1816. fail(ctx, "unknown comparison control");
  1817. return "";
  1818. } // if
  1819. return comps[ctx->instruction_controls];
  1820. } // get_GLSL_comparison_string_scalar
  1821. static const char *get_GLSL_comparison_string_vector(Context *ctx)
  1822. {
  1823. static const char *comps[] = {
  1824. "", "greaterThan", "equal", "greaterThanEqual", "lessThan",
  1825. "notEqual", "lessThanEqual"
  1826. };
  1827. if (ctx->instruction_controls >= STATICARRAYLEN(comps))
  1828. {
  1829. fail(ctx, "unknown comparison control");
  1830. return "";
  1831. } // if
  1832. return comps[ctx->instruction_controls];
  1833. } // get_GLSL_comparison_string_vector
  1834. static void emit_GLSL_start(Context *ctx, const char *profilestr)
  1835. {
  1836. if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
  1837. {
  1838. failf(ctx, "Shader type %u unsupported in this profile.",
  1839. (uint) ctx->shader_type);
  1840. return;
  1841. } // if
  1842. else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0)
  1843. {
  1844. // No gl_FragData[] before GLSL 1.10, so we have to force the version.
  1845. push_output(ctx, &ctx->preflight);
  1846. output_line(ctx, "#version 110");
  1847. pop_output(ctx);
  1848. } // else if
  1849. #if SUPPORT_PROFILE_GLSL120
  1850. else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0)
  1851. {
  1852. ctx->profile_supports_glsl120 = 1;
  1853. push_output(ctx, &ctx->preflight);
  1854. output_line(ctx, "#version 120");
  1855. pop_output(ctx);
  1856. } // else if
  1857. #endif
  1858. else
  1859. {
  1860. failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
  1861. return;
  1862. } // else
  1863. push_output(ctx, &ctx->mainline_intro);
  1864. output_line(ctx, "void main()");
  1865. output_line(ctx, "{");
  1866. pop_output(ctx);
  1867. set_output(ctx, &ctx->mainline);
  1868. ctx->indent++;
  1869. } // emit_GLSL_start
  1870. static void emit_GLSL_RET(Context *ctx);
  1871. static void emit_GLSL_end(Context *ctx)
  1872. {
  1873. // ps_1_* writes color to r0 instead oC0. We move it to the right place.
  1874. // We don't have to worry about a RET opcode messing this up, since
  1875. // RET isn't available before ps_2_0.
  1876. if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
  1877. {
  1878. const char *shstr = ctx->shader_type_str;
  1879. set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
  1880. output_line(ctx, "%s_oC0 = %s_r0;", shstr, shstr);
  1881. } // if
  1882. // force a RET opcode if we're at the end of the stream without one.
  1883. if (ctx->previous_opcode != OPCODE_RET)
  1884. emit_GLSL_RET(ctx);
  1885. } // emit_GLSL_end
  1886. static void emit_GLSL_phase(Context *ctx)
  1887. {
  1888. // no-op in GLSL.
  1889. } // emit_GLSL_phase
  1890. static void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype,
  1891. const int size)
  1892. {
  1893. if (size > 0)
  1894. {
  1895. char buf[64];
  1896. get_GLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf));
  1897. output_line(ctx, "uniform vec4 %s[%d];", buf, size);
  1898. } // if
  1899. } // output_GLSL_uniform_array
  1900. static void emit_GLSL_finalize(Context *ctx)
  1901. {
  1902. // throw some blank lines around to make source more readable.
  1903. push_output(ctx, &ctx->globals);
  1904. output_blank_line(ctx);
  1905. pop_output(ctx);
  1906. // If we had a relative addressing of REG_TYPE_INPUT, we need to build
  1907. // an array for it at the start of main(). GLSL doesn't let you specify
  1908. // arrays of attributes.
  1909. //vec4 blah_array[BIGGEST_ARRAY];
  1910. if (ctx->have_relative_input_registers) // !!! FIXME
  1911. fail(ctx, "Relative addressing of input registers not supported.");
  1912. push_output(ctx, &ctx->preflight);
  1913. output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count);
  1914. output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count);
  1915. output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count);
  1916. pop_output(ctx);
  1917. } // emit_GLSL_finalize
  1918. static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum)
  1919. {
  1920. char varname[64];
  1921. get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
  1922. push_output(ctx, &ctx->globals);
  1923. switch (regtype)
  1924. {
  1925. case REG_TYPE_ADDRESS:
  1926. if (shader_is_vertex(ctx))
  1927. output_line(ctx, "ivec4 %s;", varname);
  1928. else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE.
  1929. {
  1930. // We have to map texture registers to temps for ps_1_1, since
  1931. // they work like temps, initialize with tex coords, and the
  1932. // ps_1_1 TEX opcode expects to overwrite it.
  1933. if (!shader_version_atleast(ctx, 1, 4))
  1934. {
  1935. output_line(ctx, "vec4 %s = gl_TexCoord[%d];",
  1936. varname, regnum);
  1937. } // if
  1938. } // else if
  1939. break;
  1940. case REG_TYPE_PREDICATE:
  1941. output_line(ctx, "bvec4 %s;", varname);
  1942. break;
  1943. case REG_TYPE_TEMP:
  1944. output_line(ctx, "vec4 %s;", varname);
  1945. break;
  1946. case REG_TYPE_LOOP:
  1947. break; // no-op. We declare these in for loops at the moment.
  1948. case REG_TYPE_LABEL:
  1949. break; // no-op. If we see it here, it means we optimized it out.
  1950. default:
  1951. fail(ctx, "BUG: we used a register we don't know how to define.");
  1952. break;
  1953. } // switch
  1954. pop_output(ctx);
  1955. } // emit_GLSL_global
  1956. static void emit_GLSL_array(Context *ctx, VariableList *var)
  1957. {
  1958. // All uniforms (except constant arrays, which only get pushed once at
  1959. // compile time) are now packed into a single array, so we can batch
  1960. // the uniform transfers. So this is doesn't actually define an array
  1961. // here; the one, big array is emitted during finalization instead.
  1962. // However, we need to #define the offset into the one, big array here,
  1963. // and let dereferences use that #define.
  1964. const int base = var->index;
  1965. const int glslbase = ctx->uniform_float4_count;
  1966. push_output(ctx, &ctx->globals);
  1967. output_line(ctx, "#define ARRAYBASE_%d %d", base, glslbase);
  1968. pop_output(ctx);
  1969. var->emit_position = glslbase;
  1970. } // emit_GLSL_array
  1971. static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist,
  1972. int base, int size)
  1973. {
  1974. char varname[64];
  1975. get_GLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname));
  1976. #if 0
  1977. // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120.
  1978. // !!! FIXME: (the 1.20 spec says it should work, though, I think...)
  1979. if (support_glsl120(ctx))
  1980. {
  1981. // GLSL 1.20 can do constant arrays.
  1982. const char *cstr = NULL;
  1983. push_output(ctx, &ctx->globals);
  1984. output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size);
  1985. ctx->indent++;
  1986. int i;
  1987. for (i = 0; i < size; i++)
  1988. {
  1989. while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
  1990. clist = clist->next;
  1991. assert(clist->constant.index == (base + i));
  1992. char val0[32];
  1993. char val1[32];
  1994. char val2[32];
  1995. char val3[32];
  1996. floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
  1997. floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
  1998. floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
  1999. floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
  2000. output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
  2001. (i < (size-1)) ? "," : "");
  2002. clist = clist->next;
  2003. } // for
  2004. ctx->indent--;
  2005. output_line(ctx, ");");
  2006. pop_output(ctx);
  2007. } // if
  2008. else
  2009. #endif
  2010. {
  2011. // stock GLSL 1.0 can't do constant arrays, so make a uniform array
  2012. // and have the OpenGL glue assign it at link time. Lame!
  2013. push_output(ctx, &ctx->globals);
  2014. output_line(ctx, "uniform vec4 %s[%d];", varname, size);
  2015. pop_output(ctx);
  2016. } // else
  2017. } // emit_GLSL_const_array
  2018. static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum,
  2019. const VariableList *var)
  2020. {
  2021. // Now that we're pushing all the uniforms as one big array, pack these
  2022. // down, so if we only use register c439, it'll actually map to
  2023. // glsl_uniforms_vec4[0]. As we push one big array, this will prevent
  2024. // uploading unused data.
  2025. char varname[64];
  2026. char name[64];
  2027. int index = 0;
  2028. get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
  2029. push_output(ctx, &ctx->globals);
  2030. if (var == NULL)
  2031. {
  2032. get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
  2033. if (regtype == REG_TYPE_CONST)
  2034. index = ctx->uniform_float4_count;
  2035. else if (regtype == REG_TYPE_CONSTINT)
  2036. index = ctx->uniform_int4_count;
  2037. else if (regtype == REG_TYPE_CONSTBOOL)
  2038. index = ctx->uniform_bool_count;
  2039. else // get_GLSL_uniform_array_varname() would have called fail().
  2040. assert(isfail(ctx));
  2041. output_line(ctx, "#define %s %s[%d]", varname, name, index);
  2042. } // if
  2043. else
  2044. {
  2045. const int arraybase = var->index;
  2046. if (var->constant)
  2047. {
  2048. get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count,
  2049. name, sizeof (name));
  2050. index = (regnum - arraybase);
  2051. } // if
  2052. else
  2053. {
  2054. assert(var->emit_position != -1);
  2055. get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
  2056. index = (regnum - arraybase) + var->emit_position;
  2057. } // else
  2058. output_line(ctx, "#define %s %s[%d]", varname, name, index);
  2059. } // else
  2060. pop_output(ctx);
  2061. } // emit_GLSL_uniform
  2062. static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
  2063. {
  2064. const char *type = "";
  2065. switch (ttype)
  2066. {
  2067. case TEXTURE_TYPE_2D: type = "sampler2D"; break;
  2068. case TEXTURE_TYPE_CUBE: type = "samplerCube"; break;
  2069. case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break;
  2070. default: fail(ctx, "BUG: used a sampler we don't know how to define.");
  2071. } // switch
  2072. char var[64];
  2073. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var));
  2074. push_output(ctx, &ctx->globals);
  2075. output_line(ctx, "uniform %s %s;", type, var);
  2076. if (tb) // This sampler used a ps_1_1 TEXBEM opcode?
  2077. {
  2078. char name[64];
  2079. const int index = ctx->uniform_float4_count;
  2080. ctx->uniform_float4_count += 2;
  2081. get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name));
  2082. output_line(ctx, "#define %s_texbem %s[%d]", var, name, index);
  2083. output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1);
  2084. } // if
  2085. pop_output(ctx);
  2086. } // emit_GLSL_sampler
  2087. static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
  2088. MOJOSHADER_usage usage, int index, int wmask,
  2089. int flags)
  2090. {
  2091. // !!! FIXME: this function doesn't deal with write masks at all yet!
  2092. const char *usage_str = NULL;
  2093. const char *arrayleft = "";
  2094. const char *arrayright = "";
  2095. char index_str[16] = { '\0' };
  2096. char var[64];
  2097. get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var));
  2098. //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed?
  2099. if (index != 0) // !!! FIXME: a lot of these MUST be zero.
  2100. snprintf(index_str, sizeof (index_str), "%u", (uint) index);
  2101. if (shader_is_vertex(ctx))
  2102. {
  2103. // pre-vs3 output registers.
  2104. // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
  2105. // output registers.
  2106. if (!shader_version_atleast(ctx, 3, 0))
  2107. {
  2108. if (regtype == REG_TYPE_RASTOUT)
  2109. {
  2110. regtype = REG_TYPE_OUTPUT;
  2111. index = regnum;
  2112. switch ((const RastOutType) regnum)
  2113. {
  2114. case RASTOUT_TYPE_POSITION:
  2115. usage = MOJOSHADER_USAGE_POSITION;
  2116. break;
  2117. case RASTOUT_TYPE_FOG:
  2118. usage = MOJOSHADER_USAGE_FOG;
  2119. break;
  2120. case RASTOUT_TYPE_POINT_SIZE:
  2121. usage = MOJOSHADER_USAGE_POINTSIZE;
  2122. break;
  2123. } // switch
  2124. } // if
  2125. else if (regtype == REG_TYPE_ATTROUT)
  2126. {
  2127. regtype = REG_TYPE_OUTPUT;
  2128. usage = MOJOSHADER_USAGE_COLOR;
  2129. index = regnum;
  2130. } // else if
  2131. else if (regtype == REG_TYPE_TEXCRDOUT)
  2132. {
  2133. regtype = REG_TYPE_OUTPUT;
  2134. usage = MOJOSHADER_USAGE_TEXCOORD;
  2135. index = regnum;
  2136. } // else if
  2137. } // if
  2138. // to avoid limitations of various GL entry points for input
  2139. // attributes (glSecondaryColorPointer() can only take 3 component
  2140. // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
  2141. // issues), we set up all inputs as generic vertex attributes, so we
  2142. // can pass data in just about any form, and ignore the built-in GLSL
  2143. // attributes like gl_SecondaryColor. Output needs to use the the
  2144. // built-ins, though, but we don't have to worry about the GL entry
  2145. // point limitations there.
  2146. if (regtype == REG_TYPE_INPUT)
  2147. {
  2148. push_output(ctx, &ctx->globals);
  2149. output_line(ctx, "attribute vec4 %s;", var);
  2150. pop_output(ctx);
  2151. } // if
  2152. else if (regtype == REG_TYPE_OUTPUT)
  2153. {
  2154. switch (usage)
  2155. {
  2156. case MOJOSHADER_USAGE_POSITION:
  2157. usage_str = "gl_Position";
  2158. break;
  2159. case MOJOSHADER_USAGE_POINTSIZE:
  2160. usage_str = "gl_PointSize";
  2161. break;
  2162. case MOJOSHADER_USAGE_COLOR:
  2163. index_str[0] = '\0'; // no explicit number.
  2164. if (index == 0)
  2165. usage_str = "gl_FrontColor";
  2166. else if (index == 1)
  2167. usage_str = "gl_FrontSecondaryColor";
  2168. break;
  2169. case MOJOSHADER_USAGE_FOG:
  2170. usage_str = "gl_FogFragCoord";
  2171. break;
  2172. case MOJOSHADER_USAGE_TEXCOORD:
  2173. snprintf(index_str, sizeof (index_str), "%u", (uint) index);
  2174. usage_str = "gl_TexCoord";
  2175. arrayleft = "[";
  2176. arrayright = "]";
  2177. break;
  2178. default:
  2179. // !!! FIXME: we need to deal with some more built-in varyings here.
  2180. break;
  2181. } // switch
  2182. // !!! FIXME: the #define is a little hacky, but it means we don't
  2183. // !!! FIXME: have to track these separately if this works.
  2184. push_output(ctx, &ctx->globals);
  2185. // no mapping to built-in var? Just make it a regular global, pray.
  2186. if (usage_str == NULL)
  2187. output_line(ctx, "vec4 %s;", var);
  2188. else
  2189. {
  2190. output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
  2191. arrayleft, index_str, arrayright);
  2192. } // else
  2193. pop_output(ctx);
  2194. } // else if
  2195. else
  2196. {
  2197. fail(ctx, "unknown vertex shader attribute register");
  2198. } // else
  2199. } // if
  2200. else if (shader_is_pixel(ctx))
  2201. {
  2202. // samplers DCLs get handled in emit_GLSL_sampler().
  2203. if (flags & MOD_CENTROID) // !!! FIXME
  2204. {
  2205. failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
  2206. return;
  2207. } // if
  2208. if (regtype == REG_TYPE_COLOROUT)
  2209. {
  2210. if (!ctx->have_multi_color_outputs)
  2211. usage_str = "gl_FragColor"; // maybe faster?
  2212. else
  2213. {
  2214. snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
  2215. usage_str = "gl_FragData";
  2216. arrayleft = "[";
  2217. arrayright = "]";
  2218. } // else
  2219. } // if
  2220. else if (regtype == REG_TYPE_DEPTHOUT)
  2221. usage_str = "gl_FragDepth";
  2222. // !!! FIXME: can you actualy have a texture register with COLOR usage?
  2223. else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
  2224. {
  2225. if (usage == MOJOSHADER_USAGE_TEXCOORD)
  2226. {
  2227. // ps_1_1 does a different hack for this attribute.
  2228. // Refer to emit_GLSL_global()'s REG_TYPE_TEXTURE code.
  2229. if (shader_version_atleast(ctx, 1, 4))
  2230. {
  2231. snprintf(index_str, sizeof (index_str), "%u", (uint) index);
  2232. usage_str = "gl_TexCoord";
  2233. arrayleft = "[";
  2234. arrayright = "]";
  2235. } // if
  2236. } // if
  2237. else if (usage == MOJOSHADER_USAGE_COLOR)
  2238. {
  2239. index_str[0] = '\0'; // no explicit number.
  2240. if (index == 0)
  2241. usage_str = "gl_Color";
  2242. else if (index == 1)
  2243. usage_str = "gl_SecondaryColor";
  2244. else
  2245. fail(ctx, "unsupported color index");
  2246. } // else if
  2247. } // else if
  2248. else if (regtype == REG_TYPE_MISCTYPE)
  2249. {
  2250. const MiscTypeType mt = (MiscTypeType) regnum;
  2251. if (mt == MISCTYPE_TYPE_FACE)
  2252. {
  2253. push_output(ctx, &ctx->globals);
  2254. output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;", var);
  2255. pop_output(ctx);
  2256. } // if
  2257. else if (mt == MISCTYPE_TYPE_POSITION)
  2258. {
  2259. index_str[0] = '\0'; // no explicit number.
  2260. usage_str = "gl_FragCoord"; // !!! FIXME: is this the same coord space as D3D?
  2261. } // else if
  2262. else
  2263. {
  2264. fail(ctx, "BUG: unhandled misc register");
  2265. } // else
  2266. } // else if
  2267. else
  2268. {
  2269. fail(ctx, "unknown pixel shader attribute register");
  2270. } // else
  2271. if (usage_str != NULL)
  2272. {
  2273. push_output(ctx, &ctx->globals);
  2274. output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
  2275. arrayleft, index_str, arrayright);
  2276. pop_output(ctx);
  2277. } // if
  2278. } // else if
  2279. else
  2280. {
  2281. fail(ctx, "Unknown shader type"); // state machine should catch this.
  2282. } // else
  2283. } // emit_GLSL_attribute
  2284. static void emit_GLSL_NOP(Context *ctx)
  2285. {
  2286. // no-op is a no-op. :)
  2287. } // emit_GLSL_NOP
  2288. static void emit_GLSL_MOV(Context *ctx)
  2289. {
  2290. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2291. char code[128];
  2292. make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0);
  2293. output_line(ctx, "%s", code);
  2294. } // emit_GLSL_MOV
  2295. static void emit_GLSL_ADD(Context *ctx)
  2296. {
  2297. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2298. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2299. char code[128];
  2300. make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1);
  2301. output_line(ctx, "%s", code);
  2302. } // emit_GLSL_ADD
  2303. static void emit_GLSL_SUB(Context *ctx)
  2304. {
  2305. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2306. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2307. char code[128];
  2308. make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1);
  2309. output_line(ctx, "%s", code);
  2310. } // emit_GLSL_SUB
  2311. static void emit_GLSL_MAD(Context *ctx)
  2312. {
  2313. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2314. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2315. char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
  2316. char code[128];
  2317. make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2);
  2318. output_line(ctx, "%s", code);
  2319. } // emit_GLSL_MAD
  2320. static void emit_GLSL_MUL(Context *ctx)
  2321. {
  2322. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2323. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2324. char code[128];
  2325. make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1);
  2326. output_line(ctx, "%s", code);
  2327. } // emit_GLSL_MUL
  2328. static void emit_GLSL_RCP(Context *ctx)
  2329. {
  2330. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2331. char code[128];
  2332. make_GLSL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0);
  2333. output_line(ctx, "%s", code);
  2334. } // emit_GLSL_RCP
  2335. static void emit_GLSL_RSQ(Context *ctx)
  2336. {
  2337. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2338. char code[128];
  2339. make_GLSL_destarg_assign(ctx, code, sizeof (code), "inversesqrt(%s)", src0);
  2340. output_line(ctx, "%s", code);
  2341. } // emit_GLSL_RSQ
  2342. static void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1,
  2343. const char *extra)
  2344. {
  2345. const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
  2346. char castleft[16] = { '\0' };
  2347. const char *castright = "";
  2348. if (vecsize != 1)
  2349. {
  2350. snprintf(castleft, sizeof (castleft), "vec%d(", vecsize);
  2351. castright = ")";
  2352. } // if
  2353. char code[128];
  2354. make_GLSL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s",
  2355. castleft, src0, src1, extra, castright);
  2356. output_line(ctx, "%s", code);
  2357. } // emit_GLSL_dotprod
  2358. static void emit_GLSL_DP3(Context *ctx)
  2359. {
  2360. char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2361. char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
  2362. emit_GLSL_dotprod(ctx, src0, src1, "");
  2363. } // emit_GLSL_DP3
  2364. static void emit_GLSL_DP4(Context *ctx)
  2365. {
  2366. char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
  2367. char src1[64]; make_GLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1));
  2368. emit_GLSL_dotprod(ctx, src0, src1, "");
  2369. } // emit_GLSL_DP4
  2370. static void emit_GLSL_MIN(Context *ctx)
  2371. {
  2372. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2373. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2374. char code[128];
  2375. make_GLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1);
  2376. output_line(ctx, "%s", code);
  2377. } // emit_GLSL_MIN
  2378. static void emit_GLSL_MAX(Context *ctx)
  2379. {
  2380. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2381. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2382. char code[128];
  2383. make_GLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1);
  2384. output_line(ctx, "%s", code);
  2385. } // emit_GLSL_MAX
  2386. static void emit_GLSL_SLT(Context *ctx)
  2387. {
  2388. const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
  2389. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2390. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2391. char code[128];
  2392. // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants.
  2393. if (vecsize == 1)
  2394. make_GLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1);
  2395. else
  2396. {
  2397. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2398. "vec%d(lessThan(%s, %s))",
  2399. vecsize, src0, src1);
  2400. } // else
  2401. output_line(ctx, "%s", code);
  2402. } // emit_GLSL_SLT
  2403. static void emit_GLSL_SGE(Context *ctx)
  2404. {
  2405. const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
  2406. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2407. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2408. char code[128];
  2409. // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants.
  2410. if (vecsize == 1)
  2411. {
  2412. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2413. "float(%s >= %s)", src0, src1);
  2414. } // if
  2415. else
  2416. {
  2417. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2418. "vec%d(greaterThanEqual(%s, %s))",
  2419. vecsize, src0, src1);
  2420. } // else
  2421. output_line(ctx, "%s", code);
  2422. } // emit_GLSL_SGE
  2423. static void emit_GLSL_EXP(Context *ctx)
  2424. {
  2425. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2426. char code[128];
  2427. make_GLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0);
  2428. output_line(ctx, "%s", code);
  2429. } // emit_GLSL_EXP
  2430. static void emit_GLSL_LOG(Context *ctx)
  2431. {
  2432. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2433. char code[128];
  2434. make_GLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0);
  2435. output_line(ctx, "%s", code);
  2436. } // emit_GLSL_LOG
  2437. static void emit_GLSL_LIT_helper(Context *ctx)
  2438. {
  2439. const char *maxp = "127.9961"; // value from the dx9 reference.
  2440. if (ctx->glsl_generated_lit_helper)
  2441. return;
  2442. ctx->glsl_generated_lit_helper = 1;
  2443. push_output(ctx, &ctx->helpers);
  2444. output_line(ctx, "vec4 LIT(const vec4 src)");
  2445. output_line(ctx, "{"); ctx->indent++;
  2446. output_line(ctx, "float power = clamp(src.w, -%s, %s);",maxp,maxp);
  2447. output_line(ctx, "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);");
  2448. output_line(ctx, "if (src.x > 0.0) {"); ctx->indent++;
  2449. output_line(ctx, "retval.y = src.x;");
  2450. output_line(ctx, "if (src.y > 0.0) {"); ctx->indent++;
  2451. output_line(ctx, "retval.z = pow(src.y, power);"); ctx->indent--;
  2452. output_line(ctx, "}"); ctx->indent--;
  2453. output_line(ctx, "}");
  2454. output_line(ctx, "return retval;"); ctx->indent--;
  2455. output_line(ctx, "}");
  2456. output_blank_line(ctx);
  2457. pop_output(ctx);
  2458. } // emit_GLSL_LIT_helper
  2459. static void emit_GLSL_LIT(Context *ctx)
  2460. {
  2461. char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
  2462. char code[128];
  2463. emit_GLSL_LIT_helper(ctx);
  2464. make_GLSL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0);
  2465. output_line(ctx, "%s", code);
  2466. } // emit_GLSL_LIT
  2467. static void emit_GLSL_DST(Context *ctx)
  2468. {
  2469. // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
  2470. char src0_y[64]; make_GLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y));
  2471. char src1_y[64]; make_GLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y));
  2472. char src0_z[64]; make_GLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z));
  2473. char src1_w[64]; make_GLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w));
  2474. char code[128];
  2475. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2476. "vec4(1.0, %s * %s, %s, %s)",
  2477. src0_y, src1_y, src0_z, src1_w);
  2478. output_line(ctx, "%s", code);
  2479. } // emit_GLSL_DST
  2480. static void emit_GLSL_LRP(Context *ctx)
  2481. {
  2482. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2483. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2484. char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
  2485. char code[128];
  2486. make_GLSL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)",
  2487. src2, src1, src0);
  2488. output_line(ctx, "%s", code);
  2489. } // emit_GLSL_LRP
  2490. static void emit_GLSL_FRC(Context *ctx)
  2491. {
  2492. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2493. char code[128];
  2494. make_GLSL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0);
  2495. output_line(ctx, "%s", code);
  2496. } // emit_GLSL_FRC
  2497. static void emit_GLSL_M4X4(Context *ctx)
  2498. {
  2499. char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
  2500. char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
  2501. char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
  2502. char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
  2503. char row3[64]; make_GLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3));
  2504. char code[256];
  2505. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2506. "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
  2507. src0, row0, src0, row1, src0, row2, src0, row3);
  2508. output_line(ctx, "%s", code);
  2509. } // emit_GLSL_M4X4
  2510. static void emit_GLSL_M4X3(Context *ctx)
  2511. {
  2512. char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
  2513. char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
  2514. char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
  2515. char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
  2516. char code[256];
  2517. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2518. "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
  2519. src0, row0, src0, row1, src0, row2);
  2520. output_line(ctx, "%s", code);
  2521. } // emit_GLSL_M4X3
  2522. static void emit_GLSL_M3X4(Context *ctx)
  2523. {
  2524. char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2525. char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
  2526. char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
  2527. char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
  2528. char row3[64]; make_GLSL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3));
  2529. char code[256];
  2530. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2531. "vec4(dot(%s, %s), dot(%s, %s), "
  2532. "dot(%s, %s), dot(%s, %s))",
  2533. src0, row0, src0, row1,
  2534. src0, row2, src0, row3);
  2535. output_line(ctx, "%s", code);
  2536. } // emit_GLSL_M3X4
  2537. static void emit_GLSL_M3X3(Context *ctx)
  2538. {
  2539. char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2540. char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
  2541. char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
  2542. char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
  2543. char code[256];
  2544. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2545. "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
  2546. src0, row0, src0, row1, src0, row2);
  2547. output_line(ctx, "%s", code);
  2548. } // emit_GLSL_M3X3
  2549. static void emit_GLSL_M3X2(Context *ctx)
  2550. {
  2551. char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2552. char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
  2553. char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
  2554. char code[256];
  2555. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2556. "vec2(dot(%s, %s), dot(%s, %s))",
  2557. src0, row0, src0, row1);
  2558. output_line(ctx, "%s", code);
  2559. } // emit_GLSL_M3X2
  2560. static void emit_GLSL_CALL(Context *ctx)
  2561. {
  2562. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2563. if (ctx->loops > 0)
  2564. output_line(ctx, "%s(aL);", src0);
  2565. else
  2566. output_line(ctx, "%s();", src0);
  2567. } // emit_GLSL_CALL
  2568. static void emit_GLSL_CALLNZ(Context *ctx)
  2569. {
  2570. // !!! FIXME: if src1 is a constbool that's true, we can remove the
  2571. // !!! FIXME: if. If it's false, we can make this a no-op.
  2572. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2573. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2574. if (ctx->loops > 0)
  2575. output_line(ctx, "if (%s) { %s(aL); }", src1, src0);
  2576. else
  2577. output_line(ctx, "if (%s) { %s(); }", src1, src0);
  2578. } // emit_GLSL_CALLNZ
  2579. static void emit_GLSL_LOOP(Context *ctx)
  2580. {
  2581. // !!! FIXME: swizzle?
  2582. char var[64]; get_GLSL_srcarg_varname(ctx, 1, var, sizeof (var));
  2583. assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday.
  2584. output_line(ctx, "{");
  2585. ctx->indent++;
  2586. output_line(ctx, "const int aLend = %s.x + %s.y;", var, var);
  2587. output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var);
  2588. ctx->indent++;
  2589. } // emit_GLSL_LOOP
  2590. static void emit_GLSL_RET(Context *ctx)
  2591. {
  2592. // thankfully, the MSDN specs say a RET _has_ to end a function...no
  2593. // early returns. So if you hit one, you know you can safely close
  2594. // a high-level function.
  2595. ctx->indent--;
  2596. output_line(ctx, "}");
  2597. output_blank_line(ctx);
  2598. set_output(ctx, &ctx->subroutines);
  2599. } // emit_GLSL_RET
  2600. static void emit_GLSL_ENDLOOP(Context *ctx)
  2601. {
  2602. ctx->indent--;
  2603. output_line(ctx, "}");
  2604. ctx->indent--;
  2605. output_line(ctx, "}");
  2606. } // emit_GLSL_ENDLOOP
  2607. static void emit_GLSL_LABEL(Context *ctx)
  2608. {
  2609. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2610. const int label = ctx->source_args[0].regnum;
  2611. RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
  2612. assert(ctx->output == ctx->subroutines); // not mainline, etc.
  2613. assert(ctx->indent == 0); // we shouldn't be in the middle of a function.
  2614. // MSDN specs say CALL* has to come before the LABEL, so we know if we
  2615. // can ditch the entire function here as unused.
  2616. if (reg == NULL)
  2617. set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output.
  2618. // !!! FIXME: it would be nice if we could determine if a function is
  2619. // !!! FIXME: only called once and, if so, forcibly inline it.
  2620. const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
  2621. output_line(ctx, "void %s(%s)", src0, uses_loopreg);
  2622. output_line(ctx, "{");
  2623. ctx->indent++;
  2624. } // emit_GLSL_LABEL
  2625. static void emit_GLSL_DCL(Context *ctx)
  2626. {
  2627. // no-op. We do this in our emit_attribute() and emit_uniform().
  2628. } // emit_GLSL_DCL
  2629. static void emit_GLSL_POW(Context *ctx)
  2630. {
  2631. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2632. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  2633. char code[128];
  2634. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2635. "pow(abs(%s), %s)", src0, src1);
  2636. output_line(ctx, "%s", code);
  2637. } // emit_GLSL_POW
  2638. static void emit_GLSL_CRS(Context *ctx)
  2639. {
  2640. // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
  2641. char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2642. char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
  2643. char code[128];
  2644. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2645. "cross(%s, %s)", src0, src1);
  2646. output_line(ctx, "%s", code);
  2647. } // emit_GLSL_CRS
  2648. static void emit_GLSL_SGN(Context *ctx)
  2649. {
  2650. // (we don't need the temporary registers specified for the D3D opcode.)
  2651. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2652. char code[128];
  2653. make_GLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0);
  2654. output_line(ctx, "%s", code);
  2655. } // emit_GLSL_SGN
  2656. static void emit_GLSL_ABS(Context *ctx)
  2657. {
  2658. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2659. char code[128];
  2660. make_GLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0);
  2661. output_line(ctx, "%s", code);
  2662. } // emit_GLSL_ABS
  2663. static void emit_GLSL_NRM(Context *ctx)
  2664. {
  2665. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2666. char code[128];
  2667. make_GLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0);
  2668. output_line(ctx, "%s", code);
  2669. } // emit_GLSL_NRM
  2670. static void emit_GLSL_SINCOS(Context *ctx)
  2671. {
  2672. // we don't care about the temp registers that <= sm2 demands; ignore them.
  2673. // sm2 also talks about what components are left untouched vs. undefined,
  2674. // but we just leave those all untouched with GLSL write masks (which
  2675. // would fulfill the "undefined" requirement, too).
  2676. const int mask = ctx->dest_arg.writemask;
  2677. char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
  2678. char code[128] = { '\0' };
  2679. if (writemask_x(mask))
  2680. make_GLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0);
  2681. else if (writemask_y(mask))
  2682. make_GLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0);
  2683. else if (writemask_xy(mask))
  2684. {
  2685. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2686. "vec2(cos(%s), sin(%s))", src0, src0);
  2687. } // else if
  2688. output_line(ctx, "%s", code);
  2689. } // emit_GLSL_SINCOS
  2690. static void emit_GLSL_REP(Context *ctx)
  2691. {
  2692. // !!! FIXME:
  2693. // msdn docs say legal loop values are 0 to 255. We can check DEFI values
  2694. // at parse time, but if they are pulling a value from a uniform, do
  2695. // we clamp here?
  2696. // !!! FIXME: swizzle is legal here, right?
  2697. char src0[64]; make_GLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0));
  2698. const uint rep = (uint) ctx->reps;
  2699. output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {",
  2700. rep, rep, src0, rep);
  2701. ctx->indent++;
  2702. } // emit_GLSL_REP
  2703. static void emit_GLSL_ENDREP(Context *ctx)
  2704. {
  2705. ctx->indent--;
  2706. output_line(ctx, "}");
  2707. } // emit_GLSL_ENDREP
  2708. static void emit_GLSL_IF(Context *ctx)
  2709. {
  2710. char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
  2711. output_line(ctx, "if (%s) {", src0);
  2712. ctx->indent++;
  2713. } // emit_GLSL_IF
  2714. static void emit_GLSL_IFC(Context *ctx)
  2715. {
  2716. const char *comp = get_GLSL_comparison_string_scalar(ctx);
  2717. char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
  2718. char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
  2719. output_line(ctx, "if (%s %s %s) {", src0, comp, src1);
  2720. ctx->indent++;
  2721. } // emit_GLSL_IFC
  2722. static void emit_GLSL_ELSE(Context *ctx)
  2723. {
  2724. ctx->indent--;
  2725. output_line(ctx, "} else {");
  2726. ctx->indent++;
  2727. } // emit_GLSL_ELSE
  2728. static void emit_GLSL_ENDIF(Context *ctx)
  2729. {
  2730. ctx->indent--;
  2731. output_line(ctx, "}");
  2732. } // emit_GLSL_ENDIF
  2733. static void emit_GLSL_BREAK(Context *ctx)
  2734. {
  2735. output_line(ctx, "break;");
  2736. } // emit_GLSL_BREAK
  2737. static void emit_GLSL_BREAKC(Context *ctx)
  2738. {
  2739. const char *comp = get_GLSL_comparison_string_scalar(ctx);
  2740. char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
  2741. char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
  2742. output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1);
  2743. } // emit_GLSL_BREAKC
  2744. static void emit_GLSL_MOVA(Context *ctx)
  2745. {
  2746. const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
  2747. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  2748. char code[128];
  2749. if (vecsize == 1)
  2750. {
  2751. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2752. "int(floor(abs(%s) + 0.5) * sign(%s))",
  2753. src0, src0);
  2754. } // if
  2755. else
  2756. {
  2757. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2758. "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))",
  2759. vecsize, src0, vecsize, src0);
  2760. } // else
  2761. output_line(ctx, "%s", code);
  2762. } // emit_GLSL_MOVA
  2763. static void emit_GLSL_DEFB(Context *ctx)
  2764. {
  2765. char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
  2766. push_output(ctx, &ctx->globals);
  2767. output_line(ctx, "const bool %s = %s;",
  2768. varname, ctx->dwords[0] ? "true" : "false");
  2769. pop_output(ctx);
  2770. } // emit_GLSL_DEFB
  2771. static void emit_GLSL_DEFI(Context *ctx)
  2772. {
  2773. char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
  2774. const int32 *x = (const int32 *) ctx->dwords;
  2775. push_output(ctx, &ctx->globals);
  2776. output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);",
  2777. varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
  2778. pop_output(ctx);
  2779. } // emit_GLSL_DEFI
  2780. EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
  2781. static void emit_GLSL_TEXKILL(Context *ctx)
  2782. {
  2783. char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  2784. output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", dst);
  2785. } // emit_GLSL_TEXKILL
  2786. static void glsl_texld(Context *ctx, const int texldd)
  2787. {
  2788. if (!shader_version_atleast(ctx, 1, 4))
  2789. {
  2790. DestArgInfo *info = &ctx->dest_arg;
  2791. char dst[64];
  2792. char sampler[64];
  2793. char code[128] = {0};
  2794. assert(!texldd);
  2795. RegisterList *sreg;
  2796. sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum);
  2797. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  2798. // !!! FIXME: this code counts on the register not having swizzles, etc.
  2799. get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  2800. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  2801. sampler, sizeof (sampler));
  2802. if (ttype == TEXTURE_TYPE_2D)
  2803. {
  2804. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2805. "texture2D(%s, %s.xy)",
  2806. sampler, dst);
  2807. }
  2808. else if (ttype == TEXTURE_TYPE_CUBE)
  2809. {
  2810. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2811. "textureCube(%s, %s.xyz)",
  2812. sampler, dst);
  2813. }
  2814. else if (ttype == TEXTURE_TYPE_VOLUME)
  2815. {
  2816. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2817. "texture3D(%s, %s.xyz)",
  2818. sampler, dst);
  2819. }
  2820. else
  2821. {
  2822. fail(ctx, "unexpected texture type");
  2823. } // else
  2824. output_line(ctx, "%s", code);
  2825. } // if
  2826. else if (!shader_version_atleast(ctx, 2, 0))
  2827. {
  2828. // ps_1_4 is different, too!
  2829. fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME
  2830. return;
  2831. } // else if
  2832. else
  2833. {
  2834. const SourceArgInfo *samp_arg = &ctx->source_args[1];
  2835. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
  2836. samp_arg->regnum);
  2837. const char *funcname = NULL;
  2838. char src0[64] = { '\0' };
  2839. char src1[64]; get_GLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?
  2840. char src2[64] = { '\0' };
  2841. char src3[64] = { '\0' };
  2842. if (sreg == NULL)
  2843. {
  2844. fail(ctx, "TEXLD using undeclared sampler");
  2845. return;
  2846. } // if
  2847. if (texldd)
  2848. {
  2849. make_GLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2));
  2850. make_GLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3));
  2851. } // if
  2852. // !!! FIXME: can TEXLDD set instruction_controls?
  2853. // !!! FIXME: does the d3d bias value map directly to GLSL?
  2854. const char *biassep = "";
  2855. char bias[64] = { '\0' };
  2856. if (ctx->instruction_controls == CONTROL_TEXLDB)
  2857. {
  2858. biassep = ", ";
  2859. make_GLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias));
  2860. } // if
  2861. switch ((const TextureType) sreg->index)
  2862. {
  2863. case TEXTURE_TYPE_2D:
  2864. if (ctx->instruction_controls == CONTROL_TEXLDP)
  2865. {
  2866. funcname = "texture2DProj";
  2867. make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
  2868. } // if
  2869. else // texld/texldb
  2870. {
  2871. funcname = "texture2D";
  2872. make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
  2873. } // else
  2874. break;
  2875. case TEXTURE_TYPE_CUBE:
  2876. if (ctx->instruction_controls == CONTROL_TEXLDP)
  2877. fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal?
  2878. funcname = "textureCube";
  2879. make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2880. break;
  2881. case TEXTURE_TYPE_VOLUME:
  2882. if (ctx->instruction_controls == CONTROL_TEXLDP)
  2883. {
  2884. funcname = "texture3DProj";
  2885. make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
  2886. } // if
  2887. else // texld/texldb
  2888. {
  2889. funcname = "texture3D";
  2890. make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
  2891. } // else
  2892. break;
  2893. default:
  2894. fail(ctx, "unknown texture type");
  2895. return;
  2896. } // switch
  2897. assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
  2898. char swiz_str[6] = { '\0' };
  2899. make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
  2900. samp_arg->swizzle, ctx->dest_arg.writemask);
  2901. char code[128];
  2902. if (texldd)
  2903. {
  2904. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2905. "%sGrad(%s, %s, %s, %s)%s", funcname,
  2906. src1, src0, src2, src3, swiz_str);
  2907. } // if
  2908. else
  2909. {
  2910. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2911. "%s(%s, %s%s%s)%s", funcname,
  2912. src1, src0, biassep, bias, swiz_str);
  2913. } // else
  2914. output_line(ctx, "%s", code);
  2915. } // else
  2916. } // glsl_texld
  2917. static void emit_GLSL_TEXLD(Context *ctx)
  2918. {
  2919. glsl_texld(ctx, 0);
  2920. } // emit_GLSL_TEXLD
  2921. static void emit_GLSL_TEXBEM(Context *ctx)
  2922. {
  2923. DestArgInfo *info = &ctx->dest_arg;
  2924. char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  2925. char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
  2926. char sampler[64];
  2927. char code[512];
  2928. // !!! FIXME: this code counts on the register not having swizzles, etc.
  2929. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  2930. sampler, sizeof (sampler));
  2931. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2932. "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
  2933. " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))",
  2934. sampler,
  2935. dst, sampler, src, sampler, src,
  2936. dst, sampler, src, sampler, src);
  2937. output_line(ctx, "%s", code);
  2938. } // emit_GLSL_TEXBEM
  2939. static void emit_GLSL_TEXBEML(Context *ctx)
  2940. {
  2941. // !!! FIXME: this code counts on the register not having swizzles, etc.
  2942. DestArgInfo *info = &ctx->dest_arg;
  2943. char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  2944. char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
  2945. char sampler[64];
  2946. char code[512];
  2947. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  2948. sampler, sizeof (sampler));
  2949. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2950. "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
  2951. " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *"
  2952. " ((%s.z * %s_texbeml.x) + %s_texbem.y)",
  2953. sampler,
  2954. dst, sampler, src, sampler, src,
  2955. dst, sampler, src, sampler, src,
  2956. src, sampler, sampler);
  2957. output_line(ctx, "%s", code);
  2958. } // emit_GLSL_TEXBEML
  2959. EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
  2960. EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
  2961. static void emit_GLSL_TEXM3X2PAD(Context *ctx)
  2962. {
  2963. // no-op ... work happens in emit_GLSL_TEXM3X2TEX().
  2964. } // emit_GLSL_TEXM3X2PAD
  2965. static void emit_GLSL_TEXM3X2TEX(Context *ctx)
  2966. {
  2967. if (ctx->texm3x2pad_src0 == -1)
  2968. return;
  2969. DestArgInfo *info = &ctx->dest_arg;
  2970. char dst[64];
  2971. char src0[64];
  2972. char src1[64];
  2973. char src2[64];
  2974. char sampler[64];
  2975. char code[512];
  2976. // !!! FIXME: this code counts on the register not having swizzles, etc.
  2977. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  2978. sampler, sizeof (sampler));
  2979. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
  2980. src0, sizeof (src0));
  2981. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
  2982. src1, sizeof (src1));
  2983. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  2984. src2, sizeof (src2));
  2985. get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  2986. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  2987. "texture2D(%s, vec2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))",
  2988. sampler, src0, src1, src2, dst);
  2989. output_line(ctx, "%s", code);
  2990. } // emit_GLSL_TEXM3X2TEX
  2991. static void emit_GLSL_TEXM3X3PAD(Context *ctx)
  2992. {
  2993. // no-op ... work happens in emit_GLSL_TEXM3X3*().
  2994. } // emit_GLSL_TEXM3X3PAD
  2995. static void emit_GLSL_TEXM3X3TEX(Context *ctx)
  2996. {
  2997. if (ctx->texm3x3pad_src1 == -1)
  2998. return;
  2999. DestArgInfo *info = &ctx->dest_arg;
  3000. char dst[64];
  3001. char src0[64];
  3002. char src1[64];
  3003. char src2[64];
  3004. char src3[64];
  3005. char src4[64];
  3006. char sampler[64];
  3007. char code[512];
  3008. // !!! FIXME: this code counts on the register not having swizzles, etc.
  3009. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  3010. sampler, sizeof (sampler));
  3011. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  3012. src0, sizeof (src0));
  3013. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  3014. src1, sizeof (src1));
  3015. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  3016. src2, sizeof (src2));
  3017. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  3018. src3, sizeof (src3));
  3019. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  3020. src4, sizeof (src4));
  3021. get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  3022. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
  3023. info->regnum);
  3024. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  3025. const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
  3026. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3027. "texture%s(%s,"
  3028. " vec3(dot(%s.xyz, %s.xyz),"
  3029. " dot(%s.xyz, %s.xyz),"
  3030. " dot(%s.xyz, %s.xyz)))",
  3031. ttypestr, sampler, src0, src1, src2, src3, dst, src4);
  3032. output_line(ctx, "%s", code);
  3033. } // emit_GLSL_TEXM3X3TEX
  3034. static void emit_GLSL_TEXM3X3SPEC_helper(Context *ctx)
  3035. {
  3036. if (ctx->glsl_generated_texm3x3spec_helper)
  3037. return;
  3038. ctx->glsl_generated_texm3x3spec_helper = 1;
  3039. push_output(ctx, &ctx->helpers);
  3040. output_line(ctx, "vec3 TEXM3X3SPEC_reflection(const vec3 normal, const vec3 eyeray)");
  3041. output_line(ctx, "{"); ctx->indent++;
  3042. output_line(ctx, "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--;
  3043. output_line(ctx, "}");
  3044. output_blank_line(ctx);
  3045. pop_output(ctx);
  3046. } // emit_GLSL_TEXM3X3SPEC_helper
  3047. static void emit_GLSL_TEXM3X3SPEC(Context *ctx)
  3048. {
  3049. if (ctx->texm3x3pad_src1 == -1)
  3050. return;
  3051. DestArgInfo *info = &ctx->dest_arg;
  3052. char dst[64];
  3053. char src0[64];
  3054. char src1[64];
  3055. char src2[64];
  3056. char src3[64];
  3057. char src4[64];
  3058. char src5[64];
  3059. char sampler[64];
  3060. char code[512];
  3061. emit_GLSL_TEXM3X3SPEC_helper(ctx);
  3062. // !!! FIXME: this code counts on the register not having swizzles, etc.
  3063. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  3064. sampler, sizeof (sampler));
  3065. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  3066. src0, sizeof (src0));
  3067. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  3068. src1, sizeof (src1));
  3069. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  3070. src2, sizeof (src2));
  3071. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  3072. src3, sizeof (src3));
  3073. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  3074. src4, sizeof (src4));
  3075. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
  3076. src5, sizeof (src5));
  3077. get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  3078. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
  3079. info->regnum);
  3080. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  3081. const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
  3082. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3083. "texture%s(%s, "
  3084. "TEXM3X3SPEC_reflection("
  3085. "vec3("
  3086. "dot(%s.xyz, %s.xyz), "
  3087. "dot(%s.xyz, %s.xyz), "
  3088. "dot(%s.xyz, %s.xyz)"
  3089. "),"
  3090. "%s.xyz,"
  3091. ")"
  3092. ")",
  3093. ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5);
  3094. output_line(ctx, "%s", code);
  3095. } // emit_GLSL_TEXM3X3SPEC
  3096. static void emit_GLSL_TEXM3X3VSPEC(Context *ctx)
  3097. {
  3098. if (ctx->texm3x3pad_src1 == -1)
  3099. return;
  3100. DestArgInfo *info = &ctx->dest_arg;
  3101. char dst[64];
  3102. char src0[64];
  3103. char src1[64];
  3104. char src2[64];
  3105. char src3[64];
  3106. char src4[64];
  3107. char sampler[64];
  3108. char code[512];
  3109. emit_GLSL_TEXM3X3SPEC_helper(ctx);
  3110. // !!! FIXME: this code counts on the register not having swizzles, etc.
  3111. get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
  3112. sampler, sizeof (sampler));
  3113. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  3114. src0, sizeof (src0));
  3115. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  3116. src1, sizeof (src1));
  3117. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  3118. src2, sizeof (src2));
  3119. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  3120. src3, sizeof (src3));
  3121. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  3122. src4, sizeof (src4));
  3123. get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  3124. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
  3125. info->regnum);
  3126. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  3127. const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
  3128. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3129. "texture%s(%s, "
  3130. "TEXM3X3SPEC_reflection("
  3131. "vec3("
  3132. "dot(%s.xyz, %s.xyz), "
  3133. "dot(%s.xyz, %s.xyz), "
  3134. "dot(%s.xyz, %s.xyz)"
  3135. "), "
  3136. "vec3(%s.w, %s.w, %s.w)"
  3137. ")"
  3138. ")",
  3139. ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst);
  3140. output_line(ctx, "%s", code);
  3141. } // emit_GLSL_TEXM3X3VSPEC
  3142. static void emit_GLSL_EXPP(Context *ctx)
  3143. {
  3144. // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
  3145. emit_GLSL_EXP(ctx); // I guess this is just partial precision EXP?
  3146. } // emit_GLSL_EXPP
  3147. static void emit_GLSL_LOGP(Context *ctx)
  3148. {
  3149. // LOGP is just low-precision LOG, but we'll take the higher precision.
  3150. emit_GLSL_LOG(ctx);
  3151. } // emit_GLSL_LOGP
  3152. // common code between CMP and CND.
  3153. static void emit_GLSL_comparison_operations(Context *ctx, const char *cmp)
  3154. {
  3155. int i, j;
  3156. DestArgInfo *dst = &ctx->dest_arg;
  3157. const SourceArgInfo *srcarg0 = &ctx->source_args[0];
  3158. const int origmask = dst->writemask;
  3159. int used_swiz[4] = { 0, 0, 0, 0 };
  3160. const int writemask[4] = { dst->writemask0, dst->writemask1,
  3161. dst->writemask2, dst->writemask3 };
  3162. const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
  3163. srcarg0->swizzle_z, srcarg0->swizzle_w };
  3164. for (i = 0; i < 4; i++)
  3165. {
  3166. int mask = (1 << i);
  3167. if (!writemask[i]) continue;
  3168. if (used_swiz[i]) continue;
  3169. // This is a swizzle we haven't checked yet.
  3170. used_swiz[i] = 1;
  3171. // see if there are any other elements swizzled to match (.yyyy)
  3172. for (j = i + 1; j < 4; j++)
  3173. {
  3174. if (!writemask[j]) continue;
  3175. if (src0swiz[i] != src0swiz[j]) continue;
  3176. mask |= (1 << j);
  3177. used_swiz[j] = 1;
  3178. } // for
  3179. // okay, (mask) should be the writemask of swizzles we like.
  3180. //return make_GLSL_srcarg_string(ctx, idx, (1 << 0));
  3181. char src0[64];
  3182. char src1[64];
  3183. char src2[64];
  3184. make_GLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
  3185. make_GLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
  3186. make_GLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));
  3187. set_dstarg_writemask(dst, mask);
  3188. char code[128];
  3189. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3190. "((%s %s) ? %s : %s)",
  3191. src0, cmp, src1, src2);
  3192. output_line(ctx, "%s", code);
  3193. } // for
  3194. set_dstarg_writemask(dst, origmask);
  3195. } // emit_GLSL_comparison_operations
  3196. static void emit_GLSL_CND(Context *ctx)
  3197. {
  3198. emit_GLSL_comparison_operations(ctx, "> 0.5");
  3199. } // emit_GLSL_CND
  3200. static void emit_GLSL_DEF(Context *ctx)
  3201. {
  3202. const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
  3203. char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
  3204. char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
  3205. char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
  3206. char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
  3207. char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
  3208. push_output(ctx, &ctx->globals);
  3209. output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);",
  3210. varname, val0, val1, val2, val3);
  3211. pop_output(ctx);
  3212. } // emit_GLSL_DEF
// Opcodes the GLSL profile does not implement yet. Each macro invocation
//  defines the matching emit_GLSL_<op>() entry point.
// NOTE(review): presumably the generated function reports an "unimplemented"
//  failure, like the ARB1 counterpart macro does -- confirm against the
//  definition of EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
  3217. static void emit_GLSL_TEXM3X3(Context *ctx)
  3218. {
  3219. if (ctx->texm3x3pad_src1 == -1)
  3220. return;
  3221. char dst[64];
  3222. char src0[64];
  3223. char src1[64];
  3224. char src2[64];
  3225. char src3[64];
  3226. char src4[64];
  3227. char code[512];
  3228. // !!! FIXME: this code counts on the register not having swizzles, etc.
  3229. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  3230. src0, sizeof (src0));
  3231. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  3232. src1, sizeof (src1));
  3233. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  3234. src2, sizeof (src2));
  3235. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  3236. src3, sizeof (src3));
  3237. get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  3238. src4, sizeof (src4));
  3239. get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
  3240. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3241. "vec4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)",
  3242. src0, src1, src2, src3, dst, src4);
  3243. output_line(ctx, "%s", code);
  3244. } // emit_GLSL_TEXM3X3
// TEXDEPTH is not implemented for the GLSL profile; the macro defines the
//  emit_GLSL_TEXDEPTH() entry point.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
  3246. static void emit_GLSL_CMP(Context *ctx)
  3247. {
  3248. emit_GLSL_comparison_operations(ctx, ">= 0.0");
  3249. } // emit_GLSL_CMP
// BEM is not implemented for the GLSL profile; the macro defines the
//  emit_GLSL_BEM() entry point.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
  3251. static void emit_GLSL_DP2ADD(Context *ctx)
  3252. {
  3253. char src0[64]; make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
  3254. char src1[64]; make_GLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1));
  3255. char src2[64]; make_GLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2));
  3256. char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2);
  3257. emit_GLSL_dotprod(ctx, src0, src1, extra);
  3258. } // emit_GLSL_DP2ADD
  3259. static void emit_GLSL_DSX(Context *ctx)
  3260. {
  3261. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  3262. char code[128];
  3263. make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdx(%s)", src0);
  3264. output_line(ctx, "%s", code);
  3265. } // emit_GLSL_DSX
  3266. static void emit_GLSL_DSY(Context *ctx)
  3267. {
  3268. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  3269. char code[128];
  3270. make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdy(%s)", src0);
  3271. output_line(ctx, "%s", code);
  3272. } // emit_GLSL_DSY
  3273. static void emit_GLSL_TEXLDD(Context *ctx)
  3274. {
  3275. // !!! FIXME:
  3276. // GLSL 1.30 introduced textureGrad() for this, but it looks like the
  3277. // functions are overloaded instead of texture2DGrad() (etc).
  3278. // GL_shader_texture_lod and GL_EXT_gpu_shader4 added texture2DGrad*(),
  3279. // so we'll use them if available. Failing that, we'll just fallback
  3280. // to a regular texture2D call and hope the mipmap it chooses is close
  3281. // enough.
  3282. if (!ctx->glsl_generated_texldd_setup)
  3283. {
  3284. ctx->glsl_generated_texldd_setup = 1;
  3285. push_output(ctx, &ctx->preflight);
  3286. output_line(ctx, "#if GL_ARB_shader_texture_lod");
  3287. output_line(ctx, "#extension GL_ARB_shader_texture_lod : enable");
  3288. output_line(ctx, "#define texture2DGrad texture2DGradARB");
  3289. output_line(ctx, "#define texture2DProjGrad texture2DProjARB");
  3290. output_line(ctx, "#elif GL_EXT_gpu_shader4");
  3291. output_line(ctx, "#extension GL_EXT_gpu_shader4 : enable");
  3292. output_line(ctx, "#else");
  3293. output_line(ctx, "#define texture2DGrad(a,b,c,d) texture2D(a,b)");
  3294. output_line(ctx, "#define texture2DProjGrad(a,b,c,d) texture2DProj(a,b)");
  3295. output_line(ctx, "#endif");
  3296. output_blank_line(ctx);
  3297. pop_output(ctx);
  3298. } // if
  3299. glsl_texld(ctx, 1);
  3300. } // emit_GLSL_TEXLDD
  3301. static void emit_GLSL_SETP(Context *ctx)
  3302. {
  3303. const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
  3304. char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
  3305. char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
  3306. char code[128];
  3307. // destination is always predicate register (which is type bvec4).
  3308. if (vecsize == 1)
  3309. {
  3310. const char *comp = get_GLSL_comparison_string_scalar(ctx);
  3311. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3312. "(%s %s %s)", src0, comp, src1);
  3313. } // if
  3314. else
  3315. {
  3316. const char *comp = get_GLSL_comparison_string_vector(ctx);
  3317. make_GLSL_destarg_assign(ctx, code, sizeof (code),
  3318. "%s(%s, %s)", comp, src0, src1);
  3319. } // else
  3320. output_line(ctx, "%s", code);
  3321. } // emit_GLSL_SETP
  3322. static void emit_GLSL_TEXLDL(Context *ctx)
  3323. {
  3324. // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
  3325. // !!! FIXME: from fragment shaders for some inexplicable reason.
  3326. // !!! FIXME: For now, you'll just have to suffer with the potentially
  3327. // !!! FIXME: wrong mipmap until I can figure something out.
  3328. emit_GLSL_TEXLD(ctx);
  3329. } // emit_GLSL_TEXLDL
  3330. static void emit_GLSL_BREAKP(Context *ctx)
  3331. {
  3332. char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
  3333. output_line(ctx, "if (%s) { break; }", src0);
  3334. } // emit_GLSL_BREAKP
  3335. static void emit_GLSL_RESERVED(Context *ctx)
  3336. {
  3337. // do nothing; fails in the state machine.
  3338. } // emit_GLSL_RESERVED
  3339. #endif // SUPPORT_PROFILE_GLSL
  3340. #if !SUPPORT_PROFILE_ARB1
  3341. #define PROFILE_EMITTER_ARB1(op)
  3342. #else
  3343. #undef AT_LEAST_ONE_PROFILE
  3344. #define AT_LEAST_ONE_PROFILE 1
  3345. #define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op,
  3346. static inline const char *get_ARB1_register_string(Context *ctx,
  3347. const RegisterType regtype, const int regnum,
  3348. char *regnum_str, const size_t regnum_size)
  3349. {
  3350. // turns out these are identical at the moment.
  3351. return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
  3352. } // get_ARB1_register_string
  3353. static const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
  3354. const size_t buflen)
  3355. {
  3356. const int scratch = allocate_scratch_register(ctx);
  3357. snprintf(buf, buflen, "scratch%d", scratch);
  3358. return buf;
  3359. } // allocate_ARB1_scratch_reg_name
  3360. static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
  3361. char *buf, const size_t buflen)
  3362. {
  3363. snprintf(buf, buflen, "branch_label%d", id);
  3364. return buf;
  3365. } // get_ARB1_branch_label_name
  3366. static const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
  3367. const int regnum, char *buf,
  3368. const size_t buflen)
  3369. {
  3370. // turns out these are identical at the moment.
  3371. return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
  3372. } // get_ARB1_varname_in_buf
  3373. static const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
  3374. const int regnum)
  3375. {
  3376. // turns out these are identical at the moment.
  3377. return get_D3D_varname(ctx, rt, regnum);
  3378. } // get_ARB1_varname
  3379. static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
  3380. const int base, const int size,
  3381. char *buf, const size_t buflen)
  3382. {
  3383. snprintf(buf, buflen, "c_array_%d_%d", base, size);
  3384. return buf;
  3385. } // get_ARB1_const_array_varname_in_buf
  3386. static const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
  3387. {
  3388. char buf[64];
  3389. get_ARB1_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
  3390. return StrDup(ctx, buf);
  3391. } // get_ARB1_const_array_varname
// Build the ARB1 assembly text for one source operand into (buf).
//
//  The result is "<premod><register><[relative addressing]><swizzle><postmod>".
//  Besides filling in (buf), this may write extra instructions with
//  output_line() before the caller's own instruction:
//   - an ARL, to move the wanted address component into ".x" when the
//     profile lacks nv2 support;
//   - a SUB/MAD/MUL/ABS that applies a source modifier into a freshly
//     allocated scratch register, which then replaces the operand.
//  Source modifiers with no ARB1 mapping here (DZ, DW, NOT) call fail().
//
//  ctx: current parsing/emitting context.
//  arg: the decoded source argument to render.
//  buf/buflen: caller-supplied output buffer; also used as a work area for
//   the unmodified register text before the final string is written.
//  Returns (buf).
static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
                                                  const SourceArgInfo *arg,
                                                  char *buf, size_t buflen)
{
    // !!! FIXME: this can hit pathological cases where we look like this...
    //
    //    dp3 r1.xyz, t0_bx2, t0_bx2
    //    mad r1.xyz, t0_bias, 1-r1, t0_bx2
    //
    // ...which do a lot of duplicate work in arb1...
    //
    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
    //    SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
    //    DP3 r1.xyz, scratch0, scratch1;
    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
    //    SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
    //    MAD r1.xyz, scratch0, scratch1, scratch2;
    //
    // ...notice that the dp3 calculates the same value into two scratch
    //  registers. This case is easier to handle; just see if multiple
    //  source args are identical, build it up once, and use the same
    //  scratch register for multiple arguments in that opcode.
    //  Even better still, only calculate things once across instructions,
    //  and be smart about letting it linger in a scratch register until we
    //  definitely don't need the calculation anymore. That's harder to
    //  write, though.

    char regnum_str[16] = { '\0' };

    // !!! FIXME: use get_ARB1_varname_in_buf() instead?
    const char *regtype_str = NULL;
    // Non-relative operands get their register text directly; relative
    //  ones have regtype_str chosen in the block below instead.
    if (!arg->relative)
    {
        regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                               arg->regnum, regnum_str,
                                               sizeof (regnum_str));
    } // if

    // Pieces of the "[addr.c + offset]" suffix; all stay empty unless the
    //  operand uses relative addressing.
    const char *rel_lbracket = "";
    char rel_offset[32] = { '\0' };
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        // alloca()'d storage lives until this function returns, so the
        //  name stays valid for both snprintf() calls below.
        rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype,
                                                  arg->relative_regnum,
                                                  (char *) alloca(64), 64);

        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';

        if (!support_nv2(ctx))
        {
            // The address register in ARB1 only allows the '.x' component, so
            //  we need to load the component we need from a temp vector
            //  register into .x as needed.
            assert(arg->relative_regtype == REG_TYPE_ADDRESS);
            assert(arg->relative_regnum == 0);
            // Skip the reload if .x already holds this component.
            if (ctx->last_address_reg_component != arg->relative_component)
            {
                output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
                            arg->relative_regnum,
                            swizzle_channels[arg->relative_component]);
                ctx->last_address_reg_component = arg->relative_component;
            } // if

            rel_swizzle[1] = 'x';
        } // if

        if (arg->regtype == REG_TYPE_INPUT)
            regtype_str = "vertex.attrib";
        else
        {
            assert(arg->regtype == REG_TYPE_CONST);
            // Index into the named constant array that contains this
            //  register; (offset) is the register's position within it.
            const int arrayidx = arg->relative_array->index;
            const int arraysize = arg->relative_array->count;
            const int offset = arg->regnum - arrayidx;
            assert(offset >= 0);
            regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx,
                                           arraysize, (char *) alloca(64), 64);
            if (offset != 0)
                snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);
        } // else

        rel_lbracket = "[";
        rel_rbracket = "]";
    } // if

    // This is the source register with everything but swizzle and source mods.
    snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
             rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
             rel_rbracket);

    // Some of the source mods need to generate instructions to a temp
    //  register, in which case we'll replace the register name.
    const SourceMod mod = arg->src_mod;
    const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
                          ((mod == SRCMOD_ABS) && support_nv2(ctx)) );

    if (!inplace)
    {
        const size_t len = 64;
        char *stackbuf = (char *) alloca(len);
        regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
        regnum_str[0] = '\0'; // move value to scratch register.
        rel_lbracket = "";   // scratch register won't use array.
        rel_rbracket = "";
        rel_offset[0] = '\0';
        rel_swizzle[0] = '\0';
        rel_regtype_str = "";
    } // if

    // From here on, (buf) still holds the original register text and
    //  (regtype_str) names the scratch register when one was allocated.
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
                        regtype_str, buf);
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            output_line(ctx,
                "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };",
                regtype_str, buf);
            break;

        case SRCMOD_COMPLEMENT:
            output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
                        regtype_str, buf);
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_X2:
            output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
                        regtype_str, buf);
            break;

        case SRCMOD_DZ:
            fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            fail(ctx, "SRCMOD_DW currently unsupported in arb1");
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_ABS:
            if (!support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
                output_line(ctx, "ABS %s, %s;", regtype_str, buf);
            else
            {
                premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
                postmod_str = "|";
            } // else
            break;

        case SRCMOD_NOT:
            fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
             break;  // stop compiler whining.
    } // switch

    char swizzle_str[6];
    size_t i = 0;

    if (support_nv4(ctx))  // vFace must be output as "vFace.x" in nv4.
    {
        if (arg->regtype == REG_TYPE_MISCTYPE)
        {
            if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE )
            {
                swizzle_str[i++] = '.';
                swizzle_str[i++] = 'x';
            } // if
        } // if
    } // if

    // Scalar registers and identity swizzles get no swizzle suffix.
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !no_swizzle(arg->swizzle))
    {
        swizzle_str[i++] = '.';

        // .xxxx is the same as .x, but .xx is illegal...scalar or full!
        if (replicate_swizzle(arg->swizzle))
            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
        else
        {
            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
        } // else
    } // if
    swizzle_str[i] = '\0';
    assert(i < sizeof (swizzle_str));

    // Final operand text; overwrites the work copy built earlier in (buf).
    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
             regtype_str, regnum_str, rel_lbracket,
             rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
             swizzle_str, postmod_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_ARB1_srcarg_string_in_buf
  3596. static const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
  3597. const size_t buflen)
  3598. {
  3599. const DestArgInfo *arg = &ctx->dest_arg;
  3600. return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
  3601. } // get_ARB1_destarg_varname
  3602. static const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
  3603. char *buf, const size_t buflen)
  3604. {
  3605. if (idx >= STATICARRAYLEN(ctx->source_args))
  3606. {
  3607. fail(ctx, "Too many source args");
  3608. *buf = '\0';
  3609. return buf;
  3610. } // if
  3611. const SourceArgInfo *arg = &ctx->source_args[idx];
  3612. return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
  3613. } // get_ARB1_srcarg_varname
  3614. static const char *make_ARB1_destarg_string(Context *ctx, char *buf,
  3615. const size_t buflen)
  3616. {
  3617. const DestArgInfo *arg = &ctx->dest_arg;
  3618. *buf = '\0';
  3619. const char *sat_str = "";
  3620. if (arg->result_mod & MOD_SATURATE)
  3621. {
  3622. // nv4 can use ".SAT" in all program types.
  3623. // For less than nv4, the "_SAT" modifier is only available in
  3624. // fragment shaders. Every thing else will fake it later in
  3625. // emit_ARB1_dest_modifiers() ...
  3626. if (support_nv4(ctx))
  3627. sat_str = ".SAT";
  3628. else if (shader_is_pixel(ctx))
  3629. sat_str = "_SAT";
  3630. } // if
  3631. const char *pp_str = "";
  3632. if (arg->result_mod & MOD_PP)
  3633. {
  3634. // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
  3635. // okay. The spec says lots of Direct3D implementations ignore the
  3636. // flag anyhow.
  3637. if (support_nv4(ctx))
  3638. pp_str = "H";
  3639. } // if
  3640. // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
  3641. assert((arg->result_mod & MOD_CENTROID) == 0);
  3642. char regnum_str[16];
  3643. const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
  3644. arg->regnum, regnum_str,
  3645. sizeof (regnum_str));
  3646. if (regtype_str == NULL)
  3647. {
  3648. fail(ctx, "Unknown destination register type.");
  3649. return buf;
  3650. } // if
  3651. char writemask_str[6];
  3652. size_t i = 0;
  3653. const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
  3654. if (!scalar && !writemask_xyzw(arg->writemask))
  3655. {
  3656. writemask_str[i++] = '.';
  3657. if (arg->writemask0) writemask_str[i++] = 'x';
  3658. if (arg->writemask1) writemask_str[i++] = 'y';
  3659. if (arg->writemask2) writemask_str[i++] = 'z';
  3660. if (arg->writemask3) writemask_str[i++] = 'w';
  3661. } // if
  3662. writemask_str[i] = '\0';
  3663. assert(i < sizeof (writemask_str));
  3664. const char *pred_left = "";
  3665. const char *pred_right = "";
  3666. char pred[32] = { '\0' };
  3667. if (ctx->predicated)
  3668. {
  3669. fail(ctx, "dest register predication currently unsupported in arb1");
  3670. return buf;
  3671. pred_left = "(";
  3672. pred_right = ") ";
  3673. make_ARB1_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
  3674. pred, sizeof (pred));
  3675. } // if
  3676. snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
  3677. regtype_str, regnum_str, writemask_str);
  3678. // !!! FIXME: make sure the scratch buffer was large enough.
  3679. return buf;
  3680. } // make_ARB1_destarg_string
  3681. static void emit_ARB1_dest_modifiers(Context *ctx)
  3682. {
  3683. const DestArgInfo *arg = &ctx->dest_arg;
  3684. if (arg->result_shift != 0x0)
  3685. {
  3686. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  3687. const char *multiplier = NULL;
  3688. switch (arg->result_shift)
  3689. {
  3690. case 0x1: multiplier = "2.0"; break;
  3691. case 0x2: multiplier = "4.0"; break;
  3692. case 0x3: multiplier = "8.0"; break;
  3693. case 0xD: multiplier = "0.125"; break;
  3694. case 0xE: multiplier = "0.25"; break;
  3695. case 0xF: multiplier = "0.5"; break;
  3696. } // switch
  3697. if (multiplier != NULL)
  3698. {
  3699. char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
  3700. output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier);
  3701. } // if
  3702. } // if
  3703. if (arg->result_mod & MOD_SATURATE)
  3704. {
  3705. // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
  3706. if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
  3707. {
  3708. char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
  3709. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  3710. output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
  3711. output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
  3712. } // if
  3713. } // if
  3714. } // emit_ARB1_dest_modifiers
  3715. static const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
  3716. char *buf, const size_t buflen)
  3717. {
  3718. if (idx >= STATICARRAYLEN(ctx->source_args))
  3719. {
  3720. fail(ctx, "Too many source args");
  3721. *buf = '\0';
  3722. return buf;
  3723. } // if
  3724. const SourceArgInfo *arg = &ctx->source_args[idx];
  3725. return make_ARB1_srcarg_string_in_buf(ctx, arg, buf, buflen);
  3726. } // make_ARB1_srcarg_string
  3727. static void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
  3728. {
  3729. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  3730. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  3731. output_line(ctx, "%s%s, %s;", opcode, dst, src0);
  3732. emit_ARB1_dest_modifiers(ctx);
  3733. } // emit_ARB1_opcode_ds
  3734. static void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
  3735. {
  3736. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  3737. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  3738. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  3739. output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
  3740. emit_ARB1_dest_modifiers(ctx);
  3741. } // emit_ARB1_opcode_dss
  3742. static void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
  3743. {
  3744. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  3745. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  3746. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  3747. char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
  3748. output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
  3749. emit_ARB1_dest_modifiers(ctx);
  3750. } // emit_ARB1_opcode_dsss
// Generators for the per-opcode emit_ARB1_<op>() entry points. Each macro
//  defines a static function that passes the stringified opcode name to the
//  helper named by the macro's suffix (no operands, D, S, SS, DS, DSS, ...).
// NOTE(review): the emit_ARB1_opcode, _d, _s, _ss and _dssss helpers are not
//  defined in this part of the file -- confirm they exist before using the
//  corresponding macro variants.
#define EMIT_ARB1_OPCODE_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_D_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_d(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_S_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_s(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_ss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_ds(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dsss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dssss(ctx, #op); \
    }
// Defines an emitter that only reports, through failf(), that the opcode is
//  unimplemented in the profile named by ctx->profile->name.
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
    }
  3787. static void emit_ARB1_start(Context *ctx, const char *profilestr)
  3788. {
  3789. const char *shader_str = NULL;
  3790. const char *shader_full_str = NULL;
  3791. if (shader_is_vertex(ctx))
  3792. {
  3793. shader_str = "vp";
  3794. shader_full_str = "vertex";
  3795. } // if
  3796. else if (shader_is_pixel(ctx))
  3797. {
  3798. shader_str = "fp";
  3799. shader_full_str = "fragment";
  3800. } // else if
  3801. else
  3802. {
  3803. failf(ctx, "Shader type %u unsupported in this profile.",
  3804. (uint) ctx->shader_type);
  3805. return;
  3806. } // if
  3807. set_output(ctx, &ctx->preflight);
  3808. if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
  3809. output_line(ctx, "!!ARB%s1.0", shader_str);
  3810. #if SUPPORT_PROFILE_ARB1_NV
  3811. else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
  3812. {
  3813. ctx->profile_supports_nv2 = 1;
  3814. output_line(ctx, "!!ARB%s1.0", shader_str);
  3815. output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
  3816. } // else if
  3817. else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
  3818. {
  3819. // there's no NV_fragment_program3, so just use 2.
  3820. const int ver = shader_is_pixel(ctx) ? 2 : 3;
  3821. ctx->profile_supports_nv2 = 1;
  3822. ctx->profile_supports_nv3 = 1;
  3823. output_line(ctx, "!!ARB%s1.0", shader_str);
  3824. output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
  3825. } // else if
  3826. else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
  3827. {
  3828. ctx->profile_supports_nv2 = 1;
  3829. ctx->profile_supports_nv3 = 1;
  3830. ctx->profile_supports_nv4 = 1;
  3831. output_line(ctx, "!!NV%s4.0", shader_str);
  3832. } // else if
  3833. #endif
  3834. else
  3835. {
  3836. failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
  3837. } // else
  3838. set_output(ctx, &ctx->mainline);
  3839. } // emit_ARB1_start
  3840. static void emit_ARB1_end(Context *ctx)
  3841. {
  3842. // ps_1_* writes color to r0 instead oC0. We move it to the right place.
  3843. // We don't have to worry about a RET opcode messing this up, since
  3844. // RET isn't available before ps_2_0.
  3845. if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
  3846. {
  3847. set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
  3848. output_line(ctx, "MOV oC0, r0;");
  3849. } // if
  3850. output_line(ctx, "END");
  3851. } // emit_ARB1_end
  3852. static void emit_ARB1_phase(Context *ctx)
  3853. {
  3854. // no-op in arb1.
  3855. } // emit_ARB1_phase
  3856. static inline const char *arb1_float_temp(const Context *ctx)
  3857. {
  3858. // nv4 lets you specify data type.
  3859. return (support_nv4(ctx)) ? "FLOAT TEMP" : "TEMP";
  3860. } // arb1_float_temp
  3861. static void emit_ARB1_finalize(Context *ctx)
  3862. {
  3863. push_output(ctx, &ctx->preflight);
  3864. if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
  3865. output_line(ctx, "OPTION ARB_position_invariant;");
  3866. if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
  3867. output_line(ctx, "OPTION ARB_draw_buffers;");
  3868. pop_output(ctx);
  3869. const char *tmpstr = arb1_float_temp(ctx);
  3870. int i;
  3871. push_output(ctx, &ctx->globals);
  3872. for (i = 0; i < ctx->max_scratch_registers; i++)
  3873. {
  3874. char buf[64];
  3875. allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  3876. output_line(ctx, "%s %s;", tmpstr, buf);
  3877. } // for
  3878. // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
  3879. if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
  3880. {
  3881. // set up temps for nv2 REP/ENDREP emulation through branching.
  3882. for (i = 0; i < ctx->max_reps; i++)
  3883. output_line(ctx, "TEMP rep%d;", i);
  3884. } // if
  3885. pop_output(ctx);
  3886. assert(ctx->scratch_registers == ctx->max_scratch_registers);
  3887. } // emit_ARB1_finalize
  3888. static void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
  3889. {
  3890. // !!! FIXME: dependency on ARB1 profile. // !!! FIXME about FIXME: huh?
  3891. char varname[64];
  3892. get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
  3893. push_output(ctx, &ctx->globals);
  3894. switch (regtype)
  3895. {
  3896. case REG_TYPE_ADDRESS:
  3897. if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE.
  3898. {
  3899. // We have to map texture registers to temps for ps_1_1, since
  3900. // they work like temps, initialize with tex coords, and the
  3901. // ps_1_1 TEX opcode expects to overwrite it.
  3902. if (!shader_version_atleast(ctx, 1, 4))
  3903. {
  3904. output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
  3905. push_output(ctx, &ctx->mainline_intro);
  3906. output_line(ctx, "MOV %s, fragment.texcoord[%d];",
  3907. varname, regnum);
  3908. pop_output(ctx);
  3909. } // if
  3910. break;
  3911. } // if
  3912. // nv4 replaced address registers with generic int registers.
  3913. if (support_nv4(ctx))
  3914. output_line(ctx, "INT TEMP %s;", varname);
  3915. else
  3916. {
  3917. // nv2 has four-component address already, but stock arb1 has
  3918. // to emulate it in a temporary, and move components to the
  3919. // scalar ADDRESS register on demand.
  3920. output_line(ctx, "ADDRESS %s;", varname);
  3921. if (!support_nv2(ctx))
  3922. output_line(ctx, "TEMP addr%d;", regnum);
  3923. } // else
  3924. break;
  3925. //case REG_TYPE_PREDICATE:
  3926. // output_line(ctx, "bvec4 %s;", varname);
  3927. // break;
  3928. case REG_TYPE_TEMP:
  3929. output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
  3930. break;
  3931. //case REG_TYPE_LOOP:
  3932. // break; // no-op. We declare these in for loops at the moment.
  3933. //case REG_TYPE_LABEL:
  3934. // break; // no-op. If we see it here, it means we optimized it out.
  3935. default:
  3936. fail(ctx, "BUG: we used a register we don't know how to define.");
  3937. break;
  3938. } // switch
  3939. pop_output(ctx);
  3940. } // emit_ARB1_global
  3941. static void emit_ARB1_array(Context *ctx, VariableList *var)
  3942. {
  3943. // All uniforms are now packed tightly into the program.local array,
  3944. // instead of trying to map them to the d3d registers. So this needs to
  3945. // map to the next piece of the array we haven't used yet. Thankfully,
  3946. // arb1 lets you make a PARAM array that maps to a subset of another
  3947. // array; we don't need to do offsets, since myarray[0] can map to
  3948. // program.local[5] without any extra math from us.
  3949. const int base = var->index;
  3950. const int size = var->count;
  3951. const int arb1base = ctx->uniform_float4_count +
  3952. ctx->uniform_int4_count +
  3953. ctx->uniform_bool_count;
  3954. char varname[64];
  3955. get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
  3956. push_output(ctx, &ctx->globals);
  3957. output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", varname,
  3958. size, arb1base, (arb1base + size) - 1);
  3959. pop_output(ctx);
  3960. var->emit_position = arb1base;
  3961. } // emit_ARB1_array
  3962. static void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
  3963. int base, int size)
  3964. {
  3965. char varname[64];
  3966. get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
  3967. int i;
  3968. push_output(ctx, &ctx->globals);
  3969. output_line(ctx, "PARAM %s[%d] = {", varname, size);
  3970. ctx->indent++;
  3971. for (i = 0; i < size; i++)
  3972. {
  3973. while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
  3974. clist = clist->next;
  3975. assert(clist->constant.index == (base + i));
  3976. char val0[32];
  3977. char val1[32];
  3978. char val2[32];
  3979. char val3[32];
  3980. floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
  3981. floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
  3982. floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
  3983. floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
  3984. output_line(ctx, "{ %s, %s, %s, %s }%s", val0, val1, val2, val3,
  3985. (i < (size-1)) ? "," : "");
  3986. clist = clist->next;
  3987. } // for
  3988. ctx->indent--;
  3989. output_line(ctx, "};");
  3990. pop_output(ctx);
  3991. } // emit_ARB1_const_array
  3992. static void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
  3993. const VariableList *var)
  3994. {
  3995. // We pack these down into the program.local array, so if we only use
  3996. // register c439, it'll actually map to program.local[0]. This will
  3997. // prevent overflows when we actually have enough resources to run.
  3998. const char *arrayname = "program.local";
  3999. int index = 0;
  4000. char varname[64];
  4001. get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
  4002. push_output(ctx, &ctx->globals);
  4003. if (var == NULL)
  4004. {
  4005. // all types share one array (rather, all types convert to float4).
  4006. index = ctx->uniform_float4_count + ctx->uniform_int4_count +
  4007. ctx->uniform_bool_count;
  4008. } // if
  4009. else
  4010. {
  4011. const int arraybase = var->index;
  4012. if (var->constant)
  4013. {
  4014. const int arraysize = var->count;
  4015. arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase,
  4016. arraysize, (char *) alloca(64), 64);
  4017. index = (regnum - arraybase);
  4018. } // if
  4019. else
  4020. {
  4021. assert(var->emit_position != -1);
  4022. index = (regnum - arraybase) + var->emit_position;
  4023. } // else
  4024. } // else
  4025. output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index);
  4026. pop_output(ctx);
  4027. } // emit_ARB1_uniform
  4028. static void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
  4029. {
  4030. // this is mostly a no-op...you don't predeclare samplers in arb1.
  4031. if (tb) // This sampler used a ps_1_1 TEXBEM opcode?
  4032. {
  4033. const int index = ctx->uniform_float4_count + ctx->uniform_int4_count +
  4034. ctx->uniform_bool_count;
  4035. char var[64];
  4036. get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var));
  4037. push_output(ctx, &ctx->globals);
  4038. output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index);
  4039. output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1);
  4040. pop_output(ctx);
  4041. ctx->uniform_float4_count += 2;
  4042. } // if
  4043. } // emit_ARB1_sampler
  4044. // !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
  // Declare an input/output attribute register in the globals section.
  //
  //  regtype/regnum: the D3D register; also used to build the ARB1 name.
  //  usage/index:    D3D usage and usage index, used to pick the matching
  //                  ARB1 binding (result.*, fragment.*, vertex.attrib[]).
  //  wmask:          not consulted by this function (see FIXME below).
  //  flags:          only MOD_CENTROID is examined here.
  //
  // Vertex shaders: inputs become generic vertex attributes, outputs map to
  // result.* bindings (or a plain temp if no binding is known). Pixel
  // shaders: a declaration is only written when a binding was selected.
  static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
                                  MOJOSHADER_usage usage, int index, int wmask,
                                  int flags)
  {
      // !!! FIXME: this function doesn't deal with write masks at all yet!
      const char *usage_str = NULL;
      const char *arrayleft = "";
      const char *arrayright = "";
      char index_str[16] = { '\0' };
      char varname[64];
      get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
      //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed?
      // Note that index_str is formatted from the incoming index, before any
      // of the register remapping below replaces `index`.
      if (index != 0) // !!! FIXME: a lot of these MUST be zero.
          snprintf(index_str, sizeof (index_str), "%u", (uint) index);
      if (shader_is_vertex(ctx))
      {
          // pre-vs3 output registers.
          // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
          // output registers.
          if (!shader_version_atleast(ctx, 3, 0))
          {
              if (regtype == REG_TYPE_RASTOUT)
              {
                  regtype = REG_TYPE_OUTPUT;
                  index = regnum;
                  switch ((const RastOutType) regnum)
                  {
                      case RASTOUT_TYPE_POSITION:
                          usage = MOJOSHADER_USAGE_POSITION;
                          break;
                      case RASTOUT_TYPE_FOG:
                          usage = MOJOSHADER_USAGE_FOG;
                          break;
                      case RASTOUT_TYPE_POINT_SIZE:
                          usage = MOJOSHADER_USAGE_POINTSIZE;
                          break;
                  } // switch
              } // if
              else if (regtype == REG_TYPE_ATTROUT)
              {
                  regtype = REG_TYPE_OUTPUT;
                  usage = MOJOSHADER_USAGE_COLOR;
                  index = regnum;
              } // else if
              else if (regtype == REG_TYPE_TEXCRDOUT)
              {
                  regtype = REG_TYPE_OUTPUT;
                  usage = MOJOSHADER_USAGE_TEXCOORD;
                  index = regnum;
              } // else if
          } // if
          // to avoid limitations of various GL entry points for input
          // attributes (glSecondaryColorPointer() can only take 3 component
          // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
          // issues), we set up all inputs as generic vertex attributes, so we
          // can pass data in just about any form, and ignore the built-in GLSL
          // attributes like gl_SecondaryColor. Output needs to use the
          // built-ins, though, but we don't have to worry about the GL entry
          // point limitations there.
          if (regtype == REG_TYPE_INPUT)
          {
              // Inputs take the next generic attribute slot, in DCL order.
              const int attr = ctx->assigned_vertex_attributes++;
              push_output(ctx, &ctx->globals);
              output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
              pop_output(ctx);
          } // if
          else if (regtype == REG_TYPE_OUTPUT)
          {
              switch (usage)
              {
                  case MOJOSHADER_USAGE_POSITION:
                      ctx->arb1_wrote_position = 1;
                      usage_str = "result.position";
                      break;
                  case MOJOSHADER_USAGE_POINTSIZE:
                      usage_str = "result.pointsize";
                      break;
                  case MOJOSHADER_USAGE_COLOR:
                      index_str[0] = '\0'; // no explicit number.
                      // Any other color index leaves usage_str NULL and
                      // falls through to the plain-temp declaration below.
                      if (index == 0)
                          usage_str = "result.color.primary";
                      else if (index == 1)
                          usage_str = "result.color.secondary";
                      break;
                  case MOJOSHADER_USAGE_FOG:
                      usage_str = "result.fogcoord";
                      break;
                  case MOJOSHADER_USAGE_TEXCOORD:
                      snprintf(index_str, sizeof (index_str), "%u", (uint) index);
                      usage_str = "result.texcoord";
                      arrayleft = "[";
                      arrayright = "]";
                      break;
                  default:
                      // !!! FIXME: we need to deal with some more built-in varyings here.
                      break;
              } // switch
              // !!! FIXME: the #define is a little hacky, but it means we don't
              // !!! FIXME: have to track these separately if this works.
              push_output(ctx, &ctx->globals);
              // no mapping to built-in var? Just make it a regular global, pray.
              if (usage_str == NULL)
                  output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
              else
              {
                  output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
                              arrayleft, index_str, arrayright);
              } // else
              pop_output(ctx);
          } // else if
          else
          {
              fail(ctx, "unknown vertex shader attribute register");
          } // else
      } // if
      else if (shader_is_pixel(ctx))
      {
          const char *paramtype_str = "ATTRIB";
          // samplers DCLs get handled in emit_ARB1_sampler().
          if (flags & MOD_CENTROID)
          {
              if (!support_nv4(ctx)) // GL_NV_fragment_program4 adds centroid.
              {
                  // !!! FIXME: should we just wing it without centroid here?
                  failf(ctx, "centroid unsupported in %s profile",
                        ctx->profile->name);
                  return;
              } // if
              paramtype_str = "CENTROID ATTRIB";
          } // if
          if (regtype == REG_TYPE_COLOROUT)
          {
              paramtype_str = "OUTPUT";
              usage_str = "result.color";
              if (ctx->have_multi_color_outputs)
              {
                  // We have to gamble that you have GL_ARB_draw_buffers.
                  // You probably do at this point if you have a sane setup.
                  snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
                  arrayleft = "[";
                  arrayright = "]";
              } // if
          } // if
          else if (regtype == REG_TYPE_DEPTHOUT)
          {
              paramtype_str = "OUTPUT";
              usage_str = "result.depth";
          } // else if
          // !!! FIXME: can you actually have a texture register with COLOR usage?
          else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
          {
              if (usage == MOJOSHADER_USAGE_TEXCOORD)
              {
                  // ps_1_1 does a different hack for this attribute.
                  // Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code.
                  // Below ps_1_4, usage_str stays NULL and nothing is
                  // declared here.
                  if (shader_version_atleast(ctx, 1, 4))
                  {
                      snprintf(index_str, sizeof (index_str), "%u", (uint) index);
                      usage_str = "fragment.texcoord";
                      arrayleft = "[";
                      arrayright = "]";
                  } // if
              } // if
              else if (usage == MOJOSHADER_USAGE_COLOR)
              {
                  index_str[0] = '\0'; // no explicit number.
                  if (index == 0)
                      usage_str = "fragment.color.primary";
                  else if (index == 1)
                      usage_str = "fragment.color.secondary";
                  else
                      fail(ctx, "unsupported color index");
              } // else if
          } // else if
          else if (regtype == REG_TYPE_MISCTYPE)
          {
              const MiscTypeType mt = (MiscTypeType) regnum;
              if (mt == MISCTYPE_TYPE_FACE)
              {
                  if (support_nv4(ctx)) // FINALLY, a vFace equivalent in nv4!
                  {
                      index_str[0] = '\0'; // no explicit number.
                      usage_str = "fragment.facing";
                  } // if
                  else
                  {
                      failf(ctx, "vFace unsupported in %s profile",
                            ctx->profile->name);
                  } // else
              } // if
              else if (mt == MISCTYPE_TYPE_POSITION)
              {
                  index_str[0] = '\0'; // no explicit number.
                  usage_str = "fragment.position"; // !!! FIXME: is this the same coord space as D3D?
              } // else if
              else
              {
                  fail(ctx, "BUG: unhandled misc register");
              } // else
          } // else if
          else
          {
              fail(ctx, "unknown pixel shader attribute register");
          } // else
          // Only registers that were given a binding above get declared.
          if (usage_str != NULL)
          {
              push_output(ctx, &ctx->globals);
              output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
                          usage_str, arrayleft, index_str, arrayright);
              pop_output(ctx);
          } // if
      } // else if
      else
      {
          fail(ctx, "Unknown shader type"); // state machine should catch this.
      } // else
  } // emit_ARB1_attribute
  4262. static void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ }
  4263. static void emit_ARB1_NOP(Context *ctx)
  4264. {
  4265. // There is no NOP in arb1. Just don't output anything here.
  4266. } // emit_ARB1_NOP
  // Macro-generated emitters for opcodes that translate one-to-one.
  // NOTE(review): the EMIT_ARB1_OPCODE_*_FUNC macros are defined elsewhere
  // in this file; presumably DS/DSS/DSSS name the argument shape (dest plus
  // one, two or three sources) -- confirm against the macro definitions.
  EMIT_ARB1_OPCODE_DS_FUNC(MOV)
  EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
  EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
  EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
  EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
  EMIT_ARB1_OPCODE_DS_FUNC(RCP)
  4273. static void emit_ARB1_RSQ(Context *ctx)
  4274. {
  4275. // nv4 doesn't force abs() on this, so negative values will generate NaN.
  4276. // The spec says you should force the abs() yourself.
  4277. if (!support_nv4(ctx))
  4278. {
  4279. emit_ARB1_opcode_ds(ctx, "RSQ"); // pre-nv4 implies ABS.
  4280. return;
  4281. } // if
  4282. // we can optimize this to use nv2's |abs| construct in some cases.
  4283. if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
  4284. (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
  4285. (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
  4286. ctx->source_args[0].src_mod = SRCMOD_ABS;
  4287. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4288. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4289. if (ctx->source_args[0].src_mod == SRCMOD_ABS)
  4290. output_line(ctx, "RSQ%s, %s;", dst, src0);
  4291. else
  4292. {
  4293. char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  4294. output_line(ctx, "ABS %s, %s;", buf, src0);
  4295. output_line(ctx, "RSQ%s, %s.x;", dst, buf);
  4296. } // else
  4297. emit_ARB1_dest_modifiers(ctx);
  4298. } // emit_ARB1_RSQ
  // Macro-generated emitters for two-source opcodes that keep their name.
  // NOTE(review): EMIT_ARB1_OPCODE_DSS_FUNC is defined elsewhere in this
  // file; presumably it defines emit_ARB1_<OP>() -- confirm there.
  EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
  EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
  EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
  EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
  EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
  EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
  4305. static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
  4306. static void arb1_log(Context *ctx, const char *opcode)
  4307. {
  4308. // !!! FIXME: SRCMOD_NEGATE can be made into SRCMOD_ABS here, too
  4309. // we can optimize this to use nv2's |abs| construct in some cases.
  4310. if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
  4311. (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
  4312. ctx->source_args[0].src_mod = SRCMOD_ABS;
  4313. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4314. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4315. if (ctx->source_args[0].src_mod == SRCMOD_ABS)
  4316. output_line(ctx, "%s%s, %s;", opcode, dst, src0);
  4317. else
  4318. {
  4319. char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  4320. output_line(ctx, "ABS %s, %s;", buf, src0);
  4321. output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
  4322. } // else
  4323. emit_ARB1_dest_modifiers(ctx);
  4324. } // arb1_log
  4325. static void emit_ARB1_LOG(Context *ctx)
  4326. {
  4327. arb1_log(ctx, "LG2");
  4328. } // emit_ARB1_LOG
  // Macro-generated emitters for LIT and DST.
  // NOTE(review): the EMIT_ARB1_OPCODE_*_FUNC macros are defined elsewhere
  // in this file; presumably each defines emit_ARB1_<OP>() -- confirm there.
  EMIT_ARB1_OPCODE_DS_FUNC(LIT)
  EMIT_ARB1_OPCODE_DSS_FUNC(DST)
  4331. static void emit_ARB1_LRP(Context *ctx)
  4332. {
  4333. if (shader_is_pixel(ctx)) // fragment shaders have a matching LRP opcode.
  4334. emit_ARB1_opcode_dsss(ctx, "LRP");
  4335. else
  4336. {
  4337. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4338. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4339. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  4340. char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
  4341. char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  4342. // LRP is: dest = src2 + src0 * (src1 - src2)
  4343. output_line(ctx, "SUB %s, %s, %s;", buf, src1, src2);
  4344. output_line(ctx, "MAD%s, %s, %s, %s;", dst, buf, src0, src2);
  4345. emit_ARB1_dest_modifiers(ctx);
  4346. } // else
  4347. } // emit_ARB1_LRP
  // Macro-generated emitter for FRC.
  // NOTE(review): EMIT_ARB1_OPCODE_DS_FUNC is defined elsewhere in this
  // file; presumably it defines emit_ARB1_FRC() -- confirm there.
  EMIT_ARB1_OPCODE_DS_FUNC(FRC)
  4349. static void arb1_MxXy(Context *ctx, const int x, const int y)
  4350. {
  4351. DestArgInfo *dstarg = &ctx->dest_arg;
  4352. const int origmask = dstarg->writemask;
  4353. char src0[64];
  4354. int i;
  4355. make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4356. for (i = 0; i < y; i++)
  4357. {
  4358. char dst[64];
  4359. char row[64];
  4360. make_ARB1_srcarg_string(ctx, i + 1, row, sizeof (row));
  4361. set_dstarg_writemask(dstarg, 1 << i);
  4362. make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4363. output_line(ctx, "DP%d%s, %s, %s;", x, dst, src0, row);
  4364. } // for
  4365. set_dstarg_writemask(dstarg, origmask);
  4366. emit_ARB1_dest_modifiers(ctx);
  4367. } // arb1_MxXy
  4368. static void emit_ARB1_M4X4(Context *ctx) { arb1_MxXy(ctx, 4, 4); }
  4369. static void emit_ARB1_M4X3(Context *ctx) { arb1_MxXy(ctx, 4, 3); }
  4370. static void emit_ARB1_M3X4(Context *ctx) { arb1_MxXy(ctx, 3, 4); }
  4371. static void emit_ARB1_M3X3(Context *ctx) { arb1_MxXy(ctx, 3, 3); }
  4372. static void emit_ARB1_M3X2(Context *ctx) { arb1_MxXy(ctx, 3, 2); }
  4373. static void emit_ARB1_CALL(Context *ctx)
  4374. {
  4375. if (!support_nv2(ctx)) // no branching in stock ARB1.
  4376. {
  4377. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  4378. return;
  4379. } // if
  4380. char labelstr[64];
  4381. get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
  4382. output_line(ctx, "CAL %s;", labelstr);
  4383. } // emit_ARB1_CALL
  4384. static void emit_ARB1_CALLNZ(Context *ctx)
  4385. {
  4386. // !!! FIXME: if src1 is a constbool that's true, we can remove the
  4387. // !!! FIXME: if. If it's false, we can make this a no-op.
  4388. if (!support_nv2(ctx)) // no branching in stock ARB1.
  4389. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  4390. else
  4391. {
  4392. // !!! FIXME: double-check this.
  4393. char labelstr[64];
  4394. char scratch[64];
  4395. char src1[64];
  4396. get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
  4397. get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
  4398. allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
  4399. output_line(ctx, "MOVC %s, %s;", scratch, src1);
  4400. output_line(ctx, "CAL %s (NE.x);", labelstr);
  4401. } // else
  4402. } // emit_ARB1_CALLNZ
  // LOOP is not implemented for the arb1 profiles yet.
  // NOTE(review): EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC is defined elsewhere
  // in this file; presumably it defines an emit_ARB1_LOOP() that reports
  // the opcode as unimplemented -- confirm there.
  // !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
  4405. static void emit_ARB1_RET(Context *ctx)
  4406. {
  4407. // don't fail() if no nv2...maybe we're just ending the mainline?
  4408. // if we're ending a LABEL that had no CALL, this would all be written
  4409. // to ctx->ignore anyhow, so this should be "safe" ... arb1 profile will
  4410. // just end up throwing all this code out.
  4411. if (support_nv2(ctx)) // no branching in stock ARB1.
  4412. output_line(ctx, "RET;");
  4413. set_output(ctx, &ctx->mainline); // in case we were ignoring this function.
  4414. } // emit_ARB1_RET
  // ENDLOOP is not implemented for the arb1 profiles yet.
  // NOTE(review): EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC is defined elsewhere
  // in this file; presumably it defines an emit_ARB1_ENDLOOP() that reports
  // the opcode as unimplemented -- confirm there.
  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
  4416. static void emit_ARB1_LABEL(Context *ctx)
  4417. {
  4418. if (!support_nv2(ctx)) // no branching in stock ARB1.
  4419. return; // don't fail()...maybe we never use it, but do fail in CALL.
  4420. const int label = ctx->source_args[0].regnum;
  4421. RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
  4422. // MSDN specs say CALL* has to come before the LABEL, so we know if we
  4423. // can ditch the entire function here as unused.
  4424. if (reg == NULL)
  4425. set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output.
  4426. // !!! FIXME: it would be nice if we could determine if a function is
  4427. // !!! FIXME: only called once and, if so, forcibly inline it.
  4428. //const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
  4429. char labelstr[64];
  4430. get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
  4431. output_line(ctx, "%s:", labelstr);
  4432. } // emit_ARB1_LABEL
  4433. static void emit_ARB1_POW(Context *ctx)
  4434. {
  4435. // we can optimize this to use nv2's |abs| construct in some cases.
  4436. if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
  4437. (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
  4438. ctx->source_args[0].src_mod = SRCMOD_ABS;
  4439. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4440. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4441. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  4442. if (ctx->source_args[0].src_mod == SRCMOD_ABS)
  4443. output_line(ctx, "POW%s, %s, %s;", dst, src0, src1);
  4444. else
  4445. {
  4446. char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  4447. output_line(ctx, "ABS %s, %s;", buf, src0);
  4448. output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1);
  4449. } // else
  4450. emit_ARB1_dest_modifiers(ctx);
  4451. } // emit_ARB1_POW
  4452. static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); }
  4453. static void emit_ARB1_SGN(Context *ctx)
  4454. {
  4455. if (support_nv2(ctx))
  4456. emit_ARB1_opcode_ds(ctx, "SSG");
  4457. else
  4458. {
  4459. char dst[64];
  4460. char src0[64];
  4461. char scratch1[64];
  4462. char scratch2[64];
  4463. make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4464. make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4465. allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1));
  4466. allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2));
  4467. output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);
  4468. output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0);
  4469. output_line(ctx, "ADD%s -%s, %s;", dst, scratch1, scratch2);
  4470. emit_ARB1_dest_modifiers(ctx);
  4471. } // else
  4472. } // emit_ARB1_SGN
  // Macro-generated emitter for ABS.
  // NOTE(review): EMIT_ARB1_OPCODE_DS_FUNC is defined elsewhere in this
  // file; presumably it defines emit_ARB1_ABS() -- confirm there.
  EMIT_ARB1_OPCODE_DS_FUNC(ABS)
  4474. static void emit_ARB1_NRM(Context *ctx)
  4475. {
  4476. // nv2 fragment programs (and anything nv4) have a real NRM.
  4477. if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
  4478. emit_ARB1_opcode_ds(ctx, "NRM");
  4479. else
  4480. {
  4481. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4482. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4483. char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  4484. output_line(ctx, "DP3 %s.w, %s, %s;", buf, src0, src0);
  4485. output_line(ctx, "RSQ %s.w, %s.w;", buf, buf);
  4486. output_line(ctx, "MUL%s, %s.w, %s;", dst, buf, src0);
  4487. emit_ARB1_dest_modifiers(ctx);
  4488. } // else
  4489. } // emit_ARB1_NRM
  4490. static void emit_ARB1_SINCOS(Context *ctx)
  4491. {
  4492. // we don't care about the temp registers that <= sm2 demands; ignore them.
  4493. const int mask = ctx->dest_arg.writemask;
  4494. // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
  4495. if ((shader_is_pixel(ctx)) || (support_nv4(ctx)))
  4496. {
  4497. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4498. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4499. if (writemask_x(mask))
  4500. output_line(ctx, "COS%s, %s;", dst, src0);
  4501. else if (writemask_y(mask))
  4502. output_line(ctx, "SIN%s, %s;", dst, src0);
  4503. else if (writemask_xy(mask))
  4504. output_line(ctx, "SCS%s, %s;", dst, src0);
  4505. } // if
  4506. // nv2+ profiles have sin and cos opcodes.
  4507. else if (support_nv2(ctx))
  4508. {
  4509. char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4510. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4511. if (writemask_x(mask))
  4512. output_line(ctx, "COS %s.x, %s;", dst, src0);
  4513. else if (writemask_y(mask))
  4514. output_line(ctx, "SIN %s.y, %s;", dst, src0);
  4515. else if (writemask_xy(mask))
  4516. {
  4517. output_line(ctx, "SIN %s.x, %s;", dst, src0);
  4518. output_line(ctx, "COS %s.y, %s;", dst, src0);
  4519. } // else if
  4520. } // if
  4521. else // big nasty.
  4522. {
  4523. char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4524. char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
  4525. const int need_sin = (writemask_x(mask) || writemask_xy(mask));
  4526. const int need_cos = (writemask_y(mask) || writemask_xy(mask));
  4527. char scratch[64];
  4528. if (need_sin || need_cos)
  4529. allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
  4530. // These sin() and cos() approximations originally found here:
  4531. // http://www.devmaster.net/forums/showthread.php?t=5784
  4532. //
  4533. // const float B = 4.0f / M_PI;
  4534. // const float C = -4.0f / (M_PI * M_PI);
  4535. // float y = B * x + C * x * fabs(x);
  4536. //
  4537. // // optional better precision...
  4538. // const float P = 0.225f;
  4539. // y = P * (y * fabs(y) - y) + y;
  4540. //
  4541. //
  4542. // That first thing can be reduced to:
  4543. // const float y = ((1.2732395447351626861510701069801f * x) +
  4544. // ((-0.40528473456935108577551785283891f * x) * fabs(x)));
  4545. if (need_sin)
  4546. {
  4547. // !!! FIXME: use SRCMOD_ABS here?
  4548. output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
  4549. output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
  4550. output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
  4551. output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
  4552. } // if
  4553. // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
  4554. // if (x+(M_PI/2) > M_PI)
  4555. // x -= 2 * M_PI;
  4556. //
  4557. // which is...
  4558. // if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
  4559. // x += -6.283185482025146484375;
  4560. if (need_cos)
  4561. {
  4562. output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
  4563. output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
  4564. output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
  4565. output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
  4566. output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
  4567. output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
  4568. output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
  4569. } // if
  4570. } // else
  4571. // !!! FIXME: might not have done anything. Don't emit if we didn't.
  4572. if (!isfail(ctx))
  4573. emit_ARB1_dest_modifiers(ctx);
  4574. } // emit_ARB1_SINCOS
  4575. static void emit_ARB1_REP(Context *ctx)
  4576. {
  4577. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4578. // nv2 fragment programs (and everything nv4) have a real REP.
  4579. if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
  4580. output_line(ctx, "REP %s;", src0);
  4581. else if (support_nv2(ctx))
  4582. {
  4583. // no REP, but we can use branches.
  4584. char failbranch[32];
  4585. char topbranch[32];
  4586. const int toplabel = allocate_branch_label(ctx);
  4587. const int faillabel = allocate_branch_label(ctx);
  4588. get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
  4589. get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
  4590. assert(((size_t) ctx->branch_labels_stack_index) <
  4591. STATICARRAYLEN(ctx->branch_labels_stack)-1);
  4592. ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel;
  4593. ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = faillabel;
  4594. char scratch[32];
  4595. snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
  4596. output_line(ctx, "MOVC %s.x, %s;", scratch, src0);
  4597. output_line(ctx, "BRA %s (LE.x);", failbranch);
  4598. output_line(ctx, "%s:", topbranch);
  4599. } // else if
  4600. else // stock ARB1 has no branching.
  4601. {
  4602. fail(ctx, "branching unsupported in this profile");
  4603. } // else
  4604. } // emit_ARB1_REP
  4605. static void emit_ARB1_ENDREP(Context *ctx)
  4606. {
  4607. // nv2 fragment programs (and everything nv4) have a real ENDREP.
  4608. if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
  4609. output_line(ctx, "ENDREP;");
  4610. else if (support_nv2(ctx))
  4611. {
  4612. // no ENDREP, but we can use branches.
  4613. assert(ctx->branch_labels_stack_index >= 2);
  4614. char failbranch[32];
  4615. char topbranch[32];
  4616. const int faillabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
  4617. const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
  4618. get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
  4619. get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
  4620. char scratch[32];
  4621. snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
  4622. output_line(ctx, "SUBC %s.x, %s.x, 1.0;", scratch, scratch);
  4623. output_line(ctx, "BRA %s (GT.x);", topbranch);
  4624. output_line(ctx, "%s:", failbranch);
  4625. } // else if
  4626. else // stock ARB1 has no branching.
  4627. {
  4628. fail(ctx, "branching unsupported in this profile");
  4629. } // else
  4630. } // emit_ARB1_ENDREP
  4631. static void nv2_if(Context *ctx)
  4632. {
  4633. // The condition code register MUST be set up before this!
  4634. // nv2 fragment programs (and everything nv4) have a real IF.
  4635. if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) )
  4636. output_line(ctx, "IF EQ.x;");
  4637. else
  4638. {
  4639. // there's no IF construct, but we can use a branch to a label.
  4640. char failbranch[32];
  4641. const int label = allocate_branch_label(ctx);
  4642. get_ARB1_branch_label_name(ctx, label, failbranch, sizeof (failbranch));
  4643. assert(((size_t) ctx->branch_labels_stack_index)
  4644. < STATICARRAYLEN(ctx->branch_labels_stack));
  4645. ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = label;
  4646. // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
  4647. output_line(ctx, "BRA %s (EQ.x);", failbranch);
  4648. } // else
  4649. } // nv2_if
  4650. static void emit_ARB1_IF(Context *ctx)
  4651. {
  4652. if (support_nv2(ctx))
  4653. {
  4654. char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
  4655. char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
  4656. output_line(ctx, "MOVC %s.x, %s;", buf, src0);
  4657. nv2_if(ctx);
  4658. } // if
  4659. else // stock ARB1 has no branching.
  4660. {
  4661. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  4662. } // else
  4663. } // emit_ARB1_IF
  4664. static void emit_ARB1_ELSE(Context *ctx)
  4665. {
  4666. // nv2 fragment programs (and everything nv4) have a real ELSE.
  4667. if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
  4668. output_line(ctx, "ELSE;");
  4669. else if (support_nv2(ctx))
  4670. {
  4671. // there's no ELSE construct, but we can use a branch to a label.
  4672. assert(ctx->branch_labels_stack_index > 0);
  4673. // At the end of the IF block, unconditionally jump to the ENDIF.
  4674. const int endlabel = allocate_branch_label(ctx);
  4675. char endbranch[32];
  4676. get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
  4677. output_line(ctx, "BRA %s;", endbranch);
  4678. // Now mark the ELSE section with a lable.
  4679. const int elselabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
  4680. char elsebranch[32];
  4681. get_ARB1_branch_label_name(ctx,elselabel,elsebranch,sizeof(elsebranch));
  4682. output_line(ctx, "%s:", elsebranch);
  4683. // Replace the ELSE label with the ENDIF on the label stack.
  4684. ctx->branch_labels_stack[ctx->branch_labels_stack_index-1] = endlabel;
  4685. } // else if
  4686. else // stock ARB1 has no branching.
  4687. {
  4688. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  4689. } // else
  4690. } // emit_ARB1_ELSE
  4691. static void emit_ARB1_ENDIF(Context *ctx)
  4692. {
  4693. // nv2 fragment programs (and everything nv4) have a real ENDIF.
  4694. if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
  4695. output_line(ctx, "ENDIF;");
  4696. else if (support_nv2(ctx))
  4697. {
  4698. // there's no ENDIF construct, but we can use a branch to a label.
  4699. assert(ctx->branch_labels_stack_index > 0);
  4700. const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
  4701. char endbranch[32];
  4702. get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
  4703. output_line(ctx, "%s:", endbranch);
  4704. } // if
  4705. else // stock ARB1 has no branching.
  4706. {
  4707. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  4708. } // else
  4709. } // emit_ARB1_ENDIF
  4710. static void emit_ARB1_BREAK(Context *ctx)
  4711. {
  4712. // nv2 fragment programs (and everything nv4) have a real BREAK.
  4713. if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
  4714. output_line(ctx, "BRK;");
  4715. else if (support_nv2(ctx))
  4716. {
  4717. // no BREAK, but we can use branches.
  4718. assert(ctx->branch_labels_stack_index >= 2);
  4719. const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index];
  4720. char failbranch[32];
  4721. get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
  4722. output_line(ctx, "BRA %s;", failbranch);
  4723. } // else if
  4724. else // stock ARB1 has no branching.
  4725. {
  4726. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  4727. } // else
  4728. } // emit_ARB1_BREAK
  4729. static void emit_ARB1_MOVA(Context *ctx)
  4730. {
  4731. // nv2 and nv3 can use the ARR opcode.
  4732. // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT.
  4733. if (support_nv4(ctx))
  4734. emit_ARB1_opcode_ds(ctx, "ROUND.S"); // !!! FIXME: don't use a modifier here.
  4735. else if ((support_nv2(ctx)) || (support_nv3(ctx)))
  4736. emit_ARB1_opcode_ds(ctx, "ARR");
  4737. else
  4738. {
  4739. char src0[64];
  4740. char scratch[64];
  4741. char addr[32];
  4742. make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4743. allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
  4744. snprintf(addr, sizeof (addr), "addr%d", ctx->dest_arg.regnum);
  4745. // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.
  4746. // ARL uses floor(), but D3D expects round-to-nearest.
  4747. // There is probably a more efficient way to do this.
  4748. if (shader_is_pixel(ctx)) // CMP only exists in fragment programs. :/
  4749. output_line(ctx, "CMP %s, %s, -1.0, 1.0;", scratch, src0);
  4750. else
  4751. {
  4752. output_line(ctx, "SLT %s, %s, 0.0;", scratch, src0);
  4753. output_line(ctx, "MAD %s, %s, -2.0, 1.0;", scratch, scratch);
  4754. } // else
  4755. output_line(ctx, "ABS %s, %s;", addr, src0);
  4756. output_line(ctx, "ADD %s, %s, 0.5;", addr, addr);
  4757. output_line(ctx, "FLR %s, %s;", addr, addr);
  4758. output_line(ctx, "MUL %s, %s, %s;", addr, addr, scratch);
  4759. // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
  4760. // wants to look at dest_arg, not our temp register.
  4761. assert(ctx->dest_arg.result_mod == 0);
  4762. assert(ctx->dest_arg.result_shift == 0);
  4763. // we assign to the actual address register as needed.
  4764. ctx->last_address_reg_component = -1;
  4765. } // else
  4766. } // emit_ARB1_MOVA
  4767. static void emit_ARB1_TEXKILL(Context *ctx)
  4768. {
  4769. // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle.
  4770. // We just map the x component to w. If it's negative, the fragment
  4771. // would discard anyhow, otherwise, it'll pass through okay. This saves
  4772. // us a temp register.
  4773. char dst[64];
  4774. get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4775. output_line(ctx, "KIL %s.xyzx;", dst);
  4776. } // emit_ARB1_TEXKILL
  4777. static void arb1_texbem(Context *ctx, const int luminance)
  4778. {
  4779. // !!! FIXME: this code counts on the register not having swizzles, etc.
  4780. const int stage = ctx->dest_arg.regnum;
  4781. char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4782. char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src));
  4783. char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
  4784. char sampler[64];
  4785. get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage,
  4786. sampler, sizeof (sampler));
  4787. output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src);
  4788. output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp);
  4789. output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst);
  4790. output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage);
  4791. if (luminance) // TEXBEML, not just TEXBEM?
  4792. {
  4793. output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
  4794. tmp, src, sampler, sampler);
  4795. output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp);
  4796. } // if
  4797. emit_ARB1_dest_modifiers(ctx);
  4798. } // arb1_texbem
  4799. static void emit_ARB1_TEXBEM(Context *ctx)
  4800. {
  4801. arb1_texbem(ctx, 0);
  4802. } // emit_ARB1_TEXBEM
  4803. static void emit_ARB1_TEXBEML(Context *ctx)
  4804. {
  4805. arb1_texbem(ctx, 1);
  4806. } // emit_ARB1_TEXBEML
// TEXREG2AR and TEXREG2GB have no hand-written ARB1 emitter; their
//  emitters are generated by EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC, which is
//  defined outside this part of the file.
// NOTE(review): presumably each expands to an emit_ARB1_<op>() that reports
//  the opcode as unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
  4809. static void emit_ARB1_TEXM3X2PAD(Context *ctx)
  4810. {
  4811. // no-op ... work happens in emit_ARB1_TEXM3X2TEX().
  4812. } // emit_ARB1_TEXM3X2PAD
  4813. static void emit_ARB1_TEXM3X2TEX(Context *ctx)
  4814. {
  4815. if (ctx->texm3x2pad_src0 == -1)
  4816. return;
  4817. char dst[64];
  4818. char src0[64];
  4819. char src1[64];
  4820. char src2[64];
  4821. // !!! FIXME: this code counts on the register not having swizzles, etc.
  4822. const int stage = ctx->dest_arg.regnum;
  4823. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
  4824. src0, sizeof (src0));
  4825. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
  4826. src1, sizeof (src1));
  4827. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  4828. src2, sizeof (src2));
  4829. get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4830. output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, dst);
  4831. output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
  4832. output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, dst, stage);
  4833. emit_ARB1_dest_modifiers(ctx);
  4834. } // emit_ARB1_TEXM3X2TEX
  4835. static void emit_ARB1_TEXM3X3PAD(Context *ctx)
  4836. {
  4837. // no-op ... work happens in emit_ARB1_TEXM3X3*().
  4838. } // emit_ARB1_TEXM3X3PAD
  4839. static void emit_ARB1_TEXM3X3TEX(Context *ctx)
  4840. {
  4841. if (ctx->texm3x3pad_src1 == -1)
  4842. return;
  4843. char dst[64];
  4844. char src0[64];
  4845. char src1[64];
  4846. char src2[64];
  4847. char src3[64];
  4848. char src4[64];
  4849. // !!! FIXME: this code counts on the register not having swizzles, etc.
  4850. const int stage = ctx->dest_arg.regnum;
  4851. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  4852. src0, sizeof (src0));
  4853. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  4854. src1, sizeof (src1));
  4855. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  4856. src2, sizeof (src2));
  4857. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  4858. src3, sizeof (src3));
  4859. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  4860. src4, sizeof (src4));
  4861. get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4862. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
  4863. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  4864. const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
  4865. output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
  4866. output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
  4867. output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
  4868. output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, dst, stage, ttypestr);
  4869. emit_ARB1_dest_modifiers(ctx);
  4870. } // emit_ARB1_TEXM3X3TEX
  4871. static void emit_ARB1_TEXM3X3SPEC(Context *ctx)
  4872. {
  4873. if (ctx->texm3x3pad_src1 == -1)
  4874. return;
  4875. char dst[64];
  4876. char src0[64];
  4877. char src1[64];
  4878. char src2[64];
  4879. char src3[64];
  4880. char src4[64];
  4881. char src5[64];
  4882. char tmp[64];
  4883. char tmp2[64];
  4884. // !!! FIXME: this code counts on the register not having swizzles, etc.
  4885. const int stage = ctx->dest_arg.regnum;
  4886. allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
  4887. allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
  4888. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  4889. src0, sizeof (src0));
  4890. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  4891. src1, sizeof (src1));
  4892. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  4893. src2, sizeof (src2));
  4894. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  4895. src3, sizeof (src3));
  4896. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  4897. src4, sizeof (src4));
  4898. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
  4899. src5, sizeof (src5));
  4900. get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4901. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
  4902. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  4903. const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
  4904. output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
  4905. output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
  4906. output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
  4907. output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal
  4908. output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, src5); // normal * eyeray
  4909. // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
  4910. // !!! FIXME: divides or reciprocals...right?
  4911. output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
  4912. output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
  4913. output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
  4914. output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
  4915. output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
  4916. output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
  4917. output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, src5);
  4918. output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
  4919. emit_ARB1_dest_modifiers(ctx);
  4920. } // emit_ARB1_TEXM3X3SPEC
  4921. static void emit_ARB1_TEXM3X3VSPEC(Context *ctx)
  4922. {
  4923. if (ctx->texm3x3pad_src1 == -1)
  4924. return;
  4925. char dst[64];
  4926. char src0[64];
  4927. char src1[64];
  4928. char src2[64];
  4929. char src3[64];
  4930. char src4[64];
  4931. char tmp[64];
  4932. char tmp2[64];
  4933. char tmp3[64];
  4934. // !!! FIXME: this code counts on the register not having swizzles, etc.
  4935. const int stage = ctx->dest_arg.regnum;
  4936. allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
  4937. allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
  4938. allocate_ARB1_scratch_reg_name(ctx, tmp3, sizeof (tmp3));
  4939. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  4940. src0, sizeof (src0));
  4941. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  4942. src1, sizeof (src1));
  4943. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  4944. src2, sizeof (src2));
  4945. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  4946. src3, sizeof (src3));
  4947. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  4948. src4, sizeof (src4));
  4949. get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  4950. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
  4951. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  4952. const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
  4953. output_line(ctx, "MOV %s.x, %s.w;", tmp3, src0);
  4954. output_line(ctx, "MOV %s.y, %s.w;", tmp3, src2);
  4955. output_line(ctx, "MOV %s.z, %s.w;", tmp3, dst);
  4956. output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
  4957. output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
  4958. output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
  4959. output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal
  4960. output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, tmp3); // normal * eyeray
  4961. // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
  4962. // !!! FIXME: divides or reciprocals...right?
  4963. output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
  4964. output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
  4965. output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
  4966. output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
  4967. output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
  4968. output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
  4969. output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, tmp3);
  4970. output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
  4971. emit_ARB1_dest_modifiers(ctx);
  4972. } // emit_ARB1_TEXM3X3VSPEC
  4973. static void emit_ARB1_EXPP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
  4974. static void emit_ARB1_LOGP(Context *ctx) { arb1_log(ctx, "LG2"); }
  4975. static void emit_ARB1_CND(Context *ctx)
  4976. {
  4977. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  4978. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  4979. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  4980. char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
  4981. char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
  4982. // CND compares against 0.5, but we need to compare against 0.0...
  4983. // ...subtract to make up the difference.
  4984. output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", tmp, src0);
  4985. // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
  4986. // switch src1 and src2 to get the same results.
  4987. output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src2, src1);
  4988. emit_ARB1_dest_modifiers(ctx);
  4989. } // emit_ARB1_CND
// TEXREG2RGB, TEXDP3TEX, TEXM3X2DEPTH and TEXDP3 have no hand-written ARB1
//  emitter; their emitters are generated by
//  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC, which is defined outside this part
//  of the file.
// NOTE(review): presumably each expands to an emit_ARB1_<op>() that reports
//  the opcode as unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
  4994. static void emit_ARB1_TEXM3X3(Context *ctx)
  4995. {
  4996. if (ctx->texm3x3pad_src1 == -1)
  4997. return;
  4998. char dst[64];
  4999. char src0[64];
  5000. char src1[64];
  5001. char src2[64];
  5002. char src3[64];
  5003. char src4[64];
  5004. // !!! FIXME: this code counts on the register not having swizzles, etc.
  5005. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
  5006. src0, sizeof (src0));
  5007. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
  5008. src1, sizeof (src1));
  5009. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
  5010. src2, sizeof (src2));
  5011. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
  5012. src3, sizeof (src3));
  5013. get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
  5014. src4, sizeof (src4));
  5015. get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  5016. output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
  5017. output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
  5018. output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
  5019. output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst);
  5020. emit_ARB1_dest_modifiers(ctx);
  5021. } // emit_ARB1_TEXM3X3
// TEXDEPTH has no hand-written ARB1 emitter; it is generated by
//  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC (defined outside this part of the file).
// NOTE(review): presumably expands to an emitter that reports the opcode as
//  unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
  5023. static void emit_ARB1_CMP(Context *ctx)
  5024. {
  5025. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  5026. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  5027. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  5028. char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
  5029. // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
  5030. // switch src1 and src2 to get the same results.
  5031. output_line(ctx, "CMP%s, %s, %s, %s;", dst, src0, src2, src1);
  5032. emit_ARB1_dest_modifiers(ctx);
  5033. } // emit_ARB1_CMP
// BEM has no hand-written ARB1 emitter; it is generated by
//  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC (defined outside this part of the file).
// NOTE(review): presumably expands to an emitter that reports the opcode as
//  unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM)
  5035. static void emit_ARB1_DP2ADD(Context *ctx)
  5036. {
  5037. if (support_nv4(ctx)) // nv4 has a built-in equivalent to DP2ADD.
  5038. emit_ARB1_opcode_dsss(ctx, "DP2A");
  5039. else
  5040. {
  5041. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  5042. char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
  5043. char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
  5044. char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
  5045. char scratch[64];
  5046. // DP2ADD is:
  5047. // dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
  5048. allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
  5049. output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1);
  5050. output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch);
  5051. output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2);
  5052. emit_ARB1_dest_modifiers(ctx);
  5053. } // else
  5054. } // emit_ARB1_DP2ADD
  5055. static void emit_ARB1_DSX(Context *ctx)
  5056. {
  5057. if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSX.
  5058. emit_ARB1_opcode_ds(ctx, "DDX");
  5059. else
  5060. failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
  5061. } // emit_ARB1_DSX
  5062. static void emit_ARB1_DSY(Context *ctx)
  5063. {
  5064. if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSY.
  5065. emit_ARB1_opcode_ds(ctx, "DDY");
  5066. else
  5067. failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
  5068. } // emit_ARB1_DSY
  5069. static void arb1_texld(Context *ctx, const char *opcode, const int texldd)
  5070. {
  5071. // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly.
  5072. if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx)))
  5073. ctx->dest_arg.result_mod &= ~MOD_PP;
  5074. char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
  5075. const int sm1 = !shader_version_atleast(ctx, 1, 4);
  5076. const int regnum = sm1 ? ctx->dest_arg.regnum : ctx->source_args[1].regnum;
  5077. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum);
  5078. const char *ttype = NULL;
  5079. char src0[64];
  5080. if (sm1)
  5081. get_ARB1_destarg_varname(ctx, src0, sizeof (src0));
  5082. else
  5083. get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
  5084. //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?
  5085. char src2[64] = { 0 };
  5086. char src3[64] = { 0 };
  5087. if (texldd)
  5088. {
  5089. make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
  5090. make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3));
  5091. } // if
  5092. // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
  5093. if (sreg == NULL)
  5094. {
  5095. fail(ctx, "TEXLD using undeclared sampler");
  5096. return;
  5097. } // if
  5098. // SM1 only specifies dst, so don't check swizzle there.
  5099. if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) )
  5100. {
  5101. // !!! FIXME: does this ever actually happen?
  5102. fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");
  5103. } // if
  5104. switch ((const TextureType) sreg->index)
  5105. {
  5106. case TEXTURE_TYPE_2D: ttype = "2D"; break; // !!! FIXME: "RECT"?
  5107. case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break;
  5108. case TEXTURE_TYPE_VOLUME: ttype = "3D"; break;
  5109. default: fail(ctx, "unknown texture type"); return;
  5110. } // switch
  5111. if (texldd)
  5112. {
  5113. output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst,
  5114. src0, src2, src3, regnum, ttype);
  5115. } // if
  5116. else
  5117. {
  5118. output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0,
  5119. regnum, ttype);
  5120. } // else
  5121. } // arb1_texld
  5122. static void emit_ARB1_TEXLDD(Context *ctx)
  5123. {
  5124. // With GL_NV_fragment_program2, we can use the TXD opcode.
  5125. // In stock arb1, we can settle for a standard texld, which isn't
  5126. // perfect, but oh well.
  5127. if (support_nv2(ctx))
  5128. arb1_texld(ctx, "TXD", 1);
  5129. else
  5130. arb1_texld(ctx, "TEX", 0);
  5131. } // emit_ARB1_TEXLDD
  5132. static void emit_ARB1_TEXLDL(Context *ctx)
  5133. {
  5134. if ((shader_is_vertex(ctx)) && (!support_nv3(ctx)))
  5135. {
  5136. failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
  5137. ctx->profile->name);
  5138. return;
  5139. } // if
  5140. else if ((shader_is_pixel(ctx)) && (!support_nv2(ctx)))
  5141. {
  5142. failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
  5143. ctx->profile->name);
  5144. return;
  5145. } // if
  5146. // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
  5147. arb1_texld(ctx, "TXL", 0);
  5148. } // emit_ARB1_TEXLDL
// BREAKP and BREAKC have no hand-written ARB1 emitter; their emitters are
//  generated by EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC, which is defined
//  outside this part of the file.
// NOTE(review): presumably each expands to an emit_ARB1_<op>() that reports
//  the opcode as unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
  5151. static void emit_ARB1_IFC(Context *ctx)
  5152. {
  5153. if (support_nv2(ctx))
  5154. {
  5155. static const char *comps[] = {
  5156. "", "SGTC", "SEQC", "SGEC", "SGTC", "SNEC", "SLEC"
  5157. };
  5158. if (ctx->instruction_controls >= STATICARRAYLEN(comps))
  5159. {
  5160. fail(ctx, "unknown comparison control");
  5161. return;
  5162. } // if
  5163. char src0[64];
  5164. char src1[64];
  5165. char scratch[64];
  5166. const char *comp = comps[ctx->instruction_controls];
  5167. get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
  5168. get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
  5169. allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
  5170. output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
  5171. nv2_if(ctx);
  5172. } // if
  5173. else // stock ARB1 has no branching.
  5174. {
  5175. failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
  5176. } // else
  5177. } // emit_ARB1_IFC
// SETP has no hand-written ARB1 emitter; it is generated by
//  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC (defined outside this part of the file).
// NOTE(review): presumably expands to an emitter that reports the opcode as
//  unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
  5179. static void emit_ARB1_DEF(Context *ctx)
  5180. {
  5181. const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
  5182. char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  5183. char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
  5184. char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
  5185. char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
  5186. char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
  5187. push_output(ctx, &ctx->globals);
  5188. output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
  5189. dst, val0, val1, val2, val3);
  5190. pop_output(ctx);
  5191. } // emit_ARB1_DEF
  5192. static void emit_ARB1_DEFI(Context *ctx)
  5193. {
  5194. char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  5195. const int32 *x = (const int32 *) ctx->dwords;
  5196. push_output(ctx, &ctx->globals);
  5197. output_line(ctx, "PARAM %s = { %d, %d, %d, %d };",
  5198. dst, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
  5199. pop_output(ctx);
  5200. } // emit_ARB1_DEFI
  5201. static void emit_ARB1_DEFB(Context *ctx)
  5202. {
  5203. char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
  5204. push_output(ctx, &ctx->globals);
  5205. output_line(ctx, "PARAM %s = %d;", dst, ctx->dwords[0] ? 1 : 0);
  5206. pop_output(ctx);
  5207. } // emit_ARB1_DEFB
  5208. static void emit_ARB1_DCL(Context *ctx)
  5209. {
  5210. // no-op. We do this in our emit_attribute() and emit_uniform().
  5211. } // emit_ARB1_DCL
// TEXCRD has no hand-written ARB1 emitter; it is generated by
//  EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC (defined outside this part of the file).
// NOTE(review): presumably expands to an emitter that reports the opcode as
//  unimplemented; confirm at the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
  5213. static void emit_ARB1_TEXLD(Context *ctx)
  5214. {
  5215. if (!shader_version_atleast(ctx, 1, 4))
  5216. {
  5217. arb1_texld(ctx, "TEX", 0);
  5218. return;
  5219. } // if
  5220. else if (!shader_version_atleast(ctx, 2, 0))
  5221. {
  5222. // ps_1_4 is different, too!
  5223. fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME
  5224. return;
  5225. } // if
  5226. // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly?
  5227. if (ctx->instruction_controls == CONTROL_TEXLD)
  5228. arb1_texld(ctx, "TEX", 0);
  5229. else if (ctx->instruction_controls == CONTROL_TEXLDP)
  5230. arb1_texld(ctx, "TXP", 0);
  5231. else if (ctx->instruction_controls == CONTROL_TEXLDB)
  5232. arb1_texld(ctx, "TXB", 0);
  5233. } // emit_ARB1_TEXLD
  5234. #endif // SUPPORT_PROFILE_ARB1
  5235. #if !AT_LEAST_ONE_PROFILE
  5236. #error No profiles are supported. Fix your build.
  5237. #endif
// DEFINE_PROFILE(prof) expands to one brace-enclosed initializer (with a
//  trailing comma) for the profiles[] table below. It token-pastes the
//  profile name into MOJOSHADER_PROFILE_<prof> and into the names of that
//  profile's emit_<prof>_* and get_<prof>_* functions, so every profile
//  listed here must define all of those symbols.
// NOTE(review): the initializer order has to match the field order of
//  struct Profile, which is declared outside this part of the file.
#define DEFINE_PROFILE(prof) { \
    MOJOSHADER_PROFILE_##prof, \
    emit_##prof##_start, \
    emit_##prof##_end, \
    emit_##prof##_phase, \
    emit_##prof##_global, \
    emit_##prof##_array, \
    emit_##prof##_const_array, \
    emit_##prof##_uniform, \
    emit_##prof##_sampler, \
    emit_##prof##_attribute, \
    emit_##prof##_finalize, \
    get_##prof##_varname, \
    get_##prof##_const_array_varname, \
},

// One entry per profile compiled into this build; each entry is guarded by
//  its SUPPORT_PROFILE_* switch.
static const Profile profiles[] =
{
#if SUPPORT_PROFILE_D3D
    DEFINE_PROFILE(D3D)
#endif
#if SUPPORT_PROFILE_BYTECODE
    DEFINE_PROFILE(BYTECODE)
#endif
#if SUPPORT_PROFILE_GLSL
    DEFINE_PROFILE(GLSL)
#endif
#if SUPPORT_PROFILE_ARB1
    DEFINE_PROFILE(ARB1)
#endif
};

// The helper macro is only meant for building the table above.
#undef DEFINE_PROFILE
// This is for profiles that extend other profiles...
//  Each row pairs an extended profile's name ("from") with the name of the
//  base profile it builds on ("to").
// NOTE(review): presumably consulted when a requested profile name has no
//  entry of its own in profiles[]; confirm at the lookup site.
static const struct { const char *from; const char *to; } profileMap[] =
{
    { MOJOSHADER_PROFILE_GLSL120, MOJOSHADER_PROFILE_GLSL },
    { MOJOSHADER_PROFILE_NV2, MOJOSHADER_PROFILE_ARB1 },
    { MOJOSHADER_PROFILE_NV3, MOJOSHADER_PROFILE_ARB1 },
    { MOJOSHADER_PROFILE_NV4, MOJOSHADER_PROFILE_ARB1 },
};
// PROFILE_EMITTERS(op) expands to a brace-enclosed list holding each
//  profile's contribution for opcode "op", in D3D, BYTECODE, GLSL, ARB1
//  order. The PROFILE_EMITTER_* macros themselves are defined outside this
//  part of the file.
// The PROFILE_EMITTER_* items MUST be in the same order as profiles[]!
#define PROFILE_EMITTERS(op) { \
    PROFILE_EMITTER_D3D(op) \
    PROFILE_EMITTER_BYTECODE(op) \
    PROFILE_EMITTER_GLSL(op) \
    PROFILE_EMITTER_ARB1(op) \
}
// Decode the destination-parameter token at the current stream position
//  into *info, advance past it, and validate it.
//  ctx: parsing context; ctx->tokens/ctx->tokencount locate the token.
//  info: receives the decoded fields (register, writemask, modifiers...).
// Returns 0 if the stream has no tokens left, 2 if the token requests
//  relative addressing (which is always reported as a failure here), and 1
//  otherwise. Validation problems are reported through fail() and do not
//  change the return value, so callers need to check isfail().
// NOTE(review): the return value looks like a count of tokens consumed, but
//  the relative-addressing path returns 2 after advancing by only one
//  token; confirm what callers expect.
static int parse_destination_token(Context *ctx, DestArgInfo *info)
{
    // !!! FIXME: recheck against the spec for ranges (like RASTOUT values, etc).
    if (ctx->tokencount == 0)
    {
        fail(ctx, "Out of tokens in destination parameter");
        return 0;
    } // if

    // Pull the bitfields out of the (byteswapped as needed) token.
    const uint32 token = SWAP32(*(ctx->tokens));
    const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
    const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31
    info->token = ctx->tokens;
    info->regnum = (int) (token & 0x7ff); // bits 0 through 10
    info->relative = (int) ((token >> 13) & 0x1); // bit 13
    info->orig_writemask = (int) ((token >> 16) & 0xF); // bits 16 through 19
    info->result_mod = (int) ((token >> 20) & 0xF); // bits 20 through 23
    info->result_shift = (int) ((token >> 24) & 0xF); // bits 24 through 27
    // The register type is split across the token: its low three bits are
    //  token bits 28-30, its next two bits are token bits 11-12.
    info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18)); // bits 28-30, 11-12

    // Scalar registers only ever write x, whatever the token says; the
    //  token's own mask is still kept in orig_writemask.
    int writemask;
    if (isscalar(ctx, ctx->shader_type, info->regtype, info->regnum))
        writemask = 0x1; // just x.
    else
        writemask = info->orig_writemask;
    set_dstarg_writemask(info, writemask); // bits 16 through 19.

    // all the REG_TYPE_CONSTx types are the same register type, it's just
    // split up so its regnum can be > 2047 in the bytecode. Clean it up.
    if (info->regtype == REG_TYPE_CONST2)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 2048;
    } // if
    else if (info->regtype == REG_TYPE_CONST3)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 4096;
    } // else if
    else if (info->regtype == REG_TYPE_CONST4)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 6144;
    } // else if

    // swallow token for now, for multiple calls in a row.
    adjust_token_position(ctx, 1);

    // Everything from here on is validation of what was just decoded.
    if (reserved1 != 0x0)
        fail(ctx, "Reserved bit #1 in destination token must be zero");
    if (reserved2 != 0x1)
        fail(ctx, "Reserved bit #2 in destination token must be one");

    if (info->relative)
    {
        if (!shader_is_vertex(ctx))
            fail(ctx, "Relative addressing in non-vertex shader");
        if (!shader_version_atleast(ctx, 3, 0))
            fail(ctx, "Relative addressing in vertex shader version < 3.0");
        if ((!ctx->ctab.have_ctab) && (!ctx->ignores_ctab))
        {
            // it's hard to do this efficiently without!
            fail(ctx, "relative addressing unsupported without a CTAB");
        } // if
        // !!! FIXME: I don't have a shader that has a relative dest currently.
        fail(ctx, "Relative addressing of dest tokens is unsupported");
        return 2;
    } // if

    const int s = info->result_shift;
    if (s != 0)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Result shift scale in non-pixel shader");
        if (shader_version_atleast(ctx, 2, 0))
            fail(ctx, "Result shift scale in pixel shader version >= 2.0");
        if ( ! (((s >= 1) && (s <= 3)) || ((s >= 0xD) && (s <= 0xF))) )
            fail(ctx, "Result shift scale isn't 1 to 3, or 13 to 15.");
    } // if

    if (info->result_mod & MOD_PP) // Partial precision (pixel shaders only)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Partial precision result mod in non-pixel shader");
    } // if

    if (info->result_mod & MOD_CENTROID) // Centroid (pixel shaders only)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Centroid result mod in non-pixel shader");
        else if (!ctx->centroid_allowed) // only on DCL opcodes!
            fail(ctx, "Centroid modifier not allowed here");
    } // if

    if ((info->regtype < 0) || (info->regtype > REG_TYPE_MAX))
        fail(ctx, "Register type is out of range");

    // Only record the register as used (and written) if nothing failed.
    if (!isfail(ctx))
        set_used_register(ctx, info->regtype, info->regnum, 1);

    return 1;
} // parse_destination_token
  5374. static void determine_constants_arrays(Context *ctx)
  5375. {
  5376. // Only process this stuff once. This is called after all DEF* opcodes
  5377. // could have been parsed.
  5378. if (ctx->determined_constants_arrays)
  5379. return;
  5380. ctx->determined_constants_arrays = 1;
  5381. if (ctx->constant_count <= 1)
  5382. return; // nothing to sort or group.
  5383. // Sort the linked list into an array for easier tapdancing...
  5384. ConstantsList **array = (ConstantsList **) alloca(sizeof (ConstantsList *) * (ctx->constant_count + 1));
  5385. ConstantsList *item = ctx->constants;
  5386. int i;
  5387. for (i = 0; i < ctx->constant_count; i++)
  5388. {
  5389. if (item == NULL)
  5390. {
  5391. fail(ctx, "BUG: mismatched constant list and count");
  5392. return;
  5393. } // if
  5394. array[i] = item;
  5395. item = item->next;
  5396. } // for
  5397. array[ctx->constant_count] = NULL;
  5398. // bubble sort ftw.
  5399. int sorted;
  5400. do
  5401. {
  5402. sorted = 1;
  5403. for (i = 0; i < ctx->constant_count-1; i++)
  5404. {
  5405. if (array[i]->constant.index > array[i+1]->constant.index)
  5406. {
  5407. ConstantsList *tmp = array[i];
  5408. array[i] = array[i+1];
  5409. array[i+1] = tmp;
  5410. sorted = 0;
  5411. } // if
  5412. } // for
  5413. } while (!sorted);
  5414. // okay, sorted. While we're here, let's redo the linked list in order...
  5415. for (i = 0; i < ctx->constant_count; i++)
  5416. array[i]->next = array[i+1];
  5417. ctx->constants = array[0];
  5418. // now figure out the groupings of constants and add to ctx->variables...
  5419. int start = -1;
  5420. int prev = -1;
  5421. int count = 0;
  5422. const int hi = ctx->constant_count;
  5423. for (i = 0; i <= hi; i++)
  5424. {
  5425. if (array[i] && (array[i]->constant.type != MOJOSHADER_UNIFORM_FLOAT))
  5426. continue; // we only care about REG_TYPE_CONST for array groups.
  5427. if (start == -1)
  5428. {
  5429. prev = start = i; // first REG_TYPE_CONST we've seen. Mark it!
  5430. continue;
  5431. } // if
  5432. // not a match (or last item in the array)...see if we had a
  5433. // contiguous set before this point...
  5434. if ( (array[i]) && (array[i]->constant.index == (array[prev]->constant.index + 1)) )
  5435. count++;
  5436. else
  5437. {
  5438. if (count > 0) // multiple constants in the set?
  5439. {
  5440. VariableList *var;
  5441. var = (VariableList *) Malloc(ctx, sizeof (VariableList));
  5442. if (var == NULL)
  5443. break;
  5444. var->type = MOJOSHADER_UNIFORM_FLOAT;
  5445. var->index = array[start]->constant.index;
  5446. var->count = (array[prev]->constant.index - var->index) + 1;
  5447. var->constant = array[start];
  5448. var->used = 0;
  5449. var->emit_position = -1;
  5450. var->next = ctx->variables;
  5451. ctx->variables = var;
  5452. } // else
  5453. start = i; // set this as new start of sequence.
  5454. } // if
  5455. prev = i;
  5456. } // for
  5457. } // determine_constants_arrays
  5458. static int adjust_swizzle(const Context *ctx, const RegisterType regtype,
  5459. const int regnum, const int swizzle)
  5460. {
  5461. if (regtype != REG_TYPE_INPUT) // !!! FIXME: maybe lift this later?
  5462. return swizzle;
  5463. else if (ctx->swizzles_count == 0)
  5464. return swizzle;
  5465. const RegisterList *reg = reglist_find(&ctx->attributes, regtype, regnum);
  5466. if (reg == NULL)
  5467. return swizzle;
  5468. size_t i;
  5469. for (i = 0; i < ctx->swizzles_count; i++)
  5470. {
  5471. const MOJOSHADER_swizzle *swiz = &ctx->swizzles[i];
  5472. if ((swiz->usage == reg->usage) && (swiz->index == reg->index))
  5473. {
  5474. return ( (((int)(swiz->swizzles[((swizzle >> 0) & 0x3)])) << 0) |
  5475. (((int)(swiz->swizzles[((swizzle >> 2) & 0x3)])) << 2) |
  5476. (((int)(swiz->swizzles[((swizzle >> 4) & 0x3)])) << 4) |
  5477. (((int)(swiz->swizzles[((swizzle >> 6) & 0x3)])) << 6) );
  5478. } // if
  5479. } // for
  5480. return swizzle;
  5481. } // adjust_swizzle
  5482. static int parse_source_token(Context *ctx, SourceArgInfo *info)
  5483. {
  5484. int retval = 1;
  5485. if (ctx->tokencount == 0)
  5486. {
  5487. fail(ctx, "Out of tokens in source parameter");
  5488. return 0;
  5489. } // if
  5490. const uint32 token = SWAP32(*(ctx->tokens));
  5491. const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
  5492. const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31
  5493. info->token = ctx->tokens;
  5494. info->regnum = (int) (token & 0x7ff); // bits 0 through 10
  5495. info->relative = (int) ((token >> 13) & 0x1); // bit 13
  5496. const int swizzle = (int) ((token >> 16) & 0xFF); // bits 16 through 23
  5497. info->src_mod = (SourceMod) ((token >> 24) & 0xF); // bits 24 through 27
  5498. info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18)); // bits 28-30, 11-12
  5499. // all the REG_TYPE_CONSTx types are the same register type, it's just
  5500. // split up so its regnum can be > 2047 in the bytecode. Clean it up.
  5501. if (info->regtype == REG_TYPE_CONST2)
  5502. {
  5503. info->regtype = REG_TYPE_CONST;
  5504. info->regnum += 2048;
  5505. } // else if
  5506. else if (info->regtype == REG_TYPE_CONST3)
  5507. {
  5508. info->regtype = REG_TYPE_CONST;
  5509. info->regnum += 4096;
  5510. } // else if
  5511. else if (info->regtype == REG_TYPE_CONST4)
  5512. {
  5513. info->regtype = REG_TYPE_CONST;
  5514. info->regnum += 6144;
  5515. } // else if
  5516. info->swizzle = adjust_swizzle(ctx, info->regtype, info->regnum, swizzle);
  5517. info->swizzle_x = ((info->swizzle >> 0) & 0x3);
  5518. info->swizzle_y = ((info->swizzle >> 2) & 0x3);
  5519. info->swizzle_z = ((info->swizzle >> 4) & 0x3);
  5520. info->swizzle_w = ((info->swizzle >> 6) & 0x3);
  5521. // swallow token for now, for multiple calls in a row.
  5522. adjust_token_position(ctx, 1);
  5523. if (reserved1 != 0x0)
  5524. fail(ctx, "Reserved bits #1 in source token must be zero");
  5525. if (reserved2 != 0x1)
  5526. fail(ctx, "Reserved bit #2 in source token must be one");
  5527. if ((info->relative) && (ctx->tokencount == 0))
  5528. {
  5529. fail(ctx, "Out of tokens in relative source parameter");
  5530. info->relative = 0; // don't try to process it.
  5531. } // if
  5532. if (info->relative)
  5533. {
  5534. if ( (shader_is_pixel(ctx)) && (!shader_version_atleast(ctx, 3, 0)) )
  5535. fail(ctx, "Relative addressing in pixel shader version < 3.0");
  5536. const uint32 reltoken = SWAP32(*(ctx->tokens));
  5537. // swallow token for now, for multiple calls in a row.
  5538. adjust_token_position(ctx, 1);
  5539. const int relswiz = (int) ((reltoken >> 16) & 0xFF);
  5540. info->relative_regnum = (int) (reltoken & 0x7ff);
  5541. info->relative_regtype = (RegisterType)
  5542. (((reltoken >> 28) & 0x7) |
  5543. ((reltoken >> 8) & 0x18));
  5544. if (((reltoken >> 31) & 0x1) == 0)
  5545. fail(ctx, "bit #31 in relative address must be set");
  5546. if ((reltoken & 0xF00E000) != 0) // usused bits.
  5547. fail(ctx, "relative address reserved bit must be zero");
  5548. switch (info->relative_regtype)
  5549. {
  5550. case REG_TYPE_LOOP:
  5551. case REG_TYPE_ADDRESS:
  5552. break;
  5553. default:
  5554. fail(ctx, "invalid register for relative address");
  5555. break;
  5556. } // switch
  5557. if (info->relative_regnum != 0) // true for now.
  5558. fail(ctx, "invalid register for relative address");
  5559. if (!replicate_swizzle(relswiz))
  5560. fail(ctx, "relative address needs replicate swizzle");
  5561. info->relative_component = (relswiz & 0x3);
  5562. if (info->regtype == REG_TYPE_INPUT)
  5563. {
  5564. if ( (shader_is_pixel(ctx)) || (!shader_version_atleast(ctx, 3, 0)) )
  5565. fail(ctx, "relative addressing of input registers not supported in this shader model");
  5566. ctx->have_relative_input_registers = 1;
  5567. } // if
  5568. else if (info->regtype == REG_TYPE_CONST)
  5569. {
  5570. // figure out what array we're in...
  5571. if (!ctx->ignores_ctab)
  5572. {
  5573. if (!ctx->ctab.have_ctab) // hard to do efficiently without!
  5574. fail(ctx, "relative addressing unsupported without a CTAB");
  5575. else
  5576. {
  5577. determine_constants_arrays(ctx);
  5578. VariableList *var;
  5579. const int reltarget = info->regnum;
  5580. for (var = ctx->variables; var != NULL; var = var->next)
  5581. {
  5582. const int lo = var->index;
  5583. if ( (reltarget >= lo) && (reltarget < (lo + var->count)) )
  5584. break; // match!
  5585. } // for
  5586. if (var == NULL)
  5587. fail(ctx, "relative addressing of indeterminate array");
  5588. else
  5589. {
  5590. var->used = 1;
  5591. info->relative_array = var;
  5592. set_used_register(ctx, info->relative_regtype, info->relative_regnum, 0);
  5593. } // else
  5594. } // else
  5595. } // if
  5596. } // else if
  5597. else
  5598. {
  5599. fail(ctx, "relative addressing of invalid register");
  5600. } // else
  5601. retval++;
  5602. } // if
  5603. switch (info->src_mod)
  5604. {
  5605. case SRCMOD_NONE:
  5606. case SRCMOD_ABSNEGATE:
  5607. case SRCMOD_ABS:
  5608. case SRCMOD_NEGATE:
  5609. break; // okay in any shader model.
  5610. // apparently these are only legal in Shader Model 1.x ...
  5611. case SRCMOD_BIASNEGATE:
  5612. case SRCMOD_BIAS:
  5613. case SRCMOD_SIGNNEGATE:
  5614. case SRCMOD_SIGN:
  5615. case SRCMOD_COMPLEMENT:
  5616. case SRCMOD_X2NEGATE:
  5617. case SRCMOD_X2:
  5618. case SRCMOD_DZ:
  5619. case SRCMOD_DW:
  5620. if (shader_version_atleast(ctx, 2, 0))
  5621. fail(ctx, "illegal source mod for this Shader Model.");
  5622. break;
  5623. case SRCMOD_NOT: // !!! FIXME: I _think_ this is right...
  5624. if (shader_version_atleast(ctx, 2, 0))
  5625. {
  5626. if (info->regtype != REG_TYPE_PREDICATE)
  5627. fail(ctx, "NOT only allowed on predicate register.");
  5628. } // if
  5629. break;
  5630. default:
  5631. fail(ctx, "Unknown source modifier");
  5632. } // switch
  5633. // !!! FIXME: docs say this for sm3 ... check these!
  5634. // "The negate modifier cannot be used on second source register of these
  5635. // instructions: m3x2 - ps, m3x3 - ps, m3x4 - ps, m4x3 - ps, and
  5636. // m4x4 - ps."
  5637. // "If any version 3 shader reads from one or more constant float
  5638. // registers (c#), one of the following must be true.
  5639. // All of the constant floating-point registers must use the abs modifier.
  5640. // None of the constant floating-point registers can use the abs modifier.
  5641. if (!isfail(ctx))
  5642. {
  5643. RegisterList *reg;
  5644. reg = set_used_register(ctx, info->regtype, info->regnum, 0);
  5645. // !!! FIXME: this test passes if you write to the register
  5646. // !!! FIXME: in this same instruction, because we parse the
  5647. // !!! FIXME: destination token first.
  5648. // !!! FIXME: Microsoft's shader validation explicitly checks temp
  5649. // !!! FIXME: registers for this...do they check other writable ones?
  5650. if ((info->regtype == REG_TYPE_TEMP) && (reg) && (!reg->written))
  5651. failf(ctx, "Temp register r%d used uninitialized", info->regnum);
  5652. } // if
  5653. return retval;
  5654. } // parse_source_token
  5655. static int parse_predicated_token(Context *ctx)
  5656. {
  5657. SourceArgInfo *arg = &ctx->predicate_arg;
  5658. parse_source_token(ctx, arg);
  5659. if (arg->regtype != REG_TYPE_PREDICATE)
  5660. fail(ctx, "Predicated instruction but not predicate register!");
  5661. if ((arg->src_mod != SRCMOD_NONE) && (arg->src_mod != SRCMOD_NOT))
  5662. fail(ctx, "Predicated instruction register is not NONE or NOT");
  5663. if ( !no_swizzle(arg->swizzle) && !replicate_swizzle(arg->swizzle) )
  5664. fail(ctx, "Predicated instruction register has wrong swizzle");
  5665. if (arg->relative) // I'm pretty sure this is illegal...?
  5666. fail(ctx, "relative addressing in predicated token");
  5667. return 1;
  5668. } // parse_predicated_token
  5669. static int parse_args_NULL(Context *ctx)
  5670. {
  5671. return 1;
  5672. } // parse_args_NULL
  5673. static int parse_args_DEF(Context *ctx)
  5674. {
  5675. parse_destination_token(ctx, &ctx->dest_arg);
  5676. if (ctx->dest_arg.regtype != REG_TYPE_CONST)
  5677. fail(ctx, "DEF using non-CONST register");
  5678. if (ctx->dest_arg.relative) // I'm pretty sure this is illegal...?
  5679. fail(ctx, "relative addressing in DEF");
  5680. ctx->dwords[0] = SWAP32(ctx->tokens[0]);
  5681. ctx->dwords[1] = SWAP32(ctx->tokens[1]);
  5682. ctx->dwords[2] = SWAP32(ctx->tokens[2]);
  5683. ctx->dwords[3] = SWAP32(ctx->tokens[3]);
  5684. return 6;
  5685. } // parse_args_DEF
  5686. static int parse_args_DEFI(Context *ctx)
  5687. {
  5688. parse_destination_token(ctx, &ctx->dest_arg);
  5689. if (ctx->dest_arg.regtype != REG_TYPE_CONSTINT)
  5690. fail(ctx, "DEFI using non-CONSTING register");
  5691. if (ctx->dest_arg.relative) // I'm pretty sure this is illegal...?
  5692. fail(ctx, "relative addressing in DEFI");
  5693. ctx->dwords[0] = SWAP32(ctx->tokens[0]);
  5694. ctx->dwords[1] = SWAP32(ctx->tokens[1]);
  5695. ctx->dwords[2] = SWAP32(ctx->tokens[2]);
  5696. ctx->dwords[3] = SWAP32(ctx->tokens[3]);
  5697. return 6;
  5698. } // parse_args_DEFI
  5699. static int parse_args_DEFB(Context *ctx)
  5700. {
  5701. parse_destination_token(ctx, &ctx->dest_arg);
  5702. if (ctx->dest_arg.regtype != REG_TYPE_CONSTBOOL)
  5703. fail(ctx, "DEFB using non-CONSTBOOL register");
  5704. if (ctx->dest_arg.relative) // I'm pretty sure this is illegal...?
  5705. fail(ctx, "relative addressing in DEFB");
  5706. ctx->dwords[0] = *(ctx->tokens) ? 1 : 0;
  5707. return 3;
  5708. } // parse_args_DEFB
  5709. static int valid_texture_type(const uint32 ttype)
  5710. {
  5711. switch ((const TextureType) ttype)
  5712. {
  5713. case TEXTURE_TYPE_2D:
  5714. case TEXTURE_TYPE_CUBE:
  5715. case TEXTURE_TYPE_VOLUME:
  5716. return 1; // it's okay.
  5717. } // switch
  5718. return 0;
  5719. } // valid_texture_type
  5720. // !!! FIXME: this function is kind of a mess.
  5721. static int parse_args_DCL(Context *ctx)
  5722. {
  5723. int unsupported = 0;
  5724. const uint32 token = SWAP32(*(ctx->tokens));
  5725. const int reserved1 = (int) ((token >> 31) & 0x1); // bit 31
  5726. uint32 reserved_mask = 0x00000000;
  5727. if (reserved1 != 0x1)
  5728. fail(ctx, "Bit #31 in DCL token must be one");
  5729. ctx->centroid_allowed = 1;
  5730. adjust_token_position(ctx, 1);
  5731. parse_destination_token(ctx, &ctx->dest_arg);
  5732. ctx->centroid_allowed = 0;
  5733. if (ctx->dest_arg.result_shift != 0) // I'm pretty sure this is illegal...?
  5734. fail(ctx, "shift scale in DCL");
  5735. if (ctx->dest_arg.relative) // I'm pretty sure this is illegal...?
  5736. fail(ctx, "relative addressing in DCL");
  5737. const RegisterType regtype = ctx->dest_arg.regtype;
  5738. const int regnum = ctx->dest_arg.regnum;
  5739. if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
  5740. {
  5741. if (regtype == REG_TYPE_INPUT)
  5742. {
  5743. const uint32 usage = (token & 0xF);
  5744. const uint32 index = ((token >> 16) & 0xF);
  5745. reserved_mask = 0x7FF0FFE0;
  5746. ctx->dwords[0] = usage;
  5747. ctx->dwords[1] = index;
  5748. } // if
  5749. else if (regtype == REG_TYPE_MISCTYPE)
  5750. {
  5751. const MiscTypeType mt = (MiscTypeType) regnum;
  5752. if (mt == MISCTYPE_TYPE_POSITION)
  5753. reserved_mask = 0x7FFFFFFF;
  5754. else if (mt == MISCTYPE_TYPE_FACE)
  5755. {
  5756. reserved_mask = 0x7FFFFFFF;
  5757. if (!writemask_xyzw(ctx->dest_arg.orig_writemask))
  5758. fail(ctx, "DCL face writemask must be full");
  5759. if (ctx->dest_arg.result_mod != 0)
  5760. fail(ctx, "DCL face result modifier must be zero");
  5761. if (ctx->dest_arg.result_shift != 0)
  5762. fail(ctx, "DCL face shift scale must be zero");
  5763. } // else if
  5764. else
  5765. {
  5766. unsupported = 1;
  5767. } // else
  5768. ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_UNKNOWN;
  5769. ctx->dwords[1] = 0;
  5770. } // else if
  5771. else if (regtype == REG_TYPE_TEXTURE)
  5772. {
  5773. const uint32 usage = (token & 0xF);
  5774. const uint32 index = ((token >> 16) & 0xF);
  5775. if (usage == MOJOSHADER_USAGE_TEXCOORD)
  5776. {
  5777. if (index > 7)
  5778. fail(ctx, "DCL texcoord usage must have 0-7 index");
  5779. } // if
  5780. else if (usage == MOJOSHADER_USAGE_COLOR)
  5781. {
  5782. if (index != 0)
  5783. fail(ctx, "DCL color usage must have 0 index");
  5784. } // else if
  5785. else
  5786. {
  5787. fail(ctx, "Invalid DCL texture usage");
  5788. } // else
  5789. reserved_mask = 0x7FF0FFE0;
  5790. ctx->dwords[0] = usage;
  5791. ctx->dwords[1] = index;
  5792. } // else if
  5793. else if (regtype == REG_TYPE_SAMPLER)
  5794. {
  5795. const uint32 ttype = ((token >> 27) & 0xF);
  5796. if (!valid_texture_type(ttype))
  5797. fail(ctx, "unknown sampler texture type");
  5798. reserved_mask = 0x7FFFFFF;
  5799. ctx->dwords[0] = ttype;
  5800. } // else if
  5801. else
  5802. {
  5803. unsupported = 1;
  5804. } // else
  5805. } // if
  5806. else if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
  5807. {
  5808. if (regtype == REG_TYPE_INPUT)
  5809. {
  5810. ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_COLOR;
  5811. ctx->dwords[1] = regnum;
  5812. reserved_mask = 0x7FFFFFFF;
  5813. } // if
  5814. else if (regtype == REG_TYPE_TEXTURE)
  5815. {
  5816. ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_TEXCOORD;
  5817. ctx->dwords[1] = regnum;
  5818. reserved_mask = 0x7FFFFFFF;
  5819. } // else if
  5820. else if (regtype == REG_TYPE_SAMPLER)
  5821. {
  5822. const uint32 ttype = ((token >> 27) & 0xF);
  5823. if (!valid_texture_type(ttype))
  5824. fail(ctx, "unknown sampler texture type");
  5825. reserved_mask = 0x7FFFFFF;
  5826. ctx->dwords[0] = ttype;
  5827. } // else if
  5828. else
  5829. {
  5830. unsupported = 1;
  5831. } // else
  5832. } // if
  5833. else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
  5834. {
  5835. if ((regtype == REG_TYPE_INPUT) || (regtype == REG_TYPE_OUTPUT))
  5836. {
  5837. const uint32 usage = (token & 0xF);
  5838. const uint32 index = ((token >> 16) & 0xF);
  5839. reserved_mask = 0x7FF0FFE0;
  5840. ctx->dwords[0] = usage;
  5841. ctx->dwords[1] = index;
  5842. } // if
  5843. else
  5844. {
  5845. unsupported = 1;
  5846. } // else
  5847. } // else if
  5848. else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 1, 1)) )
  5849. {
  5850. if (regtype == REG_TYPE_INPUT)
  5851. {
  5852. const uint32 usage = (token & 0xF);
  5853. const uint32 index = ((token >> 16) & 0xF);
  5854. reserved_mask = 0x7FF0FFE0;
  5855. ctx->dwords[0] = usage;
  5856. ctx->dwords[1] = index;
  5857. } // if
  5858. else
  5859. {
  5860. unsupported = 1;
  5861. } // else
  5862. } // else if
  5863. else
  5864. {
  5865. unsupported = 1;
  5866. } // else
  5867. if (unsupported)
  5868. fail(ctx, "invalid DCL register type for this shader model");
  5869. if ((token & reserved_mask) != 0)
  5870. fail(ctx, "reserved bits in DCL dword aren't zero");
  5871. return 3;
  5872. } // parse_args_DCL
  5873. static int parse_args_D(Context *ctx)
  5874. {
  5875. int retval = 1;
  5876. retval += parse_destination_token(ctx, &ctx->dest_arg);
  5877. return retval;
  5878. } // parse_args_D
  5879. static int parse_args_S(Context *ctx)
  5880. {
  5881. int retval = 1;
  5882. retval += parse_source_token(ctx, &ctx->source_args[0]);
  5883. return retval;
  5884. } // parse_args_S
  5885. static int parse_args_SS(Context *ctx)
  5886. {
  5887. int retval = 1;
  5888. retval += parse_source_token(ctx, &ctx->source_args[0]);
  5889. retval += parse_source_token(ctx, &ctx->source_args[1]);
  5890. return retval;
  5891. } // parse_args_SS
  5892. static int parse_args_DS(Context *ctx)
  5893. {
  5894. int retval = 1;
  5895. retval += parse_destination_token(ctx, &ctx->dest_arg);
  5896. retval += parse_source_token(ctx, &ctx->source_args[0]);
  5897. return retval;
  5898. } // parse_args_DS
  5899. static int parse_args_DSS(Context *ctx)
  5900. {
  5901. int retval = 1;
  5902. retval += parse_destination_token(ctx, &ctx->dest_arg);
  5903. retval += parse_source_token(ctx, &ctx->source_args[0]);
  5904. retval += parse_source_token(ctx, &ctx->source_args[1]);
  5905. return retval;
  5906. } // parse_args_DSS
  5907. static int parse_args_DSSS(Context *ctx)
  5908. {
  5909. int retval = 1;
  5910. retval += parse_destination_token(ctx, &ctx->dest_arg);
  5911. retval += parse_source_token(ctx, &ctx->source_args[0]);
  5912. retval += parse_source_token(ctx, &ctx->source_args[1]);
  5913. retval += parse_source_token(ctx, &ctx->source_args[2]);
  5914. return retval;
  5915. } // parse_args_DSSS
  5916. static int parse_args_DSSSS(Context *ctx)
  5917. {
  5918. int retval = 1;
  5919. retval += parse_destination_token(ctx, &ctx->dest_arg);
  5920. retval += parse_source_token(ctx, &ctx->source_args[0]);
  5921. retval += parse_source_token(ctx, &ctx->source_args[1]);
  5922. retval += parse_source_token(ctx, &ctx->source_args[2]);
  5923. retval += parse_source_token(ctx, &ctx->source_args[3]);
  5924. return retval;
  5925. } // parse_args_DSSSS
  5926. static int parse_args_SINCOS(Context *ctx)
  5927. {
  5928. // this opcode needs extra registers for sm2 and lower.
  5929. if (!shader_version_atleast(ctx, 3, 0))
  5930. return parse_args_DSSS(ctx);
  5931. return parse_args_DS(ctx);
  5932. } // parse_args_SINCOS
  5933. static int parse_args_TEXCRD(Context *ctx)
  5934. {
  5935. // added extra register in ps_1_4.
  5936. if (shader_version_atleast(ctx, 1, 4))
  5937. return parse_args_DS(ctx);
  5938. return parse_args_D(ctx);
  5939. } // parse_args_TEXCRD
  5940. static int parse_args_TEXLD(Context *ctx)
  5941. {
  5942. // different registers in px_1_3, ps_1_4, and ps_2_0!
  5943. if (shader_version_atleast(ctx, 2, 0))
  5944. return parse_args_DSS(ctx);
  5945. else if (shader_version_atleast(ctx, 1, 4))
  5946. return parse_args_DS(ctx);
  5947. return parse_args_D(ctx);
  5948. } // parse_args_TEXLD
  5949. // State machine functions...
  5950. static ConstantsList *alloc_constant_listitem(Context *ctx)
  5951. {
  5952. ConstantsList *item = (ConstantsList *) Malloc(ctx, sizeof (ConstantsList));
  5953. if (item == NULL)
  5954. return NULL;
  5955. memset(&item->constant, '\0', sizeof (MOJOSHADER_constant));
  5956. item->next = ctx->constants;
  5957. ctx->constants = item;
  5958. ctx->constant_count++;
  5959. return item;
  5960. } // alloc_constant_listitem
  5961. static void state_DEF(Context *ctx)
  5962. {
  5963. const RegisterType regtype = ctx->dest_arg.regtype;
  5964. const int regnum = ctx->dest_arg.regnum;
  5965. // !!! FIXME: fail if same register is defined twice.
  5966. if (ctx->instruction_count != 0)
  5967. fail(ctx, "DEF token must come before any instructions");
  5968. else if (regtype != REG_TYPE_CONST)
  5969. fail(ctx, "DEF token using invalid register");
  5970. else
  5971. {
  5972. ConstantsList *item = alloc_constant_listitem(ctx);
  5973. if (item != NULL)
  5974. {
  5975. item->constant.index = regnum;
  5976. item->constant.type = MOJOSHADER_UNIFORM_FLOAT;
  5977. memcpy(item->constant.value.f, ctx->dwords,
  5978. sizeof (item->constant.value.f));
  5979. set_defined_register(ctx, regtype, regnum);
  5980. } // if
  5981. } // else
  5982. } // state_DEF
  5983. static void state_DEFI(Context *ctx)
  5984. {
  5985. const RegisterType regtype = ctx->dest_arg.regtype;
  5986. const int regnum = ctx->dest_arg.regnum;
  5987. // !!! FIXME: fail if same register is defined twice.
  5988. if (ctx->instruction_count != 0)
  5989. fail(ctx, "DEFI token must come before any instructions");
  5990. else if (regtype != REG_TYPE_CONSTINT)
  5991. fail(ctx, "DEFI token using invalid register");
  5992. else
  5993. {
  5994. ConstantsList *item = alloc_constant_listitem(ctx);
  5995. if (item != NULL)
  5996. {
  5997. item->constant.index = regnum;
  5998. item->constant.type = MOJOSHADER_UNIFORM_INT;
  5999. memcpy(item->constant.value.i, ctx->dwords,
  6000. sizeof (item->constant.value.i));
  6001. set_defined_register(ctx, regtype, regnum);
  6002. } // if
  6003. } // else
  6004. } // state_DEFI
  6005. static void state_DEFB(Context *ctx)
  6006. {
  6007. const RegisterType regtype = ctx->dest_arg.regtype;
  6008. const int regnum = ctx->dest_arg.regnum;
  6009. // !!! FIXME: fail if same register is defined twice.
  6010. if (ctx->instruction_count != 0)
  6011. fail(ctx, "DEFB token must come before any instructions");
  6012. else if (regtype != REG_TYPE_CONSTBOOL)
  6013. fail(ctx, "DEFB token using invalid register");
  6014. else
  6015. {
  6016. ConstantsList *item = alloc_constant_listitem(ctx);
  6017. if (item != NULL)
  6018. {
  6019. item->constant.index = regnum;
  6020. item->constant.type = MOJOSHADER_UNIFORM_BOOL;
  6021. item->constant.value.b = ctx->dwords[0] ? 1 : 0;
  6022. set_defined_register(ctx, regtype, regnum);
  6023. } // if
  6024. } // else
  6025. } // state_DEFB
  6026. static void state_DCL(Context *ctx)
  6027. {
  6028. const DestArgInfo *arg = &ctx->dest_arg;
  6029. const RegisterType regtype = arg->regtype;
  6030. const int regnum = arg->regnum;
  6031. const int wmask = arg->writemask;
  6032. const int mods = arg->result_mod;
  6033. // parse_args_DCL() does a lot of state checking before we get here.
  6034. // !!! FIXME: apparently vs_3_0 can use sampler registers now.
  6035. // !!! FIXME: (but only s0 through s3, not all 16 of them.)
  6036. if (ctx->instruction_count != 0)
  6037. fail(ctx, "DCL token must come before any instructions");
  6038. else if (shader_is_vertex(ctx))
  6039. {
  6040. const MOJOSHADER_usage usage = (const MOJOSHADER_usage) ctx->dwords[0];
  6041. const int index = ctx->dwords[1];
  6042. if (usage >= MOJOSHADER_USAGE_TOTAL)
  6043. {
  6044. fail(ctx, "unknown DCL usage");
  6045. return;
  6046. } // if
  6047. add_attribute_register(ctx, regtype, regnum, usage, index, wmask, mods);
  6048. } // if
  6049. else if (shader_is_pixel(ctx))
  6050. {
  6051. if (regtype == REG_TYPE_SAMPLER)
  6052. add_sampler(ctx, regnum, (TextureType) ctx->dwords[0], 0);
  6053. else
  6054. {
  6055. const MOJOSHADER_usage usage = (MOJOSHADER_usage) ctx->dwords[0];
  6056. const int index = ctx->dwords[1];
  6057. add_attribute_register(ctx, regtype, regnum, usage, index, wmask, mods);
  6058. } // else
  6059. } // else if
  6060. else
  6061. {
  6062. fail(ctx, "unsupported shader type."); // should be caught elsewhere.
  6063. return;
  6064. } // else
  6065. set_defined_register(ctx, regtype, regnum);
  6066. } // state_DCL
  6067. static void state_TEXCRD(Context *ctx)
  6068. {
  6069. if (shader_version_atleast(ctx, 2, 0))
  6070. fail(ctx, "TEXCRD in Shader Model >= 2.0"); // apparently removed.
  6071. } // state_TEXCRD
  6072. static void state_FRC(Context *ctx)
  6073. {
  6074. const DestArgInfo *dst = &ctx->dest_arg;
  6075. if (dst->result_mod & MOD_SATURATE) // according to msdn...
  6076. fail(ctx, "FRC destination can't use saturate modifier");
  6077. else if (!shader_version_atleast(ctx, 2, 0))
  6078. {
  6079. if (!writemask_y(dst->writemask) && !writemask_xy(dst->writemask))
  6080. fail(ctx, "FRC writemask must be .y or .xy for shader model 1.x");
  6081. } // else if
  6082. } // state_FRC
  6083. // replicate the matrix registers to source args. The D3D profile will
  6084. // only use the one legitimate argument, but this saves other profiles
  6085. // from having to build this.
  6086. static void srcarg_matrix_replicate(Context *ctx, const int idx,
  6087. const int rows)
  6088. {
  6089. int i;
  6090. SourceArgInfo *src = &ctx->source_args[idx];
  6091. SourceArgInfo *dst = &ctx->source_args[idx+1];
  6092. for (i = 0; i < (rows-1); i++, dst++)
  6093. {
  6094. memcpy(dst, src, sizeof (SourceArgInfo));
  6095. dst->regnum += (i + 1);
  6096. set_used_register(ctx, dst->regtype, dst->regnum, 0);
  6097. } // for
  6098. } // srcarg_matrix_replicate
  6099. static void state_M4X4(Context *ctx)
  6100. {
  6101. const DestArgInfo *info = &ctx->dest_arg;
  6102. if (!writemask_xyzw(info->writemask))
  6103. fail(ctx, "M4X4 writemask must be full");
  6104. // !!! FIXME: MSDN:
  6105. //The xyzw (default) mask is required for the destination register. Negate and swizzle modifiers are allowed for src0, but not for src1.
  6106. //Swizzle and negate modifiers are invalid for the src0 register. The dest and src0 registers cannot be the same.
  6107. srcarg_matrix_replicate(ctx, 1, 4);
  6108. } // state_M4X4
  6109. static void state_M4X3(Context *ctx)
  6110. {
  6111. const DestArgInfo *info = &ctx->dest_arg;
  6112. if (!writemask_xyz(info->writemask))
  6113. fail(ctx, "M4X3 writemask must be .xyz");
  6114. // !!! FIXME: MSDN stuff
  6115. srcarg_matrix_replicate(ctx, 1, 3);
  6116. } // state_M4X3
  6117. static void state_M3X4(Context *ctx)
  6118. {
  6119. const DestArgInfo *info = &ctx->dest_arg;
  6120. if (!writemask_xyzw(info->writemask))
  6121. fail(ctx, "M3X4 writemask must be .xyzw");
  6122. // !!! FIXME: MSDN stuff
  6123. srcarg_matrix_replicate(ctx, 1, 4);
  6124. } // state_M3X4
  6125. static void state_M3X3(Context *ctx)
  6126. {
  6127. const DestArgInfo *info = &ctx->dest_arg;
  6128. if (!writemask_xyz(info->writemask))
  6129. fail(ctx, "M3X3 writemask must be .xyz");
  6130. // !!! FIXME: MSDN stuff
  6131. srcarg_matrix_replicate(ctx, 1, 3);
  6132. } // state_M3X3
  6133. static void state_M3X2(Context *ctx)
  6134. {
  6135. const DestArgInfo *info = &ctx->dest_arg;
  6136. if (!writemask_xy(info->writemask))
  6137. fail(ctx, "M3X2 writemask must be .xy");
  6138. // !!! FIXME: MSDN stuff
  6139. srcarg_matrix_replicate(ctx, 1, 2);
  6140. } // state_M3X2
  6141. static void state_RET(Context *ctx)
  6142. {
  6143. // MSDN all but says that assembly shaders are more or less serialized
  6144. // HLSL functions, and a RET means you're at the end of one, unlike how
  6145. // most CPUs would behave. This is actually really helpful,
  6146. // since we can use high-level constructs and not a mess of GOTOs,
  6147. // which is a godsend for GLSL...this also means we can consider things
  6148. // like a LOOP without a matching ENDLOOP within a label's section as
  6149. // an error.
  6150. if (ctx->loops > 0)
  6151. fail(ctx, "LOOP without ENDLOOP");
  6152. if (ctx->reps > 0)
  6153. fail(ctx, "REP without ENDREP");
  6154. } // state_RET
  6155. static void check_label_register(Context *ctx, int arg, const char *opcode)
  6156. {
  6157. const SourceArgInfo *info = &ctx->source_args[arg];
  6158. const RegisterType regtype = info->regtype;
  6159. const int regnum = info->regnum;
  6160. if (regtype != REG_TYPE_LABEL)
  6161. failf(ctx, "%s with a non-label register specified", opcode);
  6162. if (!shader_version_atleast(ctx, 2, 0))
  6163. failf(ctx, "%s not supported in Shader Model 1", opcode);
  6164. if ((shader_version_atleast(ctx, 2, 255)) && (regnum > 2047))
  6165. fail(ctx, "label register number must be <= 2047");
  6166. if (regnum > 15)
  6167. fail(ctx, "label register number must be <= 15");
  6168. } // check_label_register
  6169. static void state_LABEL(Context *ctx)
  6170. {
  6171. if (ctx->previous_opcode != OPCODE_RET)
  6172. fail(ctx, "LABEL not followed by a RET");
  6173. check_label_register(ctx, 0, "LABEL");
  6174. set_defined_register(ctx, REG_TYPE_LABEL, ctx->source_args[0].regnum);
  6175. } // state_LABEL
  6176. static void check_call_loop_wrappage(Context *ctx, const int regnum)
  6177. {
  6178. // msdn says subroutines inherit aL register if you're in a loop when
  6179. // you call, and further more _if you ever call this function in a loop,
  6180. // it must always be called in a loop_. So we'll just pass our loop
  6181. // variable as a function parameter in those cases.
  6182. const int current_usage = (ctx->loops > 0) ? 1 : -1;
  6183. RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, regnum);
  6184. assert(reg != NULL);
  6185. if (reg->misc == 0)
  6186. reg->misc = current_usage;
  6187. else if (reg->misc != current_usage)
  6188. {
  6189. if (current_usage == 1)
  6190. fail(ctx, "CALL to this label must be wrapped in LOOP/ENDLOOP");
  6191. else
  6192. fail(ctx, "CALL to this label must not be wrapped in LOOP/ENDLOOP");
  6193. } // else if
  6194. } // check_call_loop_wrappage
  6195. static void state_CALL(Context *ctx)
  6196. {
  6197. check_label_register(ctx, 0, "CALL");
  6198. check_call_loop_wrappage(ctx, ctx->source_args[0].regnum);
  6199. } // state_CALL
  6200. static void state_CALLNZ(Context *ctx)
  6201. {
  6202. const RegisterType regtype = ctx->source_args[1].regtype;
  6203. if ((regtype != REG_TYPE_CONSTBOOL) && (regtype != REG_TYPE_PREDICATE))
  6204. fail(ctx, "CALLNZ argument isn't constbool or predicate register");
  6205. check_label_register(ctx, 0, "CALLNZ");
  6206. check_call_loop_wrappage(ctx, ctx->source_args[0].regnum);
  6207. } // state_CALLNZ
  6208. static void state_MOVA(Context *ctx)
  6209. {
  6210. if (ctx->dest_arg.regtype != REG_TYPE_ADDRESS)
  6211. fail(ctx, "MOVA argument isn't address register");
  6212. } // state_MOVA
  6213. static void state_RCP(Context *ctx)
  6214. {
  6215. if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6216. fail(ctx, "RCP without replicate swizzzle");
  6217. } // state_RCP
  6218. static void state_LOOP(Context *ctx)
  6219. {
  6220. if (ctx->source_args[0].regtype != REG_TYPE_LOOP)
  6221. fail(ctx, "LOOP argument isn't loop register");
  6222. else if (ctx->source_args[1].regtype != REG_TYPE_CONSTINT)
  6223. fail(ctx, "LOOP argument isn't constint register");
  6224. else
  6225. ctx->loops++;
  6226. } // state_LOOP
  6227. static void state_ENDLOOP(Context *ctx)
  6228. {
  6229. // !!! FIXME: check that we aren't straddling an IF block.
  6230. if (ctx->loops <= 0)
  6231. fail(ctx, "ENDLOOP without LOOP");
  6232. ctx->loops--;
  6233. } // state_ENDLOOP
  6234. static void state_BREAKP(Context *ctx)
  6235. {
  6236. const RegisterType regtype = ctx->source_args[0].regtype;
  6237. if (regtype != REG_TYPE_PREDICATE)
  6238. fail(ctx, "BREAKP argument isn't predicate register");
  6239. else if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6240. fail(ctx, "BREAKP without replicate swizzzle");
  6241. else if ((ctx->loops == 0) && (ctx->reps == 0))
  6242. fail(ctx, "BREAKP outside LOOP/ENDLOOP or REP/ENDREP");
  6243. } // state_BREAKP
  6244. static void state_BREAK(Context *ctx)
  6245. {
  6246. if ((ctx->loops == 0) && (ctx->reps == 0))
  6247. fail(ctx, "BREAK outside LOOP/ENDLOOP or REP/ENDREP");
  6248. } // state_BREAK
  6249. static void state_SETP(Context *ctx)
  6250. {
  6251. const RegisterType regtype = ctx->dest_arg.regtype;
  6252. if (regtype != REG_TYPE_PREDICATE)
  6253. fail(ctx, "SETP argument isn't predicate register");
  6254. } // state_SETP
  6255. static void state_REP(Context *ctx)
  6256. {
  6257. const RegisterType regtype = ctx->source_args[0].regtype;
  6258. if (regtype != REG_TYPE_CONSTINT)
  6259. fail(ctx, "REP argument isn't constint register");
  6260. ctx->reps++;
  6261. if (ctx->reps > ctx->max_reps)
  6262. ctx->max_reps = ctx->reps;
  6263. } // state_REP
  6264. static void state_ENDREP(Context *ctx)
  6265. {
  6266. // !!! FIXME: check that we aren't straddling an IF block.
  6267. if (ctx->reps <= 0)
  6268. fail(ctx, "ENDREP without REP");
  6269. ctx->reps--;
  6270. } // state_ENDREP
  6271. static void state_CMP(Context *ctx)
  6272. {
  6273. ctx->cmps++;
  6274. // extra limitations for ps <= 1.4 ...
  6275. if (!shader_version_atleast(ctx, 1, 4))
  6276. {
  6277. int i;
  6278. const DestArgInfo *dst = &ctx->dest_arg;
  6279. const RegisterType dregtype = dst->regtype;
  6280. const int dregnum = dst->regnum;
  6281. if (ctx->cmps > 3)
  6282. fail(ctx, "only 3 CMP instructions allowed in this shader model");
  6283. for (i = 0; i < 3; i++)
  6284. {
  6285. const SourceArgInfo *src = &ctx->source_args[i];
  6286. const RegisterType sregtype = src->regtype;
  6287. const int sregnum = src->regnum;
  6288. if ((dregtype == sregtype) && (dregnum == sregnum))
  6289. fail(ctx, "CMP dest can't match sources in this shader model");
  6290. } // for
  6291. ctx->instruction_count++; // takes an extra slot in ps_1_2 and _3.
  6292. } // if
  6293. } // state_CMP
  6294. static void state_DP4(Context *ctx)
  6295. {
  6296. // extra limitations for ps <= 1.4 ...
  6297. if (!shader_version_atleast(ctx, 1, 4))
  6298. ctx->instruction_count++; // takes an extra slot in ps_1_2 and _3.
  6299. } // state_DP4
  6300. static void state_CND(Context *ctx)
  6301. {
  6302. // apparently it was removed...it's not in the docs past ps_1_4 ...
  6303. if (shader_version_atleast(ctx, 2, 0))
  6304. fail(ctx, "CND not allowed in this shader model");
  6305. // extra limitations for ps <= 1.4 ...
  6306. else if (!shader_version_atleast(ctx, 1, 4))
  6307. {
  6308. const SourceArgInfo *src = &ctx->source_args[0];
  6309. if ((src->regtype != REG_TYPE_TEMP) || (src->regnum != 0) ||
  6310. (src->swizzle != 0xFF))
  6311. {
  6312. fail(ctx, "CND src must be r0.a in this shader model");
  6313. } // if
  6314. } // if
  6315. } // state_CND
  6316. static void state_POW(Context *ctx)
  6317. {
  6318. if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6319. fail(ctx, "POW src0 must have replicate swizzle");
  6320. else if (!replicate_swizzle(ctx->source_args[1].swizzle))
  6321. fail(ctx, "POW src1 must have replicate swizzle");
  6322. } // state_POW
  6323. static void state_LOG(Context *ctx)
  6324. {
  6325. if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6326. fail(ctx, "LOG src0 must have replicate swizzle");
  6327. } // state_LOG
  6328. static void state_LOGP(Context *ctx)
  6329. {
  6330. if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6331. fail(ctx, "LOGP src0 must have replicate swizzle");
  6332. } // state_LOGP
  6333. static void state_SINCOS(Context *ctx)
  6334. {
  6335. const DestArgInfo *dst = &ctx->dest_arg;
  6336. const int mask = dst->writemask;
  6337. if (!writemask_x(mask) && !writemask_y(mask) && !writemask_xy(mask))
  6338. fail(ctx, "SINCOS write mask must be .x or .y or .xy");
  6339. else if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6340. fail(ctx, "SINCOS src0 must have replicate swizzle");
  6341. else if (dst->result_mod & MOD_SATURATE) // according to msdn...
  6342. fail(ctx, "SINCOS destination can't use saturate modifier");
  6343. // this opcode needs extra registers, with extra limitations, for <= sm2.
  6344. else if (!shader_version_atleast(ctx, 3, 0))
  6345. {
  6346. int i;
  6347. for (i = 1; i < 3; i++)
  6348. {
  6349. if (ctx->source_args[i].regtype != REG_TYPE_CONST)
  6350. {
  6351. failf(ctx, "SINCOS src%d must be constfloat", i);
  6352. return;
  6353. } // if
  6354. } // for
  6355. if (ctx->source_args[1].regnum == ctx->source_args[2].regnum)
  6356. fail(ctx, "SINCOS src1 and src2 must be different registers");
  6357. } // if
  6358. } // state_SINCOS
  6359. static void state_IF(Context *ctx)
  6360. {
  6361. const RegisterType regtype = ctx->source_args[0].regtype;
  6362. if ((regtype != REG_TYPE_PREDICATE) && (regtype != REG_TYPE_CONSTBOOL))
  6363. fail(ctx, "IF src0 must be CONSTBOOL or PREDICATE");
  6364. else if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6365. fail(ctx, "IF src0 must have replicate swizzle");
  6366. // !!! FIXME: track if nesting depth.
  6367. } // state_IF
  6368. static void state_IFC(Context *ctx)
  6369. {
  6370. if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6371. fail(ctx, "IFC src0 must have replicate swizzle");
  6372. else if (!replicate_swizzle(ctx->source_args[1].swizzle))
  6373. fail(ctx, "IFC src1 must have replicate swizzle");
  6374. // !!! FIXME: track if nesting depth.
  6375. } // state_IFC
  6376. static void state_BREAKC(Context *ctx)
  6377. {
  6378. if (!replicate_swizzle(ctx->source_args[0].swizzle))
  6379. fail(ctx, "BREAKC src1 must have replicate swizzle");
  6380. else if (!replicate_swizzle(ctx->source_args[1].swizzle))
  6381. fail(ctx, "BREAKC src2 must have replicate swizzle");
  6382. else if ((ctx->loops == 0) && (ctx->reps == 0))
  6383. fail(ctx, "BREAKC outside LOOP/ENDLOOP or REP/ENDREP");
  6384. } // state_BREAKC
  6385. static void state_TEXKILL(Context *ctx)
  6386. {
  6387. // The MSDN docs say this should be a source arg, but the driver docs
  6388. // say it's a dest arg. That's annoying.
  6389. const DestArgInfo *info = &ctx->dest_arg;
  6390. const RegisterType regtype = info->regtype;
  6391. if (!writemask_xyzw(info->writemask))
  6392. fail(ctx, "TEXKILL writemask must be .xyzw");
  6393. else if ((regtype != REG_TYPE_TEMP) && (regtype != REG_TYPE_TEXTURE))
  6394. fail(ctx, "TEXKILL must use a temp or texture register");
  6395. // !!! FIXME: "If a temporary register is used, all components must have been previously written."
  6396. // !!! FIXME: "If a texture register is used, all components that are read must have been declared."
  6397. // !!! FIXME: there are further limitations in ps_1_3 and earlier.
  6398. } // state_TEXKILL
  6399. // Some rules that apply to some of the fruity ps_1_1 texture opcodes...
  6400. static void state_texops(Context *ctx, const char *opcode,
  6401. const int dims, const int texbem)
  6402. {
  6403. const DestArgInfo *dst = &ctx->dest_arg;
  6404. const SourceArgInfo *src = &ctx->source_args[0];
  6405. if (dst->regtype != REG_TYPE_TEXTURE)
  6406. failf(ctx, "%s destination must be a texture register", opcode);
  6407. if (src->regtype != REG_TYPE_TEXTURE)
  6408. failf(ctx, "%s source must be a texture register", opcode);
  6409. if (src->regnum >= dst->regnum) // so says MSDN.
  6410. failf(ctx, "%s dest must be a higher register than source", opcode);
  6411. if (dims)
  6412. {
  6413. TextureType ttyp = (dims == 2) ? TEXTURE_TYPE_2D : TEXTURE_TYPE_CUBE;
  6414. add_sampler(ctx, dst->regnum, ttyp, texbem);
  6415. } // if
  6416. add_attribute_register(ctx, REG_TYPE_TEXTURE, dst->regnum,
  6417. MOJOSHADER_USAGE_TEXCOORD, dst->regnum, 0xF, 0);
  6418. // Strictly speaking, there should be a TEX opcode prior to this call that
  6419. // should fill in this metadata, but I'm not sure that's required for the
  6420. // shader to assemble in D3D, so we'll do this so we don't fail with a
  6421. // cryptic error message even if the developer didn't do the TEX.
  6422. add_attribute_register(ctx, REG_TYPE_TEXTURE, src->regnum,
  6423. MOJOSHADER_USAGE_TEXCOORD, src->regnum, 0xF, 0);
  6424. } // state_texops
  6425. static void state_texbem(Context *ctx, const char *opcode)
  6426. {
  6427. // The TEXBEM equasion, according to MSDN:
  6428. //u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R
  6429. // + D3DTSS_BUMPENVMAT10(stage m)*t(n)G
  6430. //v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R
  6431. // + D3DTSS_BUMPENVMAT11(stage m)*t(n)G
  6432. //t(m)RGBA = TextureSample(stage m)
  6433. //
  6434. // ...TEXBEML adds this at the end:
  6435. //t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) +
  6436. // D3DTSS_BUMPENVLOFFSET(stage m)]
  6437. if (shader_version_atleast(ctx, 1, 4))
  6438. failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);
  6439. if (!shader_version_atleast(ctx, 1, 2))
  6440. {
  6441. if (ctx->source_args[0].src_mod == SRCMOD_SIGN)
  6442. failf(ctx, "%s forbids _bx2 on source reg before ps_1_2", opcode);
  6443. } // if
  6444. // !!! FIXME: MSDN:
  6445. // !!! FIXME: Register data that has been read by a texbem
  6446. // !!! FIXME: or texbeml instruction cannot be read later,
  6447. // !!! FIXME: except by another texbem or texbeml.
  6448. state_texops(ctx, opcode, 2, 1);
  6449. } // state_texbem
  6450. static void state_TEXBEM(Context *ctx)
  6451. {
  6452. state_texbem(ctx, "TEXBEM");
  6453. } // state_TEXBEM
  6454. static void state_TEXBEML(Context *ctx)
  6455. {
  6456. state_texbem(ctx, "TEXBEML");
  6457. } // state_TEXBEML
  6458. static void state_TEXM3X2PAD(Context *ctx)
  6459. {
  6460. if (shader_version_atleast(ctx, 1, 4))
  6461. fail(ctx, "TEXM3X2PAD opcode not available after Shader Model 1.3");
  6462. state_texops(ctx, "TEXM3X2PAD", 0, 0);
  6463. // !!! FIXME: check for correct opcode existance and order more rigorously?
  6464. ctx->texm3x2pad_src0 = ctx->source_args[0].regnum;
  6465. ctx->texm3x2pad_dst0 = ctx->dest_arg.regnum;
  6466. } // state_TEXM3X2PAD
  6467. static void state_TEXM3X2TEX(Context *ctx)
  6468. {
  6469. if (shader_version_atleast(ctx, 1, 4))
  6470. fail(ctx, "TEXM3X2TEX opcode not available after Shader Model 1.3");
  6471. if (ctx->texm3x2pad_dst0 == -1)
  6472. fail(ctx, "TEXM3X2TEX opcode without matching TEXM3X2PAD");
  6473. // !!! FIXME: check for correct opcode existance and order more rigorously?
  6474. state_texops(ctx, "TEXM3X2TEX", 2, 0);
  6475. ctx->reset_texmpad = 1;
  6476. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
  6477. ctx->dest_arg.regnum);
  6478. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  6479. // A samplermap might change this to something nonsensical.
  6480. if (ttype != TEXTURE_TYPE_2D)
  6481. fail(ctx, "TEXM3X2TEX needs a 2D sampler");
  6482. } // state_TEXM3X2TEX
  6483. static void state_TEXM3X3PAD(Context *ctx)
  6484. {
  6485. if (shader_version_atleast(ctx, 1, 4))
  6486. fail(ctx, "TEXM3X2TEX opcode not available after Shader Model 1.3");
  6487. state_texops(ctx, "TEXM3X3PAD", 0, 0);
  6488. // !!! FIXME: check for correct opcode existance and order more rigorously?
  6489. if (ctx->texm3x3pad_dst0 == -1)
  6490. {
  6491. ctx->texm3x3pad_src0 = ctx->source_args[0].regnum;
  6492. ctx->texm3x3pad_dst0 = ctx->dest_arg.regnum;
  6493. } // if
  6494. else if (ctx->texm3x3pad_dst1 == -1)
  6495. {
  6496. ctx->texm3x3pad_src1 = ctx->source_args[0].regnum;
  6497. ctx->texm3x3pad_dst1 = ctx->dest_arg.regnum;
  6498. } // else
  6499. } // state_TEXM3X3PAD
  6500. static void state_texm3x3(Context *ctx, const char *opcode, const int dims)
  6501. {
  6502. // !!! FIXME: check for correct opcode existance and order more rigorously?
  6503. if (shader_version_atleast(ctx, 1, 4))
  6504. failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);
  6505. if (ctx->texm3x3pad_dst1 == -1)
  6506. failf(ctx, "%s opcode without matching TEXM3X3PADs", opcode);
  6507. state_texops(ctx, opcode, dims, 0);
  6508. ctx->reset_texmpad = 1;
  6509. RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
  6510. ctx->dest_arg.regnum);
  6511. const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
  6512. // A samplermap might change this to something nonsensical.
  6513. if ((ttype != TEXTURE_TYPE_VOLUME) && (ttype != TEXTURE_TYPE_CUBE))
  6514. failf(ctx, "%s needs a 3D or Cubemap sampler", opcode);
  6515. } // state_texm3x3
  6516. static void state_TEXM3X3(Context *ctx)
  6517. {
  6518. if (!shader_version_atleast(ctx, 1, 2))
  6519. fail(ctx, "TEXM3X3 opcode not available in Shader Model 1.1");
  6520. state_texm3x3(ctx, "TEXM3X3", 0);
  6521. } // state_TEXM3X3
  6522. static void state_TEXM3X3TEX(Context *ctx)
  6523. {
  6524. state_texm3x3(ctx, "TEXM3X3TEX", 3);
  6525. } // state_TEXM3X3TEX
  6526. static void state_TEXM3X3SPEC(Context *ctx)
  6527. {
  6528. state_texm3x3(ctx, "TEXM3X3SPEC", 3);
  6529. if (ctx->source_args[1].regtype != REG_TYPE_CONST)
  6530. fail(ctx, "TEXM3X3SPEC final arg must be a constant register");
  6531. } // state_TEXM3X3SPEC
  6532. static void state_TEXM3X3VSPEC(Context *ctx)
  6533. {
  6534. state_texm3x3(ctx, "TEXM3X3VSPEC", 3);
  6535. } // state_TEXM3X3VSPEC
  6536. static void state_TEXLD(Context *ctx)
  6537. {
  6538. if (shader_version_atleast(ctx, 2, 0))
  6539. {
  6540. const SourceArgInfo *src0 = &ctx->source_args[0];
  6541. const SourceArgInfo *src1 = &ctx->source_args[1];
  6542. // !!! FIXME: verify texldp restrictions:
  6543. //http://msdn.microsoft.com/en-us/library/bb206221(VS.85).aspx
  6544. // !!! FIXME: ...and texldb, too.
  6545. //http://msdn.microsoft.com/en-us/library/bb206217(VS.85).aspx
  6546. //const RegisterType rt0 = src0->regtype;
  6547. // !!! FIXME: msdn says it has to be temp, but Microsoft's HLSL
  6548. // !!! FIXME: compiler is generating code that uses oC0 for a dest.
  6549. //if (ctx->dest_arg.regtype != REG_TYPE_TEMP)
  6550. // fail(ctx, "TEXLD dest must be a temp register");
  6551. // !!! FIXME: this can be an REG_TYPE_INPUT, DCL'd to TEXCOORD.
  6552. //else if ((rt0 != REG_TYPE_TEXTURE) && (rt0 != REG_TYPE_TEMP))
  6553. // fail(ctx, "TEXLD src0 must be texture or temp register");
  6554. //else
  6555. if (src0->src_mod != SRCMOD_NONE)
  6556. fail(ctx, "TEXLD src0 must have no modifiers");
  6557. else if (src1->regtype != REG_TYPE_SAMPLER)
  6558. fail(ctx, "TEXLD src1 must be sampler register");
  6559. else if (src1->src_mod != SRCMOD_NONE)
  6560. fail(ctx, "TEXLD src1 must have no modifiers");
  6561. else if ( (ctx->instruction_controls != CONTROL_TEXLD) &&
  6562. (ctx->instruction_controls != CONTROL_TEXLDP) &&
  6563. (ctx->instruction_controls != CONTROL_TEXLDB) )
  6564. {
  6565. fail(ctx, "TEXLD has unknown control bits");
  6566. } // else if
  6567. // Shader Model 3 added swizzle support to this opcode.
  6568. if (!shader_version_atleast(ctx, 3, 0))
  6569. {
  6570. if (!no_swizzle(src0->swizzle))
  6571. fail(ctx, "TEXLD src0 must not swizzle");
  6572. else if (!no_swizzle(src1->swizzle))
  6573. fail(ctx, "TEXLD src1 must not swizzle");
  6574. } // if
  6575. if ( ((TextureType) ctx->source_args[1].regnum) == TEXTURE_TYPE_CUBE )
  6576. ctx->instruction_count += 3;
  6577. } // if
  6578. else if (shader_version_atleast(ctx, 1, 4))
  6579. {
  6580. // !!! FIXME: checks for ps_1_4 version here...
  6581. } // else if
  6582. else
  6583. {
  6584. // !!! FIXME: add (other?) checks for ps_1_1 version here...
  6585. const DestArgInfo *info = &ctx->dest_arg;
  6586. const int sampler = info->regnum;
  6587. if (info->regtype != REG_TYPE_TEXTURE)
  6588. fail(ctx, "TEX param must be a texture register");
  6589. add_sampler(ctx, sampler, TEXTURE_TYPE_2D, 0);
  6590. add_attribute_register(ctx, REG_TYPE_TEXTURE, sampler,
  6591. MOJOSHADER_USAGE_TEXCOORD, sampler, 0xF, 0);
  6592. } // else
  6593. } // state_TEXLD
  6594. static void state_TEXLDL(Context *ctx)
  6595. {
  6596. if (!shader_version_atleast(ctx, 3, 0))
  6597. fail(ctx, "TEXLDL in version < Shader Model 3.0");
  6598. else if (ctx->source_args[1].regtype != REG_TYPE_SAMPLER)
  6599. fail(ctx, "TEXLDL src1 must be sampler register");
  6600. else
  6601. {
  6602. if ( ((TextureType) ctx->source_args[1].regnum) == TEXTURE_TYPE_CUBE )
  6603. ctx->instruction_count += 3;
  6604. } // else
  6605. } // state_TEXLDL
  6606. static void state_DP2ADD(Context *ctx)
  6607. {
  6608. if (!replicate_swizzle(ctx->source_args[2].swizzle))
  6609. fail(ctx, "DP2ADD src2 must have replicate swizzle");
  6610. } // state_DP2ADD
  6611. // Lookup table for instruction opcodes...
  6612. typedef struct
  6613. {
  6614. const char *opcode_string;
  6615. int slots; // number of instruction slots this opcode eats.
  6616. MOJOSHADER_shaderType shader_types; // mask of types that can use opcode.
  6617. args_function parse_args;
  6618. state_function state;
  6619. emit_function emitter[STATICARRAYLEN(profiles)];
  6620. } Instruction;
  6621. // These have to be in the right order! This array is indexed by the value
  6622. // of the instruction token.
  6623. static const Instruction instructions[] =
  6624. {
  6625. #define INSTRUCTION_STATE(op, opstr, slots, a, t) { \
  6626. opstr, slots, t, parse_args_##a, state_##op, PROFILE_EMITTERS(op) \
  6627. },
  6628. #define INSTRUCTION(op, opstr, slots, a, t) { \
  6629. opstr, slots, t, parse_args_##a, 0, PROFILE_EMITTERS(op) \
  6630. },
  6631. #define MOJOSHADER_DO_INSTRUCTION_TABLE 1
  6632. #include "mojoshader_internal.h"
  6633. #undef MOJOSHADER_DO_INSTRUCTION_TABLE
  6634. #undef INSTRUCTION
  6635. #undef INSTRUCTION_STATE
  6636. };
  6637. // parse various token types...
  6638. static int parse_instruction_token(Context *ctx)
  6639. {
  6640. int retval = 0;
  6641. const int start_position = ctx->current_position;
  6642. const uint32 *start_tokens = ctx->tokens;
  6643. const uint32 start_tokencount = ctx->tokencount;
  6644. const uint32 token = SWAP32(*(ctx->tokens));
  6645. const uint32 opcode = (token & 0xFFFF);
  6646. const uint32 controls = ((token >> 16) & 0xFF);
  6647. const uint32 insttoks = ((token >> 24) & 0x0F);
  6648. const int coissue = (token & 0x40000000) ? 1 : 0;
  6649. const int predicated = (token & 0x10000000) ? 1 : 0;
  6650. if ( opcode >= (sizeof (instructions) / sizeof (instructions[0])) )
  6651. return 0; // not an instruction token, or just not handled here.
  6652. const Instruction *instruction = &instructions[opcode];
  6653. const emit_function emitter = instruction->emitter[ctx->profileid];
  6654. if ((token & 0x80000000) != 0)
  6655. fail(ctx, "instruction token high bit must be zero."); // so says msdn.
  6656. if (instruction->opcode_string == NULL)
  6657. {
  6658. fail(ctx, "Unknown opcode.");
  6659. return insttoks + 1; // pray that you resync later.
  6660. } // if
  6661. ctx->coissue = coissue;
  6662. if (coissue)
  6663. {
  6664. if (!shader_is_pixel(ctx))
  6665. fail(ctx, "coissue instruction on non-pixel shader");
  6666. if (shader_version_atleast(ctx, 2, 0))
  6667. fail(ctx, "coissue instruction in Shader Model >= 2.0");
  6668. } // if
  6669. if ((ctx->shader_type & instruction->shader_types) == 0)
  6670. {
  6671. failf(ctx, "opcode '%s' not available in this shader type.",
  6672. instruction->opcode_string);
  6673. } // if
  6674. memset(ctx->dwords, '\0', sizeof (ctx->dwords));
  6675. ctx->instruction_controls = controls;
  6676. ctx->predicated = predicated;
  6677. // Update the context with instruction's arguments.
  6678. adjust_token_position(ctx, 1);
  6679. retval = instruction->parse_args(ctx);
  6680. if (predicated)
  6681. retval += parse_predicated_token(ctx);
  6682. // parse_args() moves these forward for convenience...reset them.
  6683. ctx->tokens = start_tokens;
  6684. ctx->tokencount = start_tokencount;
  6685. ctx->current_position = start_position;
  6686. if (instruction->state != NULL)
  6687. instruction->state(ctx);
  6688. ctx->instruction_count += instruction->slots;
  6689. if (!isfail(ctx))
  6690. emitter(ctx); // call the profile's emitter.
  6691. if (ctx->reset_texmpad)
  6692. {
  6693. ctx->texm3x2pad_dst0 = -1;
  6694. ctx->texm3x2pad_src0 = -1;
  6695. ctx->texm3x3pad_dst0 = -1;
  6696. ctx->texm3x3pad_src0 = -1;
  6697. ctx->texm3x3pad_dst1 = -1;
  6698. ctx->texm3x3pad_src1 = -1;
  6699. ctx->reset_texmpad = 0;
  6700. } // if
  6701. ctx->previous_opcode = opcode;
  6702. ctx->scratch_registers = 0; // reset after every instruction.
  6703. if (!shader_version_atleast(ctx, 2, 0))
  6704. {
  6705. if (insttoks != 0) // reserved field in shaders < 2.0 ...
  6706. fail(ctx, "instruction token count must be zero");
  6707. } // if
  6708. else
  6709. {
  6710. if (((uint32)retval) != (insttoks+1))
  6711. {
  6712. failf(ctx, "wrong token count (%u, not %u) for opcode '%s'.",
  6713. (uint) retval, (uint) (insttoks+1),
  6714. instruction->opcode_string);
  6715. retval = insttoks + 1; // try to keep sync.
  6716. } // if
  6717. } // else
  6718. return retval;
  6719. } // parse_instruction_token
  6720. static int parse_version_token(Context *ctx, const char *profilestr)
  6721. {
  6722. if (ctx->tokencount == 0)
  6723. {
  6724. fail(ctx, "Expected version token, got none at all.");
  6725. return 0;
  6726. } // if
  6727. const uint32 token = SWAP32(*(ctx->tokens));
  6728. const uint32 shadertype = ((token >> 16) & 0xFFFF);
  6729. const uint8 major = (uint8) ((token >> 8) & 0xFF);
  6730. const uint8 minor = (uint8) (token & 0xFF);
  6731. ctx->version_token = token;
  6732. // 0xFFFF == pixel shader, 0xFFFE == vertex shader
  6733. if (shadertype == 0xFFFF)
  6734. {
  6735. ctx->shader_type = MOJOSHADER_TYPE_PIXEL;
  6736. ctx->shader_type_str = "ps";
  6737. } // if
  6738. else if (shadertype == 0xFFFE)
  6739. {
  6740. ctx->shader_type = MOJOSHADER_TYPE_VERTEX;
  6741. ctx->shader_type_str = "vs";
  6742. } // else if
  6743. else // geometry shader? Bogus data?
  6744. {
  6745. fail(ctx, "Unsupported shader type or not a shader at all");
  6746. return -1;
  6747. } // else
  6748. ctx->major_ver = major;
  6749. ctx->minor_ver = minor;
  6750. if (!shader_version_supported(major, minor))
  6751. {
  6752. failf(ctx, "Shader Model %u.%u is currently unsupported.",
  6753. (uint) major, (uint) minor);
  6754. } // if
  6755. if (!isfail(ctx))
  6756. ctx->profile->start_emitter(ctx, profilestr);
  6757. return 1; // ate one token.
  6758. } // parse_version_token
  6759. static int parse_ctab_string(const uint8 *start, const uint32 bytes,
  6760. const uint32 name)
  6761. {
  6762. // Make sure strings don't overflow the CTAB buffer...
  6763. if (name < bytes)
  6764. {
  6765. int i;
  6766. const int slenmax = bytes - name;
  6767. const char *namestr = (const char *) (start + name);
  6768. for (i = 0; i < slenmax; i++)
  6769. {
  6770. if (namestr[i] == '\0')
  6771. return 1; // it's okay.
  6772. } // for
  6773. } // if
  6774. return 0; // overflowed.
  6775. } // parse_ctab_string
  6776. static int parse_ctab_typeinfo(Context *ctx, const uint8 *start,
  6777. const uint32 bytes, const uint32 pos,
  6778. MOJOSHADER_symbolTypeInfo *info)
  6779. {
  6780. if ((pos + 16) >= bytes)
  6781. return 0; // corrupt CTAB.
  6782. const uint16 *typeptr = (const uint16 *) (start + pos);
  6783. info->parameter_class = (MOJOSHADER_symbolClass) SWAP16(typeptr[0]);
  6784. info->parameter_type = (MOJOSHADER_symbolType) SWAP16(typeptr[1]);
  6785. info->rows = (unsigned int) SWAP16(typeptr[2]);
  6786. info->columns = (unsigned int) SWAP16(typeptr[3]);
  6787. info->elements = (unsigned int) SWAP16(typeptr[4]);
  6788. info->member_count = (unsigned int) SWAP16(typeptr[5]);
  6789. if ((pos + 16 + (info->member_count * 8)) >= bytes)
  6790. return 0; // corrupt CTAB.
  6791. if (info->member_count == 0)
  6792. info->members = NULL;
  6793. else
  6794. {
  6795. const size_t len = sizeof (MOJOSHADER_symbolStructMember) *
  6796. info->member_count;
  6797. info->members = (MOJOSHADER_symbolStructMember *) Malloc(ctx, len);
  6798. if (info->members == NULL)
  6799. return 1; // we'll check ctx->out_of_memory later.
  6800. memset(info->members, '\0', len);
  6801. } // else
  6802. int i;
  6803. const uint32 *member = (const uint32 *)((const uint8 *) (&typeptr[6]));
  6804. for (i = 0; i < info->member_count; i++)
  6805. {
  6806. MOJOSHADER_symbolStructMember *mbr = &info->members[i];
  6807. const uint32 name = SWAP32(member[0]);
  6808. const uint32 memberinfopos = SWAP32(member[1]);
  6809. member += 2;
  6810. if (!parse_ctab_string(start, bytes, name))
  6811. return 0; // info->members will be free()'d elsewhere.
  6812. mbr->name = StrDup(ctx, (const char *) (start + name));
  6813. if (mbr->name == NULL)
  6814. return 1; // we'll check ctx->out_of_memory later.
  6815. if (!parse_ctab_typeinfo(ctx, start, bytes, memberinfopos, &mbr->info))
  6816. return 0;
  6817. if (ctx->out_of_memory)
  6818. return 1; // drop out now.
  6819. } // for
  6820. return 1;
  6821. } // parse_ctab_typeinfo
  6822. // Microsoft's tools add a CTAB comment to all shaders. This is the
  6823. // "constant table," or specifically: D3DXSHADER_CONSTANTTABLE:
  6824. // http://msdn.microsoft.com/en-us/library/bb205440(VS.85).aspx
  6825. // This may tell us high-level truths about an otherwise generic low-level
  6826. // registers, for instance, how large an array actually is, etc.
  6827. static void parse_constant_table(Context *ctx, const uint32 *tokens,
  6828. const uint32 bytes, const uint32 okay_version,
  6829. const int setvariables, CtabData *ctab)
  6830. {
  6831. const uint32 id = SWAP32(tokens[1]);
  6832. if (id != CTAB_ID)
  6833. return; // not the constant table.
  6834. assert(ctab->have_ctab == 0); // !!! FIXME: can you have more than one?
  6835. ctab->have_ctab = 1;
  6836. const uint8 *start = (uint8 *) &tokens[2];
  6837. if (bytes < 32)
  6838. {
  6839. fail(ctx, "Truncated CTAB data");
  6840. return;
  6841. } // if
  6842. const uint32 size = SWAP32(tokens[2]);
  6843. const uint32 creator = SWAP32(tokens[3]);
  6844. const uint32 version = SWAP32(tokens[4]);
  6845. const uint32 constants = SWAP32(tokens[5]);
  6846. const uint32 constantinfo = SWAP32(tokens[6]);
  6847. const uint32 target = SWAP32(tokens[8]);
  6848. if (size != CTAB_SIZE)
  6849. goto corrupt_ctab;
  6850. if (version != okay_version) goto corrupt_ctab;
  6851. if (creator >= bytes) goto corrupt_ctab;
  6852. if ((constantinfo + (constants * CINFO_SIZE)) >= bytes) goto corrupt_ctab;
  6853. if (target >= bytes) goto corrupt_ctab;
  6854. if (!parse_ctab_string(start, bytes, target)) goto corrupt_ctab;
  6855. // !!! FIXME: check that (start+target) points to "ps_3_0", etc.
  6856. ctab->symbol_count = constants;
  6857. ctab->symbols = (MOJOSHADER_symbol *)Malloc(ctx, sizeof (MOJOSHADER_symbol) * constants);
  6858. if (ctab->symbols == NULL)
  6859. return;
  6860. memset(ctab->symbols, '\0', sizeof (MOJOSHADER_symbol) * constants);
  6861. uint32 i = 0;
  6862. for (i = 0; i < constants; i++)
  6863. {
  6864. const uint8 *ptr = start + constantinfo + (i * CINFO_SIZE);
  6865. const uint32 name = SWAP32(*((uint32 *) (ptr + 0)));
  6866. const uint16 regset = SWAP16(*((uint16 *) (ptr + 4)));
  6867. const uint16 regidx = SWAP16(*((uint16 *) (ptr + 6)));
  6868. const uint16 regcnt = SWAP16(*((uint16 *) (ptr + 8)));
  6869. const uint32 typeinf = SWAP32(*((uint32 *) (ptr + 12)));
  6870. const uint32 defval = SWAP32(*((uint32 *) (ptr + 16)));
  6871. MOJOSHADER_uniformType mojotype = MOJOSHADER_UNIFORM_UNKNOWN;
  6872. if (!parse_ctab_string(start, bytes, name)) goto corrupt_ctab;
  6873. if (defval >= bytes) goto corrupt_ctab;
  6874. switch (regset)
  6875. {
  6876. case 0: mojotype = MOJOSHADER_UNIFORM_BOOL; break;
  6877. case 1: mojotype = MOJOSHADER_UNIFORM_INT; break;
  6878. case 2: mojotype = MOJOSHADER_UNIFORM_FLOAT; break;
  6879. case 3: /* SAMPLER */ break;
  6880. default: goto corrupt_ctab;
  6881. } // switch
  6882. if ((setvariables) && (mojotype != MOJOSHADER_UNIFORM_UNKNOWN))
  6883. {
  6884. VariableList *item;
  6885. item = (VariableList *) Malloc(ctx, sizeof (VariableList));
  6886. if (item != NULL)
  6887. {
  6888. item->type = mojotype;
  6889. item->index = regidx;
  6890. item->count = regcnt;
  6891. item->constant = NULL;
  6892. item->used = 0;
  6893. item->emit_position = -1;
  6894. item->next = ctx->variables;
  6895. ctx->variables = item;
  6896. } // if
  6897. } // if
  6898. // Add the symbol.
  6899. const char *namecpy = StrDup(ctx, (const char *) (start + name));
  6900. if (namecpy == NULL)
  6901. return;
  6902. MOJOSHADER_symbol *sym = &ctab->symbols[i];
  6903. sym->name = namecpy;
  6904. sym->register_set = (MOJOSHADER_symbolRegisterSet) regset;
  6905. sym->register_index = (unsigned int) regidx;
  6906. sym->register_count = (unsigned int) regcnt;
  6907. if (!parse_ctab_typeinfo(ctx, start, bytes, typeinf, &sym->info))
  6908. goto corrupt_ctab; // sym->name will get free()'d later.
  6909. else if (ctx->out_of_memory)
  6910. return; // just bail now.
  6911. } // for
  6912. return;
  6913. corrupt_ctab:
  6914. fail(ctx, "Shader has corrupt CTAB data");
  6915. } // parse_constant_table
  6916. static void free_symbols(MOJOSHADER_free f, void *d, MOJOSHADER_symbol *syms,
  6917. const int symcount);
  6918. static int is_comment_token(Context *ctx, const uint32 tok, uint32 *tokcount)
  6919. {
  6920. const uint32 token = SWAP32(tok);
  6921. if ((token & 0xFFFF) == 0xFFFE) // actually a comment token?
  6922. {
  6923. if ((token & 0x80000000) != 0)
  6924. fail(ctx, "comment token high bit must be zero."); // so says msdn.
  6925. *tokcount = ((token >> 16) & 0xFFFF);
  6926. return 1;
  6927. } // if
  6928. return 0;
  6929. } // is_comment_token
  6930. typedef struct PreshaderBlockInfo
  6931. {
  6932. const uint32 *tokens;
  6933. uint32 tokcount;
  6934. int seen;
  6935. } PreshaderBlockInfo;
  6936. // Preshaders only show up in compiled Effect files. The format is
  6937. // undocumented, and even the instructions aren't the same opcodes as you
  6938. // would find in a regular shader. These things show up because the HLSL
  6939. // compiler can detect work that sets up constant registers that could
  6940. // be moved out of the shader itself. Preshaders run once, then the shader
  6941. // itself runs many times, using the constant registers the preshader has set
  6942. // up. There are cases where the preshaders are 3+ times as many instructions
  6943. // as the shader itself, so this can be a big performance win.
  6944. // My presumption is that Microsoft's Effects framework runs the preshaders on
  6945. // the CPU, then loads the constant register file appropriately before handing
  6946. // off to the GPU. As such, we do the same.
  6947. static void parse_preshader(Context *ctx, uint32 tokcount)
  6948. {
  6949. const uint32 *tokens = ctx->tokens;
  6950. if ((tokcount < 2) || (SWAP32(tokens[1]) != PRES_ID))
  6951. return; // not a preshader.
  6952. #if !SUPPORT_PRESHADERS
  6953. fail(ctx, "Preshader found, but preshader support is disabled!");
  6954. #else
  6955. assert(ctx->have_preshader == 0); // !!! FIXME: can you have more than one?
  6956. ctx->have_preshader = 1;
  6957. // !!! FIXME: I don't know what specific versions signify, but we need to
  6958. // !!! FIXME: save this to test against the CTAB version field, if
  6959. // !!! FIXME: nothing else.
  6960. // !!! FIXME: 0x02 0x01 is probably the version (fx_2_1),
  6961. // !!! FIXME: and 0x4658 is the magic, like a real shader's version token.
  6962. const uint32 okay_version = 0x46580201;
  6963. if (SWAP32(tokens[2]) != okay_version)
  6964. {
  6965. fail(ctx, "Unsupported preshader version.");
  6966. return; // fail because the shader will malfunction w/o this.
  6967. } // if
  6968. tokens += 3;
  6969. tokcount -= 3;
  6970. // All sections of a preshader are packed into separate comment tokens,
  6971. // inside the containing comment token block. Find them all before
  6972. // we start, so we don't care about the order they appear in the file.
  6973. PreshaderBlockInfo ctab = { 0, 0, 0 };
  6974. PreshaderBlockInfo prsi = { 0, 0, 0 };
  6975. PreshaderBlockInfo fxlc = { 0, 0, 0 };
  6976. PreshaderBlockInfo clit = { 0, 0, 0 };
  6977. while (tokcount > 0)
  6978. {
  6979. uint32 subtokcount = 0;
  6980. if ( (!is_comment_token(ctx, *tokens, &subtokcount)) ||
  6981. (subtokcount > tokcount) )
  6982. {
  6983. fail(ctx, "Bogus preshader data.");
  6984. return;
  6985. } // if
  6986. tokens++;
  6987. tokcount--;
  6988. const uint32 *nexttokens = tokens + subtokcount;
  6989. const uint32 nexttokcount = tokcount - subtokcount;
  6990. if (subtokcount > 0)
  6991. {
  6992. switch (SWAP32(*tokens))
  6993. {
  6994. #define PRESHADER_BLOCK_CASE(id, var) \
  6995. case id##_ID: { \
  6996. if (var.seen) { \
  6997. fail(ctx, "Multiple " #id " preshader blocks."); \
  6998. return; \
  6999. } \
  7000. var.tokens = tokens; \
  7001. var.tokcount = subtokcount; \
  7002. var.seen = 1; \
  7003. break; \
  7004. }
  7005. PRESHADER_BLOCK_CASE(CTAB, ctab);
  7006. PRESHADER_BLOCK_CASE(PRSI, prsi);
  7007. PRESHADER_BLOCK_CASE(FXLC, fxlc);
  7008. PRESHADER_BLOCK_CASE(CLIT, clit);
  7009. default: fail(ctx, "Bogus preshader section."); return;
  7010. #undef PRESHADER_BLOCK_CASE
  7011. } // switch
  7012. } // if
  7013. tokens = nexttokens;
  7014. tokcount = nexttokcount;
  7015. } // while
  7016. if (!ctab.seen) { fail(ctx, "No CTAB block in preshader."); return; }
  7017. if (!prsi.seen) { fail(ctx, "No PRSI block in preshader."); return; }
  7018. if (!fxlc.seen) { fail(ctx, "No FXLC block in preshader."); return; }
  7019. if (!clit.seen) { fail(ctx, "No CLIT block in preshader."); return; }
  7020. MOJOSHADER_preshader *preshader = (MOJOSHADER_preshader *)
  7021. Malloc(ctx, sizeof (MOJOSHADER_preshader));
  7022. if (preshader == NULL)
  7023. return;
  7024. memset(preshader, '\0', sizeof (MOJOSHADER_preshader));
  7025. ctx->preshader = preshader;
  7026. // Let's set up the constant literals first...
  7027. if (clit.tokcount == 0)
  7028. fail(ctx, "Bogus CLIT block in preshader.");
  7029. else
  7030. {
  7031. const uint32 lit_count = SWAP32(clit.tokens[1]);
  7032. if (lit_count > ((clit.tokcount - 2) / 2))
  7033. {
  7034. fail(ctx, "Bogus CLIT block in preshader.");
  7035. return;
  7036. } // if
  7037. else if (lit_count > 0)
  7038. {
  7039. preshader->literal_count = (unsigned int) lit_count;
  7040. assert(sizeof (double) == 8); // just in case.
  7041. const size_t len = sizeof (double) * lit_count;
  7042. preshader->literals = (double *) Malloc(ctx, len);
  7043. if (preshader->literals == NULL)
  7044. return; // oh well.
  7045. const double *litptr = (const double *) (clit.tokens + 2);
  7046. int i;
  7047. for (i = 0; i < lit_count; i++)
  7048. preshader->literals[i] = SWAPDBL(litptr[i]);
  7049. } // else if
  7050. } // else
  7051. // Parse out the PRSI block. This is used to map the output registers.
  7052. if (prsi.tokcount < 8)
  7053. {
  7054. fail(ctx, "Bogus preshader PRSI data");
  7055. return;
  7056. } // if
  7057. //const uint32 first_output_reg = SWAP32(prsi.tokens[1]);
  7058. // !!! FIXME: there are a lot of fields here I don't know about.
  7059. // !!! FIXME: maybe [2] and [3] are for int4 and bool registers?
  7060. //const uint32 output_reg_count = SWAP32(prsi.tokens[4]);
  7061. // !!! FIXME: maybe [5] and [6] are for int4 and bool registers?
  7062. const uint32 output_map_count = SWAP32(prsi.tokens[7]);
  7063. prsi.tokcount -= 8;
  7064. prsi.tokens += 8;
  7065. if (prsi.tokcount < ((output_map_count + 1) * 2))
  7066. {
  7067. fail(ctx, "Bogus preshader PRSI data");
  7068. return;
  7069. } // if
  7070. const uint32 *output_map = prsi.tokens;
  7071. // Now we'll figure out the CTAB...
  7072. CtabData ctabdata = { 0, 0, 0 };
  7073. parse_constant_table(ctx, ctab.tokens - 1, ctab.tokcount * 4,
  7074. okay_version, 0, &ctabdata);
  7075. // preshader owns this now. Don't free it in this function.
  7076. preshader->symbol_count = ctabdata.symbol_count;
  7077. preshader->symbols = ctabdata.symbols;
  7078. if (!ctabdata.have_ctab)
  7079. {
  7080. fail(ctx, "Bogus preshader CTAB data");
  7081. return;
  7082. } // if
  7083. // The FXLC block has the actual instructions...
  7084. uint32 opcode_count = SWAP32(fxlc.tokens[1]);
  7085. size_t len = sizeof (MOJOSHADER_preshaderInstruction) * opcode_count;
  7086. preshader->instruction_count = (unsigned int) opcode_count;
  7087. preshader->instructions = (MOJOSHADER_preshaderInstruction *)
  7088. Malloc(ctx, len);
  7089. if (preshader->instructions == NULL)
  7090. return;
  7091. memset(preshader->instructions, '\0', len);
  7092. fxlc.tokens += 2;
  7093. fxlc.tokcount -= 2;
  7094. if (opcode_count > (fxlc.tokcount / 2))
  7095. {
  7096. fail(ctx, "Bogus preshader FXLC block.");
  7097. return;
  7098. } // if
  7099. MOJOSHADER_preshaderInstruction *inst = preshader->instructions;
  7100. while (opcode_count--)
  7101. {
  7102. const uint32 opcodetok = SWAP32(fxlc.tokens[0]);
  7103. MOJOSHADER_preshaderOpcode opcode = MOJOSHADER_PRESHADEROP_NOP;
  7104. switch ((opcodetok >> 16) & 0xFFFF)
  7105. {
  7106. case 0x1000: opcode = MOJOSHADER_PRESHADEROP_MOV; break;
  7107. case 0x1010: opcode = MOJOSHADER_PRESHADEROP_NEG; break;
  7108. case 0x1030: opcode = MOJOSHADER_PRESHADEROP_RCP; break;
  7109. case 0x1040: opcode = MOJOSHADER_PRESHADEROP_FRC; break;
  7110. case 0x1050: opcode = MOJOSHADER_PRESHADEROP_EXP; break;
  7111. case 0x1060: opcode = MOJOSHADER_PRESHADEROP_LOG; break;
  7112. case 0x1070: opcode = MOJOSHADER_PRESHADEROP_RSQ; break;
  7113. case 0x1080: opcode = MOJOSHADER_PRESHADEROP_SIN; break;
  7114. case 0x1090: opcode = MOJOSHADER_PRESHADEROP_COS; break;
  7115. case 0x10A0: opcode = MOJOSHADER_PRESHADEROP_ASIN; break;
  7116. case 0x10B0: opcode = MOJOSHADER_PRESHADEROP_ACOS; break;
  7117. case 0x10C0: opcode = MOJOSHADER_PRESHADEROP_ATAN; break;
  7118. case 0x2000: opcode = MOJOSHADER_PRESHADEROP_MIN; break;
  7119. case 0x2010: opcode = MOJOSHADER_PRESHADEROP_MAX; break;
  7120. case 0x2020: opcode = MOJOSHADER_PRESHADEROP_LT; break;
  7121. case 0x2030: opcode = MOJOSHADER_PRESHADEROP_GE; break;
  7122. case 0x2040: opcode = MOJOSHADER_PRESHADEROP_ADD; break;
  7123. case 0x2050: opcode = MOJOSHADER_PRESHADEROP_MUL; break;
  7124. case 0x2060: opcode = MOJOSHADER_PRESHADEROP_ATAN2; break;
  7125. case 0x2080: opcode = MOJOSHADER_PRESHADEROP_DIV; break;
  7126. case 0x3000: opcode = MOJOSHADER_PRESHADEROP_CMP; break;
  7127. case 0x3010: opcode = MOJOSHADER_PRESHADEROP_MOVC; break;
  7128. case 0x5000: opcode = MOJOSHADER_PRESHADEROP_DOT; break;
  7129. case 0x5020: opcode = MOJOSHADER_PRESHADEROP_NOISE; break;
  7130. case 0xA000: opcode = MOJOSHADER_PRESHADEROP_MIN_SCALAR; break;
  7131. case 0xA010: opcode = MOJOSHADER_PRESHADEROP_MAX_SCALAR; break;
  7132. case 0xA020: opcode = MOJOSHADER_PRESHADEROP_LT_SCALAR; break;
  7133. case 0xA030: opcode = MOJOSHADER_PRESHADEROP_GE_SCALAR; break;
  7134. case 0xA040: opcode = MOJOSHADER_PRESHADEROP_ADD_SCALAR; break;
  7135. case 0xA050: opcode = MOJOSHADER_PRESHADEROP_MUL_SCALAR; break;
  7136. case 0xA060: opcode = MOJOSHADER_PRESHADEROP_ATAN2_SCALAR; break;
  7137. case 0xA080: opcode = MOJOSHADER_PRESHADEROP_DIV_SCALAR; break;
  7138. case 0xD000: opcode = MOJOSHADER_PRESHADEROP_DOT_SCALAR; break;
  7139. case 0xD020: opcode = MOJOSHADER_PRESHADEROP_NOISE_SCALAR; break;
  7140. default: fail(ctx, "Unknown preshader opcode."); break;
  7141. } // switch
  7142. uint32 operand_count = SWAP32(fxlc.tokens[1]) + 1; // +1 for dest.
  7143. inst->opcode = opcode;
  7144. inst->element_count = (unsigned int) (opcodetok & 0xFF);
  7145. inst->operand_count = (unsigned int) operand_count;
  7146. fxlc.tokens += 2;
  7147. fxlc.tokcount -= 2;
  7148. if ((operand_count * 3) > fxlc.tokcount)
  7149. {
  7150. fail(ctx, "Bogus preshader FXLC block.");
  7151. return;
  7152. } // if
  7153. MOJOSHADER_preshaderOperand *operand = inst->operands;
  7154. while (operand_count--)
  7155. {
  7156. const unsigned int item = (unsigned int) SWAP32(fxlc.tokens[2]);
  7157. // !!! FIXME: don't know what first token does.
  7158. switch (SWAP32(fxlc.tokens[1]))
  7159. {
  7160. case 1: // literal from CLIT block.
  7161. {
  7162. if (item >= preshader->literal_count)
  7163. {
  7164. fail(ctx, "Bogus preshader literal index.");
  7165. break;
  7166. } // if
  7167. operand->type = MOJOSHADER_PRESHADEROPERAND_LITERAL;
  7168. break;
  7169. } // case
  7170. case 2: // item from ctabdata.
  7171. {
  7172. int i;
  7173. MOJOSHADER_symbol *sym = ctabdata.symbols;
  7174. for (i = 0; i < ctabdata.symbol_count; i++, sym++)
  7175. {
  7176. const uint32 base = sym->register_index * 4;
  7177. const uint32 count = sym->register_count * 4;
  7178. assert(sym->register_set==MOJOSHADER_SYMREGSET_FLOAT4);
  7179. if ( (base <= item) && ((base + count) > item) )
  7180. break;
  7181. } // for
  7182. if (i == ctabdata.symbol_count)
  7183. {
  7184. fail(ctx, "Bogus preshader input index.");
  7185. break;
  7186. } // if
  7187. operand->type = MOJOSHADER_PRESHADEROPERAND_INPUT;
  7188. break;
  7189. } // case
  7190. case 4:
  7191. {
  7192. int i;
  7193. for (i = 0; i < output_map_count; i++)
  7194. {
  7195. const uint32 base = output_map[(i*2)] * 4;
  7196. const uint32 count = output_map[(i*2)+1] * 4;
  7197. if ( (base <= item) && ((base + count) > item) )
  7198. break;
  7199. } // for
  7200. if (i == output_map_count)
  7201. {
  7202. fail(ctx, "Bogus preshader output index.");
  7203. break;
  7204. } // if
  7205. operand->type = MOJOSHADER_PRESHADEROPERAND_OUTPUT;
  7206. break;
  7207. } // case
  7208. case 7:
  7209. {
  7210. operand->type = MOJOSHADER_PRESHADEROPERAND_TEMP;
  7211. if (item >= preshader->temp_count)
  7212. preshader->temp_count = item + 1;
  7213. break;
  7214. } // case
  7215. } // switch
  7216. operand->index = item;
  7217. fxlc.tokens += 3;
  7218. fxlc.tokcount -= 3;
  7219. operand++;
  7220. } // while
  7221. inst++;
  7222. } // while
  7223. #endif
  7224. } // parse_preshader
  7225. static int parse_comment_token(Context *ctx)
  7226. {
  7227. uint32 commenttoks = 0;
  7228. if (is_comment_token(ctx, *ctx->tokens, &commenttoks))
  7229. {
  7230. if ((commenttoks >= 1) && (commenttoks < ctx->tokencount))
  7231. {
  7232. const uint32 id = SWAP32(ctx->tokens[1]);
  7233. if (id == PRES_ID)
  7234. parse_preshader(ctx, commenttoks);
  7235. else if (id == CTAB_ID)
  7236. {
  7237. parse_constant_table(ctx, ctx->tokens, commenttoks * 4,
  7238. ctx->version_token, 1, &ctx->ctab);
  7239. } // else if
  7240. } // if
  7241. return commenttoks + 1; // comment data plus the initial token.
  7242. } // if
  7243. return 0; // not a comment token.
  7244. } // parse_comment_token
  7245. static int parse_end_token(Context *ctx)
  7246. {
  7247. if (SWAP32(*(ctx->tokens)) != 0x0000FFFF) // end token always 0x0000FFFF.
  7248. return 0; // not us, eat no tokens.
  7249. if (ctx->tokencount != 1) // we _must_ be last. If not: fail.
  7250. fail(ctx, "end token before end of stream");
  7251. if (!isfail(ctx))
  7252. ctx->profile->end_emitter(ctx);
  7253. return 1;
  7254. } // parse_end_token
  7255. static int parse_phase_token(Context *ctx)
  7256. {
  7257. // !!! FIXME: needs state; allow only one phase token per shader, I think?
  7258. if (SWAP32(*(ctx->tokens)) != 0x0000FFFD) // phase token always 0x0000FFFD.
  7259. return 0; // not us, eat no tokens.
  7260. if ( (!shader_is_pixel(ctx)) || (!shader_version_exactly(ctx, 1, 4)) )
  7261. fail(ctx, "phase token only available in 1.4 pixel shaders");
  7262. if (!isfail(ctx))
  7263. ctx->profile->phase_emitter(ctx);
  7264. return 1;
  7265. } // parse_phase_token
  7266. static int parse_token(Context *ctx)
  7267. {
  7268. int rc = 0;
  7269. assert(ctx->output_stack_len == 0);
  7270. if (ctx->tokencount == 0)
  7271. fail(ctx, "unexpected end of shader.");
  7272. else if ((rc = parse_comment_token(ctx)) != 0)
  7273. return rc;
  7274. else if ((rc = parse_end_token(ctx)) != 0)
  7275. return rc;
  7276. else if ((rc = parse_phase_token(ctx)) != 0)
  7277. return rc;
  7278. else if ((rc = parse_instruction_token(ctx)) != 0)
  7279. return rc;
  7280. failf(ctx, "unknown token (0x%x)", (uint) *ctx->tokens);
  7281. return 1; // good luck!
  7282. } // parse_token
  7283. static int find_profile_id(const char *profile)
  7284. {
  7285. size_t i;
  7286. for (i = 0; i < STATICARRAYLEN(profileMap); i++)
  7287. {
  7288. const char *name = profileMap[i].from;
  7289. if (strcmp(name, profile) == 0)
  7290. {
  7291. profile = profileMap[i].to;
  7292. break;
  7293. } // if
  7294. } // for
  7295. for (i = 0; i < STATICARRAYLEN(profiles); i++)
  7296. {
  7297. const char *name = profiles[i].name;
  7298. if (strcmp(name, profile) == 0)
  7299. return i;
  7300. } // for
  7301. return -1; // no match.
  7302. } // find_profile_id
  7303. static Context *build_context(const char *profile,
  7304. const unsigned char *tokenbuf,
  7305. const unsigned int bufsize,
  7306. const MOJOSHADER_swizzle *swiz,
  7307. const unsigned int swizcount,
  7308. const MOJOSHADER_samplerMap *smap,
  7309. const unsigned int smapcount,
  7310. MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
  7311. {
  7312. if (m == NULL) m = MOJOSHADER_internal_malloc;
  7313. if (f == NULL) f = MOJOSHADER_internal_free;
  7314. Context *ctx = (Context *) m(sizeof (Context), d);
  7315. if (ctx == NULL)
  7316. return NULL;
  7317. memset(ctx, '\0', sizeof (Context));
  7318. ctx->malloc = m;
  7319. ctx->free = f;
  7320. ctx->malloc_data = d;
  7321. ctx->tokens = (const uint32 *) tokenbuf;
  7322. ctx->orig_tokens = (const uint32 *) tokenbuf;
  7323. ctx->tokencount = bufsize / sizeof (uint32);
  7324. ctx->swizzles = swiz;
  7325. ctx->swizzles_count = swizcount;
  7326. ctx->samplermap = smap;
  7327. ctx->samplermap_count = smapcount;
  7328. ctx->endline = ENDLINE_STR;
  7329. ctx->endline_len = strlen(ctx->endline);
  7330. ctx->last_address_reg_component = -1;
  7331. ctx->current_position = MOJOSHADER_POSITION_BEFORE;
  7332. ctx->texm3x2pad_dst0 = -1;
  7333. ctx->texm3x2pad_src0 = -1;
  7334. ctx->texm3x3pad_dst0 = -1;
  7335. ctx->texm3x3pad_src0 = -1;
  7336. ctx->texm3x3pad_dst1 = -1;
  7337. ctx->texm3x3pad_src1 = -1;
  7338. ctx->errors = errorlist_create(MallocBridge, FreeBridge, ctx);
  7339. if (ctx->errors == NULL)
  7340. {
  7341. f(ctx, d);
  7342. return NULL;
  7343. } // if
  7344. if (!set_output(ctx, &ctx->mainline))
  7345. {
  7346. errorlist_destroy(ctx->errors);
  7347. f(ctx, d);
  7348. return NULL;
  7349. } // if
  7350. const int profileid = find_profile_id(profile);
  7351. ctx->profileid = profileid;
  7352. if (profileid >= 0)
  7353. ctx->profile = &profiles[profileid];
  7354. else
  7355. failf(ctx, "Profile '%s' is unknown or unsupported", profile);
  7356. return ctx;
  7357. } // build_context
  7358. static void free_constants_list(MOJOSHADER_free f, void *d, ConstantsList *item)
  7359. {
  7360. while (item != NULL)
  7361. {
  7362. ConstantsList *next = item->next;
  7363. f(item, d);
  7364. item = next;
  7365. } // while
  7366. } // free_constants_list
  7367. static void free_variable_list(MOJOSHADER_free f, void *d, VariableList *item)
  7368. {
  7369. while (item != NULL)
  7370. {
  7371. VariableList *next = item->next;
  7372. f(item, d);
  7373. item = next;
  7374. } // while
  7375. } // free_variable_list
  7376. static void free_sym_typeinfo(MOJOSHADER_free f, void *d,
  7377. MOJOSHADER_symbolTypeInfo *typeinfo)
  7378. {
  7379. int i;
  7380. for (i = 0; i < typeinfo->member_count; i++)
  7381. {
  7382. f((void *) typeinfo->members[i].name, d);
  7383. free_sym_typeinfo(f, d, &typeinfo->members[i].info);
  7384. } // for
  7385. f((void *) typeinfo->members, d);
  7386. } // free_sym_members
  7387. static void free_symbols(MOJOSHADER_free f, void *d, MOJOSHADER_symbol *syms,
  7388. const int symcount)
  7389. {
  7390. int i;
  7391. for (i = 0; i < symcount; i++)
  7392. {
  7393. f((void *) syms[i].name, d);
  7394. free_sym_typeinfo(f, d, &syms[i].info);
  7395. } // for
  7396. f((void *) syms, d);
  7397. } // free_symbols
  7398. static void free_preshader(MOJOSHADER_free f, void *d,
  7399. MOJOSHADER_preshader *preshader)
  7400. {
  7401. if (preshader != NULL)
  7402. {
  7403. f((void *) preshader->literals, d);
  7404. f((void *) preshader->instructions, d);
  7405. free_symbols(f, d, preshader->symbols, preshader->symbol_count);
  7406. f((void *) preshader, d);
  7407. } // if
  7408. } // free_preshader
  7409. static void destroy_context(Context *ctx)
  7410. {
  7411. if (ctx != NULL)
  7412. {
  7413. MOJOSHADER_free f = ((ctx->free != NULL) ? ctx->free : MOJOSHADER_internal_free);
  7414. void *d = ctx->malloc_data;
  7415. buffer_destroy(ctx->preflight);
  7416. buffer_destroy(ctx->globals);
  7417. buffer_destroy(ctx->helpers);
  7418. buffer_destroy(ctx->subroutines);
  7419. buffer_destroy(ctx->mainline_intro);
  7420. buffer_destroy(ctx->mainline);
  7421. buffer_destroy(ctx->ignore);
  7422. free_constants_list(f, d, ctx->constants);
  7423. free_reglist(f, d, ctx->used_registers.next);
  7424. free_reglist(f, d, ctx->defined_registers.next);
  7425. free_reglist(f, d, ctx->uniforms.next);
  7426. free_reglist(f, d, ctx->attributes.next);
  7427. free_reglist(f, d, ctx->samplers.next);
  7428. free_variable_list(f, d, ctx->variables);
  7429. errorlist_destroy(ctx->errors);
  7430. free_symbols(f, d, ctx->ctab.symbols, ctx->ctab.symbol_count);
  7431. free_preshader(f, d, ctx->preshader);
  7432. f(ctx, d);
  7433. } // if
  7434. } // destroy_context
  7435. static char *build_output(Context *ctx, size_t *len)
  7436. {
  7437. // add a byte for a null terminator.
  7438. Buffer *buffers[] = {
  7439. ctx->preflight, ctx->globals, ctx->helpers,
  7440. ctx->subroutines, ctx->mainline_intro, ctx->mainline
  7441. // don't append ctx->ignore ... that's why it's called "ignore"
  7442. };
  7443. char *retval = buffer_merge(buffers, STATICARRAYLEN(buffers), len);
  7444. return retval;
  7445. } // build_output
  7446. static inline const char *alloc_varname(Context *ctx, const RegisterList *reg)
  7447. {
  7448. return ctx->profile->get_varname(ctx, reg->regtype, reg->regnum);
  7449. } // alloc_varname
  7450. // !!! FIXME: this code is sort of hard to follow:
  7451. // !!! FIXME: "var->used" only applies to arrays (at the moment, at least,
  7452. // !!! FIXME: but this might be buggy at a later time?), and this code
  7453. // !!! FIXME: relies on that.
  7454. // !!! FIXME: "variables" means "things we found in a CTAB" but it's not
  7455. // !!! FIXME: all registers, etc.
  7456. // !!! FIXME: "const_array" means an array for d3d "const" registers (c0, c1,
  7457. // !!! FIXME: etc), but not a constant array, although they _can_ be.
  7458. // !!! FIXME: It's just a mess. :/
  7459. static MOJOSHADER_uniform *build_uniforms(Context *ctx)
  7460. {
  7461. const size_t len = sizeof (MOJOSHADER_uniform) * ctx->uniform_count;
  7462. MOJOSHADER_uniform *retval = (MOJOSHADER_uniform *) Malloc(ctx, len);
  7463. if (retval != NULL)
  7464. {
  7465. MOJOSHADER_uniform *wptr = retval;
  7466. memset(wptr, '\0', len);
  7467. VariableList *var;
  7468. int written = 0;
  7469. for (var = ctx->variables; var != NULL; var = var->next)
  7470. {
  7471. if (var->used)
  7472. {
  7473. const char *name = ctx->profile->get_const_array_varname(ctx,
  7474. var->index, var->count);
  7475. if (name != NULL)
  7476. {
  7477. wptr->type = MOJOSHADER_UNIFORM_FLOAT;
  7478. wptr->index = var->index;
  7479. wptr->array_count = var->count;
  7480. wptr->constant = (var->constant != NULL) ? 1 : 0;
  7481. wptr->name = name;
  7482. wptr++;
  7483. written++;
  7484. } // if
  7485. } // if
  7486. } // for
  7487. RegisterList *item = ctx->uniforms.next;
  7488. MOJOSHADER_uniformType type = MOJOSHADER_UNIFORM_FLOAT;
  7489. while (written < ctx->uniform_count)
  7490. {
  7491. int skip = 0;
  7492. // !!! FIXME: does this fail if written > ctx->uniform_count?
  7493. if (item == NULL)
  7494. {
  7495. fail(ctx, "BUG: mismatched uniform list and count");
  7496. break;
  7497. } // if
  7498. int index = item->regnum;
  7499. switch (item->regtype)
  7500. {
  7501. case REG_TYPE_CONST:
  7502. skip = (item->array != NULL);
  7503. type = MOJOSHADER_UNIFORM_FLOAT;
  7504. break;
  7505. case REG_TYPE_CONSTINT:
  7506. type = MOJOSHADER_UNIFORM_INT;
  7507. break;
  7508. case REG_TYPE_CONSTBOOL:
  7509. type = MOJOSHADER_UNIFORM_BOOL;
  7510. break;
  7511. default:
  7512. fail(ctx, "unknown uniform datatype");
  7513. break;
  7514. } // switch
  7515. if (!skip)
  7516. {
  7517. wptr->type = type;
  7518. wptr->index = index;
  7519. wptr->array_count = 0;
  7520. wptr->name = alloc_varname(ctx, item);
  7521. wptr++;
  7522. written++;
  7523. } // if
  7524. item = item->next;
  7525. } // for
  7526. } // if
  7527. return retval;
  7528. } // build_uniforms
  7529. static MOJOSHADER_constant *build_constants(Context *ctx)
  7530. {
  7531. const size_t len = sizeof (MOJOSHADER_constant) * ctx->constant_count;
  7532. MOJOSHADER_constant *retval = (MOJOSHADER_constant *) Malloc(ctx, len);
  7533. if (retval != NULL)
  7534. {
  7535. ConstantsList *item = ctx->constants;
  7536. int i;
  7537. for (i = 0; i < ctx->constant_count; i++)
  7538. {
  7539. if (item == NULL)
  7540. {
  7541. fail(ctx, "BUG: mismatched constant list and count");
  7542. break;
  7543. } // if
  7544. memcpy(&retval[i], &item->constant, sizeof (MOJOSHADER_constant));
  7545. item = item->next;
  7546. } // for
  7547. } // if
  7548. return retval;
  7549. } // build_constants
  7550. static MOJOSHADER_sampler *build_samplers(Context *ctx)
  7551. {
  7552. const size_t len = sizeof (MOJOSHADER_sampler) * ctx->sampler_count;
  7553. MOJOSHADER_sampler *retval = (MOJOSHADER_sampler *) Malloc(ctx, len);
  7554. if (retval != NULL)
  7555. {
  7556. RegisterList *item = ctx->samplers.next;
  7557. int i;
  7558. memset(retval, '\0', len);
  7559. for (i = 0; i < ctx->sampler_count; i++)
  7560. {
  7561. if (item == NULL)
  7562. {
  7563. fail(ctx, "BUG: mismatched sampler list and count");
  7564. break;
  7565. } // if
  7566. assert(item->regtype == REG_TYPE_SAMPLER);
  7567. retval[i].type = cvtD3DToMojoSamplerType((TextureType) item->index);
  7568. retval[i].index = item->regnum;
  7569. retval[i].name = alloc_varname(ctx, item);
  7570. retval[i].texbem = (item->misc != 0) ? 1 : 0;
  7571. item = item->next;
  7572. } // for
  7573. } // if
  7574. return retval;
  7575. } // build_samplers
  7576. static MOJOSHADER_attribute *build_attributes(Context *ctx, int *_count)
  7577. {
  7578. int count = 0;
  7579. if (ctx->attribute_count == 0)
  7580. {
  7581. *_count = 0;
  7582. return NULL; // nothing to do.
  7583. } // if
  7584. const size_t len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
  7585. MOJOSHADER_attribute *retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
  7586. if (retval != NULL)
  7587. {
  7588. RegisterList *item = ctx->attributes.next;
  7589. MOJOSHADER_attribute *wptr = retval;
  7590. int ignore = 0;
  7591. int i;
  7592. memset(retval, '\0', len);
  7593. for (i = 0; i < ctx->attribute_count; i++)
  7594. {
  7595. if (item == NULL)
  7596. {
  7597. fail(ctx, "BUG: mismatched attribute list and count");
  7598. break;
  7599. } // if
  7600. switch (item->regtype)
  7601. {
  7602. case REG_TYPE_RASTOUT:
  7603. case REG_TYPE_ATTROUT:
  7604. case REG_TYPE_TEXCRDOUT:
  7605. case REG_TYPE_COLOROUT:
  7606. case REG_TYPE_DEPTHOUT:
  7607. ignore = 1;
  7608. break;
  7609. case REG_TYPE_TEXTURE:
  7610. case REG_TYPE_MISCTYPE:
  7611. case REG_TYPE_INPUT:
  7612. ignore = shader_is_pixel(ctx);
  7613. break;
  7614. default:
  7615. ignore = 0;
  7616. break;
  7617. } // switch
  7618. if (!ignore)
  7619. {
  7620. if (shader_is_pixel(ctx))
  7621. fail(ctx, "BUG: pixel shader with vertex attributes");
  7622. else
  7623. {
  7624. wptr->usage = item->usage;
  7625. wptr->index = item->index;
  7626. wptr->name = alloc_varname(ctx, item);
  7627. wptr++;
  7628. count++;
  7629. } // else
  7630. } // if
  7631. item = item->next;
  7632. } // for
  7633. } // if
  7634. *_count = count;
  7635. return retval;
  7636. } // build_attributes
  7637. static MOJOSHADER_attribute *build_outputs(Context *ctx, int *_count)
  7638. {
  7639. int count = 0;
  7640. if (ctx->attribute_count == 0)
  7641. {
  7642. *_count = 0;
  7643. return NULL; // nothing to do.
  7644. } // if
  7645. const size_t len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
  7646. MOJOSHADER_attribute *retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
  7647. if (retval != NULL)
  7648. {
  7649. RegisterList *item = ctx->attributes.next;
  7650. MOJOSHADER_attribute *wptr = retval;
  7651. int i;
  7652. memset(retval, '\0', len);
  7653. for (i = 0; i < ctx->attribute_count; i++)
  7654. {
  7655. if (item == NULL)
  7656. {
  7657. fail(ctx, "BUG: mismatched attribute list and count");
  7658. break;
  7659. } // if
  7660. switch (item->regtype)
  7661. {
  7662. case REG_TYPE_RASTOUT:
  7663. case REG_TYPE_ATTROUT:
  7664. case REG_TYPE_TEXCRDOUT:
  7665. case REG_TYPE_COLOROUT:
  7666. case REG_TYPE_DEPTHOUT:
  7667. wptr->usage = item->usage;
  7668. wptr->index = item->index;
  7669. wptr->name = alloc_varname(ctx, item);
  7670. wptr++;
  7671. count++;
  7672. break;
  7673. default:
  7674. break;
  7675. } // switch
  7676. item = item->next;
  7677. } // for
  7678. } // if
  7679. *_count = count;
  7680. return retval;
  7681. } // build_outputs
  7682. static MOJOSHADER_parseData *build_parsedata(Context *ctx)
  7683. {
  7684. char *output = NULL;
  7685. MOJOSHADER_constant *constants = NULL;
  7686. MOJOSHADER_uniform *uniforms = NULL;
  7687. MOJOSHADER_attribute *attributes = NULL;
  7688. MOJOSHADER_attribute *outputs = NULL;
  7689. MOJOSHADER_sampler *samplers = NULL;
  7690. MOJOSHADER_swizzle *swizzles = NULL;
  7691. MOJOSHADER_error *errors = NULL;
  7692. MOJOSHADER_parseData *retval = NULL;
  7693. size_t output_len = 0;
  7694. int attribute_count = 0;
  7695. int output_count = 0;
  7696. if (ctx->out_of_memory)
  7697. return &MOJOSHADER_out_of_mem_data;
  7698. retval = (MOJOSHADER_parseData*) Malloc(ctx, sizeof(MOJOSHADER_parseData));
  7699. if (retval == NULL)
  7700. return &MOJOSHADER_out_of_mem_data;
  7701. memset(retval, '\0', sizeof (MOJOSHADER_parseData));
  7702. if (!isfail(ctx))
  7703. output = build_output(ctx, &output_len);
  7704. if (!isfail(ctx))
  7705. constants = build_constants(ctx);
  7706. if (!isfail(ctx))
  7707. uniforms = build_uniforms(ctx);
  7708. if (!isfail(ctx))
  7709. attributes = build_attributes(ctx, &attribute_count);
  7710. if (!isfail(ctx))
  7711. outputs = build_outputs(ctx, &output_count);
  7712. if (!isfail(ctx))
  7713. samplers = build_samplers(ctx);
  7714. const int error_count = errorlist_count(ctx->errors);
  7715. errors = errorlist_flatten(ctx->errors);
  7716. if (!isfail(ctx))
  7717. {
  7718. if (ctx->swizzles_count > 0)
  7719. {
  7720. const int len = ctx->swizzles_count * sizeof (MOJOSHADER_swizzle);
  7721. swizzles = (MOJOSHADER_swizzle *) Malloc(ctx, len);
  7722. if (swizzles != NULL)
  7723. memcpy(swizzles, ctx->swizzles, len);
  7724. } // if
  7725. } // if
  7726. // check again, in case build_output, etc, ran out of memory.
  7727. if (isfail(ctx))
  7728. {
  7729. int i;
  7730. Free(ctx, output);
  7731. Free(ctx, constants);
  7732. Free(ctx, swizzles);
  7733. if (uniforms != NULL)
  7734. {
  7735. for (i = 0; i < ctx->uniform_count; i++)
  7736. Free(ctx, (void *) uniforms[i].name);
  7737. Free(ctx, uniforms);
  7738. } // if
  7739. if (attributes != NULL)
  7740. {
  7741. for (i = 0; i < attribute_count; i++)
  7742. Free(ctx, (void *) attributes[i].name);
  7743. Free(ctx, attributes);
  7744. } // if
  7745. if (outputs != NULL)
  7746. {
  7747. for (i = 0; i < output_count; i++)
  7748. Free(ctx, (void *) outputs[i].name);
  7749. Free(ctx, outputs);
  7750. } // if
  7751. if (samplers != NULL)
  7752. {
  7753. for (i = 0; i < ctx->sampler_count; i++)
  7754. Free(ctx, (void *) samplers[i].name);
  7755. Free(ctx, samplers);
  7756. } // if
  7757. if (ctx->out_of_memory)
  7758. {
  7759. for (i = 0; i < error_count; i++)
  7760. {
  7761. Free(ctx, (void *) errors[i].filename);
  7762. Free(ctx, (void *) errors[i].error);
  7763. } // for
  7764. Free(ctx, errors);
  7765. Free(ctx, retval);
  7766. return &MOJOSHADER_out_of_mem_data;
  7767. } // if
  7768. } // if
  7769. else
  7770. {
  7771. retval->profile = ctx->profile->name;
  7772. retval->output = output;
  7773. retval->output_len = (int) output_len;
  7774. retval->instruction_count = ctx->instruction_count;
  7775. retval->shader_type = ctx->shader_type;
  7776. retval->major_ver = (int) ctx->major_ver;
  7777. retval->minor_ver = (int) ctx->minor_ver;
  7778. retval->uniform_count = ctx->uniform_count;
  7779. retval->uniforms = uniforms;
  7780. retval->constant_count = ctx->constant_count;
  7781. retval->constants = constants;
  7782. retval->sampler_count = ctx->sampler_count;
  7783. retval->samplers = samplers;
  7784. retval->attribute_count = attribute_count;
  7785. retval->attributes = attributes;
  7786. retval->output_count = output_count;
  7787. retval->outputs = outputs;
  7788. retval->swizzle_count = ctx->swizzles_count;
  7789. retval->swizzles = swizzles;
  7790. retval->symbol_count = ctx->ctab.symbol_count;
  7791. retval->symbols = ctx->ctab.symbols;
  7792. retval->preshader = ctx->preshader;
  7793. // we don't own these now, retval does.
  7794. ctx->ctab.symbols = NULL;
  7795. ctx->preshader = NULL;
  7796. ctx->ctab.symbol_count = 0;
  7797. } // else
  7798. retval->error_count = error_count;
  7799. retval->errors = errors;
  7800. retval->malloc = (ctx->malloc == MOJOSHADER_internal_malloc) ? NULL : ctx->malloc;
  7801. retval->free = (ctx->free == MOJOSHADER_internal_free) ? NULL : ctx->free;
  7802. retval->malloc_data = ctx->malloc_data;
  7803. return retval;
  7804. } // build_parsedata
  7805. static void process_definitions(Context *ctx)
  7806. {
  7807. // !!! FIXME: apparently, pre ps_3_0, sampler registers don't need to be
  7808. // !!! FIXME: DCL'd before use (default to 2d?). We aren't checking
  7809. // !!! FIXME: this at the moment, though.
  7810. determine_constants_arrays(ctx); // in case this hasn't been called yet.
  7811. RegisterList *uitem = &ctx->uniforms;
  7812. RegisterList *prev = &ctx->used_registers;
  7813. RegisterList *item = prev->next;
  7814. while (item != NULL)
  7815. {
  7816. RegisterList *next = item->next;
  7817. const RegisterType regtype = item->regtype;
  7818. const int regnum = item->regnum;
  7819. if (!get_defined_register(ctx, regtype, regnum))
  7820. {
  7821. // haven't already dealt with this one.
  7822. switch (regtype)
  7823. {
  7824. // !!! FIXME: I'm not entirely sure this is right...
  7825. case REG_TYPE_RASTOUT:
  7826. case REG_TYPE_ATTROUT:
  7827. case REG_TYPE_TEXCRDOUT:
  7828. case REG_TYPE_COLOROUT:
  7829. case REG_TYPE_DEPTHOUT:
  7830. if (shader_is_vertex(ctx)&&shader_version_atleast(ctx,3,0))
  7831. {
  7832. fail(ctx, "vs_3 can't use output registers"
  7833. " without declaring them first.");
  7834. return;
  7835. } // if
  7836. // Apparently this is an attribute that wasn't DCL'd.
  7837. // Add it to the attribute list; deal with it later.
  7838. add_attribute_register(ctx, regtype, regnum,
  7839. MOJOSHADER_USAGE_UNKNOWN, 0, 0xF, 0);
  7840. break;
  7841. case REG_TYPE_ADDRESS:
  7842. case REG_TYPE_PREDICATE:
  7843. case REG_TYPE_TEMP:
  7844. case REG_TYPE_LOOP:
  7845. case REG_TYPE_LABEL:
  7846. ctx->profile->global_emitter(ctx, regtype, regnum);
  7847. break;
  7848. case REG_TYPE_CONST:
  7849. case REG_TYPE_CONSTINT:
  7850. case REG_TYPE_CONSTBOOL:
  7851. // separate uniforms into a different list for now.
  7852. prev->next = next;
  7853. item->next = NULL;
  7854. uitem->next = item;
  7855. uitem = item;
  7856. item = prev;
  7857. break;
  7858. case REG_TYPE_INPUT:
  7859. // You don't have to dcl_ your inputs in Shader Model 1.
  7860. if (shader_is_pixel(ctx)&&!shader_version_atleast(ctx,2,0))
  7861. {
  7862. add_attribute_register(ctx, regtype, regnum,
  7863. MOJOSHADER_USAGE_COLOR, regnum,
  7864. 0xF, 0);
  7865. break;
  7866. } // if
  7867. // fall through...
  7868. default:
  7869. fail(ctx, "BUG: we used a register we don't know how to define.");
  7870. } // switch
  7871. } // if
  7872. prev = item;
  7873. item = next;
  7874. } // while
  7875. // okay, now deal with uniform/constant arrays...
  7876. VariableList *var;
  7877. for (var = ctx->variables; var != NULL; var = var->next)
  7878. {
  7879. if (var->used)
  7880. {
  7881. if (var->constant)
  7882. {
  7883. ctx->profile->const_array_emitter(ctx, var->constant,
  7884. var->index, var->count);
  7885. } // if
  7886. else
  7887. {
  7888. ctx->profile->array_emitter(ctx, var);
  7889. ctx->uniform_float4_count += var->count;
  7890. ctx->uniform_count++;
  7891. } // else
  7892. } // if
  7893. } // for
  7894. // ...and uniforms...
  7895. for (item = ctx->uniforms.next; item != NULL; item = item->next)
  7896. {
  7897. int arraysize = -1;
  7898. // check if this is a register contained in an array...
  7899. if (item->regtype == REG_TYPE_CONST)
  7900. {
  7901. for (var = ctx->variables; var != NULL; var = var->next)
  7902. {
  7903. if (!var->used)
  7904. continue;
  7905. const int regnum = item->regnum;
  7906. const int lo = var->index;
  7907. if ( (regnum >= lo) && (regnum < (lo + var->count)) )
  7908. {
  7909. assert(!var->constant);
  7910. item->array = var; // used when building parseData.
  7911. arraysize = var->count;
  7912. break;
  7913. } // if
  7914. } // for
  7915. } // if
  7916. ctx->profile->uniform_emitter(ctx, item->regtype, item->regnum, var);
  7917. if (arraysize < 0) // not part of an array?
  7918. {
  7919. ctx->uniform_count++;
  7920. switch (item->regtype)
  7921. {
  7922. case REG_TYPE_CONST: ctx->uniform_float4_count++; break;
  7923. case REG_TYPE_CONSTINT: ctx->uniform_int4_count++; break;
  7924. case REG_TYPE_CONSTBOOL: ctx->uniform_bool_count++; break;
  7925. default: break;
  7926. } // switch
  7927. } // if
  7928. } // for
  7929. // ...and samplers...
  7930. for (item = ctx->samplers.next; item != NULL; item = item->next)
  7931. {
  7932. ctx->sampler_count++;
  7933. ctx->profile->sampler_emitter(ctx, item->regnum,
  7934. (TextureType) item->index,
  7935. item->misc != 0);
  7936. } // for
  7937. // ...and attributes...
  7938. for (item = ctx->attributes.next; item != NULL; item = item->next)
  7939. {
  7940. ctx->attribute_count++;
  7941. ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
  7942. item->usage, item->index,
  7943. item->writemask, item->misc);
  7944. } // for
  7945. } // process_definitions
  7946. static void verify_swizzles(Context *ctx)
  7947. {
  7948. size_t i;
  7949. const char *failmsg = "invalid swizzle";
  7950. for (i = 0; i < ctx->swizzles_count; i++)
  7951. {
  7952. const MOJOSHADER_swizzle *swiz = &ctx->swizzles[i];
  7953. if (swiz->swizzles[0] > 3) { fail(ctx, failmsg); return; }
  7954. if (swiz->swizzles[1] > 3) { fail(ctx, failmsg); return; }
  7955. if (swiz->swizzles[2] > 3) { fail(ctx, failmsg); return; }
  7956. if (swiz->swizzles[3] > 3) { fail(ctx, failmsg); return; }
  7957. } // for
  7958. } // verify_swizzles
  7959. // API entry point...
  7960. // !!! FIXME:
  7961. // MSDN: "Shader validation will fail CreatePixelShader on any shader that
  7962. // attempts to read from a temporary register that has not been written by a
  7963. // previous instruction." (true for ps_1_*, maybe others). Check this.
  7964. const MOJOSHADER_parseData *MOJOSHADER_parse(const char *profile,
  7965. const unsigned char *tokenbuf,
  7966. const unsigned int bufsize,
  7967. const MOJOSHADER_swizzle *swiz,
  7968. const unsigned int swizcount,
  7969. const MOJOSHADER_samplerMap *smap,
  7970. const unsigned int smapcount,
  7971. MOJOSHADER_malloc m,
  7972. MOJOSHADER_free f, void *d)
  7973. {
  7974. MOJOSHADER_parseData *retval = NULL;
  7975. Context *ctx = NULL;
  7976. int rc = 0;
  7977. int failed = 0;
  7978. if ( ((m == NULL) && (f != NULL)) || ((m != NULL) && (f == NULL)) )
  7979. return &MOJOSHADER_out_of_mem_data; // supply both or neither.
  7980. ctx = build_context(profile, tokenbuf, bufsize, swiz, swizcount,
  7981. smap, smapcount, m, f, d);
  7982. if (ctx == NULL)
  7983. return &MOJOSHADER_out_of_mem_data;
  7984. if (isfail(ctx))
  7985. {
  7986. retval = build_parsedata(ctx);
  7987. destroy_context(ctx);
  7988. return retval;
  7989. } // if
  7990. verify_swizzles(ctx);
  7991. // Version token always comes first.
  7992. ctx->current_position = 0;
  7993. rc = parse_version_token(ctx, profile);
  7994. // drop out now if this definitely isn't bytecode. Saves lots of
  7995. // meaningless errors flooding through.
  7996. if (rc < 0)
  7997. {
  7998. retval = build_parsedata(ctx);
  7999. destroy_context(ctx);
  8000. return retval;
  8001. } // if
  8002. if ( ((uint32) rc) > ctx->tokencount )
  8003. {
  8004. fail(ctx, "Corrupted or truncated shader");
  8005. ctx->tokencount = rc;
  8006. } // if
  8007. adjust_token_position(ctx, rc);
  8008. // parse out the rest of the tokens after the version token...
  8009. while (ctx->tokencount > 0)
  8010. {
  8011. // reset for each token.
  8012. if (isfail(ctx))
  8013. {
  8014. failed = 1;
  8015. ctx->isfail = 0;
  8016. } // if
  8017. rc = parse_token(ctx);
  8018. if ( ((uint32) rc) > ctx->tokencount )
  8019. {
  8020. fail(ctx, "Corrupted or truncated shader");
  8021. break;
  8022. } // if
  8023. adjust_token_position(ctx, rc);
  8024. } // while
  8025. ctx->current_position = MOJOSHADER_POSITION_AFTER;
  8026. // for ps_1_*, the output color is written to r0...throw an
  8027. // error if this register was never written. This isn't
  8028. // important for vertex shaders, or shader model 2+.
  8029. if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
  8030. {
  8031. if (!register_was_written(ctx, REG_TYPE_TEMP, 0))
  8032. fail(ctx, "r0 (pixel shader 1.x color output) never written to");
  8033. } // if
  8034. if (!failed)
  8035. {
  8036. process_definitions(ctx);
  8037. failed = isfail(ctx);
  8038. } // if
  8039. if (!failed)
  8040. ctx->profile->finalize_emitter(ctx);
  8041. ctx->isfail = failed;
  8042. retval = build_parsedata(ctx);
  8043. destroy_context(ctx);
  8044. return retval;
  8045. } // MOJOSHADER_parse
  8046. void MOJOSHADER_freeParseData(const MOJOSHADER_parseData *_data)
  8047. {
  8048. MOJOSHADER_parseData *data = (MOJOSHADER_parseData *) _data;
  8049. if ((data == NULL) || (data == &MOJOSHADER_out_of_mem_data))
  8050. return; // no-op.
  8051. MOJOSHADER_free f = (data->free == NULL) ? MOJOSHADER_internal_free : data->free;
  8052. void *d = data->malloc_data;
  8053. int i;
  8054. // we don't f(data->profile), because that's internal static data.
  8055. f((void *) data->output, d);
  8056. f((void *) data->constants, d);
  8057. f((void *) data->swizzles, d);
  8058. for (i = 0; i < data->error_count; i++)
  8059. {
  8060. f((void *) data->errors[i].error, d);
  8061. f((void *) data->errors[i].filename, d);
  8062. } // for
  8063. f((void *) data->errors, d);
  8064. for (i = 0; i < data->uniform_count; i++)
  8065. f((void *) data->uniforms[i].name, d);
  8066. f((void *) data->uniforms, d);
  8067. for (i = 0; i < data->attribute_count; i++)
  8068. f((void *) data->attributes[i].name, d);
  8069. f((void *) data->attributes, d);
  8070. for (i = 0; i < data->output_count; i++)
  8071. f((void *) data->outputs[i].name, d);
  8072. f((void *) data->outputs, d);
  8073. for (i = 0; i < data->sampler_count; i++)
  8074. f((void *) data->samplers[i].name, d);
  8075. f((void *) data->samplers, d);
  8076. free_symbols(f, d, data->symbols, data->symbol_count);
  8077. free_preshader(f, d, data->preshader);
  8078. f(data, d);
  8079. } // MOJOSHADER_freeParseData
  8080. int MOJOSHADER_version(void)
  8081. {
  8082. return MOJOSHADER_VERSION;
  8083. } // MOJOSHADER_version
  8084. const char *MOJOSHADER_changeset(void)
  8085. {
  8086. return MOJOSHADER_CHANGESET;
  8087. } // MOJOSHADER_changeset
  8088. int MOJOSHADER_maxShaderModel(const char *profile)
  8089. {
  8090. #define PROFILE_SHADER_MODEL(p,v) if (strcmp(profile, p) == 0) return v;
  8091. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_D3D, 3);
  8092. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_BYTECODE, 3);
  8093. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL, 3);
  8094. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL120, 3);
  8095. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_ARB1, 2);
  8096. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV2, 2);
  8097. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV3, 2);
  8098. PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV4, 3);
  8099. #undef PROFILE_SHADER_MODEL
  8100. return -1; // unknown profile?
  8101. } // MOJOSHADER_maxShaderModel
  8102. // end of mojoshader.c ...