fp-contract.ll 1.1 KB

123456789101112131415161718192021222324252627282930313233
  1. ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
  2. ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
  3. target triple = "nvptx64-unknown-cuda"
  4. ;; Make sure we are generating proper instruction sequences for fused ops
  5. ;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
  6. ;; add.f32 otherwise. Without an explicit rounding mode on add.f32, ptxas
  7. ;; is free to fuse with a multiply if it is able. If fusion is not allowed,
  8. ;; we do not form fma.rn at the PTX level and explicitly generate add.rn
  9. ;; for all adds to prevent ptxas from fusing the ops.
  10. ;; FAST-LABEL: @t0
  11. ;; DEFAULT-LABEL: @t0
  12. define float @t0(float %a, float %b, float %c) { ; computes (a * b) + c, the fusable mul+add pattern
  13. ;; FAST: fma.rn.f32
  14. ;; DEFAULT: mul.rn.f32
  15. ;; DEFAULT: add.rn.f32
  16. %v0 = fmul float %a, %b ; product that feeds only the fadd below
  17. %v1 = fadd float %v0, %c ; FAST run expects this mul+add as one fma.rn.f32; DEFAULT expects them kept separate
  18. ret float %v1
  19. }
  20. ;; FAST-LABEL: @t1
  21. ;; DEFAULT-LABEL: @t1
  22. define float @t1(float %a, float %b) {
  23. ;; No fma can be formed from a lone fadd. With fusion allowed (FAST) a plain
  24. ;; add.f32 is expected; DEFAULT must emit add.rn.f32 so ptxas cannot fuse it.
  25. ;; FAST: add.f32
  26. ;; DEFAULT: add.rn.f32
  27. %v1 = fadd float %a, %b
  28. ret float %v1
  29. }