Browse Source

Use i32 to fake 64bit for buffer load/store. (#63)

Xiang Li 8 years ago
parent
commit
f43f396598

+ 2 - 2
lib/HLSL/DxilOperations.cpp

@@ -139,8 +139,8 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::TextureLoad,             "TextureLoad",              OCC::TextureLoad,              "textureLoad",                false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
   {  OC::TextureStore,            "TextureStore",             OCC::TextureStore,             "textureStore",               false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::BufferLoad,              "BufferLoad",               OCC::BufferLoad,               "bufferLoad",                 false,  true,  true, false, false, false,  true,  true,  true, Attribute::ReadOnly, },
-  {  OC::BufferStore,             "BufferStore",              OCC::BufferStore,              "bufferStore",                false,  true,  true, false, false, false,  true,  true,  true, Attribute::None,     },
+  {  OC::BufferLoad,              "BufferLoad",               OCC::BufferLoad,               "bufferLoad",                 false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
+  {  OC::BufferStore,             "BufferStore",              OCC::BufferStore,              "bufferStore",                false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
   {  OC::BufferUpdateCounter,     "BufferUpdateCounter",      OCC::BufferUpdateCounter,      "bufferUpdateCounter",         true, false, false, false, false, false, false, false, false, Attribute::None,     },
   {  OC::CheckAccessFullyMapped,  "CheckAccessFullyMapped",   OCC::CheckAccessFullyMapped,   "checkAccessFullyMapped",     false, false, false, false, false, false, false,  true, false, Attribute::ReadOnly, },
   {  OC::GetDimensions,           "GetDimensions",            OCC::GetDimensions,            "getDimensions",               true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },

+ 111 - 110
lib/HLSL/DxilValidation.cpp

@@ -324,7 +324,7 @@ struct ValidationContext {
   }
 
   void EmitGlobalValueError(GlobalValue *GV, ValidationRule rule) { // Emits `rule` as a formatted error about the global value GV.
-    EmitFormatError(rule, { GV->getName().str().c_str() });
+    EmitFormatError(rule, { GV->getName().str() }); // GV's name is the single argument handed to the rule-text formatter
   }
 
   // This is the least desirable mechanism, as it has no context.
@@ -333,12 +333,12 @@ struct ValidationContext {
     Failed = true;
   }
 
-  void FormatRuleText(std::string &ruleText, ArrayRef<const char *> args) {
+  void FormatRuleText(std::string &ruleText, ArrayRef<StringRef> args) {
     // Each argument is a StringRef; an empty argument is replaced with "<null>".
     for (unsigned i = 0; i < args.size(); i++) {
       std::string argIdx = "%" + std::to_string(i);
-      const char *pArg = args[i];
-      if (pArg == nullptr)
+      StringRef pArg = args[i];
+      if (pArg == "")
         pArg = "<null>";
 
       std::string::size_type offset = ruleText.find(argIdx);
@@ -350,7 +350,7 @@ struct ValidationContext {
     }
   }
 
-  void EmitFormatError(ValidationRule rule, ArrayRef<const char *> args) {
+  void EmitFormatError(ValidationRule rule, ArrayRef<StringRef> args) {
     std::string ruleText = GetValidationRuleText(rule);
     FormatRuleText(ruleText, args);
     DiagPrinter << ruleText << '\n';
@@ -373,7 +373,7 @@ struct ValidationContext {
 
   void EmitResourceFormatError(const hlsl::DxilResourceBase *Res,
                                ValidationRule rule,
-                               ArrayRef<const char *> args) {
+                               ArrayRef<StringRef> args) {
     std::string ruleText = GetValidationRuleText(rule);
     FormatRuleText(ruleText, args);
     DiagPrinter << ruleText;
@@ -453,7 +453,7 @@ struct ValidationContext {
     Failed = true;
   }
 
-  void EmitInstrFormatError(Instruction *I, ValidationRule rule, ArrayRef<const char *> args) {
+  void EmitInstrFormatError(Instruction *I, ValidationRule rule, ArrayRef<StringRef> args) {
     if (!EmitInstrLoc(I, rule)) return;
 
     std::string ruleText = GetValidationRuleText(rule);
@@ -463,7 +463,7 @@ struct ValidationContext {
     Failed = true;
   }
 
-  void EmitOperandOutOfRange(Instruction *I, const char *name, const char * range, const char * v) {
+  void EmitOperandOutOfRange(Instruction *I, StringRef name, StringRef range, StringRef v) {
     if (!EmitInstrLoc(I, ValidationRule::InstrOperandRange)) return;
 
     std::string ruleText = GetValidationRuleText(ValidationRule::InstrOperandRange);
@@ -481,7 +481,7 @@ struct ValidationContext {
     std::string O;
     raw_string_ostream OSS(O);
     Ty->print(OSS);
-    EmitFormatError(rule, { OSS.str().c_str() });
+    EmitFormatError(rule, { OSS.str() });
   }
 };
 
@@ -1097,8 +1097,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
         if (immVertexID < low || immVertexID >= high) {
           std::string range = std::to_string(low)+"~"+
                                        std::to_string(high);
-          ValCtx.EmitOperandOutOfRange(CI, "VertexID", range.c_str(),
-                                       std::to_string(immVertexID).c_str());
+          ValCtx.EmitOperandOutOfRange(CI, "VertexID", range,
+                                       std::to_string(immVertexID));
         }
       }
     }
@@ -1397,9 +1397,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
       if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) {
         ValCtx.EmitInstrFormatError(
             CI, ValidationRule::InstrImmBiasForSampleB,
-            {std::to_string(DXIL::kMinMipLodBias).c_str(),
-             std::to_string(DXIL::kMaxMipLodBias).c_str(),
-             std::to_string(cBias->getValueAPF().convertToFloat()).c_str()});
+            {std::to_string(DXIL::kMinMipLodBias),
+             std::to_string(DXIL::kMaxMipLodBias),
+             std::to_string(cBias->getValueAPF().convertToFloat())});
       }
     }
 
@@ -1491,7 +1491,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
     if (stValMask != uMask) {
       ValCtx.EmitInstrFormatError(
           CI, ValidationRule::InstrWriteMaskMatchValueForUAVStore,
-          {std::to_string(uMask).c_str(), std::to_string(stValMask).c_str()});
+          {std::to_string(uMask), std::to_string(stValMask)});
     }
 
     Value *offset = bufSt.get_coord1();
@@ -1557,8 +1557,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
                           texSt.get_value2(), texSt.get_value3()});
 
     if (stValMask != uMask) {
-      ValCtx.EmitInstrError(
-          CI, ValidationRule::InstrWriteMaskMatchValueForUAVStore);
+      ValCtx.EmitInstrFormatError(
+          CI, ValidationRule::InstrWriteMaskMatchValueForUAVStore,
+          {std::to_string(uMask), std::to_string(stValMask)});
     }
 
     switch (resKind) {
@@ -1670,7 +1671,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
         int immStreamID = cStreamID->getValue().getLimitedValue();
         if (cStreamID->getValue().isNegative() || immStreamID >= 4) {
           ValCtx.EmitOperandOutOfRange(CI, "StreamID","0~4",
-                                       std::to_string(immStreamID).c_str());
+                                       std::to_string(immStreamID));
         } else {
           unsigned immMask = 1 << immStreamID;
           if ((streamMask & immMask) == 0) {
@@ -1680,8 +1681,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
                 range += std::to_string(i) + " ";
               }
             }
-            ValCtx.EmitOperandOutOfRange(CI, "StreamID", range.c_str(),
-                                         std::to_string(immStreamID).c_str());
+            ValCtx.EmitOperandOutOfRange(CI, "StreamID", range,
+                                         std::to_string(immStreamID));
           }
         }
 
@@ -2475,13 +2476,13 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) {
   } else {
     if (!F.arg_empty())
       ValCtx.EmitFormatError(ValidationRule::FlowFunctionCall,
-                             {F.getName().str().c_str()});
+                             {F.getName().str()});
 
     DxilFunctionAnnotation *funcAnnotation =
         ValCtx.DxilMod.GetTypeSystem().GetFunctionAnnotation(&F);
     if (!funcAnnotation) {
       ValCtx.EmitFormatError(ValidationRule::MetaFunctionAnnotation,
-                             {F.getName().str().c_str()});
+                             {F.getName().str()});
       return;
     }
 
@@ -2502,11 +2503,11 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) {
         if (arg.hasName())
           ValCtx.EmitFormatError(
               ValidationRule::DeclFnFlattenParam,
-              {arg.getName().str().c_str(), F.getName().str().c_str()});
+              {arg.getName().str(), F.getName().str()});
         else
           ValCtx.EmitFormatError(ValidationRule::DeclFnFlattenParam,
-                                 {std::to_string(arg.getArgNo()).c_str(),
-                                  F.getName().str().c_str()});
+                                 {std::to_string(arg.getArgNo()),
+                                  F.getName().str()});
         break;
       }
     }
@@ -2612,8 +2613,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) {
 
   if (TGSMSize > DXIL::kMaxTGSMSize) {
     ValCtx.EmitFormatError(ValidationRule::SmMaxTGSMSize,
-                           {std::to_string(TGSMSize).c_str(),
-                            std::to_string(DXIL::kMaxTGSMSize).c_str()});
+                           {std::to_string(TGSMSize),
+                            std::to_string(DXIL::kMaxTGSMSize)});
   }
   if (!fixAddrTGSMList.empty()) {
     ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx);
@@ -2649,7 +2650,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) {
   Module *pModule = &ValCtx.M;
   const std::string &target = pModule->getTargetTriple();
   if (target != "dxil-ms-dx") {
-    ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target.c_str()});
+    ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target});
   }
 
   StringMap<bool> llvmNamedMeta;
@@ -2668,11 +2669,11 @@ static void ValidateMetadata(ValidationContext &ValCtx) {
     if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) {
       StringRef name = NamedMetaNode.getName();
       if (!name.startswith_lower("llvm."))
-        ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str().c_str()});
+        ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()});
       else {
         if (llvmNamedMeta.count(name) == 0) {
           ValCtx.EmitFormatError(ValidationRule::MetaKnown,
-                                 {name.str().c_str()});
+                                 {name.str()});
         }
       }
     }
@@ -2704,11 +2705,11 @@ static void ValidateResourceOverlap(
   if (conflictRes) {
     ValCtx.EmitFormatError(
         ValidationRule::SmResourceRangeOverlap,
-        {res.GetGlobalName().c_str(), std::to_string(base).c_str(),
-         std::to_string(size).c_str(),
-         std::to_string(conflictRes->GetLowerBound()).c_str(),
-         std::to_string(conflictRes->GetRangeSize()).c_str(),
-         std::to_string(space).c_str()});
+        {res.GetGlobalName(), std::to_string(base),
+         std::to_string(size),
+         std::to_string(conflictRes->GetLowerBound()),
+         std::to_string(conflictRes->GetRangeSize()),
+         std::to_string(space)});
   }
 }
 
@@ -2763,13 +2764,13 @@ static void ValidateResource(hlsl::DxilResource &res,
     if (!alignedTo4Bytes) {
       ValCtx.EmitResourceFormatError(
           &res, ValidationRule::MetaStructBufAlignment,
-          {std::to_string(4).c_str(), std::to_string(stride).c_str()});
+          {std::to_string(4), std::to_string(stride)});
     }
     if (stride > DXIL::kMaxStructBufferStride) {
       ValCtx.EmitResourceFormatError(
           &res, ValidationRule::MetaStructBufAlignmentOutOfBound,
-          {std::to_string(DXIL::kMaxStructBufferStride).c_str(),
-           std::to_string(stride).c_str()});
+          {std::to_string(DXIL::kMaxStructBufferStride),
+           std::to_string(stride)});
     }
   }
 
@@ -2785,7 +2786,7 @@ static void ValidateResource(hlsl::DxilResource &res,
 static void
 CollectCBufferRanges(DxilStructAnnotation *annotation,
                      SpanAllocator<unsigned, DxilFieldAnnotation> &constAllocator,
-                     unsigned base, DxilTypeSystem &typeSys, const char *cbName,
+                     unsigned base, DxilTypeSystem &typeSys, StringRef cbName,
                      ValidationContext &ValCtx) {
   unsigned cbSize = annotation->GetCBufferSize();
 
@@ -2808,7 +2809,7 @@ CollectCBufferRanges(DxilStructAnnotation *annotation,
                                   base + offset + EltSize - 1)) {
           ValCtx.EmitFormatError(
               ValidationRule::SmCBufferOffsetOverlap,
-              {cbName, std::to_string(base + offset).c_str()});
+              {cbName, std::to_string(base + offset)});
         }
       }
     } else if (isa<ArrayType>(EltTy)) {
@@ -2839,7 +2840,7 @@ CollectCBufferRanges(DxilStructAnnotation *annotation,
                                     arrayBase + EltSize - 1)) {
             ValCtx.EmitFormatError(
                 ValidationRule::SmCBufferOffsetOverlap,
-                {cbName, std::to_string(base + offset).c_str()});
+                {cbName, std::to_string(base + offset)});
           }
 
         } else {
@@ -2856,7 +2857,7 @@ CollectCBufferRanges(DxilStructAnnotation *annotation,
 
     if (bOutOfBound) {
       ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow,
-                             {cbName, std::to_string(base + offset).c_str()});
+                             {cbName, std::to_string(base + offset)});
     }
   }
 }
@@ -2884,7 +2885,7 @@ static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) {
       DXIL::kMaxCBufferSize << 4);
   CollectCBufferRanges(annotation, constAllocator,
                        0, typeSys,
-                       cb.GetGlobalName().c_str(), ValCtx);
+                       cb.GetGlobalName(), ValCtx);
 }
 
 static void ValidateResources(ValidationContext &ValCtx) {
@@ -3049,7 +3050,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
   if (!bAllowedInSig) {
     ValCtx.EmitFormatError(
         ValidationRule::SmSemantic,
-        {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName().c_str(), inputOutput});
+        {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), inputOutput});
   } else if (bShouldBeAllocated && !SE.IsAllocated()) {
     ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated,
       {inputOutput, SE.GetName()});
@@ -3145,13 +3146,13 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
   if (ValCtx.DxilMod.GetShaderModel()->IsGS() && SE.IsOutput()) {
     if (SE.GetOutputStream() >= DXIL::kNumOutputStreams) {
       ValCtx.EmitFormatError(ValidationRule::SmStreamIndexRange,
-                             {std::to_string(SE.GetOutputStream()).c_str(),
-                              std::to_string(DXIL::kNumOutputStreams - 1).c_str()});
+                             {std::to_string(SE.GetOutputStream()),
+                              std::to_string(DXIL::kNumOutputStreams - 1)});
     }
   } else {
     if (SE.GetOutputStream() > 0) {
       ValCtx.EmitFormatError(ValidationRule::SmStreamIndexRange,
-                             {std::to_string(SE.GetOutputStream()).c_str(),
+                             {std::to_string(SE.GetOutputStream()),
                               "0"});
     }
   }
@@ -3198,9 +3199,9 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
     unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols();
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange,
                             {SE.GetName(),
-                            std::to_string(SE.GetStartRow()).c_str(),
-                            std::to_string(SE.GetStartCol()).c_str(),
-                            std::to_string(size).c_str()});
+                            std::to_string(SE.GetStartRow()),
+                            std::to_string(SE.GetStartCol()),
+                            std::to_string(size)});
   }
 
   if (!SE.GetInterpolationMode()->IsValid()) {
@@ -3235,26 +3236,26 @@ static void ValidateSignatureOverlap(
   case DxilSignatureAllocator::kConflictsWithIndexed:
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict,
                             {E.GetName(),
-                            std::to_string(E.GetStartRow()).c_str(),
-                            std::to_string(E.GetStartCol()).c_str(),
-                            std::to_string(E.GetRows()).c_str(),
-                            std::to_string(E.GetCols()).c_str()});
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
     break;
   case DxilSignatureAllocator::kConflictsWithIndexedTessFactor:
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict,
                             {E.GetName(),
-                            std::to_string(E.GetStartRow()).c_str(),
-                            std::to_string(E.GetStartCol()).c_str(),
-                            std::to_string(E.GetRows()).c_str(),
-                            std::to_string(E.GetCols()).c_str()});
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
     break;
   case DxilSignatureAllocator::kConflictsWithInterpolationMode:
     ValCtx.EmitFormatError(ValidationRule::MetaInterpModeInOneRow,
                             {E.GetName(),
-                            std::to_string(E.GetStartRow()).c_str(),
-                            std::to_string(E.GetStartCol()).c_str(),
-                            std::to_string(E.GetRows()).c_str(),
-                            std::to_string(E.GetCols()).c_str()});
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
     break;
   case DxilSignatureAllocator::kInsufficientFreeComponents:
     DXASSERT(false, "otherwise, conflict not translated");
@@ -3262,26 +3263,26 @@ static void ValidateSignatureOverlap(
   case DxilSignatureAllocator::kOverlapElement:
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureOverlap,
                             {E.GetName(),
-                            std::to_string(E.GetStartRow()).c_str(),
-                            std::to_string(E.GetStartCol()).c_str(),
-                            std::to_string(E.GetRows()).c_str(),
-                            std::to_string(E.GetCols()).c_str()});
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
     break;
   case DxilSignatureAllocator::kIllegalComponentOrder:
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureIllegalComponentOrder,
                             {E.GetName(),
-                            std::to_string(E.GetStartRow()).c_str(),
-                            std::to_string(E.GetStartCol()).c_str(),
-                            std::to_string(E.GetRows()).c_str(),
-                            std::to_string(E.GetCols()).c_str()});
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
     break;
   case DxilSignatureAllocator::kConflictFit:
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange,
                             {E.GetName(),
-                            std::to_string(E.GetStartRow()).c_str(),
-                            std::to_string(E.GetStartCol()).c_str(),
-                            std::to_string(E.GetRows()).c_str(),
-                            std::to_string(E.GetCols()).c_str()});
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
     break;
   default:
     DXASSERT(false, "otherwise, unrecognized conflict type from DxilSignatureAllocator");
@@ -3319,7 +3320,7 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
     for (unsigned semIdx : E->GetSemanticIndexVec()) {
       if (semIdxSet.count(semIdx) > 0) {
         ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap,
-                               {E->GetName(), std::to_string(semIdx).c_str()});
+                               {E->GetName(), std::to_string(semIdx)});
         return;
       } else
         semIdxSet.insert(semIdx);
@@ -3332,7 +3333,7 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
         unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow();
         if (TargetMask & mask) {
           ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap,
-                                 {"SV_Target", std::to_string(E->GetStartRow()).c_str()});
+                                 {"SV_Target", std::to_string(E->GetStartRow())});
         }
         TargetMask = TargetMask | mask;
       }
@@ -3502,10 +3503,10 @@ static void ValidateSignatures(ValidationContext &ValCtx) {
     if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) {
       ValCtx.EmitFormatError(
           ValidationRule::SmGSTotalOutputVertexDataRange,
-          {std::to_string(maxVertexCount).c_str(),
-           std::to_string(outputScalarCount).c_str(),
-           std::to_string(totalOutputScalars).c_str(),
-           std::to_string(DXIL::kMaxGSOutputTotalScalars).c_str()});
+          {std::to_string(maxVertexCount),
+           std::to_string(outputScalarCount),
+           std::to_string(totalOutputScalars),
+           std::to_string(DXIL::kMaxGSOutputTotalScalars)});
     }
   }
 
@@ -3578,19 +3579,19 @@ static void CheckPatchConstantSemantic(ValidationContext &ValCtx)
       bFoundEdgeSemantic = true;
       if (SE->GetRows() != edgeSize || SE->GetCols() > 1) {
         ValCtx.EmitFormatError(ValidationRule::SmTessFactorSizeMatchDomain,
-                               {std::to_string(SE->GetRows()).c_str(),
-                                std::to_string(SE->GetCols()).c_str(),
+                               {std::to_string(SE->GetRows()),
+                                std::to_string(SE->GetCols()),
                                 domainName,
-                                std::to_string(edgeSize).c_str()});
+                                std::to_string(edgeSize)});
       }
     } else if (kind == kInsideSemantic) {
       bFoundInsideSemantic = true;
       if (SE->GetRows() != insideSize || SE->GetCols() > 1) {
         ValCtx.EmitFormatError(ValidationRule::SmInsideTessFactorSizeMatchDomain,
-                               {std::to_string(SE->GetRows()).c_str(),
-                                std::to_string(SE->GetCols()).c_str(),
+                               {std::to_string(SE->GetRows()),
+                                std::to_string(SE->GetCols()),
                                 domainName,
-                                std::to_string(insideSize).c_str()});
+                                std::to_string(insideSize)});
       }
     }
   }
@@ -3619,30 +3620,30 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
     if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) {
       ValCtx.EmitFormatError(
           ValidationRule::SmThreadGroupChannelRange,
-          {"X", std::to_string(x).c_str(),
-           std::to_string(DXIL::kMinCSThreadGroupX).c_str(),
-           std::to_string(DXIL::kMaxCSThreadGroupX).c_str()});
+          {"X", std::to_string(x),
+           std::to_string(DXIL::kMinCSThreadGroupX),
+           std::to_string(DXIL::kMaxCSThreadGroupX)});
     }
     if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) {
       ValCtx.EmitFormatError(
           ValidationRule::SmThreadGroupChannelRange,
-          {"Y", std::to_string(y).c_str(),
-           std::to_string(DXIL::kMinCSThreadGroupY).c_str(),
-           std::to_string(DXIL::kMaxCSThreadGroupY).c_str()});
+          {"Y", std::to_string(y),
+           std::to_string(DXIL::kMinCSThreadGroupY),
+           std::to_string(DXIL::kMaxCSThreadGroupY)});
     }
     if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) {
       ValCtx.EmitFormatError(
           ValidationRule::SmThreadGroupChannelRange,
-          {"Z", std::to_string(z).c_str(),
-           std::to_string(DXIL::kMinCSThreadGroupZ).c_str(),
-           std::to_string(DXIL::kMaxCSThreadGroupZ).c_str()});
+          {"Z", std::to_string(z),
+           std::to_string(DXIL::kMinCSThreadGroupZ),
+           std::to_string(DXIL::kMaxCSThreadGroupZ)});
     }
 
     if (threadsInGroup > DXIL::kMaxCSThreadsPerGroup) {
       ValCtx.EmitFormatError(
           ValidationRule::SmMaxTheadGroup,
-          {std::to_string(threadsInGroup).c_str(),
-           std::to_string(DXIL::kMaxCSThreadsPerGroup).c_str()});
+          {std::to_string(threadsInGroup),
+           std::to_string(DXIL::kMaxCSThreadsPerGroup)});
     }
 
     // type of threadID, thread group ID take care by DXIL operation overload
@@ -3656,8 +3657,8 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
     if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) {
       ValCtx.EmitFormatError(
           ValidationRule::SmDSInputControlPointCountRange,
-          {std::to_string(DXIL::kMaxIAPatchControlPointCount).c_str(),
-           std::to_string(inputControlPointCount).c_str()});
+          {std::to_string(DXIL::kMaxIAPatchControlPointCount),
+           std::to_string(inputControlPointCount)});
     }
     if (domain == DXIL::TessellatorDomain::Undefined) {
       ValCtx.EmitError(ValidationRule::SmValidDomain);
@@ -3672,8 +3673,8 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
         inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) {
       ValCtx.EmitFormatError(
           ValidationRule::SmHSInputControlPointCountRange,
-          {std::to_string(DXIL::kMaxIAPatchControlPointCount).c_str(),
-           std::to_string(inputControlPointCount).c_str()});
+          {std::to_string(DXIL::kMaxIAPatchControlPointCount),
+           std::to_string(inputControlPointCount)});
     }
     if (domain == DXIL::TessellatorDomain::Undefined) {
       ValCtx.EmitError(ValidationRule::SmValidDomain);
@@ -3695,9 +3696,9 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
         maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) {
       ValCtx.EmitFormatError(
           ValidationRule::MetaMaxTessFactor,
-          {std::to_string(DXIL::kHSMaxTessFactorLowerBound).c_str(),
-           std::to_string(DXIL::kHSMaxTessFactorUpperBound).c_str(),
-           std::to_string(maxTessFactor).c_str()});
+          {std::to_string(DXIL::kHSMaxTessFactorLowerBound),
+           std::to_string(DXIL::kHSMaxTessFactorUpperBound),
+           std::to_string(maxTessFactor)});
     }
     // Domain and OutPrimivtive match.
     switch (domain) {
@@ -3758,15 +3759,15 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
     if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) {
       ValCtx.EmitFormatError(
           ValidationRule::SmGSOutputVertexCountRange,
-          {std::to_string(DXIL::kMaxGSOutputVertexCount).c_str(),
-           std::to_string(maxVertexCount).c_str()});
+          {std::to_string(DXIL::kMaxGSOutputVertexCount),
+           std::to_string(maxVertexCount)});
     }
 
     unsigned instanceCount = M.GetGSInstanceCount();
     if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) {
       ValCtx.EmitFormatError(ValidationRule::SmGSInstanceCountRange,
-                             {std::to_string(DXIL::kMaxGSInstanceCount).c_str(),
-                              std::to_string(instanceCount).c_str()});
+                             {std::to_string(DXIL::kMaxGSInstanceCount),
+                              std::to_string(instanceCount)});
     }
 
     DXIL::PrimitiveTopology topo = M.GetStreamPrimitiveTopology();
@@ -3791,8 +3792,8 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
   if (outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) {
     ValCtx.EmitFormatError(
         ValidationRule::SmOutputControlPointCountRange,
-        {std::to_string(DXIL::kMaxIAPatchControlPointCount).c_str(),
-         std::to_string(outputControlPointCount).c_str()});
+        {std::to_string(DXIL::kMaxIAPatchControlPointCount),
+         std::to_string(outputControlPointCount)});
   }
 }
 

+ 311 - 57
lib/HLSL/HLOperationLower.cpp

@@ -1976,6 +1976,22 @@ Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
   return retVal;
 }
 
+Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
+  // Assemble a value of type RetTy from the already-extracted components in Elts.
+  Value *retVal = llvm::UndefValue::get(RetTy);
+  if (RetTy->isVectorTy()) {
+    unsigned vecSize = RetTy->getVectorNumElements();
+    DXASSERT(vecSize <= Elts.size(), "vector size mismatch"); // Elts may hold more entries than the vector needs; only the first vecSize are used
+    for (unsigned i = 0; i < vecSize; i++) {
+      Value *retComp = Elts[i];
+      retVal = Builder.CreateInsertElement(retVal, retComp, i); // place component i into the vector being built up from undef
+    }
+  } else {
+    retVal = Elts[0]; // non-vector RetTy: the first element is returned as-is, so Elts must be non-empty
+  }
+  return retVal;
+}
+
 void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder) {
   if (status && !isa<UndefValue>(status)) {
     Value *statusVal = Builder.CreateExtractValue(ResRet, 4);
@@ -2673,6 +2689,35 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
 void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
                                  hlsl::OP *OP, const DataLayout &DL);
 
+// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }: each consecutive (lo, hi) pair in resultElts32 becomes one 64-bit value, and `size` such values are written to resultElts[0..size-1].
+void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
+                            unsigned size, MutableArrayRef<Value *> resultElts,
+                            hlsl::OP *hlslOP, IRBuilder<> &Builder) {
+  Type *i64Ty = Builder.getInt64Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  if (EltTy == doubleTy) { // double elements: combine the two halves through the DXIL MakeDouble operation
+    Function *makeDouble =
+        hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
+    Value *makeDoubleOpArg =
+        Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
+    for (unsigned i = 0; i < size; i++) {
+      Value *lo = resultElts32[2 * i]; // reads indices 2*i and 2*i+1, so resultElts32 needs at least 2*size entries
+      Value *hi = resultElts32[2 * i + 1];
+      Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
+      resultElts[i] = V;
+    }
+  } else { // every other EltTy takes the integer path and produces an i64
+    for (unsigned i = 0; i < size; i++) {
+      Value *lo = resultElts32[2 * i];
+      Value *hi = resultElts32[2 * i + 1];
+      lo = Builder.CreateZExt(lo, i64Ty); // zero-extend both halves to i64 before combining
+      hi = Builder.CreateZExt(hi, i64Ty);
+      hi = Builder.CreateShl(hi, 32); // move the high half into bits 32..63
+      resultElts[i] = Builder.CreateOr(lo, hi); // result = lo | (hi << 32)
+    }
+  }
+}
+
 void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                    IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
 
@@ -2685,11 +2730,19 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
 
   OP::OpCode opcode = helper.opcode;
 
-  Function *F = OP->GetOpFunc(opcode, Ty->getScalarType());
+  Type *i32Ty = Builder.getInt32Ty();
+  Type *i64Ty = Builder.getInt64Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  Type *EltTy = Ty->getScalarType();
+  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
+  if (is64) {
+    EltTy = i32Ty;
+  }
+
+  Function *F = OP->GetOpFunc(opcode, EltTy);
   llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
 
-  llvm::Value *undefI =
-      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
+  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);
 
   SmallVector<Value *, 12> loadArgs;
   loadArgs.emplace_back(opArg);         // opcode
@@ -2752,10 +2805,32 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
     loadArgs.emplace_back(
         OP->GetU32Const(0)); // For case use built-in types in structure buffer.
 
-  llvm::Value *ResRet =
+  Value *ResRet =
       Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
 
-  llvm::Value *retValNew = ScalarizeResRet(Ty, ResRet, Builder);
+  Value *retValNew = nullptr;
+  if (!is64) {
+    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
+  } else {
+    unsigned size = 1;
+    if (Ty->isVectorTy()) {
+      size = Ty->getVectorNumElements();
+    }
+    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
+    EltTy = Ty->getScalarType();
+    Value *Elts[2];
+
+    Make64bitResultForLoad(Ty->getScalarType(),
+                           {
+                               Builder.CreateExtractValue(ResRet, 0),
+                               Builder.CreateExtractValue(ResRet, 1),
+                               Builder.CreateExtractValue(ResRet, 2),
+                               Builder.CreateExtractValue(ResRet, 3),
+                           },
+                           size, Elts, OP, Builder);
+
+    retValNew = ScalarizeElements(Ty, Elts, Builder);
+  }
   // replace
   helper.retVal->replaceAllUsesWith(retValNew);
 
@@ -2782,6 +2857,45 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   return nullptr;
 }
 
+// Split 64-bit values into pairs of 32-bit halves for a store:
+// { v0, v1 } becomes { v0.lo, v0.hi, v1.lo, v1.hi }.
+// The first `size` entries of vals are split and written to vals32 (low half
+// first). An undef input yields two undef i32 halves. double elements are
+// split with the SplitDouble DXIL op; any other element type is split with
+// trunc for the low half and lshr 32 + trunc for the high half.
+void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
+                           MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
+                           IRBuilder<> &Builder) {
+  Type *halfTy = Builder.getInt32Ty();
+  Type *f64Ty = Builder.getDoubleTy();
+  Value *undefHalf = UndefValue::get(halfTy);
+  const bool isDouble = EltTy == f64Ty;
+
+  // The SplitDouble function is looked up once, before any element is
+  // visited, and only for double elements.
+  Function *splitFn = nullptr;
+  Value *splitOpArg = nullptr;
+  if (isDouble) {
+    splitFn = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, f64Ty);
+    splitOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
+  }
+
+  for (unsigned idx = 0; idx != size; ++idx) {
+    Value *src = vals[idx];
+    Value *lowHalf = undefHalf;
+    Value *highHalf = undefHalf;
+    if (!isa<UndefValue>(src)) {
+      if (isDouble) {
+        Value *halves = Builder.CreateCall(splitFn, {splitOpArg, src});
+        lowHalf = Builder.CreateExtractValue(halves, 0);
+        highHalf = Builder.CreateExtractValue(halves, 1);
+      } else {
+        lowHalf = Builder.CreateTrunc(src, halfTy);
+        highHalf = Builder.CreateTrunc(Builder.CreateLShr(src, 32), halfTy);
+      }
+    }
+    vals32[2 * idx] = lowHalf;
+    vals32[2 * idx + 1] = highHalf;
+  }
+}
+
 void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                     Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
   Type *Ty = val->getType();
@@ -2800,7 +2914,16 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     break;
   }
 
-  Function *F = OP->GetOpFunc(opcode, Ty->getScalarType());
+  Type *i32Ty = Builder.getInt32Ty();
+  Type *i64Ty = Builder.getInt64Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  Type *EltTy = Ty->getScalarType();
+  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
+  if (is64) {
+    EltTy = i32Ty;
+  }
+
+  Function *F = OP->GetOpFunc(opcode, EltTy);
   llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
 
   llvm::Value *undefI =
@@ -2880,6 +3003,33 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     }
   }
 
+  if (is64) {
+    DXASSERT(mask == DXIL::kCompMask_All, "only typed buffer could have 64bit");
+    unsigned size = 1;
+    if (Ty->isVectorTy()) {
+      size = Ty->getVectorNumElements();
+    }
+    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
+    unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
+                             ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
+                             : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
+    Value *V0 = storeArgs[val0OpIdx];
+    Value *V1 = storeArgs[val0OpIdx+1];
+
+    Value *vals32[4];
+    EltTy = Ty->getScalarType();
+    Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
+    // Fill the uninit vals.
+    if (size == 1) {
+      vals32[2] = vals32[0];
+      vals32[3] = vals32[1];
+    }
+    // Change valOp to 32 version.
+    for (unsigned i = 0; i < 4; i++) {
+      storeArgs[val0OpIdx + i] = vals32[i];
+    }
+  }
+
   storeArgs.emplace_back(OP->GetU8Const(mask));
 
   Builder.CreateCall(F, storeArgs);
@@ -4897,39 +5047,143 @@ Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
   return addr;
 }
 
-Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
-                           Value *status, Type *EltTy, hlsl::OP *OP,
-                           IRBuilder<> &Builder) {
+// Emit the BufferLoad call(s) that read resultElts.size() (at most four)
+// elements of type EltTy at (bufIdx, offset) from handle, and store the
+// loaded scalars into resultElts.
+//
+// When EltTy is neither i64 nor double, a single load overloaded on EltTy is
+// emitted and component i of its result becomes resultElts[i].
+//
+// When EltTy is i64 or double, the load is overloaded on i32 instead: every
+// 64-bit element is read as two i32 components and rebuilt with
+// Make64bitResultForLoad. Elements 0-1 come from a load at offset; elements
+// 2-3, when requested, come from a second load at offset + 16 bytes.
+//
+// status is forwarded to UpdateStatus together with the load result.
+void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
+                         Value *status, Type *EltTy,
+                         MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
+                         IRBuilder<> &Builder) {
   OP::OpCode opcode = OP::OpCode::BufferLoad;
-  SmallVector<Value *, 4> Args;
-  Args.emplace_back(OP->GetU32Const((unsigned)opcode));
-  Args.emplace_back(handle);
-  Args.emplace_back(bufIdx);
-  Args.emplace_back(offset);
-  Function *dxilF = OP->GetOpFunc(opcode, EltTy);
-  Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
 
-  // status
-  UpdateStatus(Ld, status, Builder);
+  DXASSERT(resultElts.size() <= 4,
+           "buffer load cannot load more than 4 values");
+
+  // Operands of the load: opcode, resource handle, element index, byte offset.
+  Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset};
+
+  Type *i64Ty = Builder.getInt64Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
+
+  if (!is64) {
+    Function *dxilF = OP->GetOpFunc(opcode, EltTy);
+    Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
 
-  return Ld;
+    for (unsigned i = 0; i < resultElts.size(); i++) {
+      resultElts[i] = Builder.CreateExtractValue(Ld, i);
+    }
+
+    // status
+    UpdateStatus(Ld, status, Builder);
+    return;
+  } else {
+    // 64-bit elements: load as i32 and collect two halves per element.
+    Function *dxilF = OP->GetOpFunc(opcode, Builder.getInt32Ty());
+    Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
+    Value *resultElts32[8];
+    unsigned size = resultElts.size();
+    unsigned eltBase = 0;
+    for (unsigned i = 0; i < size; i++) {
+      if (i == 2) {
+        // The first load only covers elements 0 and 1. Advance the offset by
+        // 4 * 4 bytes and load again for elements 2 and 3; eltBase rebases
+        // the component index into the new result.
+        Args[DXIL::OperandIndex::kBufferLoadCoord1OpIdx] =
+            Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+        Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
+        eltBase = 4;
+      }
+      unsigned resBase = 2 * i;
+      resultElts32[resBase] = Builder.CreateExtractValue(Ld, resBase - eltBase);
+      resultElts32[resBase + 1] =
+          Builder.CreateExtractValue(Ld, resBase + 1 - eltBase);
+    }
+
+    Make64bitResultForLoad(EltTy, resultElts32, size, resultElts, OP, Builder);
+
+    // status (taken from the last load issued above)
+    UpdateStatus(Ld, status, Builder);
+
+    return;
+  }
 }
 
+// Emit the BufferStore call(s) that write vals at (bufIdx, offset) to handle.
+// vals must hold exactly four entries; mask is passed on as the store's
+// component mask operand.
+//
+// When EltTy is neither i64 nor double, a single store overloaded on EltTy
+// is emitted with vals and mask passed through unchanged.
+//
+// When EltTy is i64 or double, the store is overloaded on i32 instead: every
+// 64-bit value is split into two i32 halves with Split64bitValForStore.
+// Elements 0-1 are written by a store at offset; elements 2-3, when the mask
+// selects them, are written by a second store at offset + 16 bytes. Only the
+// masks 1, 3, 7 and 15 are accepted on this path; any other mask trips the
+// DXASSERT in the switch below.
 void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                          Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
-                         Value *val0, Value *val1, Value *val2, Value *val3, uint8_t mask) {
+                         ArrayRef<Value *> vals, uint8_t mask) {
   OP::OpCode opcode = OP::OpCode::BufferStore;
-  Value *Args[] = {OP->GetU32Const((unsigned)opcode),
-                   handle,
-                   bufIdx,
-                   offset,
-                   val0,
-                   val1,
-                   val2,
-                   val3,
-                   OP->GetU8Const(mask)};
-  Function *dxilF = OP->GetOpFunc(opcode, EltTy);
-  Builder.CreateCall(dxilF, Args);
+  DXASSERT(vals.size() == 4, "buffer store need 4 values");
+  Type *i64Ty = Builder.getInt64Ty();
+  Type *doubleTy = Builder.getDoubleTy();
+  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
+  if (!is64) {
+    Value *Args[] = {OP->GetU32Const((unsigned)opcode),
+                     handle,
+                     bufIdx,
+                     offset,
+                     vals[0],
+                     vals[1],
+                     vals[2],
+                     vals[3],
+                     OP->GetU8Const(mask)};
+    Function *dxilF = OP->GetOpFunc(opcode, EltTy);
+    Builder.CreateCall(dxilF, Args);
+  } else {
+    Type *i32Ty = Builder.getInt32Ty();
+    Function *dxilF = OP->GetOpFunc(opcode, i32Ty);
+
+    // Halves that are never filled in by the split stay undef.
+    Value *undefI32 = UndefValue::get(i32Ty);
+    Value *vals32[8] = {undefI32, undefI32, undefI32, undefI32,
+                        undefI32, undefI32, undefI32, undefI32};
+
+    // Each 64-bit element covers two i32 components, so one bit of mask
+    // expands to two bits: maskLo is the mask of the first store (elements
+    // 0-1), maskHi the mask of the second store (elements 2-3), and size is
+    // the number of 64-bit elements to split.
+    unsigned maskLo = 0;
+    unsigned maskHi = 0;
+    unsigned size = 0;
+    switch (mask) {
+    case 1:
+      maskLo = 3;
+      size = 1;
+      break;
+    case 3:
+      maskLo = 15;
+      size = 2;
+      break;
+    case 7:
+      maskLo = 15;
+      maskHi = 3;
+      size = 3;
+      break;
+    case 15:
+      maskLo = 15;
+      maskHi = 15;
+      size = 4;
+      break;
+    default:
+      DXASSERT(0, "invalid mask");
+    }
+
+    Split64bitValForStore(EltTy, vals, size, vals32, OP, Builder);
+
+    Value *Args[] = {OP->GetU32Const((unsigned)opcode),
+                     handle,
+                     bufIdx,
+                     offset,
+                     vals32[0],
+                     vals32[1],
+                     vals32[2],
+                     vals32[3],
+                     OP->GetU8Const(maskLo)};
+    Builder.CreateCall(dxilF, Args);
+    if (maskHi) {
+      // Advance the offset by 4 * 4 bytes, past the four i32 values written
+      // by the first store, and write the remaining halves.
+      offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+      Value *Args[] = {OP->GetU32Const((unsigned)opcode),
+                       handle,
+                       bufIdx,
+                       offset,
+                       vals32[4],
+                       vals32[5],
+                       vals32[6],
+                       vals32[7],
+                       OP->GetU8Const(maskHi)};
+      Builder.CreateCall(dxilF, Args);
+    }
+  }
 }
 
 Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
@@ -4948,20 +5202,20 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
 
   unsigned rest = (matSize % 4);
   if (rest) {
-    Value *ResRet =
-        GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, OP, Builder);
+    Value *ResultElts[4];
+    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder);
     for (unsigned i = 0; i < rest; i++)
-      elts[i] = Builder.CreateExtractValue(ResRet, i);
+      elts[i] = ResultElts[i];
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * rest));
   }
 
   for (unsigned i = rest; i < matSize; i += 4) {
-    Value *ResRet =
-        GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, OP, Builder);
-    elts[i] = Builder.CreateExtractValue(ResRet, 0);
-    elts[i + 1] = Builder.CreateExtractValue(ResRet, 1);
-    elts[i + 2] = Builder.CreateExtractValue(ResRet, 2);
-    elts[i + 3] = Builder.CreateExtractValue(ResRet, 3);
+    Value *ResultElts[4];
+    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder);
+    elts[i] = ResultElts[0];
+    elts[i + 1] = ResultElts[1];
+    elts[i + 2] = ResultElts[2];
+    elts[i + 3] = ResultElts[3];
 
     // Update offset by 4*4bytes.
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
@@ -5007,8 +5261,8 @@ void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
       if (elts[i+j] != undefElt)
         mask |= (1<<j);
     }
-    GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder, elts[i],
-                        elts[i + 1], elts[i + 2], elts[i + 3], mask);
+    GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
+                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask);
     // Update offset by 4*4bytes.
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
   }
@@ -5187,13 +5441,14 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
           Value *EltVal = stBuilder.CreateExtractElement(Val, i);
           uint8_t mask = DXIL::kCompMask_X;
           GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
-                              stBuilder, EltVal, undefElt, undefElt, undefElt,
+                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                               mask);
         }
       } else {
         uint8_t mask = DXIL::kCompMask_X;
-        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP, stBuilder,
-                            Val, undefElt, undefElt, undefElt, mask);
+        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
+                            stBuilder, {Val, undefElt, undefElt, undefElt},
+                            mask);
       }
 
       stUser->eraseFromParent();
@@ -5204,16 +5459,14 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
       Value *ldData = UndefValue::get(resultType);
       if (resultType->isVectorTy()) {
         for (unsigned i = 0; i < resultSize; i++) {
-          Value *eltData =
-              GenerateStructBufLd(handle, bufIdx, idxList[i],
-                                  /*status*/ nullptr, EltTy, hlslOP, ldBuilder);
-          eltData = ldBuilder.CreateExtractValue(eltData, 0);
-          ldData = ldBuilder.CreateInsertElement(ldData, eltData, i);
+          Value *ResultElt;
+          GenerateStructBufLd(handle, bufIdx, idxList[i],
+                                  /*status*/ nullptr, EltTy, ResultElt, hlslOP, ldBuilder);
+          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
         }
       } else {
-        ldData =
-            GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
-                                EltTy, hlslOP, ldBuilder);
+        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
+                                EltTy, ldData, hlslOP, ldBuilder);
       }
       ldUser->replaceAllUsesWith(ldData);
       ldUser->eraseFromParent();
@@ -5338,9 +5591,10 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
 
     if (ldInst) {
       auto LdElement = [&](Value *offset, IRBuilder<> &Builder) -> Value * {
-        Value *newLd = GenerateStructBufLd(handle, bufIdx, offset, status,
-                                           pOverloadTy, OP, Builder);
-        return ScalarizeResRet(Ty, newLd, Builder);
+        Value *ResultElts[4];
+        GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
+                            ResultElts, OP, Builder);
+        return ScalarizeElements(Ty, ResultElts, Builder);
       };
 
       Value *newLd = LdElement(offset, Builder);
@@ -5375,7 +5629,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
         }
 
         GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
-                            vals[0], vals[1], vals[2], vals[3], mask);
+                            vals, mask);
       };
       if (arraySize > 1)
         val = Builder.CreateExtractValue(val, 0);

+ 8 - 0
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -2101,6 +2101,14 @@ uint32_t CGMSHLSLRuntime::AddUAVSRV(VarDecl *decl,
     if (EltTy->isBuiltinType()) {
       const BuiltinType *BTy = EltTy->getAs<BuiltinType>();
       CompType::Kind kind = BuiltinTyToCompTy(BTy, bSNorm, bUNorm);
+      // 64bits types are implemented with u32.
+      if (kind == CompType::Kind::U64 ||
+          kind == CompType::Kind::I64 ||
+          kind == CompType::Kind::SNormF64 ||
+          kind == CompType::Kind::UNormF64 ||
+          kind == CompType::Kind::F64) {
+        kind = CompType::Kind::U32;
+      }
       hlslRes->SetCompType(kind);
     } else {
       DXASSERT(!bSNorm && !bUNorm, "snorm/unorm on invalid type");

+ 43 - 0
tools/clang/test/CodeGenHLSL/res64bit.hlsl

@@ -0,0 +1,43 @@
+// RUN: %dxc -E main  -T cs_6_0 %s  | FileCheck %s
+
+// Exercises loads and stores of 64-bit element types (double, int64_t,
+// uint64_t) on typed resources and on structured buffers. The expected
+// output patterns are the FileCheck directives inside main.
+
+RWBuffer<double> uav1;
+RWTexture2D<uint64_t> uav2;
+
+RWTexture1D<double2> uav3;
+
+// Structured buffer element mixing 64-bit float and integer members.
+struct Foo
+{
+  double2 a;
+  int64_t b;
+  uint64_t4 c;
+};
+
+StructuredBuffer<Foo> buf1;
+RWStructuredBuffer<Foo> buf2;
+
+[ numthreads( 64, 2, 2 ) ]
+void main( uint GI : SV_GroupIndex)
+{
+    // Typed buffer store of a double.
+    // CHECK: splitdouble
+    uav1[GI] = GI;
+
+    // Typed texture store of a uint64_t.
+    uav2[GI.xx] = GI+1;
+    // Typed texture store of a double2.
+    // CHECK: splitDouble
+    uav3[GI] = GI+2;
+
+    // Whole-struct copy between structured buffers.
+    // CHECK: makeDouble
+    buf2[GI] = buf1[GI];
+
+    // int64_t member: load, add 6, store.
+    // CHECK: zext
+    // CHECK: zext
+    // CHECK: shl
+    // CHECK: or
+    // CHECK: 6
+    // CHECK: trunc
+    // CHECK: lshr
+    // CHECK: trunc
+    buf2[GI+1].b = buf1[GI].b + 6;
+    // double2 member: load, then store.
+    // CHECK: makeDouble
+    // CHECK: splitdouble
+    buf2[GI+2].a = buf1[GI].a;
+}

+ 2 - 1
tools/clang/test/CodeGenHLSL/uint64_2.hlsl

@@ -2,7 +2,8 @@
 
 // CHECK: Typed UAV Load Additional Formats
 // CHECK: 64-Bit integer
-// CHECK: dx.op.bufferStore.i64
+// CHECK: dx.op.bufferStore.i32
+// CHECK: dx.op.bufferStore.i32
 // CHECK: !{i32 0, i64 1056768
 
 // Note: a change in the internal layout will produce

+ 5 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -438,6 +438,7 @@ public:
   TEST_METHOD(CodeGenReadFromOutput2)
   TEST_METHOD(CodeGenReadFromOutput3)
   TEST_METHOD(CodeGenRedundantinput1)
+  TEST_METHOD(CodeGenRes64bit)
   TEST_METHOD(CodeGenRovs)
   TEST_METHOD(CodeGenRValSubscript)
   TEST_METHOD(CodeGenSample1)
@@ -2260,6 +2261,10 @@ TEST_F(CompilerTest, CodeGenRedundantinput1) {
   CodeGenTest(L"..\\CodeGenHLSL\\redundantinput1.hlsl");
 }
 
+// Runs CodeGenTestCheck on res64bit.hlsl, the test file for 64-bit element
+// types in typed resources and structured buffers.
+TEST_F(CompilerTest, CodeGenRes64bit) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\res64bit.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenRovs) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\rovs.hlsl");
 }

+ 2 - 2
utils/hct/hctdb.py

@@ -656,13 +656,13 @@ class db_dxil(object):
             db_dxil_param(9, "$o", "value3", "value"),
             db_dxil_param(10,"i8", "mask", "written value mask")])
         next_op_idx += 1
-        self.add_dxil_op("BufferLoad", next_op_idx, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
+        self.add_dxil_op("BufferLoad", next_op_idx, "BufferLoad", "reads from a TypedBuffer", "hfwi", "ro", [
             db_dxil_param(0, "$r", "", "the loaded value"),
             db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"),
             db_dxil_param(3, "i32", "index", "element index"),
             db_dxil_param(4, "i32", "wot", "coordinate")])
         next_op_idx += 1
-        self.add_dxil_op("BufferStore", next_op_idx, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
+        self.add_dxil_op("BufferStore", next_op_idx, "BufferStore", "writes to a RWTypedBuffer", "hfwi", "", [
             db_dxil_param(0, "v", "", ""),
             db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
             db_dxil_param(3, "i32", "coord0", "coordinate in elements"),