Browse Source

[spirv] Add support for vector swizzling (#503)

Most of the cases for vector swizzling are handled except:

* Compound assignment used as lhs
Lei Zhang 8 years ago
parent
commit
a1a3623787

+ 6 - 0
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -105,6 +105,12 @@ public:
   uint32_t createCompositeExtract(uint32_t resultType, uint32_t composite,
                                   llvm::ArrayRef<uint32_t> indexes);
 
+  /// \brief Creates a vector shuffle instruction that selects components from
+  /// vector1 and vector2 using selectors. Returns the result's <result-id>.
+  uint32_t createVectorShuffle(uint32_t resultType, uint32_t vector1,
+                               uint32_t vector2,
+                               llvm::ArrayRef<uint32_t> selectors);
+
   /// \brief Creates a load instruction loading the value of the given
   /// <result-type> from the given pointer. Returns the <result-id> for the
   /// loaded value.

+ 377 - 75
tools/clang/lib/SPIRV/EmitSPIRVAction.cpp

@@ -56,6 +56,24 @@ bool isFloatOrVecOfFloatType(QualType type) {
           hlsl::GetHLSLVecElementType(type)->isFloatingType());
 }
 
+bool isCompoundAssignment(BinaryOperatorKind opcode) { // True for op= kinds.
+  switch (opcode) {
+  case BO_AddAssign:
+  case BO_SubAssign:
+  case BO_MulAssign:
+  case BO_DivAssign:
+  case BO_RemAssign:
+  case BO_AndAssign:
+  case BO_OrAssign:
+  case BO_XorAssign:
+  case BO_ShlAssign:
+  case BO_ShrAssign:
+    return true;
+  default: // Everything else, including plain BO_Assign.
+    return false;
+  }
+}
+
 } // namespace
 
 /// SPIR-V emitter class. It consumes the HLSL AST and emits SPIR-V words.
@@ -610,6 +628,10 @@ public:
       return doUnaryOperator(unaryOp);
     }
 
+    if (const auto *vecElemExpr = dyn_cast<HLSLVectorElementExpr>(expr)) {
+      return doHLSLVectorElementExpr(vecElemExpr);
+    }
+
     if (const auto *funcCall = dyn_cast<CallExpr>(expr)) {
       return doCallExpr(funcCall);
     }
@@ -694,30 +716,132 @@ public:
     return theBuilder.createCompositeConstruct(resultType, constituents);
   }
 
-  uint32_t doBinaryOperator(const BinaryOperator *expr) {
-    const auto opcode = expr->getOpcode();
+  /// Tries to emit instructions for assigning to the given vector element
+  /// accessing expression. Returns 0 if the attempt fails and no instructions
+  /// are generated; otherwise returns rhs (see the TODO before the return).
+  ///
+  /// This method handles the cases where we are writing to neither one element
+  /// nor all elements in their original order. For other cases, 0 will be
+  /// returned and the normal assignment process should be used.
+  uint32_t tryToAssignToVectorElements(const Expr *lhs, const uint32_t rhs) {
+    // Assigning to a vector swizzling lhs is tricky if we are neither
+    // writing to one element nor all elements in their original order.
+    // Under such cases, we need to create a new vector swizzling involving
+    // both the lhs and rhs vectors and then write the result of this swizzling
+    // into the base vector of lhs.
+    // For example, for vec4.yz = vec2, we need to do the following:
+    //
+    //   %vec4Val = OpLoad %v4float %vec4
+    //   %vec2Val = OpLoad %v2float %vec2
+    //   %shuffle = OpVectorShuffle %v4float %vec4Val %vec2Val 0 4 5 3
+    //   OpStore %vec4 %shuffle
+    //
+    // When doing the vector shuffle, we use the lhs base vector as the first
+    // vector and the rhs vector as the second vector. Therefore, all elements
+    // in the second vector will be selected into the shuffle result.
 
-    // Handle assignment first since we need to evaluate rhs before lhs.
-    // For other binary operations, we need to evaluate lhs before rhs.
-    if (opcode == BO_Assign) {
-      const uint32_t rhs = doExpr(expr->getRHS());
-      const uint32_t lhs = doExpr(expr->getLHS());
+    const auto *lhsExpr = dyn_cast<HLSLVectorElementExpr>(lhs);
 
-      theBuilder.createStore(lhs, rhs);
-      // Assignment returns a rvalue.
-      return rhs;
+    if (!lhsExpr)
+      return 0;
+
+    if (!isVectorShuffle(lhs)) {
+      // No vector shuffle needed to be generated for this assignment.
+      // Should fall back to the normal handling of assignment.
+      return 0;
     }
 
-    // Try to optimize floatN * float case
-    if (opcode == BO_Mul) {
-      if (const uint32_t result = tryToGenFloatVectorScale(expr))
-        return result;
+    const Expr *base = nullptr;
+    hlsl::VectorMemberAccessPositions accessor;
+    condenseVectorElementExpr(lhsExpr, &base, &accessor);
+
+    const QualType baseType = base->getType();
+    assert(hlsl::IsHLSLVecType(baseType));
+    const auto baseSize = hlsl::GetHLSLVecSize(baseType);
+
+    llvm::SmallVector<uint32_t, 4> selectors;
+    selectors.resize(baseSize);
+    // Assume we are selecting all original elements first.
+    for (uint32_t i = 0; i < baseSize; ++i) {
+      selectors[i] = i;
+    }
+
+    // Now fix up the elements that actually got overwritten by the rhs vector.
+    // Since we are using the rhs vector as the second vector, their index
+    // should be offset'ed by the size of the lhs base vector.
+    for (uint32_t i = 0; i < accessor.Count; ++i) {
+      uint32_t position;
+      accessor.GetPosition(i, &position);
+      selectors[position] = baseSize + i;
+    }
+
+    const uint32_t baseTypeId = typeTranslator.translateType(baseType);
+    const uint32_t vec1 = doExpr(base);
+    const uint32_t vec1Val = theBuilder.createLoad(baseTypeId, vec1);
+    const uint32_t shuffle =
+        theBuilder.createVectorShuffle(baseTypeId, vec1Val, rhs, selectors);
+
+    theBuilder.createStore(vec1, shuffle);
+
+    // TODO: OK, this return value is incorrect for compound assignments, for
+    // which cases we should return lvalues. Should at least emit errors if
+    // this return value is used (can be checked via ASTContext.getParents).
+    return rhs;
+  }
+
+  /// Generates the necessary instructions for assigning rhs to lhs, first
+  /// trying tryToAssignToVectorElements for a swizzled lhs. A non-zero lhsPtr
+  /// is used as the pointer for lhs instead of evaluating lhs again.
+  uint32_t processAssignment(const Expr *lhs, const uint32_t rhs,
+                             bool isCompoundAssignment, uint32_t lhsPtr = 0) {
+    // A swizzled lhs needs special handling; non-zero means it was handled.
+    if (const uint32_t result = tryToAssignToVectorElements(lhs, rhs)) {
+      return result;
     }
 
-    const uint32_t lhs = doExpr(expr->getLHS());
-    const uint32_t rhs = doExpr(expr->getRHS());
-    const uint32_t typeId = typeTranslator.translateType(expr->getType());
-    const QualType elemType = expr->getLHS()->getType();
+    // Normal assignment: evaluate lhs only if no lhsPtr was supplied.
+    if (lhsPtr == 0)
+      lhsPtr = doExpr(lhs);
+
+    theBuilder.createStore(lhsPtr, rhs);
+    // Plain assignment returns an rvalue, while compound assignment returns
+    // an lvalue.
+    return isCompoundAssignment ? lhsPtr : rhs;
+  }
+
+  /// Generates the necessary instructions for conducting the given binary
+  /// operation on lhs and rhs. If lhsResultId is not nullptr, the evaluated
+  /// pointer from lhs during the process will be written into it. If
+  /// mandateGenOpcode is not spv::Op::Max, it is used as the SPIR-V opcode
+  /// instead of deducing from Clang frontend opcode.
+  uint32_t processBinaryOp(const Expr *lhs, const Expr *rhs,
+                           const BinaryOperatorKind opcode,
+                           const uint32_t resultType,
+                           uint32_t *lhsResultId = nullptr,
+                           const spv::Op mandateGenOpcode = spv::Op::Max) {
+    const spv::Op spvOp = (mandateGenOpcode == spv::Op::Max)
+                              ? translateOp(opcode, lhs->getType())
+                              : mandateGenOpcode;
+
+    uint32_t rhsVal, lhsPtr, lhsVal;
+    if (isCompoundAssignment(opcode)) {
+      // Evaluate rhs before lhs
+      rhsVal = doExpr(rhs);
+      lhsVal = lhsPtr = doExpr(lhs);
+      // This is a compound assignment. We need to load the lhs value if lhs
+      // does not generate a vector shuffle.
+      if (!isVectorShuffle(lhs)) {
+        const uint32_t lhsTy = typeTranslator.translateType(lhs->getType());
+        lhsVal = theBuilder.createLoad(lhsTy, lhsPtr);
+      }
+    } else {
+      // Evaluate lhs before rhs
+      lhsVal = lhsPtr = doExpr(lhs);
+      rhsVal = doExpr(rhs);
+    }
+
+    if (lhsResultId)
+      *lhsResultId = lhsPtr;
 
     switch (opcode) {
     case BO_Add:
@@ -737,23 +861,47 @@ public:
     case BO_Shl:
     case BO_Shr:
     case BO_LAnd:
-    case BO_LOr: {
-      const spv::Op spvOp = translateOp(opcode, elemType);
-      return theBuilder.createBinaryOp(spvOp, typeId, lhs, rhs);
-    }
-    case BO_Assign: {
-      llvm_unreachable("assignment already handled before");
-    } break;
+    case BO_LOr:
+    case BO_AddAssign:
+    case BO_SubAssign:
+    case BO_MulAssign:
+    case BO_DivAssign:
+    case BO_RemAssign:
+    case BO_AndAssign:
+    case BO_OrAssign:
+    case BO_XorAssign:
+    case BO_ShlAssign:
+    case BO_ShrAssign:
+      return theBuilder.createBinaryOp(spvOp, resultType, lhsVal, rhsVal);
+    case BO_Assign:
+      llvm_unreachable("assignment should not be handled here");
     default:
       break;
     }
 
     emitError("BinaryOperator '%0' is not supported yet.")
-        << expr->getOpcodeStr(opcode);
-    expr->dump();
+        << BinaryOperator::getOpcodeStr(opcode);
     return 0;
   }
 
+  uint32_t doBinaryOperator(const BinaryOperator *expr) {
+    const auto opcode = expr->getOpcode();
+
+    // Handle assignment first since we need to evaluate rhs before lhs (the
+    // rhs is evaluated here as an argument). Other ops evaluate lhs first.
+    if (opcode == BO_Assign)
+      return processAssignment(expr->getLHS(), doExpr(expr->getRHS()), false);
+
+    // Try the floatN * float fast path; a zero result means it did not apply.
+    if (opcode == BO_Mul) {
+      if (const uint32_t result = tryToGenFloatVectorScale(expr))
+        return result;
+    }
+
+    const uint32_t resultType = typeTranslator.translateType(expr->getType());
+    return processBinaryOp(expr->getLHS(), expr->getRHS(), opcode, resultType);
+  }
+
   uint32_t doCompoundAssignOperator(const CompoundAssignOperator *expr) {
     const auto opcode = expr->getOpcode();
 
@@ -766,37 +914,11 @@ public:
     const auto *rhs = expr->getRHS();
     const auto *lhs = expr->getLHS();
 
-    switch (opcode) {
-    case BO_AddAssign:
-    case BO_SubAssign:
-    case BO_MulAssign:
-    case BO_DivAssign:
-    case BO_RemAssign:
-    case BO_AndAssign:
-    case BO_OrAssign:
-    case BO_XorAssign:
-    case BO_ShlAssign:
-    case BO_ShrAssign: {
-      const uint32_t resultType = typeTranslator.translateType(expr->getType());
-
-      // Evalute rhs before lhs
-      const uint32_t rhsVal = doExpr(rhs);
-      const uint32_t lhsPtr = doExpr(lhs);
-      const uint32_t lhsVal = theBuilder.createLoad(resultType, lhsPtr);
-
-      const spv::Op spvOp = translateOp(opcode, expr->getType());
-      const uint32_t result =
-          theBuilder.createBinaryOp(spvOp, resultType, lhsVal, rhsVal);
-      theBuilder.createStore(lhsPtr, result);
-
-      // Compound assign operators return lvalues.
-      return lhsPtr;
-    }
-    default:
-      emitError("CompoundAssignOperator '%0' unimplemented")
-          << expr->getOpcodeStr(opcode);
-      return 0;
-    }
+    uint32_t lhsPtr = 0;
+    const uint32_t resultType = typeTranslator.translateType(expr->getType());
+    const uint32_t result =
+        processBinaryOp(lhs, rhs, opcode, resultType, &lhsPtr);
+    return processAssignment(lhs, result, true, lhsPtr);
   }
 
   uint32_t doUnaryOperator(const UnaryOperator *expr) {
@@ -852,6 +974,181 @@ public:
     return 0;
   }
 
+  /// Processes the given expression and emits SPIR-V instructions. If the
+  /// expression is a glvalue, additionally loads from the resulting pointer.
+  ///
+  /// This method is useful for cases where ImplicitCastExpr (LValueToRValue) is
+  /// missing when using an lvalue as rvalue in the AST, e.g., DeclRefExpr will
+  /// not be wrapped in ImplicitCastExpr (LValueToRValue) when appearing in
+  /// HLSLVectorElementExpr since the generated HLSLVectorElementExpr itself can
+  /// be lvalue or rvalue.
+  uint32_t loadIfGLValue(const Expr *expr) {
+    const uint32_t result = doExpr(expr);
+    if (expr->isGLValue()) { // result is used as the pointer to load from.
+      const uint32_t baseTyId = typeTranslator.translateType(expr->getType());
+      return theBuilder.createLoad(baseTyId, result);
+    }
+
+    return result; // Not a glvalue: return the doExpr result unchanged.
+  }
+
+  /// Condenses a chain of nested HLSLVectorElementExpr (e.g., v.wzx.grb) rooted
+  /// at expr into one. Writes the original base into *basePtr and the condensed
+  /// accessor into *flattenedAccessor.
+  void condenseVectorElementExpr(
+      const HLSLVectorElementExpr *expr, const Expr **basePtr,
+      hlsl::VectorMemberAccessPositions *flattenedAccessor) {
+    llvm::SmallVector<hlsl::VectorMemberAccessPositions, 2> accessors;
+    accessors.push_back(expr->getEncodedElementAccess());
+
+    // Walk down the chain of bases until we find the true base vector. In the
+    // meantime, collect the accessors from the outermost to the innermost.
+    *basePtr = expr->getBase();
+    while (const auto *vecElemBase =
+               dyn_cast<HLSLVectorElementExpr>(*basePtr)) {
+      accessors.push_back(vecElemBase->getEncodedElementAccess());
+      *basePtr = vecElemBase->getBase();
+    }
+
+    *flattenedAccessor = accessors.back(); // Start from the innermost accessor.
+    for (int32_t i = accessors.size() - 2; i >= 0; --i) {
+      const auto &currentAccessor = accessors[i];
+
+      // Compose the current level of accessor with the accessor flattened
+      // from all the inner levels.
+      hlsl::VectorMemberAccessPositions combinedAccessor;
+      for (uint32_t j = 0; j < currentAccessor.Count; ++j) {
+        uint32_t currentPosition = 0;
+        currentAccessor.GetPosition(j, &currentPosition);
+        uint32_t previousPosition = 0;
+        flattenedAccessor->GetPosition(currentPosition, &previousPosition);
+        combinedAccessor.SetPosition(j, previousPosition);
+      }
+      combinedAccessor.Count = currentAccessor.Count;
+      combinedAccessor.IsValid =
+          flattenedAccessor->IsValid && currentAccessor.IsValid;
+
+      *flattenedAccessor = combinedAccessor;
+    }
+  }
+
+  uint32_t doHLSLVectorElementExpr(const HLSLVectorElementExpr *expr) {
+    const Expr *baseExpr = nullptr;
+    hlsl::VectorMemberAccessPositions accessor;
+    condenseVectorElementExpr(expr, &baseExpr, &accessor);
+
+    const QualType baseType = baseExpr->getType();
+    assert(hlsl::IsHLSLVecType(baseType));
+    const auto baseSize = hlsl::GetHLSLVecSize(baseType);
+
+    const uint32_t type = typeTranslator.translateType(expr->getType());
+    const auto accessorSize = accessor.Count;
+
+    // Depending on the number of elements selected, we emit different
+    // instructions.
+    // For vectors of size greater than 1, if we are only selecting one element,
+    // typical access chain or composite extraction should be fine. But if we
+    // are selecting more than one element, we must resort to vector-specific
+    // operations.
+    // For size-1 vectors, if we are selecting their single element multiple
+    // times, we need composite construct instructions.
+
+    if (accessorSize == 1) {
+      if (baseSize == 1) {
+        // Selecting one element from a size-1 vector. The underlying vector is
+        // already treated as a scalar.
+        return doExpr(baseExpr);
+      }
+
+      // If the base is an lvalue, we should emit an access chain instruction
+      // so that we can load/store the specified element. For rvalue base,
+      // we should use composite extraction. We should check the immediate base
+      // instead of the original base here since we can have something like
+      // v.xyyz to turn a lvalue v into rvalue.
+      if (expr->getBase()->isGLValue()) { // E.g., v.x;
+        // TODO: select the correct storage class
+        const uint32_t ptrType =
+            theBuilder.getPointerType(type, spv::StorageClass::Function);
+        const uint32_t index = theBuilder.getConstantInt32(accessor.Swz0);
+        // We need a lvalue here. Do not try to load.
+        return theBuilder.createAccessChain(ptrType, doExpr(baseExpr), {index});
+      } else { // E.g., (v + w).x;
+        // The original base vector may not be a rvalue. Need to load it if
+        // it is lvalue since ImplicitCastExpr (LValueToRValue) will be missing
+        // for that case.
+        return theBuilder.createCompositeExtract(type, loadIfGLValue(baseExpr),
+                                                 {accessor.Swz0});
+      }
+    }
+
+    if (baseSize == 1) {
+      // Replicating the element of a size-1 vector. Construct the vector.
+      llvm::SmallVector<uint32_t, 4> components(
+          static_cast<size_t>(accessorSize), loadIfGLValue(baseExpr));
+      return theBuilder.createCompositeConstruct(type, components);
+    }
+
+    llvm::SmallVector<uint32_t, 4> selectors;
+    selectors.resize(accessorSize);
+    // Whether we are selecting elements in the original order
+    bool originalOrder = baseSize == accessorSize;
+    for (uint32_t i = 0; i < accessorSize; ++i) {
+      accessor.GetPosition(i, &selectors[i]);
+      // We can select more elements than the vector provides. This handles
+      // that case too.
+      originalOrder &= selectors[i] == i;
+    }
+
+    if (originalOrder) // Identity swizzle: just use the base as it is.
+      return doExpr(baseExpr);
+
+    const uint32_t baseVal = loadIfGLValue(baseExpr);
+    // Use base for both vectors. But we are only selecting values from the
+    // first one.
+    return theBuilder.createVectorShuffle(type, baseVal, baseVal, selectors);
+  }
+
+  /// Returns true if the given expression will be translated into a vector
+  /// shuffle instruction in SPIR-V.
+  ///
+  /// We emit a vector shuffle instruction iff
+  /// * We are not selecting only one element from the vector (OpAccessChain
+  ///   or OpCompositeExtract for such case);
+  /// * We are not selecting all elements in their original order (essentially
+  ///   the original vector, no shuffling needed).
+  bool isVectorShuffle(const Expr *expr) {
+    // TODO: the following check is essentially duplicated from
+    // doHLSLVectorElementExpr. Should unify them.
+    if (const auto *vecElemExpr = dyn_cast<HLSLVectorElementExpr>(expr)) {
+      const Expr *base = nullptr;
+      hlsl::VectorMemberAccessPositions accessor;
+      condenseVectorElementExpr(vecElemExpr, &base, &accessor);
+
+      const auto accessorSize = accessor.Count;
+      if (accessorSize == 1) {
+        // Selecting only one element. OpAccessChain or OpCompositeExtract for
+        // such cases.
+        return false;
+      }
+
+      const auto baseSize = hlsl::GetHLSLVecSize(base->getType());
+      if (accessorSize != baseSize)
+        return true; // NOTE(review): also true for size-1 bases; confirm.
+
+      for (uint32_t i = 0; i < accessorSize; ++i) {
+        uint32_t position;
+        accessor.GetPosition(i, &position);
+        if (position != i)
+          return true;
+      }
+
+      // Selecting exactly the original vector. No vector shuffle generated.
+      return false;
+    }
+
+    return false; // Not an HLSLVectorElementExpr at all.
+  }
+
   uint32_t doCastExpr(const CastExpr *expr) {
     const Expr *subExpr = expr->getSubExpr();
     const QualType toType = expr->getType();
@@ -859,6 +1156,14 @@ public:
     switch (expr->getCastKind()) {
     case CastKind::CK_LValueToRValue: {
       const uint32_t fromValue = doExpr(subExpr);
+      if (isVectorShuffle(subExpr)) {
+        // By reaching here, it means the vector element accessing operation is
+        // an lvalue. If we generated a vector shuffle for it and trying to use
+        // it as a rvalue, we cannot do the load here as normal. Need the upper
+        // nodes in the AST tree to handle it properly.
+        return fromValue;
+      }
+
       // Using lvalue as rvalue means we need to OpLoad the contents from
       // the parameter/variable first.
       const uint32_t resultType = typeTranslator.translateType(toType);
@@ -1217,20 +1522,15 @@ public:
           const uint32_t vecType =
               typeTranslator.translateType(expr->getType());
           if (isa<CompoundAssignOperator>(expr)) {
-            // For floatN * float cases. We'll need to do the load/store and
-            // return the lhs.
-            const uint32_t rhsVal = doExpr(cast->getSubExpr());
-            const uint32_t lhsPtr = doExpr(lhs);
-            const uint32_t lhsVal = theBuilder.createLoad(vecType, lhsPtr);
-            const uint32_t result = theBuilder.createBinaryOp(
-                spv::Op::OpVectorTimesScalar, vecType, lhsVal, rhsVal);
-            theBuilder.createStore(lhsPtr, result);
-            return lhsPtr;
+            uint32_t lhsPtr = 0;
+            const uint32_t result =
+                processBinaryOp(lhs, cast->getSubExpr(), expr->getOpcode(),
+                                vecType, &lhsPtr, spv::Op::OpVectorTimesScalar);
+            return processAssignment(lhs, result, true, lhsPtr);
           } else {
-            const uint32_t lhsId = doExpr(lhs);
-            const uint32_t rhsId = doExpr(cast->getSubExpr());
-            return theBuilder.createBinaryOp(spv::Op::OpVectorTimesScalar,
-                                             vecType, lhsId, rhsId);
+            return processBinaryOp(lhs, cast->getSubExpr(), expr->getOpcode(),
+                                   vecType, nullptr,
+                                   spv::Op::OpVectorTimesScalar);
           }
         }
       }
@@ -1242,10 +1542,12 @@ public:
         if (cast->getCastKind() == CK_HLSLVectorSplat) {
           const uint32_t vecType =
               typeTranslator.translateType(expr->getType());
-          const uint32_t lhsId = doExpr(cast->getSubExpr());
-          const uint32_t rhsId = doExpr(rhs);
-          return theBuilder.createBinaryOp(spv::Op::OpVectorTimesScalar,
-                                           vecType, rhsId, lhsId);
+          // We need to switch the positions of lhs and rhs here because
+          // OpVectorTimesScalar requires the first operand to be a vector and
+          // the second to be a scalar.
+          return processBinaryOp(rhs, cast->getSubExpr(), expr->getOpcode(),
+                                 vecType, nullptr,
+                                 spv::Op::OpVectorTimesScalar);
         }
       }
     }

+ 12 - 0
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -148,6 +148,18 @@ ModuleBuilder::createCompositeExtract(uint32_t resultType, uint32_t composite,
   return resultId;
 }
 
+uint32_t
+ModuleBuilder::createVectorShuffle(uint32_t resultType, uint32_t vector1,
+                                   uint32_t vector2,
+                                   llvm::ArrayRef<uint32_t> selectors) {
+  assert(insertPoint && "null insert point");
+  const uint32_t resultId = theContext.takeNextId(); // Fresh <result-id>.
+  instBuilder.opVectorShuffle(resultType, resultId, vector1, vector2, selectors)
+      .x(); // NOTE(review): presumably finalizes into constructSite; confirm.
+  insertPoint->appendInstruction(std::move(constructSite));
+  return resultId; // Callers refer to the shuffle result through this id.
+}
+
 uint32_t ModuleBuilder::createLoad(uint32_t resultType, uint32_t pointer) {
   assert(insertPoint && "null insert point");
   const uint32_t resultId = theContext.takeNextId();

+ 41 - 0
tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.vector.hlsl

@@ -112,4 +112,45 @@ void main() {
 // CHECK-NEXT: [[mod3:%\d+]] = OpFRem %v4float [[y5]] [[x5]]
 // CHECK-NEXT: OpStore %y [[mod3]]
     y %= x;
+
+    float3 v3f;
+    float4 v4f;
+
+    // Vector swizzle
+// CHECK-NEXT: [[ac0:%\d+]] = OpAccessChain %_ptr_Function_float %v3f %int_2
+// CHECK-NEXT: [[e0:%\d+]] = OpLoad %float [[ac0]]
+// CHECK-NEXT: [[ac1:%\d+]] = OpAccessChain %_ptr_Function_float %v4f %int_0
+// CHECK-NEXT: [[e1:%\d+]] = OpLoad %float [[ac1]]
+// CHECK-NEXT: [[mul4:%\d+]] = OpFMul %float [[e1]] [[e0]]
+// CHECK-NEXT: OpStore [[ac1]] [[mul4]]
+    v4f.x *= v3f.z; // one element
+
+// CHECK-NEXT: [[v3f0:%\d+]] = OpLoad %v3float %v3f
+// CHECK-NEXT: [[vs0:%\d+]] = OpVectorShuffle %v2float [[v3f0]] [[v3f0]] 0 1
+// CHECK-NEXT: [[v4f0:%\d+]] = OpLoad %v4float %v4f
+// CHECK-NEXT: [[vs1:%\d+]] = OpVectorShuffle %v2float [[v4f0]] [[v4f0]] 2 3
+// CHECK-NEXT: [[mul5:%\d+]] = OpFMul %v2float [[vs1]] [[vs0]]
+// CHECK-NEXT: [[v4f1:%\d+]] = OpLoad %v4float %v4f
+// CHECK-NEXT: [[vs2:%\d+]] = OpVectorShuffle %v4float [[v4f1]] [[mul5]] 0 1 4 5
+// CHECK-NEXT: OpStore %v4f [[vs2]]
+    v4f.zw *= v3f.xy; // two elements
+
+// CHECK-NEXT: [[v4f2:%\d+]] = OpLoad %v4float %v4f
+// CHECK-NEXT: [[vs3:%\d+]] = OpVectorShuffle %v3float [[v4f2]] [[v4f2]] 0 1 2
+// CHECK-NEXT: [[mul6:%\d+]] = OpVectorTimesScalar %v3float [[vs3]] %float_4
+// CHECK-NEXT: [[v4f3:%\d+]] = OpLoad %v4float %v4f
+// CHECK-NEXT: [[vs4:%\d+]] = OpVectorShuffle %v4float [[v4f3]] [[mul6]] 4 5 6 3
+// CHECK-NEXT: OpStore %v4f [[vs4]]
+    v4f.xyz *= 4.0; // three elements (with scalar, should generate OpVectorTimesScalar)
+
+    int4 v4i;
+
+// CHECK-NEXT: [[cc0:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_4 %int_4
+// CHECK-NEXT: [[v4i0:%\d+]] = OpLoad %v4int %v4i
+// CHECK-NEXT: [[vs5:%\d+]] = OpVectorShuffle %v3int [[v4i0]] [[v4i0]] 0 1 2
+// CHECK-NEXT: [[mul7:%\d+]] = OpIMul %v3int [[vs5]] [[cc0]]
+// CHECK-NEXT: [[v4i1:%\d+]] = OpLoad %v4int %v4i
+// CHECK-NEXT: [[vs6:%\d+]] = OpVectorShuffle %v4int [[v4i1]] [[mul7]] 4 5 6 3
+// CHECK-NEXT: OpStore %v4i [[vs6]]
+    v4i.xyz *= 4; // three elements (with scalar, but should not generate OpVectorTimesScalar)
 }

+ 1 - 1
tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl

@@ -15,8 +15,8 @@ void main() {
 // CHECK-NEXT: [[mul0:%\d+]] = OpVectorTimesScalar %v4float [[a4]] [[s4]]
 // CHECK-NEXT: OpStore %b [[mul0]]
     b = a * s;
-// CHECK-NEXT: [[s5:%\d+]] = OpLoad %float %s
 // CHECK-NEXT: [[a5:%\d+]] = OpLoad %v4float %a
+// CHECK-NEXT: [[s5:%\d+]] = OpLoad %float %s
 // CHECK-NEXT: [[mul1:%\d+]] = OpVectorTimesScalar %v4float [[a5]] [[s5]]
 // CHECK-NEXT: OpStore %b [[mul1]]
     b = s * a;

+ 141 - 0
tools/clang/test/CodeGenSPIRV/op.vector.swizzle.hlsl

@@ -0,0 +1,141 @@
+// Run: %dxc -T vs_6_0 -E main
+
+// Tests should cover vector swizzling
+// * from lvalue/rvalue
+// * used as lvalue/rvalue
+// * selecting one/two/three/four elements
+// * selecting the same element multiple times
+// * selecting more elements than the base vector
+// * selecting the original vector
+// * element selection order
+// * assignment/compound assignment
+// * continuous selection
+
+void main() {
+// CHECK-LABEL: %bb_entry = OpLabel
+    float4 v4f1, v4f2;
+    float3 v3f1, v3f2;
+    float2 v2f;
+
+    // Assign to whole vector
+// CHECK:      [[v0:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: OpStore %v4f2 [[v0]]
+    v4f2 = v4f1.xyzw; // rhs: all in original order
+
+    // Assign to one element
+// CHECK-NEXT: [[ac0:%\d+]] = OpAccessChain %_ptr_Function_float %v4f1 %int_0
+// CHECK-NEXT: [[v1:%\d+]] = OpLoad %float [[ac0]]
+// CHECK-NEXT: [[ac1:%\d+]] = OpAccessChain %_ptr_Function_float %v4f2 %int_0
+// CHECK-NEXT: OpStore [[ac1]] [[v1]]
+    v4f2.x = v4f1.r;
+    // Assign to two elements
+// CHECK-NEXT: [[v2:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: [[vs0:%\d+]] = OpVectorShuffle %v2float [[v2]] [[v2]] 0 1
+// CHECK-NEXT: [[v3:%\d+]] = OpLoad %v4float %v4f2
+// CHECK-NEXT: [[vs1:%\d+]] = OpVectorShuffle %v4float [[v3]] [[vs0]] 4 5 2 3
+// CHECK-NEXT: OpStore %v4f2 [[vs1]]
+    v4f2.xy = v4f1.rg;
+    // Assign to three elements
+// CHECK-NEXT: [[v4:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: [[vs2:%\d+]] = OpVectorShuffle %v3float [[v4]] [[v4]] 0 1 2
+// CHECK-NEXT: [[v5:%\d+]] = OpLoad %v4float %v4f2
+// CHECK-NEXT: [[vs3:%\d+]] = OpVectorShuffle %v4float [[v5]] [[vs2]] 4 5 6 3
+// CHECK-NEXT: OpStore %v4f2 [[vs3]]
+    v4f2.xyz = v4f1.rgb;
+    // Assign to four elements
+// CHECK-NEXT: [[v6:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: OpStore %v4f2 [[v6]]
+    v4f2.xyzw = v4f1.rgba; // lhs: all in original order
+
+    // Random order
+// CHECK-NEXT: [[v7:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: [[vs4:%\d+]] = OpVectorShuffle %v4float [[v7]] [[v7]] 3 1 2 0
+// CHECK-NEXT: [[v8:%\d+]] = OpLoad %v4float %v4f2
+// CHECK-NEXT: [[vs5:%\d+]] = OpVectorShuffle %v4float [[v8]] [[vs4]] 6 7 5 4
+// CHECK-NEXT: OpStore %v4f2 [[vs5]]
+    v4f2.abrg = v4f1.wyzx;
+
+    // Assign from whole vector
+// CHECK-NEXT: [[v9:%\d+]] = OpLoad %v2float %v2f
+// CHECK-NEXT: [[v10:%\d+]] = OpLoad %v4float %v4f2
+// CHECK-NEXT: [[vs6:%\d+]] = OpVectorShuffle %v4float [[v10]] [[v9]] 0 1 4 5
+// CHECK-NEXT: OpStore %v4f2 [[vs6]]
+    v4f2.zw = v2f;
+
+    // Select the same element multiple times (can only happen for rhs)
+// CHECK-NEXT: [[v11:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: [[vs7:%\d+]] = OpVectorShuffle %v4float [[v11]] [[v11]] 0 0 1 1
+// CHECK-NEXT: OpStore %v4f2 [[vs7]]
+    v4f2 = v4f1.xxyy;
+
+    // Select more than original size (can only happen for rhs)
+// CHECK-NEXT: [[v13:%\d+]] = OpLoad %v2float %v2f
+// CHECK-NEXT: [[vs9:%\d+]] = OpVectorShuffle %v4float [[v13]] [[v13]] 0 1 1 0
+// CHECK-NEXT: OpStore %v4f2 [[vs9]]
+    v4f2 = v2f.xyyx;
+
+    // Select from rvalue & chained assignment
+// CHECK-NEXT: [[v15:%\d+]] = OpLoad %v3float %v3f1
+// CHECK-NEXT: [[v16:%\d+]] = OpLoad %v3float %v3f2
+// CHECK-NEXT: [[add0:%\d+]] = OpFAdd %v3float [[v15]] [[v16]]
+// CHECK-NEXT: [[vs11:%\d+]] = OpVectorShuffle %v2float [[add0]] [[add0]] 1 0
+// CHECK-NEXT: OpStore %v2f [[vs11]]
+// CHECK-NEXT: [[v17:%\d+]] = OpLoad %v4float %v4f2
+// CHECK-NEXT: [[vs12:%\d+]] = OpVectorShuffle %v4float [[v17]] [[vs11]] 0 1 5 4
+// CHECK-NEXT: OpStore %v4f2 [[vs12]]
+    v4f2.wz = v2f = (v3f1 + v3f2).yx;
+
+// CHECK-NEXT: [[v18:%\d+]] = OpLoad %v3float %v3f1
+// CHECK-NEXT: [[v19:%\d+]] = OpLoad %v3float %v3f2
+// CHECK-NEXT: [[mul0:%\d+]] = OpFMul %v3float [[v18]] [[v19]]
+// CHECK-NEXT: [[ce0:%\d+]] = OpCompositeExtract %float [[mul0]] 1
+// CHECK-NEXT: [[ac2:%\d+]] = OpAccessChain %_ptr_Function_float %v2f %int_1
+// CHECK-NEXT: OpStore [[ac2]] [[ce0]]
+// CHECK-NEXT: [[ac3:%\d+]] = OpAccessChain %_ptr_Function_float %v2f %int_0
+// CHECK-NEXT: OpStore [[ac3]] [[ce0]]
+    v2f.x = v2f.y = (v3f1 * v3f2).y; // one element
+
+    // Use in binary operations
+// CHECK-NEXT: [[v20:%\d+]] = OpLoad %v3float %v3f1
+// CHECK-NEXT: [[vs13:%\d+]] = OpVectorShuffle %v2float [[v20]] [[v20]] 0 1
+// CHECK-NEXT: [[mul1:%\d+]] = OpVectorTimesScalar %v2float [[vs13]] %float_2
+// CHECK-NEXT: [[v21:%\d+]] = OpLoad %v3float %v3f2
+// CHECK-NEXT: [[vs14:%\d+]] = OpVectorShuffle %v2float [[v21]] [[v21]] 1 2
+// CHECK-NEXT: [[mul2:%\d+]] = OpFMul %v2float [[mul1]] [[vs14]]
+// CHECK-NEXT: OpStore %v2f [[mul2]]
+    v2f = 2.0 * v3f1.xy * v3f2.yz;
+
+    // Continuous selection
+
+    // v2f.(1, 0).(1, 0, 1) -> v2f.(0, 1, 0)
+    // v4f2.(3, 2, 0).(1, 0, 2) -> v4f2.(2, 3, 0)
+    // Write rhs.0 (+4 = 4) to lhs.2
+    // Write rhs.1 (+4 = 5) to lhs.3
+    // Write rhs.2 (+4 = 6) to lhs.0
+    // Keep lhs.1
+    // So final selectors to write to lhs.(0, 1, 2, 3): 6, 1, 4, 5
+// CHECK-NEXT: [[v22:%\d+]] = OpLoad %v2float %v2f
+// CHECK-NEXT: [[vs15:%\d+]] = OpVectorShuffle %v3float [[v22]] [[v22]] 0 1 0
+// CHECK-NEXT: [[v23:%\d+]] = OpLoad %v4float %v4f2
+// CHECK-NEXT: [[vs16:%\d+]] = OpVectorShuffle %v4float [[v23]] [[vs15]] 6 1 4 5
+// CHECK-NEXT: OpStore %v4f2 [[vs16]]
+    v4f2.wzx.grb = v2f.gr.yxy; // select more than original, write to a part
+
+// CHECK-NEXT: [[v24:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: OpStore %v4f2 [[v24]]
+    v4f2.wzyx.abgr.xywz.rgab = v4f1.xyzw.xyzw.rgab.rgab; // from original vector to original vector
+
+    // Note that we cannot generate OpAccessChain for v4f1 since v4f1.xzyx is
+    // already not a lvalue!
+// CHECK-NEXT: [[v24:%\d+]] = OpLoad %v4float %v4f1
+// CHECK-NEXT: [[ce1:%\d+]] = OpCompositeExtract %float [[v24]] 2
+// CHECK-NEXT: [[ac4:%\d+]] = OpAccessChain %_ptr_Function_float %v4f2 %int_1
+// CHECK-NEXT: OpStore [[ac4]] [[ce1]]
+    v4f2.wzyx.zy.x = v4f1.xzyx.y.x; // from one element (rvalue) to one element (lvalue)
+
+// CHECK-NEXT: [[ac2:%\d+]] = OpAccessChain %_ptr_Function_float %v4f1 %int_1
+// CHECK-NEXT: [[e0:%\d+]] = OpLoad %float [[ac2]]
+// CHECK-NEXT: [[ac3:%\d+]] = OpAccessChain %_ptr_Function_float %v4f2 %int_3
+// CHECK-NEXT: OpStore [[ac3]] [[e0]]
+    v4f2.w.x.x.x = v4f1.y.x.x.x; // continuously selecting one element
+}

+ 39 - 0
tools/clang/test/CodeGenSPIRV/op.vector.swizzle.size1.hlsl

@@ -0,0 +1,39 @@
+// Run: %dxc -T vs_6_0 -E main
+
+void main() {
+// CHECK-LABEL: %bb_entry = OpLabel
+    float2 v2f;
+    float1 v1f1, v1f2;
+    float sf;
+
+    // Assign to whole vector
+// CHECK:      [[v0:%\d+]] = OpLoad %float %v1f1
+// CHECK-NEXT: OpStore %v1f2 [[v0]]
+    v1f2 = v1f1.x; // rhs: all in original order
+
+    // Assign from whole vector, to one element
+// CHECK-NEXT: [[v1:%\d+]] = OpLoad %float %v1f1
+// CHECK-NEXT: OpStore %v1f2 [[v1]]
+    v1f2.x = v1f1;
+
+    // Select one element multiple times & select more than size
+// CHECK-NEXT: [[v2:%\d+]] = OpLoad %float %v1f1
+// CHECK-NEXT: [[cc0:%\d+]] = OpCompositeConstruct %v2float [[v2]] [[v2]]
+// CHECK-NEXT: OpStore %v2f [[cc0]]
+    v2f = v1f1.xx;
+
+    // Select from rvalue & chained assignment
+// CHECK-NEXT: [[v3:%\d+]] = OpLoad %float %v1f1
+// CHECK-NEXT: [[v4:%\d+]] = OpLoad %float %v1f2
+// CHECK-NEXT: [[mul0:%\d+]] = OpFMul %float [[v3]] [[v4]]
+// CHECK-NEXT: [[ac0:%\d+]] = OpAccessChain %_ptr_Function_float %v2f %int_1
+// CHECK-NEXT: OpStore [[ac0]] [[mul0]]
+// CHECK-NEXT: OpStore %v1f2 [[mul0]]
+// CHECK-NEXT: OpStore %sf [[mul0]]
+    sf = v1f2 = v2f.y = (v1f1 * v1f2).r;
+
+    // Continuous selection
+// CHECK-NEXT: [[v5:%\d+]] = OpLoad %float %v1f1
+// CHECK-NEXT: OpStore %v1f2 [[v5]]
+    v1f2.x.r.x = v1f1.r.x.r;
+}

+ 6 - 0
tools/clang/unittests/SPIRV/CodeGenSPIRVTest.cpp

@@ -128,6 +128,12 @@ TEST_F(FileTest, TernaryOpConditionalOp) {
   runFileTest("ternary-op.cond-op.hlsl");
 }
 
+// For vector swizzle operators
+TEST_F(FileTest, OpVectorSwizzle) { runFileTest("op.vector.swizzle.hlsl"); }
+TEST_F(FileTest, OpVectorSize1Swizzle) {
+  runFileTest("op.vector.swizzle.size1.hlsl");
+}
+
 // For casting
 TEST_F(FileTest, CastNoOp) { runFileTest("cast.no-op.hlsl"); }
 TEST_F(FileTest, CastImplicit2Bool) { runFileTest("cast.2bool.implicit.hlsl"); }