Browse Source

metal: Add support for YUV/NV12 texture formats.

Alex Szpakowski 7 years ago
parent
commit
740a90af37

+ 269 - 67
src/render/metal/SDL_render_metal.m

@@ -90,8 +90,15 @@ SDL_RenderDriver METAL_RenderDriver = {
     {
      "metal",
      (SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_TARGETTEXTURE),
-     2,
-     {SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_ABGR8888},
+     6,
+     {
+         SDL_PIXELFORMAT_ARGB8888,
+         SDL_PIXELFORMAT_ABGR8888,
+         SDL_PIXELFORMAT_YV12,
+         SDL_PIXELFORMAT_IYUV,
+         SDL_PIXELFORMAT_NV12,
+         SDL_PIXELFORMAT_NV21
+     },
 
      // !!! FIXME: how do you query Metal for this?
      // (the weakest GPU supported by Metal on iOS has 4k texture max, and
@@ -116,7 +123,10 @@ SDL_RenderDriver METAL_RenderDriver = {
 
 static const size_t CONSTANTS_OFFSET_IDENTITY = 0;
 static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16);
-static const size_t CONSTANTS_OFFSET_CLEAR_VERTS = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
+static const size_t CONSTANTS_OFFSET_DECODE_JPEG = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
+static const size_t CONSTANTS_OFFSET_DECODE_BT601 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_JPEG + sizeof(float) * 4 * 4);
+static const size_t CONSTANTS_OFFSET_DECODE_BT709 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT601 + sizeof(float) * 4 * 4);
+static const size_t CONSTANTS_OFFSET_CLEAR_VERTS = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT709 + sizeof(float) * 4 * 4);
 static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_CLEAR_VERTS + sizeof(float) * 6;
 
 typedef enum SDL_MetalVertexFunction
@@ -127,8 +137,12 @@ typedef enum SDL_MetalVertexFunction
 
 typedef enum SDL_MetalFragmentFunction
 {
-    SDL_METAL_FRAGMENT_SOLID,
+    SDL_METAL_FRAGMENT_SOLID = 0,
     SDL_METAL_FRAGMENT_COPY,
+    SDL_METAL_FRAGMENT_YUV,
+    SDL_METAL_FRAGMENT_NV12,
+    SDL_METAL_FRAGMENT_NV21,
+    SDL_METAL_FRAGMENT_COUNT,
 } SDL_MetalFragmentFunction;
 
 typedef struct METAL_PipelineState
@@ -146,6 +160,15 @@ typedef struct METAL_PipelineCache
     const char *label;
 } METAL_PipelineCache;
 
+/* Each shader combination used by drawing functions has a separate pipeline
+ * cache. This is more efficient than iterating over a global cache to find
+ * the pipeline based on the specified shader combination, since we know what
+ * the shader combination is inside each drawing function's code. */
+typedef struct METAL_ShaderPipelines
+{
+    METAL_PipelineCache caches[SDL_METAL_FRAGMENT_COUNT];
+} METAL_ShaderPipelines;
+
 @interface METAL_RenderData : NSObject
     @property (nonatomic, retain) id<MTLDevice> mtldevice;
     @property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
@@ -153,13 +176,12 @@ typedef struct METAL_PipelineCache
     @property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder;
     @property (nonatomic, retain) id<MTLLibrary> mtllibrary;
     @property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer;
-    @property (nonatomic, assign) METAL_PipelineCache *mtlpipelineprims;
-    @property (nonatomic, assign) METAL_PipelineCache *mtlpipelinecopy;
     @property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
     @property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
     @property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
     @property (nonatomic, retain) CAMetalLayer *mtllayer;
     @property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
+    @property (nonatomic, assign) METAL_ShaderPipelines *pipelines;
 @end
 
 @implementation METAL_RenderData
@@ -184,7 +206,12 @@ typedef struct METAL_PipelineCache
 
 @interface METAL_TextureData : NSObject
     @property (nonatomic, retain) id<MTLTexture> mtltexture;
+    @property (nonatomic, retain) id<MTLTexture> mtltexture_uv;
     @property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
+    @property (nonatomic, assign) SDL_MetalFragmentFunction fragmentFunction;
+    @property (nonatomic, assign) BOOL yuv;
+    @property (nonatomic, assign) BOOL nv12;
+    @property (nonatomic, assign) size_t conversionBufferOffset;
 @end
 
 @implementation METAL_TextureData
@@ -192,6 +219,7 @@ typedef struct METAL_PipelineCache
 - (void)dealloc
 {
     [_mtltexture release];
+    [_mtltexture_uv release];
     [_mtlsampler release];
     [super dealloc];
 }
@@ -265,6 +293,9 @@ GetFragmentFunctionName(SDL_MetalFragmentFunction function)
     switch (function) {
         case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment";
         case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment";
+        case SDL_METAL_FRAGMENT_YUV: return @"SDL_YUV_fragment";
+        case SDL_METAL_FRAGMENT_NV12: return @"SDL_NV12_fragment";
+        case SDL_METAL_FRAGMENT_NV21: return @"SDL_NV21_fragment";
         default: return nil;
     }
 }
@@ -329,16 +360,9 @@ MakePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache,
     }
 }
 
-static METAL_PipelineCache *
-MakePipelineCache(METAL_RenderData *data, const char *label, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
+static void
+MakePipelineCache(METAL_RenderData *data, METAL_PipelineCache *cache, const char *label, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
 {
-    METAL_PipelineCache *cache = SDL_malloc(sizeof(METAL_PipelineCache));
-
-    if (!cache) {
-        SDL_OutOfMemory();
-        return NULL;
-    }
-
     SDL_zerop(cache);
 
     cache->vertexFunction = vertfn;
@@ -347,12 +371,10 @@ MakePipelineCache(METAL_RenderData *data, const char *label, SDL_MetalVertexFunc
 
     /* Create pipeline states for the default blend modes. Custom blend modes
      * will be added to the cache on-demand. */
-    MakePipelineState(data, cache, @"(blend=none)", SDL_BLENDMODE_NONE);
-    MakePipelineState(data, cache, @"(blend=blend)", SDL_BLENDMODE_BLEND);
-    MakePipelineState(data, cache, @"(blend=add)", SDL_BLENDMODE_ADD);
-    MakePipelineState(data, cache, @"(blend=mod)", SDL_BLENDMODE_MOD);
-
-    return cache;
+    MakePipelineState(data, cache, @" (blend=none)", SDL_BLENDMODE_NONE);
+    MakePipelineState(data, cache, @" (blend=blend)", SDL_BLENDMODE_BLEND);
+    MakePipelineState(data, cache, @" (blend=add)", SDL_BLENDMODE_ADD);
+    MakePipelineState(data, cache, @" (blend=mod)", SDL_BLENDMODE_MOD);
 }
 
 static void
@@ -364,20 +386,51 @@ DestroyPipelineCache(METAL_PipelineCache *cache)
         }
 
         SDL_free(cache->states);
-        SDL_free(cache);
+    }
+}
+
+/* Allocates the per-renderer pipeline table and fills one pipeline cache for
+ * every fragment function the renderer draws with. Each cache lives at the
+ * index given by its SDL_MetalFragmentFunction value.
+ * Returns NULL (after reporting out-of-memory to SDL) if allocation fails. */
+static METAL_ShaderPipelines *
+MakeShaderPipelines(METAL_RenderData *data)
+{
+    /* Shader pairing and debug label for each cache slot. */
+    static const struct {
+        SDL_MetalFragmentFunction fragfn;
+        SDL_MetalVertexFunction vertfn;
+        const char *label;
+    } configs[] = {
+        { SDL_METAL_FRAGMENT_SOLID, SDL_METAL_VERTEX_SOLID, "SDL primitives pipeline" },
+        { SDL_METAL_FRAGMENT_COPY,  SDL_METAL_VERTEX_COPY,  "SDL copy pipeline" },
+        { SDL_METAL_FRAGMENT_YUV,   SDL_METAL_VERTEX_COPY,  "SDL YUV pipeline" },
+        { SDL_METAL_FRAGMENT_NV12,  SDL_METAL_VERTEX_COPY,  "SDL NV12 pipeline" },
+        { SDL_METAL_FRAGMENT_NV21,  SDL_METAL_VERTEX_COPY,  "SDL NV21 pipeline" },
+    };
+
+    METAL_ShaderPipelines *result = SDL_calloc(1, sizeof(*result));
+    if (result == NULL) {
+        SDL_OutOfMemory();
+        return NULL;
+    }
+
+    for (size_t i = 0; i < sizeof(configs) / sizeof(configs[0]); i++) {
+        MakePipelineCache(data, &result->caches[configs[i].fragfn], configs[i].label, configs[i].vertfn, configs[i].fragfn);
+    }
+
+    return result;
+}
+
+/* Tears down every pipeline cache stored in the table and then frees the
+ * table itself. The caches are embedded in the struct, so only the table
+ * pointer is passed to SDL_free. Passing NULL is a no-op. */
+static void
+DestroyShaderPipelines(METAL_ShaderPipelines *pipelines)
+{
+    if (pipelines != NULL) {
+        for (int i = 0; i < SDL_METAL_FRAGMENT_COUNT; i++) {
+            DestroyPipelineCache(&pipelines->caches[i]);
+        }
+
+        SDL_free(pipelines);
     }
 }
 
 static inline id<MTLRenderPipelineState>
-ChoosePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache, const SDL_BlendMode blendmode)
+ChoosePipelineState(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, SDL_MetalFragmentFunction fragfn, SDL_BlendMode blendmode)
 {
+    METAL_PipelineCache *cache = &pipelines->caches[fragfn];
+
     for (int i = 0; i < cache->count; i++) {
         if (cache->states[i].blendMode == blendmode) {
             return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe;
         }
     }
 
-    return MakePipelineState(data, cache, [NSString stringWithFormat:@"(blend=custom 0x%x)", blendmode], blendmode);
+    return MakePipelineState(data, cache, [NSString stringWithFormat:@" (blend=custom 0x%x)", blendmode], blendmode);
 }
 
 static SDL_Renderer *
@@ -455,8 +508,7 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
 #endif
     data.mtllibrary.label = @"SDL Metal renderer shader library";
 
-    data.mtlpipelineprims = MakePipelineCache(data, "SDL primitives pipeline ", SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID);
-    data.mtlpipelinecopy = MakePipelineCache(data, "SDL texture pipeline ", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
+    data.pipelines = MakeShaderPipelines(data);
 
     MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init];
 
@@ -485,6 +537,28 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
         0.5f, 0.5f, 0.0f, 1.0f,
     };
 
+    /* Metal pads float3s to 16 bytes. */
+    float decodetransformJPEG[4*4] = {
+        0.0, -0.501960814, -0.501960814, 0.0, /* offset */
+        1.0000,  0.0000,  1.4020, 0.0,        /* Rcoeff */
+        1.0000, -0.3441, -0.7141, 0.0,        /* Gcoeff */
+        1.0000,  1.7720,  0.0000, 0.0,        /* Bcoeff */
+    };
+
+    float decodetransformBT601[4*4] = {
+        -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
+        1.1644,  0.0000,  1.5960, 0.0,                  /* Rcoeff */
+        1.1644, -0.3918, -0.8130, 0.0,                  /* Gcoeff */
+        1.1644,  2.0172,  0.0000, 0.0,                  /* Bcoeff */
+    };
+
+    /* BT.709, limited ("video") range: luma carries a 16/255 offset and the
+     * 255/219 scale factor (1.1644), same as the BT.601 table above, but with
+     * the BT.709 chroma coefficients. This must not be a copy of the JPEG
+     * (full-range BT.601) table, or HD content decodes with wrong colors. */
+    float decodetransformBT709[4*4] = {
+        -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
+        1.1644,  0.0000,  1.7927, 0.0,                  /* Rcoeff */
+        1.1644, -0.2132, -0.5329, 0.0,                  /* Gcoeff */
+        1.1644,  2.1124,  0.0000, 0.0,                  /* Bcoeff */
+    };
+
     float clearverts[6] = {0.0f, 0.0f,  0.0f, 2.0f,  2.0f, 0.0f};
 
     id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared];
@@ -497,6 +571,9 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
     char *constantdata = [mtlbufconstantstaging contents];
     SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
     SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform));
+    SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_JPEG, decodetransformJPEG, sizeof(decodetransformJPEG));
+    SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
+    SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
     SDL_memcpy(constantdata + CONSTANTS_OFFSET_CLEAR_VERTS, clearverts, sizeof(clearverts));
 
     id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
@@ -660,15 +737,26 @@ static int
 METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
 { @autoreleasepool {
     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
-    MTLPixelFormat mtlpixfmt;
+    MTLPixelFormat pixfmt;
 
     switch (texture->format) {
-        case SDL_PIXELFORMAT_ABGR8888: mtlpixfmt = MTLPixelFormatRGBA8Unorm; break;
-        case SDL_PIXELFORMAT_ARGB8888: mtlpixfmt = MTLPixelFormatBGRA8Unorm; break;
-        default: return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
+        case SDL_PIXELFORMAT_ABGR8888:
+            pixfmt = MTLPixelFormatRGBA8Unorm;
+            break;
+        case SDL_PIXELFORMAT_ARGB8888:
+            pixfmt = MTLPixelFormatBGRA8Unorm;
+            break;
+        case SDL_PIXELFORMAT_IYUV:
+        case SDL_PIXELFORMAT_YV12:
+        case SDL_PIXELFORMAT_NV12:
+        case SDL_PIXELFORMAT_NV21:
+            pixfmt = MTLPixelFormatR8Unorm;
+            break;
+        default:
+            return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
     }
 
-    MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:mtlpixfmt
+    MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixfmt
                                             width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO];
 
     /* Not available in iOS 8. */
@@ -679,14 +767,31 @@ METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
             mtltexdesc.usage = MTLTextureUsageShaderRead;
         }
     }
-    //mtltexdesc.resourceOptions = MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeManaged;
-    //mtltexdesc.storageMode = MTLStorageModeManaged;
     
     id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
     if (mtltexture == nil) {
         return SDL_SetError("Texture allocation failed");
     }
 
+    id<MTLTexture> mtltexture_uv = nil;
+
+    BOOL yuv = (texture->format == SDL_PIXELFORMAT_IYUV) || (texture->format == SDL_PIXELFORMAT_YV12);
+    BOOL nv12 = (texture->format == SDL_PIXELFORMAT_NV12) || (texture->format == SDL_PIXELFORMAT_NV21);
+
+    if (yuv) {
+        mtltexdesc.pixelFormat = MTLPixelFormatR8Unorm;
+        mtltexdesc.width = (texture->w + 1) / 2;
+        mtltexdesc.height = (texture->h + 1) / 2;
+        mtltexdesc.textureType = MTLTextureType2DArray;
+        mtltexdesc.arrayLength = 2;
+        mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
+    } else if (nv12) {
+        mtltexdesc.pixelFormat = MTLPixelFormatRG8Unorm;
+        mtltexdesc.width = (texture->w + 1) / 2;
+        mtltexdesc.height = (texture->h + 1) / 2;
+        mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
+    }
+
     METAL_TextureData *texturedata = [[METAL_TextureData alloc] init];
     const char *hint = SDL_GetHint(SDL_HINT_RENDER_SCALE_QUALITY);
     if (!hint || *hint == '0' || SDL_strcasecmp(hint, "nearest") == 0) {
@@ -695,12 +800,39 @@ METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
         texturedata.mtlsampler = data.mtlsamplerlinear;
     }
     texturedata.mtltexture = mtltexture;
+    texturedata.mtltexture_uv = mtltexture_uv;
+
+    texturedata.yuv = yuv;
+    texturedata.nv12 = nv12;
+
+    if (yuv) {
+        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_YUV;
+    } else if (texture->format == SDL_PIXELFORMAT_NV12) {
+        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV12;
+    } else if (texture->format == SDL_PIXELFORMAT_NV21) {
+        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV21;
+    } else {
+        texturedata.fragmentFunction = SDL_METAL_FRAGMENT_COPY;
+    }
+
+    if (yuv || nv12) {
+        size_t offset = 0;
+        SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionModeForResolution(texture->w, texture->h);
+        switch (mode) {
+            case SDL_YUV_CONVERSION_JPEG: offset = CONSTANTS_OFFSET_DECODE_JPEG; break;
+            case SDL_YUV_CONVERSION_BT601: offset = CONSTANTS_OFFSET_DECODE_BT601; break;
+            case SDL_YUV_CONVERSION_BT709: offset = CONSTANTS_OFFSET_DECODE_BT709; break;
+            default: offset = 0; break;
+        }
+        texturedata.conversionBufferOffset = offset;
+    }
 
     texture->driverdata = (void*)CFBridgingRetain(texturedata);
 
 #if !__has_feature(objc_arc)
     [texturedata release];
     [mtltexture release];
+    [mtltexture_uv release];
 #endif
 
     return 0;
@@ -710,12 +842,52 @@ static int
 METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                  const SDL_Rect * rect, const void *pixels, int pitch)
 { @autoreleasepool {
+    METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
+
     // !!! FIXME: this is a synchronous call; it doesn't return until data is uploaded in some form.
     // !!! FIXME:  Maybe move this off to a thread that marks the texture as uploaded and only stall the main thread if we try to
     // !!! FIXME:  use this texture before the marking is done? Is it worth it? Or will we basically always be uploading a bunch of
     // !!! FIXME:  stuff way ahead of time and/or using it immediately after upload?
-    id<MTLTexture> mtltexture = ((__bridge METAL_TextureData *)texture->driverdata).mtltexture;
-    [mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h) mipmapLevel:0 withBytes:pixels bytesPerRow:pitch];
+
+    [texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
+                              mipmapLevel:0
+                                withBytes:pixels
+                              bytesPerRow:pitch];
+
+    if (texturedata.yuv) {
+        int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
+        int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
+
+        /* Skip to the correct offset into the next texture */
+        pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
+        [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
+                                     mipmapLevel:0
+                                           slice:Uslice
+                                       withBytes:pixels
+                                     bytesPerRow:(pitch + 1) / 2
+                                   bytesPerImage:0];
+
+        /* Skip to the correct offset into the next texture */
+        pixels = (const void*)((const Uint8*)pixels + ((rect->h + 1) / 2) * ((pitch + 1)/2));
+        [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
+                                     mipmapLevel:0
+                                           slice:Vslice
+                                       withBytes:pixels
+                                     bytesPerRow:(pitch + 1) / 2
+                                   bytesPerImage:0];
+    }
+
+    if (texturedata.nv12) {
+        /* Skip to the correct offset into the next texture */
+        pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
+        [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
+                                     mipmapLevel:0
+                                           slice:0
+                                       withBytes:pixels
+                                     bytesPerRow:2 * ((pitch + 1) / 2)
+                                   bytesPerImage:0];
+    }
+
     return 0;
 }}
 
@@ -725,9 +897,37 @@ METAL_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
                     const Uint8 *Yplane, int Ypitch,
                     const Uint8 *Uplane, int Upitch,
                     const Uint8 *Vplane, int Vpitch)
-{
-    return SDL_Unsupported();  // !!! FIXME
-}
+{ @autoreleasepool {
+    /* Uploads separately supplied Y, U and V planes into the texture's Metal
+     * storage: Y goes to the full-resolution luma texture, U and V go to the
+     * two slices of the half-resolution chroma array texture.
+     * Returns 0; an empty rectangle is treated as a successful no-op. */
+    METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
+
+    /* The caller hands us the planes already identified as U and V, so the
+     * slice assignment must not depend on the texture's memory plane order
+     * (YV12 vs IYUV). The YUV fragment shader always samples slice 0 as U and
+     * slice 1 as V. */
+    const int Uslice = 0;
+    const int Vslice = 1;
+
+    /* Bail out if we're supposed to update an empty rectangle */
+    if (rect->w <= 0 || rect->h <= 0) {
+        return 0;
+    }
+
+    [texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
+                              mipmapLevel:0
+                                withBytes:Yplane
+                              bytesPerRow:Ypitch];
+
+    /* Chroma planes are subsampled by two in each dimension; round the size
+     * up so odd-sized rectangles still cover their last row/column. */
+    [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
+                                 mipmapLevel:0
+                                       slice:Uslice
+                                   withBytes:Uplane
+                                 bytesPerRow:Upitch
+                               bytesPerImage:0];
+
+    [texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
+                                 mipmapLevel:0
+                                       slice:Vslice
+                                   withBytes:Vplane
+                                 bytesPerRow:Vpitch
+                               bytesPerImage:0];
+
+    return 0;
+}}
 
 static int
 METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
@@ -864,7 +1064,7 @@ METAL_RenderClear(SDL_Renderer * renderer)
         // Slow path for clearing: draw a filled fullscreen triangle.
         METAL_SetOrthographicProjection(renderer, 1, 1);
         [data.mtlcmdencoder setViewport:viewport];
-        [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, SDL_BLENDMODE_NONE)];
+        [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, SDL_BLENDMODE_NONE)];
         [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_CLEAR_VERTS atIndex:0];
         [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
         [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
@@ -903,7 +1103,7 @@ DrawVerts(SDL_Renderer * renderer, const SDL_FPoint * points, int count,
     // !!! FIXME: render color should live in a dedicated uniform buffer.
     const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f };
 
-    [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, renderer->blendMode)];
+    [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, renderer->blendMode)];
     [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
 
     [data.mtlcmdencoder setVertexBytes:points length:vertlen atIndex:0];
@@ -934,7 +1134,7 @@ METAL_RenderFillRects(SDL_Renderer * renderer, const SDL_FRect * rects, int coun
     // !!! FIXME: render color should live in a dedicated uniform buffer.
     const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f };
 
-    [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, renderer->blendMode)];
+    [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, renderer->blendMode)];
     [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
     [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
 
@@ -955,6 +1155,29 @@ METAL_RenderFillRects(SDL_Renderer * renderer, const SDL_FRect * rects, int coun
     return 0;
 }}
 
+/* Binds the fragment-stage state shared by the texture copy draw calls:
+ * the pipeline for the texture's fragment function and blend mode, the
+ * modulation color, the sampler, and the texture(s). Planar/semi-planar YUV
+ * textures additionally get their chroma texture and decode constants. */
+static void
+METAL_SetupRenderCopy(METAL_RenderData *data, SDL_Texture *texture, METAL_TextureData *texturedata)
+{
+    /* Opaque white unless the texture has a color/alpha modulation set. */
+    const BOOL modulate = (texture->modMode != 0);
+    const float color[4] = {
+        modulate ? ((float)texture->r) / 255.0f : 1.0f,
+        modulate ? ((float)texture->g) / 255.0f : 1.0f,
+        modulate ? ((float)texture->b) / 255.0f : 1.0f,
+        modulate ? ((float)texture->a) / 255.0f : 1.0f,
+    };
+
+    id<MTLRenderPipelineState> pipeline = ChoosePipelineState(data, data.pipelines, texturedata.fragmentFunction, texture->blendMode);
+    id<MTLRenderCommandEncoder> encoder = data.mtlcmdencoder;
+
+    [encoder setRenderPipelineState:pipeline];
+    [encoder setFragmentBytes:color length:sizeof(color) atIndex:0];
+    [encoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
+
+    [encoder setFragmentTexture:texturedata.mtltexture atIndex:0];
+
+    const BOOL needsChroma = texturedata.yuv || texturedata.nv12;
+    if (needsChroma) {
+        /* Texture slot 1 and buffer slot 1 are what the YUV/NV12/NV21
+         * fragment functions read their chroma data and decode matrix from. */
+        [encoder setFragmentTexture:texturedata.mtltexture_uv atIndex:1];
+        [encoder setFragmentBuffer:data.mtlbufconstants offset:texturedata.conversionBufferOffset atIndex:1];
+    }
+}
+
 static int
 METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
               const SDL_Rect * srcrect, const SDL_FRect * dstrect)
@@ -965,6 +1188,8 @@ METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
     const float texw = (float) texturedata.mtltexture.width;
     const float texh = (float) texturedata.mtltexture.height;
 
+    METAL_SetupRenderCopy(data, texture, texturedata);
+
     const float xy[] = {
         dstrect->x, dstrect->y + dstrect->h,
         dstrect->x, dstrect->y,
@@ -979,21 +1204,9 @@ METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
         normtex(srcrect->x + srcrect->w, texw), normtex(srcrect->y, texh)
     };
 
-    float color[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
-    if (texture->modMode) {
-        color[0] = ((float)texture->r) / 255.0f;
-        color[1] = ((float)texture->g) / 255.0f;
-        color[2] = ((float)texture->b) / 255.0f;
-        color[3] = ((float)texture->a) / 255.0f;
-    }
-
-    [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelinecopy, texture->blendMode)];
     [data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0];
     [data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1];
     [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
-    [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
-    [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
-    [data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
     [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
 
     return 0;
@@ -1012,6 +1225,8 @@ METAL_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
     float transform[16];
     float minu, maxu, minv, maxv;
 
+    METAL_SetupRenderCopy(data, texture, texturedata);
+
     minu = normtex(srcrect->x, texw);
     maxu = normtex(srcrect->x + srcrect->w, texw);
     minv = normtex(srcrect->y, texh);
@@ -1062,21 +1277,9 @@ METAL_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
         transform[13] = dstrect->y + center->y;
     }
 
-    float color[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
-    if (texture->modMode) {
-        color[0] = ((float)texture->r) / 255.0f;
-        color[1] = ((float)texture->g) / 255.0f;
-        color[2] = ((float)texture->b) / 255.0f;
-        color[3] = ((float)texture->a) / 255.0f;
-    }
-
-    [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelinecopy, texture->blendMode)];
     [data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0];
     [data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1];
     [data.mtlcmdencoder setVertexBytes:transform length:sizeof(transform) atIndex:3];
-    [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
-    [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
-    [data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
     [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
 
     return 0;
@@ -1144,8 +1347,7 @@ METAL_DestroyRenderer(SDL_Renderer * renderer)
             [data.mtlcmdencoder endEncoding];
         }
 
-        DestroyPipelineCache(data.mtlpipelineprims);
-        DestroyPipelineCache(data.mtlpipelinecopy);
+        DestroyShaderPipelines(data.pipelines);
     }
 
     SDL_free(renderer);

+ 58 - 1
src/render/metal/SDL_shaders_metal.metal

@@ -16,7 +16,7 @@ vertex SolidVertexOutput SDL_Solid_vertex(const device float2 *position [[buffer
 {
     SolidVertexOutput v;
     v.position = (projection * transform) * float4(position[vid], 0.0f, 1.0f);
-    v.pointSize = 0.5f;
+    v.pointSize = 1.0f;
     return v;
 }
 
@@ -50,3 +50,60 @@ fragment float4 SDL_Copy_fragment(CopyVertexOutput vert [[stage_in]],
 {
     return tex.sample(s, vert.texcoord) * col;
 }
+
+/* YUV -> RGB decode constants, bound at fragment buffer index 1.
+ * The renderer uploads each table as 4 rows of 4 floats because Metal pads
+ * float3 members to 16 bytes; the fourth float of every row is padding. */
+struct YUVDecode
+{
+    float3 offset; /* added to the sampled (Y, U, V) before the dot products */
+    float3 Rcoeff; /* dotted with the offset YUV to produce red */
+    float3 Gcoeff; /* dotted with the offset YUV to produce green */
+    float3 Bcoeff; /* dotted with the offset YUV to produce blue */
+};
+
+/* Planar YUV (IYUV/YV12): luma comes from texY, the two chroma planes come
+ * from slices 0 and 1 of the texUV array. The result is converted to RGB
+ * with the bound decode constants and modulated by col; alpha is 1 before
+ * modulation. */
+fragment float4 SDL_YUV_fragment(CopyVertexOutput vert [[stage_in]],
+                                 constant float4 &col [[buffer(0)]],
+                                 constant YUVDecode &decode [[buffer(1)]],
+                                 texture2d<float> texY [[texture(0)]],
+                                 texture2d_array<float> texUV [[texture(1)]],
+                                 sampler s [[sampler(0)]])
+{
+    const float luma = texY.sample(s, vert.texcoord).r;
+    const float chroma0 = texUV.sample(s, vert.texcoord, 0).r;
+    const float chroma1 = texUV.sample(s, vert.texcoord, 1).r;
+
+    const float3 yuv = float3(luma, chroma0, chroma1) + decode.offset;
+
+    const float3 rgb = float3(dot(yuv, decode.Rcoeff),
+                              dot(yuv, decode.Gcoeff),
+                              dot(yuv, decode.Bcoeff));
+
+    return col * float4(rgb, 1.0);
+}
+
+/* Semi-planar NV12: luma comes from texY, and the two chroma components are
+ * read from the red and green channels of texUV, in that order. The result
+ * is converted to RGB with the bound decode constants and modulated by col. */
+fragment float4 SDL_NV12_fragment(CopyVertexOutput vert [[stage_in]],
+                                 constant float4 &col [[buffer(0)]],
+                                 constant YUVDecode &decode [[buffer(1)]],
+                                 texture2d<float> texY [[texture(0)]],
+                                 texture2d<float> texUV [[texture(1)]],
+                                 sampler s [[sampler(0)]])
+{
+    const float luma = texY.sample(s, vert.texcoord).r;
+    const float2 chroma = texUV.sample(s, vert.texcoord).rg;
+
+    const float3 yuv = float3(luma, chroma) + decode.offset;
+
+    const float3 rgb = float3(dot(yuv, decode.Rcoeff),
+                              dot(yuv, decode.Gcoeff),
+                              dot(yuv, decode.Bcoeff));
+
+    return col * float4(rgb, 1.0);
+}
+
+/* Semi-planar NV21: identical to the NV12 path except that the two chroma
+ * components are read from texUV in swapped channel order (green, then red).
+ * The result is converted to RGB with the bound decode constants and
+ * modulated by col. */
+fragment float4 SDL_NV21_fragment(CopyVertexOutput vert [[stage_in]],
+                                 constant float4 &col [[buffer(0)]],
+                                 constant YUVDecode &decode [[buffer(1)]],
+                                 texture2d<float> texY [[texture(0)]],
+                                 texture2d<float> texUV [[texture(1)]],
+                                 sampler s [[sampler(0)]])
+{
+    const float luma = texY.sample(s, vert.texcoord).r;
+    const float2 chroma = texUV.sample(s, vert.texcoord).gr;
+
+    const float3 yuv = float3(luma, chroma) + decode.offset;
+
+    const float3 rgb = float3(dot(yuv, decode.Rcoeff),
+                              dot(yuv, decode.Gcoeff),
+                              dot(yuv, decode.Bcoeff));
+
+    return col * float4(rgb, 1.0);
+}

File diff suppressed because it is too large
+ 1310 - 700
src/render/metal/SDL_shaders_metal_ios.h


File diff suppressed because it is too large
+ 1314 - 704
src/render/metal/SDL_shaders_metal_osx.h


Some files were not shown because too many files changed in this diff