GR32.Blur.SelectiveGaussian.pas 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599
  1. unit GR32.Blur.SelectiveGaussian;
  2. (* ***** BEGIN LICENSE BLOCK *****
  3. * Version: MPL 1.1 or LGPL 2.1 with linking exception
  4. *
  5. * The contents of this file are subject to the Mozilla Public License Version
  6. * 1.1 (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. * http://www.mozilla.org/MPL/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * Alternatively, the contents of this file may be used under the terms of the
  16. * Free Pascal modified version of the GNU Lesser General Public License
  17. * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
  18. * of this license are applicable instead of those above.
  19. * Please see the file LICENSE.txt for additional information concerning this
  20. * license.
  21. *
  22. * The Original Code is Selective Gaussian Blur for Graphics32
  23. *
  24. * The Initial Developer of the Original Code is
  25. * Mattias Andersson
  26. *
  27. * Portions created by the Initial Developer are Copyright (C) 2005
  28. * the Initial Developer. All Rights Reserved.
  29. *
  30. * ***** END LICENSE BLOCK ***** *)
  31. interface
  32. {$include GR32.inc}
  33. uses
  34. {$if not defined(FPC)}
  35. System.SysUtils, // Must be before GR32 so we get the correct PByteArray
  36. {$else}
  37. SysUtils,
  38. {$ifend}
  39. GR32;
  40. //------------------------------------------------------------------------------
  41. //
  42. // Selective Gaussian Blur
  43. //
  44. //------------------------------------------------------------------------------
  45. // Definition of Selective Gaussian Blur from the GIMP User Manual:
  46. //
  47. // The Selective Gaussian Blur filter performs a mathematical region-based
  48. // selection of the image in small chunks, and determines the level of detail
  49. // within that chunk. After this it applies a Gaussian-based blur to it.
  50. // Selective Gaussian Blur can be very processor intensive, but produces very
  51. // controlled blurring.
  52. //
  53. // The Blur Radius setting affects the maximum number of pixels considered for
  54. // blurring. The higher the setting, the higher the number of pixels that will
  55. // be included in the region analysis. Be aware that a higher setting will take
  56. // considerably longer to compute.
  57. //
  58. // The Delta affects the level of detail that will be blurred. A higher setting
  59. // here will produce more smoothing of the pixels in the radius.
  60. //
  61. // A common use for the Selective Gaussian Blur filter is smoothing areas
  62. // affected by populations of JPEG artifacts, or bad pixelization distortions.
  63. //------------------------------------------------------------------------------
  64. // Can, in theory, be used as (a bad) ordinary Gaussian Blur by specifying
  65. // Delta >= 255.
  66. //
  67. // Note that the selective blur, by design, does not blur the alpha channel.
  68. //------------------------------------------------------------------------------
  69. type
  70. TSelectiveGaussian32Proc = procedure(ASource, ADest: TBitmap32; Radius: TFloat; Delta: Integer);
  71. var
  72. SelectiveGaussianBlur32: TSelectiveGaussian32Proc;
  73. GammaSelectiveGaussianBlur32: TSelectiveGaussian32Proc;
  74. //------------------------------------------------------------------------------
  75. // Selective Gaussian Blur
  76. // Mattias Andersson, 2005
  77. //------------------------------------------------------------------------------
  78. // SIMD optimized versions
  79. //------------------------------------------------------------------------------
  80. // The performance of SelectiveGaussian1 is generally better than
  81. // SelectiveGaussian2 (25% faster on some images) but it also has a slightly
  82. // higher signal loss. For example:
  83. // - SelectiveGaussian1 on a solid color (value=255), yields value=253.
  84. // - SelectiveGaussian2 on a solid color (value=255), yields value=254.
  85. // This is generally not a problem since such a small difference isn't visible.
  86. //------------------------------------------------------------------------------
  87. procedure SelectiveGaussian1(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  88. procedure SelectiveGaussianGamma1(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  89. procedure SelectiveGaussian2(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  90. procedure SelectiveGaussianGamma2(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  91. type
  92. TSelectiveGaussianAccumulateProc = procedure(PSrc: PByteArray; PFact: PWordArray; Count, Min, Max: Integer; out Sum, FactSum: Cardinal);
  93. var
  94. SelectiveGaussianAccumulate: TSelectiveGaussianAccumulateProc;
  95. //------------------------------------------------------------------------------
  96. // Selective Gaussian Blur
  97. // Eric Grange, 2005
  98. //------------------------------------------------------------------------------
  99. // https://borland.public.delphi.language.basm.narkive.com/XiSH6pUn/anyone-up-for-a-selective-gaussian-optimization
  100. // https://web.archive.org/web/20240914225741/https://borland.public.delphi.language.basm.narkive.com/XiSH6pUn/anyone-up-for-a-selective-gaussian-optimization
  101. //------------------------------------------------------------------------------
  102. // Unoptimized reference implemention.
  103. //------------------------------------------------------------------------------
  104. procedure SelectiveGaussianGimp(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  105. procedure SelectiveGaussianGimpGamma(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  106. //------------------------------------------------------------------------------
  107. // Various Selective Gaussian Blur variations
  108. // Mattias Andersson, 2005
  109. //------------------------------------------------------------------------------
  110. // http://delphi.newswhat.com/geoxml/forumhistorythread?groupname=borland.public.delphi.language.basm&[email protected] (link dead)
  111. // https://groups.google.com/g/borland.public.delphi.language.basm/c/QXxiJZnIOa8/m/YMID8XaqzdsJ
  112. // https://web.archive.org/web/20240914232817/https://groups.google.com/g/borland.public.delphi.language.basm/c/QXxiJZnIOa8/m/YMID8XaqzdsJ
  113. //------------------------------------------------------------------------------
  114. // Unoptimized reference implementions.
  115. //------------------------------------------------------------------------------
  116. procedure SelectiveGaussianNew(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  117. procedure SelectiveGaussianNewGamma(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  118. procedure SelectiveGaussianHorzVert(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer); deprecated 'Destroys source pixels';
  119. //------------------------------------------------------------------------------
  120. //------------------------------------------------------------------------------
  121. //------------------------------------------------------------------------------
  122. implementation
  123. uses
  124. {$if not defined(FPC)}
  125. System.Math,
  126. System.SyncObjs, // TCriticalSection
  127. {$else}
  128. Math,
  129. SyncObjs, // TCriticalSection
  130. {$ifend}
  131. GR32_Gamma,
  132. GR32.Blur,
  133. GR32_Bindings,
  134. GR32_LowLevel,
  135. GR32_System,
  136. GR32_OrdinalMaps,
  137. GR32.Types.SIMD;
  138. // Ensure that we use the GR32.TFloat and not FPC's Math.TFloat (which is an alias for Double!)
  139. type
  140. TFloat = GR32.TFloat;
  141. PFloat = ^TFloat;
  142. //------------------------------------------------------------------------------
  143. //
  144. // PremultiplyLUT
  145. //
  146. //------------------------------------------------------------------------------
  147. // Lookup tables for alpha premultiplication.
  148. //
  149. // MulDiv255[a,b] = a * b / 255 Used for premultiplication
  150. // Mul255Div[a,b] = Round(a * 255 / b) Used for unpremultiplication
  151. //
  152. // where
  153. //
  154. // a: Color value
  155. // b: Alpha value
  156. //
  157. // PremultiplyLUT is used for pre- and unpremultiplication.
  158. // GammaPremultiplyLUT rolls the gamma correction and pre-/unpremultiplication
  159. // operations into one for a significant gain in precision at no extra cost in
  160. // performance.
  161. //
  162. //------------------------------------------------------------------------------
  163. type
  164. PPremultiplyLUT = ^TPremultiplyLUT;
  165. TPremultiplyLUT = record
  166. strict private
  167. class constructor Create;
  168. class destructor Destroy;
  169. const OneOver255 = 1 / 255;
  170. strict private class var
  171. FLock: TCriticalSection;
  172. FPremultiplyLUT: PPremultiplyLUT;
  173. FGammaPremultiplyLUT: PPremultiplyLUT;
  174. strict private
  175. FsRGB: boolean;
  176. FGamma: Double;
  177. FGammaInv: Double;
  178. procedure SetGamma(const GammaValue: Double; sRGB: boolean);
  179. procedure GammaChangedHandler;
  180. public type
  181. TLUT88 = array[byte, byte] of byte;
  182. public
  183. Mul255Div: TLUT88;
  184. MulDiv255: TLUT88;
  185. public
  186. class function PremultiplyLUT: PPremultiplyLUT; static;
  187. class function GammaPremultiplyLUT: PPremultiplyLUT; static;
  188. class procedure Apply(const LUT: TLUT88; Values, Alpha: PByteArray; Count: integer); static;
  189. property sRGB: boolean read FsRGB;
  190. property Gamma: Double read FGamma;
  191. end;
  192. //------------------------------------------------------------------------------
  193. class constructor TPremultiplyLUT.Create;
  194. begin
  195. FLock := TCriticalSection.Create;
  196. FPremultiplyLUT := nil;
  197. FGammaPremultiplyLUT := nil;
  198. end;
  199. class destructor TPremultiplyLUT.Destroy;
  200. begin
  201. FLock.Free;
  202. if (FPremultiplyLUT <> nil) then
  203. Dispose(FPremultiplyLUT);
  204. if (FGammaPremultiplyLUT <> nil) then
  205. begin
  206. UnregisterGammaChangeNotification(FGammaPremultiplyLUT.GammaChangedHandler);
  207. Dispose(FGammaPremultiplyLUT);
  208. end;
  209. end;
  210. //------------------------------------------------------------------------------
  211. class procedure TPremultiplyLUT.Apply(const LUT: TLUT88; Values, Alpha: PByteArray; Count: integer);
  212. begin
  213. while (Count > 0) do
  214. begin
  215. PByte(Values)^ := LUT[PByte(Values)^, PByte(Alpha)^];
  216. Inc(PByte(Values));
  217. Inc(PByte(Alpha));
  218. Dec(Count);
  219. end;
  220. end;
  221. //------------------------------------------------------------------------------
  222. class function TPremultiplyLUT.PremultiplyLUT: PPremultiplyLUT;
  223. var
  224. AlphaValue, ColorValue: Integer;
  225. begin
  226. if (FPremultiplyLUT = nil) then
  227. begin
  228. FLock.Acquire;
  229. if (FPremultiplyLUT = nil) then
  230. begin
  231. New(FPremultiplyLUT);
  232. for ColorValue := 0 to 255 do
  233. begin
  234. FPremultiplyLUT.Mul255Div[ColorValue, 0] := 0;
  235. FPremultiplyLUT.MulDiv255[ColorValue, 0] := 0;
  236. for AlphaValue := 1 to 255 do
  237. begin
  238. FPremultiplyLUT.Mul255Div[ColorValue, AlphaValue] := Clamp(Round(ColorValue * 255 / AlphaValue));
  239. FPremultiplyLUT.MulDiv255[ColorValue, AlphaValue] := Round(ColorValue * AlphaValue * OneOver255);
  240. end;
  241. end;
  242. end;
  243. FLock.Release;
  244. end;
  245. Result := FPremultiplyLUT;
  246. end;
  247. //------------------------------------------------------------------------------
  248. class function TPremultiplyLUT.GammaPremultiplyLUT: PPremultiplyLUT;
  249. begin
  250. if (FGammaPremultiplyLUT = nil) then
  251. begin
  252. FLock.Acquire;
  253. if (FGammaPremultiplyLUT = nil) then
  254. begin
  255. New(FGammaPremultiplyLUT);
  256. RegisterGammaChangeNotification(FGammaPremultiplyLUT.GammaChangedHandler);
  257. FGammaPremultiplyLUT.SetGamma(GAMMA_VALUE, GAMMA_IS_SRGB);
  258. end;
  259. FLock.Release;
  260. end;
  261. Result := FGammaPremultiplyLUT;
  262. end;
  263. procedure TPremultiplyLUT.GammaChangedHandler;
  264. begin
  265. SetGamma(GAMMA_VALUE, GAMMA_IS_SRGB);
  266. end;
  267. procedure TPremultiplyLUT.SetGamma(const GammaValue: Double; sRGB: boolean);
  268. var
  269. AlphaValue, ColorValue: Integer;
  270. n: Single;
  271. ColorLinear, ColorRGB: TFloat;
  272. begin
  273. if (FsRGB = sRGB) and ((FsRGB) or (GammaValue = FGamma)) then
  274. exit;
  275. FsRGB := sRGB;
  276. if (not FsRGB) then
  277. begin
  278. FGamma := GammaValue;
  279. FGammaInv := 1 / FGamma;
  280. end;
  281. for ColorValue := 0 to 255 do
  282. begin
  283. Mul255Div[ColorValue, 0] := 0;
  284. MulDiv255[ColorValue, 0] := 0;
  285. // sRGB -> Linear RGB / 255
  286. ColorLinear := ColorValue * OneOver255;
  287. if (FsRGB) then
  288. begin
  289. if (ColorLinear >= 0.04045) then
  290. ColorLinear := Power((ColorLinear + 0.055) * (1 / 1.055), 2.4)
  291. else
  292. ColorLinear := ColorLinear * (1 / 12.92);
  293. end else
  294. ColorLinear := Power(ColorLinear, FGamma);
  295. // ColorValue: Color, AlphaValue: Alpha
  296. for AlphaValue := 1 to 255 do
  297. begin
  298. // Linear RGB -> Premultiplied, Linear RGB
  299. n := ColorLinear * AlphaValue;
  300. MulDiv255[ColorValue, AlphaValue] := Round(n);
  301. // Premultiplied, Linear RGB -> Unpremultiplied, Linear RGB
  302. n := ColorValue / AlphaValue;
  303. // Linear RGB -> sRGB / 255
  304. if (FsRGB) then
  305. begin
  306. if (n >= 0.0031308) then
  307. ColorRGB := 1.055 * Power(n, 1 / 2.4) - 0.055
  308. else
  309. ColorRGB := n * 12.92;
  310. end else
  311. ColorRGB := Power(n, FGammaInv);
  312. Mul255Div[ColorValue, AlphaValue] := Clamp(Round(ColorRGB * 255));
  313. end;
  314. end;
  315. end;
  316. //------------------------------------------------------------------------------
  317. type
  318. TSingleDynArray = array of Single;
  319. function GaussianKernel(Radius: Single): TSingleDynArray;
  320. var
  321. i, R: Integer;
  322. StdDev, C1, C2: Single;
  323. begin
  324. R := Ceil(Radius);
  325. SetLength(Result, R + 1);
  326. StdDev := Radius * GaussianRadiusToSigma;
  327. C1 := 1.0 / Sqrt(2.0 * Pi * StdDev);
  328. C2 := -0.5 / Sqr(StdDev);
  329. for i := 0 to R do
  330. Result[i] := C1 * Exp(Sqr(i) * C2);
  331. end;
  332. //------------------------------------------------------------------------------
  333. //
  334. // The original implementation of selective gaussian blur (similar to the
  335. // one in GIMP).
  336. //
  337. //------------------------------------------------------------------------------
  338. // Adapted from Eric Grange's version which in turn was based on the source
  339. // of "Selective gaussian blur filter for the GIMP".
  340. //------------------------------------------------------------------------------
  341. procedure InternalSelectiveGaussianGimp(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer; const PremultiplyLUT: TPremultiplyLUT);
  342. var
  343. X, Y, Plane, WindowX, WindowY, R, MaxX, MaxY: Integer;
  344. MinValue, MaxValue: Integer;
  345. Kernel: TSingleDynArray;
  346. Sum, Fact, Weight: Single;
  347. RefColor: TColor32Entry;
  348. RefValue: Integer;
  349. SampleColor: TColor32Entry;
  350. SampleValue: Integer;
  351. DstValue: Byte;
  352. pDestColor: PColor32Entry;
  353. pDstLine, pSrcLine: PColor32Array;
  354. begin
  355. ASSERT(Src <> Dst);
  356. if (Radius < GaussianRadiusToSigma) or (Delta <= 0) or (Src.Empty) then
  357. begin
  358. Src.CopyMapTo(Dst);
  359. exit;
  360. end;
  361. R := Ceil(Radius);
  362. MaxX := Src.Width - 1;
  363. MaxY := Src.Height - 1;
  364. Dst.SetSizeFrom(Src);
  365. Kernel := GaussianKernel(Radius);
  366. try
  367. for Y := 0 to MaxY do
  368. begin
  369. pDstLine := Dst.ScanLine[Y];
  370. for X := 0 to MaxX do
  371. begin
  372. RefColor := TColor32Entry(Src[X, Y]);
  373. pDestColor := @(pDstLine[X]);
  374. // Process each of the RGB channels in turn
  375. for Plane := 0 to 2 do
  376. begin
  377. Sum := 0;
  378. Fact := 0;
  379. RefValue := RefColor.Planes[Plane];
  380. // Premultiply and gamma (sRGB->LinearRGB)
  381. RefValue := PremultiplyLUT.MulDiv255[RefValue, RefColor.A];
  382. MinValue := RefValue - Delta;
  383. MaxValue := RefValue + Delta;
  384. for WindowY := -R to R do
  385. begin
  386. if WindowY + Y < 0 then
  387. Continue;
  388. if WindowY + Y > MaxY then
  389. Break;
  390. pSrcLine := Src.ScanLine[WindowY + Y];
  391. for WindowX := -R to R do
  392. begin
  393. if WindowX + X < 0 then
  394. Continue;
  395. if WindowX + X > MaxX then
  396. Break;
  397. SampleColor := TColor32Entry(pSrcLine[WindowX + X]);
  398. SampleValue := SampleColor.Planes[Plane];
  399. // Premultiply and gamma (sRGB->LinearRGB)
  400. SampleValue := PremultiplyLUT.MulDiv255[SampleValue, SampleColor.A];
  401. if (SampleValue >= MinValue) and (SampleValue <= MaxValue) then
  402. begin
  403. Weight := Kernel[Abs(WindowX)] * Kernel[Abs(WindowY)];
  404. Sum := Sum + SampleValue * Weight;
  405. Fact := Fact + Weight;
  406. end;
  407. end;
  408. end;
  409. DstValue := FastRound(Sum / Fact); // TODO : Need to Clamp. Rounding errors can cause values to grow beyond 255
  410. // Unpremultiply and gamma (LinearRGB->sRGB)
  411. DstValue := PremultiplyLUT.Mul255Div[DstValue, RefColor.A];
  412. pDestColor.Planes[Plane] := DstValue;
  413. end;
  414. // Copy alpha
  415. pDestColor.A := RefColor.A;
  416. end;
  417. end;
  418. finally
  419. Kernel := nil;
  420. end;
  421. end;
  422. //------------------------------------------------------------------------------
  423. procedure SelectiveGaussianGimp(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  424. var
  425. PremultiplyLUT: PPremultiplyLUT;
  426. begin
  427. PremultiplyLUT := TPremultiplyLUT.PremultiplyLUT;
  428. InternalSelectiveGaussianGimp(Src, Dst, Radius, Delta, PremultiplyLUT^);
  429. end;
  430. procedure SelectiveGaussianGimpGamma(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  431. var
  432. PremultiplyLUT: PPremultiplyLUT;
  433. begin
  434. PremultiplyLUT := TPremultiplyLUT.GammaPremultiplyLUT;
  435. InternalSelectiveGaussianGimp(Src, Dst, Radius, Delta, PremultiplyLUT^);
  436. end;
  437. //------------------------------------------------------------------------------
  438. //
  439. // Optimized algorithm that performs horizontal and vertical blurring only
  440. // once for each reference color at a certain position (x, y). A table is
  441. // used for cacheing convolution sum of the reference color values that have
  442. // already been visited. Vertical blurring can then be performed in a
  443. // single pass by looking up already cached entries for a given reference
  444. // color (and we can thus take advantage of the fact the gaussian is
  445. // separable). In order to minimize required memory, the horizontal pass is
  446. // performed on one column at a time. This requires Src.Height * 2^BitDepth
  447. // bytes of memory, so it's a problem if we want to support 16-bit images.
  448. // Another problem with images with higher bit-depth is that there is a
  449. // smaller probability that adjacent colors have the same pixel values.
  450. //
  451. //------------------------------------------------------------------------------
  452. procedure InternalSelectiveGaussianNew(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer; const PremultiplyLUT: TPremultiplyLUT);
  453. type
  454. PCacheEntry = ^TCacheEntry;
  455. TCacheEntry = record
  456. Sum: Single; // intermediate sum of convolution
  457. Fact: Single; // intermediate sum of kernel weights
  458. end;
  459. var
  460. X, Y, Plane, WindowX, WindowY, LoY, SampleValue, R, MaxX, MaxY: Integer;
  461. MinValue, MaxValue: Integer;
  462. Kernel: TSingleDynArray;
  463. Sum, Fact, Weight: Single;
  464. Color: TColor32Entry;
  465. pColor: PColor32Entry;
  466. RefValue: Integer;
  467. PSrcLine: PColor32Array;
  468. PEntry: PCacheEntry;
  469. SumCache: array of array [Byte] of TCacheEntry;
  470. LastPos: array [Byte] of Integer;
  471. Value: Byte;
  472. begin
  473. ASSERT(Src <> Dst);
  474. if (Radius < GaussianRadiusToSigma) or (Delta <= 0) or (Src.Empty) then
  475. begin
  476. Src.CopyMapTo(Dst);
  477. exit;
  478. end;
  479. R := Ceil(Radius);
  480. MaxX := Src.Width - 1;
  481. MaxY := Src.Height - 1;
  482. Dst.SetSizeFrom(Src);
  483. SetLength(SumCache, Src.Height);
  484. Kernel := GaussianKernel(Radius);
  485. for X := 0 to MaxX do
  486. begin
  487. // Process each of the RGB channels in turn
  488. for Plane := 0 to 2 do
  489. begin
  490. FillLongword(LastPos[0], Length(LastPos), Cardinal(Low(Integer)));
  491. for Y := 0 to MaxY do
  492. begin
  493. Color := TColor32Entry(Src[X, Y]);
  494. RefValue := Color.Planes[Plane];
  495. // Premultiply and gamma (sRGB->LinearRGB)
  496. RefValue := PremultiplyLUT.MulDiv255[RefValue, Color.A];
  497. MinValue := RefValue - Delta;
  498. MaxValue := RefValue + Delta;
  499. if LastPos[RefValue] < Y - R then
  500. LoY := Y - R
  501. else
  502. LoY := LastPos[RefValue] + 1;
  503. for WindowY := LoY to Y + R do
  504. begin
  505. if WindowY < 0 then
  506. Continue;
  507. if WindowY > MaxY then
  508. Break;
  509. Sum := 0;
  510. Fact := 0;
  511. PSrcLine := Src.Scanline[WindowY];
  512. for WindowX := -R to R do
  513. begin
  514. if WindowX + X < 0 then
  515. Continue;
  516. if WindowX + X > MaxX then
  517. Break;
  518. Color := TColor32Entry(PSrcLine[WindowX + X]);
  519. SampleValue := Color.Planes[Plane];
  520. // Premultiply and gamma (sRGB->LinearRGB)
  521. SampleValue := PremultiplyLUT.MulDiv255[SampleValue, Color.A];
  522. if (SampleValue >= MinValue) and (SampleValue <= MaxValue) then
  523. begin
  524. Weight := Kernel[Abs(WindowX)];
  525. Sum := Sum + SampleValue * Weight;
  526. Fact := Fact + Weight;
  527. end;
  528. end;
  529. PEntry := @SumCache[WindowY][RefValue];
  530. PEntry.Sum := Sum;
  531. PEntry.Fact := Fact;
  532. end;
  533. LastPos[RefValue] := Y + R;
  534. end;
  535. for Y := 0 to MaxY do
  536. begin
  537. Color := TColor32Entry(Src[X, Y]);
  538. RefValue := Color.Planes[Plane];
  539. // Premultiply and gamma (sRGB->LinearRGB)
  540. RefValue := PremultiplyLUT.MulDiv255[RefValue, Color.A];
  541. Sum := 0;
  542. Fact := 0;
  543. for WindowY := -R to R do
  544. begin
  545. if WindowY + Y < 0 then
  546. Continue;
  547. if WindowY + Y > MaxY then
  548. Break;
  549. Weight := Kernel[Abs(WindowY)];
  550. PEntry := @SumCache[WindowY + Y][RefValue];
  551. Sum := Sum + PEntry.Sum * Weight;
  552. Fact := Fact + PEntry.Fact * Weight;
  553. end;
  554. pColor := PColor32Entry(Dst.PixelPtr[X, Y]);
  555. Value := Round(Sum / Fact);
  556. // Unpremultiply and gamma (LinearRGB->sRGB)
  557. Value := PremultiplyLUT.Mul255Div[Value, pColor.A];
  558. pColor.Planes[Plane] := Value;
  559. end;
  560. // Copy alpha
  561. for Y := 0 to MaxY do
  562. PColor32Entry(Dst.PixelPtr[X, Y]).A := TColor32Entry(Src[X, Y]).A;
  563. end;
  564. end;
  565. end;
  566. //------------------------------------------------------------------------------
  567. procedure SelectiveGaussianNew(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  568. var
  569. PremultiplyLUT: PPremultiplyLUT;
  570. begin
  571. PremultiplyLUT := TPremultiplyLUT.PremultiplyLUT;
  572. InternalSelectiveGaussianNew(Src, Dst, Radius, Delta, PremultiplyLUT^);
  573. end;
  574. procedure SelectiveGaussianNewGamma(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  575. var
  576. PremultiplyLUT: PPremultiplyLUT;
  577. begin
  578. PremultiplyLUT := TPremultiplyLUT.GammaPremultiplyLUT;
  579. InternalSelectiveGaussianNew(Src, Dst, Radius, Delta, PremultiplyLUT^);
  580. end;
  581. //------------------------------------------------------------------------------
  582. //
  583. // This algorithm performs selective blurring first horizontally (storing
  584. // the intermediate result in a bitmap), and then vertically. The output
  585. // is slightly different from ordinary gaussian selective blurring, but
  586. // the speed-up is significant.
  587. //
  588. //------------------------------------------------------------------------------
  589. procedure SelectiveGaussianHorzVert(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  590. var
  591. X, Y, Plane, {LoU,} WindowX, WindowY, SampleValue, R, MaxX, MaxY: Integer;
  592. MinValue, MaxValue: Integer;
  593. Kernel: TSingleDynArray;
  594. Sum, Fact, Weight: Single;
  595. RefColor: TColor32;
  596. RefValue: Integer;
  597. PDstLine, PSrcLine: PColor32Array;
  598. begin
  599. ASSERT(Src <> Dst);
  600. if (Radius < GaussianRadiusToSigma) or (Delta <= 0) or (Src.Empty) then
  601. begin
  602. Src.CopyMapTo(Dst);
  603. exit;
  604. end;
  605. R := Ceil(Radius);
  606. MaxX := Src.Width - 1;
  607. MaxY := Src.Height - 1;
  608. Dst.SetSizeFrom(Src);
  609. Kernel := GaussianKernel(Radius);
  610. try
  611. for Y := 0 to MaxY do
  612. begin
  613. PDstLine := Dst.ScanLine[Y];
  614. PSrcLine := Src.ScanLine[Y];
  615. for X := 0 to MaxX do
  616. begin
  617. RefColor := Src[X, Y];
  618. for Plane := 0 to 2 do
  619. begin
  620. Sum := 0;
  621. Fact := 0;
  622. // TODO : Premultiply and gamma (sRGB->LinearRGB)
  623. RefValue := TColor32Entry(RefColor).Planes[Plane];
  624. MinValue := RefValue - Delta;
  625. MaxValue := RefValue + Delta;
  626. for WindowX := -R to R do
  627. begin
  628. if WindowX + X < 0 then
  629. Continue;
  630. if WindowX + X > MaxX then
  631. Break;
  632. SampleValue := TColor32Entry(PSrcLine[WindowX + X]).Planes[Plane];
  633. if (SampleValue >= MinValue) and (SampleValue <= MaxValue) then
  634. begin
  635. Weight := Kernel[Abs(WindowX)];
  636. Sum := Sum + SampleValue * Weight;
  637. Fact := Fact + Weight;
  638. end;
  639. end;
  640. TColor32Entry(PDstLine[X]).Planes[Plane] := Round(Sum / Fact);
  641. end;
  642. end;
  643. end;
  644. for Y := 0 to MaxY do
  645. begin
  646. // TODO : This is using the source bitmap as a temporary buffer
  647. // thus destroying the source bitmap!
  648. PDstLine := Src.ScanLine[Y];
  649. for X := 0 to MaxX do
  650. begin
  651. RefColor := Dst[X, Y];
  652. for Plane := 0 to 2 do
  653. begin
  654. Sum := 0;
  655. Fact := 0;
  656. RefValue := TColor32Entry(RefColor).Planes[Plane];
  657. MinValue := RefValue - Delta;
  658. MaxValue := RefValue + Delta;
  659. for WindowY := -R to R do
  660. begin
  661. if WindowY + Y < 0 then
  662. Continue;
  663. if WindowY + Y > MaxY then
  664. Break;
  665. SampleValue := TColor32Entry(Dst[X, WindowY + Y]).Planes[Plane];
  666. if (SampleValue >= MinValue) and (SampleValue <= MaxValue) then
  667. begin
  668. Weight := Kernel[Abs(WindowY)];
  669. Sum := Sum + SampleValue * Weight;
  670. Fact := Fact + Weight;
  671. end;
  672. end;
  673. // TODO : Unpremultiply and gamma (LinearRGB->sRGB)
  674. TColor32Entry(PDstLine[X]).Planes[Plane] := Round(Sum / Fact);
  675. end;
  676. TColor32Entry(PDstLine[X]).A := TColor32Entry(RefColor).A;
  677. end;
  678. end;
  679. Dst.Assign(Src);
  680. finally
  681. Kernel := nil;
  682. end;
  683. end;
  684. //------------------------------------------------------------------------------
  685. //
  686. // SIMD optimized Selective Gaussian Blur
  687. // Originally by Mattias Andersson
  688. //
  689. //------------------------------------------------------------------------------
  690. // Modified to also blur alpha channel.
  691. // Various fixes for size not mod 4 and buffer overflows.
  692. // Replaced MMX version with SSE2 version.
  693. //------------------------------------------------------------------------------
  694. type
  695. PCacheEntry = ^TCacheEntry;
  696. TCacheEntry = record
  697. Sum: Cardinal; // intermediate sum of convolution
  698. Fact: Cardinal; // intermediate sum of kernel weights
  699. end;
  700. PRangeEntry = ^TRangeEntry;
  701. TRangeEntry = packed record
  702. Min: Byte;
  703. Max: Byte;
  704. Sum: Cardinal;
  705. end;
  706. //------------------------------------------------------------------------------
  707. function GaussianKernelInt(Radius: Single): TArrayOfWord;
  708. var
  709. i, R: Integer;
  710. C: Single;
  711. begin
  712. R := Ceil(Radius);
  713. SetLength(Result, R * 2 + 1);
  714. C := -0.5 / Sqr(Radius * GaussianRadiusToSigma);
  715. // for i := -R to R do
  716. for i := 0 to R do
  717. begin
  718. // Result[R+i] := Round(255 * Exp(Sqr(i) * C));
  719. Result[R+i] := Round(255 * Exp(Sqr(i) * C));
  720. Result[R-i] := Result[R+i];
  721. end;
  722. end;
  723. //------------------------------------------------------------------------------
  724. procedure Accumulate_Pas(pSrc: PByteArray; pFact: PWordArray; Count, Min, Max: Integer; out Sum, FactSum: Cardinal);
  725. begin
  726. Sum := 0;
  727. FactSum := 0;
  728. while (Count > 0) do
  729. begin
  730. Dec(Count);
  731. if (pSrc[Count] > Min) and (pSrc[Count] < Max) then
  732. begin
  733. Sum := Sum + ((pSrc[Count] * pFact[Count]) shr 8);
  734. FactSum := FactSum + pFact[Count];
  735. end;
  736. end;
  737. end;
  738. {$if (not defined(PUREPASCAL)) and (not defined(OMIT_SSE2))}
  739. procedure Accumulate_SSE2(pSrc: Pointer; pFact: Pointer; Count, Min, Max: Integer; out Sum, FactSum: Cardinal); //{$IFDEF FPC} assembler; {$ENDIF}
  740. // Parameters (x86):
  741. // EAX <- pSrc
  742. // EDX <- pFact
  743. // ECX <- Count
  744. // Stack[0] <- Min
  745. // Stack[1] <- Max
  746. // Stack[2] <- @Sum
  747. // Stack[3] <- @FactSum
  748. //
  749. // Parameters (x64):
  750. // RCX <- pSrc
  751. // RDX <- pFact
  752. // R8 <- Count
  753. // R9 <- Min
  754. // Stack[0] <- Max
  755. // Stack[1] <- @Sum
  756. // Stack[2] <- @FactSum
  757. // SSE register usage:
  758. // XMM0: Min | Min | Min | Min
  759. // XMM1: Max | Max | Max | Max
  760. // XMM2: Four pSrc bytes
  761. // XMM3: Four pFact words
  762. // XMM4: "Misc"
  763. // XMM5: Sum
  764. // XMM6: FactSum
  765. // XMM7: "Zero"
  766. {$if defined(TARGET_x64) and defined(FPC)}begin{$ifend}
  767. asm
  768. {$if defined(TARGET_x64)}
  769. {$IFNDEF FPC}
  770. .SAVENV XMM4
  771. .SAVENV XMM5
  772. .SAVENV XMM6
  773. .SAVENV XMM7
  774. {$ENDIF}
  775. {$elseif defined(TARGET_x86)}
  776. // nothing
  777. {$else}
  778. {$message fatal 'Unsupported target'}
  779. {$ifend}
  780. {$IFDEF FPC}
  781. {$define RETARD_COMPILER} // Just to make it clear what I think of FPC's assembler
  782. {$ENDIF}
  783. // initialize
  784. // M0 := Min;
  785. MOVD XMM0, Min
  786. PUNPCKLWD XMM0, XMM0 // Unpack Low Data ([ab][cd] -> [acbd])
  787. PUNPCKLDQ XMM0, XMM0
  788. // M1 := Max;
  789. MOVD XMM1, Max
  790. PUNPCKLWD XMM1, XMM1
  791. PUNPCKLDQ XMM1, XMM1
  792. PXOR XMM5, XMM5
  793. PXOR XMM6, XMM6
  794. PXOR XMM7, XMM7
  795. // Negative offset "trick"
  796. {$if defined(TARGET_x86)}
  797. LEA pSrc, [pSrc+Count]
  798. LEA pFact, [pFact+Count*2]
  799. NEG Count
  800. {$elseif defined(TARGET_x64)}
  801. {$IFNDEF RETARD_COMPILER}
  802. LEA pSrc, [pSrc+R8]
  803. LEA pFact, [pFact+R8*2]
  804. {$ELSE}
  805. LEA ECX, [RCX+R8]
  806. LEA EDX, [RDX+R8*2]
  807. {$ENDIF}
  808. NEG R8
  809. {$ifend}
  810. // if (Count mod 4 = 0) then goto :ProcessFours
  811. {$if defined(TARGET_x86)}
  812. TEST Count, $0003
  813. {$elseif defined(TARGET_x64)}
  814. TEST R8, $0003
  815. {$ifend}
  816. JZ @ProcessFours
  817. // Process Count/4 remainders
  818. {$if defined(TARGET_x86)}
  819. PUSH EBX
  820. PUSH EDI
  821. {$ifend}
  822. @NextOne:
  823. {$if defined(TARGET_x86)}
  824. // if (pSrc[Count] <= Min) or (pSrc[Count] >= Max) then goto :SkipOne
  825. MOVZX EBX, BYTE PTR[pSrc+Count] // Load single byte
  826. CMP Min, EBX
  827. JGE @SkipOne
  828. CMP EBX, Max
  829. JGE @SkipOne
  830. // Sum := Sum + ((pSrc[Count] * pFact[Count]) shr 8);
  831. MOVZX EDI, WORD PTR[pFact+Count*2] // Load single word
  832. IMUL EBX, EDI
  833. SHR EBX, 8
  834. MOVD XMM2, EBX
  835. PADDD XMM5, XMM2
  836. // FactSum := FactSum + pFact[Count];
  837. MOVD XMM3, EDI
  838. PADDD XMM6, XMM3
  839. {$elseif defined(TARGET_x64)}
  840. // if (pSrc[Count] <= Min) or (pSrc[Count] >= Max) then goto :SkipOne
  841. {$IFNDEF RETARD_COMPILER}
  842. MOVZX R10D, BYTE PTR[pSrc+R8] // Load single byte
  843. {$ELSE}
  844. MOVZX R10D, BYTE PTR[RCX+R8] // Load single byte
  845. {$ENDIF}
  846. {$IFNDEF RETARD_COMPILER}
  847. CMP Min, R10D
  848. {$ELSE}
  849. CMP R9D, R10D
  850. {$ENDIF}
  851. JGE @SkipOne
  852. CMP R10D, Max
  853. JGE @SkipOne
  854. // Sum := Sum + ((pSrc[Count] * pFact[Count]) shr 8);
  855. {$IFNDEF RETARD_COMPILER}
  856. MOVZX R11D, WORD PTR[pFact+R8*2] // Load single word
  857. {$ELSE}
  858. MOVZX R11D, WORD PTR[RDX+R8*2] // Load single word
  859. {$ENDIF}
  860. IMUL R10D, R11D
  861. SHR R10D, 8
  862. MOVD XMM2, R10D
  863. PADDD XMM5, XMM2
  864. // FactSum := FactSum + pFact[Count];
  865. MOVD XMM3, R11D
  866. PADDD XMM6, XMM3
  867. {$ifend}
  868. @SkipOne:
  869. {$if defined(TARGET_x86)}
  870. INC Count
  871. {$elseif defined(TARGET_x64)}
  872. INC R8
  873. {$ifend}
  874. // if (Count mod 4 <> 0) then goto :NextOne
  875. {$if defined(TARGET_x86)}
  876. TEST Count, $0003
  877. {$elseif defined(TARGET_x64)}
  878. TEST R8, $0003
  879. {$ifend}
  880. JNZ @NextOne
  881. {$if defined(TARGET_x86)}
  882. POP EDI
  883. POP EBX
  884. {$ifend}
  885. @ProcessFours:
  886. // Count := Count div 4
  887. // if (Count = 0) then goto :Done
  888. {$if defined(TARGET_x86)}
  889. SAR Count, 2
  890. JCXZ @Done
  891. {$elseif defined(TARGET_x64)}
  892. SAR R8, 2
  893. JZ @Done
  894. {$ifend}
  895. // loop start
  896. @Loop:
  897. // if (pSrc[Count] > Min) and (pSrc[Count] < Max) then
  898. // begin
  899. // Sum := Sum + pSrc[Count] * pFact[Count];
  900. // FactSum := FactSum + pFact[Count];
  901. // end;
  902. // M2 := pSrc[Count];
  903. {$if defined(TARGET_x86)}
  904. MOVD XMM2, DWORD PTR [pSrc+Count*4] // Load four bytes
  905. {$elseif defined(TARGET_x64)}
  906. {$IFNDEF RETARD_COMPILER}
  907. MOVD XMM2, DWORD PTR [pSrc+R8*4] // Load four bytes
  908. {$ELSE}
  909. MOVD XMM2, DWORD PTR [RCX+R8*4] // Load four bytes
  910. {$ENDIF}
  911. {$ifend}
  912. PUNPCKLBW XMM2, XMM7
  913. // M3 := pFact[Count];
  914. {$if defined(TARGET_x86)}
  915. MOVQ XMM3, QWORD PTR [pFact+Count*8] // Load four words
  916. {$elseif defined(TARGET_x64)}
  917. {$IFNDEF RETARD_COMPILER}
  918. MOVQ XMM3, QWORD PTR [pFact+R8*8] // Load four words
  919. {$ELSE}
  920. MOVQ XMM3, QWORD PTR [RDX+R8*8] // Load four words
  921. {$ENDIF}
  922. {$ifend}
  923. // store threshold mask in MM4
  924. // M4 := M2;
  925. MOVQ XMM4, XMM2
  926. // if (M4 > Min) then M4 := $FF else M4 := $00;
  927. PCMPGTW XMM4, XMM0 // Compare Packed Signed Integers for Greater Than
  928. // M4 := M4 and Max;
  929. PAND XMM4, XMM1
  930. // if (M4 > M2) then M4 := $FF else M4 := $00;
  931. PCMPGTW XMM4, XMM2
  932. // mask colors and weights
  933. // M2 := M2 and M4;
  934. PAND XMM2, XMM4
  935. // M3 := M3 and M4;
  936. PAND XMM3, XMM4
  937. // multiply colors and weights
  938. // M2 := M2 * M3;
  939. PMULLW XMM2, XMM3
  940. // Clear lower byte of four words
  941. {$if (not defined(FPC)) or (not defined(TARGET_X64))}
  942. PAND XMM2, DQWORD PTR [SSE_FF00FF00_ALIGNED]
  943. {$else}
  944. PAND XMM2, DQWORD PTR [rip+SSE_FF00FF00_ALIGNED]
  945. {$ifend}
  946. // perform accumulation
  947. // M5 := M5 + M2;
  948. PSADBW XMM2, XMM7 // Compute Sum of Absolute Differences
  949. // This sums the four ((M2 * M3) shl 8).
  950. PADDD XMM5, XMM2
  951. // M6 := M6 + M3;
  952. PSADBW XMM3, XMM7
  953. PADDD XMM6, XMM3
  954. // loop end
  955. {$if defined(TARGET_x86)}
  956. INC Count
  957. {$elseif defined(TARGET_x64)}
  958. INC R8
  959. {$ifend}
  960. JNZ @Loop
  961. @Done:
  962. {$if defined(TARGET_x86)}
  963. MOV EAX, Sum
  964. MOVD DWORD PTR [EAX], XMM5
  965. MOV EAX, FactSum
  966. MOVD DWORD PTR [EAX], XMM6
  967. {$elseif defined(TARGET_x64)}
  968. MOV RAX, Sum
  969. MOVD DWORD PTR [RAX], XMM5
  970. MOV RAX, FactSum
  971. MOVD DWORD PTR [RAX], XMM6
  972. {$ifend}
  973. {$if defined(TARGET_x64) and defined(FPC)}end['XMM4', 'XMM5', 'XMM6', 'XMM7'];{$ifend}
  974. end;
  975. {$ifend}
  976. //------------------------------------------------------------------------------
  977. procedure InternalSelectiveGaussian1(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer; const PremultiplyLUT: TPremultiplyLUT);
  978. const
  979. SourcePlanes: array[0..2] of TConversionType = (ctBlue, ctGreen, ctRed);
  980. var
  981. X, Y, WindowY, MinX: NativeInt; // NativeInt required on FPC to support negative array offset
  982. Plane, LoY, R, MaxX, MaxY: Integer;
  983. XCount, ColCount: Integer;
  984. MinValue, MaxValue: Integer;
  985. Kernel: TArrayOfWord;
  986. PKernel: PWordArray;
  987. VSum, VFact, Sum: Cardinal;
  988. Weight, Fact, CFact: Cardinal;
  989. RefValue: Cardinal;
  990. CacheEntry: PCacheEntry;
  991. Value: Cardinal;
  992. pColor: PColor32Entry;
  993. SumCache: array of array [Byte] of TCacheEntry;
  994. LastPos: array [Byte] of Integer;
  995. Map: array[Low(SourcePlanes)..High(SourcePlanes)] of TByteMap;
  996. pMap: PByteArray;
  997. begin
  998. ASSERT(Src <> Dst);
  999. if (Radius < GaussianRadiusToSigma) or (Delta <= 0) or (Src.Empty) then
  1000. begin
  1001. Src.CopyMapTo(Dst);
  1002. exit;
  1003. end;
  1004. R := Ceil(Radius);
  1005. MaxX := Src.Width - 1;
  1006. MaxY := Src.Height - 1;
  1007. ColCount := Src.Width;
  1008. Dst.SetSizeFrom(Src);
  1009. SetLength(SumCache, Src.Height);
  1010. Kernel := GaussianKernelInt(Radius);
  1011. PKernel := PWordArray(@Kernel[R]); // Note: Pointer to midpoint
  1012. for Plane := Low(Map) to High(Map) do
  1013. Map[Plane] := TByteMap.Create;
  1014. try
  1015. // Load RGB into separate maps
  1016. for Plane := Low(Map) to High(Map) do
  1017. begin
  1018. Map[Plane].ReadFrom(Src, SourcePlanes[Plane]);
  1019. // Premultiply and gamma (sRGB->LinearRGB)
  1020. pMap := Map[Plane].Bits;
  1021. for X := 0 to Src.Width*Src.Height-1 do
  1022. begin
  1023. TColor32Entry(Dst.Bits[X]).A := TColor32Entry(Src.Bits[X]).A;
  1024. pMap[X] := PremultiplyLUT.MulDiv255[pMap[X], TColor32Entry(Src.Bits[X]).A];
  1025. end;
  1026. end;
  1027. CFact := 0;
  1028. for X := -R to R do
  1029. CFact := CFact + PKernel[X];
  1030. for X := 0 to MaxX do
  1031. begin
  1032. MinX := Max(-R, -X);
  1033. XCount := Min(R, MaxX - X) - MinX;
  1034. // Process each channel in turn
  1035. for Plane := Low(Map) to High(Map) do
  1036. begin
  1037. pMap := PByteArray(Map[Plane].ValPtr[X + MinX, 0]);
  1038. FillLongword(LastPos[0], Length(LastPos), Cardinal(Low(Integer)));
  1039. for Y := 0 to MaxY do
  1040. begin
  1041. RefValue := pMap[Y * ColCount - MinX];
  1042. MinValue := integer(RefValue) - Delta;
  1043. MaxValue := integer(RefValue) + Delta;
  1044. if LastPos[RefValue] < Y - R then
  1045. begin
  1046. if Y < R then
  1047. LoY := 0
  1048. else
  1049. LoY := Y - R
  1050. end else
  1051. LoY := LastPos[RefValue] + 1;
  1052. VSum := 0;
  1053. VFact := 0;
  1054. for WindowY := Y - R to LoY - 1 do
  1055. begin
  1056. if WindowY < 0 then
  1057. Continue;
  1058. Weight := PKernel[WindowY - Y];
  1059. CacheEntry := @SumCache[WindowY][RefValue];
  1060. VSum := VSum + CacheEntry.Sum * Weight;
  1061. VFact := VFact + CacheEntry.Fact * Weight;
  1062. end;
  1063. for WindowY := LoY to Y + R do
  1064. begin
  1065. if WindowY > MaxY then
  1066. Break;
  1067. SelectiveGaussianAccumulate(@pMap[WindowY * ColCount], @PKernel[MinX], XCount, MinValue, MaxValue, Sum, Fact);
  1068. CacheEntry := @SumCache[WindowY][RefValue];
  1069. CacheEntry.Sum := Sum;
  1070. CacheEntry.Fact := Fact;
  1071. Weight := PKernel[WindowY - Y];
  1072. VSum := VSum + Sum * Weight;
  1073. VFact := VFact + Fact * Weight;
  1074. end;
  1075. LastPos[RefValue] := Min(Y + R, MaxY);
  1076. Value := 0;
  1077. pColor := PColor32Entry(Dst.PixelPtr[X, Y]);
  1078. // Note:
  1079. // It's tempting to lessen the rounding error by doing a
  1080. // "VSum shl 8" below instead of a "VFact shr 8" here.
  1081. // Unfortunately that can cause an overflow because
  1082. // "VSum shl 8" overflows 31 bits and turn the result
  1083. // negative.
  1084. // In order to avoid this overflow we need the Sum variables
  1085. // to be unsigned so we can use all 32 bits.
  1086. // Old:
  1087. // VFact := VFact shr 8;
  1088. // Value := VSum div (VFact shr 8);
  1089. // New:
  1090. // Value := (VSum shl 8) div VFact;
  1091. if (VSum <> 0) and (VFact <> 0) then
  1092. begin
  1093. // We could improve the precision and lessen the signal loss by
  1094. // doing a Round instead of a Div here, but it only improves
  1095. // the loss slightly and it absolutely kills the performance.
  1096. // Value := Round((VSum shl 8) / VFact);
  1097. Value := (VSum shl 8) div VFact;
  1098. // Unpremultiply and gamma (LinearRGB->sRGB)
  1099. Value := PremultiplyLUT.Mul255Div[Value, pColor.A];
  1100. end;
  1101. pColor.Planes[Plane] := Value;
  1102. end;
  1103. end;
  1104. end;
  1105. finally
  1106. for Plane := Low(Map) to High(Map) do
  1107. Map[Plane].Free;
  1108. end;
  1109. end;
  1110. //------------------------------------------------------------------------------
  1111. procedure SelectiveGaussian1(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  1112. var
  1113. PremultiplyLUT: PPremultiplyLUT;
  1114. begin
  1115. PremultiplyLUT := TPremultiplyLUT.PremultiplyLUT;
  1116. InternalSelectiveGaussian1(Src, Dst, Radius, Delta, PremultiplyLUT^);
  1117. end;
  1118. procedure SelectiveGaussianGamma1(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  1119. var
  1120. PremultiplyLUT: PPremultiplyLUT;
  1121. begin
  1122. PremultiplyLUT := TPremultiplyLUT.GammaPremultiplyLUT;
  1123. InternalSelectiveGaussian1(Src, Dst, Radius, Delta, PremultiplyLUT^);
  1124. end;
  1125. //------------------------------------------------------------------------------
  1126. procedure InternalSelectiveGaussian2(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer; const PremultiplyLUT: TPremultiplyLUT);
  1127. const
  1128. SourcePlanes: array[0..2] of TConversionType = (ctBlue, ctGreen, ctRed);
  1129. var
  1130. KernelMinX, KernelMaxX, WindowX, WindowY: NativeInt;
  1131. Plane, LoY, R: Integer;
  1132. X, Y, MaxX, MaxY: integer;
  1133. MinValue, MaxValue: integer;
  1134. ColCount: integer;
  1135. Kernel: TArrayOfWord;
  1136. PKernel: PWordArray;
  1137. VSum, VFact, Sum: Cardinal;
  1138. Weight, Fact, CFact: Cardinal;
  1139. RefValue: integer;
  1140. PSrcLine: PByteArray;
  1141. CacheEntry: PCacheEntry;
  1142. RangeEntry: PRangeEntry;
  1143. pColor: PColor32Entry;
  1144. SampleValue: integer;
  1145. Value: Cardinal;
  1146. SumCache: array of array [Byte] of TCacheEntry;
  1147. RangeCache: array of TRangeEntry;
  1148. LastPos: array [Byte] of Integer;
  1149. Map: array[Low(SourcePlanes)..High(SourcePlanes)] of TByteMap;
  1150. PMap: PByteArray;
  1151. begin
  1152. ASSERT(Src <> Dst);
  1153. if (Radius < GaussianRadiusToSigma) or (Delta <= 0) or (Src.Empty) then
  1154. begin
  1155. Src.CopyMapTo(Dst);
  1156. exit;
  1157. end;
  1158. R := Ceil(Radius);
  1159. MaxX := Src.Width - 1;
  1160. MaxY := Src.Height - 1;
  1161. ColCount := Src.Width;
  1162. Dst.SetSizeFrom(Src);
  1163. SetLength(SumCache, Src.Height);
  1164. SetLength(RangeCache, Src.Height);
  1165. Kernel := GaussianKernelInt(Radius);
  1166. PKernel := PWordArray(@Kernel[R]);
  1167. for Plane := Low(Map) to High(Map) do
  1168. Map[Plane] := TByteMap.Create;
  1169. try
  1170. // Load RGB into separate maps
  1171. for Plane := Low(Map) to High(Map) do
  1172. begin
  1173. Map[Plane].ReadFrom(Src, SourcePlanes[Plane]);
  1174. // Premultiply and gamma (sRGB->LinearRGB)
  1175. pMap := Map[Plane].Bits;
  1176. for X := 0 to Src.Width*Src.Height-1 do
  1177. begin
  1178. TColor32Entry(Dst.Bits[X]).A := TColor32Entry(Src.Bits[X]).A;
  1179. pMap[X] := PremultiplyLUT.MulDiv255[pMap[X], TColor32Entry(Src.Bits[X]).A];
  1180. end;
  1181. end;
  1182. CFact := 0;
  1183. for WindowX := -R to R do
  1184. CFact := CFact + PKernel[WindowX];
  1185. for X := 0 to MaxX do
  1186. begin
  1187. KernelMinX := Max(-R, -X);
  1188. KernelMaxX := Min(R, MaxX - X);
  1189. // Process each channel in turn
  1190. for Plane := Low(Map) to High(Map) do
  1191. begin
  1192. PMap := PByteArray(Map[Plane].ValPtr[X, 0]);
  1193. // compute range entries
  1194. for Y := 0 to MaxY do
  1195. begin
  1196. PSrcLine := PByteArray(Map[Plane].ValPtr[X, Y]);
  1197. Sum := 0;
  1198. MinValue := 255;
  1199. MaxValue := 0;
  1200. for WindowX := KernelMinX to KernelMaxX do
  1201. begin
  1202. SampleValue := PSrcLine[WindowX];
  1203. Sum := Sum + Cardinal(PKernel[WindowX] * SampleValue);
  1204. if SampleValue < MinValue then
  1205. MinValue := SampleValue;
  1206. if SampleValue > MaxValue then
  1207. MaxValue := SampleValue;
  1208. end;
  1209. RangeEntry := @RangeCache[Y];
  1210. RangeEntry.Min := MinValue;
  1211. RangeEntry.Max := MaxValue;
  1212. RangeEntry.Sum := Sum shr 8;
  1213. end;
  1214. FillLongword(LastPos[0], Length(LastPos), Cardinal(Low(Integer)));
  1215. for Y := 0 to MaxY do
  1216. begin
  1217. RefValue := PMap[Y * ColCount];
  1218. MinValue := RefValue - Delta;
  1219. MaxValue := RefValue + Delta;
  1220. if LastPos[RefValue] < Y - R then
  1221. begin
  1222. if Y < R then
  1223. LoY := 0
  1224. else
  1225. LoY := Y - R
  1226. end else
  1227. LoY := LastPos[RefValue] + 1;
  1228. VSum := 0;
  1229. VFact := 0;
  1230. for WindowY := Y - R to LoY - 1 do
  1231. begin
  1232. if WindowY < 0 then
  1233. Continue;
  1234. Weight := PKernel[WindowY - Y];
  1235. CacheEntry := @SumCache[WindowY][RefValue];
  1236. VSum := VSum + CacheEntry.Sum * Weight;
  1237. VFact := VFact + CacheEntry.Fact * Weight;
  1238. end;
  1239. for WindowY := LoY to Y + R do
  1240. begin
  1241. if WindowY > MaxY then
  1242. break;
  1243. RangeEntry := @RangeCache[WindowY];
  1244. if (RangeEntry.Min < MinValue) or (RangeEntry.Max > MaxValue) then
  1245. begin
  1246. SelectiveGaussianAccumulate(@PMap[WindowY * ColCount + KernelMinX], @PKernel[KernelMinX], KernelMaxX - KernelMinX, MinValue, MaxValue, Sum, Fact);
  1247. end else
  1248. begin
  1249. Sum := RangeEntry.Sum;
  1250. Fact := CFact;
  1251. if X - R < 0 then
  1252. for WindowX := X to R - 1 do
  1253. Fact := Fact - PKernel[WindowX + 1]
  1254. else
  1255. if X + R > MaxX then
  1256. for WindowX := MaxX - X to R - 1 do
  1257. Fact := Fact - PKernel[WindowX + 1];
  1258. end;
  1259. CacheEntry := @SumCache[WindowY][RefValue];
  1260. CacheEntry.Sum := Sum;
  1261. CacheEntry.Fact := Fact;
  1262. Weight := PKernel[WindowY - Y];
  1263. VSum := VSum + Sum * Weight;
  1264. VFact := VFact + Fact * Weight;
  1265. end;
  1266. LastPos[RefValue] := Min(Y + R, MaxY);
  1267. Value := 0;
  1268. pColor := PColor32Entry(Dst.PixelPtr[X, Y]);
  1269. // Disabled: We do a "VSum shl 8" below instead.
  1270. // See comment in InternalSelectiveGaussian2.
  1271. // VFact := VFact shr 8;
  1272. if (VSum <> 0) and (VFact <> 0) then
  1273. begin
  1274. Value := (VSum shl 8) div VFact;
  1275. // Unpremultiply and gamma (LinearRGB->sRGB)
  1276. Value := PremultiplyLUT.Mul255Div[Value, pColor.A];
  1277. end;
  1278. pColor.Planes[Plane] := Value;
  1279. end;
  1280. end;
  1281. end;
  1282. finally
  1283. for Plane := Low(Map) to High(Map) do
  1284. Map[Plane].Free;
  1285. end;
  1286. end;
  1287. //------------------------------------------------------------------------------
  1288. procedure SelectiveGaussian2(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  1289. var
  1290. PremultiplyLUT: PPremultiplyLUT;
  1291. begin
  1292. PremultiplyLUT := TPremultiplyLUT.PremultiplyLUT;
  1293. InternalSelectiveGaussian2(Src, Dst, Radius, Delta, PremultiplyLUT^);
  1294. end;
  1295. procedure SelectiveGaussianGamma2(Src, Dst: TBitmap32; Radius: TFloat; Delta: Integer);
  1296. var
  1297. PremultiplyLUT: PPremultiplyLUT;
  1298. begin
  1299. PremultiplyLUT := TPremultiplyLUT.GammaPremultiplyLUT;
  1300. InternalSelectiveGaussian2(Src, Dst, Radius, Delta, PremultiplyLUT^);
  1301. end;
  1302. //------------------------------------------------------------------------------
  1303. //
  1304. // Bindings
  1305. //
  1306. //------------------------------------------------------------------------------
  1307. procedure SelectiveGaussian32NotImplemented(ASource, ADest: TBitmap32; Radius: TFloat; Delta: Integer);
  1308. begin
  1309. raise Exception.Create('This blur function has not been implemented');
  1310. end;
  1311. procedure RegisterBindings;
  1312. begin
  1313. BlurRegistry.RegisterBinding(@@SelectiveGaussianBlur32, 'SelectiveGaussianBlur32');
  1314. BlurRegistry.RegisterBinding(@@GammaSelectiveGaussianBlur32, 'GammaSelectiveGaussianBlur32');
  1315. BlurRegistry.RegisterBinding(@@SelectiveGaussianAccumulate, 'SelectiveGaussianAccumulate');
  1316. (*
  1317. ** SelectiveGaussianAccumulate
  1318. *)
  1319. BlurRegistry[@@SelectiveGaussianAccumulate].Add(@Accumulate_Pas, [isPascal]).Name := 'Accumulate_Pas';
  1320. {$if (not defined(PUREPASCAL)) and (not defined(OMIT_SSE2))}
  1321. BlurRegistry[@@SelectiveGaussianAccumulate].Add(@Accumulate_SSE2, [isSSE2]).Name := 'Accumulate_SSE2';
  1322. {$ifend}
  1323. // Implementation ordered by performance:
  1324. // 1. SelectiveGaussian1
  1325. // 2. SelectiveGaussian2
  1326. // 3. SelectiveGaussianHorzVert (destructive, disqualified)
  1327. // 4. SelectiveGaussianNew
  1328. // 5. SelectiveGaussianGimp
  1329. (*
  1330. ** SelectiveGaussianBlur32
  1331. *)
  1332. BlurRegistry[@@SelectiveGaussianBlur32].Add(@SelectiveGaussianGimp, [isPascal], 1024).Name := 'SelectiveGaussianGimp';
  1333. BlurRegistry[@@SelectiveGaussianBlur32].Add(@SelectiveGaussianNew, [isPascal], 768).Name := 'SelectiveGaussianNew';
  1334. BlurRegistry[@@SelectiveGaussianBlur32].Add(@SelectiveGaussian2, [isPascal], 0).Name := 'SelectiveGaussian2';
  1335. BlurRegistry[@@SelectiveGaussianBlur32].Add(@SelectiveGaussian1, [isPascal], -256).Name := 'SelectiveGaussian1';
  1336. (*
  1337. ** GammaSelectiveGaussianBlur32
  1338. *)
  1339. BlurRegistry[@@GammaSelectiveGaussianBlur32].Add(@SelectiveGaussianGimpGamma, [isPascal], 1024).Name := 'SelectiveGaussianGimpGamma';
  1340. BlurRegistry[@@GammaSelectiveGaussianBlur32].Add(@SelectiveGaussianNewGamma, [isPascal], 768).Name := 'SelectiveGaussianNewGamma';
  1341. BlurRegistry[@@GammaSelectiveGaussianBlur32].Add(@SelectiveGaussianGamma2, [isPascal], 0).Name := 'SelectiveGaussianGamma2';
  1342. BlurRegistry[@@GammaSelectiveGaussianBlur32].Add(@SelectiveGaussianGamma1, [isPascal], -256).Name := 'SelectiveGaussianGamma1';
  1343. (*
  1344. ** Rebind the above bindings
  1345. *)
  1346. BlurRegistry[@@SelectiveGaussianBlur32].Rebind;
  1347. BlurRegistry[@@GammaSelectiveGaussianBlur32].Rebind;
  1348. BlurRegistry[@@SelectiveGaussianAccumulate].Rebind;
  1349. end;
  1350. initialization
  1351. SelectiveGaussianBlur32 := SelectiveGaussian32NotImplemented;
  1352. GammaSelectiveGaussianBlur32 := SelectiveGaussian32NotImplemented;
  1353. RegisterBindings;
  1354. end.