ImagingBinary.pas 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. {
  2. Vampyre Imaging Library
  3. by Marek Mauder
  4. https://github.com/galfar/imaginglib
  5. https://imaginglib.sourceforge.io
  6. - - - - -
  7. This Source Code Form is subject to the terms of the Mozilla Public
  8. License, v. 2.0. If a copy of the MPL was not distributed with this
  9. file, You can obtain one at https://mozilla.org/MPL/2.0.
  10. }
  11. { Unit with operations on binary images. Binary images in Imaging are
  12. ifGray8 images where pixels with value 0 are considered off, an pixels > 0
  13. are on.
  14. Note: Native ifBinary data format was later added to Imaging. However,
  15. these functions still use ifGray8 for representation for less complex
  16. and faster processing. ifBinary is meant moreless like interchange
  17. format for IO file formats. }
  18. unit ImagingBinary;
  19. {$I ImagingOptions.inc}
  20. interface
  21. uses
  22. Types, ImagingTypes, Imaging, ImagingFormats, ImagingUtility;
  23. type
  24. { Basic morphologic operators.}
  25. TMorphologyOp = (
  26. moErode, // Erosion
  27. moDilate // Dilatation
  28. );
  29. { Structuring element for morphology operations. Use ones and
  30. zeroes to define your struct elements.}
  31. TStructElement = array of array of Byte;
  32. TCalcSkewAngleStats = record
  33. PixelCount: Integer;
  34. TestedPixels: Integer;
  35. AccumulatorSize: Integer;
  36. AccumulatedCounts: Integer;
  37. BestCount: Integer;
  38. end;
  39. PCalcSkewAngleStats = ^TCalcSkewAngleStats;
  40. { Thresholding using Otsu's method (which chooses the threshold
  41. to minimize the intraclass variance of the black and white pixels!).
  42. Functions returns calculated threshold level value [0..255].
  43. If BinarizeImage is True then the Image is automatically converted to binary using
  44. computed threshold level.}
  45. function OtsuThresholding(var Image: TImageData; BinarizeImage: Boolean = False): Integer;
  46. { Applies basic morphology operators (Erode/Dilate) on Image using given structuring element
  47. Strel. You can do composite operations (Open/Close) by calling this function
  48. twice each time with different operator.}
  49. procedure Morphology(var Image: TImageData; const Strel: TStructElement; Op: TMorphologyOp);
  50. { Calculates rotation angle for given 8bit grayscale image.
  51. Useful for finding skew of scanned documents etc.
  52. Uses Hough transform internally.
  53. MaxAngle is maximal (abs. value) expected skew angle in degrees (to speed things up)
  54. and Threshold (0..255) is used to classify pixel as black (text) or white (background).
  55. Area of interest rectangle can be defined to restrict the detection to
  56. work only in defined part of image (useful when the document has text only in
  57. smaller area of page and non-text features outside the area confuse the rotation detector).
  58. Various calculations stats can be retrieved by passing Stats parameter.}
  59. function CalcRotationAngle(MaxAngle: Integer; Threshold: Integer;
  60. Width, Height: Integer; Pixels: PByteArray; DetectionArea: PRect = nil;
  61. Stats: PCalcSkewAngleStats = nil): Double;
  62. { Deskews given image. Finds rotation angle and rotates image accordingly.
  63. Works best on low-color document-like images (scans).
  64. MaxAngle is maximal (abs. value) expected skew angle in degrees (to speed things up)
  65. and Threshold (0..255) is used to classify pixel as black (text) or white (background).
  66. If Threshold=-1 then auto threshold calculated by OtsuThresholding is used.}
  67. procedure DeskewImage(var Image: TImageData; MaxAngle: Integer = 10; Threshold: Integer = -1);
  68. implementation
  69. function OtsuThresholding(var Image: TImageData; BinarizeImage: Boolean): Integer;
  70. var
  71. Histogram: array[Byte] of Single;
  72. Level, Max, Min, I, J, NumPixels: Integer;
  73. Pix: PByte;
  74. Mean, Variance: Single;
  75. Mu, Omega, LevelMean, LargestMu: Single;
  76. begin
  77. Assert(Image.Format = ifGray8);
  78. FillChar(Histogram, SizeOf(Histogram), 0);
  79. Min := 255;
  80. Max := 0;
  81. Level := 0;
  82. NumPixels := Image.Width * Image.Height;
  83. Pix := Image.Bits;
  84. // Compute histogram and determine min and max pixel values
  85. for I := 0 to NumPixels - 1 do
  86. begin
  87. Histogram[Pix^] := Histogram[Pix^] + 1.0;
  88. if Pix^ < Min then
  89. Min := Pix^;
  90. if Pix^ > Max then
  91. Max := Pix^;
  92. Inc(Pix);
  93. end;
  94. // Normalize histogram
  95. for I := 0 to 255 do
  96. Histogram[I] := Histogram[I] / NumPixels;
  97. // Compute image mean and variance
  98. Mean := 0.0;
  99. Variance := 0.0;
  100. for I := 0 to 255 do
  101. Mean := Mean + (I + 1) * Histogram[I];
  102. for I := 0 to 255 do
  103. Variance := Variance + Sqr(I + 1 - Mean) * Histogram[I];
  104. // Now finally compute threshold level
  105. LargestMu := 0;
  106. for I := 0 to 255 do
  107. begin
  108. Omega := 0.0;
  109. LevelMean := 0.0;
  110. for J := 0 to I - 1 do
  111. begin
  112. Omega := Omega + Histogram[J];
  113. LevelMean := LevelMean + (J + 1) * Histogram[J];
  114. end;
  115. Mu := Sqr(Mean * Omega - LevelMean);
  116. Omega := Omega * (1.0 - Omega);
  117. if Omega > 0.0 then
  118. Mu := Mu / Omega
  119. else
  120. Mu := 0;
  121. if Mu > LargestMu then
  122. begin
  123. LargestMu := Mu;
  124. Level := I;
  125. end;
  126. end;
  127. if BinarizeImage then
  128. begin
  129. // Do thresholding using computed level
  130. Pix := Image.Bits;
  131. for I := 0 to Image.Width * Image.Height - 1 do
  132. begin
  133. if Pix^ >= Level then
  134. Pix^ := 255
  135. else
  136. Pix^ := 0;
  137. Inc(Pix);
  138. end;
  139. end;
  140. Result := Level;
  141. end;
  142. procedure Morphology(var Image: TImageData; const Strel: TStructElement; Op: TMorphologyOp);
  143. var
  144. X, Y, I, J: Integer;
  145. SWidth, SHeight, PixCount, PixVal, NumOnes, PosX, PosY: Integer;
  146. ImgOut: TImageData;
  147. OutPix: PByte;
  148. begin
  149. Assert(Image.Format = ifGray8);
  150. Assert((Length(Strel) > 0) and (Length(Strel[0]) > 0));
  151. SWidth := Length(Strel);
  152. SHeight := Length(Strel[0]);
  153. NumOnes := 0;
  154. if Op = moErode then
  155. begin
  156. // We need to know number of ones in the strel for erosion
  157. for I := 0 to SWidth - 1 do
  158. for J := 0 to SHeight - 1 do
  159. NumOnes := NumOnes + Strel[I, J];
  160. end;
  161. InitImage(ImgOut);
  162. NewImage(Image.Width, Image.Height, ifGray8, ImgOut);
  163. OutPix := ImgOut.Bits;
  164. for J := 0 to Image.Height - 1 do
  165. for I := 0 to Image.Width - 1 do
  166. begin
  167. PixCount := 0;
  168. for X := 0 to SWidth - 1 do
  169. begin
  170. PosX := ClampInt(X + I - SWidth div 2, 0, Image.Width - 1);
  171. for Y := 0 to SHeight - 1 do
  172. begin
  173. PosY := ClampInt(Y + J - SHeight div 2, 0, Image.Height - 1);
  174. if (PosX >= 0) and (PosX < Image.Width) and
  175. (PosY >= 0) and (PosY < Image.Height) then
  176. begin
  177. PixVal := PByteArray(Image.Bits)[PosY * Image.Width + PosX];
  178. end
  179. else
  180. PixVal := 0;
  181. if (Strel[X, Y] > 0) and (PixVal > 0) then
  182. Inc(PixCount);
  183. end;
  184. end;
  185. case Op of
  186. moErode: OutPix^ := Iff(PixCount = NumOnes, 255, 0);
  187. moDilate: OutPix^ := Iff(PixCount > 0, 255, 0);
  188. end;
  189. Inc(OutPix);
  190. end;
  191. FreeImage(Image);
  192. Image := ImgOut;
  193. end;
  194. function CalcRotationAngle(MaxAngle: Integer; Threshold: Integer;
  195. Width, Height: Integer; Pixels: PByteArray; DetectionArea: PRect; Stats: PCalcSkewAngleStats): Double;
  196. const
  197. // Number of "best" lines we take into account when determining
  198. // resulting rotation angle (lines with most votes).
  199. BestLinesCount = 20;
  200. // Angle step used in alpha parameter quantization
  201. AlphaStep = 0.1;
  202. type
  203. TLine = record
  204. Count: Integer;
  205. Index: Integer;
  206. Alpha: Double;
  207. D: Double;
  208. end;
  209. TLineArray = array of TLine;
  210. var
  211. AlphaStart, MinD, SumAngles: Double;
  212. AlphaSteps, DCount, AccumulatorSize, I, AccumulatedCounts: Integer;
  213. BestLines: TLineArray;
  214. HoughAccumulator: array of Integer;
  215. PageWidth, PageHeight: Integer;
  216. ContentRect: TRect;
  217. // Classifies pixel at [X, Y] as black or white using threshold.
  218. function IsPixelBlack(X, Y: Integer): Boolean;
  219. begin
  220. Result := Pixels[Y * Width + X] < Threshold;
  221. end;
  222. // Calculates alpha parameter for given angle step.
  223. function GetAlpha(Index: Integer): Double;
  224. begin
  225. Result := AlphaStart + Index * AlphaStep;
  226. end;
  227. function CalcDIndex(D: Double): Integer;
  228. begin
  229. Result := Trunc(D - MinD);
  230. end;
  231. // Calculates angle and distance parameters for all lines
  232. // going through point [X, Y].
  233. procedure CalcLines(X, Y: Integer);
  234. var
  235. D, Rads: Double;
  236. I, DIndex, Index: Integer;
  237. begin
  238. for I := 0 to AlphaSteps - 1 do
  239. begin
  240. Rads := GetAlpha(I) * Pi / 180; // Angle for current step in radians
  241. D := Y * Cos(Rads) - X * Sin(Rads); // Parameter D of the line y=tg(alpha)x + d
  242. DIndex := CalcDIndex(D);
  243. Index := DIndex * AlphaSteps + I;
  244. HoughAccumulator[Index] := HoughAccumulator[Index] + 1; // Add one vote for current line
  245. end;
  246. end;
  247. // Uses Hough transform to calculate all lines that intersect
  248. // interesting points (those classified as being on base line of the text).
  249. procedure CalcHoughTransform;
  250. var
  251. Y, X: Integer;
  252. begin
  253. for Y := 0 to PageHeight - 1 do
  254. for X := 0 to PageWidth - 1 do
  255. begin
  256. if IsPixelBlack(ContentRect.Left + X, ContentRect.Top + Y) and
  257. not IsPixelBlack(ContentRect.Left + X, ContentRect.Top + Y + 1) then
  258. begin
  259. CalcLines(X, Y);
  260. end;
  261. end;
  262. end;
  263. // Chooses "best" lines (with the most votes) from the accumulator
  264. function GetBestLines(Count: Integer): TLineArray;
  265. var
  266. I, J, DIndex, AlphaIndex: Integer;
  267. Temp: TLine;
  268. begin
  269. SetLength(Result, Count);
  270. for I := 0 to AccumulatorSize - 1 do
  271. begin
  272. if HoughAccumulator[I] > Result[Count - 1].Count then
  273. begin
  274. // Current line has more votes than the last selected one,
  275. // let's put it the pot
  276. Result[Count - 1].Count := HoughAccumulator[I];
  277. Result[Count - 1].Index := I;
  278. J := Count - 1;
  279. // Sort the lines based on number of votes
  280. while (J > 0) and (Result[J].Count > Result[J - 1].Count) do
  281. begin
  282. Temp := Result[J];
  283. Result[J] := Result[J - 1];
  284. Result[J - 1] := Temp;
  285. J := J - 1;
  286. end;
  287. end;
  288. AccumulatedCounts := AccumulatedCounts + HoughAccumulator[I];
  289. end;
  290. for I := 0 to Count - 1 do
  291. begin
  292. // Calculate line angle and distance according to index in the accumulator
  293. DIndex := Result[I].Index div AlphaSteps;
  294. AlphaIndex := Result[I].Index - DIndex * AlphaSteps;
  295. Result[I].Alpha := GetAlpha(AlphaIndex);
  296. Result[I].D := DIndex + MinD;
  297. end;
  298. end;
  299. begin
  300. AccumulatedCounts := 0;
  301. // Use supplied page content rect or just the whole image
  302. ContentRect := Rect(0, 0, Width, Height);
  303. if DetectionArea <> nil then
  304. begin
  305. Assert((RectWidth(DetectionArea^) <= Width) and (RectHeight(DetectionArea^) <= Height));
  306. ContentRect := DetectionArea^;
  307. end;
  308. PageWidth := ContentRect.Right - ContentRect.Left;
  309. PageHeight := ContentRect.Bottom - ContentRect.Top;
  310. AlphaStart := -MaxAngle;
  311. AlphaSteps := Round(2 * MaxAngle / AlphaStep); // Number of angle steps = samples from interval <-MaxAngle, MaxAngle>
  312. MinD := -PageWidth;
  313. DCount := 2 * (PageWidth + PageHeight);
  314. // Determine the size of line accumulator
  315. AccumulatorSize := DCount * AlphaSteps;
  316. SetLength(HoughAccumulator, AccumulatorSize);
  317. // Calculate Hough transform
  318. CalcHoughTransform;
  319. // Get the best lines with most votes
  320. BestLines := GetBestLines(BestLinesCount);
  321. // Average angles of the selected lines to get the rotation angle of the image
  322. SumAngles := 0;
  323. for I := 0 to BestLinesCount - 1 do
  324. SumAngles := SumAngles + BestLines[I].Alpha;
  325. Result := SumAngles / BestLinesCount;
  326. if Stats <> nil then
  327. begin
  328. Stats.BestCount := BestLines[0].Count;
  329. Stats.PixelCount := PageWidth * PageHeight;
  330. Stats.AccumulatorSize := AccumulatorSize;
  331. Stats.AccumulatedCounts := AccumulatedCounts;
  332. Stats.TestedPixels := AccumulatedCounts div AlphaSteps;
  333. end;
  334. end;
  335. procedure DeskewImage(var Image: TImageData; MaxAngle: Integer; Threshold: Integer);
  336. var
  337. Angle: Double;
  338. OutputImage: TImageData;
  339. Info: TImageFormatInfo;
  340. begin
  341. if not TestImage(Image) then
  342. raise EImagingBadImage.Create;
  343. // Clone input image and convert it to 8bit grayscale. This will be our
  344. // working image.
  345. CloneImage(Image, OutputImage);
  346. ConvertImage(Image, ifGray8);
  347. if Threshold < 0 then
  348. begin
  349. // Determine the threshold automatically if needed.
  350. Threshold := OtsuThresholding(Image);
  351. end;
  352. // Main step - calculate image rotation angle
  353. Angle := CalcRotationAngle(MaxAngle, Threshold, Image.Width, Image.Height, Image.Bits);
  354. // Finally, rotate the image. We rotate the original input image, not the working
  355. // one so the color space is preserved.
  356. GetImageFormatInfo(OutputImage.Format, Info);
  357. if Info.IsIndexed or Info.IsSpecial then
  358. ConvertImage(OutputImage, ifA8R8G8B8); // Rotation doesn't like indexed and compressed images
  359. RotateImage(OutputImage, Angle);
  360. FreeImage(Image);
  361. Image := OutputImage;
  362. end;
  363. {
  364. File Notes:
  365. -- TODOS ----------------------------------------------------
  366. - nothing now
  367. -- 0.77 -------------------------------------------------------
  368. - OtsuThresholding signature changed, now it's a function and
  369. always returns the computed level.
  370. - Extended CalcRotationAngle, added margins and stats.
  371. - Added CalcRotationAngle and DeskewImage functions.
  372. -- 0.25.0 Changes/Bug Fixes -----------------------------------
  373. - Unit created with basic stuff (otsu and erode/dilate morphology ops).
  374. }
  375. end.