Selaa lähdekoodia

add polygon result

Youngmin Baek 6 vuotta sitten
vanhempi
commit
e08622d157
2 muutettua tiedostoa jossa 170 lisäystä ja 7 poistoa
  1. 159 2
      craft_utils.py
  2. 11 5
      test.py

+ 159 - 2
craft_utils.py

@@ -8,6 +8,13 @@ import numpy as np
 import cv2
 import math
 
+""" auxiliary functions """
+# unwarp coordinates
+def warpCoord(Minv, pt):
+    # Map the 2-D point `pt` through the matrix `Minv` using homogeneous
+    # coordinates: extend the point to (x, y, 1), multiply, then divide the
+    # first two components of the result by the third.
+    # getPoly_core passes the inverse of its perspective matrix as `Minv`.
+    out = np.matmul(Minv, (pt[0], pt[1], 1))
+    return np.array([out[0]/out[2], out[1]/out[2]])
+""" end of auxiliary functions """
+
 
 def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
     # prepare data
@@ -71,11 +78,161 @@ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)
 
     return det, labels, mapper
 
+def getPoly_core(boxes, labels, mapper, linkmap):
+    # Build one polygon per entry of `boxes` and return them as a list with
+    # the same length and order. Entries for which no polygon could be
+    # generated are None. `labels` is warped per box and masked with the
+    # label id `mapper[k]`.
+    # NOTE(review): `linkmap` is accepted but never used in this function.
+
+    # configs
+    # num_cp: number of pivot points; max_len_ratio: limit on the mask height
+    # relative to the box height; expand_ratio: scale applied to the median
+    # segment height; max_r / step_r: range and step of the edge search.
+    num_cp = 5
+    max_len_ratio = 0.7
+    expand_ratio = 1.45
+    max_r = 2.0
+    step_r = 0.2
+
+    polys = []  
+    for k, box in enumerate(boxes):
+        # size filter for small instance
+        # w / h are the lengths of the edges box[0]-box[1] and box[1]-box[2],
+        # plus one, truncated to int.
+        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
+        if w < 30 or h < 30:
+            polys.append(None); continue
+
+        # warp image
+        # Map the four box corners onto an axis-aligned w x h rectangle and
+        # resample `labels` into it with nearest-neighbour interpolation.
+        tar = np.float32([[0,0],[w,0],[w,h],[0,h]])
+        M = cv2.getPerspectiveTransform(box, tar)
+        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
+        # Minv is used at the end to map polygon points back to the
+        # coordinates of `labels`.
+        # NOTE(review): the bare `except:` swallows every exception, not only
+        # a failed matrix inversion.
+        try:
+            Minv = np.linalg.inv(M)
+        except:
+            polys.append(None); continue
+
+        # binarization for selected label
+        # Pixels that do not carry this box's label become 0, the rest 1.
+        cur_label = mapper[k]
+        word_label[word_label != cur_label] = 0
+        word_label[word_label > 0] = 1
+
+        """ Polygon generation """
+        # find top/bottom contours
+        # For every column with at least two mask pixels, store
+        # (column, first mask row, last mask row) and track the largest
+        # vertical extent seen.
+        cp = []
+        max_len = -1
+        for i in range(w):
+            region = np.where(word_label[:,i] != 0)[0]
+            if len(region) < 2 : continue
+            cp.append((i, region[0], region[-1]))
+            length = region[-1] - region[0] + 1
+            if length > max_len: max_len = length
+
+        # pass if max_len is similar to h
+        if h * max_len_ratio < max_len:
+            polys.append(None); continue
+
+        # get pivot points with fixed length
+        # The width is split into tot_seg equal segments. Odd-numbered
+        # segments each provide one pivot point; every segment accumulates the
+        # sum of its column centres, which is turned into an average when the
+        # scan moves on to the next segment.
+        tot_seg = num_cp * 2 + 1
+        seg_w = w / tot_seg     # segment width
+        pp = [None] * num_cp    # init pivot points
+        # All entries start as references to the same [0, 0] list; this is
+        # safe only because entries are always reassigned, never mutated.
+        cp_section = [[0, 0]] * tot_seg
+        seg_height = [0] * num_cp
+        seg_num = 0
+        num_sec = 0
+        prev_h = -1
+        for i in range(0,len(cp)):
+            (x, sy, ey) = cp[i]
+            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
+                # average previous segment
+                # Stop scanning if the segment just left collected no columns.
+                if num_sec == 0: break
+                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
+                num_sec = 0
+
+                # reset variables
+                seg_num += 1
+                prev_h = -1
+
+            # accumulate center points
+            cy = (sy + ey) * 0.5
+            cur_h = ey - sy + 1
+            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
+            num_sec += 1
+
+            if seg_num % 2 == 0: continue # No polygon area
+
+            # Within an odd segment, keep the column with the largest mask
+            # height as that segment's pivot.
+            if prev_h < cur_h:
+                pp[int((seg_num - 1)/2)] = (x, cy)
+                seg_height[int((seg_num - 1)/2)] = cur_h
+                prev_h = cur_h
+
+        # processing last segment
+        # NOTE(review): this always averages the final entry cp_section[-1],
+        # whatever value seg_num ended on; presumably the scan is expected to
+        # finish in the last segment - confirm.
+        if num_sec != 0:
+            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]
+
+        # pass if num of pivots is not sufficient or segment width is smaller than character height
+        if None in pp or seg_w < np.max(seg_height) * 0.25:
+            polys.append(None); continue
+
+        # calc median maximum of pivot points
+        half_char_h = np.median(seg_height) * expand_ratio / 2
+
+        # calc gradient and apply to make horizontal pivots
+        # For each pivot, the direction is taken from the averaged centres of
+        # the two neighbouring even segments. Each new_pp entry holds the two
+        # end points (x1, y1, x2, y2) of a line of half-length half_char_h
+        # through the pivot.
+        new_pp = []
+        for i, (x, cy) in enumerate(pp):
+            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
+            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
+            if dx == 0:     # no horizontal offset between neighbours: keep the pivot line vertical
+                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
+                continue
+            rad = - math.atan2(dy, dx)
+            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
+            new_pp.append([x - s, cy - c, x + s, cy + c])
+
+        # get edge points to cover character heatmaps
+        # grad_s / grad_e are the sums of the slopes between the first three
+        # and the last three pivots. The first and last pivot lines are pushed
+        # outwards in growing steps until the shifted line no longer overlaps
+        # the mask, or until r is within two steps of max_r, in which case the
+        # current position is accepted regardless of overlap.
+        isSppFound, isEppFound = False, False
+        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
+        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
+        for r in np.arange(0.5, max_r, step_r):
+            dx = 2 * half_char_h * r
+            if not isSppFound:
+                line_img = np.zeros(word_label.shape, dtype=np.uint8)
+                dy = grad_s * dx
+                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
+                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
+                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
+                    spp = p
+                    isSppFound = True
+            if not isEppFound:
+                line_img = np.zeros(word_label.shape, dtype=np.uint8)
+                dy = grad_e * dx
+                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
+                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
+                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
+                    epp = p
+                    isEppFound = True
+            if isSppFound and isEppFound:
+                break
+
+        # pass if boundary of polygon is not found
+        if not (isSppFound and isEppFound):
+            polys.append(None); continue
+
+        # make final polygon
+        # Point order: first end of the start line, first ends of all pivot
+        # lines, both ends of the end line, second ends of the pivot lines in
+        # reverse, second end of the start line. Every point is mapped back
+        # through Minv.
+        poly = []
+        poly.append(warpCoord(Minv, (spp[0], spp[1])))
+        for p in new_pp:
+            poly.append(warpCoord(Minv, (p[0], p[1])))
+        poly.append(warpCoord(Minv, (epp[0], epp[1])))
+        poly.append(warpCoord(Minv, (epp[2], epp[3])))
+        for p in reversed(new_pp):
+            poly.append(warpCoord(Minv, (p[2], p[3])))
+        poly.append(warpCoord(Minv, (spp[2], spp[3])))
+
+        # add to final result
+        polys.append(np.array(poly))
+
+    return polys
 
-def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text):
+def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
+    # Run getDetBoxes_core and return (boxes, polys). `polys` has one entry
+    # per box: the result of getPoly_core when `poly` is true, otherwise a
+    # list of None placeholders of the same length.
+    # Note that the return value changed from `boxes` alone to a 2-tuple.
     boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)
 
-    return boxes
+    if poly:
+        polys = getPoly_core(boxes, labels, mapper, linkmap)
+    else:
+        polys = [None] * len(boxes)
+
+    return boxes, polys
 
 def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2):
     if len(polys) > 0:

+ 11 - 5
test.py

@@ -50,6 +50,7 @@ parser.add_argument('--link_threshold', default=0.4, type=float, help='link conf
 parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
 parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
 parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
+parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
 parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
 parser.add_argument('--test_folder', default='/data/', type=str, help='folder path to input images')
 
@@ -63,7 +64,7 @@ result_folder = './result/'
 if not os.path.isdir(result_folder):
     os.mkdir(result_folder)
 
-def test_net(net, image, text_threshold, link_threshold, low_text, cuda):
+def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
     t0 = time.time()
 
     # resize
@@ -88,8 +89,13 @@ def test_net(net, image, text_threshold, link_threshold, low_text, cuda):
     t1 = time.time()
 
     # Post-processing
-    boxes = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text)
+    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)
+
+    # coordinate adjustment
     boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
+    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
+    for k in range(len(polys)):
+        if polys[k] is None: polys[k] = boxes[k]
 
     t1 = time.time() - t1
 
@@ -100,7 +106,7 @@ def test_net(net, image, text_threshold, link_threshold, low_text, cuda):
 
     if args.show_time : print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))
 
-    return boxes, ret_score_text
+    return boxes, polys, ret_score_text
 
 
 
@@ -128,13 +134,13 @@ if __name__ == '__main__':
         print("Test image {:d}/{:d}: {:s}".format(k+1, len(image_list), image_path), end='\r')
         image = imgproc.loadImage(image_path)
 
-        bboxes, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda)
+        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly)
 
         # save score text
         filename, file_ext = os.path.splitext(os.path.basename(image_path))
         mask_file = result_folder + "/res_" + filename + '_mask.jpg'
         cv2.imwrite(mask_file, score_text)
 
-        file_utils.saveResult(image_path, image[:,:,::-1], bboxes, dirname=result_folder)
+        file_utils.saveResult(image_path, image[:,:,::-1], polys, dirname=result_folder)
 
     print("elapsed time : {}s".format(time.time() - t))