Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR i…

…nto tablemaster
akshaysanil · Jul 4, 2022 · 929ee46 · 929ee46
2 parents 7c92b44 + cec3464
commit 929ee46
Show file tree

Hide file tree

Showing 377 changed files with 14,545 additions and 2,539 deletions.
diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
@@ -1149,7 +1149,10 @@ def format_shape(s):
         for box in self.result_dic:
             trans_dic = {"label": box[1][0], "points": box[0], "difficult": False}
             if self.kie_mode:
-                trans_dic.update({"key_cls": "None"})
+                if len(box) == 3:
+                    trans_dic.update({"key_cls": box[2]})
+                else:
+                    trans_dic.update({"key_cls": "None"})
             if trans_dic["label"] == "" and mode == 'Auto':
                 continue
             shapes.append(trans_dic)
@@ -2047,6 +2050,7 @@ def reRecognition(self):
             rec_flag = 0
             for shape in self.canvas.shapes:
                 box = [[int(p.x()), int(p.y())] for p in shape.points]
+                kie_cls = shape.key_cls
 
                 if len(box) > 4:
                     box = self.gen_quad_from_poly(np.array(box))
@@ -2062,17 +2066,27 @@ def reRecognition(self):
                     if shape.line_color == DEFAULT_LOCK_COLOR:
                         shape.label = result[0][0]
                         result.insert(0, box)
+                        if self.kie_mode:
+                            result.append(kie_cls)
                         self.result_dic_locked.append(result)
                     else:
                         result.insert(0, box)
+                        if self.kie_mode:
+                            result.append(kie_cls)
                         self.result_dic.append(result)
                 else:
                     print('Can not recognise the box')
                     if shape.line_color == DEFAULT_LOCK_COLOR:
                         shape.label = result[0][0]
-                        self.result_dic_locked.append([box, (self.noLabelText, 0)])
+                        if self.kie_mode:
+                            self.result_dic_locked.append([box, (self.noLabelText, 0), kie_cls])
+                        else:
+                            self.result_dic_locked.append([box, (self.noLabelText, 0)])
                     else:
-                        self.result_dic.append([box, (self.noLabelText, 0)])
+                        if self.kie_mode:
+                            self.result_dic.append([box, (self.noLabelText, 0), kie_cls])
+                        else:
+                            self.result_dic.append([box, (self.noLabelText, 0)])
                 try:
                     if self.noLabelText == shape.label or result[1][0] == shape.label:
                         print('label no change')

diff --git a/PPOCRLabel/libs/shape.py b/PPOCRLabel/libs/shape.py
@@ -48,6 +48,7 @@ class Shape(object):
 
     def __init__(self, label=None, line_color=None, difficult=False, key_cls="None", paintLabel=False):
         self.label = label
+        self.idx = 0
         self.points = []
         self.fill = False
         self.selected = False

diff --git a/README_ch.md b/README_ch.md
@@ -71,6 +71,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 ## 《动手学OCR》电子书
 - [《动手学OCR》电子书📚](./doc/doc_ch/ocr_book.md)
 
+## 场景应用
+- PaddleOCR场景应用覆盖通用、制造、金融、交通行业的主要OCR垂类应用，在PP-OCR、PP-Structure的通用能力基础之上，以notebook的形式展示利用场景数据微调、模型优化方法、数据增广等内容，为开发者快速落地OCR应用提供示范与启发。详情可查看[README](./applications)。
 
 <a name="开源社区"></a>
 ## 开源社区

diff --git a/applications/PCB字符识别/PCB字符识别.md b/applications/PCB字符识别/PCB字符识别.md
diff --git a/applications/PCB字符识别/gen_data/background/bg.jpg b/applications/PCB字符识别/gen_data/background/bg.jpg
diff --git a/applications/PCB字符识别/gen_data/corpus/text.txt b/applications/PCB字符识别/gen_data/corpus/text.txt
@@ -0,0 +1,30 @@
+5ZQ
+I4UL
+PWL
+SNOG
+ZL02
+1C30
+O3H
+YHRS
+N03S
+1U5Y
+JTK
+EN4F
+YKJ
+DWNH
+R42W
+X0V
+4OF5
+08AM
+Y93S
+GWE2
+0KR
+9U2A
+DBQ
+Y6J
+ROZ
+K06
+KIEY
+NZQJ
+UN1B
+6X4
diff --git a/applications/PCB字符识别/gen_data/det_background/1.png b/applications/PCB字符识别/gen_data/det_background/1.png
diff --git a/applications/PCB字符识别/gen_data/det_background/2.png b/applications/PCB字符识别/gen_data/det_background/2.png
diff --git a/applications/PCB字符识别/gen_data/gen.py b/applications/PCB字符识别/gen_data/gen.py
@@ -0,0 +1,261 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code refers to:
+https://github.com/zcswdt/Color_OCR_image_generator
+"""
+import os
+import random
+from PIL import Image, ImageDraw, ImageFont
+import json
+import argparse
+
+
+def get_char_lines(txt_root_path):
+    """
+    desc: return the stripped lines of every file found in txt_root_path
+    """
+    txt_files = os.listdir(txt_root_path)
+    char_lines = []
+    for txt in txt_files:
+        txt_path = os.path.join(txt_root_path, txt)
+        # the context manager closes the file even when reading raises
+        with open(txt_path, mode='r', encoding='utf-8') as f:
+            char_lines.extend(line.strip() for line in f)
+    # return after the loop so that all corpus files are read, not only the first
+    return char_lines
+
+
+def get_horizontal_text_picture(image_file, chars, fonts_list, cf):
+    """
+    desc: draw chars from left to right on a background image and return
+          the cropped text region together with the text itself
+    image_file: path of the background image
+    chars: text to draw, one glyph at a time
+    fonts_list: font file paths, one of them is picked at random
+    cf: object providing font_min_size and font_max_size
+    return: (cropped image, chars)
+    """
+    background = Image.open(image_file)
+    if background.mode != 'RGB':
+        background = background.convert('RGB')
+    bg_w, bg_h = background.size
+
+    # the font file is drawn first and the size second, so the sequence of
+    # random draws stays: font, size, x, y
+    chosen_font = random.choice(fonts_list)
+    chosen_size = random.randint(cf.font_min_size, cf.font_max_size)
+    font = ImageFont.truetype(chosen_font, chosen_size)
+
+    glyph_sizes = [font.getsize(ch) for ch in chars]
+    widths = [w for w, _ in glyph_sizes]
+    heights = [h for _, h in glyph_sizes]
+    text_h = max(heights)
+
+    # the gap inserted between two glyphs is as wide as the widest glyph
+    gap = max(widths)
+    text_w = sum(widths) + gap * (len(chars) - 1)
+
+    # random top-left corner such that the whole text fits in the background
+    left = random.randint(0, bg_w - text_w)
+    top = random.randint(0, bg_h - text_h)
+
+    pen = ImageDraw.Draw(background)
+    pen_x = left
+    for ch, glyph_w in zip(chars, widths):
+        # text is always drawn in black
+        pen.text((pen_x, top), ch, (0, 0, 0), font=font)
+        pen_x += (glyph_w + gap)
+    text_img = background.crop((left, top, left + text_w, top + text_h))
+    return text_img, chars
+
+
+def get_vertical_text_picture(image_file, chars, fonts_list, cf):
+    """
+    desc: draw chars from top to bottom on a background image, crop the
+          text column and return it transposed with Image.ROTATE_90
+    image_file: path of the background image
+    chars: text to draw, one glyph at a time
+    fonts_list: font file paths, one of them is picked at random
+    cf: object providing font_min_size and font_max_size
+    return: (rotated cropped image, chars)
+    """
+    background = Image.open(image_file)
+    if background.mode != 'RGB':
+        background = background.convert('RGB')
+    bg_w, bg_h = background.size
+
+    # the font file is drawn first and the size second, so the sequence of
+    # random draws stays: font, size, x, y
+    chosen_font = random.choice(fonts_list)
+    chosen_size = random.randint(cf.font_min_size, cf.font_max_size)
+    font = ImageFont.truetype(chosen_font, chosen_size)
+
+    glyph_sizes = [font.getsize(ch) for ch in chars]
+    # the column is as wide as the widest glyph and as tall as all glyphs stacked
+    column_w = max(w for w, _ in glyph_sizes)
+    column_h = sum(h for _, h in glyph_sizes)
+
+    # random top-left corner such that the whole column fits in the background
+    left = random.randint(0, bg_w - column_w)
+    top = random.randint(0, bg_h - column_h)
+
+    pen = ImageDraw.Draw(background)
+    pen_y = top
+    for ch, (_, glyph_h) in zip(chars, glyph_sizes):
+        # text is always drawn in black
+        pen.text((left, pen_y), ch, (0, 0, 0), font=font)
+        pen_y = pen_y + glyph_h
+    column_img = background.crop((left, top, left + column_w, top + column_h))
+    return column_img.transpose(Image.ROTATE_90), chars
+
+
+def get_fonts(fonts_path):
+    """
+    desc: list every entry of fonts_path, each joined with fonts_path
+    """
+    return [os.path.join(fonts_path, name) for name in os.listdir(fonts_path)]
+
+if __name__ == '__main__':
+    # Generate synthetic text pictures plus detection / recognition label
+    # files. The first 20% of the generated pictures form the validation
+    # split, the remaining ones the training split.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--num_img', type=int, default=30, help="Number of images to generate")
+    parser.add_argument('--font_min_size', type=int, default=11)
+    parser.add_argument('--font_max_size', type=int, default=12,
+                        help="Help adjust the size of the generated text and the size of the picture")
+    parser.add_argument('--bg_path', type=str, default='./background',
+                        help='The generated text pictures will be pasted onto the pictures of this folder')
+    parser.add_argument('--det_bg_path', type=str, default='./det_background',
+                        help='The generated text pictures will use the pictures of this folder as the background')
+    parser.add_argument('--fonts_path', type=str, default='../../StyleText/fonts',
+                        help='The font used to generate the picture')
+    parser.add_argument('--corpus_path', type=str, default='./corpus',
+                        help='The corpus used to generate the text picture')
+    parser.add_argument('--output_dir', type=str, default='./output/', help='Images save dir')
+
+    cf = parser.parse_args()
+
+    # get corpus
+    char_lines = get_char_lines(txt_root_path=cf.corpus_path)
+    if cf.num_img > 0 and not char_lines:
+        # fail early with a readable message instead of an IndexError/TypeError
+        # in the middle of the generation loop
+        parser.error('no corpus lines found in ' + cf.corpus_path)
+
+    # get all fonts
+    fonts_list = get_fonts(cf.fonts_path)
+
+    # rec bg
+    img_root_path = cf.bg_path
+    imnames = os.listdir(img_root_path)
+
+    # det bg
+    det_bg_path = cf.det_bg_path
+    bg_pics = os.listdir(det_bg_path)
+
+    # image sub folders, relative to output_dir; the relative form is what
+    # gets written into the label files
+    det_save_dir = 'imgs/'
+    det_val_save_dir = 'imgs_val/'
+    rec_save_dir = 'rec_imgs/'
+    rec_val_save_dir = 'rec_imgs_val/'
+    # makedirs(exist_ok=True) creates missing parents (including output_dir)
+    # and tolerates folders that already exist
+    for sub_dir in (det_save_dir, det_val_save_dir, rec_save_dir, rec_val_save_dir):
+        os.makedirs(os.path.join(cf.output_dir, sub_dir), exist_ok=True)
+
+    # number of images (not a ratio) that go to the val dataset: 20% of num_img
+    val_ratio = cf.num_img * 0.2
+
+    def open_label_file(name):
+        # os.path.join works whether or not output_dir ends with a separator
+        return open(os.path.join(cf.output_dir, name), 'w', encoding='utf-8')
+
+    # the with statement closes (and flushes) all label files, also on errors
+    with open_label_file('det_gt_val.txt') as det_val_file, \
+            open_label_file('det_gt_train.txt') as det_train_file, \
+            open_label_file('rec_gt_val.txt') as rec_val_file, \
+            open_label_file('rec_gt_train.txt') as rec_train_file:
+        print('start generating...')
+        for i in range(0, cf.num_img):
+            # cycle through the corpus so num_img may exceed its length
+            text = char_lines[i % len(char_lines)]
+            imname = random.choice(imnames)
+            img_path = os.path.join(img_root_path, imname)
+
+            rnd = random.random()
+            # gen horizontal text picture
+            if rnd < 0.5:
+                gen_img, chars = get_horizontal_text_picture(img_path, text, fonts_list, cf)
+                ori_w, ori_h = gen_img.size
+                # drop the 3 topmost pixel rows
+                gen_img = gen_img.crop((0, 3, ori_w, ori_h))
+            # gen vertical text picture
+            else:
+                gen_img, chars = get_vertical_text_picture(img_path, text, fonts_list, cf)
+                ori_w, ori_h = gen_img.size
+                # drop the 3 leftmost pixel columns
+                gen_img = gen_img.crop((3, 0, ori_w, ori_h))
+
+            ori_w, ori_h = gen_img.size
+
+            # rec imgs
+            save_img_name = str(i).zfill(4) + '.jpg'
+            if i < val_ratio:
+                save_dir = os.path.join(rec_val_save_dir, save_img_name)
+                rec_val_file.write(save_dir + '\t' + text + '\n')
+            else:
+                save_dir = os.path.join(rec_save_dir, save_img_name)
+                rec_train_file.write(save_dir + '\t' + text + '\n')
+            gen_img.save(os.path.join(cf.output_dir, save_dir), quality=95, subsampling=0)
+
+            # det img
+            # random choice bg
+            bg_pic = random.sample(bg_pics, 1)[0]
+            det_img = Image.open(os.path.join(det_bg_path, bg_pic))
+            # the PCB position is fixed, modify it according to your own scenario
+            if bg_pic == '1.png':
+                x1 = 38
+                y1 = 3
+            else:
+                x1 = 34
+                y1 = 1
+
+            det_img.paste(gen_img, (x1, y1))
+            # text pos: corners of the pasted picture, starting at the top-left
+            # one and continuing with top-right, bottom-right, bottom-left
+            chars_pos = [[x1, y1], [x1 + ori_w, y1], [x1 + ori_w, y1 + ori_h], [x1, y1 + ori_h]]
+            label = [{"transcription": text, "points": chars_pos}]
+            if i < val_ratio:
+                save_dir = os.path.join(det_val_save_dir, save_img_name)
+                det_val_file.write(save_dir + '\t' + json.dumps(
+                    label, ensure_ascii=False) + '\n')
+            else:
+                save_dir = os.path.join(det_save_dir, save_img_name)
+                det_train_file.write(save_dir + '\t' + json.dumps(
+                    label, ensure_ascii=False) + '\n')
+            det_img.save(os.path.join(cf.output_dir, save_dir), quality=95, subsampling=0)