Skip to content

Commit

Permalink
add possibility to use bounding rectangles instead of polygons
Browse files Browse the repository at this point in the history
  • Loading branch information
maxnth committed Sep 16, 2021
1 parent ddd8fe0 commit de67b3b
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 17 deletions.
16 changes: 10 additions & 6 deletions ocr4all_helper_scripts/cli/pagelineseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"ImageRegions overlap with TextRegions.")
@click.option("--minscale", type=float, default=12.0,
help="Minimum scale permitted.")
@click.option("--maxlines", type=float, default=300,
@click.option("--maxlines", type=int, default=300,
help="Maximum number of lines permitted.")
@click.option("--threshold", type=float, default=0.2,
help="Baseline threshold.")
Expand Down Expand Up @@ -59,11 +59,14 @@
help="Widen black separators (to account for warping).")
@click.option("--max-whiteseps", type=int, default=-1,
help="Maximum amount of whitespace column separators.")
@click.option("--minheight-whiteseps", type=float, default=10,
@click.option("--minheight-whiteseps", type=int, default=10,
help="Minimum column height (units=scale).")
def pagelineseg_cli(dataset, remove_images, minscale, maxlines, threshold, usegauss, scale, hscale, vscale,
filter_strength, maxskew, skewsteps, parallel, smear_x, smear_y, growth_x, growth_y, fail_save,
max_blackseps, widen_blackseps, max_whiteseps, minheight_whiteseps):
@click.option("--bounding-rectangle", is_flag=True, default=False, help="Uses bounding rectangles instead of polygons.")
def pagelineseg_cli(dataset: str, remove_images: bool, minscale: float, maxlines: int, threshold: float,
usegauss: bool, scale: float, hscale: float, vscale: float, filter_strength: float, maxskew: float,
skewsteps: int, parallel: int, smear_x: float, smear_y: float, growth_x: float, growth_y: float,
fail_save: int, max_blackseps: int, widen_blackseps: int, max_whiteseps: int,
minheight_whiteseps: int, bounding_rectangle: bool):
with Path(dataset).open('r') as data_file:
dataset = json.load(data_file)

Expand Down Expand Up @@ -94,7 +97,8 @@ def parallel_proc(data):
maxskew=maxskew,
skewsteps=skewsteps,
usegauss=usegauss,
remove_images=remove_images)
remove_images=remove_images,
bounding_box=bounding_rectangle)

with Path(path_out).open("w+") as output_file:
pagelineseg_helper.s_print(f"Save annotations into '{path_out}'")
Expand Down
36 changes: 25 additions & 11 deletions ocr4all_helper_scripts/helpers/pagelineseg_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,13 @@ def s_print_error(*objs):
s_print("ERROR: ", *objs, file=sys.stderr)


def compute_lines(segmentation: np.ndarray, smear_strength: Tuple[float, float], scale: int,
growth: Tuple[float, float], max_iterations: int, filter_strength: float) -> List[Record]:
def compute_lines(segmentation: np.ndarray,
smear_strength: Tuple[float, float],
scale: int,
growth: Tuple[float, float],
max_iterations: int,
filter_strength: float,
bounding_box: bool) -> List[Record]:
"""Given a line segmentation map, computes a list of tuples consisting of 2D slices and masked images.
Implementation derived from ocropy with changes to allow extracting the line coords/polygons.
"""
Expand All @@ -62,7 +67,7 @@ def compute_lines(segmentation: np.ndarray, smear_strength: Tuple[float, float],
result.label = i + 1
result.bounds = o
polygon = []
if ((segmentation[o] != 0) == (segmentation[o] != i + 1)).any():
if ((segmentation[o] != 0) == (segmentation[o] != i + 1)).any() and not bounding_box:
ppoints = approximate_smear_polygon(mask, smear_strength, growth, max_iterations)
ppoints = ppoints[1:] if ppoints else []
polygon = [(o[1].start + x, o[0].start + y) for x, y in ppoints]
Expand Down Expand Up @@ -201,7 +206,8 @@ def segment(im: Image, scale: float = None, max_blackseps: int = 0, widen_blacks
minheight_whiteseps: int = 10, filter_strength: float = 1.0,
smear_strength: Tuple[float, float] = (1.0, 2.0), growth: Tuple[float, float] = (1.1, 1.1),
orientation: int = 0, fail_save_iterations: int = 1000, vscale: float = 1.0, hscale: float = 1.0,
minscale: float = 12.0, maxlines: int = 300, threshold: float = 0.2, usegauss: bool = False):
minscale: float = 12.0, maxlines: int = 300, threshold: float = 0.2, usegauss: bool = False,
bounding_box: bool = False):
"""
Segments a page into text lines.
Segments a page into text lines and returns the absolute coordinates of
Expand Down Expand Up @@ -254,7 +260,8 @@ def segment(im: Image, scale: float = None, max_blackseps: int = 0, widen_blacks
scale,
growth,
fail_save_iterations,
filter_strength)
filter_strength,
bounding_box)

# Translate each point back to original
delta_x = (im_rotated.width - im.width) / 2
Expand Down Expand Up @@ -286,7 +293,7 @@ def pagelineseg(xmlfile: str,
widen_blackseps: int = 10,
max_whiteseps: int = -1,
minheight_whiteseps: int = 10,
minscale: int = 12,
minscale: float = 12.0,
maxlines: int = 300,
smear_strength: Tuple[float, float] = (1.0, 2.0),
growth: Tuple[float, float] = (1.1, 1.1),
Expand All @@ -295,7 +302,8 @@ def pagelineseg(xmlfile: str,
maxskew: float = 2.0,
skewsteps: int = 8,
usegauss: bool = False,
remove_images: bool = False):
remove_images: bool = False,
bounding_box: bool = False):
name = Path(imgpath).name.split(".")[0]
s_print(f"""Start process for '{name}'
|- Image: '{imgpath}'
Expand All @@ -316,6 +324,8 @@ def pagelineseg(xmlfile: str,
if remove_images:
imageutils.remove_images(im, root)

pageutils.remove_existing_textlines(root)

for coord_idx, coord in enumerate(sorted(coordmap)):
coords = coordmap[coord]['coords']

Expand Down Expand Up @@ -345,12 +355,16 @@ def pagelineseg(xmlfile: str,
max_whiteseps=max_whiteseps,
minheight_whiteseps=minheight_whiteseps,
filter_strength=filter_strength,
smear_strength=smear_strength, growth=growth,
smear_strength=smear_strength,
growth=growth,
orientation=orientation,
fail_save_iterations=fail_save_iterations,
vscale=vscale, hscale=hscale,
minscale=minscale, maxlines=maxlines,
usegauss=usegauss)
vscale=vscale,
hscale=hscale,
minscale=minscale,
maxlines=maxlines,
usegauss=usegauss,
bounding_box=bounding_box)

else:
lines = []
Expand Down
5 changes: 5 additions & 0 deletions ocr4all_helper_scripts/utils/pageutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,8 @@ def construct_coordmap(tree: etree.Element) -> dict:
coordmap[region_id]["orientation"] = float(text_region.attrib["orientation"])

return coordmap


def remove_existing_textlines(tree: etree.Element):
    """Detach every ``TextLine`` element found below ``tree`` from its parent.

    The search uses the ``{*}`` namespace wildcard, so ``TextLine`` elements
    are matched whatever namespace they are declared in. ``tree`` is modified
    in place; nothing is returned.
    """
    # Collect all matches up front, then unlink each one from its own parent.
    existing_lines = tree.findall(".//{*}TextLine")
    for line_element in existing_lines:
        parent = line_element.getparent()
        parent.remove(line_element)

0 comments on commit de67b3b

Please sign in to comment.