-
Notifications
You must be signed in to change notification settings - Fork 0
/
interval.py
196 lines (155 loc) · 6.95 KB
/
interval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""This module contains dataclasses for (depth) intervals."""
from __future__ import annotations
import abc
import fitz
from stratigraphy.depthcolumn.depthcolumnentry import (
AnnotatedDepthColumnEntry,
DepthColumnEntry,
LayerDepthColumnEntry,
)
from stratigraphy.lines.line import TextLine
from stratigraphy.text.textblock import TextBlock
class Interval(metaclass=abc.ABCMeta):
    """Abstract base class for (depth) intervals.

    An interval is described by an optional start entry and an optional end entry.
    Subclasses have to provide the ``line_anchor`` and ``background_rect`` properties.
    """

    def __init__(self, start: DepthColumnEntry | None, end: DepthColumnEntry | None):
        """Store the (optional) entries that delimit the interval.

        Args:
            start (DepthColumnEntry | None): Entry at the start of the interval, if any.
            end (DepthColumnEntry | None): Entry at the end of the interval, if any.
        """
        super().__init__()
        self.start = start
        self.end = end

    @property
    def start_value(self) -> float | None:
        """Value of the start entry, or None when there is no (truthy) start entry."""
        return self.start.value if self.start else None

    @property
    def end_value(self) -> float | None:
        """Value of the end entry, or None when there is no (truthy) end entry."""
        return self.end.value if self.end else None

    @property
    @abc.abstractmethod
    def line_anchor(self) -> fitz.Point:
        """Get the line anchor of the interval (to be implemented by subclasses)."""

    @property
    @abc.abstractmethod
    def background_rect(self) -> fitz.Rect | None:
        """Get the background rectangle of the interval (to be implemented by subclasses)."""

    def to_json(self):
        """Convert the interval to a JSON serializable format.

        Returns a dict with the keys "start" and "end"; each holds the ``to_json()`` result of the
        corresponding entry, or None when that entry is missing.
        """
        start_json = self.start.to_json() if self.start else None
        end_json = self.end.to_json() if self.end else None
        return {"start": start_json, "end": end_json}
class AnnotatedInterval:
    """Class for annotated intervals.

    Holds a start and an end depth, each wrapped in an ``AnnotatedDepthColumnEntry``, together with
    the background rectangle that was passed in.
    """

    def __init__(self, start: float, end: float, background_rect: fitz.Rect):
        """Create the annotated interval.

        Args:
            start (float): Start depth; stored as an ``AnnotatedDepthColumnEntry``.
            end (float): End depth; stored as an ``AnnotatedDepthColumnEntry``.
            background_rect (fitz.Rect): Background rectangle; stored unchanged.
        """
        self.background_rect = background_rect
        self.start = AnnotatedDepthColumnEntry(start)
        self.end = AnnotatedDepthColumnEntry(end)
class BoundaryInterval(Interval):
    """Class for boundary intervals.

    Boundary intervals are intervals that are defined by a start and an end point, each given by
    its own entry. Either entry may be missing.
    """

    @property
    def line_anchor(self) -> fitz.Point | None:
        """Get the line anchor of the interval.

        With both entries present, the anchor sits at the right edge (x1) of the start entry, vertically
        halfway between the start entry's y0 and the end entry's y1. With only one entry present, the
        anchor is derived from that entry alone. Without any entry, None is returned.
        """
        start, end = self.start, self.end
        if start and end:
            return fitz.Point(start.rect.x1, (start.rect.y0 + end.rect.y1) / 2)
        if start:
            return fitz.Point(start.rect.x1, start.rect.y1)
        if end:
            return fitz.Point(end.rect.x1, end.rect.y0)
        return None

    @property
    def background_rect(self) -> fitz.Rect | None:
        """Get the background rectangle of the interval.

        The rectangle uses the horizontal extent (x0, x1) of the start entry and spans vertically from
        the start entry's y1 to the end entry's y0. None is returned unless both entries are present.
        """
        if not (self.start and self.end):
            return None
        start_rect = self.start.rect
        return fitz.Rect(start_rect.x0, start_rect.y1, start_rect.x1, self.end.rect.y0)

    def matching_blocks(
        self, all_blocks: list[TextBlock], block_index: int
    ) -> tuple[list[TextBlock], list[TextBlock], list[TextBlock]]:
        """Calculates pre, exact and post blocks for the boundary interval.

        Pre contains all the blocks that are supposed to come before the interval.
        Exact contains all the blocks that are supposed to be inside the interval.
        Post contains all the blocks that are supposed to come after the interval.

        Blocks are consumed starting at block_index, for as long as the block's y1 is smaller than the
        y1 of the end entry (or until the blocks run out, when there is no end entry).

        Args:
            all_blocks (list[TextBlock]): All available blocks.
            block_index (int): Index of the current block.

        Returns:
            tuple[list[TextBlock], list[TextBlock], list[TextBlock]]: Pre, exact and post blocks.
        """
        # Minimal vertical gap required above and below a run of blocks for it to count as exact match.
        min_gap = 5

        def fits_interval(rect) -> bool:
            # The rect has to start below the vertical middle of the start entry ...
            if not (self.start is None or rect.y0 > (self.start.rect.y0 + self.start.rect.y1) / 2):
                return False
            # ... and has to end above the vertical middle of the end entry.
            return self.end is None or rect.y1 < (self.end.rect.y0 + self.end.rect.y1) / 2

        pre: list[TextBlock] = []
        exact: list[TextBlock] = []
        post: list[TextBlock] = []
        block_count = len(all_blocks)

        while block_index < block_count:
            current = all_blocks[block_index]
            if self.end is not None and not current.rect.y1 < self.end.rect.y1:
                break

            # only exact match when sufficient distance to previous and next blocks, to avoid a vertically
            # shifted description "accidentally" being nicely contained in the depth interval.
            gaps_above = [
                current.rect.y0 - other.rect.y1 for other in all_blocks if other.rect.y0 < current.rect.y0
            ]

            run: list[TextBlock] = []
            run_end = block_index
            if not gaps_above or min(gaps_above) > min_gap:
                run_may_end_here = True
                while run_end < block_count:
                    candidate = all_blocks[run_end]
                    if not fits_interval(candidate.rect):
                        break
                    run.append(candidate)
                    run_end += 1
                    # Only strictly positive distances count as "below" the candidate.
                    gaps_below = [
                        other.rect.y0 - candidate.rect.y1
                        for other in all_blocks
                        if other.rect.y0 - candidate.rect.y1 > 0
                    ]
                    run_may_end_here = not gaps_below or min(gaps_below) > min_gap
                if not run_may_end_here:
                    # The last block of the run is too close to the block below it: discard the run.
                    run = []

            if run:
                exact.extend(run)
                # Continue right after the last block of the run.
                block_index = run_end
            else:
                (post if exact else pre).append(current)
                block_index += 1

        return pre, exact, post
class LayerInterval(Interval):
    """Class for layer intervals.

    A layer interval is an interval whose start and end-points are defined in a single entry.
    E.g. 1.00 - 2.30m.
    """

    def __init__(self, layer_depth_column_entry: LayerDepthColumnEntry):
        """Create the interval from a single layer entry.

        Args:
            layer_depth_column_entry (LayerDepthColumnEntry): Entry providing both the start and the end
                of the interval; it is also kept as ``self.entry``.
        """
        self.entry = layer_depth_column_entry
        super().__init__(layer_depth_column_entry.start, layer_depth_column_entry.end)

    @property
    def line_anchor(self) -> fitz.Point | None:
        """Get the line anchor: right edge (x1) and vertical middle of the end entry, or None without end."""
        if self.end:
            return fitz.Point(self.end.rect.x1, (self.end.rect.y0 + self.end.rect.y1) / 2)
        return None

    @property
    def background_rect(self) -> fitz.Rect | None:
        """Layer intervals have no background rectangle; always returns None."""
        return None

    def matching_blocks(
        self, all_lines: list[TextLine], line_index: int, next_interval: Interval | None
    ) -> list[TextBlock]:
        """Adds lines to a block until the next layer identifier is reached.

        Lines are taken from all_lines, starting at line_index, for as long as their y1 stays above the
        vertical middle of the start entry of the next interval. Without a next interval, or when the
        next interval has no start entry, all remaining lines are taken.

        Args:
            all_lines (list[TextLine]): All available lines.
            line_index (int): Index of the first line to consider.
            next_interval (Interval | None): The interval following this one, if any.

        Returns:
            list[TextBlock]: A list with a single block containing the matched lines, or an empty list
                when no line was matched.
        """
        y1_threshold = None
        # Interval.start is optional, so guard against a next interval without a start entry instead of
        # failing with an AttributeError on `None.rect`.
        next_start = next_interval.start if next_interval else None
        if next_start is not None:
            next_start_rect = next_start.rect
            y1_threshold = next_start_rect.y0 + next_start_rect.height / 2

        matched_lines = []
        for current_line in all_lines[line_index:]:
            if y1_threshold is not None and not current_line.rect.y1 < y1_threshold:
                # First line reaching into the next interval: stop collecting.
                break
            matched_lines.append(current_line)

        return [TextBlock(matched_lines)] if matched_lines else []