Исходный код manuscript.layouts._simplesorting

from typing import List, Optional, Tuple

import numpy as np

from manuscript.api.layout import BaseLayout

from ...data import Block, Line, Page, TextSpan


[документация] class SimpleSorting(BaseLayout): """ Layout-модель, которая группирует обнаруженные текстовые области по колонкам и строкам. Параметры ---------- max_splits : int, optional Максимальное число попыток разбиения на колонки. По умолчанию 10. use_columns : bool, optional Если ``True``, перед группировкой строк выполняется сегментация на колонки. По умолчанию ``True``. """
[документация] def __init__(self, max_splits: int = 10, use_columns: bool = True): self.max_splits = max_splits self.use_columns = use_columns
def _initialize_session(self): """Layout stage has no backend session.""" return None @staticmethod def _resolve_intersections( boxes: List[Tuple[float, float, float, float]], ) -> List[Tuple[float, float, float, float]]: def intersect( b1: Tuple[float, float, float, float], b2: Tuple[float, float, float, float], ) -> bool: return not ( b1[2] <= b2[0] or b2[2] <= b1[0] or b1[3] <= b2[1] or b2[3] <= b1[1] ) resolved = list(boxes) max_iterations = 50 for _ in range(max_iterations): changed = False for i in range(len(resolved)): for j in range(i + 1, len(resolved)): if intersect(resolved[i], resolved[j]): x0, y0, x1, y1 = resolved[i] x0b, y0b, x1b, y1b = resolved[j] resolved[i] = ( x0, y0, int(x1 - (x1 - x0) * 0.1), int(y1 - (y1 - y0) * 0.1), ) resolved[j] = ( x0b, y0b, int(x1b - (x1b - x0b) * 0.1), int(y1b - (y1b - y0b) * 0.1), ) changed = True if not changed: break return resolved @staticmethod def _find_gaps( boxes: List[Tuple[int, int, int, int]], start: int, end: int ) -> List[int]: segs = [ (max(b[0], start), min(b[2], end)) for b in boxes if not (b[2] <= start or b[0] >= end) ] if not segs: return [] segs.sort() merged = [segs[0]] for s, e in segs[1:]: ms, me = merged[-1] if s <= me: merged[-1] = (ms, max(me, e)) else: merged.append((s, e)) gaps = [] prev_end = start for s, e in merged: if s > prev_end: gaps.append((prev_end, s)) prev_end = e if prev_end < end: gaps.append((prev_end, end)) return [(a + b) // 2 for a, b in gaps if b - a > 1] @staticmethod def _emptiness( boxes: List[Tuple[int, int, int, int]], start: int, end: int ) -> float: col = [b for b in boxes if b[0] >= start and b[2] <= end] if not col: return 1.0 min_y, min_x = min(b[1] for b in col), max(b[3] for b in col) rect = (end - start) * (min_x - min_y) area = sum((b[2] - b[0]) * (b[3] - b[1]) for b in col) return (rect - area) / rect if rect else 1.0 def _segment_columns( self, boxes: List[Tuple[int, int, int, int]], ) -> List[List[Tuple[int, int, int, int]]]: if not boxes: return [] img_width = max(b[2] for b in boxes) segments = [(0, img_width)] separators: List[int] = [] for _ in range(self.max_splits or img_width): best = None for idx, (s, e) in enumerate(segments): for x in self._find_gaps(boxes, s, e): if not ( any(b[2] <= x and b[0] >= s for b in boxes) and any(b[0] >= x and b[2] <= e for b in boxes) ): continue score = self._emptiness(boxes, s, x) + self._emptiness( boxes, x, e ) if best is None or score < best[0]: best = (score, x, idx) if not best: break _, x_split, idx = best s, e = segments.pop(idx) separators.append(x_split) segments.insert(idx, (s, x_split)) segments.insert(idx + 1, (x_split, e)) segments.sort() parts = [(0, img_width)] for x in separators: new_parts: List[Tuple[int, int]] = [] for s, e in parts: if s < x < e: new_parts += [(s, x), (x, e)] else: new_parts.append((s, e)) parts = new_parts cols: List[List[Tuple[int, int, int, int]]] = [] for s, e in parts: col = [b for b in boxes if b[0] >= s and b[2] <= e] cols.append(col) cols = [c for c in cols if c] if not cols: return [] return sorted(cols, key=lambda c: min(b[0] for b in c)) @staticmethod def _sort_column_into_lines( compressed: List[Tuple[int, int, int, int]], mapping: dict, y_tol_ratio: float, x_gap_ratio: float, ) -> List[List[Tuple[float, float, float, float]]]: if not compressed: return [] avg_h = np.mean([b[3] - b[1] for b in compressed]) lines = [] for b in sorted(compressed, key=lambda b: (b[1] + b[3]) / 2): cy = (b[1] + b[3]) / 2 placed = False for ln in lines: line_cy = np.mean([(v[1] + v[3]) / 2 for v in ln]) last_x1 = max(v[2] for v in ln) if ( abs(cy - line_cy) <= avg_h * y_tol_ratio and (b[0] - last_x1) <= avg_h * x_gap_ratio ): ln.append(b) placed = True break if not placed: lines.append([b]) lines.sort(key=lambda ln: np.mean([(b[1] + b[3]) / 2 for b in ln])) result = [] for ln in lines: ln.sort(key=lambda b: b[0]) original_line = [mapping[b] for b in ln] result.append(original_line) return result def _sort_into_lines( self, boxes: List[Tuple[float, float, float, float]], y_tol_ratio: float = 0.6, x_gap_ratio: float = np.inf, use_columns: bool = True, ) -> List[List[Tuple[float, float, float, float]]]: if not boxes: return [] compressed = self._resolve_intersections(boxes) mapping = {c: o for c, o in zip(compressed, boxes)} if use_columns: columns = self._segment_columns(compressed) all_lines = [] for column_boxes in columns: column_lines = self._sort_column_into_lines( column_boxes, mapping, y_tol_ratio, x_gap_ratio ) all_lines.extend(column_lines) return all_lines return self._sort_column_into_lines( compressed, mapping, y_tol_ratio, x_gap_ratio )
[документация] def predict(self, page: Page, image: Optional[np.ndarray] = None) -> Page: """ Упорядочивает текстовые области на странице по блокам и строкам и назначает порядок чтения. Параметры ---------- page : Page Входная страница с обнаруженными текстовыми областями. image : numpy.ndarray, optional Исходное изображение страницы (не используется данной layout-моделью). Возвращает ------- Page Упорядоченная страница. """ _ = image all_text_spans: List[TextSpan] = [] for block in page.blocks: for line in block.lines: all_text_spans.extend(line.text_spans) if not all_text_spans: return Page(blocks=[Block(lines=[Line(text_spans=[], order=0)], order=0)]) text_span_to_box = {} boxes = [] for text_span in all_text_spans: poly = np.array(text_span.polygon, dtype=np.int32) x_min, y_min = np.min(poly, axis=0) x_max, y_max = np.max(poly, axis=0) box = (int(x_min), int(y_min), int(x_max), int(y_max)) boxes.append(box) text_span_to_box[box] = text_span if self.use_columns: columns = self._segment_columns(boxes) else: columns = [boxes] blocks: List[Block] = [] for block_idx, column_boxes in enumerate(columns): lines_in_column = self._sort_into_lines(column_boxes, use_columns=False) lines: List[Line] = [] for line_idx, line_boxes in enumerate(lines_in_column): line_text_spans = [] for text_span_idx, box in enumerate(line_boxes): text_span = text_span_to_box[box] text_span.order = text_span_idx line_text_spans.append(text_span) line = Line(text_spans=line_text_spans, order=line_idx) lines.append(line) block = Block(lines=lines, order=block_idx) blocks.append(block) return Page(blocks=blocks)