Source code for manuscript.utils.visualization

from typing import Tuple, Optional, Union
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, ImageDraw

from .io import read_image

try:
    import torch
except ImportError:
    torch = None


def _draw_quads(
    image: Union[str, Path, np.ndarray, Image.Image],
    quads: np.ndarray,
    color: Tuple[int, int, int] = (0, 255, 0),
    thickness: int = 2,
    dark_alpha: float = 0.3,
    blur_ksize: int = 5,
) -> Image.Image:
    """
    Draw quadrilateral outlines on a darkened and/or blurred copy of an image.

    The input is never modified: paths are loaded with ``read_image``, PIL
    images are converted to an RGB array, and anything else is duplicated
    with its ``.copy()`` method before drawing.

    Parameters
    ----------
    image : str, Path, np.ndarray, or PIL.Image
        Input image. Can be:
        - Path to image file (str or Path)
        - RGB numpy array with shape (H, W, 3)
        - PIL Image object
    quads : np.ndarray
        Iterable of quad boxes, typically an array with shape (N, 8) or
        (N, 9). Each row contains [x1, y1, x2, y2, x3, y3, x4, y4],
        optionally followed by a score, which is ignored. Rows are flattened
        before use, so (N, 4, 2) arrays and plain lists are accepted too.
    color : tuple of int, default=(0, 255, 0)
        Color passed to ``cv2.polylines`` (RGB when the image is RGB).
    thickness : int, default=2
        Line thickness in pixels.
    dark_alpha : float, default=0.3
        Amount of darkening (0=no darkening, 1=fully dark). Values above 1
        are treated as 1; values <= 0 disable darkening.
    blur_ksize : int, default=5
        Kernel size for Gaussian blur (0 or negative = no blur). OpenCV only
        accepts odd kernel sizes, so an even value is rounded up to the next
        odd one.

    Returns
    -------
    PIL.Image.Image
        Image with drawn quadrilaterals.

    Examples
    --------
    >>> import numpy as np
    >>> # From numpy array
    >>> img = np.zeros((480, 640, 3), dtype=np.uint8)
    >>> quads = np.array([[100, 100, 200, 100, 200, 150, 100, 150]])
    >>> result = _draw_quads(img, quads, color=(255, 0, 0))

    >>> # From file path
    >>> result = _draw_quads("document.jpg", quads, color=(255, 0, 0))
    """
    # Normalise every supported input kind to an array we are free to mutate.
    if isinstance(image, (str, Path)):
        canvas = read_image(image)
    elif isinstance(image, Image.Image):
        canvas = np.array(image.convert("RGB"))
    else:
        canvas = image.copy()

    # Darken by scaling pixel values. Clamp the factor so that dark_alpha > 1
    # cannot yield negative values that would wrap around in the uint8 cast.
    if dark_alpha > 0:
        keep = 1.0 - min(float(dark_alpha), 1.0)
        canvas = (canvas * keep).astype(np.uint8)

    # cv2.GaussianBlur rejects even kernel sizes; `| 1` bumps an even size to
    # the next odd one and leaves odd sizes unchanged.
    ksize = int(blur_ksize)
    if ksize > 0:
        ksize |= 1
        canvas = cv2.GaussianBlur(canvas, (ksize, ksize), 0)

    # Outline each quad; only the first eight values (four x/y pairs) are
    # coordinates, anything after that (e.g. a score) is dropped.
    for quad in quads:
        flat = np.asarray(quad).reshape(-1)
        corners = flat[:8].reshape(4, 2).astype(np.int32)
        cv2.polylines(
            canvas, [corners], isClosed=True, color=color, thickness=thickness
        )

    return Image.fromarray(canvas)



[docs]
def visualize_page(
    image: Union[str, Path, np.ndarray, Image.Image],
    page: "Page",  # type: ignore  # noqa: F821
    color=(0, 255, 0),
    thickness=2,
    show_order=True,
    show_lines=False,
    show_numbers=False,
    line_color=(255, 165, 0),
    number_bg=(255, 255, 255),
    number_color=(0, 0, 0),
    max_size=4096,
) -> Image.Image:
    """
    Visualize a Page object with detected words/blocks.

    The function outlines every word polygon found in ``page.blocks`` and
    fills the axis-aligned bounding box of each block with a semi-transparent,
    per-block color (word interiors are cut out of that fill so the text
    stays unobscured). Optionally it annotates the reading order with
    per-line colors, connecting lines and numbered markers.

    Words are taken from ``block.lines[*].words`` when a block has lines;
    otherwise from ``block.words``, which is then treated as a single line.

    Parameters
    ----------
    image : str, Path, np.ndarray, or PIL.Image
        Input image. Can be:
        - Path to image file (str or Path), loaded with ``read_image``
        - RGB numpy array with shape (H, W, 3)
        - PIL Image object (converted to RGB)
        The caller's object is not modified.
    page : Page
        Page object exposing ``blocks``; each block exposes ``lines`` and/or
        ``words``, and each word a four-point ``polygon``.
    color : tuple of int, default=(0, 255, 0)
        RGB color for word outlines when ``show_order`` is False.
    thickness : int, default=2
        Line thickness for word outlines.
    show_order : bool, default=True
        If True, each text line gets its own outline color (``color`` is
        ignored) and ``show_lines`` / ``show_numbers`` take effect.
    show_lines : bool, default=False
        If True and show_order=True, draw connecting lines between the
        centers of consecutive words in reading sequence.
    show_numbers : bool, default=False
        If True and show_order=True, draw a 1-based order number on a
        semi-transparent box at the center of each word.
    line_color : tuple of int, default=(255, 165, 0)
        RGB color for connecting lines between words.
    number_bg : tuple of int, default=(255, 255, 255)
        RGB background color for order number boxes.
    number_color : tuple of int, default=(0, 0, 0)
        Text color for order numbers.
    max_size : int or None, default=4096
        Maximum size for the longer dimension of the output image.
        A larger image is downscaled proportionally and the polygons are
        scaled by the same factor. Set to None to keep the original size.

    Returns
    -------
    PIL.Image.Image
        Annotated image (at the possibly reduced resolution). If the page
        contains no words, the image is returned without any drawing.

    Raises
    ------
    ValueError
        If ``max_size`` is not None and not positive.

    Examples
    --------
    Basic visualization with default settings:

    >>> from manuscript import EAST
    >>> from manuscript.utils import visualize_page
    >>> detector = EAST()
    >>> result = detector.predict("document.jpg")
    >>> # Can pass path directly
    >>> vis = visualize_page("document.jpg", result["page"])
    >>> vis.save("output.jpg")

    Single outline color instead of per-line colors:

    >>> # Can also use numpy array or PIL Image
    >>> from manuscript.utils import read_image
    >>> img = read_image("document.jpg")
    >>> vis = visualize_page(
    ...     img,
    ...     result["page"],
    ...     show_order=False,
    ...     color=(255, 0, 0),
    ...     thickness=3
    ... )

    Show connecting lines and numbers between words:

    >>> vis = visualize_page(
    ...     "document.jpg",
    ...     result["page"],
    ...     show_order=True,
    ...     show_lines=True,
    ...     show_numbers=True
    ... )
    """
    # Normalise every supported input kind to an array we own.
    if isinstance(image, (str, Path)):
        img = read_image(image)
    elif isinstance(image, Image.Image):
        img = np.array(image.convert("RGB"))
    else:
        img = image.copy()

    # Downscale oversized images; `scale` is later applied to all polygons.
    scale = 1.0
    if max_size is not None:
        if max_size <= 0:
            raise ValueError("max_size must be a positive number or None")
        src_h, src_w = img.shape[:2]
        longest_side = max(src_h, src_w)
        if longest_side > max_size:
            scale = max_size / longest_side
            # Clamp to 1 px so extreme aspect ratios never yield a zero-sized
            # target, which cv2.resize rejects.
            target = (max(1, int(src_w * scale)), max(1, int(src_h * scale)))
            img = cv2.resize(img, target)

    def palette_color(idx: int, phase: float, saturation: int):
        # Step the hue by the golden-ratio conjugate so neighbouring indices
        # get clearly different colors; `phase` offsets the block palette
        # from the line palette.
        hue = (idx * 0.618033988749895 + phase) % 1.0
        hsv = np.uint8([[[int(hue * 179), saturation, 255]]])
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)[0][0]
        return tuple(int(c) for c in rgb)

    def get_line_color(idx: int):
        return palette_color(idx, 0.0, 220)

    def get_block_color(idx: int):
        return palette_color(idx, 0.5, 180)

    lines = []   # (scaled flat quads, words, running line index)
    blocks = []  # (all scaled flat quads of the block, block index)

    for block_idx, block in enumerate(page.blocks):
        # A block either carries explicit lines, or a flat word list that is
        # treated as one line.
        if block.lines:
            word_groups = [line.words for line in block.lines]
        elif block.words:
            word_groups = [block.words]
        else:
            word_groups = []

        block_quads = []
        for group in word_groups:
            group_words = list(group)
            group_quads = [
                (np.array(word.polygon) * scale).reshape(-1)
                for word in group_words
            ]
            if group_quads:
                # Empty groups are skipped and do not consume a line index.
                lines.append((group_quads, group_words, len(lines)))
                block_quads.extend(group_quads)
        if block_quads:
            blocks.append((block_quads, block_idx))

    if not lines:
        return Image.fromarray(img)

    height, width = img.shape[:2]

    # ----- BLOCK LAYER (RGBA) -----
    # NOTE(review): this layer is drawn regardless of show_order, although
    # earlier documentation described it as show_order-only - confirm which
    # behavior is intended.
    block_layer = np.zeros((height, width, 4), dtype=np.uint8)
    for block_quads, block_idx in blocks:
        pts = np.vstack([q.reshape(4, 2) for q in block_quads])
        x1, y1 = pts[:, 0].min(), pts[:, 1].min()
        x2, y2 = pts[:, 0].max(), pts[:, 1].max()
        cv2.rectangle(
            block_layer,
            (int(x1), int(y1)),
            (int(x2), int(y2)),
            (*get_block_color(block_idx), 75),  # alpha=75
            -1,
        )

    # ----- WORD MASK (cut out words from block layer) -----
    word_mask = np.zeros((height, width), dtype=np.uint8)
    for group_quads, _, _ in lines:
        for quad in group_quads:
            cv2.fillPoly(word_mask, [quad.reshape(4, 2).astype(np.int32)], 255)

    # Zero the block alpha wherever a word lies so the fill never covers text.
    block_layer[:, :, 3] = cv2.bitwise_and(
        block_layer[:, :, 3], cv2.bitwise_not(word_mask)
    )

    # Composite the block layer over the image. The RGBA mode is inferred
    # from the (H, W, 4) uint8 array, so no explicit `mode` is passed.
    base = Image.fromarray(img).convert("RGBA")
    out = Image.alpha_composite(base, Image.fromarray(block_layer)).convert("RGB")

    draw = ImageDraw.Draw(out)

    # ----- WORD BOXES -----
    for group_quads, _, line_idx in lines:
        outline = get_line_color(line_idx) if show_order else color
        for quad in group_quads:
            corners = [(int(x), int(y)) for x, y in quad.reshape(4, 2)]
            # Repeat the first corner to close the outline.
            draw.line(
                corners + [corners[0]], fill=tuple(outline), width=thickness
            )

    # ----- ORDER LINES & NUMBERS -----
    if show_order:
        centers = []
        for _, group_words, _ in lines:
            for word in group_words:
                xs = [p[0] * scale for p in word.polygon]
                ys = [p[1] * scale for p in word.polygon]
                centers.append((sum(xs) / len(xs), sum(ys) / len(ys)))

        # Draw connecting lines only if show_lines is True
        if show_lines:
            for start, end in zip(centers, centers[1:]):
                draw.line([start, end], fill=line_color, width=3)

        # Draw numbers only if show_numbers is True
        if show_numbers:
            # Semi-transparent backgrounds go on a separate RGBA layer so
            # they blend with the picture instead of overwriting it.
            badge_layer = Image.new("RGBA", out.size, (0, 0, 0, 0))
            badge_draw = ImageDraw.Draw(badge_layer)
            badge_fill = tuple(number_bg) + (140,)
            for cx, cy in centers:
                badge_draw.rectangle(
                    [cx - 12, cy - 12, cx + 12, cy + 12], fill=badge_fill
                )
            out = Image.alpha_composite(out.convert("RGBA"), badge_layer).convert(
                "RGB"
            )

            # `out` is a new image after compositing, so rebind the drawer.
            draw = ImageDraw.Draw(out)
            for order, (cx, cy) in enumerate(centers, 1):
                draw.text((cx - 6, cy - 8), str(order), fill=number_color)

    return out