from typing import Tuple, Optional, Union
from pathlib import Path
import cv2
import numpy as np
from PIL import Image, ImageDraw
from .io import read_image
try:
import torch
except ImportError:
torch = None
def _draw_quads(
image: Union[str, Path, np.ndarray, Image.Image],
quads: np.ndarray,
color: Tuple[int, int, int] = (0, 255, 0),
thickness: int = 2,
dark_alpha: float = 0.3,
blur_ksize: int = 5,
) -> Image.Image:
"""
Draw quadrilateral boxes on an image with semi-transparent overlay.
Parameters
----------
image : str, Path, np.ndarray, or PIL.Image
Input image. Can be:
- Path to image file (str or Path)
- RGB numpy array with shape (H, W, 3)
- PIL Image object
quads : np.ndarray
Array of quad boxes with shape (N, 8) or (N, 9).
Each row contains [x1, y1, x2, y2, x3, y3, x4, y4] or with score.
color : tuple of int, default=(0, 255, 0)
RGB color for drawing boxes.
thickness : int, default=2
Line thickness in pixels.
dark_alpha : float, default=0.3
Alpha value for darkening the image (0=no darkening, 1=fully dark).
blur_ksize : int, default=5
Kernel size for Gaussian blur (must be odd, 0=no blur).
Returns
-------
PIL.Image.Image
Image with drawn quadrilaterals.
Examples
--------
>>> import numpy as np
>>> from PIL import Image
>>> # From numpy array
>>> img = np.zeros((480, 640, 3), dtype=np.uint8)
>>> quads = np.array([[100, 100, 200, 100, 200, 150, 100, 150]])
>>> result = draw_quads(img, quads, color=(255, 0, 0))
>>> # From file path
>>> result = draw_quads("document.jpg", quads, color=(255, 0, 0))
"""
# Load image using universal reader
if isinstance(image, (str, Path)):
img = read_image(image)
elif isinstance(image, Image.Image):
img = np.array(image.convert("RGB"))
else:
img = image.copy()
# Apply darkening if requested
if dark_alpha > 0:
overlay = (img * (1 - dark_alpha)).astype(np.uint8)
else:
overlay = img
# Apply blur if requested
if blur_ksize > 0:
overlay = cv2.GaussianBlur(overlay, (blur_ksize, blur_ksize), 0)
# Draw each quad
for quad in quads:
coords = quad[:8].reshape(4, 2).astype(np.int32)
cv2.polylines(
overlay, [coords], isClosed=True, color=color, thickness=thickness
)
return Image.fromarray(overlay)
[docs]
def visualize_page(
image: Union[str, Path, np.ndarray, Image.Image],
page: "Page", # type: ignore # noqa: F821
color=(0, 255, 0),
thickness=2,
show_order=True,
show_lines=False,
show_numbers=False,
line_color=(255, 165, 0),
number_bg=(255, 255, 255),
number_color=(0, 0, 0),
max_size=4096,
) -> Image.Image:
"""
Visualize a Page object with detected words/blocks.
This function draws all words from the Page structure on the image,
optionally showing reading order with numbered markers and connecting lines.
When show_order=True, it also visualizes blocks with semi-transparent
bounding boxes, each block having a distinct color.
Parameters
----------
image : str, Path, np.ndarray, or PIL.Image
Input image. Can be:
- Path to image file (str or Path) - supports Unicode paths
- RGB numpy array with shape (H, W, 3)
- PIL Image object
page : Page
Page object from manuscript.data containing detected blocks/words.
color : tuple of int, default=(0, 255, 0)
RGB color for word boundaries.
thickness : int, default=2
Line thickness for word boundaries.
show_order : bool, default=True
If True, colors different text lines with different colors and shows
semi-transparent block boundaries with different colors per block.
show_lines : bool, default=False
If True and show_order=True, draw connecting lines between consecutive
words showing the reading sequence.
show_numbers : bool, default=False
If True and show_order=True, display numbered markers on each word
showing the reading order.
line_color : tuple of int, default=(255, 165, 0)
RGB color for connecting lines between words.
number_bg : tuple of int, default=(255, 255, 255)
Background color for order number boxes.
number_color : tuple of int, default=(0, 0, 0)
Text color for order numbers.
max_size : int or None, default=4096
Maximum size for the longer dimension of the output image.
Image will be resized proportionally if larger. Set to None to
keep original size.
Returns
-------
PIL.Image.Image
Visualized image with detection boxes and optional reading order annotations.
When show_order=True, also includes semi-transparent block boundaries.
Examples
--------
Basic visualization without reading order:
>>> from manuscript import EAST
>>> from manuscript.utils import visualize_page
>>> detector = EAST()
>>> result = detector.predict("document.jpg")
>>> # Can pass path directly
>>> vis = visualize_page("document.jpg", result["page"])
>>> vis.save("output.jpg")
Visualization with reading order and block boundaries:
>>> # Can also use numpy array or PIL Image
>>> from manuscript.utils import read_image
>>> img = read_image("document.jpg")
>>> vis = visualize_page(
... img,
... result["page"],
... show_order=True,
... color=(255, 0, 0),
... thickness=3
... )
Show connecting lines and numbers between words:
>>> vis = visualize_page(
... "document.jpg",
... result["page"],
... show_order=True,
... show_lines=True,
... show_numbers=True
... )
"""
# Load image using universal reader
if isinstance(image, (str, Path)):
img = read_image(image)
elif isinstance(image, Image.Image):
img = np.array(image.convert("RGB"))
else:
img = image.copy()
if max_size is not None:
h, w = img.shape[:2]
scale = max_size / max(h, w)
if scale < 1:
img = cv2.resize(img, (int(w * scale), int(h * scale)))
else:
scale = 1.0
else:
scale = 1.0
def get_line_color(idx: int):
hue = (idx * 0.618033988749895) % 1.0
hsv = np.uint8([[[int(hue * 179), 220, 255]]])
return tuple(int(c) for c in cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)[0][0])
def get_block_color(idx: int):
hue = ((idx * 0.618033988749895) + 0.5) % 1.0
hsv = np.uint8([[[int(hue * 179), 180, 255]]])
return tuple(int(c) for c in cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)[0][0])
lines = []
blocks = []
line_index = 0
for block_idx, block in enumerate(page.blocks):
block_quads = []
if block.lines:
for line in block.lines:
quads, words = [], []
for w in line.words:
poly = np.array(w.polygon) * scale
quad = poly.reshape(-1)
quads.append(quad)
words.append(w)
block_quads.append(quad)
if quads:
lines.append((quads, words, line_index))
line_index += 1
elif block.words:
quads, words = [], []
for w in block.words:
poly = np.array(w.polygon) * scale
quad = poly.reshape(-1)
quads.append(quad)
words.append(w)
block_quads.append(quad)
if quads:
lines.append((quads, words, line_index))
line_index += 1
if block_quads:
blocks.append((block_quads, block_idx))
if not lines:
return Image.fromarray(img)
h, w = img.shape[:2]
# ----- BLOCK LAYER (RGBA) -----
block_layer = np.zeros((h, w, 4), dtype=np.uint8)
for block_quads, block_idx in blocks:
pts = np.vstack([q.reshape(4, 2) for q in block_quads])
x1, y1 = pts[:, 0].min(), pts[:, 1].min()
x2, y2 = pts[:, 0].max(), pts[:, 1].max()
color_b = get_block_color(block_idx)
cv2.rectangle(
block_layer, (int(x1), int(y1)), (int(x2), int(y2)), (*color_b, 75), -1
) # alpha=75
# ----- WORD MASK (cut out words from block layers) -----
word_mask = np.zeros((h, w), dtype=np.uint8)
for quads, _, _ in lines:
for quad in quads:
coords = quad.reshape(4, 2).astype(np.int32)
cv2.fillPoly(word_mask, [coords], 255)
inv_word_mask = cv2.bitwise_not(word_mask)
# cut out words → blocks DO NOT cover words
block_layer[:, :, 3] = cv2.bitwise_and(block_layer[:, :, 3], inv_word_mask)
# final image
base = Image.fromarray(img).convert("RGBA")
block_img = Image.fromarray(block_layer, mode="RGBA")
out = Image.alpha_composite(base, block_img).convert("RGB")
draw = ImageDraw.Draw(out)
# ----- WORD BOXES -----
for quads, _, idx in lines:
col = get_line_color(idx) if show_order else color
for quad in quads:
pts = quad.reshape(4, 2)
pts_py = [(int(x), int(y)) for x, y in pts]
draw.line(pts_py + [pts_py[0]], fill=tuple(col), width=thickness)
# ----- ORDER LINES & NUMBERS -----
if show_order:
words = [w for _, ws, _ in lines for w in ws]
centers = []
for w in words:
xs = [p[0] * scale for p in w.polygon]
ys = [p[1] * scale for p in w.polygon]
centers.append((sum(xs) / 4, sum(ys) / 4))
# Draw connecting lines only if show_lines is True
if show_lines:
for p, c in zip(centers, centers[1:]):
draw.line([p, c], fill=line_color, width=3)
# Draw numbers only if show_numbers is True
if show_numbers:
overlay = Image.new("RGBA", out.size, (0, 0, 0, 0))
d2 = ImageDraw.Draw(overlay)
for cx, cy in centers:
d2.rectangle([cx - 12, cy - 12, cx + 12, cy + 12], fill=number_bg + (140,))
out = Image.alpha_composite(out.convert("RGBA"), overlay).convert("RGB")
draw = ImageDraw.Draw(out)
for i, (cx, cy) in enumerate(centers, 1):
draw.text((cx - 6, cy - 8), str(i), fill=number_color)
return out