Skip to main content
Streaming writers for persisting tile-level predictions. Provides a PredictionWriter abstract base class and three concrete implementations (ZarrWriter, HDF5Writer, NPZWriter) that save per-tile outputs and metadata to disk. The Zarr and HDF5 writers are memory-efficient: they stream results row-by-row so that the full output tensor never needs to reside in memory. The NPZ writer instead accumulates results in memory and flushes them to disk on close.

OutputFormat

class OutputFormat(str, enum.Enum)
Supported on-disk formats for prediction output.

PredictionWriter

class PredictionWriter(ABC)
Abstract base for streaming prediction writers.

open

@abstractmethod
def open(num_tiles: int, embedding_dim: int) -> None
Pre-allocates storage once the number of tiles and the embedding dimensionality are known.
num_tiles
int
required
Total number of tiles to be written.
embedding_dim
int
required
Dimensionality of each prediction vector.

write

@abstractmethod
def write(response: ModelResponse) -> None
Writes a single tile’s model response.
response
ModelResponse
required
The model response to persist.

set_thumbnail

@abstractmethod
def set_thumbnail(thumbnail: NDArray) -> None
Stores a slide thumbnail image.
thumbnail
NDArray
required
RGB image array of shape (H, W, 3).

set_metadata

@abstractmethod
def set_metadata(metadata: dict[str, Any]) -> None
Stores slide-level metadata alongside the arrays.
metadata
dict[str, Any]
required
Key-value pairs (slide_name, tile_size, etc.).

set_tissue_mask

@abstractmethod
def set_tissue_mask(mask: NDArray) -> None
Stores the tissue mask used during extraction.
mask
NDArray
required
Binary mask array of shape (H, W), dtype uint8.

set_gene_names

def set_gene_names(input_genes: list[str] | None = None,
                   output_genes: list[str] | None = None) -> None
Stores input and output gene name lists. The default implementation is a no-op, so nothing is stored unless a subclass overrides this method; subclasses that support gene annotation should override it.
input_genes
list[str] | None
Ordered Ensembl IDs for bulk RNA input.
output_genes
list[str] | None
Ordered Ensembl IDs for predicted output.

close

@abstractmethod
def close() -> None
Flushes and finalises the output file.

ZarrWriter

class ZarrWriter(PredictionWriter)
Streams tile predictions into a Zarr directory store.

open

def open(num_tiles: int, embedding_dim: int) -> None
Pre-allocates Zarr datasets for outputs, coordinates, and tissue ratios.
num_tiles
int
required
Total number of tiles to be written.
embedding_dim
int
required
Dimensionality of each prediction vector.

write

def write(response: ModelResponse) -> None
Writes a single tile response into the Zarr datasets.
response
ModelResponse
required
The model response to persist.

set_thumbnail

def set_thumbnail(thumbnail: NDArray) -> None
Stores a slide thumbnail as a Zarr dataset.
thumbnail
NDArray
required
RGB image array of shape (H, W, 3).

set_metadata

def set_metadata(metadata: dict[str, Any]) -> None
Stores metadata as Zarr root attributes.
metadata
dict[str, Any]
required
Key-value pairs (slide_name, tile_size, etc.).

set_tissue_mask

def set_tissue_mask(mask: NDArray) -> None
Stores the tissue mask as a Zarr dataset.
mask
NDArray
required
Binary mask array of shape (H, W), dtype uint8.

set_gene_names

def set_gene_names(input_genes: list[str] | None = None,
                   output_genes: list[str] | None = None) -> None
Stores gene name arrays as Zarr string datasets.
input_genes
list[str] | None
Ordered Ensembl IDs for bulk RNA input.
output_genes
list[str] | None
Ordered Ensembl IDs for predicted output.

close

def close() -> None
Logs output path (Zarr stores are flushed on write).

HDF5Writer

class HDF5Writer(PredictionWriter)
Streams tile predictions into an HDF5 file.

open

def open(num_tiles: int, embedding_dim: int) -> None
Opens the HDF5 file and pre-allocates datasets.
num_tiles
int
required
Total number of tiles to be written.
embedding_dim
int
required
Dimensionality of each prediction vector.

write

def write(response: ModelResponse) -> None
Writes a single tile response into the HDF5 datasets.
response
ModelResponse
required
The model response to persist.

set_thumbnail

def set_thumbnail(thumbnail: NDArray) -> None
Stores a slide thumbnail as an HDF5 dataset.
thumbnail
NDArray
required
RGB image array of shape (H, W, 3).

set_metadata

def set_metadata(metadata: dict[str, Any]) -> None
Stores metadata as HDF5 file-level attributes.
metadata
dict[str, Any]
required
Key-value pairs (slide_name, tile_size, etc.).

set_tissue_mask

def set_tissue_mask(mask: NDArray) -> None
Stores the tissue mask as an HDF5 dataset.
mask
NDArray
required
Binary mask array of shape (H, W), dtype uint8.

set_gene_names

def set_gene_names(input_genes: list[str] | None = None,
                   output_genes: list[str] | None = None) -> None
Stores gene name arrays as HDF5 string datasets.
input_genes
list[str] | None
Ordered Ensembl IDs for bulk RNA input.
output_genes
list[str] | None
Ordered Ensembl IDs for predicted output.

close

def close() -> None
Closes the HDF5 file handle and flushes to disk.

NPZWriter

class NPZWriter(PredictionWriter)
Accumulates tile predictions in memory and saves them as a compressed .npz file.

open

def open(num_tiles: int, embedding_dim: int) -> None
Allocates in-memory arrays for accumulating results.
num_tiles
int
required
Total number of tiles to be written.
embedding_dim
int
required
Dimensionality of each prediction vector.

write

def write(response: ModelResponse) -> None
Writes a single tile response into the in-memory arrays.
response
ModelResponse
required
The model response to persist.

set_thumbnail

def set_thumbnail(thumbnail: NDArray) -> None
Stores the slide thumbnail for later NPZ serialization.
thumbnail
NDArray
required
RGB image array of shape (H, W, 3).

set_metadata

def set_metadata(metadata: dict[str, Any]) -> None
Stores metadata for later NPZ serialization.
metadata
dict[str, Any]
required
Key-value pairs (slide_name, tile_size, etc.).

set_tissue_mask

def set_tissue_mask(mask: NDArray) -> None
Stores the tissue mask for later NPZ serialization.
mask
NDArray
required
Binary mask array of shape (H, W), dtype uint8.

set_gene_names

def set_gene_names(input_genes: list[str] | None = None,
                   output_genes: list[str] | None = None) -> None
Stores gene name lists for later NPZ serialization.
input_genes
list[str] | None
Ordered Ensembl IDs for bulk RNA input.
output_genes
list[str] | None
Ordered Ensembl IDs for predicted output.

close

def close() -> None
Flushes all accumulated arrays to a compressed .npz file.

create_writer

def create_writer(fmt: OutputFormat, path: Path) -> PredictionWriter
Creates a writer for the requested format.
fmt
OutputFormat
required
Output format.
path
Path
required
Destination file or directory path.
returns
PredictionWriter
An initialised (but not yet opened) PredictionWriter.