Skip to content

zarr

insert_omero_metadata #

insert_omero_metadata(src, window_max=None, window_min=None, window_start=None, window_end=None, id=None, name=None)

Insert or update missing omero transitional metadata into .zattrs metadata of parent group for the input zarr array.

Parameters:

Name Type Description Default
src str

Path to Zarr array.

required
window_max int

Max view window value. Defaults to None.

None
window_min int

Min view window value. Defaults to None.

None
window_start int

Contrast min value. Defaults to None.

None
window_end int

Contrast max value. Defaults to None.

None
id int

Value stored under the "id" key of the omero metadata. Defaults to None, in which case 1 is used.

None
name str

Name of the dataset. Defaults to None.

None
Source code in src/cellmap_utils/zarr/metadata.py
def insert_omero_metadata(
    src: str,
    window_max: int | None = None,
    window_min: int | None = None,
    window_start: int | None = None,
    window_end: int | None = None,
    id: int | None = None,
    name: str | None = None,
):
    """
    Write omero transitional metadata into the attributes of the parent group of the input zarr array.

    Any "omero" attribute already present on the parent group is overwritten.

    Args:
        src (str): Path to Zarr array.
        window_max (int, optional): Max view window value. Defaults to None,
            in which case the maximum of the array's integer dtype is used.
        window_min (int, optional): Min view window value. Defaults to None,
            in which case the minimum of the array's integer dtype is used.
        window_start (int, optional): Contrast min value. Defaults to None,
            in which case window_min is used.
        window_end (int, optional): Contrast max value. Defaults to None,
            in which case window_max is used.
        id (int, optional): Value of the omero "id" field. Defaults to None,
            in which case 1 is used.
        name (str, optional): Name of the dataset. Defaults to None, in which
            case the store's base name up to its first "." is used.
    """

    store_path, zarr_path = separate_store_path(src, "")

    z_store = zarr.NestedDirectoryStore(store_path)
    z_arr = zarr.open(store=z_store, path=zarr_path, mode="a")

    parent_group = access_parent(z_arr)

    # NOTE(review): numpy.iinfo only accepts integer dtypes, so float arrays
    # need explicit window_max / window_min values - confirm with callers.
    if window_max is None:
        window_max = numpy.iinfo(z_arr.dtype).max
    if window_min is None:
        window_min = numpy.iinfo(z_arr.dtype).min

    if name is None:
        # e.g. ".../dataset.zarr" -> "dataset"
        name = os.path.basename(z_store.path.rstrip("/")).split(".")[0]

    omero = {
        "id": 1 if id is None else id,
        "name": name,
        "version": "0.4",
        "channels": [
            {
                "active": True,
                "coefficient": 1,
                "color": "FFFFFF",
                "inverted": False,
                # the channel is labelled after the parent group
                "label": parent_group.path.split("/")[-1],
                "window": {
                    "end": window_max if window_end is None else window_end,
                    "max": window_max,
                    "min": window_min,
                    "start": window_min if window_start is None else window_start,
                },
            }
        ],
        "rdefs": {
            "defaultT": 0,
            # middle index along the first axis of the array
            "defaultZ": z_arr.shape[0] // 2,
            "model": "greyscale",
        },
    }
    parent_group.attrs["omero"] = omero

get_single_scale_metadata #

get_single_scale_metadata(ds_name, voxel_size, translation, name, units='nanometer', axes=['z', 'y', 'x'])

Returns multiscales ngff metadata with a single level.

Parameters:

Name Type Description Default
ds_name str

name of the dataset that contains the data.

required
voxel_size list[float]

scale. Example: [1.0, 1.0, 1.0]

required
translation list[float]

offset. Example: [0.0, 0.0, 0.0]

required
name str

Name of a multiscale set.

required
units str

Physical units. Defaults to 'nanometer'.

'nanometer'
axes list[str]

Axes labeling. Defaults to ['z', 'y', 'x'].

['z', 'y', 'x']

Returns:

Name Type Description
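dict

Dictionary with a single "multiscales" entry (version 0.4) that describes one dataset: its axes, scale and translation.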

Source code in src/cellmap_utils/zarr/metadata.py
def get_single_scale_metadata(
    ds_name: str,
    voxel_size: list[float],
    translation: list[float],
    name: str,
    units: str = "nanometer",
    axes: list[str] = ["z", "y", "x"],
) -> dict:
    """Returns multiscales ngff metadata with a single level.

    Args:
        ds_name (str): name of the dataset that contains the data.
        voxel_size (list[float]): scale. Example: [1.0, 1.0, 1.0]
        translation (list[float]): offset. Example: [0.0, 0.0, 0.0]
        name (str): Name of a multiscale set.
        units (str, optional): Physical units. Defaults to 'nanometer'.
        axes (list[str], optional): Axes labeling. Defaults to ['z', 'y', 'x'].

    Returns:
        dict: {"multiscales": [entry]} where entry holds the axes, an identity
        scale transform, one dataset (ds_name) with its scale and translation,
        the multiscale name and the version "0.4".
    """
    # `axes` is only read, never mutated, so the shared list default is safe.
    multiscale = {
        "axes": [{"name": axis, "type": "space", "unit": units} for axis in axes],
        # Identity scale for the whole multiscale set; it needs one entry per
        # axis, so size it from `axes` instead of assuming three dimensions.
        "coordinateTransformations": [
            {"scale": [1.0] * len(axes), "type": "scale"}
        ],
        "datasets": [
            {
                "coordinateTransformations": [
                    {"scale": [float(item) for item in voxel_size], "type": "scale"},
                    {"translation": translation, "type": "translation"},
                ],
                "path": ds_name,
            }
        ],
        "name": name,
        "version": "0.4",
    }

    return {"multiscales": [multiscale]}

get_multiscale_metadata #

get_multiscale_metadata(voxel_size, translation, levels, units='nanometer', axes=['z', 'y', 'x'], name='')

Generates a multiscale metadata from specified voxel size, offset and multi-scale pyramid levels.

Parameters:

Name Type Description Default
voxel_size list[float]

physical size of the voxel

required
translation list[float]

physical translation of the center of the voxel.

required
levels int

number of downsampled levels (s1 … s&lt;levels&gt;) generated in addition to the base level s0 of the multi-scale pyramid.

required
units str

Physical units. Defaults to 'nanometer'.

'nanometer'
axes list[str]

Axis order. Defaults to ['z', 'y', 'x'].

['z', 'y', 'x']
name str

Name of the dataset that would utilize multi-scale metadata. Defaults to ''.

''

Returns:

Name Type Description
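dict

Dictionary with a single "multiscales" entry whose "datasets" list contains s0 and every generated downsampled level.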

Source code in src/cellmap_utils/zarr/metadata.py
def get_multiscale_metadata(
    voxel_size: list[float],
    translation: list[float],
    levels: int,
    units: str = "nanometer",
    axes: list[str] = ["z", "y", "x"],
    name: str = "",
):
    """Build multiscale metadata for a pyramid from the s0 voxel size and offset.

    The s0 entry comes from get_single_scale_metadata; entries "s1" through
    "s<levels>" are then appended, each doubling the scale of the previous one.

    Args:
        voxel_size (list[float]): physical size of the voxel at s0.
        translation (list[float]): physical translation of the center of the s0 voxel.
        levels (int): number of levels appended after s0.
        units (str, optional): Physical units. Defaults to 'nanometer'.
        axes (list[str], optional): Axis order. Defaults to ['z', 'y', 'x'].
        name (str, optional): Name of the dataset that would utilize multi-scale metadata. Defaults to ''.

    Returns:
        dict: the metadata dictionary, with one "datasets" entry per level.
    """
    metadata = get_single_scale_metadata(
        "s0", voxel_size, translation, name, units, axes
    )
    datasets = metadata["multiscales"][0]["datasets"]

    s0_transforms = datasets[0]["coordinateTransformations"]
    s0_scale = s0_transforms[0]["scale"]
    s0_offset = s0_transforms[1]["translation"]

    for lvl in range(1, levels + 1):
        factor = pow(2, lvl)
        # Offset multiplier applied to the s0 scale for this level.
        shift = pow(2, lvl - 1) - 0.5

        level_scale = [float(dim * factor) for dim in s0_scale]
        level_offset = [
            (dim * shift) + off for (dim, off) in zip(s0_scale, s0_offset)
        ]

        datasets.append(
            {
                "coordinateTransformations": [
                    {"type": "scale", "scale": level_scale},
                    {"type": "translation", "translation": level_offset},
                ],
                "path": f"s{lvl}",
            }
        )

    return metadata

ome_ngff_only #

ome_ngff_only(zg)

Delete all attrs from .zattrs that are not part of the OME-NGFF Zarr spec and CellMap metadata.

Parameters:

Name Type Description Default
zg Group

zarr group that contains multiscale metadata.

required
Source code in src/cellmap_utils/zarr/metadata.py
def ome_ngff_only(zg: zarr.Group):
    """Strip every attribute that is neither OME-NGFF nor CellMap metadata.

    Only the keys listed below survive; everything else is removed from the
    group's attributes.

    Args:
        zg (zarr.Group): zarr group that contains multiscale metadata.
    """
    allowed_keys = (
        "multiscales",
        "cellmap",
        "omero",
        "bioformats2raw.layout",
        "labels",
        "well",
        "plate",
    )

    # Iterate over a snapshot of the keys, since entries are deleted on the way.
    for key in list(zg.attrs):
        if key in allowed_keys:
            continue
        del zg.attrs[key]

round_decimals #

round_decimals(group, decimals)

Round scale and translation metadata

Parameters:

Name Type Description Default
group Group

zarr group with ome-zarr metadata

required
decimals int

number of decimals to round

required
Source code in src/cellmap_utils/zarr/metadata.py
def round_decimals(group : zarr.Group, decimals : int):
    """Round scale and translation metadata

    Rounds the 'scale' and 'translation' values of every dataset listed in the
    first multiscales entry and writes the result back to the group attributes.
    Transformations are matched by the key they carry rather than by position,
    so datasets that only define a scale are handled as well.

    Args:
        group (zarr.Group): zarr group with ome-zarr metadata
        decimals (int): number of decimals to round
    """
    multiscales = group.attrs['multiscales']

    # only the first multiscales entry is processed
    for dataset in multiscales[0]['datasets']:
        for transform in dataset['coordinateTransformations']:
            for key in ('scale', 'translation'):
                if key in transform:
                    transform[key] = [round(value, decimals) for value in transform[key]]

    # Reassign explicitly so the change is stored on the group
    # (presumably attrs hands out a decoded copy - TODO confirm).
    group.attrs['multiscales'] = multiscales

access_parent #

access_parent(node)

Get the parent (zarr.Group) of an input zarr array(ds).

Parameters:

Name Type Description Default
node Array or Group

zarr array or group whose parent group should be returned.

required

Raises:

Type Description
RuntimeError

raised if the node sits at the root of the store (its path inside the container is empty), so there is no parent group to return.

Returns:

Type Description

zarr.hierarchy.Group : parent group that contains input group/array

Source code in src/cellmap_utils/zarr/node.py
def access_parent(node: zarr.Group | zarr.Array):
    """
    Get the parent (zarr.Group) of an input zarr array or group.

    Args:
        node (zarr.core.Array or zarr.hierarchy.Group): node whose parent is requested.

    Raises:
        RuntimeError: raised if the node's path inside the container is empty,
        i.e. the node is the root of the store and has no parent.

    Returns:
        zarr.hierarchy.Group : parent group that contains input group/array
    """

    store_path, node_path = separate_store_path(node.store.path, node.path)

    # The recovered path can end with "/" (os.path.join with an empty last
    # component adds one). os.path.split would then return the node itself
    # instead of its parent, so strip trailing separators first.
    node_path = node_path.rstrip("/")

    if node_path == "":
        raise RuntimeError(
            f"{node.name} is in the root group of the {node.store.path} store."
        )

    parent_path = os.path.split(node_path)[0]
    return zarr.open(store=store_path, path=parent_path, mode="a")

repair_zarr_branch #

repair_zarr_branch(input_zarr_path)

A recursive method that adds a missing .zgroup file to any parent zarr group between the input zarr group and the root of the zarr container.

Parameters:

Name Type Description Default
input_zarr_path str

path to a zarr group inside a .zarr container; the path must contain ".zarr".

required
Source code in src/cellmap_utils/zarr/node.py
def repair_zarr_branch(input_zarr_path: str):
    """A recursive method that adds missing .zgroup file in any parent zarr group
       between input zarr group and root of the zarr container.

    The process exits with "Path not found!" when the path does not exist or
    cannot be checked.

    Args:
        input_zarr_path (str): path to a zarr group; it must contain exactly
            one ".zarr" marker, which separates the container from the path
            inside it.

    Raises:
        ValueError: if the path does not contain exactly one ".zarr".
    """
    try:
        zarr_path = input_zarr_path.rstrip("/ ")  # remove unnecessary '/' and ' '
        fs = get_file_system(zarr_path)
        path_exists = fs.exists(zarr_path)
    except Exception:
        path_exists = False
    # The result of exists() has to be checked, otherwise a missing path goes
    # unnoticed. The exit stays outside the try so it is never swallowed.
    if not path_exists:
        sys.exit("Path not found!")

    parts = zarr_path.split(".zarr")
    if len(parts) != 2:
        raise ValueError(
            f"Expected exactly one '.zarr' in the path, got: {zarr_path}"
        )
    z_store, z_path = parts

    try:
        # presumably fails when the location is not a readable zarr node
        read(zarr_path)
    except Exception:
        print("not found, added .zgroup to: ", zarr_path)
        with fs.open(UPath(os.path.join(zarr_path, ".zgroup")), mode="w") as f:
            f.write('{"zarr_format": 2}')

    # Walk one level up until the container root itself has been handled.
    if z_path.strip("/ ") != "":
        repair_zarr_branch(
            os.path.join(f"{z_store}.zarr", os.path.split(z_path)[0].lstrip("/"))
        )

recalibrate_offset #

recalibrate_offset(roi, grid_spacing)

The offset of the roi at multiscale level with scale=grid_spacing must be divisible by grid_spacing. This method would recalibrate offset, if roi grid does not align with grid {scale : grid_spacing, translation : [0.0, 0.0, 0.0]}

Parameters:

Name Type Description Default
roi Group

roi zarr group with multiscale pyramid

required
grid_spacing list[float]

grid spacing, with assumption that translation=[0.0, 0.0, 0.0]

required

Returns:

Type Description
dict[str, list[float]]

Dictionary with two keys: 'scale' (the ROI s0 scale) and 'translation' (the recalibrated offset).

Source code in src/cellmap_utils/zarr/roi.py
def recalibrate_offset(roi: zarr.Group, grid_spacing : list[float]) -> dict[str, list[float]]:
    """The offset of the roi at multiscale level with scale=grid_spacing must be divisible by grid_spacing.
        This method would recalibrate offset, if roi grid does not align with grid {scale : grid_spacing, translation : [0.0, 0.0, 0.0]}

    Args:
        roi (zarr.Group): roi zarr group with multiscale pyramid
        grid_spacing (list[float]): grid spacing, with assumption that translation=[0.0, 0.0, 0.0]

    Returns:
        dict[str, list[float]]: {'scale': ROI s0 scale, 'translation': recalibrated offset}
    """
    from math import log2

    roi_scale, roi_offset = get_s0_level(roi)

    # per-axis level of the roi s0 scale relative to the grid: log2(roi scale / grid spacing)
    roi_level = [
        log2(float(roi_sc) / float(ds_sc))
        for roi_sc, ds_sc in zip(roi_scale, grid_spacing)
    ]

    # calculate roi translation as if it was rescaled to scale=grid_spacing
    roi_tr_at_grid_spacing = [
        tr_n - sc_n * (0.5 - pow(2, -(l_n + 1)))
        for (sc_n, tr_n, l_n) in zip(roi_scale, roi_offset, roi_level)
    ]

    # shift roi offset to align with grid spacing (nearest multiple of the spacing)
    tr_roi_at_s0_correct = [
        round(float(tr) / float(sc)) * sc
        for sc, tr in zip(grid_spacing, roi_tr_at_grid_spacing)
    ]

    # map the aligned offset back to the roi's own level, rounded to 2 decimals
    tr_roi_sn_correct = [
        round((sc * (pow(2, level - 1) - 0.5)) + tr, 2)
        for (sc, tr, level) in zip(grid_spacing, tr_roi_at_s0_correct, roi_level)
    ]

    return {'scale': roi_scale, 'translation' : tr_roi_sn_correct}

separate_store_path #

separate_store_path(store, path)

sometimes you can pass a total os path to node, leading to an empty('') node.path attribute. the correct way is to separate path to container(.n5, .zarr) from path to array within a container.

Parameters:

Name Type Description Default
store string

path to store

required
path string

path array/group (.n5 or .zarr)

required

Returns:

Type Description
(string, string)

returns regularized store and group/array path

Source code in src/cellmap_utils/zarr/store.py
def separate_store_path(store, path):
    """
    sometimes you can pass a total os path to node, leading to
    an empty('') node.path attribute.
    the correct way is to separate path to container
    from path to array within a container.

    The store path is walked upwards one component at a time until a component
    containing ".zarr" is found; the components passed on the way are moved to
    the front of the inner path. Only ".zarr" is matched.

    Args:
        store (string): path to store
        path (string): path to array/group inside the store

    Raises:
        ValueError: if no component of the store path contains ".zarr"
            (previously this ended in unbounded recursion).

    Returns:
        (string, string): returns regularized store and group/array path
    """
    current_store, current_path = store, path
    while True:
        parent, component = os.path.split(current_store)
        if ".zarr" in component:
            return current_store, current_path
        if parent == current_store:
            # os.path.split no longer shortens the path: the top was reached
            raise ValueError(
                f"No '.zarr' container found in store path: {store}"
            )
        # Join only when there is something to join to; os.path.join with an
        # empty last component would leave a trailing "/" on the inner path.
        current_path = (
            os.path.join(component, current_path) if current_path else component
        )
        current_store = parent

validate_ome #

validate_ome(zg)

thin wrapper method for ngff_zarr.validate.

Parameters:

Name Type Description Default
zg Group

the input zarr group with a json schema to validate.

required
Source code in src/cellmap_utils/zarr/validate.py
def validate_ome(zg : zarr.Group):
    """Validate the attributes of a zarr group with ngff_zarr.validate.

    The group's attributes are copied into a plain dict and checked as an
    'image' model of version '0.4' in non-strict mode. Nothing is returned.

    Args:
        zg (zarr.Group): the input zarr group whose attributes are validated.
    """
    attributes = dict(zg.attrs)
    nz.validate(
        ngff_dict=attributes,
        version='0.4',
        model='image',
        strict=False,
    )