airtable

upsert_image #

upsert_image(at_api, ds_name, image_name, image_path, image_title, image_type, institution='HHMI / Janelia Research Campus', challenge=False)

Upsert a record to airtable image table.

Parameters:

Name	Type	Description	Default
`image_table`	`Table`	image airtable object to create references.	required
`ds_name`	`str`	name of the dataset.	required
`image_name`	`str`	name of the image to upsert.	required
`image_path`	`str`	image location.	required
`image_title`	`str`	image title on openorganelle.com.	required
`image_type`	`Literal['human_segmentation', 'em']`	image type	required
`collection_table`	`Table`	collection airtable object to create references.	required
`fibsem_table`	`Table`	fibsem_imaging airtable object to create references.	required
`annotation_table`	`Table`	annotation airtable object to create references.	required

Raises:

Type	Description
`ValueError`	raise value error if multiple records with the same location and name are found in the image table.

Source code in src/cellmap_utils/airtable/upsert/image.py

def upsert_image(
    at_api: api,
    ds_name: str,
    image_name: str,
    image_path: str,
    image_title: str,
    image_type: Literal["human_segmentation", "em"],
    institution: str = "HHMI / Janelia Research Campus",
    challenge: bool = False,
):
    """Upsert a record to airtable image table.

    An existing record is looked up by ``name`` and ``location`` (the image
    path with trailing slashes removed). When no record matches, a new one is
    created; when exactly one matches, it is updated in place.

    Scale, translation and array shape are read from the zarr container at
    ``image_path``. Scale and translation come from the first dataset entry of
    the first ``multiscales`` item in the group attributes. Index 0, 1, 2 of
    each is written to the z, y, x fields of the record respectively.

    Args:
        at_api (api): airtable api instance used to open the image, collection,
            fibsem_imaging, annotation and institution tables. The base id and
            table ids are read from the ``AIRTABLE_BASE_ID``, ``IMAGE_TABLE_ID``,
            ``COLLECTION_TABLE_ID``, ``FIBSEM_TABLE_ID``, ``ANNOTATION_TABLE_ID``
            and ``INSTITUTION_TABLE_ID`` environment variables.
        ds_name (str): name of the dataset.
        image_name (str): name of the image to upsert.
        image_path (str): image location.
        image_title (str): image title on openorganelle.com.
        image_type (Literal['human_segmentation', 'em']): image type.
            "human_segmentation" and "ml_segmentation" are stored with
            value_type "label", anything else with value_type "scalar".
        institution (str): name of the institution record to link to.
        challenge (bool): value written to the "challenge" field.

    Raises:
        ValueError: raise value error if multiple records with the same location and name are found in the image table.
        KeyError: if one of the environment variables listed above is not set.
        IndexError: if no collection record matches ``ds_name`` or no
            institution record matches ``institution`` (the first match is
            indexed directly).
    """

    base_id = os.environ["AIRTABLE_BASE_ID"]
    image_table = at_api.table(base_id, os.environ["IMAGE_TABLE_ID"])
    collection_table = at_api.table(base_id, os.environ["COLLECTION_TABLE_ID"])
    fibsem_table = at_api.table(base_id, os.environ["FIBSEM_TABLE_ID"])
    annotation_table = at_api.table(base_id, os.environ["ANNOTATION_TABLE_ID"])
    institution_table = at_api.table(base_id, os.environ["INSTITUTION_TABLE_ID"])

    location = image_path.rstrip("/")

    existing_records = image_table.all(
        formula=match({"name": image_name, "location": location})
    )

    # Fail fast, before any zarr I/O. The previous check (`> 2`) let exactly
    # two duplicates through, which then matched neither the create nor the
    # update branch and silently did nothing.
    if len(existing_records) > 1:
        raise ValueError("Multiple records with matching input image name found")

    if image_type in ["human_segmentation", "ml_segmentation"]:
        value_type = "label"
    else:
        value_type = "scalar"

    # image_path may point either at a multiscale group or at one array inside
    # it; in the first case the "s0" array is used for the shape.
    input_zarr = read(location)
    if isinstance(input_zarr, zarr.Group):
        zg = input_zarr
        z_arr_name = "s0"
    else:
        zg_path, z_arr_name = os.path.split(location)
        zg = read(zg_path)

    transforms = zg.attrs["multiscales"][0]["datasets"][0]["coordinateTransformations"]
    scale = transforms[0]["scale"]
    offset = transforms[1]["translation"]
    shape = zg[z_arr_name].shape

    # Links to fibsem_imaging and annotation are optional: leave them empty
    # when no record matches instead of failing the whole upsert.
    fibsem_records = fibsem_table.all(formula=match({"name": ds_name}))
    fibsem_imaging = [fibsem_records[0]["id"]] if fibsem_records else []

    annotation_records = annotation_table.all(formula=match({"name": image_name}))
    annotation = [annotation_records[0]["id"]] if annotation_records else []

    record_to_upsert = {
        "name": image_name,
        "collection": [collection_table.all(formula=match({"id": ds_name}))[0]["id"]],
        "location": location,
        "format": "zarr",
        "title": image_title,
        "institution": [
            institution_table.all(formula=match({"name": institution}))[0]["id"]
        ],
        "image_type": image_type,
        "value_type": value_type,
        "size_x_pix": shape[2],
        "size_y_pix": shape[1],
        "size_z_pix": shape[0],
        "resolution_x_nm": scale[2],
        "resolution_y_nm": scale[1],
        "resolution_z_nm": scale[0],
        "offset_x_nm": offset[2],
        "offset_y_nm": offset[1],
        "offset_z_nm": offset[0],
        "fibsem_imaging": fibsem_imaging,
        "challenge": challenge,
        "annotation": annotation,
    }

    if not existing_records:
        image_table.create(record_to_upsert)
    else:
        image_table.update(existing_records[0]["id"], record_to_upsert)

upsert_record_scene_tables #

upsert_record_scene_tables(scene_table, scene_to_image_table, image_table, image_location, scene_data={}, scene_to_image_data={})

This method upserts records into scene and scene_to_image table at the same time.

Parameters:

Name	Type	Description	Default
`scene_table`	`Table`	output table to upsert	required
`scene_to_image_table`	`Table`	output table to upsert	required
`image_table`	`Table`	input table from where the record is being taken	required
`image_location`	`str`	provides value for location filter parameter for the image record	required
`scene_data`	`dict`	custom data to upsert into "scene" table	`{}`
`scene_to_image_data`	`dict`	custom data to upsert in "scene_to_image" table	`{}`

Returns:

Type	Description
`Tuple[dict]`	Tuple[dict]: output records that were upserted

Source code in src/cellmap_utils/airtable/upsert/scene.py

def upsert_record_scene_tables(
    scene_table: api.table.Table,
    scene_to_image_table: api.table.Table,
    image_table: api.table.Table,
    image_location: str,
    scene_data: dict = {},
    scene_to_image_data: dict = {},
) -> Tuple[dict, dict]:
    """This method upserts records into scene and scene_to_image table at the same time.

    The image record is looked up by its ``location`` field. A scene_to_image
    record is then created, or updated when one whose ``location_web`` equals
    the image location already exists. The scene record is created, or updated
    when an existing scene references that scene_to_image record by name.

    Args:
        scene_table (pyairtable.api.table.Table): output table to upsert
        scene_to_image_table (pyairtable.api.table.Table): output table to upsert
        image_table (pyairtable.api.table.Table): input table from where the record is being taken
        image_location (str): provides value for location filter parameter for the image record
        scene_data (dict): custom data to upsert into "scene" table. Only the
            keys "name", "scene_to_image", "description", "crossection_scale"
            and "projection_scale" are used; other keys are ignored.
        scene_to_image_data (dict): custom data to upsert in "scene_to_image"
            table. Only the keys "image", "contrast_start", "contrast_stop"
            and "color" are used; other keys are ignored.

    Returns:
        Tuple[dict, dict]: ``(scene_to_image_ids, scene_ids)``, each a dict
        mapping the image location to the value returned by the table's
        create/update call.

    Raises:
        ValueError: if more than one image record has the given location.
        IndexError: if no image record has the given location.
    """
    # NOTE: the `{}` defaults are kept for interface compatibility; both dicts
    # are only read (via .get) and never mutated here.

    # fetch a record from image table:
    image_records = image_table.all(formula=match({"location": image_location}))
    if len(image_records) > 1:
        raise ValueError(
            "Multiple images with the same path are found in airtable image table. Only one record should exist!"
        )
    if not image_records:
        # Same exception type the bare `image_records[0]` used to raise, but
        # with a message that says what is missing.
        raise IndexError(
            f"No image with location {image_location!r} found in airtable image table."
        )
    image_airt = image_records[0]

    scene_to_image_ids = {}
    scene_ids = {}

    scene_to_image_insert = {
        "image": [image_airt["id"]],
        "contrast_start": 0,
        "contrast_stop": 255,
        "color": "white",
    }
    # update if input data for an upsert record exists
    scene_to_image_insert = {
        key: scene_to_image_data.get(key, val)
        for key, val in scene_to_image_insert.items()
    }

    location = image_airt["fields"]["location"]
    print(location)

    # define whether update or create a record in the scene_to_image table by looking at the value from 'location_web' column
    existing_record_scene_to_image = scene_to_image_table.first(
        formula=match({"location_web": location})
    )

    # add records to scene_to_image table
    if existing_record_scene_to_image is None:
        scene_to_image_ids[location] = scene_to_image_table.create(
            scene_to_image_insert
        )
    else:
        scene_to_image_ids[location] = scene_to_image_table.update(
            existing_record_scene_to_image["id"], scene_to_image_insert
        )

    # add record to scene table
    # define whether update or create a record in the scene table by looking at the value from 'scene_to_image' column
    if existing_record_scene_to_image:
        existing_record_scene = scene_table.first(
            formula=match(
                {"scene_to_image": existing_record_scene_to_image["fields"]["name"]}
            )
        )
    else:
        # a freshly created scene_to_image record cannot be referenced by any scene yet
        existing_record_scene = None

    scene_insert = {
        "name": "Default view",
        "scene_to_image": [scene_to_image_ids[location]["id"]],
        "description": "The default view of the data.",
        "crossection_scale": 10.0,
        "projection_scale": 1000.0,
    }
    # update if input data for an upsert record exists
    scene_insert = {key: scene_data.get(key, val) for key, val in scene_insert.items()}

    if existing_record_scene is None:
        scene_ids[location] = scene_table.create(scene_insert)
    else:
        scene_ids[location] = scene_table.update(
            existing_record_scene["id"], scene_insert
        )

    return (scene_to_image_ids, scene_ids)

get_image_record #

get_image_record(image_path, ds_name, at_api)

Read an image record from airtable and return supabase image, dataset, sample, and acquisition records.

Parameters:

Name	Type	Description	Default
`image_path`	`str`	path to dataset used as a filter parameter for airtable image table records	required
`at_api`	`api`	airtable api instance	required

Returns:

Name	Type	Description
`dict`		returns a dictionary of records, with pydantic models for supabase table records as values.

Source code in src/cellmap_utils/airtable/supabase/air_to_supabase.py

def get_image_record(image_path: str, ds_name: str, at_api: api):
    """Read an image record from airtable and build the matching supabase image record.

    The image record is looked up by ``location`` (``image_path`` with
    trailing slashes removed). When the image has a linked fibsem_imaging
    record and is of type "em", the acquisition parameters of that record are
    attached as ``source``; otherwise ``source`` is ``None``.

    Args:
        image_path (str): path to dataset used as a filter parameter for airtable image table records
        ds_name (str): dataset name; stored as ``dataset_name`` and used to
            build the ``image_stack`` value (``"<ds_name>_groundtruth"``).
        at_api (api): airtable api instance

    Returns:
        SupaImageModel: model of the supabase image table record.

    Raises:
        KeyError: if a required environment variable is not set, or the image
            record lacks a field that is read unconditionally.
        IndexError: if no image record matches ``image_path``.
    """

    # image table (switch between prod and test bases in .env)
    image_table = at_api.table(
        os.environ["AIRTABLE_BASE_ID"], os.environ["IMAGE_TABLE_ID"]
    )
    fibsem_table = at_api.table(
        os.environ["AIRTABLE_BASE_ID"], os.environ["FIBSEM_TABLE_ID"]
    )
    institution_table = at_api.table(
        os.environ["AIRTABLE_BASE_ID"], os.environ["INSTITUTION_TABLE_ID"]
    )

    location = image_path.rstrip("/")
    image_records = image_table.all(formula=match({"location": location}))
    if not image_records:
        # Same exception type as the bare `[0]` used to raise, with context.
        raise IndexError(
            f"No image with location {location!r} found in airtable image table."
        )
    image_record = image_records[0]
    fields = image_record["fields"]

    name = fields["name"]
    display_settings = {
        "invertLUT": False,
        "contrastLimits": {"end": 255, "max": 255, "min": 0, "start": 0},
    }

    content_type_mapping = {
        "em": "em",
        "ml_segmentation": "segmentation",
        "human_segmentation": "segmentation",
    }
    content_type = content_type_mapping[fields["image_type"]]

    source = None
    # Best effort: a missing link, missing fibsem field or unparsable date
    # leaves `source` as None. `except Exception` (rather than a bare except)
    # keeps that behaviour while letting KeyboardInterrupt/SystemExit through.
    # NOTE(review): "color" is only set once the fibsem record was fetched, so
    # an "em" image without a fibsem link gets no color — confirm intended.
    try:
        fibsem_record = fibsem_table.get(fields["fibsem_imaging"][0])
        if fields["image_type"] == "em":
            display_settings["color"] = "white"

            fibsem_fields = fibsem_record["fields"]
            format_string = "%Y-%m-%d"
            start_date = datetime.strptime(
                fibsem_fields["start_date"], format_string
            ).date()
            stop_date = datetime.strptime(
                fibsem_fields["stop_date"], format_string
            ).date()
            source = {
                "bias_V": fibsem_fields["bias_v"],
                # NOTE(review): key says hz, airtable field says mhz, and no
                # conversion is applied — confirm units.
                "scan_hz": fibsem_fields["scan_rate_mhz"],
                "current_nA": fibsem_fields["imaging_current_nA"],
                "duration_days": (stop_date - start_date).days,
                "landing_energy_eV": fibsem_fields["primary_energy_ev"],
            }
    except Exception:
        print("NO FIBSEM RECORD", ds_name)

    # Only referenced by the disabled `doi` argument below.
    if fields["image_type"] == "em":
        pub_name = "Reconstructed FIB-SEM data"
    else:
        pub_name = "Segmentations"

    print(image_record)
    supa_image = SupaImageModel(
        source=source,
        name=name,
        url=image_path,
        description=fields["title"],
        format="zarr",
        display_settings=display_settings,
        sample_type=fields["value_type"],
        content_type=content_type,
        dataset_name=ds_name,
        institution=institution_table.get(fields["institution"][0])["fields"]["name"],
        grid_dims=["z", "y", "x"],
        grid_scale=[fields[f"resolution_{_}_nm"] for _ in ["z", "y", "x"]],
        # translation is fixed at zero; the offsets stored in airtable are not used
        grid_translation=[0.0, 0.0, 0.0],
        # [
        #     image_record["fields"][f"offset_{_}_nm"] for _ in ["z", "y", "x"]
        # ],
        grid_units=["nm", "nm", "nm"],
        grid_index_order="C",
        stage="dev",
        image_stack=f"{ds_name}_groundtruth",
        #doi={"url": image_record["fields"]["doi_link_dataset"][0], "name": pub_name},
    )

    return supa_image

filter_records #

filter_records(records, filter_field, filter_value)

Custom method to filter records by a field value. all(formula=match({})) doesn't work when filtering by reference.

Parameters:

Name	Type	Description	Default
`records`	`list[dict]`	list of all records in a table. Not using table.all() within a method, since it is slow to fetch all the records every time a method is called.	required
`filter_field`	`str`	table field name	required
`filter_value`	`list[str] \| str`	table field value	required

Returns:

Type	Description
`list[dict]`	return records with filter_value for filter_field field

Source code in src/cellmap_utils/airtable/filter/filter.py

def filter_records(records, filter_field: str, filter_value: list[str] | str):
    """Custom method to filter records by a field value. all(formula=match({})) doesn't work when filtering by reference.

    Args:
        records (dict) : list of all records in a table. Not using table.all() within a method, since it slow to fetch all the records every time when a method is called.
        field (str): table field name
        field_value (list[str] | str): table field value

    Returns:
        list[dict]: return records with filter_value for filter_field field
    """

    matches = []
    for record in records:
        try:
            if isinstance(record["fields"][filter_field], list):
                cond = filter_value in record["fields"][filter_field]
            else:
                cond = record["fields"][filter_field] == filter_value
            if cond:
                matches.append(record)
        except:
            pass
    return matches