Skip to content

read_gdf

cellseg_gsontools.read_gdf(fname, silence_warnigns=True)

Read a file into a geodataframe.

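This is a wrapper around geopandas I/O that, when they are missing, tries to add class_name and class_probs columns derived from the properties (or classification) column, and converts the geometry column to shapely geometries.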

Note

Allowed formats:

  • .json,
  • .geojson,
  • .feather,
  • .parquet

Parameters:

Name Type Description Default
fname Union[Path, str]

Path to the input file (.json, .geojson, .feather or .parquet).

required
silence_warnigns bool

Whether to silence warnings, by default True.

True

Raises:

Type Description
ValueError

If suffix is not one of ".json", ".geojson", ".feather", ".parquet".

Returns:

Type Description
GeoDataFrame

gpd.GeoDataFrame: The geodataframe.

Examples:

Read a geojson file that is QuPath-readable.

>>> from cellseg_gsontools.utils import read_gdf
>>> gdf = read_gdf("path/to/file.json")
Source code in cellseg_gsontools/utils.py
def read_gdf(
    fname: Union[Path, str],
    silence_warnigns: bool = True,
) -> gpd.GeoDataFrame:
    """Read a file into a geodataframe.

    This is a wrapper around `geopandas` I/O. After reading, it tries to add
    `class_name` and `class_probs` columns (when they are missing) from the
    `properties` column, or from the `classification` column if there is no
    `properties` column, and converts the `geometry` column with
    `shapely.geometry.shape`.

    Note:
        Allowed formats (the suffix is matched case-sensitively):

        - `.json`,
        - `.geojson`,
        - `.feather`,
        - `.parquet`

    Parameters:
        fname (Union[Path, str]):
            Path to the input file.
        silence_warnigns (bool):
            Whether to silence warnings, by default True. When False, a
            warning is issued if the file is empty or if the `class_name` /
            `class_probs` columns could not be derived.

    Raises:
        ValueError:
            If suffix is not one of ".json", ".geojson", ".feather", ".parquet".

    Returns:
        gpd.GeoDataFrame:
            The geodataframe. An empty input is returned as read, without the
            extra columns.

    Examples:
        Read a geojson file that is QuPath-readable.
        >>> from cellseg_gsontools.utils import read_gdf
        >>> gdf = read_gdf("path/to/file.json")
    """
    fname = Path(fname)
    suffix = fname.suffix
    allowed_formats = (".json", ".geojson", ".feather", ".parquet")
    if suffix not in allowed_formats:
        raise ValueError(
            f"Illegal `format`. Got: {suffix}. Allowed: {allowed_formats}."
        )

    if suffix == ".json":
        df = pd.read_json(fname)
    elif suffix == ".geojson":
        try:
            df = gpd.read_file(fname)
        except Exception:
            # Best-effort fallback: parse the file as plain JSON when
            # geopandas cannot read it.
            df = pd.read_json(fname)
            df = _set_gdf(df)
    elif suffix == ".feather":
        df = gpd.read_feather(fname)
    else:  # ".parquet" -- the only remaining allowed suffix
        df = gpd.read_parquet(fname)

    if df.empty:
        if not silence_warnigns:
            warnings.warn(
                f"Empty geojson file: {fname.name}. Returning empty gdf.",
                stacklevel=2,
            )
        # NOTE(review): for `.json` input this is the plain pandas DataFrame
        # produced by `pd.read_json`, not a GeoDataFrame -- confirm callers
        # are fine with that.
        return df

    property_col = "properties" if "properties" in df.columns else "classification"

    # Deriving the class columns is best-effort: any failure leaves the
    # column out and (optionally) warns instead of aborting the read.
    if "class_name" not in df.columns:
        try:
            df["class_name"] = gdf_apply(df, _get_class, columns=[property_col])
        except Exception:
            if not silence_warnigns:
                warnings.warn(
                    f"Could not find 'name' key in {property_col} column. "
                    "Can't set the `class_name` column to the output gdf.",
                    stacklevel=2,
                )

    if "class_probs" not in df.columns:
        try:
            df["class_probs"] = gdf_apply(df, _get_prob, columns=[property_col])
        except Exception:
            if not silence_warnigns:
                warnings.warn(
                    f"Could not find 'probabilities' key in {property_col} column. "
                    "Can't set the `class_probs` column to the output gdf.",
                    stacklevel=2,
                )

    df["geometry"] = gdf_apply(df, shapely.geometry.shape, columns=["geometry"])
    return gpd.GeoDataFrame(df).set_geometry("geometry")