Skip to content

local_distances

cellseg_gsontools.character.local_distances(gdf, spatial_weights, id_col=None, reductions=('mean',), weight_by_area=False, invert=False, parallel=True, num_processes=-1, rm_nhood_cols=True, col_prefix=None, create_copy=True)

Compute the local sum/mean/median/min/max/std distance of the neighborhood distances for each geometry object in a gdf.

Note

Option to weight the nhood values by their area before reductions.

Parameters:

Name Type Description Default
gdf GeoDataFrame

The input GeoDataFrame.

required
spatial_weights W

Libpysal spatial weights object.

required
id_col str

The unique id column in the gdf. If None, this uses set_uid to set it. Defaults to None.

None
reductions Tuple[str, ...]

A tuple of reduction methods for the neighborhood. Each must be one of "sum", "mean", "median", "min", "max", "std". Defaults to ("mean", ).

('mean',)
weight_by_area bool

Flag whether to weight the neighborhood values by the area of the object. Defaults to False.

False
invert bool

Flag whether to invert the distances. Defaults to False.

False
parallel bool

Flag whether to use parallel apply operations when computing the character. Defaults to True.

True
num_processes int, default=-1

The number of processes to use when parallel=True. If -1, this will use all available cores.

-1
rm_nhood_cols bool

Flag, whether to remove the extra neighborhood columns from the result gdf. Defaults to True.

True
col_prefix str

Prefix for the new column names.

None
create_copy bool

Flag whether to create a copy of the input gdf and return that. Defaults to True.

True

Returns:

Type Description
GeoDataFrame

gpd.GeoDataFrame: The input geodataframe with computed distances column added.

Examples:

Compute the area-weighted mean of the neighborhood distances for each object

>>> from cellseg_gsontools.character import local_distances
>>> from cellseg_gsontools.graphs import fit_graph
>>> w = fit_graph(gdf, type="distband", thres=75.0)
>>> local_distances(gdf, spatial_weights=w, reductions=("mean",), weight_by_area=True)
Source code in cellseg_gsontools/character.py
def local_distances(
    gdf: gpd.GeoDataFrame,
    spatial_weights: W,
    id_col: str = None,
    reductions: Tuple[str, ...] = ("mean",),
    weight_by_area: bool = False,
    invert: bool = False,
    parallel: bool = True,
    num_processes: int = -1,
    rm_nhood_cols: bool = True,
    col_prefix: str = None,
    create_copy: bool = True,
) -> gpd.GeoDataFrame:
    """Compute the local sum/mean/median/min/max/std distance of the neighborhood
    distances for each geometry object in a gdf.

    Note:
        Option to weight the nhood values by their area before reductions.

    Parameters:
        gdf (gpd.GeoDataFrame):
            The input GeoDataFrame.
        spatial_weights (libpysal.weights.W):
            Libpysal spatial weights object.
        id_col (str):
            The unique id column in the gdf. If None, this uses `set_uid` to set it
            and the "uid" column is used. Defaults to None.
        reductions (Tuple[str, ...]):
            The reduction methods for the neighborhood. Each must be one of "sum",
            "mean", "median", "min", "max", "std". A single method may also be
            given as a plain string. Defaults to ("mean", ).
        weight_by_area (bool):
            Flag whether to weight the neighborhood values by the area of the object.
            Defaults to False.
        invert (bool):
            Flag whether to invert the distances. Defaults to False.
        parallel (bool):
            Flag whether to use parallel apply operations when computing the character.
            Defaults to True.
        num_processes (int, default=-1):
            The number of processes to use when parallel=True. If -1,
            this will use all available cores.
        rm_nhood_cols (bool):
            Flag, whether to remove the extra neighborhood columns ("nhood",
            "nhood_dists" and, when `weight_by_area=True`, "nhood_areas") from the
            result gdf. Defaults to True.
        col_prefix (str):
            Prefix for the new column names. If None, no prefix is used.
        create_copy (bool):
            Flag whether to create a copy of the input gdf and return that.
            Defaults to True.

    Returns:
        gpd.GeoDataFrame:
            The geodataframe with one new column per reduction, named
            "{col_prefix}nhood_dists_{reduction}" (suffixed with "_area_weighted"
            when `weight_by_area=True`).

    Raises:
        ValueError:
            If any of the given reductions is not one of the allowed methods.

    Examples:
        Compute the area-weighted mean of the neighborhood distances for each object
        >>> from cellseg_gsontools.character import local_distances
        >>> from cellseg_gsontools.graphs import fit_graph
        >>> w = fit_graph(gdf, type="distband", thres=75.0)
        >>> local_distances(
        ...     gdf, spatial_weights=w, reductions=("mean",), weight_by_area=True
        ... )
    """
    # A bare string would otherwise be validated character by character and
    # rejected with a confusing error, so treat it as a single reduction.
    if isinstance(reductions, str):
        reductions = (reductions,)

    allowed = ("sum", "mean", "median", "min", "max", "std")
    if not all(r in allowed for r in reductions):
        raise ValueError(
            f"Illegal reduction in `reductions`. Got: {reductions}. "
            f"Allowed reductions: {allowed}."
        )

    if create_copy:
        gdf = gdf.copy()

    # set uid
    if id_col is None:
        id_col = "uid"
        gdf = set_uid(gdf)

    # get the immediate node neighborhood
    func = partial(neighborhood, spatial_weights=spatial_weights)
    gdf["nhood"] = gdf_apply(
        gdf,
        func,
        columns=[id_col],
        axis=1,
        parallel=parallel,
        num_processes=num_processes,
    )

    # get areas; the helper column needs a real label, otherwise the values
    # end up in a column whose name is `None`.
    area_col = "nhood_areas"
    if weight_by_area:
        func = partial(nhood_vals, values=gdf.geometry.area)
        gdf[area_col] = gdf_apply(
            gdf,
            func,
            columns=["nhood"],
            axis=1,
            parallel=parallel,
            num_processes=num_processes,
        )

    # get distances
    func = partial(nhood_dists, centroids=gdf.centroid, invert=invert)
    gdf["nhood_dists"] = gdf_apply(
        gdf,
        func,
        columns=["nhood"],
        axis=1,
        parallel=parallel,
        num_processes=num_processes,
    )

    col_prefix = "" if col_prefix is None else col_prefix

    # loop over the reduction methods
    for r in reductions:
        columns = ["nhood_dists"]
        new_col = f"{col_prefix}nhood_dists_{r}"
        # Branch on the flag itself rather than on column presence so that a
        # pre-existing, unrelated column can never switch weighting on.
        if weight_by_area:
            columns.append(area_col)
            new_col = f"{col_prefix}nhood_dists_{r}_area_weighted"

        func = partial(reduce, how=r)
        gdf[new_col] = gdf_apply(
            gdf,
            func,
            columns=columns,
            axis=1,
            parallel=parallel,
            num_processes=num_processes,
        )

    if rm_nhood_cols:
        labs = ["nhood", "nhood_dists"]
        if weight_by_area:
            labs.append(area_col)
        gdf = gdf.drop(labels=labs, axis=1)

    return gdf