Source code for towbintools.plotting.boxplots

from itertools import combinations

import bottleneck as bn
import matplotlib.axes
import matplotlib.figure
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from statannotations.Annotator import Annotator
from statannotations.stats.StatTest import StatTest
from statannotations.stats.StatTest import STATTEST_LIBRARY

from .utils_data_processing import rescale_without_flattening
from .utils_plotting import build_legend
from .utils_plotting import get_colors

STATANNOTATIONS_TESTS = STATTEST_LIBRARY.keys()
custom_test = ["Feltz-Miller", "MSLR"]


def _setup_figure(
    df: pd.DataFrame,
    figsize: tuple[float, float] | None,
    titles: list[str] | None,
) -> tuple[matplotlib.figure.Figure, matplotlib.axes.Axes | np.ndarray]:
    """
    Create a figure and axes grid sized to the number of unique ordering groups.

    Parameters:
        df (pandas.DataFrame) : Data DataFrame containing an ``"Order"`` column
            whose unique values determine the number of subplots.
        figsize (tuple[float, float] or None) : Explicit figure size.
            Defaults to ``(6 * n_groups, 10)`` when ``None``.
        titles (list[str] or None) : Subplot titles; set to ``None`` internally
            if the length does not match the number of groups.

    Returns:
        tuple[matplotlib.figure.Figure, matplotlib.axes.Axes or np.ndarray] :
            The created figure and axes (scalar or array depending on group count).
    """
    # Determine figure size
    if figsize is None:
        figsize = (6 * df["Order"].nunique(), 10)
    if titles is not None and len(titles) != df["Order"].nunique():
        print("Number of titles does not match the number of ecdysis events.")
        titles = None

    fig, ax = plt.subplots(
        1,
        df["Order"].nunique(),
        figsize=(figsize[0] + 3, figsize[1]),
        sharey=False,
        layout="constrained",
    )

    return fig, ax


[docs] def feltz_miller_asymptotic_cv_test( sample1: np.ndarray, sample2: np.ndarray ) -> tuple[float, float]: """ Perform the Feltz-Miller asymptotic test for equality of CV on two samples. Adapted from: https://github.com/benmarwick/cvequality/blob/master/R/functions.R Parameters: sample1 (array-like) : First sample values. sample2 (array-like) : Second sample values. Returns: tuple[float, float] : Test statistic ``D_AD`` and two-sided p-value. """ k = 2 n_j = [len(sample1), len(sample2)] s_j = [bn.nanstd(sample1), bn.nanstd(sample2)] x_j = [bn.nanmean(sample1), bn.nanmean(sample2)] n_j, s_j, x_j = np.array(n_j), np.array(s_j), np.array(x_j) m_j = n_j - 1 D = (np.sum(m_j * (s_j / x_j))) / np.sum(m_j) # test statistic D_AD = (np.sum(m_j * (s_j / x_j - D) ** 2)) / (D**2 * (0.5 + D**2)) # D_AD distributes as a Chi-squared distribution with k-1 degrees of freedom p_value = 1 - stats.chi2.cdf(D_AD, k - 1) return D_AD, p_value
def _LRT_STAT(n: np.ndarray, x: np.ndarray, s: np.ndarray) -> np.ndarray: """ Compute the likelihood-ratio test statistic required by ``mslr_test``. Adapted from: https://github.com/benmarwick/cvequality/blob/master/R/functions.R Parameters: n (array-like) : Sample sizes for each group. x (array-like) : Sample means for each group. s (array-like) : Sample standard deviations for each group. Returns: np.ndarray : Concatenated array ``[uh_0, ..., uh_{k-1}, tauh, stat]`` where ``uh`` are the MLE group means, ``tauh`` is the MLE CV, and ``stat`` is the log-likelihood-ratio statistic. """ n = np.asarray(n) x = np.asarray(x) s = np.asarray(s) k = len(x) df = n - 1 ssq = s**2 vsq = df * ssq / n v = np.sqrt(vsq) sn = np.sum(n) # MLES tau0 = np.sum(n * vsq / x**2) / sn iteration = 1 while True: uh = (-x + np.sqrt(x**2 + 4.0 * tau0 * (vsq + x**2))) / (2.0 * tau0) tau = np.sum(n * (vsq + (x - uh) ** 2) / uh**2) / sn if abs(tau - tau0) <= 1.0e-7 or iteration > 30: break iteration += 1 tau0 = tau tauh = np.sqrt(tau) elf = 0.0 clf = 0.0 for j in range(k): clf = ( clf - n[j] * np.log(tauh * uh[j]) - (n[j] * (vsq[j] + (x[j] - uh[j]) ** 2)) / (2.0 * tauh**2 * uh[j] ** 2) ) elf = elf - n[j] * np.log(v[j]) - n[j] / 2.0 stat = 2.0 * (elf - clf) return np.concatenate([uh, [tauh, stat]])
[docs] def mslr_test( sample1: np.ndarray, sample2: np.ndarray, nr: int = 1000 ) -> tuple[float, float]: """ Perform the Modified Signed-Likelihood Ratio Test (MSLR) for equality of CVs. Adapted from: https://github.com/benmarwick/cvequality/blob/master/R/functions.R Parameters: sample1 (array-like) : First sample values. sample2 (array-like) : Second sample values. nr (int) : Number of parametric bootstrap replicates used to calibrate the test statistic. Defaults to ``1000``. Returns: tuple[float, float] : Modified test statistic ``statm`` and two-sided p-value. """ k = 2 n = np.array([len(sample1), len(sample2)]) x = np.array([bn.nanmean(sample1), bn.nanmean(sample2)]) s = np.array([bn.nanstd(sample1), bn.nanstd(sample2)]) gv = np.zeros(nr) df = n - 1 xst0 = _LRT_STAT(n, x, s) uh0 = xst0[:k] tauh0 = xst0[k] stat0 = xst0[k + 1] sh0 = tauh0 * uh0 se0 = tauh0 * uh0 / np.sqrt(n) # PB estimates of the mean and SD of the LRT for ii in range(nr): z = np.random.normal(size=k) x_sim = uh0 + z * se0 ch = np.random.chisquare(df) s_sim = sh0 * np.sqrt(ch / df) xst = _LRT_STAT(n, x_sim, s_sim) gv[ii] = xst[k + 1] am = np.mean(gv) sd = np.std(gv, ddof=1) # end PB estimates statm = np.sqrt(2.0 * (k - 1)) * (stat0 - am) / sd + (k - 1) pval = 1.0 - stats.chi2.cdf(statm, k - 1) return statm, pval
def _annotate_significance( df: pd.DataFrame, conditions_to_plot: list, column: str, boxplot: matplotlib.axes.Axes, significance_pairs: list[tuple] | None, event_index: int, plot_type: str = "boxplot", test: str = "Mann-Whitney", verbose: bool = True, ) -> None: """ Add significance annotations to a single subplot using statannotations. Parameters: df (pandas.DataFrame) : Full data DataFrame with ``"Order"`` and ``"Condition"`` columns. conditions_to_plot (list) : Ordered condition identifiers. column (str) : Column name of the y-variable. boxplot (matplotlib.axes.Axes) : Axes object of the target subplot. significance_pairs (list[tuple] or None) : Explicit pairs to annotate; all pairwise combinations are used when ``None``. event_index (int) : The ``"Order"`` value identifying the current subplot. plot_type (str) : ``"boxplot"`` or ``"violinplot"``. Defaults to ``"boxplot"``. test (str) : Statistical test name. Statannotations built-in tests are supported as well as ``"Feltz-Miller"`` and ``"MSLR"``. Defaults to ``"Mann-Whitney"``. verbose (bool) : If ``True``, print sample sizes and test details. Defaults to ``True``. Returns: None """ # Filter data for the current event df_filtered = df[df["Order"] == event_index] # Print non-NaN counts for each condition print(f"\nSample sizes (non-NaN) for event index {event_index}, column '{column}':") if verbose: for condition in conditions_to_plot: condition_data = df_filtered[df_filtered["Condition"] == condition][column] n = condition_data.notna().sum() print(f"Condition {condition}: n={n}") # Original code continues... if significance_pairs is None: pairs = list(combinations(df["Condition"].unique(), 2)) else: pairs = significance_pairs annotator = Annotator( ax=boxplot, pairs=pairs, data=df_filtered, x="Condition", order=conditions_to_plot, y=column, plot=plot_type, ) if test in STATANNOTATIONS_TESTS: if test != "Mann-Whitney": annotator.configure( test=test, text_format="simple", loc="inside", verbose=verbose, test_short_name=test.capitalize(), ) else: annotator.configure( test=test, text_format="star", loc="inside", verbose=verbose ) else: if test == "Feltz-Miller": custom_long_name = "Feltz-Miller Asymptotic Test" custom_short_name = "Feltz-Miller" custom_func = feltz_miller_asymptotic_cv_test custom_test = StatTest(custom_func, custom_long_name, custom_short_name) annotator.configure( test=custom_test, text_format="simple", loc="inside", verbose=verbose, ) elif test == "MSLR": custom_long_name = "Modified Signed Likelihood Ratio Test" custom_short_name = "MSLR" custom_func = mslr_test custom_test = StatTest(custom_func, custom_long_name, custom_short_name) annotator.configure( test=custom_test, text_format="simple", loc="inside", verbose=verbose, ) else: raise ValueError( f"Test {test} is not supported. Please use one of the following: {STATANNOTATIONS_TESTS + custom_test}" ) annotator.apply_and_annotate() def _add_metric_text( df: pd.DataFrame, conditions_to_plot: list, column: str, ax: matplotlib.axes.Axes, event_index: int, log_scale: bool, test: str = "Mann-Whitney", y_offset_pct: float = 0.1, significant_digits: int = 3, ) -> None: """ Annotate each condition with its relevant summary statistic below the plot area. The statistic displayed depends on the test: median (Mann-Whitney, Kruskal-Wallis, Wilcoxon), mean (t-test, Welch), std (Levene), or CV % (Feltz-Miller, MSLR). Parameters: df (pandas.DataFrame) : Full data DataFrame with ``"Order"`` and ``"Condition"`` columns. conditions_to_plot (list) : Ordered condition identifiers. column (str) : Column name of the y-variable. ax (matplotlib.axes.Axes) : Axes object of the target subplot. event_index (int) : The ``"Order"`` value identifying the current subplot. log_scale (bool) : If ``True``, adjust y-position calculation for log-scale axes. test (str) : Statistical test name; determines which statistic to display. Defaults to ``"Mann-Whitney"``. y_offset_pct (float) : Downward offset of the text box as a fraction of the y-axis range. Defaults to ``0.1``. significant_digits (int) : Number of significant digits in the displayed value. Defaults to ``3``. Returns: None Raises: ValueError : If ``test`` is not in the supported list. """ test_metrics = { "Mann-Whitney": ("median", "M"), "Levene": ("std", "σ"), "t-test": ("mean", "μ"), "Kruskal-Wallis": ("median", "M"), "Welch": ("mean", "μ"), "Wilcoxon": ("median", "M"), "Feltz-Miller": ("cv", "CV"), "MSLR": ("cv", "CV"), } if test not in test_metrics: raise ValueError( f"Test '{test}' not supported. Available tests: {list(test_metrics.keys())}" ) metric_type, symbol = test_metrics[test] data = df[df["Order"] == event_index] y_min, y_max = ax.get_ylim() if log_scale: log_y_min = np.log10(y_min) if y_min > 0 else np.log10(y_max) - 1 log_y_max = np.log10(y_max) log_range = log_y_max - log_y_min y_position = 10 ** (log_y_min - log_range * y_offset_pct) else: y_range = y_max - y_min y_position = y_min - (y_range * y_offset_pct) for i, condition in enumerate(conditions_to_plot): condition_data = data[data["Condition"] == condition][column] if len(condition_data) == 0 or condition_data.isna().all(): continue if metric_type == "mean": metric_value = condition_data.mean() elif metric_type == "median": metric_value = condition_data.median() elif metric_type == "std": metric_value = condition_data.std() elif metric_type == "cv": metric_value = condition_data.std() / condition_data.mean() * 100 if np.isnan(metric_value): continue text = f"{symbol} = {metric_value:.{significant_digits}g}" if metric_type == "cv": text += " %" ax.text( i, y_position, text, ha="center", va="top", weight="bold", bbox=dict( boxstyle="round,pad=0.3", facecolor="white", edgecolor="black", linestyle="-.", alpha=0.8, ), ) if log_scale: ax.set_ylim(10 ** (log_y_min - log_range * (y_offset_pct + 0.04)), y_max) else: ax.set_ylim(y_position - (y_range * 0.04), y_max) def _plot_violinplot( df: pd.DataFrame, conditions_to_plot: list, column: str, color_palette: list, ax: matplotlib.axes.Axes | np.ndarray, titles: list[str] | None, share_y_axis: bool, plot_significance: bool, significance_pairs: list[tuple] | None, log_scale: bool, show_metric: bool = False, test: str = "Mann-Whitney", show_swarm: bool = True, hide_outliers: bool = False, ) -> tuple[list[float], list[float]]: """ Draw violin + swarm subplots for each ordering group. Parameters: df (pandas.DataFrame) : Data with ``"Order"``, ``"Condition"``, and ``column`` columns. conditions_to_plot (list) : Ordered condition identifiers. column (str) : Y-variable column name. color_palette (list) : Colors in the same order as ``conditions_to_plot``. ax (np.ndarray or matplotlib.axes.Axes) : Axes array (or scalar) produced by ``_setup_figure``. titles (list[str] or None) : Subplot titles. share_y_axis (bool) : If ``True``, hide y-axis ticks on all but the first subplot. plot_significance (bool) : If ``True``, add significance brackets. significance_pairs (list[tuple] or None) : Pairs to annotate; all pairs when ``None``. log_scale (bool) : Passed to ``_add_metric_text`` for back-transformation. show_metric (bool) : If ``True``, display summary statistics below the plot. Defaults to ``False``. test (str) : Statistical test for significance annotation. Defaults to ``"Mann-Whitney"``. show_swarm (bool) : If ``True``, overlay a swarm plot on the violin plot. Defaults to ``True``. hide_outliers (bool) : If ``True``, remove data points beyond ±3 std in the swarm plot (violin retains them). Defaults to ``False``. Returns: tuple[list[float], list[float]] : Per-subplot y-axis minima and maxima. """ y_min, y_max = [], [] for event_index in range(df["Order"].nunique()): if share_y_axis: if event_index > 0: ax[event_index].tick_params( axis="y", which="both", left=False, labelleft=False ) if isinstance(ax, np.ndarray): current_ax = ax[event_index] else: current_ax = ax violinplot = sns.violinplot( data=df[df["Order"] == event_index], x="Condition", y=column, order=conditions_to_plot, hue_order=conditions_to_plot, hue="Condition", palette=color_palette, cut=0, inner="box", ax=current_ax, linewidth=2, legend="full", ) plot_df = df.copy() if hide_outliers: data = df[df["Order"] == event_index] for condition in conditions_to_plot: condition_data = data[data["Condition"] == condition] mean = condition_data[column].mean() std = condition_data[column].std() outliers = condition_data[ (condition_data[column] < mean - 3 * std) | (condition_data[column] > mean + 3 * std) ] plot_df.loc[ (plot_df["Order"] == event_index) & (plot_df["Condition"] == condition) & (plot_df[column].isin(outliers[column])), column, ] = np.nan if show_swarm: dot_size = _swarm_dot_size(plot_df, event_index, column) sns.swarmplot( data=plot_df[plot_df["Order"] == event_index], x="Condition", order=conditions_to_plot, y=column, ax=current_ax, alpha=0.5, color="black", dodge=False, size=dot_size, ) current_ax.set_xlabel("") if event_index > 0: current_ax.set_ylabel("") if titles is not None: current_ax.set_title(titles[event_index]) if log_scale: current_ax.set_yscale("log") current_ax.tick_params( axis="x", which="both", bottom=False, top=False, labelbottom=False ) if plot_significance: _annotate_significance( df, conditions_to_plot, column, violinplot, significance_pairs, event_index, plot_type="violinplot", test=test, ) if show_metric: _add_metric_text( df, conditions_to_plot, column, violinplot, event_index, log_scale, test=test, ) min_y, max_y = current_ax.get_ylim() y_min.append(min_y) y_max.append(max_y) return y_min, y_max def _plot_boxplot( df: pd.DataFrame, conditions_to_plot: list, column: str, color_palette: list, ax: matplotlib.axes.Axes | np.ndarray, titles: list[str] | None, share_y_axis: bool, plot_significance: bool, significance_pairs: list[tuple] | None, log_scale: bool, show_metric: bool = False, show_swarm: bool = True, hide_outliers: bool = False, test: str = "Mann-Whitney", return_data: bool = False, ) -> tuple[list[float], list[float]]: """ Draw box + swarm subplots for each ordering group. Parameters: df (pandas.DataFrame) : Data with ``"Order"``, ``"Condition"``, and ``column`` columns. conditions_to_plot (list) : Ordered condition identifiers. column (str) : Y-variable column name. color_palette (list) : Colors in the same order as ``conditions_to_plot``. ax (np.ndarray or matplotlib.axes.Axes) : Axes array (or scalar) produced by ``_setup_figure``. titles (list[str] or None) : Subplot titles. share_y_axis (bool) : If ``True``, hide y-axis ticks on all but the first subplot. plot_significance (bool) : If ``True``, add significance brackets. significance_pairs (list[tuple] or None) : Pairs to annotate; all pairs when ``None``. log_scale (bool) : Passed to seaborn and ``_add_metric_text`` for log-scale handling. show_metric (bool) : If ``True``, display summary statistics below the plot. Defaults to ``False``. show_swarm (bool) : If ``True``, overlay a swarm plot on the box plot. Defaults to ``True``. hide_outliers (bool) : If ``True``, remove data points beyond ±3 std in the swarm plot. Defaults to ``False``. test (str) : Statistical test for significance annotation. Defaults to ``"Mann-Whitney"``. return_data (bool) : Unused; reserved for future use. Defaults to ``False``. Returns: tuple[list[float], list[float]] : Per-subplot y-axis minima and maxima. """ y_min, y_max = [], [] for event_index in range(df["Order"].nunique()): if share_y_axis: if event_index > 0: ax[event_index].tick_params( axis="y", which="both", left=False, labelleft=False ) if isinstance(ax, np.ndarray): current_ax = ax[event_index] else: current_ax = ax boxplot = sns.boxplot( data=df[df["Order"] == event_index], x="Condition", y=column, order=conditions_to_plot, hue_order=conditions_to_plot, hue="Condition", palette=color_palette, showfliers=False, ax=current_ax, dodge=False, linewidth=2, legend="full", linecolor="black", ) plot_df = df.copy() if hide_outliers: data = df[df["Order"] == event_index] for condition in conditions_to_plot: condition_data = data[data["Condition"] == condition] mean = condition_data[column].mean() std = condition_data[column].std() outliers = condition_data[ (condition_data[column] < mean - 3 * std) | (condition_data[column] > mean + 3 * std) ] plot_df.loc[ (plot_df["Order"] == event_index) & (plot_df["Condition"] == condition) & (plot_df[column].isin(outliers[column])), column, ] = np.nan if log_scale: current_ax.set_yscale("log") if show_swarm: dot_size = _swarm_dot_size(plot_df, event_index, column) sns.swarmplot( data=plot_df[plot_df["Order"] == event_index], x="Condition", order=conditions_to_plot, y=column, ax=current_ax, alpha=0.5, color="black", dodge=False, size=dot_size, ) current_ax.set_xlabel("") # Hide y-axis labels and ticks for all subplots except the first one if event_index > 0: current_ax.set_ylabel("") if titles is not None: current_ax.set_title(titles[event_index]) # remove ticks current_ax.tick_params( axis="x", which="both", bottom=False, top=False, labelbottom=False ) if plot_significance: _annotate_significance( df, conditions_to_plot, column, boxplot, significance_pairs, event_index, test=test, ) if show_metric: _add_metric_text( df, conditions_to_plot, column, boxplot, event_index, log_scale, test=test, ) min_y, max_y = current_ax.get_ylim() y_min.append(min_y) y_max.append(max_y) return y_min, y_max def _swarm_dot_size(df: pd.DataFrame, event_index: int, column: str) -> float: """ Compute a dot size for swarm plots that shrinks as sample count grows. Uses ``max(3, 6 * sqrt(20 / max(20, n_max)))`` so dots stay at 6 pt up to 20 points and decay smoothly above that, flooring at 3 pt. Parameters: df (pandas.DataFrame) : Full data DataFrame with ``"Order"`` and ``"Condition"`` columns. event_index (int) : The ``"Order"`` value identifying the current subplot. column (str) : Column name used to count non-NaN values. Returns: float : Dot size in points for ``sns.swarmplot``. """ event_data = df[df["Order"] == event_index] n_max = ( event_data.groupby("Condition")[column].apply(lambda s: s.notna().sum()).max() ) n_max = max(20, int(n_max)) return max(3.0, 6.0 * (20.0 / n_max) ** 0.5) def _set_all_y_limits(ax: np.ndarray, y_min: list[float], y_max: list[float]) -> None: """ Synchronise y-axis limits across all subplots with 10% padding. Parameters: ax (np.ndarray) : Array of Axes objects. y_min (list[float]) : Per-subplot y-axis minima. y_max (list[float]) : Per-subplot y-axis maxima. Returns: None """ global_min = min(y_min) global_max = max(y_max) range_padding = (global_max - global_min) * 0.1 # 5% padding global_min = global_min - range_padding global_max = global_max + range_padding for i in range(len(ax)): ax[i].set_ylim(global_min, global_max) def _set_labels_and_legend( ax: matplotlib.axes.Axes | np.ndarray, fig: matplotlib.figure.Figure, conditions_struct: list, conditions_to_plot: list, column: str, y_axis_label: str | None, legend: dict | None, ) -> None: """ Set the y-axis label and place a shared figure legend to the right of the subplots. Individual subplot legends are removed; a single legend is added to the figure. Parameters: ax (np.ndarray or matplotlib.axes.Axes) : Axes array or scalar. fig (matplotlib.figure.Figure) : Parent figure. conditions_struct (list) : List of condition dicts (used to build legend labels). conditions_to_plot (list) : Ordered condition identifiers. column (str) : Column name; used as the y-axis label fallback. y_axis_label (str or None) : Explicit y-axis label; falls back to ``column``. legend (dict or None) : Legend spec passed to ``build_legend``. Returns: None """ if not isinstance(ax, np.ndarray): ax = [ax] # Set y label for the first plot if y_axis_label is not None: ax[0].set_ylabel(y_axis_label) else: ax[0].set_ylabel(column) # Add legend to the right of the subplots legend_labels = [ build_legend(conditions_struct[condition_id], legend) for condition_id in conditions_to_plot ] legend_handles = ax[0].get_legend_handles_labels()[0] # Remove the legend from all subplots for i in range(len(ax)): ax[i].legend_.remove() # Place legend to the right of the subplots fig.legend( legend_handles, legend_labels, bbox_to_anchor=(1.005, 0.5), loc="center left", title=None, frameon=True, )
[docs] def violinplot( conditions_struct: list, column: str, conditions_to_plot: list, events_to_plot: list[int] | None = None, log_scale: bool = True, figsize: tuple[float, float] | None = None, colors: list | dict | None = None, plot_significance: bool = False, show_metric: bool = False, significance_pairs: list[tuple] | None = None, significance_test: str = "Mann-Whitney", legend: dict | None = None, y_axis_label: str | None = None, titles: list[str] | None = None, share_y_axis: bool = False, show_swarm: bool = True, hide_outliers: bool = True, return_data: bool = False, ) -> matplotlib.figure.Figure: """ Create violin plots for a per-molt measurement across conditions. Each column in ``column`` (axis 1) corresponds to one molt event subplot. Parameters: conditions_struct (list) : List of condition dicts. column (str) : Key of the per-molt measurement array (shape ``(n_worms, n_molts)``). conditions_to_plot (list) : Ordered condition identifiers. events_to_plot (list[int] or None) : Column indices (molt events) to include. All events are plotted when ``None``. Defaults to ``None``. log_scale (bool) : If ``True``, render y-axis in log scale via ``set_yscale``. Defaults to ``True``. figsize (tuple[float, float] or None) : Figure size; auto-sized when ``None``. Defaults to ``None``. colors (list or dict or None) : Color spec passed to ``get_colors``. Defaults to ``None``. plot_significance (bool) : If ``True``, add significance brackets. Defaults to ``False``. show_metric (bool) : If ``True``, display summary statistics below the plot. Defaults to ``False``. significance_pairs (list[tuple] or None) : Pairs to annotate; all pairs when ``None``. Defaults to ``None``. significance_test (str) : Statistical test for annotation. Defaults to ``"Mann-Whitney"``. legend (dict or None) : Legend spec passed to ``build_legend``. Defaults to ``None``. y_axis_label (str or None) : Y-axis label; falls back to ``column``. Defaults to ``None``. titles (list[str] or None) : Subplot titles. Defaults to ``None``. share_y_axis (bool) : If ``True``, synchronise y-axis limits. Defaults to ``False``. show_swarm (bool) : If ``True``, overlay a swarm plot on the violin plot. Defaults to ``True``. hide_outliers (bool) : If ``True``, hide swarm-plot points beyond ±3 std. Defaults to ``True``. return_data (bool) : If ``True``, also return the intermediate DataFrame. Defaults to ``False``. Returns: matplotlib.figure.Figure : The generated figure. tuple[matplotlib.figure.Figure, pandas.DataFrame] : Figure and DataFrame if ``return_data=True``. """ color_palette = get_colors( conditions_to_plot, colors, ) # Prepare data data_list = [] for condition_id in conditions_to_plot: condition_dict = conditions_struct[condition_id] data = condition_dict[column] if not events_to_plot: events_to_plot = range(conditions_struct[condition_id][column].shape[1]) for idx, j in enumerate(events_to_plot): for value in data[:, j]: order = idx data_list.append( { "Condition": condition_id, "Order": order, "Description": condition_dict["description"], column: value, } ) df = pd.DataFrame(data_list) fig, ax = _setup_figure( df, figsize, titles, ) y_min, y_max = _plot_violinplot( df, conditions_to_plot, column, color_palette, ax, titles, share_y_axis, plot_significance, significance_pairs, log_scale=log_scale, show_metric=show_metric, show_swarm=show_swarm, hide_outliers=hide_outliers, test=significance_test, ) _set_labels_and_legend( ax, fig, conditions_struct, conditions_to_plot, column, y_axis_label, legend, ) if share_y_axis: _set_all_y_limits(ax, y_min, y_max) # set the figure to sharey for i in range(len(ax)): ax[i].sharey(ax[0]) fig = plt.gcf() plt.show() if return_data: return fig, df return fig
[docs] def boxplot( conditions_struct: list, column: str, conditions_to_plot: list, events_to_plot: list[int] | None = None, log_scale: bool = True, figsize: tuple[float, float] | None = None, colors: list | dict | None = None, plot_significance: bool = False, show_metric: bool = False, significance_pairs: list[tuple] | None = None, significance_test: str = "Mann-Whitney", legend: dict | None = None, y_axis_label: str | None = None, titles: list[str] | None = None, share_y_axis: bool = False, show_swarm: bool = True, hide_outliers: bool = True, return_data: bool = False, ) -> matplotlib.figure.Figure: """ Create box plots for a per-molt measurement across conditions. Log scaling is handled natively by seaborn (unlike ``violinplot`` which pre-transforms values). Each column in ``column`` (axis 1) corresponds to one molt event subplot. Parameters: conditions_struct (list) : List of condition dicts. column (str) : Key of the per-molt measurement array (shape ``(n_worms, n_molts)``). conditions_to_plot (list) : Ordered condition identifiers. events_to_plot (list[int] or None) : Column indices (molt events) to include. All events are plotted when ``None``. Defaults to ``None``. log_scale (bool) : If ``True``, render y-axis in log scale via ``set_yscale``. Defaults to ``True``. figsize (tuple[float, float] or None) : Figure size; auto-sized when ``None``. Defaults to ``None``. colors (list or dict or None) : Color spec passed to ``get_colors``. Defaults to ``None``. plot_significance (bool) : If ``True``, add significance brackets. Defaults to ``False``. show_metric (bool) : If ``True``, display summary statistics below the plot. Defaults to ``False``. significance_pairs (list[tuple] or None) : Pairs to annotate; all pairs when ``None``. Defaults to ``None``. significance_test (str) : Statistical test for annotation. Defaults to ``"Mann-Whitney"``. legend (dict or None) : Legend spec passed to ``build_legend``. Defaults to ``None``. y_axis_label (str or None) : Y-axis label; falls back to ``column``. Defaults to ``None``. titles (list[str] or None) : Subplot titles. Defaults to ``None``. share_y_axis (bool) : If ``True``, synchronise y-axis limits. Defaults to ``False``. show_swarm (bool) : If ``True``, overlay a swarm plot on the box plot. Defaults to ``True``. hide_outliers (bool) : If ``True``, hide swarm-plot points beyond ±3 std. Defaults to ``True``. return_data (bool) : If ``True``, also return the intermediate DataFrame. Defaults to ``False``. Returns: matplotlib.figure.Figure : The generated figure. tuple[matplotlib.figure.Figure, pandas.DataFrame] : Figure and DataFrame if ``return_data=True``. """ color_palette = get_colors( conditions_to_plot, colors, ) # Prepare data data_list = [] for condition_id in conditions_to_plot: condition_dict = conditions_struct[condition_id] data = condition_dict[column] if not events_to_plot: events_to_plot = range(conditions_struct[condition_id][column].shape[1]) for idx, j in enumerate(events_to_plot): for value in data[:, j]: order = idx data_list.append( { "Condition": condition_id, "Order": order, "Description": condition_dict["description"], # column: np.log10(value) if log_scale else value, column: value, } ) df = pd.DataFrame(data_list) fig, ax = _setup_figure( df, figsize, titles, ) y_min, y_max = _plot_boxplot( df, conditions_to_plot, column, color_palette, ax, titles, share_y_axis, plot_significance, significance_pairs, show_swarm=show_swarm, hide_outliers=hide_outliers, log_scale=log_scale, show_metric=show_metric, test=significance_test, ) _set_labels_and_legend( ax, fig, conditions_struct, conditions_to_plot, column, y_axis_label, legend, ) if share_y_axis: _set_all_y_limits(ax, y_min, y_max) # set the figure to sharey for i in range(len(ax)): ax[i].sharey(ax[0]) fig = plt.gcf() plt.show() if return_data: return fig, df return fig
[docs] def violinplot_larval_stage( conditions_struct: list, column: str, conditions_to_plot: list, aggregation: str = "mean", n_points: int = 100, fraction: tuple[float, float] = (0.2, 0.8), log_scale: bool = True, figsize: tuple[float, float] | None = None, colors: list | dict | None = None, plot_significance: bool = False, significance_pairs: list[tuple] | None = None, significance_test: str = "Mann-Whitney", legend: dict | None = None, y_axis_label: str | None = None, titles: list[str] | None = None, share_y_axis: bool = False, show_metric: bool = False, show_swarm: bool = True, hide_outliers: bool = True, ) -> matplotlib.figure.Figure: """ Create violin plots with per-worm values aggregated within a fraction of each larval stage. If ``column`` does not contain ``"rescaled"``, the series is first rescaled via ``rescale_without_flattening`` to shape ``(n_worms, 4, n_points)``. The middle fraction of each stage (controlled by ``fraction``) is averaged per worm before plotting. Parameters: conditions_struct (list) : List of condition dicts. column (str) : Key of the measurement series. conditions_to_plot (list) : Ordered condition identifiers. aggregation (str) : Per-worm aggregation within the stage fraction; ``"mean"`` or ``"median"``. Defaults to ``"mean"``. n_points (int) : Number of resampled points per larval stage. Defaults to ``100``. fraction (tuple[float, float]) : Start and end fractions of each stage to include in the aggregation. Defaults to ``(0.2, 0.8)``. log_scale (bool) : If ``True``, render y-axis in log scale via ``set_yscale``. Defaults to ``True``. figsize (tuple[float, float] or None) : Figure size; auto-sized when ``None``. Defaults to ``None``. colors (list or dict or None) : Color spec passed to ``get_colors``. Defaults to ``None``. plot_significance (bool) : If ``True``, add significance brackets. Defaults to ``False``. significance_pairs (list[tuple] or None) : Pairs to annotate; all pairs when ``None``. Defaults to ``None``. significance_test (str) : Statistical test for annotation. Defaults to ``"Mann-Whitney"``. legend (dict or None) : Legend spec passed to ``build_legend``. Defaults to ``None``. y_axis_label (str or None) : Y-axis label; falls back to ``column``. Defaults to ``None``. titles (list[str] or None) : Subplot titles. Defaults to ``None``. share_y_axis (bool) : If ``True``, synchronise y-axis limits. Defaults to ``False``. show_metric (bool) : If ``True``, display summary statistics below the plot. Defaults to ``False``. show_swarm (bool) : If ``True``, overlay a swarm plot on the violin plot. Defaults to ``True``. hide_outliers (bool) : If ``True``, hide swarm-plot points beyond ±3 std. Defaults to ``True``. Returns: matplotlib.figure.Figure : The generated figure. """ color_palette = get_colors( conditions_to_plot, colors, ) if "rescaled" not in column: rescaled_column = column + "_rescaled" conditions_struct = rescale_without_flattening( conditions_struct, column, rescaled_column, aggregation, n_points ) column = rescaled_column # Prepare data data_list = [] for condition_id in conditions_to_plot: condition_dict = conditions_struct[condition_id] data = condition_dict[column] for i in range(data.shape[1]): data_of_stage = data[:, i] data_of_stage = data_of_stage[ :, int(fraction[0] * data_of_stage.shape[1]) : int( fraction[1] * data_of_stage.shape[1] ), ] if aggregation == "mean": aggregated_data_of_stage = np.nanmean(data_of_stage, axis=1) elif aggregation == "median": aggregated_data_of_stage = np.nanmedian(data_of_stage, axis=1) for j in range(aggregated_data_of_stage.shape[0]): data_list.append( { "Condition": condition_id, "Order": i, column: aggregated_data_of_stage[j], } ) df = pd.DataFrame(data_list) fig, ax = _setup_figure( df, figsize, titles, ) y_min, y_max = _plot_violinplot( df, conditions_to_plot, column, color_palette, ax, titles, share_y_axis, plot_significance, significance_pairs, log_scale=log_scale, show_metric=show_metric, show_swarm=show_swarm, hide_outliers=hide_outliers, test=significance_test, ) _set_labels_and_legend( ax, fig, conditions_struct, conditions_to_plot, column, y_axis_label, legend, ) if share_y_axis: _set_all_y_limits(ax, y_min, y_max) fig = plt.gcf() plt.show() return fig
[docs] def boxplot_larval_stage( conditions_struct: list, column: str, conditions_to_plot: list, aggregation: str = "mean", n_points: int = 100, fraction: tuple[float, float] = (0.2, 0.8), log_scale: bool = True, figsize: tuple[float, float] | None = None, colors: list | dict | None = None, plot_significance: bool = False, significance_pairs: list[tuple] | None = None, significance_test: str = "Mann-Whitney", legend: dict | None = None, y_axis_label: str | None = None, titles: list[str] | None = None, share_y_axis: bool = False, show_metric: bool = False, show_swarm: bool = True, hide_outliers: bool = True, ) -> matplotlib.figure.Figure: """ Create box plots with per-worm values aggregated within a fraction of each larval stage. Equivalent to ``violinplot_larval_stage`` but renders box plots instead of violin plots. If ``column`` does not contain ``"rescaled"``, the series is first rescaled via ``rescale_without_flattening``. Parameters: conditions_struct (list) : List of condition dicts. column (str) : Key of the measurement series. conditions_to_plot (list) : Ordered condition identifiers. aggregation (str) : Per-worm aggregation within the stage fraction; ``"mean"`` or ``"median"``. Defaults to ``"mean"``. n_points (int) : Number of resampled points per larval stage. Defaults to ``100``. fraction (tuple[float, float]) : Start and end fractions of each stage to include in the aggregation. Defaults to ``(0.2, 0.8)``. log_scale (bool) : If ``True``, render y-axis in log scale via ``set_yscale``. Defaults to ``True``. figsize (tuple[float, float] or None) : Figure size; auto-sized when ``None``. Defaults to ``None``. colors (list or dict or None) : Color spec passed to ``get_colors``. Defaults to ``None``. plot_significance (bool) : If ``True``, add significance brackets. Defaults to ``False``. significance_pairs (list[tuple] or None) : Pairs to annotate; all pairs when ``None``. Defaults to ``None``. significance_test (str) : Statistical test for annotation. Defaults to ``"Mann-Whitney"``. legend (dict or None) : Legend spec passed to ``build_legend``. Defaults to ``None``. y_axis_label (str or None) : Y-axis label; falls back to ``column``. Defaults to ``None``. titles (list[str] or None) : Subplot titles. Defaults to ``None``. share_y_axis (bool) : If ``True``, synchronise y-axis limits. Defaults to ``False``. show_metric (bool) : If ``True``, display summary statistics below the plot. Defaults to ``False``. show_swarm (bool) : If ``True``, overlay a swarm plot on the box plot. Defaults to ``True``. hide_outliers (bool) : If ``True``, hide swarm-plot points beyond ±3 std. Defaults to ``True``. Returns: matplotlib.figure.Figure : The generated figure. """ color_palette = get_colors( conditions_to_plot, colors, ) if "rescaled" not in column: rescaled_column = column + "_rescaled" conditions_struct = rescale_without_flattening( conditions_struct, column, rescaled_column, aggregation, n_points ) column = rescaled_column # Prepare data data_list = [] for condition_id in conditions_to_plot: condition_dict = conditions_struct[condition_id] data = condition_dict[column] for i in range(data.shape[1]): data_of_stage = data[:, i] data_of_stage = data_of_stage[ :, int(fraction[0] * data_of_stage.shape[1]) : int( fraction[1] * data_of_stage.shape[1] ), ] if aggregation == "mean": aggregated_data_of_stage = np.nanmean(data_of_stage, axis=1) elif aggregation == "median": aggregated_data_of_stage = np.nanmedian(data_of_stage, axis=1) for j in range(aggregated_data_of_stage.shape[0]): data_list.append( { "Condition": condition_id, "Order": i, column: aggregated_data_of_stage[j], } ) df = pd.DataFrame(data_list) fig, ax = _setup_figure( df, figsize, titles, ) y_min, y_max = _plot_boxplot( df, conditions_to_plot, column, color_palette, ax, titles, share_y_axis, plot_significance, significance_pairs, hide_outliers, log_scale, show_metric=show_metric, show_swarm=show_swarm, test=significance_test, ) _set_labels_and_legend( ax, fig, conditions_struct, conditions_to_plot, column, y_axis_label, legend, ) if share_y_axis: _set_all_y_limits(ax, y_min, y_max) fig = plt.gcf() plt.show() return fig