Morphometrics Analysis (ADS 2026 — Full QC) - Macaque Corpus Callosum G-Ratio Analysis

This notebook applies the full quality control exclusion database from the QC chapter to the corrected ADS 2026 morphometrics. All axons flagged during visual review (border-touching, sub-pixel artifacts, myelin-dominant artifacts, merge errors, invalid g-ratios) are excluded by their specific axon IDs.

Setup¶

DATA_DIR = Path("data/macaque/data")
RESULTS_DIR = Path("results")
PLOT_TEMPLATE = "plotly_white"

SAMPLE_SLICES = {
    "sample1": ["01", "03", "07"],
    "sample2": ["01", "03", "05"],
    "sample3": ["01", "03", "05"],
    "sample4": ["01", "05", "07"],
    "sample5": ["01", "03", "05"],
    "sample6": ["01", "03", "05"],
    "sample7": ["01", "03", "05"],
    "sample8": ["01", "03", "05"],
}

REGION_LABELS = {
    "sample1": "Genu (1)",
    "sample2": "(2)",
    "sample3": "(3)",
    "sample4": "(4)",
    "sample5": "(5)",
    "sample6": "(6)",
    "sample7": "(7)",
    "sample8": "Splenium (8)",
}

COLUMN_RENAME = {
    "x0 (px)": "x0", "y0 (px)": "y0",
    "axon_area (um^2)": "axon_area", "axon_perimeter (um)": "axon_perimeter",
    "myelin_area (um^2)": "myelin_area", "axon_diam (um)": "axon_diam",
    "myelin_thickness (um)": "myelin_thickness",
    "axonmyelin_area (um^2)": "axonmyelin_area",
    "axonmyelin_perimeter (um)": "axonmyelin_perimeter",
}

PAPER_REF = {
    "sample1": {"mri_g": "0.63\u00b10.05", "agg_g_p1": "0.69\u00b10.05", "agg_g_p2": "0.70", "mean_g_p2": "0.65"},
    "sample2": {"mri_g": "0.68\u00b10.04", "agg_g_p1": "0.68\u00b10.04", "agg_g_p2": "0.72", "mean_g_p2": "0.66"},
    "sample3": {"mri_g": "0.68\u00b10.04", "agg_g_p1": "0.64\u00b10.06", "agg_g_p2": "0.67", "mean_g_p2": "0.62"},
    "sample4": {"mri_g": "0.76\u00b10.03", "agg_g_p1": "0.69\u00b10.04", "agg_g_p2": "0.75", "mean_g_p2": "0.68"},
    "sample5": {"mri_g": "0.79\u00b10.04", "agg_g_p1": "0.72\u00b10.03", "agg_g_p2": "0.72", "mean_g_p2": "0.65"},
    "sample6": {"mri_g": "0.72\u00b10.06", "agg_g_p1": "0.64\u00b10.01", "agg_g_p2": "0.74", "mean_g_p2": "0.66"},
    "sample7": {"mri_g": "0.63\u00b10.04", "agg_g_p1": "0.60\u00b10.04", "agg_g_p2": "0.67", "mean_g_p2": "0.60"},
    "sample8": {"mri_g": "0.78\u00b10.03", "agg_g_p1": "0.74\u00b10.06", "agg_g_p2": "0.75", "mean_g_p2": "0.65"},
}

excl_db = pd.read_csv(RESULTS_DIR / "exclusion_database.csv")
print(f"Exclusion database: {len(excl_db)} axons to exclude")

Exclusion database: 1618 axons to exclude

Load and filter morphometrics¶

Each image’s morphometrics are loaded from the QC pipeline output (results/qa_sample*_*/morphometrics.csv), which uses the same ADS-computed values but preserves the original axon IDs. The exclusion database is then applied to remove all flagged axons by their specific IDs.

all_data = {}
agg_gratio = {}
filter_summary = []

# Ensure slice_id is string for matching
excl_db["slice_id"] = excl_db["slice_id"].astype(str).str.zfill(2)

for sample in sorted(SAMPLE_SLICES.keys()):
    dfs = []
    sample_agg = []
    for slice_id in SAMPLE_SLICES[sample]:
        # Load from QC pipeline output (has original axon IDs as row index)
        qa_folder = RESULTS_DIR / f"qa_{sample}_{slice_id}"
        df = pd.read_csv(qa_folder / "morphometrics.csv")
        df = df.rename(columns=COLUMN_RENAME)
        df = df.drop(columns=[c for c in df.columns if "Unnamed" in str(c)], errors="ignore")
        
        n_before = len(df)
        
        # Get excluded axon IDs for this image
        excl_ids = excl_db[
            (excl_db["sample"] == sample) & (excl_db["slice_id"] == slice_id)
        ]["axon_id"].values
        
        # Exclude by axon ID
        df = df[~df.index.isin(excl_ids)]
        n_after = len(df)
        
        filter_summary.append({
            "Region": REGION_LABELS[sample],
            "Slice": slice_id,
            "Before": n_before,
            "After": n_after,
            "Removed": n_before - n_after,
        })
        
        df["sample"] = sample
        df["slice_id"] = slice_id
        dfs.append(df)

        # Aggregate g-ratio from binary masks
        axon_mask = np.array(Image.open(DATA_DIR / sample / f"{slice_id}_seg-axon.png"))
        myelin_mask = np.array(Image.open(DATA_DIR / sample / f"{slice_id}_seg-myelin.png"))
        avf = np.sum(axon_mask > 0)
        mvf = np.sum(myelin_mask > 0)
        g_agg = np.sqrt(avf / (avf + mvf)) if (avf + mvf) > 0 else np.nan
        sample_agg.append(g_agg)

    combined = pd.concat(dfs, ignore_index=True)
    combined["fiber_diam"] = combined["axon_diam"] + 2 * combined["myelin_thickness"]
    all_data[sample] = combined
    agg_gratio[sample] = sample_agg

display(pd.DataFrame(filter_summary))

Summary statistics¶

The aggregate g-ratio is computed per image from the binary masks using volume fractions: $g_{\text{agg}} = \sqrt{\text{AVF} / \text{FVF}}$ Stikov et al. (2015), then averaged across images within each region.

Columns with a red background are reference values from previous publications Stikov et al. (2015)Stikov et al. (2015).

from IPython.display import HTML

summary_rows = []
for sample in sorted(all_data.keys()):
    df = all_data[sample]
    ag = agg_gratio[sample]
    ref = PAPER_REF[sample]
    summary_rows.append({
        "Region": REGION_LABELS[sample],
        "n": len(df),
        "Fiber diam (\u03bcm)": f"{df['fiber_diam'].mean():.2f} \u00b1 {df['fiber_diam'].std():.2f}",
        "G-ratio (per-axon)": f"{df['gratio'].mean():.2f} \u00b1 {df['gratio'].std():.2f}",
        "G-ratio (aggregate)": f"{np.mean(ag):.2f} \u00b1 {np.std(ag):.2f}",
        "MRI g [S1]": ref["mri_g"],
        "Agg g (hist) [S1]": ref["agg_g_p1"],
        "Agg g [S2]": ref["agg_g_p2"],
        "Mean g [S2]": ref["mean_g_p2"],
    })

summary_df = pd.DataFrame(summary_rows)

paper_cols = ["MRI g [S1]", "Agg g (hist) [S1]", "Agg g [S2]", "Mean g [S2]"]
styled = summary_df.style.map(
    lambda _: "background-color: #fde0e0",
    subset=paper_cols
).hide(axis="index")

display(HTML(styled.to_html()))

Axon diameter vs Fiber diameter¶

Use the slider to switch between CC regions.

samples_sorted = sorted(all_data.keys())
all_df = pd.concat(all_data.values())
x_max = all_df["axon_diam"].max() * 1.05
y_max = all_df["fiber_diam"].max() * 1.05

fig = go.Figure()

for i, sample in enumerate(samples_sorted):
    df = all_data[sample]
    x, y = df["axon_diam"].values, df["fiber_diam"].values
    visible = (i == 0)

    fig.add_trace(go.Scattergl(
        x=x, y=y, mode="markers",
        marker=dict(size=4, color="steelblue", opacity=0.3),
        showlegend=False, visible=visible,
        hovertemplate="Axon: %{x:.3f} \u03bcm<br>Fiber: %{y:.3f} \u03bcm<extra></extra>",
    ))

n_traces = len(samples_sorted)
steps = []
for i, sample in enumerate(samples_sorted):
    df = all_data[sample]
    vis = [False] * n_traces
    vis[i] = True
    step = dict(
        method="update",
        args=[
            {"visible": vis},
            {"title": f"{REGION_LABELS[sample]} (n = {len(df)})"},
        ],
        label=REGION_LABELS[sample],
    )
    steps.append(step)

fig.update_layout(
    sliders=[dict(active=0, steps=steps, currentvalue=dict(prefix="Region: "), pad=dict(t=50))],
    title=f"{REGION_LABELS[samples_sorted[0]]} (n = {len(all_data[samples_sorted[0]])})",
    xaxis_title="Axon diameter (\u03bcm)",
    yaxis_title="Fiber diameter (\u03bcm)",
    xaxis_range=[0, x_max], yaxis_range=[0, y_max],
    template=PLOT_TEMPLATE, height=600, width=800,
)
fig.show()

G-ratio vs Fiber diameter¶

Use the slider to switch between CC regions.

x_max_g = all_df["fiber_diam"].max() * 1.05
y_min_g = max(all_df["gratio"].min() * 0.95, 0)
y_max_g = min(all_df["gratio"].max() * 1.05, 1.0)

fig = go.Figure()

for i, sample in enumerate(samples_sorted):
    df = all_data[sample]
    x, y = df["fiber_diam"].values, df["gratio"].values
    visible = (i == 0)

    fig.add_trace(go.Scattergl(
        x=x, y=y, mode="markers",
        marker=dict(size=4, color="darkorange", opacity=0.3),
        showlegend=False, visible=visible,
        hovertemplate="Fiber: %{x:.3f} \u03bcm<br>G-ratio: %{y:.3f}<extra></extra>",
    ))

n_traces = len(samples_sorted)
steps = []
for i, sample in enumerate(samples_sorted):
    df = all_data[sample]
    vis = [False] * n_traces
    vis[i] = True
    step = dict(
        method="update",
        args=[
            {"visible": vis},
            {"title": f"{REGION_LABELS[sample]} (n = {len(df)})"},
        ],
        label=REGION_LABELS[sample],
    )
    steps.append(step)

fig.update_layout(
    sliders=[dict(active=0, steps=steps, currentvalue=dict(prefix="Region: "), pad=dict(t=50))],
    title=f"{REGION_LABELS[samples_sorted[0]]} (n = {len(all_data[samples_sorted[0]])})",
    xaxis_title="Fiber diameter (\u03bcm)",
    yaxis_title="G-ratio",
    xaxis_range=[0, x_max_g], yaxis_range=[y_min_g, y_max_g],
    template=PLOT_TEMPLATE, height=600, width=800,
)
fig.show()

References¶

Stikov, N., Campbell, J. S. W., Stroh, T., Lavelée, M., Frey, S., Novek, J., Nuara, S., Ho, M.-K., Bedell, B. J., Dougherty, R. F., Leppert, I. R., Boudreau, M., Narayanan, S., Duval, T., Cohen-Adad, J., Picard, P.-A., Gasecka, A., Côté, D., & Pike, G. B. (2015). In vivo histology of the myelin g-ratio with magnetic resonance imaging. NeuroImage, 118, 397–405. 10.1016/j.neuroimage.2015.05.023
Stikov, N., Campbell, J. S. W., Stroh, T., Lavelée, M., Frey, S., Novek, J., Nuara, S., Ho, M.-K., Bedell, B. J., Dougherty, R. F., Leppert, I. R., Boudreau, M., Narayanan, S., Duval, T., Cohen-Adad, J., Picard, P.-A., Gasecka, A., Côté, D., & Pike, G. B. (2015). Quantitative analysis of the myelin g-ratio from electron microscopy images of the macaque corpus callosum. Data in Brief, 4, 368–373. 10.1016/j.dib.2015.05.019