# Source code for laion_fmri.captions
"""Access per-stimulus captions for the LAION-fMRI images.
Each stimulus carries a small set of short *human* captions (collected on
CloudResearch Connect). Shared non-OOD stimuli additionally carry one
*AI* caption (a GPT-generated description). The target is:

* **shared** images (seen by every participant) get **5 human captions**
  and, for non-OOD images, **1 AI caption**
* **unique** images (one participant only) get **3 human captions** and
  no AI caption
* **OOD** images get their target human captions and no AI caption

Together they give you a small set of independent natural-language
descriptions per image, useful for caption-conditioned modelling,
retrieval, or quick qualitative checks.

Files on disk:

.. code-block:: text

    stimuli/
        task-images_desc-captions.csv

The CSV is long-form (one row per caption) with columns:

======================= =====================================================
``image_name``          Stimulus filename. Join key against
                        ``task-images_metadata.csv``.
``caption_idx``         Position within the image. Rank ``1`` is the
                        highest-quality human caption; ranks go up to ``3``
                        for unique images and up to ``5`` for shared images.
                        The AI caption (if any) gets ``0``.
``source``              ``"human"`` or ``"ai"``.
``caption``             The caption text.
``origin_collection``   Which collection the caption came from
                        (CloudResearch Connect batch labels for humans,
                        model name like ``"gpt-5.1"`` for AI).
``participant_id``      CloudResearch Connect participant identifier
                        (NaN for AI).
``ai_model``            Model name (NaN for human captions).
======================= =====================================================

All images have their target human-caption count. AI captions are
provided for shared non-OOD images only.
You normally reach :class:`Captions` through the :class:`~laion_fmri.Stimuli`
hub:
>>> import laion_fmri
>>> stim = laion_fmri.load_stimuli()
>>> stim.captions.human("shared_12rep_LAION_cluster_1003_i0.jpg")
['a hand with light pink painted nails with flower designs',
'A hand with finger painted nails with flowers in them',
...]
>>> stim.captions.ai("shared_12rep_LAION_cluster_1003_i0.jpg")
'A hand with short, pale pink polished nails features delicate floral nail art on two fingers.'
For a single row-level DataFrame of every caption attached to an image:
>>> stim.captions.get("shared_12rep_LAION_cluster_1003_i0.jpg")
"""
from __future__ import annotations
from pathlib import Path
import pandas as pd
from laion_fmri._paths import captions_path
from laion_fmri.config import get_data_dir
[docs]
class Captions:
    """Lazy reader for the per-stimulus captions CSV.

    The CSV is read the first time :attr:`metadata` (or any method that
    needs caption rows) is used; the parsed frame and a per-image lookup
    are then cached on the instance.

    Parameters
    ----------
    data_dir : str or Path, optional
        Override the configured data directory. Defaults to
        :func:`laion_fmri.config.get_data_dir`.

    Raises
    ------
    FileNotFoundError
        If ``stimuli/task-images_desc-captions.csv`` is not present.
        Captions are a public stimulus-side metadata file; run
        ``laion-fmri download-captions`` (or
        :func:`laion_fmri.download.download_captions`) to fetch them.
    """

    def __init__(self, data_dir=None):
        self.data_dir = (
            Path(data_dir) if data_dir is not None else Path(get_data_dir())
        )
        self._csv_path = captions_path(self.data_dir)
        # Fail at construction time so a missing download is reported
        # immediately rather than on the first (lazy) caption lookup.
        if not self._csv_path.exists():
            raise FileNotFoundError(
                f"Captions not found at {self._csv_path}. "
                "Run `laion-fmri download-captions` first."
            )
        # Both caches are filled together by the ``metadata`` property.
        self._meta: pd.DataFrame | None = None
        self._by_image: dict[str, pd.DataFrame] | None = None

    # ── shape / inventory ─────────────────────────────────────

    @property
    def metadata(self) -> pd.DataFrame:
        """The captions CSV as a DataFrame (one row per caption).

        Columns: ``image_name``, ``caption_idx``, ``source``,
        ``caption``, ``origin_collection``, ``participant_id``,
        ``ai_model``.

        Raises
        ------
        KeyError
            If the CSV has no ``image_name`` column. Nothing is cached in
            that case, so the error is raised again on the next access.
        """
        if self._meta is None:
            # Build both structures in locals and publish them together:
            # if the grouping fails, no half-initialised cache is left
            # behind that would make later lookups silently come back
            # empty.
            frame = pd.read_csv(self._csv_path)
            lookup = {
                name: group.reset_index(drop=True)
                for name, group in frame.groupby("image_name")
            }
            self._meta, self._by_image = frame, lookup
        return self._meta

    def __len__(self) -> int:
        """Number of caption rows (not the number of images)."""
        return len(self.metadata)

    def images(self) -> list[str]:
        """Image names that have at least one caption."""
        _ = self.metadata  # make sure the per-image lookup is populated
        return list((self._by_image or {}).keys())

    def __contains__(self, image_name: str) -> bool:
        """Whether ``image_name`` has at least one caption row."""
        _ = self.metadata  # make sure the per-image lookup is populated
        return image_name in (self._by_image or {})

    # ── per-image access ──────────────────────────────────────

    def get(self, image_name: str) -> pd.DataFrame:
        """Return all captions for one image as a DataFrame.

        Returns an **empty DataFrame** (not an error) when the image has
        no captions. Rows are ordered by ``caption_idx``: AI first
        (``idx=0``), then humans in rank order. The result is a new
        frame with a fresh ``0..n-1`` index.
        """
        rows = self._rows_for(image_name)
        return rows.sort_values("caption_idx").reset_index(drop=True)

    def human(
        self,
        image_name: str,
        limit: int | None = None,
    ) -> list[str]:
        """Human captions for ``image_name`` in rank order.

        Parameters
        ----------
        image_name : str
        limit : int, optional
            Cap to the top-``limit`` captions. ``None`` (default) returns
            all available (currently up to five). ``0`` yields an empty
            list.

        Returns
        -------
        list of str
            An **empty list** if the image has no human captions.

        Raises
        ------
        ValueError
            If ``limit`` is negative.
        """
        # ``DataFrame.head(-n)`` means "all but the last n", which would
        # silently drop captions instead of capping them.
        if limit is not None and limit < 0:
            raise ValueError("limit must be non-negative or None")
        rows = self._rows_for(image_name)
        if rows.empty:
            return []
        humans = rows[rows["source"] == "human"].sort_values("caption_idx")
        if limit is not None:
            humans = humans.head(limit)
        return humans["caption"].tolist()

    def list(
        self,
        image_name: str,
        source: str | None = None,
    ) -> list[str]:
        """Captions for ``image_name`` as a list of strings.

        Parameters
        ----------
        image_name : str
        source : {"human", "ai"}, optional
            Restrict to one source. ``None`` (default) returns all
            available captions in ``caption_idx`` order.

        Raises
        ------
        ValueError
            If ``source`` is not ``"human"``, ``"ai"`` or ``None``.
        """
        # Validate before touching the CSV so that a bad argument is
        # reported as such even when the file cannot be read.
        if source is not None and source not in {"human", "ai"}:
            raise ValueError("source must be 'human', 'ai', or None")
        rows = self.get(image_name)
        if source is not None:
            rows = rows[rows["source"] == source]
        return rows["caption"].tolist()

    def ai(self, image_name: str) -> str | None:
        """AI caption for ``image_name``, or ``None`` if not available.

        AI captions are present for shared non-OOD images only. If the
        image has more than one ``"ai"`` row, the first one in file order
        is returned.
        """
        rows = self._rows_for(image_name)
        if rows.empty:
            return None
        ai_rows = rows[rows["source"] == "ai"]
        if ai_rows.empty:
            return None
        return str(ai_rows.iloc[0]["caption"])

    # ── internals ─────────────────────────────────────────────

    def _rows_for(self, image_name: str) -> pd.DataFrame:
        """Cached rows for ``image_name`` (file order), or an empty frame.

        The empty frame is a zero-row slice of :attr:`metadata`, so it
        still carries every column and callers can filter or sort it
        without special-casing.
        """
        frame = self.metadata  # also populates ``self._by_image``
        found = (self._by_image or {}).get(image_name)
        if found is None:
            # Only build the empty slice on a miss.
            return frame.iloc[0:0]
        return found