# Source code for laion_fmri.captions

"""Access per-stimulus captions for the LAION-fMRI images.

Each stimulus carries a small set of short *human* captions (collected on
CloudResearch Connect). Shared non-OOD stimuli additionally carry one
*AI* caption (a GPT-generated description). The target caption counts are:

* **shared** images (seen by every participant) get **5 human captions**
  and, for non-OOD images, **1 AI caption**
* **unique** images (one participant only) get **3 human captions** and
  no AI caption
* **OOD** images get their target human captions and no AI caption

Together they give you a small set of independent natural-language
descriptions per image, useful for caption-conditioned modelling,
retrieval, or quick qualitative checks.

Files on disk:

.. code-block:: text

   stimuli/
     task-images_desc-captions.csv

The CSV is long-form (one row per caption) with columns:

=======================  =====================================================
``image_name``           Stimulus filename. Join key against
                         ``task-images_metadata.csv``.
``caption_idx``          Position within the image. Rank ``1`` is the
                         highest-quality human caption; ranks go up to ``3``
                         for unique images and up to ``5`` for shared images.
                         The AI caption (if any) gets ``0``.
``source``               ``"human"`` or ``"ai"``.
``caption``              The caption text.
``origin_collection``    Which collection the caption came from
                         (CloudResearch Connect batch labels for humans,
                         model name like ``"gpt-5.1"`` for AI).
``participant_id``       CloudResearch Connect participant identifier
                         (NaN for AI).
``ai_model``             Model name (NaN for human captions).
=======================  =====================================================

All images have their target human-caption count. AI captions are
provided for shared non-OOD images only.

You normally reach :class:`Captions` through the :class:`~laion_fmri.Stimuli`
hub:

>>> import laion_fmri
>>> stim = laion_fmri.load_stimuli()
>>> stim.captions.human("shared_12rep_LAION_cluster_1003_i0.jpg")
['a hand with light pink painted nails with flower designs',
 'A hand with finger painted nails with flowers in them',
 ...]
>>> stim.captions.ai("shared_12rep_LAION_cluster_1003_i0.jpg")
'A hand with short, pale pink polished nails features delicate floral nail art on two fingers.'

For a single row-level DataFrame of every caption attached to an image:

>>> stim.captions.get("shared_12rep_LAION_cluster_1003_i0.jpg")
"""

from __future__ import annotations

from pathlib import Path

import pandas as pd

from laion_fmri._paths import captions_path
from laion_fmri.config import get_data_dir


class Captions:
    """Lazy reader for the per-stimulus captions CSV.

    The CSV is read on first access and cached; a per-image lookup is
    built from it the first time an image-level query is made.

    Parameters
    ----------
    data_dir : str or Path, optional
        Override the configured data directory. Defaults to
        :func:`laion_fmri.config.get_data_dir`.

    Raises
    ------
    FileNotFoundError
        If ``stimuli/task-images_desc-captions.csv`` is not present.
        Captions are a public stimulus-side metadata file; run
        ``laion-fmri download-captions`` (or
        :func:`laion_fmri.download.download_captions`) to fetch them.
    """

    def __init__(self, data_dir=None):
        self.data_dir = (
            Path(data_dir) if data_dir is not None
            else Path(get_data_dir())
        )
        self._csv_path = captions_path(self.data_dir)
        # Fail at construction time so a missing download is reported
        # up front rather than on the first (lazy) query.
        if not self._csv_path.exists():
            raise FileNotFoundError(
                f"Captions not found at {self._csv_path}. "
                "Run `laion-fmri download-captions` first."
            )
        # Both caches are filled lazily; ``None`` means "not loaded yet".
        self._meta: pd.DataFrame | None = None
        self._by_image: dict[str, pd.DataFrame] | None = None

    # ── shape / inventory ─────────────────────────────────────

    @property
    def metadata(self) -> pd.DataFrame:
        """The captions CSV as a DataFrame (one row per caption).

        Columns: ``image_name``, ``caption_idx``, ``source``,
        ``caption``, ``origin_collection``, ``participant_id``,
        ``ai_model``.

        The file is read once and the same DataFrame object is returned
        on every access.
        """
        if self._meta is None:
            self._meta = pd.read_csv(self._csv_path)
        return self._meta

    def __len__(self) -> int:
        """Number of caption rows (not the number of images)."""
        return len(self.metadata)

    def images(self) -> list[str]:
        """Image names that have at least one caption."""
        return list(self._index())

    def __contains__(self, image_name: str) -> bool:
        """Whether ``image_name`` has at least one caption."""
        return image_name in self._index()

    # ── per-image access ──────────────────────────────────────

    def get(self, image_name: str) -> pd.DataFrame:
        """Return all captions for one image as a DataFrame.

        Returns an **empty DataFrame** (not an error) when the image
        has no captions. Rows are ordered by ``caption_idx``: AI first
        (``idx=0``), then humans in rank order.
        """
        rows = self._rows_for(image_name)
        return rows.sort_values("caption_idx").reset_index(drop=True)

    def human(
        self,
        image_name: str,
        limit: int | None = None,
    ) -> list[str]:
        """Human captions for ``image_name`` in rank order.

        Parameters
        ----------
        image_name : str
        limit : int, optional
            Cap to the top-``limit`` captions. ``None`` (default)
            returns all available (currently up to five). ``0`` returns
            an empty list.

        Returns
        -------
        list of str
            An **empty list** if the image has no human captions.

        Raises
        ------
        ValueError
            If ``limit`` is negative.
        """
        # ``DataFrame.head(-n)`` means "everything except the last n
        # rows", which would silently return the wrong captions rather
        # than capping the result -- reject it explicitly.
        if limit is not None and limit < 0:
            raise ValueError(
                "limit must be a non-negative integer or None"
            )
        rows = self._rows_for(image_name)
        if rows.empty:
            return []
        humans = rows[rows["source"] == "human"].sort_values("caption_idx")
        if limit is not None:
            humans = humans.head(limit)
        return humans["caption"].tolist()

    def list(
        self,
        image_name: str,
        source: str | None = None,
    ) -> list[str]:
        """Captions for ``image_name`` as a list of strings.

        Parameters
        ----------
        image_name : str
        source : {"human", "ai"}, optional
            Restrict to one source. ``None`` (default) returns all
            available captions in ``caption_idx`` order.

        Raises
        ------
        ValueError
            If ``source`` is not ``"human"``, ``"ai"`` or ``None``.
        """
        # Validate before touching the data so a bad argument is
        # reported without first loading the CSV.
        if source is not None and source not in {"human", "ai"}:
            raise ValueError("source must be 'human', 'ai', or None")
        rows = self.get(image_name)
        if source is not None:
            rows = rows[rows["source"] == source]
        return rows["caption"].tolist()

    def ai(self, image_name: str) -> str | None:
        """AI caption for ``image_name``, or ``None`` if not available.

        AI captions are present for shared non-OOD images only.
        """
        rows = self._rows_for(image_name)
        if rows.empty:
            return None
        ai_rows = rows[rows["source"] == "ai"]
        if ai_rows.empty:
            return None
        return str(ai_rows.iloc[0]["caption"])

    # ── internals ─────────────────────────────────────────────

    def _index(self) -> dict[str, pd.DataFrame]:
        """Return the cached ``image_name`` -> rows lookup, building it once."""
        if self._by_image is None:
            self._by_image = {
                name: group.reset_index(drop=True)
                for name, group in self.metadata.groupby("image_name")
            }
        return self._by_image

    def _rows_for(self, image_name: str) -> pd.DataFrame:
        """Rows for ``image_name``; an empty frame with the CSV's columns if unknown."""
        rows = self._index().get(image_name)
        if rows is None:
            # Zero-row slice keeps the column layout so callers can
            # filter and sort without special-casing missing images.
            return self.metadata.iloc[0:0]
        return rows