Source code for laion_fmri.download

"""Download logic for the LAION-fMRI dataset."""

import sys

from laion_fmri._constants import (
    LICENSE_AGREEMENT_TEXT,
    TERMS_OF_USE_TEXT,
    resolve_subject_id,
)
from laion_fmri._errors import LicenseNotAcceptedError
from laion_fmri._laion_fmri_fetch import fetch_laion_fmri
from laion_fmri._paths import license_marker_path, tou_marker_path
from laion_fmri.config import get_data_dir
from laion_fmri.discovery import get_subjects


def _check_license_accepted(data_dir):
    """Report whether the dataset-license marker is present.

    Parameters
    ----------
    data_dir : str
        Path to the data directory.

    Returns
    -------
    bool
        Result of ``exists()`` on the marker path that
        ``license_marker_path`` derives from ``data_dir``.
    """
    marker = license_marker_path(data_dir)
    return marker.exists()


def _write_license_marker(data_dir):
    """Record acceptance of the dataset license on disk.

    Parameters
    ----------
    data_dir : str
        Path to the data directory.
    """
    marker_file = license_marker_path(data_dir)
    # Create the containing directory (and any missing ancestors) first;
    # an already-existing directory is not an error.
    marker_dir = marker_file.parent
    marker_dir.mkdir(exist_ok=True, parents=True)
    marker_file.touch()


def _prompt_license():
    """Show the dataset license text and read the user's answer.

    The text is written to standard output and flushed before a single
    line is read with ``input()``.

    Returns
    -------
    bool
        True only when the answer, with surrounding whitespace removed,
        is exactly "I AGREE" (the comparison is case-sensitive).
    """
    stream = sys.stdout
    stream.write(LICENSE_AGREEMENT_TEXT)
    # Flush so the text is visible before blocking on the answer.
    stream.flush()
    answer = input()
    return answer.strip() == "I AGREE"


def _check_tou_accepted(data_dir):
    """Report whether the terms-of-use marker is present.

    Parameters
    ----------
    data_dir : str
        Path to the data directory.

    Returns
    -------
    bool
        Result of ``exists()`` on the marker path that
        ``tou_marker_path`` derives from ``data_dir``.
    """
    marker = tou_marker_path(data_dir)
    return marker.exists()


def _write_tou_marker(data_dir):
    """Record acceptance of the terms of use on disk.

    Parameters
    ----------
    data_dir : str
        Path to the data directory.
    """
    marker_file = tou_marker_path(data_dir)
    # Create the containing directory (and any missing ancestors) first;
    # an already-existing directory is not an error.
    marker_dir = marker_file.parent
    marker_dir.mkdir(exist_ok=True, parents=True)
    marker_file.touch()


def _prompt_tou():
    """Show the terms-of-use text and read the user's answer.

    The text is written to standard output and flushed before a single
    line is read with ``input()``.

    Returns
    -------
    bool
        True only when the answer, with surrounding whitespace removed,
        is exactly "I AGREE" (the comparison is case-sensitive).
    """
    stream = sys.stdout
    stream.write(TERMS_OF_USE_TEXT)
    # Flush so the text is visible before blocking on the answer.
    stream.flush()
    answer = input()
    return answer.strip() == "I AGREE"


def accept_licenses(include_stimuli=False):
    """Run the license-acceptance flow without downloading anything.

    These are the same prompts ``download()`` goes through on first use:

    * The dataset license (CC0 1.0) is presented unless its marker file
      already exists in the data directory.
    * The stimulus terms of use are presented when ``include_stimuli``
      is True and their marker file does not exist yet.

    Each accepted agreement writes its marker file, so subsequent
    ``download(...)`` calls won't prompt for it again.

    Parameters
    ----------
    include_stimuli : bool
        If True, also prompt for the stimulus license.

    Raises
    ------
    LicenseNotAcceptedError
        If the dataset license is declined.
    RuntimeError
        If the stimulus license is declined when requested.
    """
    data_dir = get_data_dir()

    # Dataset license: prompt only while no acceptance marker exists.
    if not _check_license_accepted(data_dir):
        agreed = _prompt_license()
        if not agreed:
            raise LicenseNotAcceptedError(
                "Dataset license must be accepted before downloading."
            )
        _write_license_marker(data_dir)

    # Stimulus terms of use: relevant only when stimuli were requested
    # and have not been accepted before.
    if not include_stimuli:
        return
    if _check_tou_accepted(data_dir):
        return

    agreed = _prompt_tou()
    if not agreed:
        raise RuntimeError(
            "Terms of use must be accepted to download stimuli."
        )
    _write_tou_marker(data_dir)
def download(
    subject,
    ses=None,
    task=None,
    space=None,
    desc=None,
    stat=None,
    suffix=None,
    extension=None,
    include_stimuli=False,
    n_jobs=1,
):
    """Download dataset files for a subject, narrowed by BIDS entities.

    The download is **idempotent**: a file whose local size already
    matches the S3 size is skipped, so re-running after an interrupted
    transfer only fetches what's missing.

    Parameters
    ----------
    subject : int, str, or "all"
        Subject identifier (BIDS ID, integer index, or "all").
    ses, task, space, desc, stat : str or list[str], optional
        BIDS-entity filters. Each accepts a bare value (``ses="04"``)
        or the full BIDS token (``ses="ses-04"``). A list narrows to
        multiple values. Files that don't carry an entity are not
        excluded by a filter on it (so subject-level summaries survive
        a ``ses=`` filter).
    suffix : str or list[str], optional
        BIDS suffix filter (``"statmap"``, ``"events"``, ...).
    extension : str or list[str], optional
        File extension filter (``"nii.gz"``, ``"tsv"``, ...).
    include_stimuli : bool
        Whether to include stimulus images (requires ToU acceptance).
    n_jobs : int
        Number of parallel download workers (``aws s3 cp``
        subprocesses). ``1`` (default) is sequential.

    Raises
    ------
    SubjectNotFoundError
        If the subject identifier is invalid.
    LicenseNotAcceptedError
        If the dataset license is declined.
    RuntimeError
        If ``include_stimuli`` is true and the terms of use are
        declined.
    """
    data_dir = get_data_dir()

    # Resolve the identifier once, up front: an invalid subject still
    # fails before any license prompt, and the resolved ID is reused
    # below instead of being looked up a second time.
    download_all = subject == "all"
    resolved_id = None if download_all else resolve_subject_id(subject)

    # Reuse the shared acceptance flow so the prompts, marker files and
    # error messages cannot drift from ``accept_licenses``.
    accept_licenses(include_stimuli=include_stimuli)

    # The subject list is only queried once the licenses are in place.
    subjects = get_subjects() if download_all else [resolved_id]

    for sub_id in subjects:
        fetch_laion_fmri(
            data_dir,
            subject=sub_id,
            ses=ses,
            task=task,
            space=space,
            desc=desc,
            stat=stat,
            suffix=suffix,
            extension=extension,
            include_stimuli=include_stimuli,
            n_jobs=n_jobs,
        )