Source code for laion_fmri.io
"""Low-level file loaders for laion_fmri."""
import nibabel as nib
import numpy as np
import pandas as pd
[docs]
def load_nifti_mask(path):
    """Read a NIfTI mask and flatten it into a boolean vector.

    Parameters
    ----------
    path : str or Path
        Location of the mask image; converted with ``str()`` before
        being handed to nibabel.

    Returns
    -------
    np.ndarray
        Shape ``(n_total_voxels,)``, dtype bool. The voxel grid is
        flattened in C order and every nonzero value maps to ``True``.
    """
    mask_img = nib.load(str(path))
    voxels = np.asarray(mask_img.dataobj)
    # Default ``ravel`` order is C; callers that index flattened data
    # with this mask must flatten the same way.
    flat = voxels.ravel()
    return flat.astype(bool)
[docs]
def load_nifti_data(path, mask_path):
    """Extract the in-mask voxel values of a 3-D NIfTI image.

    Parameters
    ----------
    path : str or Path
        Image whose values are read.
    mask_path : str or Path
        Brain mask, loaded through ``load_nifti_mask``.

    Returns
    -------
    np.ndarray
        Shape ``(n_brain_voxels,)``, dtype float32.
    """
    source = nib.load(str(path))
    # Flatten with the default C order so positions line up with the
    # flattened mask returned by ``load_nifti_mask``.
    flat_values = np.asarray(source.dataobj, dtype=np.float32).ravel()
    inside_brain = load_nifti_mask(mask_path)
    return flat_values[inside_brain]
[docs]
def load_nifti_4d(path, mask_path, streaming=False):
    """Extract the in-mask voxel values of every volume in a 4-D NIfTI.

    Each volume becomes one row of the result.

    Parameters
    ----------
    path : str or Path
        4-D image to read.
    mask_path : str or Path
        Brain mask, loaded through ``load_nifti_mask``.
    streaming : bool
        If False (default), the whole 4-D array is materialized
        first and then masked volume by volume. A ``.nii.gz`` is
        decompressed only once this way, which suits the bucket's
        compressed files; peak memory is the full 4-D file plus the
        masked output, so a real session is ~12 GB. If True, volumes
        are read one at a time, so peak memory is one volume plus
        the masked output. **Streaming is only fast on raw
        uncompressed ``.nii``** (or with the file already in OS
        cache); on ``.nii.gz`` nibabel re-decompresses up to the
        offset on every slice, making streaming effectively
        quadratic in the number of volumes.

    Returns
    -------
    np.ndarray
        Shape ``(n_volumes, n_brain_voxels)``, dtype float32,
        C-contiguous so row indexing is cheap.

    Raises
    ------
    ValueError
        If the image at ``path`` is not 4-D.
    """
    img = nib.load(str(path))
    shape = img.shape
    if len(shape) != 4:
        raise ValueError(
            f"Expected 4-D NIfTI at {path}, got shape {shape}"
        )

    brain = load_nifti_mask(mask_path)
    n_timepoints = shape[3]
    masked = np.empty(
        (n_timepoints, int(brain.sum())), dtype=np.float32
    )

    if streaming:
        # Slice the array proxy so only one volume is read per call.
        def volume_at(t):
            return np.asarray(img.dataobj[..., t])
    else:
        # Read everything once; per-volume access is then an
        # in-memory slice.
        full = np.asarray(img.dataobj)

        def volume_at(t):
            return full[..., t]

    for t in range(n_timepoints):
        masked[t] = volume_at(t).ravel()[brain]
    return masked
[docs]
def load_nifti_with_affine(path):
    """Read a NIfTI image and return its data together with its affine.

    Parameters
    ----------
    path : str or Path
        Image to read.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``(data, affine)``: the image array and a copy of the image's
        4×4 affine.
    """
    image = nib.load(str(path))
    # ``np.array`` copies, so the returned affine is independent of
    # the image object.
    return np.asarray(image.dataobj), np.array(image.affine)
[docs]
def load_tsv(path):
    """Read a tab-separated file into a pandas DataFrame.

    Parameters
    ----------
    path : str or Path
        File to read; converted with ``str()`` before parsing.

    Returns
    -------
    pd.DataFrame
        Table parsed with a tab delimiter and pandas' default
        options otherwise.
    """
    tsv_location = str(path)
    table = pd.read_csv(tsv_location, sep="\t")
    return table
[docs]
def load_gifti_mask(path):
    """Read a ``.func.gii`` surface mask as a boolean array.

    Only the first data array stored in the file is used.

    Parameters
    ----------
    path : str or Path
        GIFTI file to read.

    Returns
    -------
    np.ndarray
        Shape ``(n_vertices,)``, dtype bool; nonzero values map to
        ``True``.
    """
    gifti = nib.load(str(path))
    first_array = gifti.darrays[0]
    return np.asarray(first_array.data).astype(bool)
[docs]
def load_freesurfer_label(path):
    """Read the vertex indices listed in a FreeSurfer ``.label`` file.

    Parameters
    ----------
    path : str or Path
        Label file to read.

    Returns
    -------
    np.ndarray
        Shape ``(n_label_vertices,)``, dtype int.
    """
    read_label = nib.freesurfer.io.read_label
    vertex_ids = read_label(str(path))
    return np.asarray(vertex_ids, dtype=int)