A simple second level t-test using nipy

Simple random effect model

Random-effects models in neuroimaging refer to the fact that the variance used for testing the effect is the between-subject variance. In other words, we take the effect computed for each subject (the contrast maps) and enter these into a second-level model.

In [1]:
# Imports for the second-level analysis: numpy for array math, nibabel for
# NIfTI image I/O, nipy for the GLM / mixed-effects machinery.
# try Bertrand's and Alexis example on Matthew's data : 
import numpy as np
import nibabel as nib
from nibabel import Nifti1Image as Image
import nipy
#from nipy.utils import example_data

print nipy.__file__ # see which nipy we are using
/home/jb/.local/lib/python2.7/site-packages/nipy/__init__.pyc

Get the data located in ./ds107/analysis/sub0*.img

In [2]:
import os
import os.path as osp
from os.path import join as pjoin, isfile
import zipfile as zpf    

#----- SET THE DIRECTORY WHERE YOUR DATA SIT ------# 
DATADIR = pjoin(osp.expanduser('~'), 'data', 'ds107')

#----- if DATA ARE ZIP : UNZIP --------------------#
datazip = pjoin(pjoin(DATADIR,'analysis.zip')) 
if osp.isfile(datazip):
    with zpf.ZipFile(datazip, "r") as z:
        z.extractall(DATADIR)

list_subj = range(1,11)
dsubj = [pjoin(DATADIR,'analysis','sub' + "%03d" % i) for i in list_subj]
f_cnames = [pjoin(dsubj[i-1],'copes','_model_id_1cope01.nii.gz') for i in list_subj]
f_vnames = [pjoin(dsubj[i-1],'varcopes','_model_id_1varcope01.nii.gz') for i in list_subj]

#fnames = [os.path.join(scans_dir, 'snn03055dy%i.img' % i) for i in range(1,13)]
%pwd
print  [ (f, isfile(f)) for f in f_cnames]

# keep only images that are on disk !
f_cnames = [f for f in f_cnames if isfile(f)]
f_vnames = [f for f in f_vnames if isfile(f)]
[('/home/jb/data/ds107/analysis/sub001/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub002/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub003/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub004/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub005/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub006/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub007/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub008/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub009/copes/_model_id_1cope01.nii.gz', True), ('/home/jb/data/ds107/analysis/sub010/copes/_model_id_1cope01.nii.gz', False)]
In [3]:
# Load the contrast (cope) and variance (varcope) images that exist on disk.
c_images = []
v_images = []
for fname in f_cnames:
    if isfile(fname):
        c_images.append(nib.load(fname))
for fname in f_vnames:
    if isfile(fname):
        v_images.append(nib.load(fname))
In [4]:
print len(c_images), len(v_images)
9 9

Computing the mask of our images - within and between subjects

In [5]:
from nipy.labs.mask import compute_mask, compute_mask_files

# first try with the contrast image:
image = c_images[0]

def plot_mask_image(image):
    """Compute a brain mask from `image` and show an axial slice of the raw
    data next to the corresponding slice of the binary mask.

    Relies on pylab's `subplots` being available in the notebook namespace.
    """
    data = image.get_data()
    # m/M select the lower/upper thresholds of the intensity histogram,
    # cc=True keeps the largest connected component, opening=2 cleans up
    # the mask morphologically -- see nipy.labs.mask.compute_mask docs
    mask_arr = compute_mask(data, None, m=0.2, M=0.9,
                            cc=True, opening=2, exclude_zeros=False)
    # (fix: the original also built an unused Nifti1Image from the mask;
    # dropped here -- the reusable mask image is built later from all subjects)

    f, (a1, a2) = subplots(1, 2, figsize=(10, 3))
    a1.imshow(data[:, :, 12])         # axial slice 12 of the raw data
    a1.set_title('my data example')
    a2.imshow(mask_arr[:, :, 12])     # same slice of the mask
    a2.set_title('my mask example')

plot_mask_image(image)
In [6]:
# repeat the mask demo on the first variance (varcope) image
image = v_images[0]
plot_mask_image(image)
In [7]:
# inspect the distribution of variance values; log(1 + x) compresses the
# heavy right tail so the 50-bin histogram is readable
the_data = image.get_data()
the_data.shape
#plt.hist(the_data.ravel())
plt.hist(np.log(the_data.ravel() + 1),50)
Out[7]:
(array([86377,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,    32,   494,  1763,  2199,  5108, 14758, 13586,
        7726,  4322,  2546,  1570,  1009,   639,   432,   286,   197,
         121,    69,    32,    31,    16,    22,     7,     4,     3,
           4,     3,     2,     1,     1]),
 array([  0.        ,   0.25445147,   0.50890293,   0.7633544 ,
         1.01780586,   1.27225733,   1.52670879,   1.78116026,
         2.03561172,   2.29006319,   2.54451466,   2.79896612,
         3.05341759,   3.30786905,   3.56232052,   3.81677198,
         4.07122345,   4.32567492,   4.58012638,   4.83457785,
         5.08902931,   5.34348078,   5.59793224,   5.85238371,
         6.10683517,   6.36128664,   6.61573811,   6.87018957,
         7.12464104,   7.3790925 ,   7.63354397,   7.88799543,
         8.1424469 ,   8.39689837,   8.65134983,   8.9058013 ,
         9.16025276,   9.41470423,   9.66915569,   9.92360716,
        10.17805862,  10.43251009,  10.68696156,  10.94141302,
        11.19586449,  11.45031595,  11.70476742,  11.95921888,
        12.21367035,  12.46812181,  12.72257328]),
 <a list of 50 Patch objects>)

Usually, you will want to use all subjects (for instance the mean across subjects) to compute the mask

In [8]:
print f_vnames
['/home/jb/data/ds107/analysis/sub001/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub002/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub003/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub004/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub005/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub006/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub007/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub008/varcopes/_model_id_1varcope01.nii.gz', '/home/jb/data/ds107/analysis/sub009/varcopes/_model_id_1varcope01.nii.gz']
In [9]:
# you can also compute the mask from the files names (or list of files) directly;
# with return_mean=True it also returns the mean image across inputs
mask_arr, mean_img = compute_mask_files(f_vnames, output_filename=None, \
                        return_mean=True, m=0.2, M=0.9, cc=1,    \
                        exclude_zeros=False, opening=2)

# NOTE(review): `image` is still v_images[0] here; we borrow its affine and
# header for the group mask -- this assumes all subjects share that geometry,
# which the affine check further down suggests is not exactly true. Confirm.
mask_img = nib.Nifti1Image(mask_arr, image.get_affine(), image.get_header())
In [10]:
# print type(mask_arr)
# show the mean image across subjects next to the group mask (axial slice 12)
f, (a1, a2) = subplots(1,2,figsize=(10,3))
a1.imshow(mean_img[:,:,12])
a1.set_title('my_example')
a2.imshow(mask_arr[:,:,12])
Out[10]:
<matplotlib.image.AxesImage at 0x3daae90>

Construct an "interesting" model:

In [11]:
image = c_images[0]
images = c_images
# transformation from voxel no to mm
M = image.get_affine()
# mm to voxel no
iM = np.linalg.inv(M)
# coordinates of voxel of interest in mm (MNI space)
posmm = [-20.0, -42, 34]
# coordinates in voxel space (in homogenous coordinates)
posvox = np.dot(iM, posmm + [1])
# We grab the spatial part of the output.  Since we want to use it as an 
# index, we need to make it a tuple
posvox = tuple(posvox[:3].astype(int))

print posvox

nimgs = len(images)
vdata = np.zeros(nimgs)
gdata = np.zeros(nimgs)

for i, V in enumerate(images):
    d = V.get_data()
    vdata[i] = d[posvox]

s = np.std(vdata)/20
x1 = vdata + np.random.randn(vdata.shape[0])*s
print x1

X = np.hstack((vdata[:,newaxis], ones((len(images),1))))
print X
(24, 20, 28)
[  1.81766892 -62.5359811   23.47866984 -42.77734432  13.09622161
 -28.95755905  11.14181709 -17.1068815   -9.37745997]
[[  1.66273332   1.        ]
 [-62.3576088    1.        ]
 [ 24.13196373   1.        ]
 [-42.97476196   1.        ]
 [ 13.57378292   1.        ]
 [-31.6486969    1.        ]
 [ 12.97807789   1.        ]
 [-17.6577549    1.        ]
 [ -9.78118229   1.        ]]

Checking that all files have the same affine :

In [12]:
aff0 = v_images[0].get_affine()
shape0 = v_images[0].shape

for i,img in enumerate(v_images): 
    if i < 2: # print only ..
        print img.get_data().shape
        print img.get_affine()
        print "same as the first one ?: ", np.allclose(aff0, img.get_affine())
        
#go from a list of 3d to one 4d image:
data_images_4d = nib.concat_images(c_images, check_affines=False)
(64, 64, 35)
[[   3.            0.            0.          -93.        ]
 [   0.            3.            0.         -103.55609131]
 [   0.            0.            3.          -51.73400116]
 [   0.            0.            0.            1.        ]]
same as the first one ?:  True
(64, 64, 35)
[[   3.            0.            0.          -93.        ]
 [   0.            3.            0.         -109.034729  ]
 [   0.            0.            3.          -76.77787018]
 [   0.            0.            0.            1.        ]]
same as the first one ?:  False
In [13]:
from nipy.modalities.fmri.glm import FMRILinearModel
#data_images = [nib.load(f) for f in f_cnames]

# FMRILinearModel will 
#  * load the data in fmri_model.fmri_data
#  * load X in fmri_model.design_matrices
#  * load the mask or compute it from the runs
# Second level: one "scan" per subject in data_images_4d, X is the
# (n_subjects x 2) design built above, mask_img restricts the fit.
fmri_model = FMRILinearModel(data_images_4d, X, mask_img)

# GLM fitting using ordinary least_squares
fmri_model.fit(do_scaling=False, model='ols')

# If we were doing a first-level analysis, we would generally leave the model
# unspecified: the fit would do a first pass with model="ar1", whiten the data,
# then a second pass with model='ols'.
In [14]:
# specify and estimate the contrast
contrast_val = np.array(([[1, 0]]))  # 
print contrast_val

# By default would do a t-test
z_map, = fmri_model.contrast(contrast_val, contrast_type='t', con_id='', output_z=True)
[[1 0]]
In [15]:
# look up nipy's template data source (displayed below); DataError is the
# exception nipy raises when such data cannot be found
from nipy.utils import templates, DataError

templates
Out[15]:
<nibabel.data.VersionedDatasource at 0x4bb8ed0>
In [16]:
# look at the result
from nipy.labs.viz import plot_map, cm
import matplotlib.pyplot as plt
#anat = 

# symmetric color limits around zero so the cold/hot colormap is centered
z_data = z_map.get_data()
vmax = max(-z_data.min(), z_data.max())
vmin = -vmax
plot_map(z_data, z_map.get_affine(), #anat=anat,
         cut_coords=(-20.0, -42, 34),
         cmap=cm.cold_hot,
         vmin=vmin,
         vmax=vmax,
         threshold=1.5,
         black_bg=True)
#plt.show()
/home/jb/.local/lib/python2.7/site-packages/nipy/labs/datasets/volumes/volume_img.py:284: FutureWarning: Numpy has detected that you (may be) writing to an array returned
by numpy.diagonal or by selecting multiple fields in a record
array. This code will likely break in the next numpy release --
see numpy.diagonal or arrays.indexing reference docs for details.
The quick fix is to make an explicit copy (e.g., do
arr.diagonal().copy() or arr[['f0','f1']].copy()).
  pixdim[0] = -pixdim[0]
Out[16]:
<nipy.labs.viz_tools.slicers.OrthoSlicer at 0x5e44650>

By looking at the object, we see a number of useful options:

In [17]:
# do something like
# (IPython's `??` magic displays the docstring and source of the method)
fmri_model.contrast??

F contrasts can be handled the same way. If your contrast has several rows, by default it will be treated as an F-test.

In [18]:
# For instance: a two-row (2x2 identity) contrast, stacked along a leading
# axis -- a multi-row contrast is treated as an F-test
contrast_val = np.array([[[1, 0],
                          [0, 1]]])

Here are some: ....

  • output_z : bool, optional
          Return or not the corresponding z-stat image
  • output_stat : bool, optional
          Return or not the base (t/F) stat image
  • output_effects : bool, optional
          Return or not the corresponding effect image
  • output_variance : bool, optional
          Return or not the corresponding variance image

Mixed effect.

Mixed-effects models refer to the situation where, in a model, some of the effects are random and some are fixed. In neuroimaging, the issue is that there are two variances to think of.

  • The intra run variance
  • The inter subject variance.

The problem:

When you want to estimate the variance between subjects, you are dealing with noisy observations: the total variance has to be decomposed. We often have a good idea of the first-level variance, because in general there are many scans per run, but still there is no direct formula to get the inter-subject variance.

For one voxel, let $Y$ be the observed effects (a vector with $n$ components, one per subject)

As our $Y$ are measured with a known variance, we have

$$ Y = Z + \epsilon_1 $$

where the $Z_i$ represent the "true" measurement for subject $i$, $\epsilon_1 \sim N(0,\Sigma)$, with $\Sigma = diag(\sigma_i^2)$

and at the group level, we have a model between subjects for the $Z$ :

$$ Z = X\beta + \epsilon_2 $$

where $\beta$ is the group effect, and $\epsilon_2 \sim N(0,vI)$, $v$ being the group variance (subjects are assumed to be independent).

Overall, we have

$\mathrm{var}(Y) = \Sigma + vI_n $

Most good packages now include a way of dealing with this. While it is possible to estimate $v$, $\Sigma$, and $\beta$ simultaneously, this requires lengthy computations, and we generally have good confidence in the $\sigma_i$; they are therefore assumed known after the first-level estimation, so that only $v$ and $\beta$ remain to be estimated.

here - an explanation of the EM ?

Nipy has a couple of routines for this. Let's examine one with a simple example. First create an array of data:

In [19]:
n_sub = 15 # the number of subjects
n_vox = 77 # the number of voxels

# inter subject variance
V2 = np.arange(1,2,1.0/n_sub)
V2 = V2[:,np.newaxis]
vox_variance = np.arange(1.,10.,9.0/n_vox)
vox_variance = vox_variance[np.newaxis,:]
V2 = np.dot(V2, vox_variance)


# within subject variance > within variance
V1 = .5*np.ones(V2.shape) 
print np.max(V1)


# check that the variances are  positive
if (V1 < 0).any():
    raise ValueError('Variance should be positive')

Y = np.random.standard_normal(V1.shape)
Y *= np.sqrt(V2 + V1)

X = np.ones(n_sub)
if X.ndim == 1:
    X = X[:, np.newaxis]

# put enough signal such that we should see something significant
beta = 1.
if np.isscalar(beta):
    beta = beta * np.ones((X.shape[1], V1.shape[1]))
if beta.ndim == 1:
    beta = beta[np.newaxis]

Y += np.dot(X, beta)

print np.max(Y)
0.5
10.8152721104
In [20]:
from nipy.algorithms.statistics.mixed_effects_stat import mfx_stat 
# column to be tested in X
column = 0 
t, varh, betah =  mfx_stat(Y, V1, X, column, n_iter=5, return_t=True, return_f=False, return_effect=True, \
             return_var=True, verbose=False)

print t.shape, betah.shape, varh.shape

print sum(t>2.)

fig = plt.figure()
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharex=True, figsize=(12,4))

ax1.plot(t)
ax2.plot(betah)
ax3.plot(varh)
(77,) (77,) (77,)
20
Out[20]:
[<matplotlib.lines.Line2D at 0x7c6d850>]
<matplotlib.figure.Figure at 0x5e5a2d0>

Calculate significant voxels and clusters : see Multiple comparison notebook

In [ ]: