Playing with time series images - Part I: quality check

Converting Deanna's dicom file:

In [2]:
%pylab inline
import nibabel as nib
Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.kernel.zmq.pylab.backend_inline].
For more information, type 'help(pylab)'.
In [3]:
import nibabel
In [4]:
import numpy as np
import nibabel as nib
import nibabel.nicom as nic
import scipy as sc
import os
from os.path import join as pjoin

os.path.realpath(nib.__file__)
-c:3: UserWarning: The DICOM readers are highly experimental, unstable, and only work for Siemens time-series at the moment
Please use with caution.  We would be grateful for your help in improving them
Out[4]:
'/home/jb/.local/lib/python2.7/site-packages/nibabel/__init__.pyc'
In [5]:
pwd
Out[5]:
u'/home/jb/data/qcpna'
In [6]:
CURDIR = os.path.abspath(os.path.curdir)
ddata = os.path.join(CURDIR,"bolddata")
print os.listdir(ddata)
print os.listdir(CURDIR)
['smallbold.nii.gz', 'epi_01', 'dimg.nii.gz', 'bold.nii.gz']
['bold_QC_teacher.ipynb', 'bolddata', 'bold_QC.ipynb']
In [7]:
%alias
# install mricron ... 
# change to directory with epi_01 : os.chdir( pjoin(CURDIR,bolddata) )
#!dcm2nii epi_01
# move back : os.chdir( CURDIR )
Total number of aliases: 12
Out[7]:
[('cat', 'cat'),
 ('cp', 'cp -i'),
 ('ldir', 'ls -F -o --color %l | grep /$'),
 ('lf', 'ls -F -o --color %l | grep ^-'),
 ('lk', 'ls -F -o --color %l | grep ^l'),
 ('ll', 'ls -F -o --color'),
 ('ls', 'ls -F --color'),
 ('lx', 'ls -F -o --color %l | grep ^-..x'),
 ('mkdir', 'mkdir'),
 ('mv', 'mv -i'),
 ('rm', 'rm -i'),
 ('rmdir', 'rmdir')]
In [8]:
ls
bolddata/  bold_QC.ipynb  bold_QC_teacher.ipynb
In [9]:
# !mv epi_01/20090421_155120EPI9613TRSAGs005a001.nii.gz dimg.nii.gz

We are going to use a smaller data set to get started

In [10]:
fimg = pjoin(ddata, "bold.nii.gz")
print fimg
img = nib.load(fimg)
/home/jb/data/qcpna/bolddata/bold.nii.gz
In [11]:
print img.shape
arr = img.get_data()
print arr.shape
(64, 64, 35, 165)
(64, 64, 35, 165)

Let's reduce the size of this time series : take the 42 first scans

In [12]:
nib.nifti1.Nifti1Image?
In [13]:
small_arr = arr[:,:,:,:42]
print small_arr.shape

aff = img.get_affine()
print aff
#
fname_small_img = pjoin(ddata, "smallbold.nii.gz")
small_img = nib.nifti1.Nifti1Image(small_arr, aff)
nib.save(small_img, fname_small_img)
(64, 64, 35, 42)
[[   3.            0.            0.          -93.        ]
 [   0.            3.            0.         -103.41815186]
 [   0.            0.            3.          -45.47966003]
 [   0.            0.            0.            1.        ]]

Check that our small time series image is ok:

In [14]:
plt.gray()
print fname_small_img
simg = nib.load(fname_small_img)
imshow(simg.get_data()[:,:,17,5])
/home/jb/data/qcpna/bolddata/smallbold.nii.gz
Out[14]:
<matplotlib.image.AxesImage at 0x4475fd0>
In [15]:
plot(simg.get_data()[24,12,17,:])
Out[15]:
[<matplotlib.lines.Line2D at 0x46f6090>]

Variance of the images across TR:

In [16]:
arr = simg.get_data()
n_scans = arr.shape[3]
#
temporal_mean = arr.mean(axis=3)
print temporal_mean.shape
print temporal_mean.mean()
print arr.mean()
#
imshow(simg.get_data()[:,:,17,5] - temporal_mean[:,:,17])

print "temporal_mean[:,:,1].shape : ", temporal_mean[:,:,1].shape
(64, 64, 35)
273.428762921
273.428762921
temporal_mean[:,:,1].shape :  (64, 64)
In [17]:
t = 12
arr12 = temporal_mean - arr[:,:,:,t]
print arr12.shape
arr12sq = arr12**2
print arr12sq.shape
print np.sqrt(arr12sq.mean())
(64, 64, 35)
(64, 64, 35)
15.4273905419
In [18]:
rms = np.zeros(n_scans) # initialize an array to hold our rms values
for t in range(n_scans):
    squared_difference = (temporal_mean - arr[:,:,:,t])**2
    rms[t] = np.sqrt(np.mean(squared_difference))
plt.plot(rms)
Out[18]:
[<matplotlib.lines.Line2D at 0x47293d0>]
In [19]:
ashape = np.asarray(arr.shape)

print ashape, 64*64*35, ashape[:3].prod()
print ashape[:3].prod(),ashape[3]
[64 64 35 42] 143360 143360
143360 42
In [20]:
# another way ?
ashape = np.asarray(arr.shape)
arr_reshaped = arr.reshape(ashape[:3].prod(),ashape[3])
print arr_reshaped.shape
#plot(arr_reshaped.std(axis=0))
(143360, 42)
In [21]:
arr_demeaned = arr - arr.mean(axis=3)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-21-bb07f6068054> in <module>()
----> 1 arr_demeaned = arr - arr.mean(axis=3)

ValueError: operands could not be broadcast together with shapes (64,64,35,42) (64,64,35) 

So - that didn't work. Any idea why ? - how to correct ?

Getting confused - and then understanding what is reshaping

In [22]:
tmp = np.arange(24)
In [23]:
tmp
Out[23]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])
In [53]:
tmp.shape = 2, 3*4
print tmp
print tmp.ravel()

# reshape 
print tmp.reshape(3*4, 2)

tmp.shape = 2, 3*4
print tmp

# transpose 
print tmp.T
print tmp.T.ravel()

print tmp
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]
 [20 21]
 [22 23]]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
[[ 0 12]
 [ 1 13]
 [ 2 14]
 [ 3 15]
 [ 4 16]
 [ 5 17]
 [ 6 18]
 [ 7 19]
 [ 8 20]
 [ 9 21]
 [10 22]
 [11 23]]
[ 0 12  1 13  2 14  3 15  4 16  5 17  6 18  7 19  8 20  9 21 10 22 11 23]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
In [58]:
# has our array changed ? 
tmp.shape = 2, 3, 4
print tmp
print tmp.ravel()
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
In [59]:
print tmp[0].shape, "\n",  tmp[0],  "\n",  tmp[-1] 
print tmp[:,:,0].shape, tmp[:,:,-1].shape, "\n", tmp[:,:,0] 
(3, 4) 
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]] 
[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
(2, 3) (2, 3) 
[[ 0  4  8]
 [12 16 20]]
In [60]:
tmp.shape = 2, 3, 4
print tmp

tmp = tmp.reshape(4, 3, 2)
# could also be done with tmp.shape = 4, 2, 3 
print tmp
print tmp.ravel()


tmp = tmp.reshape(2, 3, 4)
# could also be done with tmp.shape = 4, 2, 3 
print tmp
print tmp.ravel()
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]

 [[12 13]
  [14 15]
  [16 17]]

 [[18 19]
  [20 21]
  [22 23]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
In [61]:
# what does transpose do ? 
tmp.shape = 2, 3, 4
print tmp

print tmp.T.shape
print tmp.T
print tmp.T.ravel()
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
(4, 3, 2)
[[[ 0 12]
  [ 4 16]
  [ 8 20]]

 [[ 1 13]
  [ 5 17]
  [ 9 21]]

 [[ 2 14]
  [ 6 18]
  [10 22]]

 [[ 3 15]
  [ 7 19]
  [11 23]]]
[ 0 12  4 16  8 20  1 13  5 17  9 21  2 14  6 18 10 22  3 15  7 19 11 23]

And what about np.rollaxis ?

In [76]:
print tmp.shape, tmp.ravel()

print np.rollaxis(tmp, 2, 0).shape, np.rollaxis(tmp, 2, 0).ravel()
print np.rollaxis(np.rollaxis(tmp, 2, 0), 2, 1).shape, \
      np.rollaxis(np.rollaxis(tmp, 2, 0), 2, 1).ravel()
(2, 3, 4) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
(4, 2, 3) [ 0  4  8 12 16 20  1  5  9 13 17 21  2  6 10 14 18 22  3  7 11 15 19 23]
(4, 2, 3) [ 0  4  8 12 16 20  1  5  9 13 17 21  2  6 10 14 18 22  3  7 11 15 19 23]
(4, 3, 2) [ 0 12  4 16  8 20  1 13  5 17  9 21  2 14  6 18 10 22  3 15  7 19 11 23]
In [63]:
#if more than 3 dim
tmp4d_shape = (2,3,4,5)
tmp4d = np.arange(np.prod(tmp4d_shape)).reshape(tmp4d_shape)
print tmp4d[0,0:2,:,:]
print np.prod(tmp4d_shape)
print tmp4d.T.shape
print tmp4d.T[0,0:2,:,:]
print tmp4d.T.ravel()[:21]
[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]
  [10 11 12 13 14]
  [15 16 17 18 19]]

 [[20 21 22 23 24]
  [25 26 27 28 29]
  [30 31 32 33 34]
  [35 36 37 38 39]]]
120
(5, 4, 3, 2)
[[[  0  60]
  [ 20  80]
  [ 40 100]]

 [[  5  65]
  [ 25  85]
  [ 45 105]]]
[  0  60  20  80  40 100   5  65  25  85  45 105  10  70  30  90  50 110
  15  75  35]

Now, we understand (almost) fully numpy arrays. Let's apply this powerful knowledge to our problem:

In [65]:
mean_img = arr.mean(axis=3)
mean_img_shape = np.asarray(mean_img.shape)
print mean_img.shape, mean_img_shape

tximg = arr.reshape(mean_img_shape.prod(),arr.shape[3]).T
print tximg.shape

# last dimension is the same: broadcasting
tximg_centred = tximg - mean_img.ravel()

print tximg_centred.shape
print mean_img.ravel().shape

#fig = figure()
ax = subplots(1,1)

ax[1].plot(tximg_centred.std(axis=1))
ax[1].set_xlabel('TR')
(64, 64, 35) [64 64 35]
(42, 143360)
(42, 143360)
(143360,)
Out[65]:
<matplotlib.text.Text at 0x4b591d0>

Now - can we go back ?

In [67]:
tximg = tximg_centred + mean_img.ravel()
arr2 = tximg.T.reshape(arr.shape)
print (arr - arr2).max(), (arr - arr2).min()
print np.allclose(arr,arr2)
print np.all((arr-arr2) == 0)
print np.any(arr-arr2)
0.0 0.0
True
True
False

Summary image ?

In [77]:
arr_std = arr.std(axis = 3)
(fig,axes) = subplots(3,3)

for i in range(3):
    for j in range(3):
        axes[i,j].imshow(arr_std[:,:,(3*i + j)*4])

Are there big jumps between two consecutive volumes ?

In [78]:
diff_arr = arr[:,:,:,:-1] - arr[:,:,:,1:] 
In [79]:
print arr.shape, diff_arr.shape

imagedim = np.prod(arr.shape[:3])
print imagedim

squarediff = (diff_arr.reshape(imagedim, arr.shape[3]-1))**2
print squarediff.shape
m2diff = squarediff.mean(axis=0)
(64, 64, 35, 42) (64, 64, 35, 41)
143360
(143360, 41)
In [80]:
figure(num=None, figsize=(12,2))
plot(m2diff)
Out[80]:
[<matplotlib.lines.Line2D at 0x76cd990>]

Select the biggest difference:

In [81]:
print np.argmax(m2diff)
18

Create a vector that you would put in the design matrix

In [82]:
nuisance = np.zeros(n_scans)
nuisance[np.argmax(m2diff)] = 1
print nuisance
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.]

Exercice : create a volume that will show a big difference - check that the covariate is indeed taking out this volume. This is the concept of a unit test !

Computing the variance per slice across time

In [83]:
# get the variance per slice - no difference between adjacent slices
ash = arr.shape
print ash

# slice std : mean taken within slice
slice_std = arr.reshape((ash[0]*ash[1], ash[2], ash[3])).std(axis=0)
slice_mean = arr.reshape((ash[0]*ash[1], ash[2], ash[3])).mean(axis=0)
print slice_std.shape
#(fig, axes) = subplots(1,2)
#axes[0].plot(slice_std.T)
#axes[1].plot(slice_mean.T)


# display slice std across TR: mean taken across slice
m = arr.mean(axis=3)
m_shape = np.asarray(m.shape)
tximg = arr.reshape(m_shape[0:3].prod(),-1)
tximg_T = np.rollaxis(tximg, 0, 2)
print np.rollaxis(tximg, 0, 2).shape

arr_demean = tximg_T - tximg_T.mean(axis=0)
arr_demean_reshaped = arr_demean.reshape((ash[-1],)+ash[:3]) 
print (ash[-1],)+ash[:3]
print 'arr_demean_reshaped.shape',  arr_demean_reshaped.shape

print arr_demean_reshaped[:,32,32,17].mean()

arr_demean_reshaped_std = arr_demean_reshaped.std(axis=0)

plot(arr_demean_reshaped_std.reshape(64*64, ash[2]).mean(axis=0))

# do it for each time ?

# do it for the difference between adjacent scans ?
# print diff_arr.shape
(64, 64, 35, 42)
(35, 42)
(42, 143360)
(42, 64, 64, 35)
arr_demean_reshaped.shape (42, 64, 64, 35)
5.41365893912e-15
Out[83]:
[<matplotlib.lines.Line2D at 0x8515b50>]
In [84]:
import nipy.algorithms.diagnostics as nads

tdiff = nads.time_slice_diffs(arr, time_axis=-1, slice_axis=-2)
nads.plot_tsdiffs(tdiff)
Out[84]:
[<matplotlib.axes.AxesSubplot at 0xb611250>,
 <matplotlib.axes.AxesSubplot at 0xb945a10>,
 <matplotlib.axes.AxesSubplot at 0xb961ed0>,
 <matplotlib.axes.AxesSubplot at 0xbacf410>]

Singular value decomposition to detect artifacts

In [85]:
import scipy.linalg as lin
In [98]:
print ash
tximg = arr.reshape(np.prod(ash[:3]),ash[3]).T
print tximg.shape
aTa = tximg.dot(tximg.T)
u,s,vh = lin.svd(aTa)
(64, 64, 35, 42)
(42, 143360)
In [99]:
plot(s)
Out[99]:
[<matplotlib.lines.Line2D at 0x7f8f0c023250>]
In [100]:
plot(u[:,0])
Out[100]:
[<matplotlib.lines.Line2D at 0x68b7410>]
In [101]:
tximg_centred = tximg - tximg.mean(axis=0)
print tximg_centred.shape
aTa = tximg_centred.dot(tximg_centred.T)
v,s,vh = lin.svd(aTa)
(42, 143360)
In [91]:
plot(s)
Out[91]:
[<matplotlib.lines.Line2D at 0x6746690>]
In [103]:
u1 = tximg_centred.T.dot(v[:,0])
In [104]:
print u1.shape
print tximg_centred.shape
u1 = u1.reshape(ash[:3])
print u1.shape
imshow(u1[:,:,17])
(143360,)
(42, 143360)
(64, 64, 35)
Out[104]:
<matplotlib.image.AxesImage at 0x7f8f0d08a0d0>

Exercise : write a script that replace the most suspicious scan by the average of surrounding scans

First - write a test !

In [148]:
# load the image again - to be sure
fimg = pjoin(ddata, "smallbold.nii.gz")
print fimg
img = nib.load(fimg)
arr = img.get_data()

# increase the mean at volume 18
arr[:,:,:,18] = arr[:,:,:,18] + 30

def std_diff_ts(a4d, timeaxis = 'last'):
    """
    Takes a 4d numpy array, take the diff between adjacent times, 
    compute and returns the std of the diff volume time series

    Inputs
    ------
    arr4d: numpy array
        The array on which we compute the std of the diff time series
    timeaxis: integer or in ['last', 'first']
        The axis where the time is

    Outputs
    -------
    (Ntmes,) numpy array
        The array with the std of the diff volume time series
    """
#    pass
    ash = np.asarray(a4d.shape)
    if len(a4d.shape) != 4:
        raise ValueError(" Array is not of shape 4")
    
    if timeaxis == 'first':
            timeaxis = 0
    if timeaxis != 'last':
        np.rollaxis(a4d, timeaxis, -1)
        
    print ash, ash[:3].prod(),ash[3]
    diff_arr = (a4d[:,:,:,:-1] - a4d[:,:,:,1:])**2
    diff_arr = diff_arr.reshape(ash[:3].prod(),ash[3]-1)
    return diff_arr.mean(axis=0)
/home/jb/data/qcpna/bolddata/smallbold.nii.gz
In [149]:
len(arr.shape)

stdts = std_diff_ts(arr)
plot(stdts)
[64 64 35 42] 143360 42
Out[149]:
[<matplotlib.lines.Line2D at 0x7f8f04b15590>]
In [155]:
print np.argmax(stdts)
rk = np.argsort(stdts)
print argmax([stdts[rk[-1]-1],stdts[rk[-1]+1]])
17
1
In [158]:
bad_volume = np.argmax(stdts) + argmax([stdts[rk[-1]-1],stdts[rk[-1]+1]])
In [ ]: