Playing with time series images - Part I: quality check¶

Converting Deanna's dicom file:¶

%pylab inline
import nibabel as nib

Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.kernel.zmq.pylab.backend_inline].
For more information, type 'help(pylab)'.

import nibabel

import numpy as np
import nibabel as nib
import nibabel.nicom as nic
import scipy as sc
import os
from os.path import join as pjoin

os.path.realpath(nib.__file__)

-c:3: UserWarning: The DICOM readers are highly experimental, unstable, and only work for Siemens time-series at the moment
Please use with caution.  We would be grateful for your help in improving them

'/home/jb/.local/lib/python2.7/site-packages/nibabel/__init__.pyc'

pwd

u'/home/jb/data/qcpna'

CURDIR = os.path.abspath(os.path.curdir)
ddata = os.path.join(CURDIR,"bolddata")
print os.listdir(ddata)
print os.listdir(CURDIR)

['smallbold.nii.gz', 'epi_01', 'dimg.nii.gz', 'bold.nii.gz']
['bold_QC_teacher.ipynb', 'bolddata', 'bold_QC.ipynb']

%alias
# install mricron ... 
# change to directory with epi_01 : os.chdir( pjoin(CURDIR,bolddata) )
#!dcm2nii epi_01
# move back : os.chdir( CURDIR )

Total number of aliases: 12

[('cat', 'cat'),
 ('cp', 'cp -i'),
 ('ldir', 'ls -F -o --color %l | grep /$'),
 ('lf', 'ls -F -o --color %l | grep ^-'),
 ('lk', 'ls -F -o --color %l | grep ^l'),
 ('ll', 'ls -F -o --color'),
 ('ls', 'ls -F --color'),
 ('lx', 'ls -F -o --color %l | grep ^-..x'),
 ('mkdir', 'mkdir'),
 ('mv', 'mv -i'),
 ('rm', 'rm -i'),
 ('rmdir', 'rmdir')]

ls

bolddata/  bold_QC.ipynb  bold_QC_teacher.ipynb

# !mv epi_01/20090421_155120EPI9613TRSAGs005a001.nii.gz dimg.nii.gz

We are going to use a smaller data set to get started¶

fimg = pjoin(ddata, "bold.nii.gz")
print fimg
img = nib.load(fimg)

/home/jb/data/qcpna/bolddata/bold.nii.gz

print img.shape
arr = img.get_data()
print arr.shape

(64, 64, 35, 165)
(64, 64, 35, 165)

Let's reduce the size of this time series : take the 42 first scans

nib.nifti1.Nifti1Image?

small_arr = arr[:,:,:,:42]
print small_arr.shape

aff = img.get_affine()
print aff
#
fname_small_img = pjoin(ddata, "smallbold.nii.gz")
small_img = nib.nifti1.Nifti1Image(small_arr, aff)
nib.save(small_img, fname_small_img)

(64, 64, 35, 42)
[[   3.            0.            0.          -93.        ]
 [   0.            3.            0.         -103.41815186]
 [   0.            0.            3.          -45.47966003]
 [   0.            0.            0.            1.        ]]

Check that our small time series image is ok:¶

plt.gray()
print fname_small_img
simg = nib.load(fname_small_img)
imshow(simg.get_data()[:,:,17,5])

/home/jb/data/qcpna/bolddata/smallbold.nii.gz

<matplotlib.image.AxesImage at 0x4475fd0>

plot(simg.get_data()[24,12,17,:])

[<matplotlib.lines.Line2D at 0x46f6090>]

Variance of the images across TR:¶

arr = simg.get_data()
n_scans = arr.shape[3]
#
temporal_mean = arr.mean(axis=3)
print temporal_mean.shape
print temporal_mean.mean()
print arr.mean()
#
imshow(simg.get_data()[:,:,17,5] - temporal_mean[:,:,17])

print "temporal_mean[:,:,1].shape : ", temporal_mean[:,:,1].shape

(64, 64, 35)
273.428762921
273.428762921
temporal_mean[:,:,1].shape :  (64, 64)

t = 12
arr12 = temporal_mean - arr[:,:,:,t]
print arr12.shape
arr12sq = arr12**2
print arr12sq.shape
print np.sqrt(arr12sq.mean())

(64, 64, 35)
(64, 64, 35)
15.4273905419

rms = np.zeros(n_scans) # initialize an array to hold our rms values
for t in range(n_scans):
    squared_difference = (temporal_mean - arr[:,:,:,t])**2
    rms[t] = np.sqrt(np.mean(squared_difference))
plt.plot(rms)

[<matplotlib.lines.Line2D at 0x47293d0>]

ashape = np.asarray(arr.shape)

print ashape, 64*64*35, ashape[:3].prod()
print ashape[:3].prod(),ashape[3]

[64 64 35 42] 143360 143360
143360 42

# another way ?
ashape = np.asarray(arr.shape)
arr_reshaped = arr.reshape(ashape[:3].prod(),ashape[3])
print arr_reshaped.shape
#plot(arr_reshaped.std(axis=0))

(143360, 42)

arr_demeaned = arr - arr.mean(axis=3)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-21-bb07f6068054> in <module>()
----> 1 arr_demeaned = arr - arr.mean(axis=3)

ValueError: operands could not be broadcast together with shapes (64,64,35,42) (64,64,35)

So - that didn't work. Any idea why ? - how to correct ?

Getting confused - and then understanding what is reshaping¶

tmp = np.arange(24)

tmp

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

tmp.shape = 2, 3*4
print tmp
print tmp.ravel()

# reshape 
print tmp.reshape(3*4, 2)

tmp.shape = 2, 3*4
print tmp

# transpose 
print tmp.T
print tmp.T.ravel()

print tmp

[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]
 [20 21]
 [22 23]]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
[[ 0 12]
 [ 1 13]
 [ 2 14]
 [ 3 15]
 [ 4 16]
 [ 5 17]
 [ 6 18]
 [ 7 19]
 [ 8 20]
 [ 9 21]
 [10 22]
 [11 23]]
[ 0 12  1 13  2 14  3 15  4 16  5 17  6 18  7 19  8 20  9 21 10 22 11 23]
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]

# has our array changed ? 
tmp.shape = 2, 3, 4
print tmp
print tmp.ravel()

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]

print tmp[0].shape, "\n",  tmp[0],  "\n",  tmp[-1] 
print tmp[:,:,0].shape, tmp[:,:,-1].shape, "\n", tmp[:,:,0]

(3, 4) 
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]] 
[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
(2, 3) (2, 3) 
[[ 0  4  8]
 [12 16 20]]

tmp.shape = 2, 3, 4
print tmp

tmp = tmp.reshape(4, 3, 2)
# could also be done with tmp.shape = 4, 2, 3 
print tmp
print tmp.ravel()


tmp = tmp.reshape(2, 3, 4)
# could also be done with tmp.shape = 4, 2, 3 
print tmp
print tmp.ravel()

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[[[ 0  1]
  [ 2  3]
  [ 4  5]]

 [[ 6  7]
  [ 8  9]
  [10 11]]

 [[12 13]
  [14 15]
  [16 17]]

 [[18 19]
  [20 21]
  [22 23]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]

# what does transpose do ? 
tmp.shape = 2, 3, 4
print tmp

print tmp.T.shape
print tmp.T
print tmp.T.ravel()

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
(4, 3, 2)
[[[ 0 12]
  [ 4 16]
  [ 8 20]]

 [[ 1 13]
  [ 5 17]
  [ 9 21]]

 [[ 2 14]
  [ 6 18]
  [10 22]]

 [[ 3 15]
  [ 7 19]
  [11 23]]]
[ 0 12  4 16  8 20  1 13  5 17  9 21  2 14  6 18 10 22  3 15  7 19 11 23]

And what about np.rollaxis ?¶

print tmp.shape, tmp.ravel()

print np.rollaxis(tmp, 2, 0).shape, np.rollaxis(tmp, 2, 0).ravel()
print np.rollaxis(np.rollaxis(tmp, 2, 0), 2, 1).shape, \
      np.rollaxis(np.rollaxis(tmp, 2, 0), 2, 1).ravel()

(2, 3, 4) [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
(4, 2, 3) [ 0  4  8 12 16 20  1  5  9 13 17 21  2  6 10 14 18 22  3  7 11 15 19 23]
(4, 2, 3) [ 0  4  8 12 16 20  1  5  9 13 17 21  2  6 10 14 18 22  3  7 11 15 19 23]
(4, 3, 2) [ 0 12  4 16  8 20  1 13  5 17  9 21  2 14  6 18 10 22  3 15  7 19 11 23]

#if more than 3 dim
tmp4d_shape = (2,3,4,5)
tmp4d = np.arange(np.prod(tmp4d_shape)).reshape(tmp4d_shape)
print tmp4d[0,0:2,:,:]
print np.prod(tmp4d_shape)
print tmp4d.T.shape
print tmp4d.T[0,0:2,:,:]
print tmp4d.T.ravel()[:21]

[[[ 0  1  2  3  4]
  [ 5  6  7  8  9]
  [10 11 12 13 14]
  [15 16 17 18 19]]

 [[20 21 22 23 24]
  [25 26 27 28 29]
  [30 31 32 33 34]
  [35 36 37 38 39]]]
120
(5, 4, 3, 2)
[[[  0  60]
  [ 20  80]
  [ 40 100]]

 [[  5  65]
  [ 25  85]
  [ 45 105]]]
[  0  60  20  80  40 100   5  65  25  85  45 105  10  70  30  90  50 110
  15  75  35]

Now, we understand (almost) fully numpy arrays. Let's apply this powerful knowledge to our problem:¶

mean_img = arr.mean(axis=3)
mean_img_shape = np.asarray(mean_img.shape)
print mean_img.shape, mean_img_shape

tximg = arr.reshape(mean_img_shape.prod(),arr.shape[3]).T
print tximg.shape

# last dimension is the same: broadcasting
tximg_centred = tximg - mean_img.ravel()

print tximg_centred.shape
print mean_img.ravel().shape

#fig = figure()
ax = subplots(1,1)

ax[1].plot(tximg_centred.std(axis=1))
ax[1].set_xlabel('TR')

(64, 64, 35) [64 64 35]
(42, 143360)
(42, 143360)
(143360,)

<matplotlib.text.Text at 0x4b591d0>

Now - can we go back ?¶

tximg = tximg_centred + mean_img.ravel()
arr2 = tximg.T.reshape(arr.shape)
print (arr - arr2).max(), (arr - arr2).min()
print np.allclose(arr,arr2)
print np.all((arr-arr2) == 0)
print np.any(arr-arr2)

0.0 0.0
True
True
False

Summary image ?¶

arr_std = arr.std(axis = 3)
(fig,axes) = subplots(3,3)

for i in range(3):
    for j in range(3):
        axes[i,j].imshow(arr_std[:,:,(3*i + j)*4])

Are there big jumps between two consecutive volumes ?¶

diff_arr = arr[:,:,:,:-1] - arr[:,:,:,1:]

print arr.shape, diff_arr.shape

imagedim = np.prod(arr.shape[:3])
print imagedim

squarediff = (diff_arr.reshape(imagedim, arr.shape[3]-1))**2
print squarediff.shape
m2diff = squarediff.mean(axis=0)

(64, 64, 35, 42) (64, 64, 35, 41)
143360
(143360, 41)

figure(num=None, figsize=(12,2))
plot(m2diff)

[<matplotlib.lines.Line2D at 0x76cd990>]

Select the biggest difference:

print np.argmax(m2diff)

18

Create a vector that you would put in the design matrix

nuisance = np.zeros(n_scans)
nuisance[np.argmax(m2diff)] = 1
print nuisance

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.]

Exercice : create a volume that will show a big difference - check that the covariate is indeed taking out this volume. This is the concept of a unit test !

Computing the variance per slice across time¶

# get the variance per slice - no difference between adjacent slices
ash = arr.shape
print ash

# slice std : mean taken within slice
slice_std = arr.reshape((ash[0]*ash[1], ash[2], ash[3])).std(axis=0)
slice_mean = arr.reshape((ash[0]*ash[1], ash[2], ash[3])).mean(axis=0)
print slice_std.shape
#(fig, axes) = subplots(1,2)
#axes[0].plot(slice_std.T)
#axes[1].plot(slice_mean.T)


# display slice std across TR: mean taken across slice
m = arr.mean(axis=3)
m_shape = np.asarray(m.shape)
tximg = arr.reshape(m_shape[0:3].prod(),-1)
tximg_T = np.rollaxis(tximg, 0, 2)
print np.rollaxis(tximg, 0, 2).shape

arr_demean = tximg_T - tximg_T.mean(axis=0)
arr_demean_reshaped = arr_demean.reshape((ash[-1],)+ash[:3]) 
print (ash[-1],)+ash[:3]
print 'arr_demean_reshaped.shape',  arr_demean_reshaped.shape

print arr_demean_reshaped[:,32,32,17].mean()

arr_demean_reshaped_std = arr_demean_reshaped.std(axis=0)

plot(arr_demean_reshaped_std.reshape(64*64, ash[2]).mean(axis=0))

# do it for each time ?

# do it for the difference between adjacent scans ?
# print diff_arr.shape

(64, 64, 35, 42)
(35, 42)
(42, 143360)
(42, 64, 64, 35)
arr_demean_reshaped.shape (42, 64, 64, 35)
5.41365893912e-15

[<matplotlib.lines.Line2D at 0x8515b50>]

import nipy.algorithms.diagnostics as nads

tdiff = nads.time_slice_diffs(arr, time_axis=-1, slice_axis=-2)
nads.plot_tsdiffs(tdiff)

[<matplotlib.axes.AxesSubplot at 0xb611250>,
 <matplotlib.axes.AxesSubplot at 0xb945a10>,
 <matplotlib.axes.AxesSubplot at 0xb961ed0>,
 <matplotlib.axes.AxesSubplot at 0xbacf410>]

Singular value decomposition to detect artifacts¶

import scipy.linalg as lin

print ash
tximg = arr.reshape(np.prod(ash[:3]),ash[3]).T
print tximg.shape
aTa = tximg.dot(tximg.T)
u,s,vh = lin.svd(aTa)

(64, 64, 35, 42)
(42, 143360)

plot(s)

[<matplotlib.lines.Line2D at 0x7f8f0c023250>]

plot(u[:,0])

[<matplotlib.lines.Line2D at 0x68b7410>]

tximg_centred = tximg - tximg.mean(axis=0)
print tximg_centred.shape
aTa = tximg_centred.dot(tximg_centred.T)
v,s,vh = lin.svd(aTa)

(42, 143360)

plot(s)

[<matplotlib.lines.Line2D at 0x6746690>]

u1 = tximg_centred.T.dot(v[:,0])

print u1.shape
print tximg_centred.shape
u1 = u1.reshape(ash[:3])
print u1.shape
imshow(u1[:,:,17])

(143360,)
(42, 143360)
(64, 64, 35)

<matplotlib.image.AxesImage at 0x7f8f0d08a0d0>

Exercise : write a script that replace the most suspicious scan by the average of surrounding scans¶

First - write a test !

# load the image again - to be sure
fimg = pjoin(ddata, "smallbold.nii.gz")
print fimg
img = nib.load(fimg)
arr = img.get_data()

# increase the mean at volume 18
arr[:,:,:,18] = arr[:,:,:,18] + 30

def std_diff_ts(a4d, timeaxis = 'last'):
    """
    Takes a 4d numpy array, take the diff between adjacent times, 
    compute and returns the std of the diff volume time series

    Inputs
    ------
    arr4d: numpy array
        The array on which we compute the std of the diff time series
    timeaxis: integer or in ['last', 'first']
        The axis where the time is

    Outputs
    -------
    (Ntmes,) numpy array
        The array with the std of the diff volume time series
    """
#    pass
    ash = np.asarray(a4d.shape)
    if len(a4d.shape) != 4:
        raise ValueError(" Array is not of shape 4")
    
    if timeaxis == 'first':
            timeaxis = 0
    if timeaxis != 'last':
        np.rollaxis(a4d, timeaxis, -1)
        
    print ash, ash[:3].prod(),ash[3]
    diff_arr = (a4d[:,:,:,:-1] - a4d[:,:,:,1:])**2
    diff_arr = diff_arr.reshape(ash[:3].prod(),ash[3]-1)
    return diff_arr.mean(axis=0)

/home/jb/data/qcpna/bolddata/smallbold.nii.gz

len(arr.shape)

stdts = std_diff_ts(arr)
plot(stdts)

[64 64 35 42] 143360 42

[<matplotlib.lines.Line2D at 0x7f8f04b15590>]

print np.argmax(stdts)
rk = np.argsort(stdts)
print argmax([stdts[rk[-1]-1],stdts[rk[-1]+1]])

17
1

bad_volume = np.argmax(stdts) + argmax([stdts[rk[-1]-1],stdts[rk[-1]+1]])