In [2]:
%load_ext autoreload
%autoreload 2
import numpy as np
from vflow import Vset, init_args, dict_to_df, perturbation_stats
from functools import partial
from sklearn.linear_model import LassoCV
from sklearn.metrics import r2_score, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
fMRI Voxel Prediction¶
This vflow
pipeline predicts using fMRI voxels.
In [3]:
ind = {}
def top_n_features(X, Y, n, i):
if i not in ind:
corr = np.abs(np.apply_along_axis(lambda x: np.corrcoef(x, Y[:, i])[0, 1], 0, X))
ind[i] = np.argsort(corr[~np.isnan(corr)])[::-1][:n]
return X[:, ind[i]]
def pca(X, n):
return PCA(n_components=n, copy=True).fit(X).transform(X)
In [4]:
# load data
data_dir = "./data/fmri/"
X = np.load(data_dir + "fit_feat.npy")
Y = np.load(data_dir + "resp_dat.npy")
In [5]:
np.random.seed(14)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=14)
X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test),
names=['X_train', 'X_test', 'y_train', 'y_test'])
# split y_train by voxel and extract top 500 correlated features per voxel
voxel_extract_funcs = [partial(lambda x, y, i: (top_n_features(x, y, 20, i), y[:, i]), i=i) for i in range(20)]
voxel_extract_set = Vset(name='voxel_extract', vfuncs=voxel_extract_funcs, output_matching=True)
X_trains, y_trains = voxel_extract_set(X_train, y_train)
X_tests, y_tests = voxel_extract_set(X_test, y_test)
/home/james/.local/share/virtualenvs/veridical-flow-zFhOijFB/lib/python3.10/site-packages/numpy/lib/function_base.py:2853: RuntimeWarning: invalid value encountered in divide c /= stddev[:, None] /home/james/.local/share/virtualenvs/veridical-flow-zFhOijFB/lib/python3.10/site-packages/numpy/lib/function_base.py:2854: RuntimeWarning: invalid value encountered in divide c /= stddev[None, :]
In [6]:
# modeling
modeling_set = Vset(name='modeling', vfuncs=[LassoCV()], vfunc_keys=["Lasso"])
modeling_set.fit(X_trains, y_trains)
Out[6]:
<vflow.vset.Vset at 0x7f6ed36529e0>
In [7]:
preds = modeling_set.predict(X_trains)
hard_metrics_set = Vset(name='hard_metrics', vfuncs=[r2_score, explained_variance_score],
vfunc_keys=["R2", "EV"])
hard_metrics = hard_metrics_set.evaluate(y_trains, preds)
df = dict_to_df(hard_metrics)
df
/home/james/repos/Yu-Group/veridical-flow/vflow/utils.py:225: FutureWarning: DataFrame.set_axis 'inplace' keyword is deprecated and will be removed in a future version. Use `obj = obj.set_axis(..., copy=False)` instead df.set_axis(cols, axis=1, inplace=True)
Out[7]:
init-voxel_extract | voxel_extract | init-modeling | init-modeling | init-modeling | modeling | hard_metrics | out | |
---|---|---|---|---|---|---|---|---|
0 | y_train | voxel_extract_0 | X_train | X_train | y_train | Lasso | R2 | 0.203121 |
1 | y_train | voxel_extract_1 | X_train | X_train | y_train | Lasso | R2 | 0.275898 |
2 | y_train | voxel_extract_2 | X_train | X_train | y_train | Lasso | R2 | 0.224725 |
3 | y_train | voxel_extract_3 | X_train | X_train | y_train | Lasso | R2 | 0.202630 |
4 | y_train | voxel_extract_4 | X_train | X_train | y_train | Lasso | R2 | 0.167139 |
5 | y_train | voxel_extract_5 | X_train | X_train | y_train | Lasso | R2 | 0.228424 |
6 | y_train | voxel_extract_6 | X_train | X_train | y_train | Lasso | R2 | 0.247807 |
7 | y_train | voxel_extract_7 | X_train | X_train | y_train | Lasso | R2 | 0.264284 |
8 | y_train | voxel_extract_8 | X_train | X_train | y_train | Lasso | R2 | 0.178232 |
9 | y_train | voxel_extract_9 | X_train | X_train | y_train | Lasso | R2 | 0.055822 |
10 | y_train | voxel_extract_10 | X_train | X_train | y_train | Lasso | R2 | 0.045923 |
11 | y_train | voxel_extract_11 | X_train | X_train | y_train | Lasso | R2 | 0.199126 |
12 | y_train | voxel_extract_12 | X_train | X_train | y_train | Lasso | R2 | 0.032398 |
13 | y_train | voxel_extract_13 | X_train | X_train | y_train | Lasso | R2 | 0.107258 |
14 | y_train | voxel_extract_14 | X_train | X_train | y_train | Lasso | R2 | 0.188789 |
15 | y_train | voxel_extract_15 | X_train | X_train | y_train | Lasso | R2 | 0.038167 |
16 | y_train | voxel_extract_16 | X_train | X_train | y_train | Lasso | R2 | 0.089735 |
17 | y_train | voxel_extract_17 | X_train | X_train | y_train | Lasso | R2 | 0.181334 |
18 | y_train | voxel_extract_18 | X_train | X_train | y_train | Lasso | R2 | 0.122867 |
19 | y_train | voxel_extract_19 | X_train | X_train | y_train | Lasso | R2 | 0.001247 |
20 | y_train | voxel_extract_0 | X_train | X_train | y_train | Lasso | EV | 0.203121 |
21 | y_train | voxel_extract_1 | X_train | X_train | y_train | Lasso | EV | 0.275898 |
22 | y_train | voxel_extract_2 | X_train | X_train | y_train | Lasso | EV | 0.224725 |
23 | y_train | voxel_extract_3 | X_train | X_train | y_train | Lasso | EV | 0.202630 |
24 | y_train | voxel_extract_4 | X_train | X_train | y_train | Lasso | EV | 0.167139 |
25 | y_train | voxel_extract_5 | X_train | X_train | y_train | Lasso | EV | 0.228424 |
26 | y_train | voxel_extract_6 | X_train | X_train | y_train | Lasso | EV | 0.247807 |
27 | y_train | voxel_extract_7 | X_train | X_train | y_train | Lasso | EV | 0.264284 |
28 | y_train | voxel_extract_8 | X_train | X_train | y_train | Lasso | EV | 0.178232 |
29 | y_train | voxel_extract_9 | X_train | X_train | y_train | Lasso | EV | 0.055822 |
30 | y_train | voxel_extract_10 | X_train | X_train | y_train | Lasso | EV | 0.045923 |
31 | y_train | voxel_extract_11 | X_train | X_train | y_train | Lasso | EV | 0.199126 |
32 | y_train | voxel_extract_12 | X_train | X_train | y_train | Lasso | EV | 0.032398 |
33 | y_train | voxel_extract_13 | X_train | X_train | y_train | Lasso | EV | 0.107258 |
34 | y_train | voxel_extract_14 | X_train | X_train | y_train | Lasso | EV | 0.188789 |
35 | y_train | voxel_extract_15 | X_train | X_train | y_train | Lasso | EV | 0.038167 |
36 | y_train | voxel_extract_16 | X_train | X_train | y_train | Lasso | EV | 0.089735 |
37 | y_train | voxel_extract_17 | X_train | X_train | y_train | Lasso | EV | 0.181334 |
38 | y_train | voxel_extract_18 | X_train | X_train | y_train | Lasso | EV | 0.122867 |
39 | y_train | voxel_extract_19 | X_train | X_train | y_train | Lasso | EV | 0.001247 |
In [8]:
metrics_stats = perturbation_stats(df, 'voxel_extract')
metrics_stats
Out[8]:
voxel_extract | count | mean | std | |
---|---|---|---|---|
0 | voxel_extract_0 | 2 | 0.203121 | 7.850462e-17 |
1 | voxel_extract_1 | 2 | 0.275898 | 0.000000e+00 |
2 | voxel_extract_10 | 2 | 0.045923 | 7.850462e-17 |
3 | voxel_extract_11 | 2 | 0.199126 | 7.850462e-17 |
4 | voxel_extract_12 | 2 | 0.032398 | 0.000000e+00 |
5 | voxel_extract_13 | 2 | 0.107258 | 1.570092e-16 |
6 | voxel_extract_14 | 2 | 0.188789 | 0.000000e+00 |
7 | voxel_extract_15 | 2 | 0.038167 | 0.000000e+00 |
8 | voxel_extract_16 | 2 | 0.089735 | 0.000000e+00 |
9 | voxel_extract_17 | 2 | 0.181334 | 0.000000e+00 |
10 | voxel_extract_18 | 2 | 0.122867 | 7.850462e-17 |
11 | voxel_extract_19 | 2 | 0.001247 | 7.850462e-17 |
12 | voxel_extract_2 | 2 | 0.224725 | 0.000000e+00 |
13 | voxel_extract_3 | 2 | 0.202630 | 7.850462e-17 |
14 | voxel_extract_4 | 2 | 0.167139 | 0.000000e+00 |
15 | voxel_extract_5 | 2 | 0.228424 | 0.000000e+00 |
16 | voxel_extract_6 | 2 | 0.247807 | 0.000000e+00 |
17 | voxel_extract_7 | 2 | 0.264284 | 0.000000e+00 |
18 | voxel_extract_8 | 2 | 0.178232 | 0.000000e+00 |
19 | voxel_extract_9 | 2 | 0.055822 | 7.850462e-17 |
In [ ]: