Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
SFARI Grandjean application power analysis
================
Joanes Grandjean
This page contains the reproducible code for performing the power
analysis for our project proposal for the Simons Foundation Autism
Research Initiative Rat consortium call 2021.
Main applicant: Dr. Joanes Grandjean Co-applicants: Prof. Judith
Homberg, Prof. Rogier Kievit, Dr. Dirk Schubert
Printing the session info for environement reference.
``` r
sessionInfo()
```
For the purpose of this power analysis, we will consider 4 different
longitudinal models of brain growth. 1. Constant mean difference over
time, equal variance 2. Equal mean, constant variance difference 3.
Change in mean over time over time, equal variance 4. Equal mean, change
in variance over time
First we write a function that will generate the mock dataset
``` r
# function to make longitudianl dataset
make_dataset <-
function(sample_size = 40,
mean_dif = 1.5,
sd_dif = 0,
mean_time_dif = FALSE,
sd_time_dif = FALSE) {
param <- c() #the mock parameter of interest
id <- c() #the ID label
group <- c() #the group label
age <- c() #the age label
ctl_base_dist <-
rnorm(sample_size, mean_base, sd_base) #create normal dist for control group
exp_base_dist <-
rnorm(sample_size, mean_base+(mean_base * mean_dif), sd_base+(sd_base* sd_dif)) #create normal dist for experimental group
id_ctl <- glue("CTL{1:sample_size}")
id_exp <- glue("EXP{1:sample_size}")
for (i in 1:length(age_vec)) {
ctl_time_dist <-
ctl_base_dist + brain_vol[i] + rnorm(sample_size, 0, noise_sd)
#base distribution volume as a function of age w/o time dif volume as afunction of age w time dif variance effect
exp_time_dist <-
(
exp_base_dist + brain_vol[i] * as.numeric(!mean_time_dif) + (brain_vol[i] *
(log(i, 1000) + 1) * as.numeric(mean_time_dif))
) + (rnorm(sample_size, 0, i * sd_dif * sd_growth) * as.numeric(sd_time_dif)) +
rnorm(sample_size, 0, noise_sd)
param <- c(param, ctl_time_dist, exp_time_dist)
id <- c(id, id_ctl, id_exp)
group <-
c(group, rep('ctl', sample_size), rep('exp', sample_size))
age <-
c(age,
rep(age_vec[i], sample_size),
rep(age_vec[i], sample_size))
}
df <- data.frame(param, id, group, age)
return(df)
}
# function to run the stats for power analysis
run_stats <-
function(sample_size = 40,
mean_dif = 1.5,
sd_dif = 0,
mean_time_dif = FALSE,
sd_time_dif = FALSE) {
df <-
make_dataset(sample_size, mean_dif , sd_dif , mean_time_dif , sd_time_dif)
effect_true <- mean_dif
variance_true <- sd_dif
mod <- lmer(param ~ group * age + (1 | id), df)
p_mean <- estimate_contrasts(mod)$p
p_var <- var.test(param ~ group, df)$p.value
rdf <- data.frame(effect_true, variance_true, p_mean, p_var)
return(rdf)
}
```
Here we set global parameters for the execution of this code
``` r
mean_base<-100 #mean volume parameter on first imaging day
sd_base<-50 #sd volume parameter on first imaging day
sd_growth <- 50 #factor to increase SD with over time.
age_vec <- c(5, 10, 25, 35, 70, 150) #Age in Post-natal days when the rats are measured
brain_vol <- c(0, 200, 400, 450, 500, 600) #loosely based on https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3737272/
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
nperm <- 250 #number of permutations to estimate statistical power.
```
Make some plots to represent the 4 different group difference models
``` r
df <- make_dataset(40,mean_dif = 2, sd_dif = 0,mean_time_dif = FALSE, sd_time_dif=TRUE)
p <-
df %>% group_by(group, age) %>% summarise(
mean = mean(param),
upper = mean(param) + sd(param),
lower = mean(param) - sd(param)
) %>% mutate(lower = replace(lower, group == 'exp', mean)) %>% mutate(upper = replace(upper, group == 'ctl', mean)) %>% ggplot(aes(
x = age,
y = mean,
color = group,
fill = group
))
p1 <-
p + geom_line() + geom_errorbar(aes(ymin = lower, ymax = upper), width =
0) + labs(title = "Different mean, equal variance", y='Param [a.u.]')+theme(text = element_text(size=6))
df <- make_dataset(40,mean_dif = 0, sd_dif = 2,mean_time_dif = FALSE, sd_time_dif=FALSE)
p <-
df %>% group_by(group, age) %>% summarise(
mean = mean(param),
upper = mean(param) + sd(param),
lower = mean(param) - sd(param)
) %>% mutate(lower = replace(lower, group == 'exp', mean)) %>% mutate(upper = replace(upper, group == 'ctl', mean)) %>% ggplot(aes(
x = age,
y = mean,
color = group,
fill = group
))
p2 <-
p + geom_line() + geom_errorbar(aes(ymin = lower, ymax = upper), width =
0) + labs(title = "Equal mean, different variance", y='Param [a.u.]')+theme(text = element_text(size=6))
df <- make_dataset(40,mean_dif = 2, sd_dif = 0,mean_time_dif = TRUE, sd_time_dif=FALSE)
p <-
df %>% group_by(group, age) %>% summarise(
mean = mean(param),
upper = mean(param) + sd(param),
lower = mean(param) - sd(param)
) %>% mutate(lower = replace(lower, group == 'exp', mean)) %>% mutate(upper = replace(upper, group == 'ctl', mean)) %>% ggplot(aes(
x = age,
y = mean,
color = group,
fill = group
))
p3 <-
p + geom_line() + geom_errorbar(aes(ymin = lower, ymax = upper), width =
0) + labs(title = "Change in mean over time, equal variance", y='Param [a.u.]')+theme(text = element_text(size=6))
df <- make_dataset(40,mean_dif = 0, sd_dif = 2,mean_time_dif = FALSE, sd_time_dif=TRUE)
p <-
df %>% group_by(group, age) %>% summarise(
mean = mean(param),
upper = mean(param) + sd(param),
lower = mean(param) - sd(param)
)%>% mutate(lower = replace(lower, group == 'exp', mean)) %>% mutate(upper = replace(upper, group == 'ctl', mean)) %>% ggplot(aes(
x = age,
y = mean,
color = group,
fill = group
))
p4 <-
p + geom_line() + geom_errorbar(aes(ymin = lower, ymax = upper), width =
0) + labs(title = "Equal mean, change in variance over time", y='Param [a.u.]')+theme(text = element_text(size=6))
p_final <- ggarrange(p1, p2, p3, p4 , ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom")
ggsave('assets/model_types.png', plot=p_final, device = 'png', dpi=300)
Test the statistics, validate the linear model
``` r
sample_size<-40
sd_time_dif<-FALSE
df <- make_dataset(sample_size,mean_dif , sd_dif ,mean_time_dif , sd_time_dif) #generate a dataset
mod<-lmer(param ~ group * age + (1|id), df) #model group and age
performance(mod) #test model performance
parameters(mod,standardize = 'refit') #estimate standardized model parameters
estimate_contrasts(mod) #estimate contrasts
var.test(param~group, df) #test for difference in variance
```
Perform and plot the power analysis for the 4 difference group
difference models.
``` r
rdf_cond1<-mcmapply(run_stats, mean_dif = rep(seq(0,1,0.1),nperm),mc.cores=numcore)
rdf_cond1<-as.data.frame(t(rdf_cond1))
rdf_cond1 %>% group_by(effect_true, variance_true) %>% summarise(mean_power=sum(p_mean<0.05)/nperm,var_power=sum(p_var<0.05)/nperm ) %>% unnest(cols = c(effect_true, variance_true)) %>% write_csv('assets/rdf_cond1_sum.csv.gz')
rdf_cond2<-mcmapply(run_stats, mean_dif = 0, sd_dif=rep(seq(0,5,0.5),nperm),mc.cores=numcore)
rdf_cond2<-as.data.frame(t(rdf_cond2))
rdf_cond2 %>% group_by(effect_true, variance_true) %>% summarise(mean_power=sum(p_mean<0.05)/nperm,var_power=sum(p_var<0.05)/nperm ) %>% unnest(cols = c(effect_true, variance_true)) %>% write_csv('assets/rdf_cond2_sum.csv.gz')
rdf_cond3<-mcmapply(run_stats, mean_dif = rep(seq(0,1,0.1),nperm),mean_time_dif = TRUE,mc.cores=numcore)
rdf_cond3<-as.data.frame(t(rdf_cond3))
rdf_cond3 %>% group_by(effect_true, variance_true) %>% summarise(mean_power=sum(p_mean<0.05)/nperm,var_power=sum(p_var<0.05)/nperm ) %>% unnest(cols = c(effect_true, variance_true)) %>% write_csv('assets/rdf_cond3_sum.csv.gz')
rdf_cond4<-mcmapply(run_stats, mean_dif = 0, sd_dif=rep(seq(0,5,0.5),nperm),sd_time_dif=TRUE,mc.cores=numcore)
rdf_cond4<-as.data.frame(t(rdf_cond4))
rdf_cond4 %>% group_by(effect_true, variance_true) %>% summarise(mean_power=sum(p_mean<0.05)/nperm,var_power=sum(p_var<0.05)/nperm ) %>% unnest(cols = c(effect_true, variance_true)) %>% write_csv('assets/rdf_cond4_sum.csv.gz')
rdf_cond5<-mcmapply(run_stats, mean_dif = rep(seq(0,1,0.2),nperm), sd_dif=rep(seq(0,5,0.5),nperm),mc.cores=numcore)
rdf_cond5<-as.data.frame(t(rdf_cond5))
rdf_cond5 %>% group_by(effect_true, variance_true) %>% summarise(mean_power=sum(p_mean<0.05)/nperm,var_power=sum(p_var<0.05)/nperm ) %>% unnest(cols = c(effect_true, variance_true)) %>% write_csv('assets/rdf_cond5_sum.csv.gz')
```
rdf_cond1_sum<-read_csv('assets/rdf_cond1_sum.csv.gz')
rdf_cond2_sum<-read_csv('assets/rdf_cond2_sum.csv.gz')
rdf_cond3_sum<-read_csv('assets/rdf_cond3_sum.csv.gz')
rdf_cond4_sum<-read_csv('assets/rdf_cond4_sum.csv.gz')
p<-ggplot(rdf_cond1_sum,aes(x=effect_true*100,y=mean_power*100))
p1 <- p+geom_line()+labs(title = "Different mean, equal variance", y='Power [%]', x='Mean difference [%]')+theme(text = element_text(size=6))+geom_hline(yintercept = 80, linetype = "dashed")
p<-ggplot(rdf_cond2_sum,aes(x=variance_true*100,y=var_power*100))
p2 <- p+geom_line()+labs(title = "Equal mean, different variance", y='Power [%]', x='SD difference [%]')+theme(text = element_text(size=6))+geom_hline(yintercept = 80, linetype = "dashed")
p<-ggplot(rdf_cond3_sum,aes(x=effect_true*100,y=mean_power*100))
p3 <- p+geom_line()+labs(title = "Change in mean over time, equal variance", y='Power [%]', x='Mean difference [%]')+theme(text = element_text(size=6))+geom_hline(yintercept = 80, linetype = "dashed")
p<-ggplot(rdf_cond4_sum,aes(x=variance_true*100,y=var_power*100))
p4 <- p+geom_line()+labs(title = "Equal mean, change in variance over time", y='Power [%]', x='SD difference [%]')+theme(text = element_text(size=6))+geom_hline(yintercept = 80, linetype = "dashed")
p_final <- ggarrange(p1, p2, p3, p4 , ncol = 2, nrow = 2)
ggsave('assets/model_power.png', plot=p_final, device = 'png', dpi=300)