-
Xianzong Meng authoredXianzong Meng authored
README.md 31.23 KiB
robotod_stat
packages needed
install.packages('tidyverse', 'glue', 'glue','lme4','multcomp','parameters', 'effectsize','performance','ggpubr')
packages loading
#General utility packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(glue)
library(knitr)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
#Stats packages
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
library(parameters)
## Registered S3 method overwritten by 'parameters':
## method from
## format.parameters_distribution datawizard
library(effectsize)
library(performance)
library(emmeans)
#Plot packages
library(ggpubr)
session information
#output session info
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
## system code page: 65001
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.4.0 emmeans_1.8.1-1 performance_0.8.0 effectsize_0.6.0.1
## [5] parameters_0.17.0 multcomp_1.4-18 TH.data_1.1-0 MASS_7.3-54
## [9] survival_3.2-13 mvtnorm_1.1-3 lme4_1.1-27.1 Matrix_1.3-4
## [13] data.table_1.14.2 knitr_1.37 glue_1.6.0 forcats_0.5.1
## [17] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_2.1.1
## [21] tidyr_1.1.4 tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-153 fs_1.5.2 lubridate_1.8.0 insight_0.17.0
## [5] httr_1.4.2 tools_4.1.2 backports_1.4.1 utf8_1.2.2
## [9] R6_2.5.1 DBI_1.1.2 colorspace_2.0-2 withr_2.5.0
## [13] tidyselect_1.1.1 compiler_4.1.2 cli_3.1.1 rvest_1.0.2
## [17] xml2_1.3.3 sandwich_3.0-1 bayestestR_0.11.5 scales_1.1.1
## [21] digest_0.6.29 minqa_1.2.4 rmarkdown_2.11 pkgconfig_2.0.3
## [25] htmltools_0.5.2 dbplyr_2.1.1 fastmap_1.1.0 rlang_0.4.12
## [29] readxl_1.3.1 rstudioapi_0.13 generics_0.1.1 zoo_1.8-9
## [33] jsonlite_1.7.3 car_3.0-12 magrittr_2.0.1 Rcpp_1.0.8
## [37] munsell_0.5.0 fansi_1.0.2 abind_1.4-5 lifecycle_1.0.1
## [41] stringi_1.7.6 yaml_2.2.1 carData_3.0-5 grid_4.1.2
## [45] crayon_1.4.2 lattice_0.20-45 haven_2.4.3 splines_4.1.2
## [49] hms_1.1.1 pillar_1.6.4 boot_1.3-28 estimability_1.4.1
## [53] ggsignif_0.6.3 codetools_0.2-18 reprex_2.0.1 evaluate_0.14
## [57] modelr_0.1.8 vctrs_0.3.8 nloptr_1.2.2.3 tzdb_0.2.0
## [61] cellranger_1.1.0 gtable_0.3.0 assertthat_0.2.1 datawizard_0.4.0
## [65] xfun_0.29 xtable_1.8-4 broom_0.7.11 rstatix_0.7.0
## [69] ellipsis_0.3.2
loading data
df <- read_csv('https://gitlab.socsci.ru.nl/preclinical-neuroimaging/robotmod/-/raw/main/robotmod_data.csv') %>%
mutate(trials = as.factor(trials)) %>%
mutate(group = as.factor(group)) %>%
mutate(sex = as.factor(sex)) %>%
mutate(rat_ID = as.factor(rat_ID)) %>%
mutate(restriction = as.factor(restriction))%>%
mutate(age = as.factor(age))%>%
mutate(food.distance.cm = as.factor(food.distance.cm))%>%
# Create score based on time. Following log attribution of score as a function of time.
mutate(leaving.score = case_when(is.na(leaving.sec) ~ 7,
leaving.sec >= 200 ~ 6,
leaving.sec >= 100 ~ 5,
leaving.sec >= 50 ~ 4,
leaving.sec >= 25 ~ 3,
leaving.sec >= 12.5 ~ 2,
leaving.sec >= 6.25 ~ 1,
leaving.sec <6.25 ~ 0)) %>%
mutate(approaching.score = case_when(is.na(approaching.sec) ~ 7,
approaching.sec >= 200 ~ 6,
approaching.sec >= 100 ~ 5,
approaching.sec >= 50 ~ 4,
approaching.sec >= 25 ~ 3,
approaching.sec >= 12.5 ~ 2,
approaching.sec >= 6.25 ~ 1,
approaching.sec <6.25 ~ 0)) %>%
mutate(foraging.score = case_when(is.na(foraging.sec) ~ 7,
foraging.sec >= 200 ~ 6,
foraging.sec >= 100 ~ 5,
foraging.sec >= 50 ~ 4,
foraging.sec >= 25 ~ 3,
foraging.sec >= 12.5 ~ 2,
foraging.sec >= 6.25 ~ 1,
foraging.sec <6.25 ~ 0))
## Rows: 276 Columns: 15
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): sex, group
## dbl (13): rat_ID, baseline.day, robot.day, trials, age, restriction, food.di...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
#head(df) %>% kable("pipe")
summary of the data
summary(df) %>% kable("pipe")
rat_ID | baseline.day | robot.day | trials | sex | group | age | restriction | food.distance.cm | got.food | leaving.sec | total_approaching.sec | backing.sec | foraging.sec | approaching.sec | leaving.score | approaching.score | foraging.score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
407700 : 8 | Min. :0.0000 | Min. :0.0000 | 1:60 | F:138 | LE:156 | 70 :216 | 45: 60 | 25.4 :60 | Min. :0.0000 | Min. : 1.00 | Min. : 2.00 | Min. : 1.0 | Min. : 4.0 | Min. : 0.00 | Min. :0.000 | Min. :0.000 | Min. :0.000 | |
407701 : 8 | 1st Qu.:0.0000 | 1st Qu.:1.0000 | 2:60 | M:138 | SD:120 | 140: 60 | 60: 96 | 50.8 :60 | 1st Qu.:1.0000 | 1st Qu.: 4.00 | 1st Qu.: 9.00 | 1st Qu.: 3.0 | 1st Qu.: 14.0 | 1st Qu.: 2.00 | 1st Qu.:0.000 | 1st Qu.:0.000 | 1st Qu.:2.000 | |
407702 : 8 | Median :0.0000 | Median :1.0000 | 3:60 | 90:120 | 76.2 :60 | Median :1.0000 | Median : 7.00 | Median : 20.00 | Median : 13.0 | Median : 35.0 | Median : 9.00 | Median :1.000 | Median :2.000 | Median :4.000 | ||||
407703 : 8 | Mean :0.7826 | Mean :0.8696 | 4:48 | 101.6:48 | Mean :0.7536 | Mean :13.21 | Mean : 35.01 | Mean : 32.8 | Mean : 54.2 | Mean : 23.67 | Mean :2.442 | Mean :3.029 | Mean :3.935 | |||||
407704 : 8 | 3rd Qu.:0.0000 | 3rd Qu.:1.0000 | 5:48 | 127 :48 | 3rd Qu.:1.0000 | 3rd Qu.:13.00 | 3rd Qu.: 41.00 | 3rd Qu.: 50.0 | 3rd Qu.: 80.5 | 3rd Qu.: 30.00 | 3rd Qu.:4.000 | 3rd Qu.:7.000 | 3rd Qu.:5.250 | |||||
407705 : 8 | Max. :6.0000 | Max. :1.0000 | Max. :1.0000 | Max. :89.00 | Max. :191.00 | Max. :262.0 | Max. :282.0 | Max. :159.00 | Max. :7.000 | Max. :7.000 | Max. :7.000 | |||||||
(Other):228 | NA’s :66 | NA’s :78 | NA’s :119 | NA’s :68 | NA’s :78 |
select testing days (Rday1 here)
test_day <- df %>% filter(df$robot.day == 1)
model comparision
sec/score model comparision (data: SD rat, 90% restrction, foraging, Rday1)
df.SD_Rday1 <- df %>% filter(df$robot.day == 1, df$group == "SD")
p_sec <- ggplot(df.SD_Rday1, aes(x=trials, y=foraging.sec, group=group, colour=group, shape=sex))
p_sec + stat_summary(fun = "mean", colour = "darkgrey", size = 0.5, geom = "line", fun.min = 'sd', fun.max = 'sd')+
stat_summary(fun.data = mean_se, geom = "ribbon", alpha=0.3, aes(colour = group, fill = group))+ geom_point(alpha = 1)+theme_minimal()
p_sco <- ggplot(df.SD_Rday1, aes(x=trials, y=foraging.score, group=group, colour=group, shape=sex))
p_sco + stat_summary(fun = "mean", colour = "darkgrey", size = 0.5, geom = "line", fun.min = 'sd', fun.max = 'sd')+
stat_summary(fun.data = mean_se, geom = "ribbon", alpha=0.3, aes(colour = group, fill = group))+ geom_point(alpha = 1)+theme_minimal()
#***here we noticed,in sec dataset, many missing values in position5, so score dataset is more accurate, besides, the score dataset seems to recapitulate sec dataset
mod.sec <- lmer(foraging.sec~ trials:sex + trials + sex + (1|rat_ID), df.SD_Rday1)
mod.score <- lmer(foraging.score~ trials:sex + trials + sex + (1|rat_ID), df.SD_Rday1)
compare_performance(mod.sec, mod.score)
#***The model with scores has a lower AIC and BIC, but comparable R2. This suggest the model with scores is preferable.
print('checking model with seconds')
check_normality(mod.sec)
check_heteroscedasticity(mod.sec)
print('checking model with scores')
check_normality(mod.score)
check_heteroscedasticity(mod.score)
#***Model with scores seem to better follow normal distribution.
## # Comparison of Model Performance Indices
##
## Name | Model | AIC | AIC weights | BIC | BIC weights | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## -----------------------------------------------------------------------------------------------------------------------
## mod.sec | lmerMod | 785.240 | < 0.001 | 813.674 | < 0.001 | 0.433 | 0.068 | 0.392 | 36.896 | 44.019
## mod.score | lmerMod | 484.861 | 1.00 | 518.311 | 1.00 | 0.634 | 0.064 | 0.609 | 1.227 | 1.413
## [1] "checking model with seconds"
## Warning: Non-normality of residuals detected (p = 0.002).
## Warning: Heteroscedasticity (non-constant error variance) detected (p < .001).
## [1] "checking model with scores"
## OK: residuals appear as normally distributed (p = 0.455).
## OK: Error variance appears to be homoscedastic (p = 0.702).
age effect (data: SD rat, 90% restrction, backing, Rday1)
df.SD_Rday1 <- df %>% filter(df$robot.day == 1, df$group == "SD")
SD_foraging <- lmer(foraging.score~ sex + trials + age + sex:trials + sex:age + trials:age + (1|rat_ID), df.SD_Rday1)
eta_squared(SD_foraging)
## # Effect Size for ANOVA (Type III)
##
## Parameter | Eta2 (partial) | 95% CI
## ------------------------------------------
## sex | 0.03 | [0.00, 1.00]
## trials | 0.16 | [0.04, 1.00]
## age | 0.54 | [0.28, 1.00]
## sex:trials | 0.01 | [0.00, 1.00]
## sex:age | 0.10 | [0.00, 1.00]
## trials:age | 2.37e-03 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at (1).
age effect plot, fig 1a 1b
SD_Rday1_sum<-df.SD_Rday1 %>% group_by(trials, age) %>% summarize(percentage = sum(foraging.score != 7)/n())