Skip to content
Snippets Groups Projects
README.md 31.23 KiB

robotod_stat

packages needed

install.packages('tidyverse', 'glue', 'glue','lme4','multcomp','parameters', 'effectsize','performance','ggpubr')

packages loading

#General utility packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(glue)
library(knitr)
library(data.table)
## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following object is masked from 'package:purrr':
## 
##     transpose
#Stats packages
library(lme4)
## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(multcomp)
## Loading required package: mvtnorm

## Loading required package: survival

## Loading required package: TH.data

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser
library(parameters)
## Registered S3 method overwritten by 'parameters':
##   method                         from      
##   format.parameters_distribution datawizard
library(effectsize)
library(performance)
library(emmeans)
#Plot packages
library(ggpubr)

session information

#output session info
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## system code page: 65001
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0       emmeans_1.8.1-1    performance_0.8.0  effectsize_0.6.0.1
##  [5] parameters_0.17.0  multcomp_1.4-18    TH.data_1.1-0      MASS_7.3-54       
##  [9] survival_3.2-13    mvtnorm_1.1-3      lme4_1.1-27.1      Matrix_1.3-4      
## [13] data.table_1.14.2  knitr_1.37         glue_1.6.0         forcats_0.5.1     
## [17] stringr_1.4.0      dplyr_1.0.7        purrr_0.3.4        readr_2.1.1       
## [21] tidyr_1.1.4        tibble_3.1.6       ggplot2_3.3.5      tidyverse_1.3.1   
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-153       fs_1.5.2           lubridate_1.8.0    insight_0.17.0    
##  [5] httr_1.4.2         tools_4.1.2        backports_1.4.1    utf8_1.2.2        
##  [9] R6_2.5.1           DBI_1.1.2          colorspace_2.0-2   withr_2.5.0       
## [13] tidyselect_1.1.1   compiler_4.1.2     cli_3.1.1          rvest_1.0.2       
## [17] xml2_1.3.3         sandwich_3.0-1     bayestestR_0.11.5  scales_1.1.1      
## [21] digest_0.6.29      minqa_1.2.4        rmarkdown_2.11     pkgconfig_2.0.3   
## [25] htmltools_0.5.2    dbplyr_2.1.1       fastmap_1.1.0      rlang_0.4.12      
## [29] readxl_1.3.1       rstudioapi_0.13    generics_0.1.1     zoo_1.8-9         
## [33] jsonlite_1.7.3     car_3.0-12         magrittr_2.0.1     Rcpp_1.0.8        
## [37] munsell_0.5.0      fansi_1.0.2        abind_1.4-5        lifecycle_1.0.1   
## [41] stringi_1.7.6      yaml_2.2.1         carData_3.0-5      grid_4.1.2        
## [45] crayon_1.4.2       lattice_0.20-45    haven_2.4.3        splines_4.1.2     
## [49] hms_1.1.1          pillar_1.6.4       boot_1.3-28        estimability_1.4.1
## [53] ggsignif_0.6.3     codetools_0.2-18   reprex_2.0.1       evaluate_0.14     
## [57] modelr_0.1.8       vctrs_0.3.8        nloptr_1.2.2.3     tzdb_0.2.0        
## [61] cellranger_1.1.0   gtable_0.3.0       assertthat_0.2.1   datawizard_0.4.0  
## [65] xfun_0.29          xtable_1.8-4       broom_0.7.11       rstatix_0.7.0     
## [69] ellipsis_0.3.2

loading data

df <- read_csv('https://gitlab.socsci.ru.nl/preclinical-neuroimaging/robotmod/-/raw/main/robotmod_data.csv') %>% 
          mutate(trials = as.factor(trials)) %>% 
          mutate(group = as.factor(group)) %>% 
          mutate(sex = as.factor(sex)) %>% 
          mutate(rat_ID = as.factor(rat_ID)) %>% 
          mutate(restriction = as.factor(restriction))%>%
          mutate(age = as.factor(age))%>%
          mutate(food.distance.cm = as.factor(food.distance.cm))%>%
         
        # Create score based on time. Following log attribution of score as a function of time. 
          mutate(leaving.score =  case_when(is.na(leaving.sec) ~ 7,
                                                  leaving.sec >= 200 ~ 6,
                                                  leaving.sec >= 100 ~ 5,
                                                  leaving.sec >= 50 ~ 4,
                                                  leaving.sec >= 25  ~ 3,
                                                  leaving.sec >= 12.5 ~ 2,
                                                  leaving.sec >= 6.25 ~ 1,
                                                  leaving.sec <6.25 ~ 0)) %>% 
            mutate(approaching.score =  case_when(is.na(approaching.sec) ~ 7,
                                                  approaching.sec >= 200 ~ 6,
                                                  approaching.sec >= 100 ~ 5,
                                                  approaching.sec >= 50 ~ 4,
                                                  approaching.sec >= 25  ~ 3,
                                                  approaching.sec >= 12.5 ~ 2,
                                                  approaching.sec >= 6.25 ~ 1,
                                                  approaching.sec <6.25 ~ 0)) %>% 
            mutate(foraging.score =  case_when(is.na(foraging.sec) ~ 7,
                                                  foraging.sec >= 200 ~ 6,
                                                  foraging.sec >= 100 ~ 5,
                                                  foraging.sec >= 50 ~ 4,
                                                  foraging.sec >= 25  ~ 3,
                                                  foraging.sec >= 12.5 ~ 2,
                                                  foraging.sec >= 6.25 ~ 1,
                                                  foraging.sec <6.25 ~ 0))
## Rows: 276 Columns: 15

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): sex, group
## dbl (13): rat_ID, baseline.day, robot.day, trials, age, restriction, food.di...

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
#head(df) %>% kable("pipe")

summary of the data

summary(df) %>% kable("pipe")
rat_ID baseline.day robot.day trials sex group age restriction food.distance.cm got.food leaving.sec total_approaching.sec backing.sec foraging.sec approaching.sec leaving.score approaching.score foraging.score
407700 : 8 Min. :0.0000 Min. :0.0000 1:60 F:138 LE:156 70 :216 45: 60 25.4 :60 Min. :0.0000 Min. : 1.00 Min. : 2.00 Min. : 1.0 Min. : 4.0 Min. : 0.00 Min. :0.000 Min. :0.000 Min. :0.000
407701 : 8 1st Qu.:0.0000 1st Qu.:1.0000 2:60 M:138 SD:120 140: 60 60: 96 50.8 :60 1st Qu.:1.0000 1st Qu.: 4.00 1st Qu.: 9.00 1st Qu.: 3.0 1st Qu.: 14.0 1st Qu.: 2.00 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:2.000
407702 : 8 Median :0.0000 Median :1.0000 3:60 90:120 76.2 :60 Median :1.0000 Median : 7.00 Median : 20.00 Median : 13.0 Median : 35.0 Median : 9.00 Median :1.000 Median :2.000 Median :4.000
407703 : 8 Mean :0.7826 Mean :0.8696 4:48 101.6:48 Mean :0.7536 Mean :13.21 Mean : 35.01 Mean : 32.8 Mean : 54.2 Mean : 23.67 Mean :2.442 Mean :3.029 Mean :3.935
407704 : 8 3rd Qu.:0.0000 3rd Qu.:1.0000 5:48 127 :48 3rd Qu.:1.0000 3rd Qu.:13.00 3rd Qu.: 41.00 3rd Qu.: 50.0 3rd Qu.: 80.5 3rd Qu.: 30.00 3rd Qu.:4.000 3rd Qu.:7.000 3rd Qu.:5.250
407705 : 8 Max. :6.0000 Max. :1.0000 Max. :1.0000 Max. :89.00 Max. :191.00 Max. :262.0 Max. :282.0 Max. :159.00 Max. :7.000 Max. :7.000 Max. :7.000
(Other):228 NA’s :66 NA’s :78 NA’s :119 NA’s :68 NA’s :78

select testing days (Rday1 here)

test_day <- df %>% filter(df$robot.day == 1)

model comparision

sec/score model comparision (data: SD rat, 90% restrction, foraging, Rday1)

df.SD_Rday1 <- df %>% filter(df$robot.day == 1, df$group == "SD")
p_sec <- ggplot(df.SD_Rday1, aes(x=trials, y=foraging.sec, group=group, colour=group, shape=sex))
p_sec + stat_summary(fun = "mean", colour = "darkgrey", size = 0.5, geom = "line", fun.min = 'sd', fun.max = 'sd')+
    stat_summary(fun.data = mean_se, geom = "ribbon", alpha=0.3, aes(colour = group, fill = group))+ geom_point(alpha = 1)+theme_minimal()

p_sco <- ggplot(df.SD_Rday1, aes(x=trials, y=foraging.score, group=group, colour=group, shape=sex))
p_sco + stat_summary(fun = "mean", colour = "darkgrey", size = 0.5, geom = "line", fun.min = 'sd', fun.max = 'sd')+
    stat_summary(fun.data = mean_se, geom = "ribbon", alpha=0.3, aes(colour = group, fill = group))+ geom_point(alpha = 1)+theme_minimal()

#***here we noticed,in sec dataset, many missing values in position5, so score dataset is more accurate, besides, the score dataset seems to recapitulate sec dataset

mod.sec <- lmer(foraging.sec~ trials:sex  + trials + sex + (1|rat_ID), df.SD_Rday1)
mod.score <- lmer(foraging.score~ trials:sex  + trials + sex + (1|rat_ID), df.SD_Rday1)
compare_performance(mod.sec, mod.score)
#***The model with scores has a lower AIC and BIC, but comparable R2. This suggest the model with scores is preferable.

print('checking model with seconds')
check_normality(mod.sec)
check_heteroscedasticity(mod.sec)
print('checking model with scores')
check_normality(mod.score)
check_heteroscedasticity(mod.score)
#***Model with scores seem to better follow normal distribution.
## # Comparison of Model Performance Indices
## 
## Name      |   Model |     AIC | AIC weights |     BIC | BIC weights | R2 (cond.) | R2 (marg.) |   ICC |   RMSE |  Sigma
## -----------------------------------------------------------------------------------------------------------------------
## mod.sec   | lmerMod | 785.240 |     < 0.001 | 813.674 |     < 0.001 |      0.433 |      0.068 | 0.392 | 36.896 | 44.019
## mod.score | lmerMod | 484.861 |        1.00 | 518.311 |        1.00 |      0.634 |      0.064 | 0.609 |  1.227 |  1.413
## [1] "checking model with seconds"
## Warning: Non-normality of residuals detected (p = 0.002).
## Warning: Heteroscedasticity (non-constant error variance) detected (p < .001).
## [1] "checking model with scores"
## OK: residuals appear as normally distributed (p = 0.455).
## OK: Error variance appears to be homoscedastic (p = 0.702).

age effect (data: SD rat, 90% restrction, backing, Rday1)

df.SD_Rday1 <- df %>% filter(df$robot.day == 1, df$group == "SD")
SD_foraging <- lmer(foraging.score~ sex + trials + age + sex:trials + sex:age + trials:age + (1|rat_ID), df.SD_Rday1)
eta_squared(SD_foraging)
## # Effect Size for ANOVA (Type III)
## 
## Parameter  | Eta2 (partial) |       95% CI
## ------------------------------------------
## sex        |           0.03 | [0.00, 1.00]
## trials     |           0.16 | [0.04, 1.00]
## age        |           0.54 | [0.28, 1.00]
## sex:trials |           0.01 | [0.00, 1.00]
## sex:age    |           0.10 | [0.00, 1.00]
## trials:age |       2.37e-03 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at (1).

age effect plot, fig 1a 1b

SD_Rday1_sum<-df.SD_Rday1 %>% group_by(trials, age) %>% summarize(percentage = sum(foraging.score != 7)/n())