library(ggplot2,quietly = TRUE)
library(tidyverse,quietly = TRUE)
library(MusicScienceData,quiet=TRUE)Ch. 7 – Inferential statistics
This notebook demonstrates running inferential statistical tests in R.
Preliminaries
Load libraries and install MusicScienceData package where the example data is stored.
Code 7.1
See text for the explanation.
df <- MusicScienceData::sadness # define data
t <- t.test(ASM20 ~ gender, data=df) # t test
print(t$statistic) # show the t value t
-5.054596
print(scales::pvalue(t$p.value))[1] "<0.001"
dplyr::summarise(dplyr::group_by(df, gender), # means and SDs
M=mean(ASM20,na.rm=TRUE),
SD=sd(ASM20,na.rm=TRUE))# A tibble: 2 × 3
gender M SD
<fct> <dbl> <dbl>
1 Female 4.59 1.37
2 Male 4.96 1.24
Code 7.2
df <- MusicScienceData::sadness # define data
model.aov <- aov(ASM20 ~ age, data=df) # run anova
F <- summary(model.aov) # summarise
print(F) Df Sum Sq Mean Sq F value Pr(>F)
age 5 29.9 5.986 3.321 0.00548 **
Residuals 1564 2819.4 1.803
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 observations deleted due to missingness
Code 7.3
TABLE<-TukeyHSD(model.aov,conf.level = 0.95)
print(knitr::kable(TABLE$age,digits = 3,
caption = 'Comparison of age groups
for Item 20 in ASM survey.',
format = 'simple'))
Table: Comparison of age groups
for Item 20 in ASM survey.
diff lwr upr p adj
------------------ ------- ------- ------ ------
25 to 34-18 to 24 0.133 -0.133 0.399 0.713
35 to 44-18 to 24 0.232 -0.062 0.525 0.214
45 to 54-18 to 24 0.244 -0.088 0.576 0.289
55 to 64-18 to 24 0.493 0.107 0.879 0.004
65 to 74-18 to 24 0.418 -0.221 1.057 0.423
35 to 44-25 to 34 0.099 -0.174 0.371 0.906
45 to 54-25 to 34 0.111 -0.202 0.425 0.914
55 to 64-25 to 34 0.360 -0.011 0.731 0.063
65 to 74-25 to 34 0.285 -0.344 0.915 0.789
45 to 54-35 to 44 0.013 -0.324 0.349 1.000
55 to 64-35 to 44 0.261 -0.129 0.652 0.396
65 to 74-35 to 44 0.186 -0.455 0.828 0.962
55 to 64-45 to 54 0.249 -0.172 0.669 0.540
65 to 74-45 to 54 0.174 -0.486 0.834 0.975
65 to 74-55 to 64 -0.075 -0.764 0.614 1.000
Code 7.4
df <- MusicScienceData::sadness # define data
model2.aov <- aov(ASM20 ~ age * gender, data=df) # run anova
F2 <- summary(model2.aov)
print(F2) Df Sum Sq Mean Sq F value Pr(>F)
age 5 29.9 5.99 3.377 0.00488 **
gender 1 45.7 45.69 25.773 4.3e-07 ***
age:gender 5 11.5 2.31 1.303 0.25997
Residuals 1558 2762.1 1.77
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 observations deleted due to missingness
Code 7.5
This analysis requires extra libraries and raw data read from github. The installation might be slow in Colab because of package dependencies.
#install.packages("lme4",quiet=TRUE) # Required for LMM analysis
#install.packages("lmerTest",quiet=TRUE) # Optional
library(lme4,quiet=TRUE)
library(lmerTest,quiet=TRUE)library(lme4,quiet=TRUE)
library(lmerTest,quiet=TRUE)
d <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')
d2 <- dplyr::filter(d,Emotion=='Dimensional') #
d3 <- dplyr::filter(d2, Category=='Anger' |
Category=='Fear' |
Category=='Happy' |
Category=='Sad' |
Category=='Tender')
m1 <- lmer(Valence ~ Category * Gender + (1|id) + (1|Track), data = d3)
s <- summary(m1,corr=FALSE)
S<-s$coefficients; S<-round(S,2); S[,5]<-scales::pvalue(S[,5])
print(knitr::kable(S,format = 'simple',
caption = 'LMM results of Valence ratings.'))
Table: LMM results of Valence ratings.
Estimate Std. Error df t value Pr(>|t|)
-------------------------- --------- ----------- ------- -------- ---------
(Intercept) 3.43 0.25 58.17 13.51 <0.001
CategoryFear 0.07 0.34 47.43 0.19 0.850
CategoryHappy 4.16 0.34 47.43 12.24 <0.001
CategorySad 1.63 0.34 47.43 4.79 <0.001
CategoryTender 3.4 0.34 47.43 10.01 <0.001
GenderMale -0.09 0.21 110.04 -0.45 0.650
CategoryFear:GenderMale -0.07 0.19 2348 -0.34 0.730
CategoryHappy:GenderMale -0.04 0.19 2348 -0.22 0.820
CategorySad:GenderMale -0.46 0.19 2348 -2.41 0.020
CategoryTender:GenderMale 0 0.19 2348 0.01 0.990
Code 7.6
d <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')
S <- d %>%
filter(Category=='Sad') %>%
group_by(Category,Gender) %>%
summarise(M=mean(Valence,na.rm=T),SD=sd(Valence,na.rm=T),
.groups = 'drop')
print(S)# A tibble: 2 × 4
Category Gender M SD
<chr> <chr> <dbl> <dbl>
1 Sad Female 5.05 1.69
2 Sad Male 4.5 1.54
Code 7.7
library(MusicScienceData) # loads library w data
gender_age_xtab <- table(MusicScienceData::sadness$age,
MusicScienceData::sadness$gender)
print(gender_age_xtab)
Female Male
18 to 24 269 87
25 to 34 361 137
35 to 44 231 101
45 to 54 158 55
55 to 64 118 19
65 to 74 34 7
result <- chisq.test(gender_age_xtab) # Chi^2 test
print(result)
Pearson's Chi-squared test
data: gender_age_xtab
X-squared = 16.649, df = 5, p-value = 0.005215
Code 7.8
library(MusicScienceData) # load library w data
data <- MusicScienceData::soundtrack # define data
r<-cor.test(data$Valence, data$Tension) # calculate correlation
print(r$estimate) # print coefficient cor
-0.8269947
## cor
## -0.827
print(scales::pvalue(r$p.value)) # print pretty p value[1] "<0.001"
## [1] "<0.001"
print(r$parameter) # print df df
108
Code 7.9
library(MusicScienceData) # loads library w data
d1 <- MusicScienceData::soundtrack # get ratings
d2 <- MusicScienceData::soundtrack_features[,c(2:3,5:6)] # select only some features
d1[,17:21] <- as.data.frame(scale(d2)) # normalise
tmp <- cor(d1[,c(3,17:20)]) # get correlations
print(round(tmp[2:5,1],2)) # display first line RMS sp_centr spec_rolloff spec_zcr
0.58 0.36 0.40 0.32
Code 7.10
model.reg <- lm(Energy ~ RMS + sp_centr + spec_rolloff +
spec_zcr, data = d1)
s <- summary(model.reg) # R2adj = 0.424 (Energy)
print(s)
Call:
lm(formula = Energy ~ RMS + sp_centr + spec_rolloff + spec_zcr,
data = d1)
Residuals:
Min 1Q Median 3Q Max
-2.4719 -1.1042 -0.2064 0.9427 3.4504
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.4865 0.1309 41.905 < 2e-16 ***
RMS 0.9067 0.1397 6.491 2.88e-09 ***
sp_centr -1.9069 1.2245 -1.557 0.122
spec_rolloff 1.9663 0.9502 2.069 0.041 *
spec_zcr 0.5995 0.4170 1.438 0.154
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.373 on 105 degrees of freedom
Multiple R-squared: 0.4504, Adjusted R-squared: 0.4295
F-statistic: 21.52 on 4 and 105 DF, p-value: 5.528e-13
Code 7.11
r <- cor(d1$Energy, d1$RMS)
print( r^2 ) # print the squared correlation[1] 0.3378173
summary(lm(Energy ~ RMS,data=d1)) # Summarise regression
Call:
lm(formula = Energy ~ RMS, data = d1)
Residuals:
Min 1Q Median 3Q Max
-2.6644 -1.1921 -0.3852 1.1875 3.3296
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.4865 0.1417 38.717 < 2e-16 ***
RMS 1.0567 0.1424 7.423 2.79e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.486 on 108 degrees of freedom
Multiple R-squared: 0.3378, Adjusted R-squared: 0.3317
F-statistic: 55.1 on 1 and 108 DF, p-value: 2.788e-11