library(ggplot2,quietly = TRUE)
library(tidyverse,quietly = TRUE)
library(MusicScienceData,quiet=TRUE)
Ch. 7 – Inferential statistics
This notebook demonstrates running inferential statistical tests in R.
Preliminaries
Load libraries and install MusicScienceData
package where the example data is stored.
Code 7.1
See text for the explanation.
<- MusicScienceData::sadness # define data
df <- t.test(ASM20 ~ gender, data=df) # t test
t print(t$statistic) # show the t value
t
-5.054596
print(scales::pvalue(t$p.value))
[1] "<0.001"
::summarise(dplyr::group_by(df, gender), # means and SDs
dplyrM=mean(ASM20,na.rm=TRUE),
SD=sd(ASM20,na.rm=TRUE))
# A tibble: 2 × 3
gender M SD
<fct> <dbl> <dbl>
1 Female 4.59 1.37
2 Male 4.96 1.24
Code 7.2
<- MusicScienceData::sadness # define data
df <- aov(ASM20 ~ age, data=df) # run anova
model.aov <- summary(model.aov) # summarise
F print(F)
Df Sum Sq Mean Sq F value Pr(>F)
age 5 29.9 5.986 3.321 0.00548 **
Residuals 1564 2819.4 1.803
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 observations deleted due to missingness
Code 7.3
<-TukeyHSD(model.aov,conf.level = 0.95)
TABLEprint(knitr::kable(TABLE$age,digits = 3,
caption = 'Comparison of age groups
for Item 20 in ASM survey.',
format = 'simple'))
Table: Comparison of age groups
for Item 20 in ASM survey.
diff lwr upr p adj
------------------ ------- ------- ------ ------
25 to 34-18 to 24 0.133 -0.133 0.399 0.713
35 to 44-18 to 24 0.232 -0.062 0.525 0.214
45 to 54-18 to 24 0.244 -0.088 0.576 0.289
55 to 64-18 to 24 0.493 0.107 0.879 0.004
65 to 74-18 to 24 0.418 -0.221 1.057 0.423
35 to 44-25 to 34 0.099 -0.174 0.371 0.906
45 to 54-25 to 34 0.111 -0.202 0.425 0.914
55 to 64-25 to 34 0.360 -0.011 0.731 0.063
65 to 74-25 to 34 0.285 -0.344 0.915 0.789
45 to 54-35 to 44 0.013 -0.324 0.349 1.000
55 to 64-35 to 44 0.261 -0.129 0.652 0.396
65 to 74-35 to 44 0.186 -0.455 0.828 0.962
55 to 64-45 to 54 0.249 -0.172 0.669 0.540
65 to 74-45 to 54 0.174 -0.486 0.834 0.975
65 to 74-55 to 64 -0.075 -0.764 0.614 1.000
Code 7.4
<- MusicScienceData::sadness # define data
df <- aov(ASM20 ~ age * gender, data=df) # run anova
model2.aov <- summary(model2.aov)
F2 print(F2)
Df Sum Sq Mean Sq F value Pr(>F)
age 5 29.9 5.99 3.377 0.00488 **
gender 1 45.7 45.69 25.773 4.3e-07 ***
age:gender 5 11.5 2.31 1.303 0.25997
Residuals 1558 2762.1 1.77
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 observations deleted due to missingness
Code 7.5
This analysis requires extra libraries and raw data read from github. The installation might be slow in Colab because of package dependencies.
#install.packages("lme4",quiet=TRUE) # Required for LMM analysis
#install.packages("lmerTest",quiet=TRUE) # Optional
library(lme4,quiet=TRUE)
library(lmerTest,quiet=TRUE)
library(lme4,quiet=TRUE)
library(lmerTest,quiet=TRUE)
<- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')
d <- dplyr::filter(d,Emotion=='Dimensional') #
d2 <- dplyr::filter(d2, Category=='Anger' |
d3 =='Fear' |
Category=='Happy' |
Category=='Sad' |
Category=='Tender')
Category<- lmer(Valence ~ Category * Gender + (1|id) + (1|Track), data = d3)
m1 <- summary(m1,corr=FALSE)
s <-s$coefficients; S<-round(S,2); S[,5]<-scales::pvalue(S[,5])
Sprint(knitr::kable(S,format = 'simple',
caption = 'LMM results of Valence ratings.'))
Table: LMM results of Valence ratings.
Estimate Std. Error df t value Pr(>|t|)
-------------------------- --------- ----------- ------- -------- ---------
(Intercept) 3.43 0.25 58.17 13.51 <0.001
CategoryFear 0.07 0.34 47.43 0.19 0.850
CategoryHappy 4.16 0.34 47.43 12.24 <0.001
CategorySad 1.63 0.34 47.43 4.79 <0.001
CategoryTender 3.4 0.34 47.43 10.01 <0.001
GenderMale -0.09 0.21 110.04 -0.45 0.650
CategoryFear:GenderMale -0.07 0.19 2348 -0.34 0.730
CategoryHappy:GenderMale -0.04 0.19 2348 -0.22 0.820
CategorySad:GenderMale -0.46 0.19 2348 -2.41 0.020
CategoryTender:GenderMale 0 0.19 2348 0.01 0.990
Code 7.6
<- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')
d <- d %>%
S filter(Category=='Sad') %>%
group_by(Category,Gender) %>%
summarise(M=mean(Valence,na.rm=T),SD=sd(Valence,na.rm=T),
.groups = 'drop')
print(S)
# A tibble: 2 × 4
Category Gender M SD
<chr> <chr> <dbl> <dbl>
1 Sad Female 5.05 1.69
2 Sad Male 4.5 1.54
Code 7.7
library(MusicScienceData) # loads library w data
<- table(MusicScienceData::sadness$age,
gender_age_xtab ::sadness$gender)
MusicScienceDataprint(gender_age_xtab)
Female Male
18 to 24 269 87
25 to 34 361 137
35 to 44 231 101
45 to 54 158 55
55 to 64 118 19
65 to 74 34 7
<- chisq.test(gender_age_xtab) # Chi^2 test
result print(result)
Pearson's Chi-squared test
data: gender_age_xtab
X-squared = 16.649, df = 5, p-value = 0.005215
Code 7.8
library(MusicScienceData) # load library w data
<- MusicScienceData::soundtrack # define data
data <-cor.test(data$Valence, data$Tension) # calculate correlation
rprint(r$estimate) # print coefficient
cor
-0.8269947
## cor
## -0.827
print(scales::pvalue(r$p.value)) # print pretty p value
[1] "<0.001"
## [1] "<0.001"
print(r$parameter) # print df
df
108
Code 7.9
library(MusicScienceData) # loads library w data
<- MusicScienceData::soundtrack # get ratings
d1 <- MusicScienceData::soundtrack_features[,c(2:3,5:6)] # select only some features
d2 17:21] <- as.data.frame(scale(d2)) # normalise
d1[,
<- cor(d1[,c(3,17:20)]) # get correlations
tmp print(round(tmp[2:5,1],2)) # display first line
RMS sp_centr spec_rolloff spec_zcr
0.58 0.36 0.40 0.32
Code 7.10
<- lm(Energy ~ RMS + sp_centr + spec_rolloff +
model.reg data = d1)
spec_zcr, <- summary(model.reg) # R2adj = 0.424 (Energy)
s print(s)
Call:
lm(formula = Energy ~ RMS + sp_centr + spec_rolloff + spec_zcr,
data = d1)
Residuals:
Min 1Q Median 3Q Max
-2.4719 -1.1042 -0.2064 0.9427 3.4504
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.4865 0.1309 41.905 < 2e-16 ***
RMS 0.9067 0.1397 6.491 2.88e-09 ***
sp_centr -1.9069 1.2245 -1.557 0.122
spec_rolloff 1.9663 0.9502 2.069 0.041 *
spec_zcr 0.5995 0.4170 1.438 0.154
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.373 on 105 degrees of freedom
Multiple R-squared: 0.4504, Adjusted R-squared: 0.4295
F-statistic: 21.52 on 4 and 105 DF, p-value: 5.528e-13
Code 7.11
<- cor(d1$Energy, d1$RMS)
r print( r^2 ) # print the squared correlation
[1] 0.3378173
summary(lm(Energy ~ RMS,data=d1)) # Summarise regression
Call:
lm(formula = Energy ~ RMS, data = d1)
Residuals:
Min 1Q Median 3Q Max
-2.6644 -1.1921 -0.3852 1.1875 3.3296
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.4865 0.1417 38.717 < 2e-16 ***
RMS 1.0567 0.1424 7.423 2.79e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.486 on 108 degrees of freedom
Multiple R-squared: 0.3378, Adjusted R-squared: 0.3317
F-statistic: 55.1 on 1 and 108 DF, p-value: 2.788e-11