Ch. 7 – Inferential statistics

This notebook demonstrates running inferential statistical tests in R.

Preliminaries

Load libraries and install MusicScienceData package where the example data is stored.

library(ggplot2,quietly = TRUE)
library(tidyverse,quietly = TRUE)
library(MusicScienceData,quiet=TRUE)

Code 7.1

See text for the explanation.

df <- MusicScienceData::sadness         # define data
t <- t.test(ASM20 ~ gender, data=df)    # t test
print(t$statistic)                      # show the t value

        t 
-5.054596

print(scales::pvalue(t$p.value))

[1] "<0.001"

dplyr::summarise(dplyr::group_by(df, gender), # means and SDs
                 M=mean(ASM20,na.rm=TRUE),
                 SD=sd(ASM20,na.rm=TRUE))

# A tibble: 2 × 3
  gender     M    SD
  <fct>  <dbl> <dbl>
1 Female  4.59  1.37
2 Male    4.96  1.24

Code 7.2

df <- MusicScienceData::sadness         # define data
model.aov <- aov(ASM20 ~ age, data=df)  # run anova
F <- summary(model.aov)                 # summarise
print(F)

              Df Sum Sq Mean Sq F value  Pr(>F)   
age            5   29.9   5.986   3.321 0.00548 **
Residuals   1564 2819.4   1.803                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 observations deleted due to missingness

Code 7.3

TABLE<-TukeyHSD(model.aov,conf.level = 0.95)
print(knitr::kable(TABLE$age,digits = 3,
                   caption = 'Comparison of age groups
                   for Item 20 in ASM survey.',
                   format = 'simple'))



Table: Comparison of age groups
                   for Item 20 in ASM survey.

                       diff      lwr     upr   p adj
------------------  -------  -------  ------  ------
25 to 34-18 to 24     0.133   -0.133   0.399   0.713
35 to 44-18 to 24     0.232   -0.062   0.525   0.214
45 to 54-18 to 24     0.244   -0.088   0.576   0.289
55 to 64-18 to 24     0.493    0.107   0.879   0.004
65 to 74-18 to 24     0.418   -0.221   1.057   0.423
35 to 44-25 to 34     0.099   -0.174   0.371   0.906
45 to 54-25 to 34     0.111   -0.202   0.425   0.914
55 to 64-25 to 34     0.360   -0.011   0.731   0.063
65 to 74-25 to 34     0.285   -0.344   0.915   0.789
45 to 54-35 to 44     0.013   -0.324   0.349   1.000
55 to 64-35 to 44     0.261   -0.129   0.652   0.396
65 to 74-35 to 44     0.186   -0.455   0.828   0.962
55 to 64-45 to 54     0.249   -0.172   0.669   0.540
65 to 74-45 to 54     0.174   -0.486   0.834   0.975
65 to 74-55 to 64    -0.075   -0.764   0.614   1.000

Code 7.4

df <- MusicScienceData::sadness                   # define data
model2.aov <- aov(ASM20 ~ age * gender, data=df)  # run anova
F2 <- summary(model2.aov)
print(F2)

              Df Sum Sq Mean Sq F value  Pr(>F)    
age            5   29.9    5.99   3.377 0.00488 ** 
gender         1   45.7   45.69  25.773 4.3e-07 ***
age:gender     5   11.5    2.31   1.303 0.25997    
Residuals   1558 2762.1    1.77                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 observations deleted due to missingness

Code 7.5

This analysis requires extra libraries and raw data read from github. The installation might be slow in Colab because of package dependencies.

#install.packages("lme4",quiet=TRUE)     # Required for LMM analysis
#install.packages("lmerTest",quiet=TRUE) # Optional
library(lme4,quiet=TRUE)
library(lmerTest,quiet=TRUE)

library(lme4,quiet=TRUE)
library(lmerTest,quiet=TRUE)
d <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')
d2 <- dplyr::filter(d,Emotion=='Dimensional')  #
d3 <- dplyr::filter(d2, Category=='Anger' |
  Category=='Fear' |
  Category=='Happy' |
  Category=='Sad' |
  Category=='Tender')
m1 <- lmer(Valence ~ Category * Gender + (1|id) + (1|Track), data = d3)
s <- summary(m1,corr=FALSE)
S<-s$coefficients; S<-round(S,2); S[,5]<-scales::pvalue(S[,5])
print(knitr::kable(S,format = 'simple',
                   caption = 'LMM results of Valence ratings.'))



Table: LMM results of Valence ratings.

                            Estimate   Std. Error   df       t value   Pr(>|t|) 
--------------------------  ---------  -----------  -------  --------  ---------
(Intercept)                 3.43       0.25         58.17    13.51     <0.001   
CategoryFear                0.07       0.34         47.43    0.19      0.850    
CategoryHappy               4.16       0.34         47.43    12.24     <0.001   
CategorySad                 1.63       0.34         47.43    4.79      <0.001   
CategoryTender              3.4        0.34         47.43    10.01     <0.001   
GenderMale                  -0.09      0.21         110.04   -0.45     0.650    
CategoryFear:GenderMale     -0.07      0.19         2348     -0.34     0.730    
CategoryHappy:GenderMale    -0.04      0.19         2348     -0.22     0.820    
CategorySad:GenderMale      -0.46      0.19         2348     -2.41     0.020    
CategoryTender:GenderMale   0          0.19         2348     0.01      0.990

Code 7.6

d <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')
S <- d %>%
  filter(Category=='Sad') %>%
  group_by(Category,Gender) %>%
  summarise(M=mean(Valence,na.rm=T),SD=sd(Valence,na.rm=T),
            .groups = 'drop')
print(S)

# A tibble: 2 × 4
  Category Gender     M    SD
  <chr>    <chr>  <dbl> <dbl>
1 Sad      Female  5.05  1.69
2 Sad      Male    4.5   1.54

Code 7.7

library(MusicScienceData)               # loads library w data
gender_age_xtab <- table(MusicScienceData::sadness$age,
                         MusicScienceData::sadness$gender)
print(gender_age_xtab)

          
           Female Male
  18 to 24    269   87
  25 to 34    361  137
  35 to 44    231  101
  45 to 54    158   55
  55 to 64    118   19
  65 to 74     34    7

result <- chisq.test(gender_age_xtab)   # Chi^2 test
print(result)


    Pearson's Chi-squared test

data:  gender_age_xtab
X-squared = 16.649, df = 5, p-value = 0.005215

Code 7.8

library(MusicScienceData)               # load library w data
data <- MusicScienceData::soundtrack    # define data
r<-cor.test(data$Valence, data$Tension) # calculate correlation
print(r$estimate)                       # print coefficient

       cor 
-0.8269947

##    cor
## -0.827
print(scales::pvalue(r$p.value))        # print pretty p value

[1] "<0.001"

## [1] "<0.001"
print(r$parameter)                      # print df

 df 
108

Code 7.9

library(MusicScienceData)               # loads library w data
d1 <- MusicScienceData::soundtrack      # get ratings
d2 <- MusicScienceData::soundtrack_features[,c(2:3,5:6)] # select only some features
d1[,17:21] <- as.data.frame(scale(d2))  # normalise

tmp <- cor(d1[,c(3,17:20)])             # get correlations
print(round(tmp[2:5,1],2))              # display first line

         RMS     sp_centr spec_rolloff     spec_zcr 
        0.58         0.36         0.40         0.32

Code 7.10

model.reg <- lm(Energy ~ RMS + sp_centr + spec_rolloff +
  spec_zcr, data = d1)
s <- summary(model.reg) # R2adj = 0.424 (Energy)
print(s)


Call:
lm(formula = Energy ~ RMS + sp_centr + spec_rolloff + spec_zcr, 
    data = d1)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.4719 -1.1042 -0.2064  0.9427  3.4504 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)    5.4865     0.1309  41.905  < 2e-16 ***
RMS            0.9067     0.1397   6.491 2.88e-09 ***
sp_centr      -1.9069     1.2245  -1.557    0.122    
spec_rolloff   1.9663     0.9502   2.069    0.041 *  
spec_zcr       0.5995     0.4170   1.438    0.154    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.373 on 105 degrees of freedom
Multiple R-squared:  0.4504,    Adjusted R-squared:  0.4295 
F-statistic: 21.52 on 4 and 105 DF,  p-value: 5.528e-13

Code 7.11

r <- cor(d1$Energy, d1$RMS)
print( r^2 )                      # print the squared correlation

[1] 0.3378173

summary(lm(Energy ~ RMS,data=d1)) # Summarise regression


Call:
lm(formula = Energy ~ RMS, data = d1)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.6644 -1.1921 -0.3852  1.1875  3.3296 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   5.4865     0.1417  38.717  < 2e-16 ***
RMS           1.0567     0.1424   7.423 2.79e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.486 on 108 degrees of freedom
Multiple R-squared:  0.3378,    Adjusted R-squared:  0.3317 
F-statistic:  55.1 on 1 and 108 DF,  p-value: 2.788e-11