```r
# Two-way table with (I, J) categories
# Here, I = 4 (age group) and J = 3 (frequency)
MULTI21_D1_long <- MULTI21_D1 |>
  # pool data by age and frequency
  dplyr::group_by(age, frequency) |>
  dplyr::summarize(total = sum(count)) # aggregate counts
MULTI21_D1_long |> knitr::kable()
```
Table 1: Count data for Elliot et al. (2021) in long format.

| age  | frequency | total |
|------|-----------|------:|
| 5yo  | never     |    53 |
| 5yo  | sometimes |    80 |
| 5yo  | usually   |    87 |
| 6yo  | never     |    28 |
| 6yo  | sometimes |    84 |
| 6yo  | usually   |   141 |
| 7yo  | never     |    19 |
| 7yo  | sometimes |    73 |
| 7yo  | usually   |   177 |
| 10yo | never     |    14 |
| 10yo | sometimes |    40 |
| 10yo | usually   |   181 |
```r
# Fit Poisson regression
mod_main <- glm(total ~ age + frequency, # null model, no interaction
  family = poisson, data = MULTI21_D1_long)
mod_satur <- glm(total ~ age * frequency, # saturated model
  family = poisson, data = MULTI21_D1_long)
# The saturated model returns the observed counts
isTRUE(all.equal(
  target = predict(mod_satur, type = "response"),
  current = MULTI21_D1_long$total,
  check.attributes = FALSE))
```
[1] TRUE
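The main-effects model, in turn, has fitted values equal to the classical expected counts under independence, namely row total times column total divided by the grand total. A minimal sketch of that check (the `obs`, `expected`, and `fitted_tab` objects below are illustrative, not part of the original chunk):

```r
# Expected counts under independence: (row total) x (column total) / (grand total)
obs <- xtabs(total ~ age + frequency, data = MULTI21_D1_long)
expected <- outer(rowSums(obs), colSums(obs)) / sum(obs)
# Fitted values of the main-effects model, arranged in the same table
fitted_tab <- xtabs(fitted(mod_main) ~ age + frequency, data = MULTI21_D1_long)
all.equal(as.numeric(expected), as.numeric(fitted_tab))
```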
```r
# Likelihood ratio test and score tests
# There are general families of testing procedures
anova(mod_main, mod_satur, test = "LRT") # likelihood ratio test, aka deviance statistic
```
Analysis of Deviance Table
Model 1: total ~ age + frequency
Model 2: total ~ age * frequency
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 6 85.2
2 0 0.0 6 85.2 3e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```r
anova(mod_main, mod_satur, test = "Rao") # Rao score test, aka Pearson chi-square test
```
Analysis of Deviance Table
Model 1: total ~ age + frequency
Model 2: total ~ age * frequency
Resid. Df Resid. Dev Df Deviance Rao Pr(>Chi)
1 6 85.2
2 0 0.0 6 85.2 87.5 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```r
# We can also compute them manually without fitting the saturated model
# The Pearson chi-square statistic is the sum of squared Pearson residuals
PearsonX2 <- sum(resid(mod_main, type = "pearson")^2)
pchisq(q = PearsonX2, df = mod_main$df.residual, lower.tail = FALSE)
```
[1] 1.02e-16
```r
# The likelihood ratio test is obtained from the deviance
pchisq(q = deviance(mod_main),
  df = mod_main$df.residual, lower.tail = FALSE)
```
[1] 2.95e-16
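The Pearson statistic and its p-value match what the classical chi-square test of independence reports for the 4 x 3 contingency table. As a cross-check, the table can be rebuilt from the long data (the `contingency` object here is reconstructed for illustration):

```r
# Rebuild the contingency table and run Pearson's chi-square test of independence
contingency <- xtabs(total ~ age + frequency, data = MULTI21_D1_long)
# Without continuity correction, the X-squared statistic equals PearsonX2 above
chisq.test(contingency, correct = FALSE)
```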
## Example 3 - Bertrand and Mullainathan (2004)
```r
data(BM04_T2, package = "hecedsm")
# Symmetric model with 6 parameters (3 diagonal + 3 upper triangular)
mod_null <- glm(count ~ gnm::Symm(black, white),
  data = BM04_T2,
  family = poisson)
# Compare the two nested models using a likelihood ratio test
pchisq(deviance(mod_null), lower.tail = FALSE,
  df = mod_null$df.residual) # 9 cells - 6 parameters = 3 df
```
[1] 3.25e-06
```r
PearsonX2 <- sum(residuals(mod_null, type = "pearson")^2)
pchisq(PearsonX2, df = mod_null$df.residual, lower.tail = FALSE)
```
[1] 5.07e-06
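The comparison here is again against the saturated model, which can also be fitted explicitly and tested with `anova()` as in the previous example. A minimal sketch, assuming the `black` and `white` columns of `BM04_T2` are the factors passed to `gnm::Symm()` above:

```r
# Saturated model reproduces the observed counts exactly
mod_satur <- glm(count ~ black * white, data = BM04_T2, family = poisson)
# Likelihood ratio test of the symmetry model against the saturated model
anova(mod_null, mod_satur, test = "LRT")
# Rao score (Pearson) version of the same comparison
anova(mod_null, mod_satur, test = "Rao")
```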
# Nonparametric tests
## Example 1 - Brucks and Levav (2022)
```r
data(BL22_E, package = "hecedsm")
# Two-sample comparison using the Mann-Whitney / Wilcoxon rank-sum test
mww <- coin::wilcox_test(
  partner_time ~ cond,
  data = BL22_E,
  conf.int = TRUE)
mww
```
Asymptotic Wilcoxon-Mann-Whitney Test
data: partner_time by cond (f2f, video)
Z = -6, p-value = 1e-10
alternative hypothesis: true mu is not equal to 0
95 percent confidence interval:
-50.7 -25.9
sample estimates:
difference in location
-37.8
```r
# The point estimate is the Hodges-Lehmann estimate of the location shift
# (the median of all pairwise differences between groups)
# Estimates and confidence interval bounds are times in seconds
# Compare results with the Welch two-sample t-test
(welch_ttest <- t.test(partner_time ~ cond,
  data = BL22_E,
  conf.int = TRUE))
```
Welch Two Sample t-test
data: partner_time by cond
t = -6, df = 268, p-value = 1e-08
alternative hypothesis: true difference in means between group f2f and group video is not equal to 0
95 percent confidence interval:
-52.9 -26.5
sample estimates:
mean in group f2f mean in group video
51.7 91.4
## Example 2 - Brodeur et al. (2021)
```r
data(BRLS21_T3, package = "hecedsm")
# Friedman test (more popular, but less powerful than the Quade test)
friedman <- coin::friedman_test(
  nviolation ~ task | id,
  data = BRLS21_T3)
friedman
```
Asymptotic Friedman Test
data: nviolation by
task (phone, watch, speaker, texting)
stratified by id
chi-squared = 19, df = 3, p-value = 3e-04
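The Quade test is a weighted alternative that tends to be more powerful with a small number of conditions, and Kendall's W provides an effect size for the rank-based comparison; a sketch of both calls, using `coin::quade_test` and `effectsize::kendalls_w`:

```r
# Quade test: weights each person's ranks by the range of their responses
quade <- coin::quade_test(
  nviolation ~ task | id,
  data = BRLS21_T3)
quade
# Effect size for the rank-based test
eff_size <- effectsize::kendalls_w(
  x = "nviolation",
  groups = "task",
  blocks = "id",
  data = BRLS21_T3)
eff_size
```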
```r
# Compute ranks separately for each person
BRLS21_T3_rank <- BRLS21_T3 |>
  group_by(id) |>
  mutate(rank = rank(nviolation)) |>
  ungroup()
# Which violation type has the highest rank?
BRLS21_T3_rank |>
  group_by(task) |>
  summarize(mrank = mean(rank)) |>
  knitr::kable(col.names = c("task", "mean rank"))
```
Table 3: Average rank of the number of road safety violations.

| task    | mean rank |
|---------|----------:|
| phone   |      2.19 |
| watch   |      2.19 |
| speaker |      2.26 |
| texting |      3.35 |
```r
# So texting leads to more violations
# Transform to wide format - one line per id
smartwatch <- tidyr::pivot_wider(
  data = BRLS21_T3,
  names_from = task,
  values_from = nviolation)
# Wilcoxon signed-rank tests for all pairwise differences
tests <- list(
  wilcoxsign_test(phone ~ watch, data = smartwatch),
  wilcoxsign_test(speaker ~ watch, data = smartwatch),
  wilcoxsign_test(phone ~ speaker, data = smartwatch),
  wilcoxsign_test(phone ~ texting, data = smartwatch),
  wilcoxsign_test(texting ~ watch, data = smartwatch),
  wilcoxsign_test(texting ~ speaker, data = smartwatch))
# Extract p-values of the tests
(pvals <- sapply(tests, pvalue))
# Adjust for multiple testing using Holm-Bonferroni
p.adjust(pvals, method = "holm")
# Only the differences involving texting are significant (texting is the most distracting)
```