Two-Way ANOVA
Data
This data is SAT scores and GPA for every student who entered the University of Texas at Austin at 2000 and graduated within 6 years. As a two-way ANOVA model, we want to analyze the impact from 2 factor variables as well as the interaction between them.
sat = read.csv("ut2000.csv")
attach(sat)
tapply(SAT.C, list(Gender,School),mean)
## ARCHITECTURE BUSINESS COMMUNICATIONS EDUCATION ENGINEERING FINE ARTS
## F 1338.500 1232.114 1204.792 1105.645 1279.062 1186.133
## M 1359.167 1228.418 1197.099 1096.750 1284.111 1195.062
## LIBERAL ARTS NATURAL SCIENCE NURSING SOCIAL WORK
## F 1183.540 1234.531 1114.231 1168.333
## M 1190.767 1226.107 1119.375 1208.750
boxplot(SAT.Q~Gender+School, col="blue", data=sat)
interaction.plot(Gender, School, SAT.Q, col=c("blue","red"))
out=lm(SAT.Q~Gender*School,data=sat)
out1=lm(SAT.Q~Gender+School+Gender*School,data=sat)
out2=lm(SAT.Q~Gender+School,data=sat)
anova(out)
## Analysis of Variance Table
##
## Response: SAT.Q
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 1565 1565 0.2592 0.6107
## School 9 4592051 510228 84.5374 <2e-16 ***
## Gender:School 9 21355 2373 0.3931 0.9390
## Residuals 5171 31209727 6036
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(out1)
##
## Call:
## lm(formula = SAT.Q ~ Gender + School + Gender * School, data = sat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -311.338 -51.911 1.695 53.301 205.972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 676.000 17.372 38.914 < 2e-16 ***
## GenderM 23.167 28.368 0.817 0.4142
## SchoolBUSINESS -41.534 17.780 -2.336 0.0195 *
## SchoolCOMMUNICATIONS -81.972 18.539 -4.422 1.00e-05 ***
## SchoolEDUCATION -113.903 19.978 -5.701 1.25e-08 ***
## SchoolENGINEERING -2.591 17.858 -0.145 0.8847
## SchoolFINE ARTS -82.133 19.551 -4.201 2.70e-05 ***
## SchoolLIBERAL ARTS -79.301 17.559 -4.516 6.44e-06 ***
## SchoolNATURAL SCIENCE -42.106 17.677 -2.382 0.0173 *
## SchoolNURSING -119.462 23.107 -5.170 2.43e-07 ***
## SchoolSOCIAL WORK -71.000 36.162 -1.963 0.0497 *
## GenderM:SchoolBUSINESS -26.294 28.875 -0.911 0.3625
## GenderM:SchoolCOMMUNICATIONS -27.688 29.731 -0.931 0.3517
## GenderM:SchoolEDUCATION -36.388 31.265 -1.164 0.2445
## GenderM:SchoolENGINEERING -19.608 28.974 -0.677 0.4986
## GenderM:SchoolFINE ARTS -16.786 30.979 -0.542 0.5879
## GenderM:SchoolLIBERAL ARTS -21.561 28.597 -0.754 0.4509
## GenderM:SchoolNATURAL SCIENCE -25.150 28.744 -0.875 0.3816
## GenderM:SchoolNURSING -10.955 37.604 -0.291 0.7708
## GenderM:SchoolSOCIAL WORK -28.167 50.647 -0.556 0.5781
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 77.69 on 5171 degrees of freedom
## Multiple R-squared: 0.1288, Adjusted R-squared: 0.1256
## F-statistic: 40.24 on 19 and 5171 DF, p-value: < 2.2e-16
summary(out2)
##
## Call:
## lm(formula = SAT.Q ~ Gender + School, data = sat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -312.870 -52.870 2.446 52.546 208.313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 684.72488 13.75012 49.798 < 2e-16 ***
## GenderM -0.09969 2.15720 -0.046 0.963142
## SchoolBUSINESS -51.75496 13.99014 -3.699 0.000218 ***
## SchoolCOMMUNICATIONS -93.03812 14.43002 -6.448 1.24e-10 ***
## SchoolEDUCATION -130.02083 15.19985 -8.554 < 2e-16 ***
## SchoolENGINEERING -9.51022 14.04108 -0.677 0.498237
## SchoolFINE ARTS -87.49363 15.07172 -5.805 6.81e-09 ***
## SchoolLIBERAL ARTS -87.17084 13.84738 -6.295 3.32e-10 ***
## SchoolNATURAL SCIENCE -51.76859 13.92267 -3.718 0.000203 ***
## SchoolNURSING -123.49643 18.21986 -6.778 1.35e-11 ***
## SchoolSOCIAL WORK -82.52506 24.88465 -3.316 0.000918 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 77.65 on 5180 degrees of freedom
## Multiple R-squared: 0.1282, Adjusted R-squared: 0.1265
## F-statistic: 76.19 on 10 and 5180 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(out)
Written on February 19, 2020