例解

Last modified: Oct 19, 2015


1 問題

# Load the exam data set and derive the body-mass index from weight and height.
exam_url <- "http://aoki2.si.gunma-u.ac.jp/dp/no15/exam2013.csv"
d <- read.csv(exam_url)
# BMI = wt / (ht / 100)^2; ht is divided by 100 before squaring
# (presumably cm -> m, with wt in kg -- confirm against the data source).
d$BMI <- with(d, wt / (ht / 100)^2)
# Quick look at the distribution of the derived BMI column.
hist(d$BMI)

# Column means of every variable except the first column, computed
# separately for each level of d$sex.
by(data = d[-1], INDICES = d$sex, FUN = colMeans)
## d$sex: F 
##       age        ht        wt     grmax       BMI  
##  59.98966 151.17500  54.81241  25.81552  23.96996  
## --------------------------------------------------------  
## d$sex: M 
##       age        ht        wt     grmax       BMI  
##  62.63222 163.09696  63.56231  40.94225  23.88832 
# Two-sample t-test of one variable between the groups defined by d$sex.
# The argument is deliberately called `x` so the printed "data:" line
# reads "x by d$sex".
compare_by_sex <- function(x) {
  t.test(x ~ d$sex)
}
# Run the test for every column except the first; the result is a list
# named after the columns.
lapply(d[-1], compare_by_sex)
## $age 
##  
##  Welch Two Sample t-test 
##  
## data:  x by d$sex 
## t = -3.6848, df = 660.38, p-value = 0.0002477 
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval: 
##  -4.050741 -1.234386 
## sample estimates: 
## mean in group F mean in group M  
##        59.98966        62.63222  
##  
##  
## $ht 
##  
##  Welch Two Sample t-test 
##  
## data:  x by d$sex 
## t = -29.101, df = 665.29, p-value < 2.2e-16 
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval: 
##  -12.72636 -11.11756 
## sample estimates: 
## mean in group F mean in group M  
##         151.175         163.097  
##  
##  
## $wt 
##  
##  Welch Two Sample t-test 
##  
## data:  x by d$sex 
## t = -14.508, df = 627.35, p-value < 2.2e-16 
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval: 
##  -9.934276 -7.565516 
## sample estimates: 
## mean in group F mean in group M  
##        54.81241        63.56231  
##  
##  
## $grmax 
##  
##  Welch Two Sample t-test 
##  
## data:  x by d$sex 
## t = -30.769, df = 501.18, p-value < 2.2e-16 
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval: 
##  -16.09262 -14.16085 
## sample estimates: 
## mean in group F mean in group M  
##        25.81552        40.94225  
##  
##  
## $BMI 
##  
##  Welch Two Sample t-test 
##  
## data:  x by d$sex 
## t = 0.37812, df = 706.91, p-value = 0.7055 
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval: 
##  -0.3422317  0.5054977 
## sample estimates: 
## mean in group F mean in group M  
##        23.96996        23.88832 

age, ht, wt, grmax の平均値には男女間で有意な差がある(BMI には有意な差は認められない)。

# Split the data (minus the first column) into one data frame per level of
# d$sex, then compute a correlation matrix within each group.
groups_by_sex <- split(d[-1], d$sex)
lapply(groups_by_sex, cor)
## $F 
##              age          ht         wt        grmax          BMI 
## age    1.0000000 -0.43072784 -0.1102999 -0.476964772  0.115863030 
## ht    -0.4307278  1.00000000  0.4351449  0.539162299 -0.087155814 
## wt    -0.1102999  0.43514492  1.0000000  0.277484303  0.856660937 
## grmax -0.4769648  0.53916230  0.2774843  1.000000000  0.002346073 
## BMI    0.1158630 -0.08715581  0.8566609  0.002346073  1.000000000 
##  
## $M 
##               age         ht         wt      grmax         BMI 
## age    1.00000000 -0.3962871 -0.2661494 -0.6313267 -0.06434023 
## ht    -0.39628715  1.0000000  0.4148648  0.4396469 -0.11233513 
## wt    -0.26614942  0.4148648  1.0000000  0.4457683  0.85469583 
## grmax -0.63132667  0.4396469  0.4457683  1.0000000  0.23435933 
## BMI   -0.06434023 -0.1123351  0.8546958  0.2343593  1.00000000 
# Regress grmax on every other column of d (`.` expands to all remaining
# variables). Note that BMI was derived from wt and ht, so those three
# predictors are not independent of one another.
fit_all <- lm(grmax ~ ., data = d)
summary(fit_all)
##  
## Call: 
## lm(formula = grmax ~ ., data = d) 
##  
## Residuals: 
##      Min       1Q   Median       3Q      Max  
## -24.6950  -3.0135  -0.1867   3.1411  17.6905  
##  
## Coefficients: 
##             Estimate Std. Error t value Pr(>|t|) 
## (Intercept) 80.66682   23.54207   3.426 0.000639 
## sexM        11.43984    0.51771  22.097  < 2e-16 
## age         -0.25419    0.01758 -14.456  < 2e-16 
## ht          -0.29266    0.15058  -1.944 0.052257 
## wt           0.87992    0.19926   4.416 1.13e-05 
## BMI         -1.81856    0.48610  -3.741 0.000195 
##  
## Residual standard error: 4.944 on 903 degrees of freedom 
## Multiple R-squared:  0.7417, Adjusted R-squared:  0.7403  
## F-statistic: 518.6 on 5 and 903 DF,  p-value: < 2.2e-16 
# Correlation matrix over all observations (both sexes pooled), using every
# column except the first.
measurements <- d[-1]
cor(measurements)
##               age          ht          wt       grmax         BMI 
## age    1.00000000 -0.21137776 -0.09817669 -0.26290616  0.04903315 
## ht    -0.21137776  1.00000000  0.58365585  0.75308685 -0.07749662 
## wt    -0.09817669  0.58365585  1.00000000  0.54288844  0.75967132 
## grmax -0.26290616  0.75308685  0.54288844  1.00000000  0.05893901 
## BMI    0.04903315 -0.07749662  0.75967132  0.05893901  1.00000000 

grmax は ht との相関係数が一番大きい(r = 0.753)。

# Simple linear regression of grmax on ht alone.
fit_ht <- lm(grmax ~ ht, data = d)
summary(fit_ht)
##  
## Call: 
## lm(formula = grmax ~ ht, data = d) 
##  
## Residuals: 
##      Min       1Q   Median       3Q      Max  
## -25.9019  -4.0089  -0.2739   3.9494  23.2542  
##  
## Coefficients: 
##              Estimate Std. Error t value Pr(>|t|) 
## (Intercept) -106.9868     4.0168  -26.64   <2e-16 
## ht             0.8893     0.0258   34.47   <2e-16 
##  
## Residual standard error: 6.386 on 907 degrees of freedom 
## Multiple R-squared:  0.5671, Adjusted R-squared:  0.5667  
## F-statistic:  1188 on 1 and 907 DF,  p-value: < 2.2e-16