# Load the 2013 exam data set from the remote CSV file.
d <- read.csv("http://aoki2.si.gunma-u.ac.jp/dp/no15/exam2013.csv")

# Derive BMI as weight over squared height; ht is divided by 100 before
# squaring (presumably cm -> m, with wt in kg -- TODO confirm units).
d$BMI <- d$wt / (d$ht / 100)^2

# Histogram of the derived BMI values.
hist(d$BMI)
# Column means of every variable except the first column of d,
# computed separately within each level of d$sex.
by(data = d[, -1], INDICES = d$sex, FUN = colMeans)
## d$sex: F
##       age        ht        wt     grmax       BMI
##  59.98966 151.17500  54.81241  25.81552  23.96996
## --------------------------------------------------------
## d$sex: M
##       age        ht        wt     grmax       BMI
##  62.63222 163.09696  63.56231  40.94225  23.88832
# Two-sample t-test of each column (all but the first) against d$sex.
# The result is a list of test objects named after the tested columns.
lapply(
  X = d[, -1],
  FUN = function(x) {
    t.test(x ~ d$sex)
  }
)
## $age
##
##  Welch Two Sample t-test
##
## data: x by d$sex
## t = -3.6848, df = 660.38, p-value = 0.0002477
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.050741 -1.234386
## sample estimates:
## mean in group F mean in group M
##        59.98966        62.63222
##
##
## $ht
##
##  Welch Two Sample t-test
##
## data: x by d$sex
## t = -29.101, df = 665.29, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12.72636 -11.11756
## sample estimates:
## mean in group F mean in group M
##         151.175         163.097
##
##
## $wt
##
##  Welch Two Sample t-test
##
## data: x by d$sex
## t = -14.508, df = 627.35, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.934276 -7.565516
## sample estimates:
## mean in group F mean in group M
##        54.81241        63.56231
##
##
## $grmax
##
##  Welch Two Sample t-test
##
## data: x by d$sex
## t = -30.769, df = 501.18, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -16.09262 -14.16085
## sample estimates:
## mean in group F mean in group M
##        25.81552        40.94225
##
##
## $BMI
##
##  Welch Two Sample t-test
##
## data: x by d$sex
## t = 0.37812, df = 706.91, p-value = 0.7055
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3422317  0.5054977
## sample estimates:
## mean in group F mean in group M
##        23.96996        23.88832
age, ht, wt, grmax の平均値には男女間で有意な差がある(BMI には有意な差が認められない)。
# Split the columns of d (all but the first) by d$sex, then compute a
# correlation matrix within each group; the result is a list with one
# matrix per group.
lapply(
  X = split(x = d[, -1], f = d$sex),
  FUN = cor
)
## $F
##              age          ht         wt        grmax          BMI
## age    1.0000000 -0.43072784 -0.1102999 -0.476964772  0.115863030
## ht    -0.4307278  1.00000000  0.4351449  0.539162299 -0.087155814
## wt    -0.1102999  0.43514492  1.0000000  0.277484303  0.856660937
## grmax -0.4769648  0.53916230  0.2774843  1.000000000  0.002346073
## BMI    0.1158630 -0.08715581  0.8566609  0.002346073  1.000000000
##
## $M
##               age         ht         wt      grmax         BMI
## age    1.00000000 -0.3962871 -0.2661494 -0.6313267 -0.06434023
## ht    -0.39628715  1.0000000  0.4148648  0.4396469 -0.11233513
## wt    -0.26614942  0.4148648  1.0000000  0.4457683  0.85469583
## grmax -0.63132667  0.4396469  0.4457683  1.0000000  0.23435933
## BMI   -0.06434023 -0.1123351  0.8546958  0.2343593  1.00000000
# Multiple regression of grmax on every other column of d (the `.` in
# the formula stands for all remaining columns), followed by the
# standard model summary.
summary(
  lm(formula = grmax ~ ., data = d)
)
##
## Call:
## lm(formula = grmax ~ ., data = d)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -24.6950  -3.0135  -0.1867   3.1411  17.6905
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 80.66682   23.54207   3.426 0.000639
## sexM        11.43984    0.51771  22.097  < 2e-16
## age         -0.25419    0.01758 -14.456  < 2e-16
## ht          -0.29266    0.15058  -1.944 0.052257
## wt           0.87992    0.19926   4.416 1.13e-05
## BMI         -1.81856    0.48610  -3.741 0.000195
##
## Residual standard error: 4.944 on 903 degrees of freedom
## Multiple R-squared: 0.7417, Adjusted R-squared: 0.7403
## F-statistic: 518.6 on 5 and 903 DF, p-value: < 2.2e-16
# Pairwise correlation matrix over all rows, using every column of d
# except the first.
cor(x = d[, -1])
##               age          ht          wt       grmax         BMI
## age    1.00000000 -0.21137776 -0.09817669 -0.26290616  0.04903315
## ht    -0.21137776  1.00000000  0.58365585  0.75308685 -0.07749662
## wt    -0.09817669  0.58365585  1.00000000  0.54288844  0.75967132
## grmax -0.26290616  0.75308685  0.54288844  1.00000000  0.05893901
## BMI    0.04903315 -0.07749662  0.75967132  0.05893901  1.00000000
grmax との相関係数は ht が一番大きい(r = 0.753)。
# Simple linear regression of grmax on ht alone, followed by the
# standard model summary.
summary(
  lm(formula = grmax ~ ht, data = d)
)
##
## Call:
## lm(formula = grmax ~ ht, data = d)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -25.9019  -4.0089  -0.2739   3.9494  23.2542
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -106.9868     4.0168  -26.64   <2e-16
## ht             0.8893     0.0258   34.47   <2e-16
##
## Residual standard error: 6.386 on 907 degrees of freedom
## Multiple R-squared: 0.5671, Adjusted R-squared: 0.5667
## F-statistic: 1188 on 1 and 907 DF, p-value: < 2.2e-16