Import packages
Split data into training and test sets
# Fix the RNG seed so the random split (and everything fitted on it) is
# reproducible from run to run.
set.seed(90)

# Number of observations available for splitting.
n <- nrow(CO2)

# Draw 75% of the row positions, without replacement, for the training set.
# seq_len(n) is used instead of 1:n so an empty data frame yields an empty
# index set rather than c(1, 0).
index <- sample(seq_len(n), size = round(0.75 * n), replace = FALSE)

# Rows that were drawn form the training set; the remaining rows form the
# test set.
train <- CO2[index, ]
test <- CO2[-index, ]
Logistic model predicting Treatment, fitted on the CO2 training set
# Fit a logistic regression (binomial family, logit link) with Treatment as
# the response and Type, conc and uptake as predictors, using the rows in
# `train`.
# NOTE(review): for a factor response, glm() models the probability of the
# response NOT being its first level -- confirm the level order of Treatment
# before interpreting coefficient signs.
log_mod_train <- glm(Treatment ~ Type + conc + uptake, data = train, family = binomial(link = "logit"))
# Print the fitted call, coefficient table, deviances and AIC.
summary(log_mod_train)
##
## Call:
## glm(formula = Treatment ~ Type + conc + uptake, family = binomial(link = "logit"),
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5326 -0.7801 -0.4139 0.8967 1.7586
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.746515 1.752166 3.280 0.001039 **
## TypeMississippi -3.084621 1.062665 -2.903 0.003699 **
## conc 0.003917 0.001567 2.499 0.012449 *
## uptake -0.217115 0.060521 -3.587 0.000334 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.194 on 62 degrees of freedom
## Residual deviance: 66.050 on 59 degrees of freedom
## AIC: 74.05
##
## Number of Fisher Scoring iterations: 5
Predict on the test data and calculate model accuracy
# Score the held-out rows. type = "response" returns fitted probabilities
# on the (0, 1) scale rather than values on the logit scale.
test$predictions <- predict(log_mod_train, test, type = "response")

# Cross-tabulate observed Treatment (rows) against the thresholded
# prediction (columns). The predicted side is turned into a factor with both
# levels fixed, so the table stays square even when every prediction lands
# on the same side of 0.5; otherwise diag() would read a one-column table
# and report a wrong accuracy.
# NOTE(review): the diagonal counts correct predictions only if the first
# level of Treatment corresponds to "probability <= 0.5" -- confirm the
# factor level order.
acc_tab <- base::table(
  test$Treatment,
  factor(test$predictions > 0.5, levels = c(FALSE, TRUE))
)

# Accuracy = correctly classified rows / all classified rows, to 3 decimals.
round(sum(diag(acc_tab)) / sum(acc_tab), 3)