Import packages

library(stats)


Split data into training and test sets

# Reproducible random 75/25 split of the CO2 data frame.
set.seed(90)
n <- nrow(CO2)
train_size <- round(0.75 * n)

# Draw the training row numbers without replacement; every row that was not
# drawn becomes part of the test set.
index <- sample(seq_len(n), size = train_size, replace = FALSE)
train <- CO2[index, ]
test <- CO2[-index, ]


Logistic model predicting Treatment from the CO2 training dataset

# Check the type of the response column before modelling it.
class(CO2$Treatment)
## [1] "factor"

# Fit a logistic regression of Treatment on Type, conc and uptake using only
# the training rows. The call expression is kept unchanged so that the
# "Call:" section printed by summary() stays the same.
log_mod_train <- glm(
  Treatment ~ Type + conc + uptake,
  data = train,
  family = binomial(link = "logit")
)

# Print coefficient estimates, deviances and AIC for the fitted model.
summary(log_mod_train)
## 
## Call:
## glm(formula = Treatment ~ Type + conc + uptake, family = binomial(link = "logit"), 
##     data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5326  -0.7801  -0.4139   0.8967   1.7586  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      5.746515   1.752166   3.280 0.001039 ** 
## TypeMississippi -3.084621   1.062665  -2.903 0.003699 ** 
## conc             0.003917   0.001567   2.499 0.012449 *  
## uptake          -0.217115   0.060521  -3.587 0.000334 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 87.194  on 62  degrees of freedom
## Residual deviance: 66.050  on 59  degrees of freedom
## AIC: 74.05
## 
## Number of Fisher Scoring iterations: 5


Predict on the test data and calculate model accuracy

# Score the held-out rows; type = "response" asks predict() for fitted
# probabilities instead of values on the link (logit) scale.
test$predictions <- predict(log_mod_train, newdata = test, type = "response")

# Cross-tabulate the actual Treatment against the 0.5-thresholded prediction.
# The thresholded prediction is turned into a factor with both levels spelled
# out so the table is always 2 x 2. Without that, if every prediction falls on
# the same side of 0.5 the table has a single column, diag() no longer lines
# up with the correctly classified cells, and the accuracy is wrong.
acc_tab <- base::table(
  test$Treatment,
  factor(test$predictions > 0.5, levels = c(FALSE, TRUE))
)

# Accuracy = correctly classified rows (the diagonal) over all tabulated rows.
round(sum(diag(acc_tab)) / sum(acc_tab), 3)
## [1] 0.667