Import packages
Split data into training (75%) and test (25%) sets
# Split mtcars into a 75% training set and a 25% test set by sampling row
# indices without replacement.
# The RNG seed is fixed so the same rows are drawn on every run.
# NOTE(review): no seed is visible in the original, so the model output
# recorded further down presumably came from a different random split --
# re-run the document to refresh those numbers.
set.seed(42)
n <- nrow(mtcars)
index <- sample(seq_len(n), size = round(0.75 * n), replace = FALSE)
train <- mtcars[index, ] # rows drawn for fitting
test <- mtcars[-index, ] # every remaining row, held out for evaluation
paste0("Observations in training data: ", nrow(train))
## [1] "Observations in training data: 24"
paste0("Observations in testing data: ", nrow(test))
## [1] "Observations in testing data: 8"
Fitting a linear model to predict miles per gallon (mpg) from selected variables in the mtcars dataset
# Fit an ordinary least-squares model of mpg on seven predictors, using only
# the training rows.
# NOTE(review): the recorded output lists factor-level terms (cyl6, cyl8, am1,
# gear4, gear5), which implies cyl, am and gear were converted to factors
# before this point; that step is not visible here -- confirm.
linear_mod <- lm(
  mpg ~ cyl + disp + hp + wt + am + gear + carb,
  data = train
)
coef(linear_mod) # estimated coefficients as a named vector
## (Intercept) cyl6 cyl8 disp hp wt
## 31.92724195 -5.56261475 -6.71136735 0.02773889 -0.06515848 -2.36256443
## am1 gear4 gear5 carb
## 2.37977028 -0.81976225 1.32591626 0.92080813
summary(linear_mod) # residuals, coefficient table, R-squared and F-statistic
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + wt + am + gear + carb, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0630 -1.2369 -0.2293 1.2138 6.0903
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.92724 4.18194 7.635 2.35e-06 ***
## cyl6 -5.56261 2.40366 -2.314 0.0364 *
## cyl8 -6.71137 4.09767 -1.638 0.1237
## disp 0.02774 0.02146 1.292 0.2171
## hp -0.06516 0.03384 -1.926 0.0747 .
## wt -2.36256 1.96274 -1.204 0.2487
## am1 2.37977 2.39729 0.993 0.3377
## gear4 -0.81976 2.85270 -0.287 0.7780
## gear5 1.32592 3.20258 0.414 0.6851
## carb 0.92081 1.04999 0.877 0.3953
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.539 on 14 degrees of freedom
## Multiple R-squared: 0.8762, Adjusted R-squared: 0.7967
## F-statistic: 11.01 on 9 and 14 DF, p-value: 6.053e-05
Predicting mpg on the test set with the linear model and computing the mean squared error (MSE)
# Score the test rows with the fitted model, then report the mean squared
# error: the average squared gap between predicted and observed mpg.
mod_predict <- predict(linear_mod, newdata = test)
squared_error <- (mod_predict - test$mpg)^2
mean(squared_error) # test-set MSE