Import packages

library(stats)


Split Data into Training (75%) and Test (25%) Sets

# Fix the RNG seed so the random split, and every number derived from it,
# is reproducible from run to run.
# NOTE(review): the seed value is arbitrary, and the model output recorded
# further down came from an unseeded run -- re-knit to refresh those numbers.
set.seed(123)

# The recorded model output lists level-wise terms (cyl6, cyl8, am1, gear4,
# gear5), which lm() only produces for factors, while stock mtcars stores
# cyl, am and gear as plain numerics. Convert them on a copy *before*
# splitting so train and test share identical factor levels and the built-in
# dataset is left untouched. factor() is a no-op on columns that are already
# factors.
model_data <- mtcars
factor_cols <- c("cyl", "am", "gear")
model_data[factor_cols] <- lapply(model_data[factor_cols], factor)

# Draw 75% of the row positions without replacement for training; the
# remaining rows form the test set.
n <- nrow(model_data)
index <- sample(seq_len(n), size = round(0.75 * n), replace = FALSE)
train <- model_data[index, ]
test <- model_data[-index, ]
paste0("Observations in training data: ", nrow(train))
## [1] "Observations in training data: 24"
paste0("Observations in testing data: ", nrow(test))
## [1] "Observations in testing data: 8"


Fitting a linear model to predict miles per gallon (mpg) from the mtcars dataset

# Fit a linear regression of mpg on seven predictors using only the training
# rows. The recorded output below shows level-wise terms (cyl6, cyl8, am1,
# gear4, gear5), i.e. cyl, am and gear were factors in `train` when this ran.
linear_mod <- lm(mpg ~ cyl + disp + hp + wt + am + gear + carb, data = train)

# Estimated coefficients (intercept first, then one entry per model term).
coef(linear_mod)
## (Intercept)        cyl6        cyl8        disp          hp          wt 
## 31.92724195 -5.56261475 -6.71136735  0.02773889 -0.06515848 -2.36256443 
##         am1       gear4       gear5        carb 
##  2.37977028 -0.81976225  1.32591626  0.92080813

# Full fit report: residual quantiles, coefficient table with standard errors
# and p-values, residual standard error, R-squared and the overall F-test.
summary(linear_mod)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + wt + am + gear + carb, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0630 -1.2369 -0.2293  1.2138  6.0903 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 31.92724    4.18194   7.635 2.35e-06 ***
## cyl6        -5.56261    2.40366  -2.314   0.0364 *  
## cyl8        -6.71137    4.09767  -1.638   0.1237    
## disp         0.02774    0.02146   1.292   0.2171    
## hp          -0.06516    0.03384  -1.926   0.0747 .  
## wt          -2.36256    1.96274  -1.204   0.2487    
## am1          2.37977    2.39729   0.993   0.3377    
## gear4       -0.81976    2.85270  -0.287   0.7780    
## gear5        1.32592    3.20258   0.414   0.6851    
## carb         0.92081    1.04999   0.877   0.3953    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.539 on 14 degrees of freedom
## Multiple R-squared:  0.8762, Adjusted R-squared:  0.7967 
## F-statistic: 11.01 on 9 and 14 DF,  p-value: 6.053e-05


Evaluating the linear model's predictions on the test set

# Score the held-out rows with the fitted model.
mod_predict <- predict(linear_mod, newdata = test)

# Test-set mean squared error: the average squared gap between predicted
# and observed mpg.
mean((mod_predict - test[["mpg"]])^2)
## [1] 14.73828