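Import packages (the caret package is required for the createDataPartition() call used in the stratified sampling section)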
Randomly Split Data into Training (75%) and Test (25%) Sets
# Simple random split: draw 75% of the row indices without replacement for
# the training set; every remaining row goes to the test set.
set.seed(99) # fix the RNG seed so the random split is reproducible
n <- nrow(iris)
# seq_len(n) is the safe form of 1:n (it yields an empty vector when n is 0)
index <- sample(seq_len(n), size = round(0.75 * n), replace = FALSE)
train <- iris[index, ]
test <- iris[-index, ] # negative indexing drops the sampled training rows
paste0("Observations in training data: ", nrow(train))
## [1] "Observations in training data: 112"
paste0("Observations in testing data: ", nrow(test))
## [1] "Observations in testing data: 38"
Stratified Sampling (Preserving Class Distributions)
# Stratified split: sample 75% of the rows within each Species level so the
# class proportions of the full data carry over to both subsets.
set.seed(99) # fix the RNG seed so the partition is reproducible
table(iris$Species) # 50/50/50 split within Species classification
##
## versicolor setosa virginica
## 50 50 50
# Namespace-qualified so the call works even when caret has not been attached
# with library(); list = FALSE requests the row indices as a matrix, not a list.
index <- caret::createDataPartition(
  iris$Species,
  p = 0.75,
  list = FALSE,
  times = 1
)
train <- iris[index, ]
test <- iris[-index, ] # negative indexing drops the sampled training rows
table(train$Species) # 38/38/38, equal ratio preserved in training data
##
## versicolor setosa virginica
## 38 38 38
table(test$Species) # 12/12/12, equal ratio preserved in test data
##
## versicolor setosa virginica
## 12 12 12