Import packages
Create a data frame with missing values in a categorical feature
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Work on a copy of the built-in iris data so the original stays untouched.
iris_test <- iris
# Store Species as plain character strings instead of a factor.
iris_test$Species <- as.character(iris_test$Species)
# Set Species to NA in 10 randomly chosen rows. The row range comes from the
# data itself and the column is addressed by name rather than by position.
# The chosen rows differ between runs unless set.seed() is called beforehand.
iris_test[sample(seq_len(nrow(iris_test)), 10), "Species"] <- NA
Impute missing Species values using the mode (the most frequent value)
# Return the most frequent value of `variable` as a character string.
#
# Arguments:
#   variable: a vector or factor. It is converted with as.factor() before
#     counting, so NA entries of a plain vector are not counted.
#
# Returns:
#   A length-one character vector holding the most frequent value. When
#   several values tie, the one that comes first in the factor's level order
#   wins. NA_character_ is returned when there is nothing to count (empty
#   input, or input consisting only of NA).
#
# Note: this definition masks base::mode() for the rest of the script.
mode <- function(variable) {
  counts <- table(as.factor(variable))
  # No observations at all: there is no mode, so report a missing value
  # instead of failing on an out-of-bounds subscript.
  if (sum(counts) == 0L) {
    return(NA_character_)
  }
  # which.max() gives the position of the first maximum, so ties resolve to
  # the earliest level.
  names(counts)[[which.max(counts)]]
}
# Look up the most frequent Species value (shown when run at the console).
mode(iris_test[["Species"]])
# Overwrite every missing Species entry with that most frequent value.
iris_test$Species <- replace(
  iris_test$Species,
  is.na(iris_test$Species),
  mode(iris_test$Species)
)