Check data frame for missing values

# Preview the first six rows; the built-in airquality data contains NAs.
head(airquality, n = 6L)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Count the NAs column by column, then add the per-column counts together.
sum(vapply(airquality, function(col) sum(is.na(col)), integer(1)))
## [1] 44
# Keep only the columns holding at least one NA and report their names.
names(Filter(anyNA, airquality))
## [1] "Ozone"   "Solar.R"


Replace missing values with the mean

# Report the class of each column as a named list (one entry per variable).
Map(class, airquality)
## $Ozone
## [1] "integer"
## 
## $Solar.R
## [1] "integer"
## 
## $Wind
## [1] "numeric"
## 
## $Temp
## [1] "integer"
## 
## $Month
## [1] "integer"
## 
## $Day
## [1] "integer"
#' Replace missing values in a numeric vector with the vector's mean
#'
#' Intended to be applied column by column, e.g. `lapply(df, imputeMean)`.
#'
#' @param x A vector, typically one column of a data frame.
#' @return For numeric (integer or double) `x`, the result of replacing every
#'   `NA` in `x` with the mean of its non-missing values. Any other input
#'   (character, factor, logical, ...) is returned unchanged.
imputeMean <- function(x) {
  # is.numeric() is TRUE for both integer and double, FALSE for factors, and
  # always yields a single TRUE/FALSE. Comparing class(x) with `==` yields one
  # value per class, which is not a valid `if` condition for multi-class input.
  if (!is.numeric(x)) {
    # Hand non-numeric input back untouched; falling through without a value
    # would return NULL, which is not a usable data frame column.
    return(x)
  }
  replace(x, is.na(x), mean(x, na.rm = TRUE))
}
# Impute each column, then rebuild a data frame from the resulting list.
airquality_impute <- as.data.frame(lapply(X = airquality, FUN = imputeMean))
head(airquality_impute, n = 6L)
##      Ozone  Solar.R Wind Temp Month Day
## 1 41.00000 190.0000  7.4   67     5   1
## 2 36.00000 118.0000  8.0   72     5   2
## 3 12.00000 149.0000 12.6   74     5   3
## 4 18.00000 313.0000 11.5   62     5   4
## 5 42.12931 185.9315 14.3   56     5   5
## 6 28.00000 185.9315 14.9   66     5   6
# Total number of missing values left after imputation.
sum(vapply(airquality_impute, function(col) sum(is.na(col)), integer(1)))
## [1] 0