
Neural Networks

Part 1: Load all the required libraries

#install the required libraries
#install.packages('NeuralNetTools')
#install.packages('nnet')
#install.packages('RColorBrewer')
#install.packages('caret')

#load the required libraries
library(NeuralNetTools)
library(nnet)
library(RColorBrewer)
library(caret)
Loading required package: lattice
Loading required package: ggplot2

Part 2: Read and prepare the dataset

#read the dataset
data <- read.delim("data.csv", header=T, sep=',')

#remove the NA column
data <- subset(   #this function is used to subset a dataset
  data,           #give it the data you want to subset
  select=-X       #give it the feature to include or exclude; - means exclude
)

#remove the id column
data <- subset(
  data,
  select=-id
)

#change the label to a factor and move it to the last column
diagnosis <- as.factor(data$diagnosis)
data <- subset(
  data,
  select=-diagnosis
)
data$diagnosis <- diagnosis
head(data)
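As a quick sanity check on the cleaned data (a minimal sketch using only base R), we can confirm that no NA values remain and look at the balance of the two diagnosis classes:

#check that no NA values remain after dropping the X column
sum(is.na(data))

#check the dimensions and the balance of the two diagnosis classes
dim(data)
table(data$diagnosis)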

Part 3: Prepare the train and test data

#set seed for reproducible results
set.seed(1234)

#split into train and test
idx <- sample.int(     #this function draws a random set of integers
  nrow(data),          #sample from the integers 1 to nrow(data)
  nrow(data) * 0.7     #the number of integers to draw (70% of the rows)
)
train <- data[idx, ]   #keep the 70% sample; we will use this as the training set
test  <- data[-idx, ]  #drop the 70% sample, leaving the remaining 30% as the test set

#let's check the dimensions of the new train and test sets
dim(train)
dim(test)

#for neural networks, standardizing the inputs can decrease training time as well as
#the chance that the algorithm finds a local minimum when optimizing.
#standardize inputs: fit the transformation on the train set, then reuse its parameters on the test set
scaled_mat <- scale(
  train[,1:30],
  center = TRUE,
  scale = TRUE
)
train_scaled <- data.frame(scaled_mat)
train_scaled$diagnosis <- as.factor(train$diagnosis)
head(train)
head(train_scaled)

test_scaled <- data.frame(scale(
  test[,1:30],
  center = attributes(scaled_mat)$'scaled:center',
  scale  = attributes(scaled_mat)$'scaled:scale'
))
test_scaled$diagnosis <- as.factor(test$diagnosis)
head(test)
head(test_scaled)
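To verify that the standardization worked as intended (a minimal sketch built on the objects created above), the scaled training columns should have mean 0 and standard deviation 1 exactly, while the test columns should only be close to those values, since they were scaled with the training-set parameters:

#training columns: exactly mean 0 and sd 1 after scaling
round(colMeans(train_scaled[,1:30]), 3)
round(apply(train_scaled[,1:30], 2, sd), 3)

#test columns: near, but not exactly, 0 and 1, because the training parameters were reused
round(colMeans(test_scaled[,1:30]), 3)
round(apply(test_scaled[,1:30], 2, sd), 3)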

Part 4a: Neural Network | Model Training

Now that we have created our training and testing datasets, we can start training our model. A common rule of thumb is to set the size of the hidden layer to the square root of the number of features, which for our 30-feature dataset is 5. Let us explore the parameter size, the number of nodes in the hidden layer.

floor(sqrt(30))
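Equivalently, and without hard-coding the feature count, the same rule of thumb can be computed from the training data itself (a small sketch; the - 1 excludes the diagnosis column):

#rule-of-thumb hidden layer size derived directly from the data
floor(sqrt(ncol(train_scaled) - 1))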
#set up a list to store your trained models
model_nnet_list <- list()

#in a loop, fit 25 models, one for each hidden layer size
for(size_use in 1:25){
  #show output of iterations for model 1 only
  if(size_use==1){trace <- TRUE}else{trace <- FALSE}
  set.seed(1234)
  model_nnet_list[[size_use]] <- nnet(  #fit a single-hidden-layer neural network
    formula = diagnosis ~ .,  #the . notation means use all other variables in the data as features
    data = train_scaled,      #data frame containing the variables in the formula
    size = size_use,          #number of units in the hidden layer
    decay = 0.01,             #weight decay, for regularization
    rang = 0.6,               #initial random weights on [-rang, rang]
    trace = trace,            #show iterations
    maxit = 200               #maximum number of iterations
  )
}
# weights:  33
initial  value 257.863979
iter  10 value 55.528871
iter  20 value 35.379013
iter  30 value 21.402967
iter  40 value 14.943263
iter  50 value 10.809916
iter  60 value 10.614514
iter  70 value 10.604750
iter  80 value 10.603735
iter  90 value 10.603652
iter 100 value 10.603638
final  value 10.603635
converged
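The iteration trace above is printed for the first model only. As a quick sketch, the final value of the fitting criterion and the convergence flag that nnet stores on each fitted object can be used to confirm that the other fits behaved similarly:

#final value of the fitting criterion for each of the 25 models
sapply(model_nnet_list, function(m) m$value)

#convergence flag: 0 means converged, 1 means maxit was reached
sapply(model_nnet_list, function(m) m$convergence)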

Part 4b: Neural Network | Prediction

Now that we have created several model fits, let us see how well each of the above models predicts the class labels in the test dataset.
#set up a list to store the predictions of the models
pred_nnet_list <- list()

#in a loop, for each model predict the classes in the test data set
for(size_use in 1:25){
  pred_nnet_list[[size_use]] <- predict(
    model_nnet_list[[size_use]],
    test_scaled[,1:30],  #exclude the 'diagnosis' column
    type = "class"       #return the predicted class labels ("M" or "B")
  )
}

#let's look at the M and B classes predicted for a few select models
table(pred_nnet_list[[1]])
table(pred_nnet_list[[2]])
table(pred_nnet_list[[5]])
table(pred_nnet_list[[25]])
table(test_scaled$diagnosis)
  B   M
 98  73

  B   M
 97  74

  B   M
101  70

  B   M
100  71

  B   M
101  70
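The one-way tables above only show the marginal counts of predicted and actual classes; they do not tell us which individual predictions were correct. As a quick sketch ahead of the formal evaluation in Part 4c, a two-way table cross-tabulates one model's predictions against the true labels:

#cross-tabulate the predictions of model 5 against the true test labels
table(
  predicted = pred_nnet_list[[5]],
  actual = test_scaled$diagnosis
)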

Part 4c: Neural Network | Accuracy

Let's try to quantify the accuracy of the predictions on the test set.
#set up a list to store the performance of the models
performance_nnet_list <- list()

#in a loop, for each model calculate the confusion matrix and performance statistics
for(size_use in 1:25){
  performance_nnet_list[[size_use]] <- confusionMatrix(
    data = as.factor(pred_nnet_list[[size_use]]),
    reference = test_scaled$diagnosis,
    positive = "M",
    dnn = c("predicted","actual")
  )
}

#extract the accuracy of the models
accuracy_nnet <- unlist(
  lapply(
    lapply(performance_nnet_list, "[[", "overall"),
    "[[", "Accuracy"
  )
)

#let's plot the accuracy for the 25 models
plot(
  accuracy_nnet,                       #vector of accuracies
  ylim=c(min(accuracy_nnet)-0.01, 1),  #range of y-axis
  xlab="Model",                        #label of x-axis
  ylab="Accuracy",                     #label of y-axis
  type="l",                            #line plot instead of points
  xaxt="n"                             #remove x tick labels so we can add our own
)
#add our own x tick labels
axis(
  1,           #1 is the x axis, 2 is the y axis
  at=1:25,     #positions 1 to 25
  labels=1:25  #label them with 1 to 25
)

#model(s) with the highest accuracy
which(accuracy_nnet==max(accuracy_nnet))
[Plot: test-set accuracy versus hidden layer size for the 25 models]
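As a side note (a sketch equivalent to the nested lapply calls above), the accuracies can also be extracted with a single sapply:

#compact equivalent of the nested lapply extraction above
accuracy_check <- sapply(performance_nnet_list, function(p) p$overall[["Accuracy"]])
all.equal(accuracy_check, accuracy_nnet)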

Part 4d: Neural Network | Sensitivity Analysis

#select the model that you would like to do sensitivity analysis on
model_number <- 18

#calculate the relative importance of the inputs using Garson's method
rel_imp_nnet <- garson(
  model_nnet_list[[model_number]],
  bar_plot = FALSE
)

#set the colour of the input nodes based on their relative importance
cols <- colorRampPalette(brewer.pal(9,"Oranges"))(30)[rank(rel_imp_nnet)]

#widen the plots
options(repr.plot.width=10, repr.plot.height=6)

#plot the network
plotnet(
  model_nnet_list[[model_number]],
  cex_val = 0.5,
  circle_cex = 3,
  circle_col = list(cols, 'lightblue'),
  max_sp = TRUE
)

#take a look at the relative importance values for each input variable,
#sorted from highest to lowest relative importance
rel_imp_nnet$variable <- row.names(rel_imp_nnet)
rel_imp_sorted <- rel_imp_nnet[order(rel_imp_nnet$rel_imp, decreasing = TRUE),]
rel_imp_sorted

#let's profile the four variables with the highest relative importance as well as
#the three with the lowest, i.e. examine the effect of each variable on the response

#choose variables to analyse
plot_variables <- rel_imp_sorted$variable[c(1:4, 28:30)]

#profile each variable while holding all others at quantiles ranging from 0 to 1
lekprofile(
  model_nnet_list[[model_number]],
  xsel = plot_variables,
  group_vals = seq(0, 1, by = 0.25),
  grp_nms = seq(0, 1, by = 0.25)
)

#since covariance among the predictors can create unlikely scenarios if all other
#variables are held at the same value, we can instead use k-means clustering to find
#natural clusters of observations and hold each variable at its cluster mean

#profile each variable while holding all others at the means of each cluster
lekprofile(
  model_nnet_list[[model_number]],
  xsel = plot_variables,
  group_vals = 4
)
[Plot: neural network structure with input nodes coloured by relative importance]
[Plot: Lek profile of selected variables, holding others at quantiles 0 to 1]
[Plot: Lek profile of selected variables, holding others at k-means cluster means]
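Garson's algorithm reports only the magnitude of each input's importance. As an optional sketch, NeuralNetTools also provides Olden's connection-weights method, which preserves the sign of each input's contribution to the response:

#Olden's method keeps the sign of each contribution, unlike Garson's magnitudes
olden(
  model_nnet_list[[model_number]],
  bar_plot = TRUE  #plot the signed importance values as a bar chart
)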