Path: blob/master/Part 7 - Natural Language Processing/natural_language_processing.R
1002 views
# Natural Language Processing
#
# Builds a bag-of-words model from restaurant reviews and trains a random
# forest to predict the binary `Liked` label, then evaluates the predictions
# on a held-out test set with a confusion matrix.

# install.packages('tm')
# install.packages('SnowballC')
# install.packages('caTools')
# install.packages('randomForest')
library(tm)
library(SnowballC)
library(caTools)
library(randomForest)

# Importing the dataset ----
# quote = '' disables quote handling so quote characters inside a review
# are read as ordinary text instead of delimiting a field.
dataset_original <- read.delim(
  'Restaurant_Reviews.tsv',
  quote = '',
  stringsAsFactors = FALSE
)

# Cleaning the texts ----
# Transformations are applied in this order: lower-case, drop digits, drop
# punctuation, drop stop words, stem, and finally collapse repeated
# whitespace.
corpus <- VCorpus(VectorSource(dataset_original$Review))
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeWords, stopwords())
corpus <- tm_map(corpus, stemDocument)
corpus <- tm_map(corpus, stripWhitespace)

# Creating the Bag of Words model ----
dtm <- DocumentTermMatrix(corpus)
dtm <- removeSparseTerms(dtm, 0.999)
dataset <- as.data.frame(as.matrix(dtm))
dataset$Liked <- dataset_original$Liked

# NOTE: the classification-template lines that re-read
# 'Social_Network_Ads.csv' into `dataset` were removed here. They replaced
# the bag-of-words data frame built above, so the label column and every
# review feature were lost before the model was trained.

# Encoding the target feature as factor ----
dataset$Liked <- factor(dataset$Liked, levels = c(0, 1))

# Splitting the dataset into the Training set and Test set ----
set.seed(123)
split <- sample.split(dataset$Liked, SplitRatio = 0.8)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Select predictors by name rather than by a fixed column position: the
# number of term columns depends on what removeSparseTerms() keeps, so the
# label is not guaranteed to sit at any particular index.
feature_cols <- names(dataset) != 'Liked'

# Fitting Random Forest Classification to the Training set ----
classifier <- randomForest(
  x = training_set[feature_cols],
  y = training_set$Liked,
  ntree = 10
)

# Predicting the Test set results ----
y_pred <- predict(classifier, newdata = test_set[feature_cols])

# Making the Confusion Matrix ----
# Rows are the actual labels, columns are the predicted labels.
cm <- table(test_set$Liked, y_pred)