Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
afnan47
GitHub Repository: afnan47/sem7
Path: blob/main/ML/2. Email Spam Classification/Email Spam Classification.ipynb
423 views
Kernel: Python 3.8.6 64-bit
# Third-party imports: pandas for CSV I/O, scikit-learn for the classifiers.
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# Load the labeled e-mail word-count dataset from the working directory.
df = pd.read_csv("./emails.csv")

# Quick sanity checks: peek at the first rows, then confirm there are
# no missing values in any column.
df.head()
df.isna().sum()
Email No. 0 the 0 to 0 ect 0 and 0 .. military 0 allowing 0 ff 0 dry 0 Prediction 0 Length: 3002, dtype: int64
# Features: the 3000 word-count columns (columns 1..3000, skipping the
# leading "Email No." identifier and the trailing "Prediction" label).
X = df.iloc[:, 1:3001]
X
# Target: the last column ("Prediction") as a NumPy array.
Y = df.iloc[:, -1].to_numpy()
Y
array([0, 0, 0, ..., 1, 1, 0], dtype=int64)
# Hold out 25% of the data for evaluation. random_state is fixed so the
# reported accuracy is reproducible (the original split was unseeded, so
# the score changed on every run; the KNN cell below already seeds its split).
train_x, test_x, train_y, test_y = train_test_split(
    X, Y, test_size=0.25, random_state=42
)

# C is the regularization parameter (inverse of regularization strength,
# L2 penalty by default): as C increases, the model tends to overfit.
# kernel='rbf' is the radial basis function kernel.
# gamma (rbf-only here): as gamma increases, the model tends to overfit.
svc = SVC(C=1.0, kernel='rbf', gamma='auto')
svc.fit(train_x, train_y)
y_pred2 = svc.predict(test_x)
# accuracy_score's contract is (y_true, y_pred); the original passed them
# swapped — harmless for plain accuracy (it is symmetric) but wrong by API.
print("Accuracy Score for SVC : ", accuracy_score(test_y, y_pred2))
Accuracy Score for SVC : 0.8979118329466357
# Seeded 80/20 split for the k-nearest-neighbours model.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Fit a 7-neighbour classifier (fit returns the estimator, so we can chain)
# and show its predictions on the held-out set.
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)
print(knn.predict(X_test))
[0 0 1 ... 0 1 0]
# Mean accuracy of the KNN classifier on the held-out 20% test split.
knn_accuracy = knn.score(X_test, y_test)
print(knn_accuracy)
0.8685990338164251