Image: ubuntu2204
Kernel: Python 3
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt

# Generate dataset
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.title("Moon Dataset (High Noise)")
plt.show()
[Figure: "Moon Dataset (High Noise)" — scatter plot of the two overlapping, noisy classes]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the Logistic Regression model
model = LogisticRegression()

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=2, colors='black')
plt.title("Logistic Regression Decision Boundary")
plt.show()
Accuracy on the test set: 0.81
Classification Metrics:
              precision    recall  f1-score   support

           0       0.84      0.79      0.81       156
           1       0.78      0.83      0.81       144

    accuracy                           0.81       300
   macro avg       0.81      0.81      0.81       300
weighted avg       0.81      0.81      0.81       300

Runtime for training and evaluation: 0.0076 seconds
[Figure: "Logistic Regression Decision Boundary"]
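The mesh-grid and contour code at the end of the cell above is repeated verbatim for every model that follows, so a small reusable helper can stand in for it. The sketch below is not part of the original notebook; the name plot_decision_boundary and its keyword arguments are ours, but the grid construction, prediction, and contour call mirror the cells in this notebook.

import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary(model, X, y, title, color='black', linewidth=2):
    # Build a 100x100 grid covering the data range, padded by 1 unit on each side
    xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                         np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
    # Predict a class label for every grid point and reshape back onto the grid
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    # Scatter the data and draw the 0-to-1 transition as a single contour line
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
    plt.contour(xx, yy, Z, levels=[0.5], linewidths=linewidth, colors=color)
    plt.title(title)
    plt.show()

With this helper, each cell below would reduce to fitting its model and then calling plot_decision_boundary(model, X, y, "…").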
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron  # Change from LogisticRegression to Perceptron
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the Perceptron model
model = Perceptron()

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=2, colors='black')
plt.title("Perceptron Decision Boundary (without background color)")
plt.show()
Accuracy on the test set: 0.76
Classification Metrics:
              precision    recall  f1-score   support

           0       0.83      0.67      0.74       156
           1       0.71      0.85      0.77       144

    accuracy                           0.76       300
   macro avg       0.77      0.76      0.76       300
weighted avg       0.77      0.76      0.76       300

Runtime for training and evaluation: 0.0045 seconds
[Figure: "Perceptron Decision Boundary (without background color)"]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Support Vector Classifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the SVM model with RBF kernel
model = SVC(kernel='rbf', gamma='scale')  # gamma='scale' is a good default for non-linear data

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=4, colors='red')
plt.title("SVM with RBF Kernel Decision Boundary")
plt.show()
Accuracy on the test set: 0.82
Classification Metrics:
              precision    recall  f1-score   support

           0       0.82      0.83      0.82       156
           1       0.81      0.81      0.81       144

    accuracy                           0.82       300
   macro avg       0.82      0.82      0.82       300
weighted avg       0.82      0.82      0.82       300

Runtime for training and evaluation: 0.0216 seconds
[Figure: "SVM with RBF Kernel Decision Boundary"]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # Importing Random Forest Classifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)  # 100 trees in the forest

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=2, colors='red')
plt.title("Random Forest Classifier Decision Boundary")
plt.show()
Accuracy on the test set: 0.78
Classification Metrics:
              precision    recall  f1-score   support

           0       0.78      0.80      0.79       156
           1       0.78      0.76      0.77       144

    accuracy                           0.78       300
   macro avg       0.78      0.78      0.78       300
weighted avg       0.78      0.78      0.78       300

Runtime for training and evaluation: 0.2201 seconds
[Figure: "Random Forest Classifier Decision Boundary"]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier  # Importing MLP Classifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the MLP model (two hidden layers with 100 and 50 units)
model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=3, colors='red')
plt.title("MLP Classifier Decision Boundary")
plt.show()
Accuracy on the test set: 0.82
Classification Metrics:
              precision    recall  f1-score   support

           0       0.82      0.83      0.83       156
           1       0.82      0.81      0.81       144

    accuracy                           0.82       300
   macro avg       0.82      0.82      0.82       300
weighted avg       0.82      0.82      0.82       300

Runtime for training and evaluation: 4.8854 seconds
[Figure: "MLP Classifier Decision Boundary"]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB  # Importing Naive Bayes Classifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the Naive Bayes model (GaussianNB)
model = GaussianNB()

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=4, colors='red')
plt.title("Naive Bayes Classifier Decision Boundary")
plt.show()
Accuracy on the test set: 0.81
Classification Metrics:
              precision    recall  f1-score   support

           0       0.84      0.79      0.81       156
           1       0.79      0.84      0.81       144

    accuracy                           0.81       300
   macro avg       0.81      0.81      0.81       300
weighted avg       0.82      0.81      0.81       300

Runtime for training and evaluation: 0.0017 seconds
[Figure: "Naive Bayes Classifier Decision Boundary"]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier  # Importing Gradient Boosting Classifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the Gradient Boosting model
model = GradientBoostingClassifier(random_state=42)

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=4, colors='red')
plt.title("Gradient Boosting Classifier Decision Boundary")
plt.show()
Accuracy on the test set: 0.79
Classification Metrics:
              precision    recall  f1-score   support

           0       0.80      0.81      0.80       156
           1       0.79      0.78      0.78       144

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.79      0.79      0.79       300

Runtime for training and evaluation: 0.2142 seconds
[Figure: "Gradient Boosting Classifier Decision Boundary"]
import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier  # Importing Decision Tree Classifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Generate the dataset with more noise
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create the Decision Tree model
model = DecisionTreeClassifier(random_state=42)

# Track runtime
start_time = time.time()

# Step 4: Train the model
model.fit(X_train, y_train)

# End runtime
end_time = time.time()
runtime = end_time - start_time

# Step 5: Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print classification metrics
print(f"Accuracy on the test set: {accuracy:.2f}")
print("Classification Metrics:")
print(classification_report(y_test, y_pred))
print(f"Runtime for training and evaluation: {runtime:.4f} seconds")

# Visualize the decision boundary
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=4, colors='red')
plt.title("Decision Tree Classifier Decision Boundary")
plt.show()
Accuracy on the test set: 0.73
Classification Metrics:
              precision    recall  f1-score   support

           0       0.76      0.71      0.73       156
           1       0.71      0.75      0.73       144

    accuracy                           0.73       300
   macro avg       0.73      0.73      0.73       300
weighted avg       0.73      0.73      0.73       300

Runtime for training and evaluation: 0.0044 seconds
[Figure: "Decision Tree Classifier Decision Boundary"]
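Every cell above repeats the same generate/split/train/evaluate/plot pipeline and only swaps the estimator. The sketch below, which is not part of the original notebook, collects the same models and hyperparameters into one loop to print the accuracy and training-time comparison in a single pass; exact numbers will match the outputs above only when run with the same scikit-learn version and environment.

import time
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Same data and split as in every cell above
X, y = make_moons(n_samples=1000, noise=0.5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Hyperparameters copied from the individual cells above
models = {
    "Logistic Regression": LogisticRegression(),
    "Perceptron": Perceptron(),
    "SVM (RBF kernel)": SVC(kernel='rbf', gamma='scale'),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "MLP (100, 50)": MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42),
    "Gaussian Naive Bayes": GaussianNB(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
}

for name, model in models.items():
    start = time.time()
    model.fit(X_train, y_train)
    runtime = time.time() - start
    acc = accuracy_score(y_test, model.predict(X_test))
    print(f"{name:<22} accuracy: {acc:.2f}  training time: {runtime:.4f} s")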