Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
| Download
"Guiding Future STEM Leaders through Innovative Research Training" ~ thinkingbeyond.education
Project: stephanie's main branch
Path: ThinkingBeyond Activities / BeyondAI-2024-Mentee-Projects / palak-sumayah / MNIST_dataset.ipynb~1
Views: 1096 | Image: ubuntu2204
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "giMJQwZTlemC"
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from IPython.display import display\n",
    "from sklearn.datasets import fetch_openml\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score\n",
    "\n",
    "# Single seed shared by the train/test split and every stochastic classifier\n",
    "# so that a fresh Restart & Run All reproduces the same numbers.\n",
    "RANDOM_STATE = 42\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Aq3GeEZClnE_",
    "outputId": "51327237-d0c6-4697-ad7c-bbfe4ec15607"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fetching MNIST dataset...\n"
     ]
    }
   ],
   "source": [
    "# Download the 784-feature MNIST dataset from OpenML.\n",
    "print(\"Fetching MNIST dataset...\")\n",
    "mnist = fetch_openml('mnist_784', version=1)\n",
    "X, y = mnist.data, mnist.target\n",
    "# Cast the target labels to integers.\n",
    "y = y.astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "_FPvJAUwltZV"
   },
   "outputs": [],
   "source": [
    "# Split BEFORE scaling so the held-out test set never influences the\n",
    "# statistics learned by the scaler (avoids train/test leakage).\n",
    "X_train_raw, X_test_raw, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=RANDOM_STATE\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BueCqRGDlxoF"
   },
   "outputs": [],
   "source": [
    "# Fit the scaler on the training split only, then apply the same\n",
    "# transformation to both splits.\n",
    "scaler = StandardScaler()\n",
    "X_train = scaler.fit_transform(X_train_raw)\n",
    "X_test = scaler.transform(X_test_raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "OruThisOl8Tv"
   },
   "outputs": [],
   "source": [
    "def evaluate_classifier(name, clf):\n",
    "    \"\"\"Fit clf on the training split and report its test-set metrics.\n",
    "\n",
    "    Reads the notebook-level X_train, y_train, X_test and y_test, so the\n",
    "    split/scaling cells must have been run first.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Label used in the printed report and in the returned record.\n",
    "    clf : estimator\n",
    "        Classifier object exposing fit(X, y) and predict(X).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        One record with the classifier name, accuracy, weighted F1,\n",
    "        weighted recall, weighted precision and the fit time in seconds.\n",
    "    \"\"\"\n",
    "    print(f\"### {name} ###\")\n",
    "    # perf_counter is monotonic, so the measured interval cannot be skewed\n",
    "    # by system clock adjustments the way time.time() can.\n",
    "    start_time = time.perf_counter()\n",
    "    clf.fit(X_train, y_train)\n",
    "    train_time = time.perf_counter() - start_time\n",
    "    y_pred = clf.predict(X_test)\n",
    "\n",
    "    # Calculate metrics (weighted average across the classes)\n",
    "    acc = accuracy_score(y_test, y_pred)\n",
    "    f1 = f1_score(y_test, y_pred, average='weighted')\n",
    "    recall = recall_score(y_test, y_pred, average='weighted')\n",
    "    precision = precision_score(y_test, y_pred, average='weighted')\n",
    "\n",
    "    print(f\"Accuracy: {acc:.2f}\")\n",
    "    print(f\"F1 Score: {f1:.2f}\")\n",
    "    print(f\"Recall: {recall:.2f}\")\n",
    "    print(f\"Precision: {precision:.2f}\")\n",
    "    print(f\"Training Time: {train_time:.4f} seconds\\n\")\n",
    "\n",
    "    # Return results as a dictionary\n",
    "    return {\n",
    "        \"Classifier\": name,\n",
    "        \"Accuracy\": acc,\n",
    "        \"F1 Score\": f1,\n",
    "        \"Recall\": recall,\n",
    "        \"Precision\": precision,\n",
    "        \"Training Time (s)\": train_time\n",
    "    }\n",
    "\n",
    "# List of classifiers\n",
    "classifiers = [\n",
    "    (\"Logistic Regression\", LogisticRegression(max_iter=1000)),\n",
    "    # probability=True is deliberately not set: only predict() is used, and\n",
    "    # enabling it makes fit() run an additional internal cross-validation,\n",
    "    # which would inflate the training time being measured.\n",
    "    (\"SVM with RBF Kernel\", SVC(kernel=\"rbf\")),\n",
    "    (\"Decision Tree\", DecisionTreeClassifier(random_state=RANDOM_STATE)),\n",
    "    (\"Random Forest\", RandomForestClassifier(random_state=RANDOM_STATE)),\n",
    "    (\"Gradient Boosting\", GradientBoostingClassifier(random_state=RANDOM_STATE)),\n",
    "    (\"Naive Bayes\", GaussianNB())\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true,
     "base_uri": "https://localhost:8080/"
    },
    "id": "aeOh_3fgl99y",
    "outputId": "fc0b74e3-474c-46db-d6ee-267f140fb2c4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "### Logistic Regression ###\n",
      "Accuracy: 0.92\n",
      "F1 Score: 0.92\n",
      "Recall: 0.92\n",
      "Precision: 0.92\n",
      "Training Time: 56.5794 seconds\n",
      "\n",
      "### SVM with RBF Kernel ###\n",
      "Accuracy: 0.96\n",
      "F1 Score: 0.96\n",
      "Recall: 0.96\n",
      "Precision: 0.96\n",
      "Training Time: 2258.5055 seconds\n",
      "\n",
      "### Decision Tree ###\n",
      "Accuracy: 0.87\n",
      "F1 Score: 0.87\n",
      "Recall: 0.87\n",
      "Precision: 0.87\n",
      "Training Time: 24.0900 seconds\n",
      "\n",
      "### Random Forest ###\n",
      "Accuracy: 0.97\n",
      "F1 Score: 0.97\n",
      "Recall: 0.97\n",
      "Precision: 0.97\n",
      "Training Time: 49.2545 seconds\n",
      "\n",
      "### Gradient Boosting ###\n"
     ]
    }
   ],
   "source": [
    "# Train and score every classifier, collecting one result record each.\n",
    "results = []\n",
    "for name, clf in classifiers:\n",
    "    results.append(evaluate_classifier(name, clf))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "w8zuWe1OmECV"
   },
   "outputs": [],
   "source": [
    "# Build the comparison table once from the list of records.\n",
    "df_results = pd.DataFrame(results)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SP5qPX_uOJ1T"
   },
   "outputs": [],
   "source": [
    "print(\"\\n### Comparison Table ###\\n\")\n",
    "print(df_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LF00VeHKOUVZ"
   },
   "outputs": [],
   "source": [
    "# Rich (HTML) rendering of the same table.\n",
    "display(df_results)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}