CoCalc -- MNIST_dataset.ipynb~1

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

"Guiding Future STEM Leaders through Innovative Research Training" ~ thinkingbeyond.education

Path: ThinkingBeyond Activities / BeyondAI-2024-Mentee-Projects / palak-sumayah / MNIST_dataset.ipynb~1

Views: 1096
Image: ubuntu2204

{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "giMJQwZTlemC"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "from sklearn.datasets import fetch_openml\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
        "from sklearn.naive_bayes import GaussianNB\n",
        "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score\n",
        "import time\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Aq3GeEZClnE_",
        "outputId": "51327237-d0c6-4697-ad7c-bbfe4ec15607"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Fetching MNIST dataset...\n"
          ]
        }
      ],
      "source": [
        "# Download the 'mnist_784' dataset (version 1) from OpenML.\n",
        "print(\"Fetching MNIST dataset...\")\n",
        "mnist = fetch_openml(name='mnist_784', version=1)\n",
        "\n",
        "# Feature matrix and labels; the labels are cast to int for the models and metrics below.\n",
        "X = mnist.data\n",
        "y = mnist.target.astype(int)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_FPvJAUwltZV"
      },
      "outputs": [],
      "source": [
        "# Standardise the features: learn the per-column statistics from X, then apply them to X.\n",
        "# NOTE(review): the scaler is fit on every row of X, i.e. before any train/test split.\n",
        "scaler = StandardScaler().fit(X)\n",
        "X_scaled = scaler.transform(X)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BueCqRGDlxoF"
      },
      "outputs": [],
      "source": [
        "# Split the raw features first, then fit the scaler on the training rows only, so that\n",
        "# no test-set statistics leak into the preprocessing. The split itself (20% test,\n",
        "# random_state=42) selects the rows by position, so it does not depend on the scaling.\n",
        "X_train_raw, X_test_raw, y_train, y_test = train_test_split(\n",
        "    X, y, test_size=0.2, random_state=42\n",
        ")\n",
        "\n",
        "# Apply the training-set mean/std to both splits.\n",
        "train_scaler = StandardScaler().fit(X_train_raw)\n",
        "X_train = train_scaler.transform(X_train_raw)\n",
        "X_test = train_scaler.transform(X_test_raw)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "OruThisOl8Tv"
      },
      "outputs": [],
      "source": [
        "def evaluate_classifier(name, clf, X_tr=None, y_tr=None, X_te=None, y_te=None):\n",
        "    \"\"\"Fit `clf`, score it on the test split, print the metrics and return them.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    name : str\n",
        "        Label printed in the header and stored under 'Classifier'.\n",
        "    clf : estimator\n",
        "        Object providing `fit(X, y)` and `predict(X)`; it is fitted in place.\n",
        "    X_tr, y_tr, X_te, y_te : optional\n",
        "        Training / test data. Each argument left as None falls back to the\n",
        "        notebook-level X_train, y_train, X_test or y_test respectively, so the\n",
        "        two-argument call `evaluate_classifier(name, clf)` keeps working.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    dict\n",
        "        One record with the keys 'Classifier', 'Accuracy', 'F1 Score', 'Recall',\n",
        "        'Precision' and 'Training Time (s)'. Only the `fit` call is timed.\n",
        "    \"\"\"\n",
        "    # Resolve the data at call time rather than at definition time.\n",
        "    X_tr = X_train if X_tr is None else X_tr\n",
        "    y_tr = y_train if y_tr is None else y_tr\n",
        "    X_te = X_test if X_te is None else X_te\n",
        "    y_te = y_test if y_te is None else y_te\n",
        "\n",
        "    print(f\"### {name} ###\")\n",
        "    # perf_counter() is monotonic, so wall-clock adjustments cannot skew the timing.\n",
        "    start_time = time.perf_counter()\n",
        "    clf.fit(X_tr, y_tr)\n",
        "    train_time = time.perf_counter() - start_time\n",
        "    y_pred = clf.predict(X_te)\n",
        "\n",
        "    # Calculate metrics; average='weighted' weights each class score by its support.\n",
        "    acc = accuracy_score(y_te, y_pred)\n",
        "    f1 = f1_score(y_te, y_pred, average='weighted')\n",
        "    recall = recall_score(y_te, y_pred, average='weighted')\n",
        "    precision = precision_score(y_te, y_pred, average='weighted')\n",
        "\n",
        "    print(f\"Accuracy: {acc:.2f}\")\n",
        "    print(f\"F1 Score: {f1:.2f}\")\n",
        "    print(f\"Recall: {recall:.2f}\")\n",
        "    print(f\"Precision: {precision:.2f}\")\n",
        "    print(f\"Training Time: {train_time:.4f} seconds\\n\")\n",
        "\n",
        "    # Return results as a dictionary (one row of the comparison table)\n",
        "    return {\n",
        "        \"Classifier\": name,\n",
        "        \"Accuracy\": acc,\n",
        "        \"F1 Score\": f1,\n",
        "        \"Recall\": recall,\n",
        "        \"Precision\": precision,\n",
        "        \"Training Time (s)\": train_time\n",
        "    }\n",
        "\n",
        "# List of classifiers as (display name, unfitted estimator) pairs.\n",
        "# - SVC: probability=True is left out. Nothing in this notebook calls predict_proba,\n",
        "#   and that flag makes fit() run an extra internal 5-fold cross-validation, which\n",
        "#   inflates the measured training time without changing predict().\n",
        "# - The tree-based models are randomised, so they get a fixed random_state to make\n",
        "#   the reported scores repeatable across runs.\n",
        "classifiers = [\n",
        "    (\"Logistic Regression\", LogisticRegression(max_iter=1000)),\n",
        "    (\"SVM with RBF Kernel\", SVC(kernel=\"rbf\")),\n",
        "    (\"Decision Tree\", DecisionTreeClassifier(random_state=42)),\n",
        "    (\"Random Forest\", RandomForestClassifier(random_state=42)),\n",
        "    (\"Gradient Boosting\", GradientBoostingClassifier(random_state=42)),\n",
        "    (\"Naive Bayes\", GaussianNB())\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "background_save": true,
          "base_uri": "https://localhost:8080/"
        },
        "id": "aeOh_3fgl99y",
        "outputId": "fc0b74e3-474c-46db-d6ee-267f140fb2c4"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "### Logistic Regression ###\n",
            "Accuracy: 0.92\n",
            "F1 Score: 0.92\n",
            "Recall: 0.92\n",
            "Precision: 0.92\n",
            "Training Time: 56.5794 seconds\n",
            "\n",
            "### SVM with RBF Kernel ###\n",
            "Accuracy: 0.96\n",
            "F1 Score: 0.96\n",
            "Recall: 0.96\n",
            "Precision: 0.96\n",
            "Training Time: 2258.5055 seconds\n",
            "\n",
            "### Decision Tree ###\n",
            "Accuracy: 0.87\n",
            "F1 Score: 0.87\n",
            "Recall: 0.87\n",
            "Precision: 0.87\n",
            "Training Time: 24.0900 seconds\n",
            "\n",
            "### Random Forest ###\n",
            "Accuracy: 0.97\n",
            "F1 Score: 0.97\n",
            "Recall: 0.97\n",
            "Precision: 0.97\n",
            "Training Time: 49.2545 seconds\n",
            "\n",
            "### Gradient Boosting ###\n"
          ]
        }
      ],
      "source": [
        "# Benchmark each (name, estimator) pair in turn. Records are appended one at a time,\n",
        "# so an interrupted run still leaves the finished ones in `results`.\n",
        "results = list()\n",
        "for name, clf in classifiers:\n",
        "    results.append(evaluate_classifier(name=name, clf=clf))\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "w8zuWe1OmECV"
      },
      "outputs": [],
      "source": [
        "# One row per evaluated classifier; the columns come from the keys of each record.\n",
        "df_results = pd.DataFrame(data=results)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "SP5qPX_uOJ1T"
      },
      "outputs": [],
      "source": [
        "# Plain-text dump of the comparison table: heading line, then the frame.\n",
        "print(\"\\n### Comparison Table ###\\n\", df_results, sep=\"\\n\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "LF00VeHKOUVZ"
      },
      "outputs": [],
      "source": [
        "# Show the comparison table through IPython's rich display.\n",
        "from IPython.display import display\n",
        "\n",
        "display(df_results)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

Product

Resources

Company

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more, all in one place. Commercial Alternative to JupyterHub.

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.