Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Download
"Guiding Future STEM Leaders through Innovative Research Training" ~ thinkingbeyond.education
Project: stephanie's main branch
Path: ThinkingBeyond Activities / BeyondAI-2024-Mentee-Projects / palak-sumayah / Moon_Dataset_(Low_noise).ipynb~2
Views: 1172 | Image: ubuntu2204
{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "dEzTM2gSKrgl" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.datasets import make_classification\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.svm import SVC\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score\n", "import time\n" ] }, { "cell_type": "code", "source": [ "X, y = make_classification(\n", " n_samples=1000,\n", " n_features=10,\n", " n_informative=5,\n", " n_redundant=2,\n", " n_clusters_per_class=1,\n", " flip_y=0.1,\n", " random_state=42\n", ")" ], "metadata": { "id": "aZUHxORgKtlk" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "scaler = StandardScaler()\n", "X_scaled = scaler.fit_transform(X)\n" ], "metadata": { "id": "bznsNS_lK8Tg" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)" ], "metadata": { "id": "cfbLwgkyLBMy" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "def add_outliers(X, y, outlier_fraction=0.1):\n", " n_outliers = int(outlier_fraction * X.shape[0])\n", " random_state = np.random.RandomState(42)\n", " outliers = random_state.uniform(low=-10, high=10, size=(n_outliers, X.shape[1])) # Random noise\n", " outlier_labels = random_state.randint(0, 2, size=n_outliers) # Random binary 
labels\n", " X_with_outliers = np.vstack([X, outliers])\n", " y_with_outliers = np.hstack([y, outlier_labels])\n", " return X_with_outliers, y_with_outliers\n" ], "metadata": { "id": "RU2MDBXhLGQI" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "X_train_outliers, y_train_outliers = add_outliers(X_train, y_train)\n" ], "metadata": { "id": "9dpJ14J_LMdF" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "def evaluate_classifier(name, clf, X_train, y_train, X_test, y_test):\n", " print(f\"### {name} ###\")\n", " start_time = time.time()\n", " clf.fit(X_train, y_train)\n", " train_time = time.time() - start_time\n", " y_pred = clf.predict(X_test)\n", "\n", " # Calculate metrics\n", " acc = accuracy_score(y_test, y_pred)\n", " f1 = f1_score(y_test, y_pred)\n", " recall = recall_score(y_test, y_pred)\n", " precision = precision_score(y_test, y_pred)\n", "\n", " print(f\"Accuracy: {acc:.2f}\")\n", " print(f\"F1 Score: {f1:.2f}\")\n", " print(f\"Recall: {recall:.2f}\")\n", " print(f\"Precision: {precision:.2f}\")\n", " print(f\"Training Time: {train_time:.4f} seconds\\n\")\n", "\n", " return {\n", " \"Classifier\": name,\n", " \"Accuracy\": acc,\n", " \"F1 Score\": f1,\n", " \"Recall\": recall,\n", " \"Precision\": precision,\n", " \"Training Time (s)\": train_time\n", " }" ], "metadata": { "id": "3xQCPmTTLRDi" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "classifiers = [\n", " (\"Logistic Regression\", LogisticRegression(max_iter=1000)),\n", " (\"SVM with RBF Kernel\", SVC(kernel=\"rbf\", probability=True)),\n", " (\"Decision Tree\", DecisionTreeClassifier()),\n", " (\"Random Forest\", RandomForestClassifier()),\n", " (\"Gradient Boosting\", GradientBoostingClassifier()),\n", " (\"Naive Bayes\", GaussianNB())\n", "]\n" ], "metadata": { "id": "mqQea0GDLUOq" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "results_no_outliers = []\n", 
"results_with_outliers = []\n", "\n", "for name, clf in classifiers:\n", " print(f\"Evaluating {name} without outliers...\")\n", " results_no_outliers.append(evaluate_classifier(name, clf, X_train, y_train, X_test, y_test))\n", "\n", " print(f\"Evaluating {name} with outliers...\")\n", " results_with_outliers.append(evaluate_classifier(name, clf, X_train_outliers, y_train_outliers, X_test, y_test))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DrS1LC-0LXE4", "outputId": "5c5f4efa-e729-46cc-bbda-b4dae9727266" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Evaluating Logistic Regression without outliers...\n", "### Logistic Regression ###\n", "Accuracy: 0.94\n", "F1 Score: 0.94\n", "Recall: 0.93\n", "Precision: 0.96\n", "Training Time: 0.0078 seconds\n", "\n", "Evaluating Logistic Regression with outliers...\n", "### Logistic Regression ###\n", "Accuracy: 0.87\n", "F1 Score: 0.87\n", "Recall: 0.79\n", "Precision: 0.97\n", "Training Time: 0.0053 seconds\n", "\n", "Evaluating SVM with RBF Kernel without outliers...\n", "### SVM with RBF Kernel ###\n", "Accuracy: 0.95\n", "F1 Score: 0.96\n", "Recall: 0.95\n", "Precision: 0.96\n", "Training Time: 0.1233 seconds\n", "\n", "Evaluating SVM with RBF Kernel with outliers...\n", "### SVM with RBF Kernel ###\n", "Accuracy: 0.94\n", "F1 Score: 0.95\n", "Recall: 0.93\n", "Precision: 0.96\n", "Training Time: 0.1549 seconds\n", "\n", "Evaluating Decision Tree without outliers...\n", "### Decision Tree ###\n", "Accuracy: 0.83\n", "F1 Score: 0.83\n", "Recall: 0.78\n", "Precision: 0.90\n", "Training Time: 0.0156 seconds\n", "\n", "Evaluating Decision Tree with outliers...\n", "### Decision Tree ###\n", "Accuracy: 0.86\n", "F1 Score: 0.87\n", "Recall: 0.85\n", "Precision: 0.89\n", "Training Time: 0.0197 seconds\n", "\n", "Evaluating Random Forest without outliers...\n", "### Random Forest ###\n", "Accuracy: 0.95\n", "F1 Score: 0.95\n", "Recall: 0.94\n", 
"Precision: 0.96\n", "Training Time: 0.4334 seconds\n", "\n", "Evaluating Random Forest with outliers...\n", "### Random Forest ###\n", "Accuracy: 0.95\n", "F1 Score: 0.96\n", "Recall: 0.95\n", "Precision: 0.96\n", "Training Time: 0.3818 seconds\n", "\n", "Evaluating Gradient Boosting without outliers...\n", "### Gradient Boosting ###\n", "Accuracy: 0.94\n", "F1 Score: 0.95\n", "Recall: 0.93\n", "Precision: 0.97\n", "Training Time: 0.4897 seconds\n", "\n", "Evaluating Gradient Boosting with outliers...\n", "### Gradient Boosting ###\n", "Accuracy: 0.94\n", "F1 Score: 0.95\n", "Recall: 0.93\n", "Precision: 0.97\n", "Training Time: 0.5542 seconds\n", "\n", "Evaluating Naive Bayes without outliers...\n", "### Naive Bayes ###\n", "Accuracy: 0.89\n", "F1 Score: 0.89\n", "Recall: 0.81\n", "Precision: 0.98\n", "Training Time: 0.0036 seconds\n", "\n", "Evaluating Naive Bayes with outliers...\n", "### Naive Bayes ###\n", "Accuracy: 0.72\n", "F1 Score: 0.78\n", "Recall: 0.95\n", "Precision: 0.67\n", "Training Time: 0.0029 seconds\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "df_no_outliers = pd.DataFrame(results_no_outliers)\n", "df_with_outliers = pd.DataFrame(results_with_outliers)" ], "metadata": { "id": "vFIaMoaELbJD" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df_difference = df_with_outliers.copy()\n", "df_difference[[\"Accuracy\", \"F1 Score\", \"Recall\", \"Precision\"]] -= df_no_outliers[[\"Accuracy\", \"F1 Score\", \"Recall\", \"Precision\"]]\n", "df_difference[\"Classifier\"] = df_no_outliers[\"Classifier\"]\n", "df_difference.rename(columns={\"Accuracy\": \"Accuracy Change\", \"F1 Score\": \"F1 Score Change\",\n", " \"Recall\": \"Recall Change\", \"Precision\": \"Precision Change\"}, inplace=True)\n" ], "metadata": { "id": "ueI1JgpvMUpo" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "print(\"\\n### Metrics Without Outliers ###\\n\")\n", "print(df_no_outliers)\n", "\n", "print(\"\\n### 
Metrics With Outliers ###\\n\")\n", "print(df_with_outliers)\n", "\n", "print(\"\\n### Outlier Sensitivity (Difference in Metrics) ###\\n\")\n", "print(df_difference)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cEppogmIMXk7", "outputId": "1ecbda1e-ac0e-4f94-b616-ae51bd5a35ef" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "### Metrics Without Outliers ###\n", "\n", " Classifier Accuracy F1 Score Recall Precision \\\n", "0 Logistic Regression 0.940 0.942857 0.925234 0.961165 \n", "1 SVM with RBF Kernel 0.955 0.957746 0.953271 0.962264 \n", "2 Decision Tree 0.835 0.834171 0.775701 0.902174 \n", "3 Random Forest 0.950 0.952830 0.943925 0.961905 \n", "4 Gradient Boosting 0.945 0.947368 0.925234 0.970588 \n", "5 Naive Bayes 0.890 0.887755 0.813084 0.977528 \n", "\n", " Training Time (s) \n", "0 0.007818 \n", "1 0.123307 \n", "2 0.015565 \n", "3 0.433434 \n", "4 0.489675 \n", "5 0.003552 \n", "\n", "### Metrics With Outliers ###\n", "\n", " Classifier Accuracy F1 Score Recall Precision \\\n", "0 Logistic Regression 0.870 0.865979 0.785047 0.965517 \n", "1 SVM with RBF Kernel 0.945 0.947867 0.934579 0.961538 \n", "2 Decision Tree 0.865 0.870813 0.850467 0.892157 \n", "3 Random Forest 0.955 0.957746 0.953271 0.962264 \n", "4 Gradient Boosting 0.945 0.947368 0.925234 0.970588 \n", "5 Naive Bayes 0.720 0.784615 0.953271 0.666667 \n", "\n", " Training Time (s) \n", "0 0.005347 \n", "1 0.154885 \n", "2 0.019695 \n", "3 0.381819 \n", "4 0.554216 \n", "5 0.002920 \n", "\n", "### Outlier Sensitivity (Difference in Metrics) ###\n", "\n", " Classifier Accuracy Change F1 Score Change Recall Change \\\n", "0 Logistic Regression -0.070 -0.076878 -0.140187 \n", "1 SVM with RBF Kernel -0.010 -0.009879 -0.018692 \n", "2 Decision Tree 0.030 0.036643 0.074766 \n", "3 Random Forest 0.005 0.004916 0.009346 \n", "4 Gradient Boosting 0.000 0.000000 0.000000 \n", "5 Naive Bayes -0.170 -0.103140 0.140187 \n", 
"\n", " Precision Change Training Time (s) \n", "0 0.004352 0.005347 \n", "1 -0.000726 0.154885 \n", "2 -0.010017 0.019695 \n", "3 0.000359 0.381819 \n", "4 0.000000 0.554216 \n", "5 -0.310861 0.002920 \n" ] } ] }, { "cell_type": "code", "source": [ "from IPython.display import display\n", "print(\"\\n### Results in Colab-Friendly Format ###\")\n", "print(\"\\nMetrics Without Outliers:\")\n", "display(df_no_outliers)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 307 }, "id": "7M9sQ6O-Mc-T", "outputId": "1704d60f-f08c-47ba-f0e3-438365b462c1" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "### Results in Colab-Friendly Format ###\n", "\n", "Metrics Without Outliers:\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ " Classifier Accuracy F1 Score Recall Precision \\\n", "0 Logistic Regression 0.940 0.942857 0.925234 0.961165 \n", "1 SVM with RBF Kernel 0.955 0.957746 0.953271 0.962264 \n", "2 Decision Tree 0.835 0.834171 0.775701 0.902174 \n", "3 Random Forest 0.950 0.952830 0.943925 0.961905 \n", "4 Gradient Boosting 0.945 0.947368 0.925234 0.970588 \n", "5 Naive Bayes 0.890 0.887755 0.813084 0.977528 \n", "\n", " Training Time (s) \n", "0 0.007818 \n", "1 0.123307 \n", "2 0.015565 \n", "3 0.433434 \n", "4 0.489675 \n", "5 0.003552 " ], "text/html": [ "\n", " <div id=\"df-f6189f22-8eac-4d26-bea1-e0fa4450fd2c\" class=\"colab-df-container\">\n", " <div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Classifier</th>\n", " <th>Accuracy</th>\n", " <th>F1 Score</th>\n", " <th>Recall</th>\n", " <th>Precision</th>\n", " <th>Training Time 
(s)</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Logistic Regression</td>\n", " <td>0.940</td>\n", " <td>0.942857</td>\n", " <td>0.925234</td>\n", " <td>0.961165</td>\n", " <td>0.007818</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>SVM with RBF Kernel</td>\n", " <td>0.955</td>\n", " <td>0.957746</td>\n", " <td>0.953271</td>\n", " <td>0.962264</td>\n", " <td>0.123307</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Decision Tree</td>\n", " <td>0.835</td>\n", " <td>0.834171</td>\n", " <td>0.775701</td>\n", " <td>0.902174</td>\n", " <td>0.015565</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Random Forest</td>\n", " <td>0.950</td>\n", " <td>0.952830</td>\n", " <td>0.943925</td>\n", " <td>0.961905</td>\n", " <td>0.433434</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Gradient Boosting</td>\n", " <td>0.945</td>\n", " <td>0.947368</td>\n", " <td>0.925234</td>\n", " <td>0.970588</td>\n", " <td>0.489675</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>Naive Bayes</td>\n", " <td>0.890</td>\n", " <td>0.887755</td>\n", " <td>0.813084</td>\n", " <td>0.977528</td>\n", " <td>0.003552</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>\n", " <div class=\"colab-df-buttons\">\n", "\n", " <div class=\"colab-df-container\">\n", " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f6189f22-8eac-4d26-bea1-e0fa4450fd2c')\"\n", " title=\"Convert this dataframe to an interactive table.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", " </svg>\n", " </button>\n", "\n", " <style>\n", " .colab-df-container {\n", " display:flex;\n", " gap: 12px;\n", " }\n", "\n", " .colab-df-convert {\n", " 
background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-convert:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " .colab-df-buttons div {\n", " margin-bottom: 4px;\n", " }\n", "\n", " [theme=dark] .colab-df-convert {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-convert:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", "\n", " <script>\n", " const buttonEl =\n", " document.querySelector('#df-f6189f22-8eac-4d26-bea1-e0fa4450fd2c button.colab-df-convert');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " async function convertToInteractive(key) {\n", " const element = document.querySelector('#df-f6189f22-8eac-4d26-bea1-e0fa4450fd2c');\n", " const dataTable =\n", " await google.colab.kernel.invokeFunction('convertToInteractive',\n", " [key], {});\n", " if (!dataTable) return;\n", "\n", " const docLinkHtml = 'Like what you see? 
Visit the ' +\n", " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", " + ' to learn more about interactive tables.';\n", " element.innerHTML = '';\n", " dataTable['output_type'] = 'display_data';\n", " await google.colab.output.renderOutput(dataTable, element);\n", " const docLink = document.createElement('div');\n", " docLink.innerHTML = docLinkHtml;\n", " element.appendChild(docLink);\n", " }\n", " </script>\n", " </div>\n", "\n", "\n", "<div id=\"df-78c1e5d1-8de4-4197-8818-b918d851cb45\">\n", " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-78c1e5d1-8de4-4197-8818-b918d851cb45')\"\n", " title=\"Suggest charts\"\n", " style=\"display:none;\">\n", "\n", "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <g>\n", " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", " </g>\n", "</svg>\n", " </button>\n", "\n", "<style>\n", " .colab-df-quickchart {\n", " --bg-color: #E8F0FE;\n", " --fill-color: #1967D2;\n", " --hover-bg-color: #E2EBFA;\n", " --hover-fill-color: #174EA6;\n", " --disabled-fill-color: #AAA;\n", " --disabled-bg-color: #DDD;\n", " }\n", "\n", " [theme=dark] .colab-df-quickchart {\n", " --bg-color: #3B4455;\n", " --fill-color: #D2E3FC;\n", " --hover-bg-color: #434B5C;\n", " --hover-fill-color: #FFFFFF;\n", " --disabled-bg-color: #3B4455;\n", " --disabled-fill-color: #666;\n", " }\n", "\n", " .colab-df-quickchart {\n", " background-color: var(--bg-color);\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: var(--fill-color);\n", " height: 32px;\n", " padding: 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-quickchart:hover {\n", " background-color: var(--hover-bg-color);\n", " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: 
var(--button-hover-fill-color);\n", " }\n", "\n", " .colab-df-quickchart-complete:disabled,\n", " .colab-df-quickchart-complete:disabled:hover {\n", " background-color: var(--disabled-bg-color);\n", " fill: var(--disabled-fill-color);\n", " box-shadow: none;\n", " }\n", "\n", " .colab-df-spinner {\n", " border: 2px solid var(--fill-color);\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " animation:\n", " spin 1s steps(1) infinite;\n", " }\n", "\n", " @keyframes spin {\n", " 0% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " border-left-color: var(--fill-color);\n", " }\n", " 20% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 30% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " border-right-color: var(--fill-color);\n", " }\n", " 40% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 60% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " }\n", " 80% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-bottom-color: var(--fill-color);\n", " }\n", " 90% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " }\n", " }\n", "</style>\n", "\n", " <script>\n", " async function quickchart(key) {\n", " const quickchartButtonEl =\n", " document.querySelector('#' + key + ' button');\n", " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", " quickchartButtonEl.classList.add('colab-df-spinner');\n", " try {\n", " const charts = await google.colab.kernel.invokeFunction(\n", " 'suggestCharts', [key], {});\n", " } catch (error) {\n", " console.error('Error during call to suggestCharts:', error);\n", " }\n", " 
quickchartButtonEl.classList.remove('colab-df-spinner');\n", " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", " }\n", " (() => {\n", " let quickchartButtonEl =\n", " document.querySelector('#df-78c1e5d1-8de4-4197-8818-b918d851cb45 button');\n", " quickchartButtonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", " })();\n", " </script>\n", "</div>\n", "\n", " <div id=\"id_cc250be6-2266-45be-b5ed-d108aa4db259\">\n", " <style>\n", " .colab-df-generate {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-generate:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " [theme=dark] .colab-df-generate {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-generate:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_no_outliers')\"\n", " title=\"Generate code using this dataframe.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", " </svg>\n", " </button>\n", " <script>\n", " (() => {\n", " const buttonEl =\n", " 
document.querySelector('#id_cc250be6-2266-45be-b5ed-d108aa4db259 button.colab-df-generate');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " buttonEl.onclick = () => {\n", " google.colab.notebook.generateWithVariable('df_no_outliers');\n", " }\n", " })();\n", " </script>\n", " </div>\n", "\n", " </div>\n", " </div>\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_no_outliers", "summary": "{\n \"name\": \"df_no_outliers\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Classifier\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Logistic Regression\",\n \"SVM with RBF Kernel\",\n \"Naive Bayes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Accuracy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.047478065110813705,\n \"min\": 0.835,\n \"max\": 0.955,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.94,\n 0.955,\n 0.89\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"F1 Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.049354295873882084,\n \"min\": 0.8341708542713567,\n \"max\": 0.9577464788732394,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.9428571428571428,\n 0.9577464788732394,\n 0.8877551020408163\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Recall\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.07532888072544487,\n \"min\": 0.7757009345794392,\n \"max\": 0.9532710280373832,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.9532710280373832,\n 0.8130841121495327,\n 0.7757009345794392\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Precision\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.027108238191573854,\n \"min\": 0.9021739130434783,\n \"max\": 0.9775280898876404,\n \"num_unique_values\": 
6,\n \"samples\": [\n 0.9611650485436893,\n 0.9622641509433962,\n 0.9775280898876404\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Training Time (s)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2241223882246531,\n \"min\": 0.0035517215728759766,\n \"max\": 0.48967528343200684,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.007817506790161133,\n 0.1233072280883789,\n 0.0035517215728759766\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "print(\"\\nMetrics With Outliers:\")\n", "display(df_with_outliers)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 272 }, "id": "Gg4ObWGoMgQp", "outputId": "059d970b-9a9e-4c7a-861f-2280d2036c8b" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "Metrics With Outliers:\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ " Classifier Accuracy F1 Score Recall Precision \\\n", "0 Logistic Regression 0.870 0.865979 0.785047 0.965517 \n", "1 SVM with RBF Kernel 0.945 0.947867 0.934579 0.961538 \n", "2 Decision Tree 0.865 0.870813 0.850467 0.892157 \n", "3 Random Forest 0.955 0.957746 0.953271 0.962264 \n", "4 Gradient Boosting 0.945 0.947368 0.925234 0.970588 \n", "5 Naive Bayes 0.720 0.784615 0.953271 0.666667 \n", "\n", " Training Time (s) \n", "0 0.005347 \n", "1 0.154885 \n", "2 0.019695 \n", "3 0.381819 \n", "4 0.554216 \n", "5 0.002920 " ], "text/html": [ "\n", " <div id=\"df-3ec0a532-a0d6-45b9-994d-bae238618ea1\" class=\"colab-df-container\">\n", " <div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: 
right;\">\n", " <th></th>\n", " <th>Classifier</th>\n", " <th>Accuracy</th>\n", " <th>F1 Score</th>\n", " <th>Recall</th>\n", " <th>Precision</th>\n", " <th>Training Time (s)</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Logistic Regression</td>\n", " <td>0.870</td>\n", " <td>0.865979</td>\n", " <td>0.785047</td>\n", " <td>0.965517</td>\n", " <td>0.005347</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>SVM with RBF Kernel</td>\n", " <td>0.945</td>\n", " <td>0.947867</td>\n", " <td>0.934579</td>\n", " <td>0.961538</td>\n", " <td>0.154885</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Decision Tree</td>\n", " <td>0.865</td>\n", " <td>0.870813</td>\n", " <td>0.850467</td>\n", " <td>0.892157</td>\n", " <td>0.019695</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Random Forest</td>\n", " <td>0.955</td>\n", " <td>0.957746</td>\n", " <td>0.953271</td>\n", " <td>0.962264</td>\n", " <td>0.381819</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Gradient Boosting</td>\n", " <td>0.945</td>\n", " <td>0.947368</td>\n", " <td>0.925234</td>\n", " <td>0.970588</td>\n", " <td>0.554216</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>Naive Bayes</td>\n", " <td>0.720</td>\n", " <td>0.784615</td>\n", " <td>0.953271</td>\n", " <td>0.666667</td>\n", " <td>0.002920</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>\n", " <div class=\"colab-df-buttons\">\n", "\n", " <div class=\"colab-df-container\">\n", " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3ec0a532-a0d6-45b9-994d-bae238618ea1')\"\n", " title=\"Convert this dataframe to an interactive table.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 
0h160v-160H620v160Z\"/>\n", " </svg>\n", " </button>\n", "\n", " <style>\n", " .colab-df-container {\n", " display:flex;\n", " gap: 12px;\n", " }\n", "\n", " .colab-df-convert {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-convert:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " .colab-df-buttons div {\n", " margin-bottom: 4px;\n", " }\n", "\n", " [theme=dark] .colab-df-convert {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-convert:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", "\n", " <script>\n", " const buttonEl =\n", " document.querySelector('#df-3ec0a532-a0d6-45b9-994d-bae238618ea1 button.colab-df-convert');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " async function convertToInteractive(key) {\n", " const element = document.querySelector('#df-3ec0a532-a0d6-45b9-994d-bae238618ea1');\n", " const dataTable =\n", " await google.colab.kernel.invokeFunction('convertToInteractive',\n", " [key], {});\n", " if (!dataTable) return;\n", "\n", " const docLinkHtml = 'Like what you see? 
Visit the ' +\n", " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", " + ' to learn more about interactive tables.';\n", " element.innerHTML = '';\n", " dataTable['output_type'] = 'display_data';\n", " await google.colab.output.renderOutput(dataTable, element);\n", " const docLink = document.createElement('div');\n", " docLink.innerHTML = docLinkHtml;\n", " element.appendChild(docLink);\n", " }\n", " </script>\n", " </div>\n", "\n", "\n", "<div id=\"df-bd336c71-ed61-4b15-9e67-161faf00aaa5\">\n", " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-bd336c71-ed61-4b15-9e67-161faf00aaa5')\"\n", " title=\"Suggest charts\"\n", " style=\"display:none;\">\n", "\n", "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <g>\n", " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", " </g>\n", "</svg>\n", " </button>\n", "\n", "<style>\n", " .colab-df-quickchart {\n", " --bg-color: #E8F0FE;\n", " --fill-color: #1967D2;\n", " --hover-bg-color: #E2EBFA;\n", " --hover-fill-color: #174EA6;\n", " --disabled-fill-color: #AAA;\n", " --disabled-bg-color: #DDD;\n", " }\n", "\n", " [theme=dark] .colab-df-quickchart {\n", " --bg-color: #3B4455;\n", " --fill-color: #D2E3FC;\n", " --hover-bg-color: #434B5C;\n", " --hover-fill-color: #FFFFFF;\n", " --disabled-bg-color: #3B4455;\n", " --disabled-fill-color: #666;\n", " }\n", "\n", " .colab-df-quickchart {\n", " background-color: var(--bg-color);\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: var(--fill-color);\n", " height: 32px;\n", " padding: 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-quickchart:hover {\n", " background-color: var(--hover-bg-color);\n", " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: 
var(--button-hover-fill-color);\n", " }\n", "\n", " .colab-df-quickchart-complete:disabled,\n", " .colab-df-quickchart-complete:disabled:hover {\n", " background-color: var(--disabled-bg-color);\n", " fill: var(--disabled-fill-color);\n", " box-shadow: none;\n", " }\n", "\n", " .colab-df-spinner {\n", " border: 2px solid var(--fill-color);\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " animation:\n", " spin 1s steps(1) infinite;\n", " }\n", "\n", " @keyframes spin {\n", " 0% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " border-left-color: var(--fill-color);\n", " }\n", " 20% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 30% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " border-right-color: var(--fill-color);\n", " }\n", " 40% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 60% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " }\n", " 80% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-bottom-color: var(--fill-color);\n", " }\n", " 90% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " }\n", " }\n", "</style>\n", "\n", " <script>\n", " async function quickchart(key) {\n", " const quickchartButtonEl =\n", " document.querySelector('#' + key + ' button');\n", " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", " quickchartButtonEl.classList.add('colab-df-spinner');\n", " try {\n", " const charts = await google.colab.kernel.invokeFunction(\n", " 'suggestCharts', [key], {});\n", " } catch (error) {\n", " console.error('Error during call to suggestCharts:', error);\n", " }\n", " 
quickchartButtonEl.classList.remove('colab-df-spinner');\n", " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", " }\n", " (() => {\n", " let quickchartButtonEl =\n", " document.querySelector('#df-bd336c71-ed61-4b15-9e67-161faf00aaa5 button');\n", " quickchartButtonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", " })();\n", " </script>\n", "</div>\n", "\n", " <div id=\"id_7eac9cc1-f8d9-435e-83c0-a9bd3b668dec\">\n", " <style>\n", " .colab-df-generate {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-generate:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " [theme=dark] .colab-df-generate {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-generate:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_with_outliers')\"\n", " title=\"Generate code using this dataframe.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", " </svg>\n", " </button>\n", " <script>\n", " (() => {\n", " const buttonEl =\n", " 
document.querySelector('#id_7eac9cc1-f8d9-435e-83c0-a9bd3b668dec button.colab-df-generate');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " buttonEl.onclick = () => {\n", " google.colab.notebook.generateWithVariable('df_with_outliers');\n", " }\n", " })();\n", " </script>\n", " </div>\n", "\n", " </div>\n", " </div>\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_with_outliers", "summary": "{\n \"name\": \"df_with_outliers\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Classifier\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Logistic Regression\",\n \"SVM with RBF Kernel\",\n \"Naive Bayes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Accuracy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.08936815241833448,\n \"min\": 0.72,\n \"max\": 0.955,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.945,\n 0.72,\n 0.865\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"F1 Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.06794595237537131,\n \"min\": 0.7846153846153846,\n \"max\": 0.9577464788732394,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.865979381443299,\n 0.9478672985781991,\n 0.7846153846153846\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Recall\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.06808118818516005,\n \"min\": 0.7850467289719626,\n \"max\": 0.9532710280373832,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.9345794392523364,\n 0.9252336448598131,\n 0.8504672897196262\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Precision\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.11948765203585018,\n \"min\": 0.6666666666666666,\n \"max\": 0.9705882352941176,\n \"num_unique_values\": 
6,\n \"samples\": [\n 0.9655172413793104,\n 0.9615384615384616,\n 0.6666666666666666\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Training Time (s)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2318213154775346,\n \"min\": 0.002920389175415039,\n \"max\": 0.554215669631958,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.005347251892089844,\n 0.15488481521606445,\n 0.002920389175415039\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "print(\"\\nOutlier Sensitivity (Change in Metrics):\")\n", "display(df_difference)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 307 }, "id": "aiVh1FOQMjXe", "outputId": "f9c9277d-96f3-4446-936d-993246c77573" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "Outlier Sensitivity (Change in Metrics):\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ " Classifier Accuracy Change F1 Score Change Recall Change \\\n", "0 Logistic Regression -0.070 -0.076878 -0.140187 \n", "1 SVM with RBF Kernel -0.010 -0.009879 -0.018692 \n", "2 Decision Tree 0.030 0.036643 0.074766 \n", "3 Random Forest 0.005 0.004916 0.009346 \n", "4 Gradient Boosting 0.000 0.000000 0.000000 \n", "5 Naive Bayes -0.170 -0.103140 0.140187 \n", "\n", " Precision Change Training Time (s) \n", "0 0.004352 0.005347 \n", "1 -0.000726 0.154885 \n", "2 -0.010017 0.019695 \n", "3 0.000359 0.381819 \n", "4 0.000000 0.554216 \n", "5 -0.310861 0.002920 " ], "text/html": [ "\n", " <div id=\"df-1d1306a7-50a0-4c82-8582-701894256a61\" class=\"colab-df-container\">\n", " <div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" 
class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Classifier</th>\n", " <th>Accuracy Change</th>\n", " <th>F1 Score Change</th>\n", " <th>Recall Change</th>\n", " <th>Precision Change</th>\n", " <th>Training Time (s)</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Logistic Regression</td>\n", " <td>-0.070</td>\n", " <td>-0.076878</td>\n", " <td>-0.140187</td>\n", " <td>0.004352</td>\n", " <td>0.005347</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>SVM with RBF Kernel</td>\n", " <td>-0.010</td>\n", " <td>-0.009879</td>\n", " <td>-0.018692</td>\n", " <td>-0.000726</td>\n", " <td>0.154885</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Decision Tree</td>\n", " <td>0.030</td>\n", " <td>0.036643</td>\n", " <td>0.074766</td>\n", " <td>-0.010017</td>\n", " <td>0.019695</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Random Forest</td>\n", " <td>0.005</td>\n", " <td>0.004916</td>\n", " <td>0.009346</td>\n", " <td>0.000359</td>\n", " <td>0.381819</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Gradient Boosting</td>\n", " <td>0.000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>0.554216</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>Naive Bayes</td>\n", " <td>-0.170</td>\n", " <td>-0.103140</td>\n", " <td>0.140187</td>\n", " <td>-0.310861</td>\n", " <td>0.002920</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>\n", " <div class=\"colab-df-buttons\">\n", "\n", " <div class=\"colab-df-container\">\n", " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1d1306a7-50a0-4c82-8582-701894256a61')\"\n", " title=\"Convert this dataframe to an interactive table.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 
220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", " </svg>\n", " </button>\n", "\n", " <style>\n", " .colab-df-container {\n", " display:flex;\n", " gap: 12px;\n", " }\n", "\n", " .colab-df-convert {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-convert:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " .colab-df-buttons div {\n", " margin-bottom: 4px;\n", " }\n", "\n", " [theme=dark] .colab-df-convert {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-convert:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", "\n", " <script>\n", " const buttonEl =\n", " document.querySelector('#df-1d1306a7-50a0-4c82-8582-701894256a61 button.colab-df-convert');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " async function convertToInteractive(key) {\n", " const element = document.querySelector('#df-1d1306a7-50a0-4c82-8582-701894256a61');\n", " const dataTable =\n", " await google.colab.kernel.invokeFunction('convertToInteractive',\n", " [key], {});\n", " if (!dataTable) return;\n", "\n", " const docLinkHtml = 'Like what you see? 
Visit the ' +\n", " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", " + ' to learn more about interactive tables.';\n", " element.innerHTML = '';\n", " dataTable['output_type'] = 'display_data';\n", " await google.colab.output.renderOutput(dataTable, element);\n", " const docLink = document.createElement('div');\n", " docLink.innerHTML = docLinkHtml;\n", " element.appendChild(docLink);\n", " }\n", " </script>\n", " </div>\n", "\n", "\n", "<div id=\"df-fd704534-5f6a-4387-9ba7-3efe50153161\">\n", " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-fd704534-5f6a-4387-9ba7-3efe50153161')\"\n", " title=\"Suggest charts\"\n", " style=\"display:none;\">\n", "\n", "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <g>\n", " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", " </g>\n", "</svg>\n", " </button>\n", "\n", "<style>\n", " .colab-df-quickchart {\n", " --bg-color: #E8F0FE;\n", " --fill-color: #1967D2;\n", " --hover-bg-color: #E2EBFA;\n", " --hover-fill-color: #174EA6;\n", " --disabled-fill-color: #AAA;\n", " --disabled-bg-color: #DDD;\n", " }\n", "\n", " [theme=dark] .colab-df-quickchart {\n", " --bg-color: #3B4455;\n", " --fill-color: #D2E3FC;\n", " --hover-bg-color: #434B5C;\n", " --hover-fill-color: #FFFFFF;\n", " --disabled-bg-color: #3B4455;\n", " --disabled-fill-color: #666;\n", " }\n", "\n", " .colab-df-quickchart {\n", " background-color: var(--bg-color);\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: var(--fill-color);\n", " height: 32px;\n", " padding: 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-quickchart:hover {\n", " background-color: var(--hover-bg-color);\n", " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: 
var(--button-hover-fill-color);\n", " }\n", "\n", " .colab-df-quickchart-complete:disabled,\n", " .colab-df-quickchart-complete:disabled:hover {\n", " background-color: var(--disabled-bg-color);\n", " fill: var(--disabled-fill-color);\n", " box-shadow: none;\n", " }\n", "\n", " .colab-df-spinner {\n", " border: 2px solid var(--fill-color);\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " animation:\n", " spin 1s steps(1) infinite;\n", " }\n", "\n", " @keyframes spin {\n", " 0% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " border-left-color: var(--fill-color);\n", " }\n", " 20% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 30% {\n", " border-color: transparent;\n", " border-left-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " border-right-color: var(--fill-color);\n", " }\n", " 40% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-top-color: var(--fill-color);\n", " }\n", " 60% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " }\n", " 80% {\n", " border-color: transparent;\n", " border-right-color: var(--fill-color);\n", " border-bottom-color: var(--fill-color);\n", " }\n", " 90% {\n", " border-color: transparent;\n", " border-bottom-color: var(--fill-color);\n", " }\n", " }\n", "</style>\n", "\n", " <script>\n", " async function quickchart(key) {\n", " const quickchartButtonEl =\n", " document.querySelector('#' + key + ' button');\n", " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", " quickchartButtonEl.classList.add('colab-df-spinner');\n", " try {\n", " const charts = await google.colab.kernel.invokeFunction(\n", " 'suggestCharts', [key], {});\n", " } catch (error) {\n", " console.error('Error during call to suggestCharts:', error);\n", " }\n", " 
quickchartButtonEl.classList.remove('colab-df-spinner');\n", " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", " }\n", " (() => {\n", " let quickchartButtonEl =\n", " document.querySelector('#df-fd704534-5f6a-4387-9ba7-3efe50153161 button');\n", " quickchartButtonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", " })();\n", " </script>\n", "</div>\n", "\n", " <div id=\"id_c0d05440-7262-4ea2-960e-4eb3fd6983a4\">\n", " <style>\n", " .colab-df-generate {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-generate:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " [theme=dark] .colab-df-generate {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-generate:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df_difference')\"\n", " title=\"Generate code using this dataframe.\"\n", " style=\"display:none;\">\n", "\n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", " </svg>\n", " </button>\n", " <script>\n", " (() => {\n", " const buttonEl =\n", " 
document.querySelector('#id_c0d05440-7262-4ea2-960e-4eb3fd6983a4 button.colab-df-generate');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " buttonEl.onclick = () => {\n", " google.colab.notebook.generateWithVariable('df_difference');\n", " }\n", " })();\n", " </script>\n", " </div>\n", "\n", " </div>\n", " </div>\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df_difference", "summary": "{\n \"name\": \"df_difference\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"Classifier\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Logistic Regression\",\n \"SVM with RBF Kernel\",\n \"Naive Bayes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Accuracy Change\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.07364894206074293,\n \"min\": -0.17000000000000004,\n \"max\": 0.030000000000000027,\n \"num_unique_values\": 6,\n \"samples\": [\n -0.06999999999999995,\n -0.010000000000000009,\n -0.17000000000000004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"F1 Score Change\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.05356320340641182,\n \"min\": -0.10313971742543171,\n \"max\": 0.03664254285782986,\n \"num_unique_values\": 6,\n \"samples\": [\n -0.07687776141384384,\n -0.009879180295040313,\n -0.10313971742543171\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Recall Change\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.09446497943960316,\n \"min\": -0.14018691588785048,\n \"max\": 0.14018691588785048,\n \"num_unique_values\": 6,\n \"samples\": [\n -0.14018691588785048,\n -0.01869158878504673,\n 0.14018691588785048\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Precision Change\",\n \"properties\": {\n \"dtype\": \"number\",\n 
\"std\": 0.1265052924776638,\n \"min\": -0.31086142322097376,\n \"max\": 0.004352192835621049,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.004352192835621049,\n -0.0007256894049346707,\n -0.31086142322097376\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Training Time (s)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2318213154775346,\n \"min\": 0.002920389175415039,\n \"max\": 0.554215669631958,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.005347251892089844,\n 0.15488481521606445,\n 0.002920389175415039\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {} } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "XCpH0MpYMnVz" }, "execution_count": null, "outputs": [] } ] }