Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/Applied Data Modelling using Gradio/End-to-End Data Modelling Workflow with Gradio.ipynb
7216 views
Kernel: Python 3 (ipykernel)

End-to-End Data Science Workflow with Gradio (Sample Dataset)

  • Explore data (EDA)

  • Preprocess features

  • Train and tune a model

  • Evaluate with metrics and plots

  • Deploy an interactive Gradio UI for EDA, training, and prediction

# Third-party imports, listed once each and grouped at the top of the cell.
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load dataset (the relative path is resolved against the current working
# directory, so the CSV must sit next to the notebook or the path adjusted).
df = pd.read_csv("sample_credit_risk.csv")
# Preview the first five rows to sanity-check the loaded columns.
df.head(5)
# Count missing values per column (`isna` is the canonical name of `isnull`).
df.isna().sum()
income 29 age 31 loan_amount 27 loan_term 35 credit_score 23 employment_type 31 city_tier 30 default 0 dtype: int64
# Discard every row that has at least one missing value, so the model below
# is trained on complete records only.
df = df.dropna(axis=0, how="any")
# -----------------------------
# Data Preparation
# -----------------------------
# Imported here so this cell stays self-contained: the notebook's import cell
# only brings OneHotEncoder into scope from sklearn.preprocessing.
from sklearn.preprocessing import StandardScaler

# Features & target
X = df.drop("default", axis=1)
y = df["default"]

categorical_cols = ["employment_type", "city_tier"]
numerical_cols = ["income", "age", "loan_amount", "loan_term", "credit_score"]

# Preprocessing: one-hot encode the categorical columns and standardise the
# numeric ones. Passing the numeric columns through unscaled is what made
# lbfgs hit its iteration limit (ConvergenceWarning); scaling is the remedy
# the warning itself recommends.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", StandardScaler(), numerical_cols),
    ]
)

# Model pipeline: preprocessing and classifier are fitted together, so the
# exact same transformations are applied at prediction time.
model = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=500)),
])

# Train-test split (fixed seed for reproducible metrics)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model.fit(X_train, y_train)

# Predictions and hold-out metrics, computed once and reused by the UI
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)


# -----------------------------
# EDA Functions
# -----------------------------
def eda_summary():
    """Return the descriptive statistics of ``df`` as plain text."""
    return df.describe().to_string()


def eda_missing():
    """Return the per-column count of missing values in ``df`` as plain text.

    NOTE(review): when the cells run in notebook order, ``df`` has already
    been passed through ``dropna()``, so every count reported here is zero.
    Keep a copy of the raw frame if the original counts should be shown.
    """
    return df.isnull().sum().to_string()


def eda_distribution():
    """Return the mean of each numeric column, grouped by ``default``."""
    return df.groupby("default").mean(numeric_only=True).to_string()


# -----------------------------
# Model Results
# -----------------------------
def model_results():
    """Return accuracy, classification report and confusion matrix as text."""
    return "".join([
        f"Accuracy: {accuracy:.4f}\n\n",
        "Classification Report:\n", report, "\n",
        "Confusion Matrix:\n", str(cm),
    ])


# -----------------------------
# Prediction Function
# -----------------------------
def predict(income, age, loan_amount, loan_term, credit_score, employment_type, city_tier):
    """Score a single applicant with the fitted pipeline.

    Each argument is the value of the UI component of the same name.
    Returns a two-line string with the predicted class and the probability
    of ``default == 1``, or a prompt naming the fields that are still empty.
    """
    applicant = {
        "income": income,
        "age": age,
        "loan_amount": loan_amount,
        "loan_term": loan_term,
        "credit_score": credit_score,
        "employment_type": employment_type,
        "city_tier": city_tier,
    }

    # An empty field reaches this function as None; the model would reject
    # it with an exception, so ask the user to fill the field in instead.
    missing = [name for name, value in applicant.items() if value is None]
    if missing:
        return "Please provide a value for: " + ", ".join(missing)

    input_df = pd.DataFrame({name: [value] for name, value in applicant.items()})

    pred = model.predict(input_df)[0]
    # Column 1 of predict_proba is taken as the probability of default == 1
    # (assumes the target is encoded 0/1 — TODO confirm against the dataset).
    prob = model.predict_proba(input_df)[0][1]

    return f"Prediction (Default=1): {pred}\nProbability of Default: {prob:.2f}"


# -----------------------------
# Gradio Interface
# -----------------------------
with gr.Blocks(title="Credit Risk Analysis") as app:

    gr.Markdown("# Credit Risk Analysis Dashboard")

    with gr.Tab("EDA"):
        gr.Markdown("## Exploratory Data Analysis")

        btn1 = gr.Button("Summary Stats")
        out1 = gr.Textbox(label="Summary")
        btn1.click(eda_summary, outputs=out1)

        btn2 = gr.Button("Missing Values")
        out2 = gr.Textbox(label="Missing")
        btn2.click(eda_missing, outputs=out2)

        btn3 = gr.Button("Default Distribution")
        out3 = gr.Textbox(label="Distribution")
        btn3.click(eda_distribution, outputs=out3)

    with gr.Tab("Model Outcomes"):
        gr.Markdown("## Model Performance")

        btn4 = gr.Button("Show Results")
        out4 = gr.Textbox(label="Model Output")
        btn4.click(model_results, outputs=out4)

    with gr.Tab("Prediction"):
        gr.Markdown("## Predict Credit Default")

        income_in = gr.Number(label="Income")
        age_in = gr.Number(label="Age")
        loan_amount_in = gr.Number(label="Loan Amount")
        loan_term_in = gr.Number(label="Loan Term")
        credit_score_in = gr.Number(label="Credit Score")

        # .tolist() hands the dropdowns native Python values rather than
        # NumPy scalars.
        employment_type_in = gr.Dropdown(
            choices=df["employment_type"].unique().tolist(),
            label="Employment Type",
        )
        city_tier_in = gr.Dropdown(
            choices=df["city_tier"].unique().tolist(),
            label="City Tier",
        )

        btn5 = gr.Button("Predict")
        out5 = gr.Textbox(label="Prediction Result")

        btn5.click(
            predict,
            inputs=[
                income_in,
                age_in,
                loan_amount_in,
                loan_term_in,
                credit_score_in,
                employment_type_in,
                city_tier_in,
            ],
            outputs=out5,
        )

# Launch app
app.launch()
C:\Users\Suyashi144893\AppData\Local\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1): STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. Increase the number of iterations (max_iter) or scale the data as shown in: https://scikit-learn.org/stable/modules/preprocessing.html Please also refer to the documentation for alternative solver options: https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression n_iter_i = _check_optimize_result(
* Running on local URL: http://127.0.0.1:7864 * To create a public link, set `share=True` in `launch()`.