Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book2/15/linreg_rank_stats.ipynb
1193 views
Kernel: base

Fit a 1D linear regression to some toy data and to its rank-transformed version (the linear-model equivalents of Pearson and Spearman correlation). The code is from https://www.georgeho.org/tests-as-linear/

import matplotlib.pyplot as plt import numpy as np import pandas as pd import patsy import scipy import statsmodels.api as sm import statsmodels.formula.api as smf
<frozen importlib._bootstrap>:219: RuntimeWarning: scipy._lib.messagestream.MessageStream size changed, may indicate binary incompatibility. Expected 56 from C header, got 64 from PyObject
# Import the plotting helpers from probml_utils; if the package is missing,
# install it from GitHub and retry the import.
# NOTE: `%pip` is an IPython magic, so this cell only runs inside an
# IPython/Jupyter kernel, not as a plain Python script.
try:
    from probml_utils import savefig, latexify
except ModuleNotFoundError:
    %pip install -qq git+https://github.com/probml/probml-utils.git
    from probml_utils import savefig, latexify
import os

# Configure the figure-export environment before any plotting happens.
# NOTE(review): FIG_DIR is a machine-specific absolute path; presumably it is
# the directory `savefig` writes into, and LATEXIFY toggles the latexified
# output variant -- confirm against probml_utils.
os.environ.update(
    {
        "FIG_DIR": "/Users/kpmurphy/github/bookv2/figures",
        "LATEXIFY": "1",
    }
)

latexify(fig_height=2)
def format_decimals_factory(num_decimals=1):
    """Return a formatter that renders a number with a fixed number of decimals.

    Args:
        num_decimals: Number of digits to keep after the decimal point.

    Returns:
        A one-argument callable mapping a number to its fixed-point string,
        e.g. ``format_decimals_factory(2)(3.14159) == "3.14"``.
    """
    return lambda x: "{1:.{0}f}".format(num_decimals, x)


def pearson_spearman_plot():
    """Plot OLS fits on raw and rank-transformed toy data side by side.

    Draws 30 noisy samples of a linear relationship, fits ``y ~ 1 + x`` with
    ordinary least squares on the raw values (labelled "Pearson") and on their
    ranks (labelled "Spearman"), prints the fitted parameters of both models,
    and shows each data set in its own subplot with annotated points, a
    horizontal line at the fitted intercept and the fitted regression line.

    The data come from NumPy's global random state, so seed it beforehand for
    reproducible output.

    Returns:
        A tuple ``(fig, ax)`` where ``fig`` is the created figure and ``ax`` is
        the last axes drawn on (the "Spearman" panel).
    """
    # Construct data as pd.DataFrames
    x = np.random.normal(0, 2, 30)
    y = 0.8 * x + 0.2 * 5 * np.random.randn(30)
    data_pearson = pd.DataFrame({"x": x, "y": y})
    data_spearman = data_pearson.rank()

    # Pearson equivalent linear model
    res_pearson = smf.ols("y ~ 1 + x", data_pearson).fit()
    intercept_pearson, slope_pearson = res_pearson.params
    print("pearson params", res_pearson.params)

    # Spearman equivalent linear model (same regression, on the ranks)
    res_spearman = smf.ols("y ~ 1 + x", data_spearman).fit()
    intercept_spearman, slope_spearman = res_spearman.params
    print("spearman params", res_spearman.params)

    # Plot
    fig, axarr = plt.subplots(ncols=2)
    panels = zip(
        axarr,
        [data_pearson, data_spearman],
        [format_decimals_factory(), format_decimals_factory(0)],
        ["Pearson", "Spearman"],
        [slope_pearson, slope_spearman],
        [intercept_pearson, intercept_spearman],
    )
    for ax, dataset, to_str, title, slope, intercept in panels:
        ax.scatter(dataset["x"], dataset["y"], color="k")

        # Label every point with its "(x, y)" coordinates; iterate positionally
        # so the annotation does not depend on the frame's index labels.
        annotations = "(" + dataset["x"].apply(to_str) + ", " + dataset["y"].apply(to_str) + ")"
        for annot, x_val, y_val in zip(annotations, dataset["x"], dataset["y"]):
            ax.annotate(annot, (x_val, y_val), color="gray")

        # The horizontal reference line is labelled as the intercept, so it
        # must sit at the fitted intercept (it was previously drawn at the
        # slope value).
        ax.axhline(intercept, color="b", label=r"$\beta_0$ (Intercept)")

        # Fitted regression line, drawn across the current x-range.
        x_lim = ax.get_xlim()
        ax.plot(
            x_lim,
            [slope * x_end + intercept for x_end in x_lim],
            color="r",
            label=r"$\beta_1$ (Slope)",
        )

        ax.set_title(title)
        ax.legend(fontsize="large")

    return fig, ax
# Seed NumPy's global RNG so the sampled toy data, and hence the figure, is
# the same on every run.
np.random.seed(1618)

# Build the two-panel figure; the fitted parameters are printed as a side effect.
fig, ax = pearson_spearman_plot()
plt.tight_layout()

# Save via the probml_utils helper (imported in an earlier cell), then display.
savefig("linreg_rank_stats")
plt.show()
pearson params Intercept 0.044719 x 0.966266 dtype: float64 spearman params Intercept 3.537931 x 0.771746 dtype: float64 saving image to /Users/kpmurphy/github/bookv2/figures/linreg_rank_stats_latexified.pdf Figure size: [6. 2.]
<ipython-input-15-c94adc1b84ce>:8: UserWarning: Matplotlib is currently using ps, which is a non-GUI backend, so cannot show the figure. plt.show()