Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/03/sprinkler_pgm.ipynb
1193 views
Kernel: Python [conda env:py37]

Open In Colab

Directed graphical models

We illustrate some basic properties of DGMs.

try: from causalgraphicalmodels import CausalGraphicalModel except ModuleNotFoundError: %pip install -qq causalgraphicalmodels from causalgraphicalmodels import CausalGraphicalModel try: import pgmpy except ModuleNotFoundError: %pip install -qq pgmpy import pgmpy import numpy as np import pandas as pd

Make the model

# Sprinkler network: cloudy influences both rain and the sprinkler, either of
# which can make the grass wet, and wet grass in turn makes it slippery.
sprinkler = CausalGraphicalModel(
    nodes=["cloudy", "rain", "sprinkler", "wet", "slippery"],
    edges=[
        ("cloudy", "rain"),
        ("cloudy", "sprinkler"),
        ("rain", "wet"),
        ("sprinkler", "wet"),
        ("wet", "slippery"),
    ],
)

Draw the model

# draw() returns a graphviz `dot` object, which Jupyter can render.
out = sprinkler.draw()
type(out)
graphviz.graphs.Digraph
# Show the graph object inline as the cell's rich output.
display(out)
Image in a Jupyter notebook
# Render the graph to a file; the recorded cell result is the output file name.
out.render()
'Digraph.gv.pdf'

Display the factorization

# Print the joint distribution factorized according to the graph structure.
print(sprinkler.get_distribution())
P(cloudy)P(rain|cloudy)P(sprinkler|cloudy)P(wet|rain,sprinkler)P(slippery|wet)

D-separation

# Check for d-separation of two nodes given a conditioning set:
# is "slippery" d-separated from "cloudy" once "wet" is observed?
sprinkler.is_d_separated("slippery", "cloudy", {"wet"})
True

Extract CI relationships

# Get all the conditional independence relationships implied by the CGM.
# Each entry is a triple (X, Y, Z): X is independent of Y given the set Z.
CI = sprinkler.get_all_independence_relationships()
print(CI)
[('wet', 'cloudy', {'sprinkler', 'rain'}), ('wet', 'cloudy', {'slippery', 'sprinkler', 'rain'}), ('slippery', 'sprinkler', {'wet'}), ('slippery', 'sprinkler', {'wet', 'cloudy'}), ('slippery', 'sprinkler', {'wet', 'rain'}), ('slippery', 'sprinkler', {'wet', 'cloudy', 'rain'}), ('slippery', 'rain', {'wet'}), ('slippery', 'rain', {'wet', 'cloudy'}), ('slippery', 'rain', {'wet', 'sprinkler'}), ('slippery', 'rain', {'wet', 'cloudy', 'sprinkler'}), ('slippery', 'cloudy', {'wet'}), ('slippery', 'cloudy', {'wet', 'sprinkler'}), ('slippery', 'cloudy', {'wet', 'rain'}), ('slippery', 'cloudy', {'sprinkler', 'rain'}), ('slippery', 'cloudy', {'wet', 'sprinkler', 'rain'}), ('sprinkler', 'rain', {'cloudy'})]
# Flatten each (X, Y, Z) triple into a table row, joining the conditioning
# set Z into a single comma-separated string. Z is a set, so the order of
# names inside that string is whatever order the set iterates in.
records = [(x, y, ", ".join(z)) for x, y, z in CI]
print(records)

# Tabulate the relationships with one column per role.
df = pd.DataFrame(records, columns=("X", "Y", "Z"))
display(df)
[('wet', 'cloudy', 'sprinkler, rain'), ('wet', 'cloudy', 'slippery, sprinkler, rain'), ('slippery', 'sprinkler', 'wet'), ('slippery', 'sprinkler', 'wet, cloudy'), ('slippery', 'sprinkler', 'wet, rain'), ('slippery', 'sprinkler', 'wet, cloudy, rain'), ('slippery', 'rain', 'wet'), ('slippery', 'rain', 'wet, cloudy'), ('slippery', 'rain', 'wet, sprinkler'), ('slippery', 'rain', 'wet, cloudy, sprinkler'), ('slippery', 'cloudy', 'wet'), ('slippery', 'cloudy', 'wet, sprinkler'), ('slippery', 'cloudy', 'wet, rain'), ('slippery', 'cloudy', 'sprinkler, rain'), ('slippery', 'cloudy', 'wet, sprinkler, rain'), ('sprinkler', 'rain', 'cloudy')]
# Emit the table as LaTeX source, leaving out the DataFrame index column.
print(df.to_latex(index=False))
\begin{tabular}{lll} \toprule X & Y & Z \\ \midrule wet & cloudy & sprinkler, rain \\ wet & cloudy & slippery, sprinkler, rain \\ slippery & sprinkler & wet \\ slippery & sprinkler & wet, cloudy \\ slippery & sprinkler & wet, rain \\ slippery & sprinkler & wet, cloudy, rain \\ slippery & rain & wet \\ slippery & rain & wet, cloudy \\ slippery & rain & wet, sprinkler \\ slippery & rain & wet, cloudy, sprinkler \\ slippery & cloudy & wet \\ slippery & cloudy & wet, sprinkler \\ slippery & cloudy & wet, rain \\ slippery & cloudy & sprinkler, rain \\ slippery & cloudy & wet, sprinkler, rain \\ sprinkler & rain & cloudy \\ \bottomrule \end{tabular}

Parameterize the model

try: from pgmpy.models import BayesianModel except ModuleNotFoundError: %pip install -qq pgmpy from pgmpy.models import BayesianModel from pgmpy.factors.discrete import TabularCPD # Defining the model structure. We can define the network by just passing a list of edges. model = BayesianModel([("C", "S"), ("C", "R"), ("S", "W"), ("R", "W"), ("W", "L")]) # Defining individual CPDs. cpd_c = TabularCPD(variable="C", variable_card=2, values=np.reshape([0.5, 0.5], (2, 1))) # In pgmpy the columns are the evidences and rows are the states of the variable. cpd_s = TabularCPD(variable="S", variable_card=2, values=[[0.5, 0.9], [0.5, 0.1]], evidence=["C"], evidence_card=[2]) cpd_r = TabularCPD(variable="R", variable_card=2, values=[[0.8, 0.2], [0.2, 0.8]], evidence=["C"], evidence_card=[2]) cpd_w = TabularCPD( variable="W", variable_card=2, values=[[1.0, 0.1, 0.1, 0.01], [0.0, 0.9, 0.9, 0.99]], evidence=["S", "R"], evidence_card=[2, 2], ) cpd_l = TabularCPD(variable="L", variable_card=2, values=[[0.9, 0.1], [0.1, 0.9]], evidence=["W"], evidence_card=[2]) # Associating the CPDs with the network model.add_cpds(cpd_c, cpd_s, cpd_r, cpd_w, cpd_l) # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly # defined and sum to 1. model.check_model()
/root/miniconda3/envs/py37/lib/python3.7/site-packages/pgmpy/models/BayesianModel.py:10: FutureWarning: BayesianModel has been renamed to BayesianNetwork. Please use BayesianNetwork class, BayesianModel will be removed in future. FutureWarning,
True

Inference

try: from pgmpy.inference import VariableElimination except ModuleNotFoundError: %pip install -qq pgmpy from pgmpy.inference import VariableElimination infer = VariableElimination(model) # p(R=1)= 0.5*0.2 + 0.5*0.8 = 0.5 probs = infer.query(["R"]).values print("\np(R=1) = ", probs[1]) # P(R=1|W=1) = 0.7079 probs = infer.query(["R"], evidence={"W": 1}).values print("\np(R=1|W=1) = ", probs[1]) # P(R=1|W=1,S=1) = 0.3204 probs = infer.query(["R"], evidence={"W": 1, "S": 1}).values print("\np(R=1|W=1,S=1) = ", probs[1])
0%| | 0/1 [00:00<?, ?it/s]
0%| | 0/1 [00:00<?, ?it/s]
p(R=1) = 0.5
0%| | 0/2 [00:00<?, ?it/s]
0%| | 0/2 [00:00<?, ?it/s]
p(R=1|W=1) = 0.7079276773296245
0%| | 0/1 [00:00<?, ?it/s]
0%| | 0/1 [00:00<?, ?it/s]
p(R=1|W=1,S=1) = 0.32038834951456313