Path: blob/main/latex-templates/templates/other/information_theory.tex
51 views
unlisted
\documentclass[a4paper, 11pt]{article}1\usepackage[utf8]{inputenc}2\usepackage[T1]{fontenc}3\usepackage{amsmath, amssymb}4\usepackage{graphicx}5\usepackage{siunitx}6\usepackage{booktabs}7\usepackage[makestderr]{pythontex}89\title{Information Theory: Entropy, Coding, and Channel Capacity}10\author{Computational Science Templates}11\date{\today}1213\begin{document}14\maketitle1516\section{Introduction}17Information theory quantifies information content, compression limits, and communication channel capacity. Founded by Claude Shannon in 1948, it provides the mathematical framework for data compression (source coding) and error-correcting codes (channel coding). This analysis computes entropy for various probability distributions, demonstrates Huffman and arithmetic coding efficiency, explores mutual information, and examines channel capacity.1819\section{Mathematical Framework}2021\subsection{Shannon Entropy}22The entropy of a discrete random variable $X$ with probability mass function $p(x)$:23\begin{equation}24H(X) = -\sum_{x \in \mathcal{X}} p(x) \log_2 p(x)25\end{equation}2627\subsection{Conditional Entropy and Mutual Information}28Conditional entropy and mutual information:29\begin{align}30H(X|Y) &= -\sum_{x,y} p(x,y) \log_2 p(x|y) \\31I(X;Y) &= H(X) - H(X|Y) = H(X) + H(Y) - H(X,Y)32\end{align}3334\subsection{Channel Capacity}35For a discrete memoryless channel:36\begin{equation}37C = \max_{p(x)} I(X;Y)38\end{equation}3940For the binary symmetric channel with crossover probability $p$:41\begin{equation}42C = 1 - H_b(p) = 1 + p\log_2 p + (1-p)\log_2(1-p)43\end{equation}4445\section{Environment Setup}46\begin{pycode}47import numpy as np48import matplotlib.pyplot as plt49from collections import Counter50import heapq51plt.rc('text', usetex=True)52plt.rc('font', family='serif')5354np.random.seed(42)5556def save_plot(filename, caption=""):57plt.savefig(filename, bbox_inches='tight', dpi=150)58print(r'\begin{figure}[htbp]')59print(r'\centering')60print(r'\includegraphics[width=0.95\textwidth]{' + filename + '}')61if caption:62print(r'\caption{' + caption + '}')63print(r'\end{figure}')64plt.close()6566def entropy(p):67"""Calculate Shannon entropy in bits."""68p = np.array(p)69p = p[p > 0] # Remove zeros70return -np.sum(p * np.log2(p))7172def binary_entropy(p):73"""Binary entropy function H_b(p)."""74if p <= 0 or p >= 1:75return 076return -p * np.log2(p) - (1-p) * np.log2(1-p)77\end{pycode}7879\section{Entropy and Information Content}80\begin{pycode}81# Binary entropy function82p_range = np.linspace(0.001, 0.999, 200)83H_binary = [binary_entropy(p) for p in p_range]8485# Entropy of different distributions86distributions = {87'Uniform (8)': np.ones(8) / 8,88'Peaked': np.array([0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.0078125]),89'Bimodal': np.array([0.3, 0.05, 0.05, 0.1, 0.1, 0.05, 0.05, 0.3]),90'Skewed': np.array([0.6, 0.2, 0.1, 0.05, 0.03, 0.01, 0.005, 0.005])91}9293entropies = {name: entropy(p) for name, p in distributions.items()}9495fig, axes = plt.subplots(2, 2, figsize=(10, 8))9697# Plot 1: Binary entropy function98axes[0, 0].plot(p_range, H_binary, 'b-', linewidth=2)99axes[0, 0].axvline(x=0.5, color='gray', linestyle='--', alpha=0.7)100axes[0, 0].axhline(y=1, color='gray', linestyle='--', alpha=0.7)101axes[0, 0].set_xlabel('Probability $p$')102axes[0, 0].set_ylabel('Entropy $H_b(p)$ (bits)')103axes[0, 0].set_title('Binary Entropy Function')104axes[0, 0].grid(True, alpha=0.3)105106# Plot 2: Different distributions107x = np.arange(8)108width = 0.2109colors = plt.cm.viridis(np.linspace(0.2, 0.8, 4))110111for i, (name, p) in enumerate(distributions.items()):112axes[0, 1].bar(x + i * width, p, width, label=f'{name}: H={entropies[name]:.2f}',113color=colors[i], alpha=0.8)114115axes[0, 1].set_xlabel('Symbol')116axes[0, 1].set_ylabel('Probability')117axes[0, 1].set_title('Probability Distributions and Entropy')118axes[0, 1].legend(fontsize=7)119axes[0, 1].grid(True, alpha=0.3, axis='y')120121# Plot 3: Entropy vs alphabet size (uniform distribution)122alphabet_sizes = np.arange(2, 65)123uniform_entropies = np.log2(alphabet_sizes)124125axes[1, 0].plot(alphabet_sizes, uniform_entropies, 'b-', linewidth=2)126axes[1, 0].set_xlabel('Alphabet size $|\\mathcal{X}|$')127axes[1, 0].set_ylabel('Entropy $H(X)$ (bits)')128axes[1, 0].set_title('Maximum Entropy (Uniform Distribution)')129axes[1, 0].grid(True, alpha=0.3)130131# Plot 4: Information content of individual symbols132probs = distributions['Peaked']133information = -np.log2(probs)134135axes[1, 1].bar(x, information, color='steelblue', alpha=0.7)136axes[1, 1].set_xlabel('Symbol')137axes[1, 1].set_ylabel('Information content (bits)')138axes[1, 1].set_title('Self-Information: $-\\log_2 p(x)$')139axes[1, 1].grid(True, alpha=0.3, axis='y')140141plt.tight_layout()142save_plot('entropy_basics.pdf', 'Entropy fundamentals: binary entropy, distributions, and information content.')143\end{pycode}144145\section{Huffman Coding}146\begin{pycode}147# Huffman coding implementation148class HuffmanNode:149def __init__(self, symbol=None, freq=0, left=None, right=None):150self.symbol = symbol151self.freq = freq152self.left = left153self.right = right154155def __lt__(self, other):156return self.freq < other.freq157158def huffman_codes(symbols, probs):159"""Build Huffman codes for given symbols and probabilities."""160# Create leaf nodes161heap = [HuffmanNode(s, p) for s, p in zip(symbols, probs)]162heapq.heapify(heap)163164# Build tree165while len(heap) > 1:166left = heapq.heappop(heap)167right = heapq.heappop(heap)168merged = HuffmanNode(freq=left.freq + right.freq, left=left, right=right)169heapq.heappush(heap, merged)170171# Generate codes172root = heap[0]173codes = {}174175def generate_codes(node, code=""):176if node.symbol is not None:177codes[node.symbol] = code if code else "0"178else:179generate_codes(node.left, code + "0")180generate_codes(node.right, code + "1")181182generate_codes(root)183return codes184185# Example source186symbols = ['A', 'B', 'C', 'D', 'E', 'F']187probs = np.array([0.35, 0.25, 0.18, 0.12, 0.07, 0.03])188189source_entropy = entropy(probs)190codes = huffman_codes(symbols, probs)191code_lengths = np.array([len(codes[s]) for s in symbols])192avg_length = np.sum(probs * code_lengths)193efficiency = source_entropy / avg_length * 100194redundancy = avg_length - source_entropy195196fig, axes = plt.subplots(2, 2, figsize=(10, 8))197198# Plot 1: Source distribution199x_pos = np.arange(len(symbols))200axes[0, 0].bar(x_pos, probs, color='steelblue', alpha=0.7)201axes[0, 0].set_xticks(x_pos)202axes[0, 0].set_xticklabels(symbols)203axes[0, 0].set_xlabel('Symbol')204axes[0, 0].set_ylabel('Probability')205axes[0, 0].set_title(f'Source Distribution (H = {source_entropy:.3f} bits)')206axes[0, 0].grid(True, alpha=0.3, axis='y')207208# Plot 2: Code lengths vs optimal209optimal_lengths = -np.log2(probs)210bar_width = 0.35211212axes[0, 1].bar(x_pos - bar_width/2, code_lengths, bar_width, label='Huffman', color='steelblue', alpha=0.7)213axes[0, 1].bar(x_pos + bar_width/2, optimal_lengths, bar_width, label='Optimal $-\\log_2 p$', color='coral', alpha=0.7)214axes[0, 1].set_xticks(x_pos)215axes[0, 1].set_xticklabels(symbols)216axes[0, 1].set_xlabel('Symbol')217axes[0, 1].set_ylabel('Code length (bits)')218axes[0, 1].set_title('Huffman vs Optimal Code Length')219axes[0, 1].legend()220axes[0, 1].grid(True, alpha=0.3, axis='y')221222# Plot 3: Coding efficiency metrics223metrics = ['Entropy', 'Avg Length', 'Redundancy']224values = [source_entropy, avg_length, redundancy]225colors_bar = ['blue', 'green', 'red']226227bars = axes[1, 0].bar(metrics, values, color=colors_bar, alpha=0.7)228axes[1, 0].set_ylabel('Bits')229axes[1, 0].set_title(f'Coding Performance (Efficiency: {efficiency:.1f}\\%)')230axes[1, 0].grid(True, alpha=0.3, axis='y')231232# Add value labels233for bar, val in zip(bars, values):234axes[1, 0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.05,235f'{val:.3f}', ha='center', fontsize=9)236237# Plot 4: Compression ratio for different sources238n_sources = 10239compression_ratios = []240entropies_random = []241242for _ in range(n_sources):243# Generate random probability distribution244p_random = np.random.dirichlet(np.ones(6))245H_random = entropy(p_random)246entropies_random.append(H_random)247248codes_random = huffman_codes(symbols, p_random)249lengths_random = np.array([len(codes_random[s]) for s in symbols])250avg_len_random = np.sum(p_random * lengths_random)251252# Fixed-length code would need ceil(log2(6)) = 3 bits253compression_ratios.append(3 / avg_len_random)254255axes[1, 1].scatter(entropies_random, compression_ratios, alpha=0.7)256axes[1, 1].set_xlabel('Source Entropy (bits)')257axes[1, 1].set_ylabel('Compression Ratio')258axes[1, 1].set_title('Huffman Compression vs Source Entropy')259axes[1, 1].grid(True, alpha=0.3)260261plt.tight_layout()262save_plot('huffman_coding.pdf', 'Huffman coding: source distribution, code lengths, and compression performance.')263264# Print Huffman code table265\end{pycode}266267\section{Mutual Information and Joint Entropy}268\begin{pycode}269# Joint distribution example270# X and Y are correlated binary random variables271p_xy = np.array([[0.4, 0.1], # X=0, Y=0,1272[0.1, 0.4]]) # X=1, Y=0,1273274# Marginal distributions275p_x = np.sum(p_xy, axis=1)276p_y = np.sum(p_xy, axis=0)277278# Entropies279H_X = entropy(p_x)280H_Y = entropy(p_y)281H_XY = entropy(p_xy.flatten())282H_X_given_Y = H_XY - H_Y283H_Y_given_X = H_XY - H_X284I_XY = H_X - H_X_given_Y285286fig, axes = plt.subplots(2, 2, figsize=(10, 8))287288# Plot 1: Joint distribution heatmap289im = axes[0, 0].imshow(p_xy, cmap='Blues', aspect='equal')290axes[0, 0].set_xticks([0, 1])291axes[0, 0].set_yticks([0, 1])292axes[0, 0].set_xlabel('Y')293axes[0, 0].set_ylabel('X')294axes[0, 0].set_title(f'Joint Distribution $p(x,y)$')295for i in range(2):296for j in range(2):297axes[0, 0].text(j, i, f'{p_xy[i,j]:.2f}', ha='center', va='center', fontsize=12)298plt.colorbar(im, ax=axes[0, 0])299300# Plot 2: Information measures as Venn diagram (simplified bar chart)301measures = ['H(X)', 'H(Y)', 'H(X,Y)', 'H(X|Y)', 'H(Y|X)', 'I(X;Y)']302values_info = [H_X, H_Y, H_XY, H_X_given_Y, H_Y_given_X, I_XY]303colors_info = ['blue', 'green', 'purple', 'lightblue', 'lightgreen', 'red']304305axes[0, 1].bar(measures, values_info, color=colors_info, alpha=0.7)306axes[0, 1].set_ylabel('Bits')307axes[0, 1].set_title('Information Measures')308axes[0, 1].tick_params(axis='x', rotation=45)309axes[0, 1].grid(True, alpha=0.3, axis='y')310311# Plot 3: Mutual information vs correlation312correlations = np.linspace(0, 0.49, 50)313mi_values = []314315for c in correlations:316# Create joint distribution with correlation c317p = 0.5 + c # Diagonal probability318q = 0.5 - c # Off-diagonal probability319p_joint = np.array([[p/2, q/2], [q/2, p/2]])320321# Marginals are uniform322p_marg = np.array([0.5, 0.5])323H_marg = entropy(p_marg)324H_joint = entropy(p_joint.flatten())325mi = 2 * H_marg - H_joint326mi_values.append(mi)327328axes[1, 0].plot(correlations * 2, mi_values, 'b-', linewidth=2)329axes[1, 0].set_xlabel('Correlation strength')330axes[1, 0].set_ylabel('Mutual Information $I(X;Y)$ (bits)')331axes[1, 0].set_title('MI vs Correlation')332axes[1, 0].grid(True, alpha=0.3)333334# Plot 4: Conditional entropy visualization335# H(X|Y=y) for different y336y_values = [0, 1]337H_X_given_y = []338339for y in y_values:340p_x_given_y = p_xy[:, y] / p_y[y]341H_X_given_y.append(entropy(p_x_given_y))342343axes[1, 1].bar(['Y=0', 'Y=1', 'Average'], H_X_given_y + [H_X_given_Y],344color=['steelblue', 'coral', 'green'], alpha=0.7)345axes[1, 1].set_ylabel('$H(X|Y=y)$ (bits)')346axes[1, 1].set_title('Conditional Entropy')347axes[1, 1].grid(True, alpha=0.3, axis='y')348349plt.tight_layout()350save_plot('mutual_information.pdf', 'Mutual information: joint distribution, information measures, and correlation.')351\end{pycode}352353\section{Channel Capacity}354\begin{pycode}355# Binary Symmetric Channel (BSC)356def bsc_capacity(p):357"""Capacity of binary symmetric channel."""358return 1 - binary_entropy(p)359360# Binary Erasure Channel (BEC)361def bec_capacity(epsilon):362"""Capacity of binary erasure channel."""363return 1 - epsilon364365# AWGN channel capacity366def awgn_capacity_snr(snr_db):367"""Capacity of AWGN channel in bits per channel use."""368snr = 10 ** (snr_db / 10)369return 0.5 * np.log2(1 + snr)370371fig, axes = plt.subplots(2, 2, figsize=(10, 8))372373# Plot 1: BSC and BEC capacity374p_range = np.linspace(0, 0.5, 100)375C_bsc = [bsc_capacity(p) for p in p_range]376C_bec = [bec_capacity(p) for p in p_range]377378axes[0, 0].plot(p_range, C_bsc, 'b-', linewidth=2, label='BSC')379axes[0, 0].plot(p_range, C_bec, 'r--', linewidth=2, label='BEC')380axes[0, 0].set_xlabel('Error/Erasure probability')381axes[0, 0].set_ylabel('Channel capacity (bits/use)')382axes[0, 0].set_title('Binary Channel Capacities')383axes[0, 0].legend()384axes[0, 0].grid(True, alpha=0.3)385386# Plot 2: AWGN channel capacity387snr_db = np.linspace(-10, 30, 100)388C_awgn = [awgn_capacity_snr(s) for s in snr_db]389390axes[0, 1].plot(snr_db, C_awgn, 'b-', linewidth=2)391axes[0, 1].set_xlabel('SNR (dB)')392axes[0, 1].set_ylabel('Capacity (bits/channel use)')393axes[0, 1].set_title('AWGN Channel Capacity')394axes[0, 1].grid(True, alpha=0.3)395396# Plot 3: BSC transition diagram (simplified visualization)397p_error = 0.1398399# Show error probability400x_diag = [0, 1]401y_diag = [0, 1]402axes[1, 0].plot([0, 0], [0, 1], 'b-', linewidth=3, alpha=0.5)403axes[1, 0].plot([1, 1], [0, 1], 'b-', linewidth=3, alpha=0.5)404axes[1, 0].plot([0, 1], [0, 0], 'b-', linewidth=2 * (1-p_error), alpha=0.7)405axes[1, 0].plot([0, 1], [1, 1], 'b-', linewidth=2 * (1-p_error), alpha=0.7)406axes[1, 0].plot([0, 1], [0, 1], 'r-', linewidth=2 * p_error, alpha=0.7)407axes[1, 0].plot([0, 1], [1, 0], 'r-', linewidth=2 * p_error, alpha=0.7)408409axes[1, 0].scatter([0, 0, 1, 1], [0, 1, 0, 1], s=200, c='steelblue', zorder=5)410axes[1, 0].text(-0.15, 0, 'X=0', fontsize=10)411axes[1, 0].text(-0.15, 1, 'X=1', fontsize=10)412axes[1, 0].text(1.05, 0, 'Y=0', fontsize=10)413axes[1, 0].text(1.05, 1, 'Y=1', fontsize=10)414axes[1, 0].set_xlim([-0.3, 1.3])415axes[1, 0].set_ylim([-0.2, 1.2])416axes[1, 0].set_title(f'BSC ($p = {p_error}$, $C = {bsc_capacity(p_error):.3f}$ bits)')417axes[1, 0].axis('off')418419# Plot 4: Achievable rates420# Shannon's coding theorem: rates below capacity are achievable421p_test = 0.1422C_test = bsc_capacity(p_test)423424rates = np.linspace(0, 1, 100)425# Probability of error for random coding (simplified)426Pe_approx = np.exp(-len(rates) * (C_test - rates))427Pe_approx[rates > C_test] = 1428429axes[1, 1].fill_between(rates, 0, 1, where=rates <= C_test, alpha=0.3, color='green', label='Achievable')430axes[1, 1].fill_between(rates, 0, 1, where=rates > C_test, alpha=0.3, color='red', label='Not achievable')431axes[1, 1].axvline(x=C_test, color='black', linestyle='--', linewidth=2, label=f'$C = {C_test:.3f}$')432axes[1, 1].set_xlabel('Rate $R$ (bits/use)')433axes[1, 1].set_ylabel('Region')434axes[1, 1].set_title("Shannon's Coding Theorem")435axes[1, 1].legend(loc='upper right')436axes[1, 1].set_xlim([0, 1])437axes[1, 1].set_ylim([0, 1])438439plt.tight_layout()440save_plot('channel_capacity.pdf', 'Channel capacity: BSC, BEC, AWGN, and achievable rates.')441\end{pycode}442443\section{Source Coding and Compression}444\begin{pycode}445# Lempel-Ziv-like compression demonstration446def simple_lz_compress(text):447"""Simple LZ-style compression."""448dictionary = {}449result = []450current = ""451452for char in text:453extended = current + char454if extended in dictionary:455current = extended456else:457if current:458result.append(dictionary.get(current, current))459dictionary[extended] = len(dictionary)460current = char461462if current:463result.append(dictionary.get(current, current))464465return result, dictionary466467# Demonstration text468text = "ABABABABABCABCABCABCDEFABCDEF"469compressed, dictionary = simple_lz_compress(text)470471# Calculate compression metrics472original_bits = len(text) * 8 # ASCII473compressed_symbols = len(compressed)474bits_per_symbol = np.ceil(np.log2(max(len(dictionary), 1) + 26))475compressed_bits = compressed_symbols * bits_per_symbol476compression_ratio = original_bits / compressed_bits477478fig, axes = plt.subplots(2, 2, figsize=(10, 8))479480# Plot 1: Entropy rate for Markov source481# First-order Markov chain482P_transition = np.array([[0.7, 0.3],483[0.4, 0.6]])484485# Stationary distribution486eigenvalues, eigenvectors = np.linalg.eig(P_transition.T)487stationary = eigenvectors[:, np.argmax(eigenvalues)]488stationary = np.real(stationary / np.sum(stationary))489490# Entropy rate: H = sum_i pi_i * sum_j -p_ij * log2(p_ij)491entropy_rate = 0492for i in range(2):493for j in range(2):494if P_transition[i, j] > 0:495entropy_rate -= stationary[i] * P_transition[i, j] * np.log2(P_transition[i, j])496497# Show transition matrix498im = axes[0, 0].imshow(P_transition, cmap='Blues', vmin=0, vmax=1)499axes[0, 0].set_xticks([0, 1])500axes[0, 0].set_yticks([0, 1])501axes[0, 0].set_xlabel('Next state')502axes[0, 0].set_ylabel('Current state')503axes[0, 0].set_title(f'Markov Source (Entropy rate: {entropy_rate:.3f} bits)')504for i in range(2):505for j in range(2):506axes[0, 0].text(j, i, f'{P_transition[i,j]:.1f}', ha='center', va='center', fontsize=12)507plt.colorbar(im, ax=axes[0, 0])508509# Plot 2: Entropy rate vs iid entropy510# Vary the off-diagonal transition probability511off_diag = np.linspace(0.01, 0.5, 50)512entropy_rates = []513iid_entropies = []514515for p in off_diag:516P = np.array([[1-p, p], [p, 1-p]])517# Stationary is uniform for symmetric transition518pi = np.array([0.5, 0.5])519H_rate = -2 * pi[0] * (P[0,0] * np.log2(P[0,0]) + P[0,1] * np.log2(P[0,1]))520entropy_rates.append(H_rate)521iid_entropies.append(binary_entropy(0.5)) # IID entropy is 1 bit522523axes[0, 1].plot(off_diag, entropy_rates, 'b-', linewidth=2, label='Markov entropy rate')524axes[0, 1].plot(off_diag, iid_entropies, 'r--', linewidth=2, label='IID entropy')525axes[0, 1].set_xlabel('Transition probability $p$')526axes[0, 1].set_ylabel('Entropy (bits)')527axes[0, 1].set_title('Entropy Rate vs IID Entropy')528axes[0, 1].legend()529axes[0, 1].grid(True, alpha=0.3)530531# Plot 3: Compression ratio vs data length532data_lengths = [10, 20, 50, 100, 200, 500, 1000]533ratios = []534535for length in data_lengths:536# Generate text with some repetition537base = "ABCABC"538text_gen = (base * (length // len(base) + 1))[:length]539comp, _ = simple_lz_compress(text_gen)540ratio = len(text_gen) / len(comp) if len(comp) > 0 else 1541ratios.append(ratio)542543axes[1, 0].plot(data_lengths, ratios, 'bo-', linewidth=2, markersize=6)544axes[1, 0].set_xlabel('Data length (symbols)')545axes[1, 0].set_ylabel('Compression ratio')546axes[1, 0].set_title('LZ Compression vs Data Length')547axes[1, 0].grid(True, alpha=0.3)548549# Plot 4: Typical sequences550# For a binary source with p=0.3551p_source = 0.3552n_bits = 100553n_sequences = 1000554555# Generate sequences and count ones556fractions = []557for _ in range(n_sequences):558seq = np.random.choice([0, 1], size=n_bits, p=[1-p_source, p_source])559fractions.append(np.mean(seq))560561axes[1, 1].hist(fractions, bins=30, density=True, alpha=0.7, edgecolor='black')562axes[1, 1].axvline(x=p_source, color='red', linestyle='--', linewidth=2, label=f'$p = {p_source}$')563axes[1, 1].set_xlabel('Fraction of ones')564axes[1, 1].set_ylabel('Density')565axes[1, 1].set_title(f'Typical Sequences ($n = {n_bits}$)')566axes[1, 1].legend()567axes[1, 1].grid(True, alpha=0.3)568569plt.tight_layout()570save_plot('source_coding.pdf', 'Source coding: Markov entropy, compression ratio, and typical sequences.')571\end{pycode}572573\section{Results Summary}574\begin{pycode}575# Generate results table576print(r'\begin{table}[htbp]')577print(r'\centering')578print(r'\caption{Summary of Information Theory Results}')579print(r'\begin{tabular}{lll}')580print(r'\toprule')581print(r'Measure & Value & Description \\')582print(r'\midrule')583print(r'\multicolumn{3}{c}{\textit{Huffman Coding}} \\')584print(f'Source entropy & {source_entropy:.3f} bits & $H(X)$ \\\\')585print(f'Average code length & {avg_length:.3f} bits & $\\bar{{L}}$ \\\\')586print(f'Coding efficiency & {efficiency:.1f}\\% & $H(X)/\\bar{{L}}$ \\\\')587print(f'Redundancy & {redundancy:.3f} bits & $\\bar{{L}} - H(X)$ \\\\')588print(r'\midrule')589print(r'\multicolumn{3}{c}{\textit{Mutual Information}} \\')590print(f'$H(X)$ & {H_X:.3f} bits & Marginal entropy \\\\')591print(f'$H(Y)$ & {H_Y:.3f} bits & Marginal entropy \\\\')592print(f'$H(X,Y)$ & {H_XY:.3f} bits & Joint entropy \\\\')593print(f'$I(X;Y)$ & {I_XY:.3f} bits & Mutual information \\\\')594print(r'\midrule')595print(r'\multicolumn{3}{c}{\textit{Channel Capacity}} \\')596print(f'BSC ($p = {p_error}$) & {bsc_capacity(p_error):.3f} bits/use & $1 - H_b(p)$ \\\\')597print(f'BEC ($\\epsilon = 0.1$) & {bec_capacity(0.1):.3f} bits/use & $1 - \\epsilon$ \\\\')598print(f'AWGN (10 dB) & {awgn_capacity_snr(10):.3f} bits/use & $\\frac{{1}}{{2}}\\log_2(1+SNR)$ \\\\')599print(r'\bottomrule')600print(r'\end{tabular}')601print(r'\end{table}')602\end{pycode}603604\section{Statistical Summary}605\begin{itemize}606\item \textbf{Source entropy}: \py{f"{source_entropy:.3f}"} bits607\item \textbf{Maximum entropy} ($|\mathcal{X}| = $ \py{f"{len(symbols)}"}): \py{f"{np.log2(len(symbols)):.3f}"} bits608\item \textbf{Average Huffman code length}: \py{f"{avg_length:.3f}"} bits609\item \textbf{Coding efficiency}: \py{f"{efficiency:.1f}"}\%610\item \textbf{Joint entropy} $H(X,Y)$: \py{f"{H_XY:.3f}"} bits611\item \textbf{Mutual information} $I(X;Y)$: \py{f"{I_XY:.3f}"} bits612\item \textbf{Markov entropy rate}: \py{f"{entropy_rate:.3f}"} bits613\item \textbf{BSC capacity} ($p = $ \py{f"{p_error}"}): \py{f"{bsc_capacity(p_error):.3f}"} bits/use614\end{itemize}615616\section{Conclusion}617Information theory provides fundamental limits for data compression and communication. Shannon entropy measures the average information content and sets the minimum average code length. Huffman coding achieves near-optimal compression for known probability distributions. Mutual information quantifies the shared information between random variables, critical for channel coding. The channel coding theorem guarantees error-free communication at rates below capacity, achieved by modern codes like turbo codes and LDPC codes. These principles underpin data compression (ZIP, JPEG), error-correcting codes (WiFi, 5G), and cryptography.618619\end{document}620621622