CoCalc -- information

GitHub Repository: Ok-landscape/computational-pipeline
Path: blob/main/latex-templates/templates/other/information_theory.tex
⁵¹ views
unlisted
1
\documentclass[a4paper, 11pt]{article}
2
\usepackage[utf8]{inputenc}
3
\usepackage[T1]{fontenc}
4
\usepackage{amsmath, amssymb}
5
\usepackage{graphicx}
6
\usepackage{siunitx}
7
\usepackage{booktabs}
8
\usepackage[makestderr]{pythontex}
9

10
\title{Information Theory: Entropy, Coding, and Channel Capacity}
11
\author{Computational Science Templates}
12
\date{\today}
13

14
\begin{document}
15
\maketitle
16

17
\section{Introduction}
18
Information theory quantifies information content, compression limits, and communication channel capacity. Founded by Claude Shannon in 1948, it provides the mathematical framework for data compression (source coding) and error-correcting codes (channel coding). This analysis computes entropy for various probability distributions, demonstrates Huffman and arithmetic coding efficiency, explores mutual information, and examines channel capacity.
19

20
\section{Mathematical Framework}
21

22
\subsection{Shannon Entropy}
23
The entropy of a discrete random variable $X$ with probability mass function $p(x)$:
24
\begin{equation}
25
H(X) = -\sum_{x \in \mathcal{X}} p(x) \log_2 p(x)
26
\end{equation}
27

28
\subsection{Conditional Entropy and Mutual Information}
29
Conditional entropy and mutual information:
30
\begin{align}
31
H(X|Y) &= -\sum_{x,y} p(x,y) \log_2 p(x|y) \\
32
I(X;Y) &= H(X) - H(X|Y) = H(X) + H(Y) - H(X,Y)
33
\end{align}
34

35
\subsection{Channel Capacity}
36
For a discrete memoryless channel:
37
\begin{equation}
38
C = \max_{p(x)} I(X;Y)
39
\end{equation}
40

41
For the binary symmetric channel with crossover probability $p$:
42
\begin{equation}
43
C = 1 - H_b(p) = 1 + p\log_2 p + (1-p)\log_2(1-p)
44
\end{equation}
45

46
\section{Environment Setup}
47
\begin{pycode}
48
import numpy as np
49
import matplotlib.pyplot as plt
50
from collections import Counter
51
import heapq
52
plt.rc('text', usetex=True)
53
plt.rc('font', family='serif')
54

55
np.random.seed(42)
56

57
def save_plot(filename, caption=""):
58
    plt.savefig(filename, bbox_inches='tight', dpi=150)
59
    print(r'\begin{figure}[htbp]')
60
    print(r'\centering')
61
    print(r'\includegraphics[width=0.95\textwidth]{' + filename + '}')
62
    if caption:
63
        print(r'\caption{' + caption + '}')
64
    print(r'\end{figure}')
65
    plt.close()
66

67
def entropy(p):
68
    """Calculate Shannon entropy in bits."""
69
    p = np.array(p)
70
    p = p[p > 0]  # Remove zeros
71
    return -np.sum(p * np.log2(p))
72

73
def binary_entropy(p):
74
    """Binary entropy function H_b(p)."""
75
    if p <= 0 or p >= 1:
76
        return 0
77
    return -p * np.log2(p) - (1-p) * np.log2(1-p)
78
\end{pycode}
79

80
\section{Entropy and Information Content}
81
\begin{pycode}
82
# Binary entropy function
83
p_range = np.linspace(0.001, 0.999, 200)
84
H_binary = [binary_entropy(p) for p in p_range]
85

86
# Entropy of different distributions
87
distributions = {
88
    'Uniform (8)': np.ones(8) / 8,
89
    'Peaked': np.array([0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.0078125]),
90
    'Bimodal': np.array([0.3, 0.05, 0.05, 0.1, 0.1, 0.05, 0.05, 0.3]),
91
    'Skewed': np.array([0.6, 0.2, 0.1, 0.05, 0.03, 0.01, 0.005, 0.005])
92
}
93

94
entropies = {name: entropy(p) for name, p in distributions.items()}
95

96
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
97

98
# Plot 1: Binary entropy function
99
axes[0, 0].plot(p_range, H_binary, 'b-', linewidth=2)
100
axes[0, 0].axvline(x=0.5, color='gray', linestyle='--', alpha=0.7)
101
axes[0, 0].axhline(y=1, color='gray', linestyle='--', alpha=0.7)
102
axes[0, 0].set_xlabel('Probability $p$')
103
axes[0, 0].set_ylabel('Entropy $H_b(p)$ (bits)')
104
axes[0, 0].set_title('Binary Entropy Function')
105
axes[0, 0].grid(True, alpha=0.3)
106

107
# Plot 2: Different distributions
108
x = np.arange(8)
109
width = 0.2
110
colors = plt.cm.viridis(np.linspace(0.2, 0.8, 4))
111

112
for i, (name, p) in enumerate(distributions.items()):
113
    axes[0, 1].bar(x + i * width, p, width, label=f'{name}: H={entropies[name]:.2f}',
114
                   color=colors[i], alpha=0.8)
115

116
axes[0, 1].set_xlabel('Symbol')
117
axes[0, 1].set_ylabel('Probability')
118
axes[0, 1].set_title('Probability Distributions and Entropy')
119
axes[0, 1].legend(fontsize=7)
120
axes[0, 1].grid(True, alpha=0.3, axis='y')
121

122
# Plot 3: Entropy vs alphabet size (uniform distribution)
123
alphabet_sizes = np.arange(2, 65)
124
uniform_entropies = np.log2(alphabet_sizes)
125

126
axes[1, 0].plot(alphabet_sizes, uniform_entropies, 'b-', linewidth=2)
127
axes[1, 0].set_xlabel('Alphabet size $|\\mathcal{X}|$')
128
axes[1, 0].set_ylabel('Entropy $H(X)$ (bits)')
129
axes[1, 0].set_title('Maximum Entropy (Uniform Distribution)')
130
axes[1, 0].grid(True, alpha=0.3)
131

132
# Plot 4: Information content of individual symbols
133
probs = distributions['Peaked']
134
information = -np.log2(probs)
135

136
axes[1, 1].bar(x, information, color='steelblue', alpha=0.7)
137
axes[1, 1].set_xlabel('Symbol')
138
axes[1, 1].set_ylabel('Information content (bits)')
139
axes[1, 1].set_title('Self-Information: $-\\log_2 p(x)$')
140
axes[1, 1].grid(True, alpha=0.3, axis='y')
141

142
plt.tight_layout()
143
save_plot('entropy_basics.pdf', 'Entropy fundamentals: binary entropy, distributions, and information content.')
144
\end{pycode}
145

146
\section{Huffman Coding}
147
\begin{pycode}
148
# Huffman coding implementation
149
class HuffmanNode:
150
    def __init__(self, symbol=None, freq=0, left=None, right=None):
151
        self.symbol = symbol
152
        self.freq = freq
153
        self.left = left
154
        self.right = right
155

156
    def __lt__(self, other):
157
        return self.freq < other.freq
158

159
def huffman_codes(symbols, probs):
160
    """Build Huffman codes for given symbols and probabilities."""
161
    # Create leaf nodes
162
    heap = [HuffmanNode(s, p) for s, p in zip(symbols, probs)]
163
    heapq.heapify(heap)
164

165
    # Build tree
166
    while len(heap) > 1:
167
        left = heapq.heappop(heap)
168
        right = heapq.heappop(heap)
169
        merged = HuffmanNode(freq=left.freq + right.freq, left=left, right=right)
170
        heapq.heappush(heap, merged)
171

172
    # Generate codes
173
    root = heap[0]
174
    codes = {}
175

176
    def generate_codes(node, code=""):
177
        if node.symbol is not None:
178
            codes[node.symbol] = code if code else "0"
179
        else:
180
            generate_codes(node.left, code + "0")
181
            generate_codes(node.right, code + "1")
182

183
    generate_codes(root)
184
    return codes
185

186
# Example source
187
symbols = ['A', 'B', 'C', 'D', 'E', 'F']
188
probs = np.array([0.35, 0.25, 0.18, 0.12, 0.07, 0.03])
189

190
source_entropy = entropy(probs)
191
codes = huffman_codes(symbols, probs)
192
code_lengths = np.array([len(codes[s]) for s in symbols])
193
avg_length = np.sum(probs * code_lengths)
194
efficiency = source_entropy / avg_length * 100
195
redundancy = avg_length - source_entropy
196

197
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
198

199
# Plot 1: Source distribution
200
x_pos = np.arange(len(symbols))
201
axes[0, 0].bar(x_pos, probs, color='steelblue', alpha=0.7)
202
axes[0, 0].set_xticks(x_pos)
203
axes[0, 0].set_xticklabels(symbols)
204
axes[0, 0].set_xlabel('Symbol')
205
axes[0, 0].set_ylabel('Probability')
206
axes[0, 0].set_title(f'Source Distribution (H = {source_entropy:.3f} bits)')
207
axes[0, 0].grid(True, alpha=0.3, axis='y')
208

209
# Plot 2: Code lengths vs optimal
210
optimal_lengths = -np.log2(probs)
211
bar_width = 0.35
212

213
axes[0, 1].bar(x_pos - bar_width/2, code_lengths, bar_width, label='Huffman', color='steelblue', alpha=0.7)
214
axes[0, 1].bar(x_pos + bar_width/2, optimal_lengths, bar_width, label='Optimal $-\\log_2 p$', color='coral', alpha=0.7)
215
axes[0, 1].set_xticks(x_pos)
216
axes[0, 1].set_xticklabels(symbols)
217
axes[0, 1].set_xlabel('Symbol')
218
axes[0, 1].set_ylabel('Code length (bits)')
219
axes[0, 1].set_title('Huffman vs Optimal Code Length')
220
axes[0, 1].legend()
221
axes[0, 1].grid(True, alpha=0.3, axis='y')
222

223
# Plot 3: Coding efficiency metrics
224
metrics = ['Entropy', 'Avg Length', 'Redundancy']
225
values = [source_entropy, avg_length, redundancy]
226
colors_bar = ['blue', 'green', 'red']
227

228
bars = axes[1, 0].bar(metrics, values, color=colors_bar, alpha=0.7)
229
axes[1, 0].set_ylabel('Bits')
230
axes[1, 0].set_title(f'Coding Performance (Efficiency: {efficiency:.1f}\\%)')
231
axes[1, 0].grid(True, alpha=0.3, axis='y')
232

233
# Add value labels
234
for bar, val in zip(bars, values):
235
    axes[1, 0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.05,
236
                    f'{val:.3f}', ha='center', fontsize=9)
237

238
# Plot 4: Compression ratio for different sources
239
n_sources = 10
240
compression_ratios = []
241
entropies_random = []
242

243
for _ in range(n_sources):
244
    # Generate random probability distribution
245
    p_random = np.random.dirichlet(np.ones(6))
246
    H_random = entropy(p_random)
247
    entropies_random.append(H_random)
248

249
    codes_random = huffman_codes(symbols, p_random)
250
    lengths_random = np.array([len(codes_random[s]) for s in symbols])
251
    avg_len_random = np.sum(p_random * lengths_random)
252

253
    # Fixed-length code would need ceil(log2(6)) = 3 bits
254
    compression_ratios.append(3 / avg_len_random)
255

256
axes[1, 1].scatter(entropies_random, compression_ratios, alpha=0.7)
257
axes[1, 1].set_xlabel('Source Entropy (bits)')
258
axes[1, 1].set_ylabel('Compression Ratio')
259
axes[1, 1].set_title('Huffman Compression vs Source Entropy')
260
axes[1, 1].grid(True, alpha=0.3)
261

262
plt.tight_layout()
263
save_plot('huffman_coding.pdf', 'Huffman coding: source distribution, code lengths, and compression performance.')
264

265
# Print Huffman code table
266
\end{pycode}
267

268
\section{Mutual Information and Joint Entropy}
269
\begin{pycode}
270
# Joint distribution example
271
# X and Y are correlated binary random variables
272
p_xy = np.array([[0.4, 0.1],   # X=0, Y=0,1
273
                 [0.1, 0.4]])   # X=1, Y=0,1
274

275
# Marginal distributions
276
p_x = np.sum(p_xy, axis=1)
277
p_y = np.sum(p_xy, axis=0)
278

279
# Entropies
280
H_X = entropy(p_x)
281
H_Y = entropy(p_y)
282
H_XY = entropy(p_xy.flatten())
283
H_X_given_Y = H_XY - H_Y
284
H_Y_given_X = H_XY - H_X
285
I_XY = H_X - H_X_given_Y
286

287
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
288

289
# Plot 1: Joint distribution heatmap
290
im = axes[0, 0].imshow(p_xy, cmap='Blues', aspect='equal')
291
axes[0, 0].set_xticks([0, 1])
292
axes[0, 0].set_yticks([0, 1])
293
axes[0, 0].set_xlabel('Y')
294
axes[0, 0].set_ylabel('X')
295
axes[0, 0].set_title(f'Joint Distribution $p(x,y)$')
296
for i in range(2):
297
    for j in range(2):
298
        axes[0, 0].text(j, i, f'{p_xy[i,j]:.2f}', ha='center', va='center', fontsize=12)
299
plt.colorbar(im, ax=axes[0, 0])
300

301
# Plot 2: Information measures as Venn diagram (simplified bar chart)
302
measures = ['H(X)', 'H(Y)', 'H(X,Y)', 'H(X|Y)', 'H(Y|X)', 'I(X;Y)']
303
values_info = [H_X, H_Y, H_XY, H_X_given_Y, H_Y_given_X, I_XY]
304
colors_info = ['blue', 'green', 'purple', 'lightblue', 'lightgreen', 'red']
305

306
axes[0, 1].bar(measures, values_info, color=colors_info, alpha=0.7)
307
axes[0, 1].set_ylabel('Bits')
308
axes[0, 1].set_title('Information Measures')
309
axes[0, 1].tick_params(axis='x', rotation=45)
310
axes[0, 1].grid(True, alpha=0.3, axis='y')
311

312
# Plot 3: Mutual information vs correlation
313
correlations = np.linspace(0, 0.49, 50)
314
mi_values = []
315

316
for c in correlations:
317
    # Create joint distribution with correlation c
318
    p = 0.5 + c  # Diagonal probability
319
    q = 0.5 - c  # Off-diagonal probability
320
    p_joint = np.array([[p/2, q/2], [q/2, p/2]])
321

322
    # Marginals are uniform
323
    p_marg = np.array([0.5, 0.5])
324
    H_marg = entropy(p_marg)
325
    H_joint = entropy(p_joint.flatten())
326
    mi = 2 * H_marg - H_joint
327
    mi_values.append(mi)
328

329
axes[1, 0].plot(correlations * 2, mi_values, 'b-', linewidth=2)
330
axes[1, 0].set_xlabel('Correlation strength')
331
axes[1, 0].set_ylabel('Mutual Information $I(X;Y)$ (bits)')
332
axes[1, 0].set_title('MI vs Correlation')
333
axes[1, 0].grid(True, alpha=0.3)
334

335
# Plot 4: Conditional entropy visualization
336
# H(X|Y=y) for different y
337
y_values = [0, 1]
338
H_X_given_y = []
339

340
for y in y_values:
341
    p_x_given_y = p_xy[:, y] / p_y[y]
342
    H_X_given_y.append(entropy(p_x_given_y))
343

344
axes[1, 1].bar(['Y=0', 'Y=1', 'Average'], H_X_given_y + [H_X_given_Y],
345
               color=['steelblue', 'coral', 'green'], alpha=0.7)
346
axes[1, 1].set_ylabel('$H(X|Y=y)$ (bits)')
347
axes[1, 1].set_title('Conditional Entropy')
348
axes[1, 1].grid(True, alpha=0.3, axis='y')
349

350
plt.tight_layout()
351
save_plot('mutual_information.pdf', 'Mutual information: joint distribution, information measures, and correlation.')
352
\end{pycode}
353

354
\section{Channel Capacity}
355
\begin{pycode}
356
# Binary Symmetric Channel (BSC)
357
def bsc_capacity(p):
358
    """Capacity of binary symmetric channel."""
359
    return 1 - binary_entropy(p)
360

361
# Binary Erasure Channel (BEC)
362
def bec_capacity(epsilon):
363
    """Capacity of binary erasure channel."""
364
    return 1 - epsilon
365

366
# AWGN channel capacity
367
def awgn_capacity_snr(snr_db):
368
    """Capacity of AWGN channel in bits per channel use."""
369
    snr = 10 ** (snr_db / 10)
370
    return 0.5 * np.log2(1 + snr)
371

372
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
373

374
# Plot 1: BSC and BEC capacity
375
p_range = np.linspace(0, 0.5, 100)
376
C_bsc = [bsc_capacity(p) for p in p_range]
377
C_bec = [bec_capacity(p) for p in p_range]
378

379
axes[0, 0].plot(p_range, C_bsc, 'b-', linewidth=2, label='BSC')
380
axes[0, 0].plot(p_range, C_bec, 'r--', linewidth=2, label='BEC')
381
axes[0, 0].set_xlabel('Error/Erasure probability')
382
axes[0, 0].set_ylabel('Channel capacity (bits/use)')
383
axes[0, 0].set_title('Binary Channel Capacities')
384
axes[0, 0].legend()
385
axes[0, 0].grid(True, alpha=0.3)
386

387
# Plot 2: AWGN channel capacity
388
snr_db = np.linspace(-10, 30, 100)
389
C_awgn = [awgn_capacity_snr(s) for s in snr_db]
390

391
axes[0, 1].plot(snr_db, C_awgn, 'b-', linewidth=2)
392
axes[0, 1].set_xlabel('SNR (dB)')
393
axes[0, 1].set_ylabel('Capacity (bits/channel use)')
394
axes[0, 1].set_title('AWGN Channel Capacity')
395
axes[0, 1].grid(True, alpha=0.3)
396

397
# Plot 3: BSC transition diagram (simplified visualization)
398
p_error = 0.1
399

400
# Show error probability
401
x_diag = [0, 1]
402
y_diag = [0, 1]
403
axes[1, 0].plot([0, 0], [0, 1], 'b-', linewidth=3, alpha=0.5)
404
axes[1, 0].plot([1, 1], [0, 1], 'b-', linewidth=3, alpha=0.5)
405
axes[1, 0].plot([0, 1], [0, 0], 'b-', linewidth=2 * (1-p_error), alpha=0.7)
406
axes[1, 0].plot([0, 1], [1, 1], 'b-', linewidth=2 * (1-p_error), alpha=0.7)
407
axes[1, 0].plot([0, 1], [0, 1], 'r-', linewidth=2 * p_error, alpha=0.7)
408
axes[1, 0].plot([0, 1], [1, 0], 'r-', linewidth=2 * p_error, alpha=0.7)
409

410
axes[1, 0].scatter([0, 0, 1, 1], [0, 1, 0, 1], s=200, c='steelblue', zorder=5)
411
axes[1, 0].text(-0.15, 0, 'X=0', fontsize=10)
412
axes[1, 0].text(-0.15, 1, 'X=1', fontsize=10)
413
axes[1, 0].text(1.05, 0, 'Y=0', fontsize=10)
414
axes[1, 0].text(1.05, 1, 'Y=1', fontsize=10)
415
axes[1, 0].set_xlim([-0.3, 1.3])
416
axes[1, 0].set_ylim([-0.2, 1.2])
417
axes[1, 0].set_title(f'BSC ($p = {p_error}$, $C = {bsc_capacity(p_error):.3f}$ bits)')
418
axes[1, 0].axis('off')
419

420
# Plot 4: Achievable rates
421
# Shannon's coding theorem: rates below capacity are achievable
422
p_test = 0.1
423
C_test = bsc_capacity(p_test)
424

425
rates = np.linspace(0, 1, 100)
426
# Probability of error for random coding (simplified)
427
Pe_approx = np.exp(-len(rates) * (C_test - rates))
428
Pe_approx[rates > C_test] = 1
429

430
axes[1, 1].fill_between(rates, 0, 1, where=rates <= C_test, alpha=0.3, color='green', label='Achievable')
431
axes[1, 1].fill_between(rates, 0, 1, where=rates > C_test, alpha=0.3, color='red', label='Not achievable')
432
axes[1, 1].axvline(x=C_test, color='black', linestyle='--', linewidth=2, label=f'$C = {C_test:.3f}$')
433
axes[1, 1].set_xlabel('Rate $R$ (bits/use)')
434
axes[1, 1].set_ylabel('Region')
435
axes[1, 1].set_title("Shannon's Coding Theorem")
436
axes[1, 1].legend(loc='upper right')
437
axes[1, 1].set_xlim([0, 1])
438
axes[1, 1].set_ylim([0, 1])
439

440
plt.tight_layout()
441
save_plot('channel_capacity.pdf', 'Channel capacity: BSC, BEC, AWGN, and achievable rates.')
442
\end{pycode}
443

444
\section{Source Coding and Compression}
445
\begin{pycode}
446
# Lempel-Ziv-like compression demonstration
447
def simple_lz_compress(text):
448
    """Simple LZ-style compression."""
449
    dictionary = {}
450
    result = []
451
    current = ""
452

453
    for char in text:
454
        extended = current + char
455
        if extended in dictionary:
456
            current = extended
457
        else:
458
            if current:
459
                result.append(dictionary.get(current, current))
460
            dictionary[extended] = len(dictionary)
461
            current = char
462

463
    if current:
464
        result.append(dictionary.get(current, current))
465

466
    return result, dictionary
467

468
# Demonstration text
469
text = "ABABABABABCABCABCABCDEFABCDEF"
470
compressed, dictionary = simple_lz_compress(text)
471

472
# Calculate compression metrics
473
original_bits = len(text) * 8  # ASCII
474
compressed_symbols = len(compressed)
475
bits_per_symbol = np.ceil(np.log2(max(len(dictionary), 1) + 26))
476
compressed_bits = compressed_symbols * bits_per_symbol
477
compression_ratio = original_bits / compressed_bits
478

479
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
480

481
# Plot 1: Entropy rate for Markov source
482
# First-order Markov chain
483
P_transition = np.array([[0.7, 0.3],
484
                         [0.4, 0.6]])
485

486
# Stationary distribution
487
eigenvalues, eigenvectors = np.linalg.eig(P_transition.T)
488
stationary = eigenvectors[:, np.argmax(eigenvalues)]
489
stationary = np.real(stationary / np.sum(stationary))
490

491
# Entropy rate: H = sum_i pi_i * sum_j -p_ij * log2(p_ij)
492
entropy_rate = 0
493
for i in range(2):
494
    for j in range(2):
495
        if P_transition[i, j] > 0:
496
            entropy_rate -= stationary[i] * P_transition[i, j] * np.log2(P_transition[i, j])
497

498
# Show transition matrix
499
im = axes[0, 0].imshow(P_transition, cmap='Blues', vmin=0, vmax=1)
500
axes[0, 0].set_xticks([0, 1])
501
axes[0, 0].set_yticks([0, 1])
502
axes[0, 0].set_xlabel('Next state')
503
axes[0, 0].set_ylabel('Current state')
504
axes[0, 0].set_title(f'Markov Source (Entropy rate: {entropy_rate:.3f} bits)')
505
for i in range(2):
506
    for j in range(2):
507
        axes[0, 0].text(j, i, f'{P_transition[i,j]:.1f}', ha='center', va='center', fontsize=12)
508
plt.colorbar(im, ax=axes[0, 0])
509

510
# Plot 2: Entropy rate vs iid entropy
511
# Vary the off-diagonal transition probability
512
off_diag = np.linspace(0.01, 0.5, 50)
513
entropy_rates = []
514
iid_entropies = []
515

516
for p in off_diag:
517
    P = np.array([[1-p, p], [p, 1-p]])
518
    # Stationary is uniform for symmetric transition
519
    pi = np.array([0.5, 0.5])
520
    H_rate = -2 * pi[0] * (P[0,0] * np.log2(P[0,0]) + P[0,1] * np.log2(P[0,1]))
521
    entropy_rates.append(H_rate)
522
    iid_entropies.append(binary_entropy(0.5))  # IID entropy is 1 bit
523

524
axes[0, 1].plot(off_diag, entropy_rates, 'b-', linewidth=2, label='Markov entropy rate')
525
axes[0, 1].plot(off_diag, iid_entropies, 'r--', linewidth=2, label='IID entropy')
526
axes[0, 1].set_xlabel('Transition probability $p$')
527
axes[0, 1].set_ylabel('Entropy (bits)')
528
axes[0, 1].set_title('Entropy Rate vs IID Entropy')
529
axes[0, 1].legend()
530
axes[0, 1].grid(True, alpha=0.3)
531

532
# Plot 3: Compression ratio vs data length
533
data_lengths = [10, 20, 50, 100, 200, 500, 1000]
534
ratios = []
535

536
for length in data_lengths:
537
    # Generate text with some repetition
538
    base = "ABCABC"
539
    text_gen = (base * (length // len(base) + 1))[:length]
540
    comp, _ = simple_lz_compress(text_gen)
541
    ratio = len(text_gen) / len(comp) if len(comp) > 0 else 1
542
    ratios.append(ratio)
543

544
axes[1, 0].plot(data_lengths, ratios, 'bo-', linewidth=2, markersize=6)
545
axes[1, 0].set_xlabel('Data length (symbols)')
546
axes[1, 0].set_ylabel('Compression ratio')
547
axes[1, 0].set_title('LZ Compression vs Data Length')
548
axes[1, 0].grid(True, alpha=0.3)
549

550
# Plot 4: Typical sequences
551
# For a binary source with p=0.3
552
p_source = 0.3
553
n_bits = 100
554
n_sequences = 1000
555

556
# Generate sequences and count ones
557
fractions = []
558
for _ in range(n_sequences):
559
    seq = np.random.choice([0, 1], size=n_bits, p=[1-p_source, p_source])
560
    fractions.append(np.mean(seq))
561

562
axes[1, 1].hist(fractions, bins=30, density=True, alpha=0.7, edgecolor='black')
563
axes[1, 1].axvline(x=p_source, color='red', linestyle='--', linewidth=2, label=f'$p = {p_source}$')
564
axes[1, 1].set_xlabel('Fraction of ones')
565
axes[1, 1].set_ylabel('Density')
566
axes[1, 1].set_title(f'Typical Sequences ($n = {n_bits}$)')
567
axes[1, 1].legend()
568
axes[1, 1].grid(True, alpha=0.3)
569

570
plt.tight_layout()
571
save_plot('source_coding.pdf', 'Source coding: Markov entropy, compression ratio, and typical sequences.')
572
\end{pycode}
573

574
\section{Results Summary}
575
\begin{pycode}
576
# Generate results table
577
print(r'\begin{table}[htbp]')
578
print(r'\centering')
579
print(r'\caption{Summary of Information Theory Results}')
580
print(r'\begin{tabular}{lll}')
581
print(r'\toprule')
582
print(r'Measure & Value & Description \\')
583
print(r'\midrule')
584
print(r'\multicolumn{3}{c}{\textit{Huffman Coding}} \\')
585
print(f'Source entropy & {source_entropy:.3f} bits & $H(X)$ \\\\')
586
print(f'Average code length & {avg_length:.3f} bits & $\\bar{{L}}$ \\\\')
587
print(f'Coding efficiency & {efficiency:.1f}\\% & $H(X)/\\bar{{L}}$ \\\\')
588
print(f'Redundancy & {redundancy:.3f} bits & $\\bar{{L}} - H(X)$ \\\\')
589
print(r'\midrule')
590
print(r'\multicolumn{3}{c}{\textit{Mutual Information}} \\')
591
print(f'$H(X)$ & {H_X:.3f} bits & Marginal entropy \\\\')
592
print(f'$H(Y)$ & {H_Y:.3f} bits & Marginal entropy \\\\')
593
print(f'$H(X,Y)$ & {H_XY:.3f} bits & Joint entropy \\\\')
594
print(f'$I(X;Y)$ & {I_XY:.3f} bits & Mutual information \\\\')
595
print(r'\midrule')
596
print(r'\multicolumn{3}{c}{\textit{Channel Capacity}} \\')
597
print(f'BSC ($p = {p_error}$) & {bsc_capacity(p_error):.3f} bits/use & $1 - H_b(p)$ \\\\')
598
print(f'BEC ($\\epsilon = 0.1$) & {bec_capacity(0.1):.3f} bits/use & $1 - \\epsilon$ \\\\')
599
print(f'AWGN (10 dB) & {awgn_capacity_snr(10):.3f} bits/use & $\\frac{{1}}{{2}}\\log_2(1+SNR)$ \\\\')
600
print(r'\bottomrule')
601
print(r'\end{tabular}')
602
print(r'\end{table}')
603
\end{pycode}
604

605
\section{Statistical Summary}
606
\begin{itemize}
607
    \item \textbf{Source entropy}: \py{f"{source_entropy:.3f}"} bits
608
    \item \textbf{Maximum entropy} ($|\mathcal{X}| = $ \py{f"{len(symbols)}"}): \py{f"{np.log2(len(symbols)):.3f}"} bits
609
    \item \textbf{Average Huffman code length}: \py{f"{avg_length:.3f}"} bits
610
    \item \textbf{Coding efficiency}: \py{f"{efficiency:.1f}"}\%
611
    \item \textbf{Joint entropy} $H(X,Y)$: \py{f"{H_XY:.3f}"} bits
612
    \item \textbf{Mutual information} $I(X;Y)$: \py{f"{I_XY:.3f}"} bits
613
    \item \textbf{Markov entropy rate}: \py{f"{entropy_rate:.3f}"} bits
614
    \item \textbf{BSC capacity} ($p = $ \py{f"{p_error}"}): \py{f"{bsc_capacity(p_error):.3f}"} bits/use
615
\end{itemize}
616

617
\section{Conclusion}
618
Information theory provides fundamental limits for data compression and communication. Shannon entropy measures the average information content and sets the minimum average code length. Huffman coding achieves near-optimal compression for known probability distributions. Mutual information quantifies the shared information between random variables, critical for channel coding. The channel coding theorem guarantees error-free communication at rates below capacity, achieved by modern codes like turbo codes and LDPC codes. These principles underpin data compression (ZIP, JPEG), error-correcting codes (WiFi, 5G), and cryptography.
619

620
\end{document}
621

622
Product

Resources

Company