\documentclass[a4paper, 11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb}
\usepackage{graphicx}
\usepackage{siunitx}
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{float}
\usepackage[makestderr]{pythontex}

\title{Network Science: Graph Metrics and Community Detection}
\author{Computational Science Templates}
\date{\today}

\begin{document}
\maketitle

\begin{abstract}
Network science provides tools for analyzing complex systems through graph theory. This document demonstrates computational methods for calculating centrality measures, detecting community structure, analyzing random graph models, and characterizing small-world and scale-free properties. Using PythonTeX, we implement algorithms for betweenness centrality, community detection with modularity evaluation, Erdos-Renyi and Barabasi-Albert models, and compute characteristic path lengths and clustering coefficients.
\end{abstract}

\section{Introduction}
Network analysis characterizes complex systems through graph metrics. Networks appear throughout science: social networks, biological networks (protein interactions, neural connections), infrastructure networks (power grids, transportation), and information networks (the World Wide Web, citation graphs). This analysis computes centrality measures, identifies community structure, and compares random network models.

\section{Mathematical Framework}

\subsection{Centrality Measures}
Degree centrality measures local connectivity:
\begin{equation}
C_D(v) = \frac{\deg(v)}{n-1}
\end{equation}

Betweenness centrality quantifies how often a node lies on shortest paths:
\begin{equation}
C_B(v) = \sum_{s \neq v \neq t} \frac{\sigma_{st}(v)}{\sigma_{st}}
\end{equation}
where $\sigma_{st}$ is the number of shortest paths from $s$ to $t$, and $\sigma_{st}(v)$ is the number of those paths that pass through $v$.

Closeness centrality is the reciprocal of the average distance to all other nodes:
\begin{equation}
C_C(v) = \frac{n-1}{\sum_{u \neq v} d(v, u)}
\end{equation}

Eigenvector centrality assigns importance based on the importance of a node's neighbors:
\begin{equation}
x_v = \frac{1}{\lambda} \sum_{u \in N(v)} x_u
\end{equation}
Equivalently, $x$ is the leading eigenvector of the adjacency matrix $A$ (that is, $Ax = \lambda x$ with $\lambda$ the largest eigenvalue).

\subsection{Clustering and Transitivity}
The local clustering coefficient of node $i$ is the fraction of its neighbor pairs that are themselves connected:
\begin{equation}
C_i = \frac{2\,\bigl|\{\, e_{jk} : v_j, v_k \in N_i,\; e_{jk} \in E \,\}\bigr|}{k_i(k_i-1)}
\end{equation}

The global clustering coefficient (transitivity) is
\begin{equation}
C = \frac{3 \times \text{number of triangles}}{\text{number of connected triples}}
\end{equation}
For example, a triangle contains one triangle and three connected triples, giving $C = 3 \cdot 1 / 3 = 1$, while a 3-star (one hub joined to three leaves) contains no triangles and three triples, giving $C = 0$.

\subsection{Modularity}
Modularity measures the quality of a partition into communities:
\begin{equation}
Q = \frac{1}{2m} \sum_{ij} \left[ A_{ij} - \frac{k_i k_j}{2m} \right] \delta(c_i, c_j)
\end{equation}
where $A_{ij}$ is the adjacency matrix, $k_i$ is the degree of node $i$, $m$ is the total number of edges, and $\delta(c_i, c_j) = 1$ if nodes $i$ and $j$ belong to the same community and $0$ otherwise.

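As a small worked example of the formula, consider the four-node path graph $1$--$2$--$3$--$4$ ($m = 3$ edges) with the partition $\{1,2\}$, $\{3,4\}$. The degrees are $k_1 = k_4 = 1$ and $k_2 = k_3 = 2$, so
\begin{equation*}
Q = \frac{1}{6}\left[2\left(1 - \frac{2}{6}\right) - \frac{1}{6} - \frac{4}{6}\right]
  + \frac{1}{6}\left[2\left(1 - \frac{2}{6}\right) - \frac{4}{6} - \frac{1}{6}\right]
  = \frac{1}{6} \approx 0.17,
\end{equation*}
where the two brackets collect the terms for the communities $\{1,2\}$ and $\{3,4\}$, respectively.
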
\section{Environment Setup}

\begin{pycode}
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse
from collections import deque
import warnings
warnings.filterwarnings('ignore')

# usetex requires a LaTeX installation visible to matplotlib
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

np.random.seed(42)

def save_plot(filename, caption=""):
    """Save the current figure and emit a LaTeX figure environment."""
    plt.savefig(filename, bbox_inches='tight', dpi=150)
    print(r'\begin{figure}[htbp]')
    print(r'\centering')
    print(r'\includegraphics[width=0.95\textwidth]{' + filename + '}')
    if caption:
        print(r'\caption{' + caption + '}')
    print(r'\end{figure}')
    plt.close()
\end{pycode}

\section{Network Generation and Basic Metrics}

\begin{pycode}
# Generate random network (Erdos-Renyi model)
n = 50
p = 0.1
adj = np.zeros((n, n))
for i in range(n):
    for j in range(i+1, n):
        if np.random.rand() < p:
            adj[i, j] = adj[j, i] = 1

# Compute degrees
degrees = np.sum(adj, axis=1).astype(int)

# Degree centrality
degree_centrality = degrees / (n - 1)

# Compute shortest paths using BFS
def bfs_shortest_paths(adj, source):
    n = adj.shape[0]
    dist = np.full(n, np.inf)
    dist[source] = 0
    queue = deque([source])
    while queue:
        u = queue.popleft()
        for v in range(n):
            if adj[u, v] == 1 and dist[v] == np.inf:
                dist[v] = dist[u] + 1
                queue.append(v)
    return dist

# Compute all-pairs shortest paths
all_distances = np.zeros((n, n))
for i in range(n):
    all_distances[i] = bfs_shortest_paths(adj, i)

# Closeness centrality (only for reachable nodes)
closeness_centrality = np.zeros(n)
for i in range(n):
    reachable = all_distances[i] < np.inf
    n_reachable = np.sum(reachable) - 1
    if n_reachable > 0:
        closeness_centrality[i] = n_reachable / np.sum(all_distances[i][reachable])

# Local clustering coefficient
clustering = np.zeros(n)
for i in range(n):
    neighbors = np.where(adj[i] == 1)[0]
    k = len(neighbors)
    if k >= 2:
        edges = 0
        for j in range(len(neighbors)):
            for l in range(j+1, len(neighbors)):
                if adj[neighbors[j], neighbors[l]] == 1:
                    edges += 1
        clustering[i] = 2 * edges / (k * (k - 1))

# Network statistics
n_edges = int(np.sum(adj) / 2)
avg_degree = np.mean(degrees)
avg_clustering = np.mean(clustering)
density = 2 * n_edges / (n * (n - 1))

# Characteristic path length (average shortest path)
finite_distances = all_distances[all_distances < np.inf]
finite_distances = finite_distances[finite_distances > 0]
char_path_length = np.mean(finite_distances) if len(finite_distances) > 0 else np.inf

# Count triangles
triangles = 0
for i in range(n):
    neighbors = np.where(adj[i] == 1)[0]
    for j in range(len(neighbors)):
        for k in range(j+1, len(neighbors)):
            if adj[neighbors[j], neighbors[k]] == 1:
                triangles += 1
triangles = triangles // 3  # Each triangle counted 3 times

# Global clustering coefficient
connected_triples = sum(k*(k-1)//2 for k in degrees)
global_clustering = 3 * triangles / connected_triples if connected_triples > 0 else 0
\end{pycode}

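A minimal, illustrative sanity check, assuming the variables defined in the block above remain available in the same PythonTeX session: the handshake lemma ties the degree sum to the edge count, and for a $G(n, p)$ realization the measured density should lie close to the target probability $p$.

\begin{pycode}
# Illustrative consistency checks on the generated graph (reuses adj, degrees,
# n_edges, density and p from the block above).
assert int(np.sum(degrees)) == 2 * n_edges   # handshake lemma: degree sum = 2|E|
density_gap = abs(density - p)               # small for a typical G(n, p) sample
\end{pycode}
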
\section{Betweenness Centrality}

\begin{pycode}
# Compute betweenness centrality using Brandes algorithm
def compute_betweenness(adj):
    n = adj.shape[0]
    betweenness = np.zeros(n)

    for s in range(n):
        # Single-source shortest paths
        S = []  # Stack of vertices in order of non-increasing distance
        P = [[] for _ in range(n)]  # Predecessors
        sigma = np.zeros(n)  # Number of shortest paths
        sigma[s] = 1
        d = np.full(n, -1)  # Distance from source
        d[s] = 0

        Q = deque([s])
        while Q:
            v = Q.popleft()
            S.append(v)
            for w in range(n):
                if adj[v, w] == 1:
                    if d[w] < 0:
                        Q.append(w)
                        d[w] = d[v] + 1
                    if d[w] == d[v] + 1:
                        sigma[w] += sigma[v]
                        P[w].append(v)

        # Accumulation
        delta = np.zeros(n)
        while S:
            w = S.pop()
            for v in P[w]:
                delta[v] += (sigma[v] / sigma[w]) * (1 + delta[w])
            if w != s:
                betweenness[w] += delta[w]

    # Normalize: in an undirected graph each pair (s, t) is accumulated from
    # both endpoints, so dividing by (n-1)(n-2) maps values into [0, 1]
    betweenness = betweenness / ((n-1) * (n-2))
    return betweenness

betweenness_centrality = compute_betweenness(adj)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Network visualization with degree centrality
theta = np.linspace(0, 2*np.pi, n, endpoint=False)
pos = np.column_stack([np.cos(theta), np.sin(theta)])

for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 0].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.4)

sizes = 50 + 400 * degree_centrality
sc = axes[0, 0].scatter(pos[:, 0], pos[:, 1], s=sizes, c=degree_centrality,
                        cmap='viridis', edgecolors='black', linewidth=0.5)
plt.colorbar(sc, ax=axes[0, 0], label='Degree Centrality')
axes[0, 0].set_title('Network (node size = degree)')
axes[0, 0].set_aspect('equal')
axes[0, 0].axis('off')

# Degree distribution
unique_degrees, degree_counts = np.unique(degrees, return_counts=True)
axes[0, 1].bar(unique_degrees, degree_counts, alpha=0.7, edgecolor='black', color='steelblue')
axes[0, 1].set_xlabel('Degree $k$')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('Degree Distribution')
axes[0, 1].grid(True, alpha=0.3)

# Expected Poisson distribution for ER graph
k_range = np.arange(0, max(degrees)+5)
from scipy.stats import poisson
expected_freq = n * poisson.pmf(k_range, avg_degree)
axes[0, 1].plot(k_range, expected_freq, 'r--', linewidth=2, label='Poisson')
axes[0, 1].legend()

# Centrality comparison
axes[1, 0].scatter(degree_centrality, betweenness_centrality, alpha=0.6)
axes[1, 0].set_xlabel('Degree Centrality')
axes[1, 0].set_ylabel('Betweenness Centrality')
axes[1, 0].set_title('Degree vs Betweenness Centrality')
axes[1, 0].grid(True, alpha=0.3)

# Correlation coefficient
corr = np.corrcoef(degree_centrality, betweenness_centrality)[0, 1]
axes[1, 0].text(0.05, 0.95, f'$r = {corr:.3f}$', transform=axes[1, 0].transAxes,
                fontsize=10, verticalalignment='top')

# Clustering vs degree
axes[1, 1].scatter(degrees, clustering, alpha=0.6, color='green')
axes[1, 1].set_xlabel('Degree $k$')
axes[1, 1].set_ylabel('Local Clustering $C_i$')
axes[1, 1].set_title('Degree vs Clustering Coefficient')
axes[1, 1].grid(True, alpha=0.3)

# Expected clustering for ER graph
axes[1, 1].axhline(y=p, color='r', linestyle='--', label=f'Expected (p={p})')
axes[1, 1].legend()

plt.tight_layout()
save_plot('network_basic_metrics.pdf', 'Basic network metrics for Erdos-Renyi random graph')
\end{pycode}

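As an optional, illustrative cross-check, assuming a reasonably recent NetworkX is installed (the block is skipped silently otherwise), the hand-rolled Brandes implementation can be compared against \texttt{networkx.betweenness\_centrality}:

\begin{pycode}
# Optional cross-check against NetworkX (illustrative; requires networkx).
try:
    import networkx as nx
    G_check = nx.from_numpy_array(adj)
    bc_nx_dict = nx.betweenness_centrality(G_check, normalized=True)
    bc_nx = np.array([bc_nx_dict[i] for i in range(n)])
    betweenness_max_diff = np.max(np.abs(bc_nx - betweenness_centrality))
except ImportError:
    betweenness_max_diff = None
\end{pycode}
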
\section{Eigenvector Centrality and PageRank}

\begin{pycode}
# Eigenvector centrality using power iteration
def eigenvector_centrality(adj, max_iter=100, tol=1e-6):
    n = adj.shape[0]
    x = np.ones(n) / np.sqrt(n)

    for _ in range(max_iter):
        x_new = adj @ x
        norm = np.linalg.norm(x_new)
        if norm > 0:
            x_new = x_new / norm
        if np.linalg.norm(x_new - x) < tol:
            break
        x = x_new

    return x_new

# PageRank (random walk with damping)
def pagerank(adj, damping=0.85, max_iter=100, tol=1e-6):
    n = adj.shape[0]
    out_degree = np.sum(adj, axis=1)

    # Create transition matrix
    M = np.zeros((n, n))
    for i in range(n):
        if out_degree[i] > 0:
            M[:, i] = adj[:, i] / out_degree[i]
        else:
            M[:, i] = 1.0 / n  # Dangling nodes

    # Power iteration
    pr = np.ones(n) / n
    for _ in range(max_iter):
        pr_new = damping * M @ pr + (1 - damping) / n
        if np.linalg.norm(pr_new - pr) < tol:
            break
        pr = pr_new

    return pr_new

eig_centrality = eigenvector_centrality(adj)
pr = pagerank(adj)

# Katz centrality
def katz_centrality(adj, alpha=0.1, beta=1.0):
    n = adj.shape[0]
    I = np.eye(n)
    ones = np.ones(n)
    try:
        centrality = np.linalg.solve(I - alpha * adj.T, beta * ones)
    except np.linalg.LinAlgError:
        centrality = np.zeros(n)
    return centrality / np.max(centrality) if np.max(centrality) > 0 else centrality

katz_cent = katz_centrality(adj, alpha=0.05)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Compare centrality measures
centrality_measures = {
    'Degree': degree_centrality,
    'Betweenness': betweenness_centrality,
    'Closeness': closeness_centrality,
    'Eigenvector': eig_centrality
}

# Heatmap of centrality correlations
names = list(centrality_measures.keys())
measures = np.array([centrality_measures[name] for name in names])
corr_matrix = np.corrcoef(measures)

im = axes[0, 0].imshow(corr_matrix, cmap='RdYlBu_r', vmin=-1, vmax=1)
axes[0, 0].set_xticks(range(len(names)))
axes[0, 0].set_yticks(range(len(names)))
axes[0, 0].set_xticklabels(names, rotation=45)
axes[0, 0].set_yticklabels(names)
for i in range(len(names)):
    for j in range(len(names)):
        axes[0, 0].text(j, i, f'{corr_matrix[i, j]:.2f}', ha='center', va='center')
plt.colorbar(im, ax=axes[0, 0], label='Correlation')
axes[0, 0].set_title('Centrality Measure Correlations')

# Network with eigenvector centrality
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 1].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.4)

sizes = 50 + 400 * (eig_centrality / max(eig_centrality))
sc = axes[0, 1].scatter(pos[:, 0], pos[:, 1], s=sizes, c=eig_centrality,
                        cmap='plasma', edgecolors='black', linewidth=0.5)
plt.colorbar(sc, ax=axes[0, 1], label='Eigenvector Centrality')
axes[0, 1].set_title('Network (size = eigenvector centrality)')
axes[0, 1].set_aspect('equal')
axes[0, 1].axis('off')

# PageRank distribution
sorted_pr = np.sort(pr)[::-1]
axes[1, 0].bar(range(n), sorted_pr, alpha=0.7, color='coral')
axes[1, 0].set_xlabel('Node Rank')
axes[1, 0].set_ylabel('PageRank')
axes[1, 0].set_title('PageRank Distribution (sorted)')
axes[1, 0].grid(True, alpha=0.3)

# Eigenvector vs PageRank
axes[1, 1].scatter(eig_centrality, pr, alpha=0.6, color='purple')
axes[1, 1].set_xlabel('Eigenvector Centrality')
axes[1, 1].set_ylabel('PageRank')
axes[1, 1].set_title('Eigenvector Centrality vs PageRank')
axes[1, 1].grid(True, alpha=0.3)
corr_pr = np.corrcoef(eig_centrality, pr)[0, 1]
axes[1, 1].text(0.05, 0.95, f'$r = {corr_pr:.3f}$', transform=axes[1, 1].transAxes,
                fontsize=10, verticalalignment='top')

plt.tight_layout()
save_plot('network_centrality_advanced.pdf', 'Advanced centrality measures')

# Top nodes by centrality
top_k = 5
top_degree = np.argsort(degree_centrality)[-top_k:][::-1]
top_between = np.argsort(betweenness_centrality)[-top_k:][::-1]
top_eigen = np.argsort(eig_centrality)[-top_k:][::-1]
\end{pycode}

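A brief illustrative check of the power iteration, assuming \texttt{eig\_centrality} and \texttt{adj} from the block above are still in scope: the converged vector should approximately satisfy $Ax = \lambda x$, with $\lambda$ estimated by the Rayleigh quotient.

\begin{pycode}
# Illustrative convergence check for the power iteration above.
lambda_est = eig_centrality @ (adj @ eig_centrality) / (eig_centrality @ eig_centrality)
eig_residual = np.linalg.norm(adj @ eig_centrality - lambda_est * eig_centrality)
\end{pycode}
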
\section{Community Detection}

\begin{pycode}
# Simple community detection using label propagation
def label_propagation(adj, max_iter=100):
    n = adj.shape[0]
    labels = np.arange(n)

    for _ in range(max_iter):
        order = np.random.permutation(n)
        changed = False
        for node in order:
            neighbors = np.where(adj[node] == 1)[0]
            if len(neighbors) > 0:
                neighbor_labels = labels[neighbors]
                unique, counts = np.unique(neighbor_labels, return_counts=True)
                max_count = max(counts)
                candidates = unique[counts == max_count]
                new_label = np.random.choice(candidates)
                if new_label != labels[node]:
                    labels[node] = new_label
                    changed = True
        if not changed:
            break

    # Relabel communities to 0, 1, 2, ...
    unique_labels = np.unique(labels)
    label_map = {old: new for new, old in enumerate(unique_labels)}
    return np.array([label_map[l] for l in labels])

# Compute modularity
def compute_modularity(adj, communities):
    n = adj.shape[0]
    m = np.sum(adj) / 2
    degrees = np.sum(adj, axis=1)

    Q = 0
    for i in range(n):
        for j in range(n):
            if communities[i] == communities[j]:
                Q += adj[i, j] - degrees[i] * degrees[j] / (2 * m)

    return Q / (2 * m)

# Spectral clustering for community detection
def spectral_clustering(adj, k=3):
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1)
    D = np.diag(degrees)
    L = D - adj  # Laplacian

    # Normalized Laplacian
    D_inv_sqrt = np.diag(1.0 / np.sqrt(degrees + 1e-10))
    L_norm = D_inv_sqrt @ L @ D_inv_sqrt

    # Eigendecomposition
    eigenvalues, eigenvectors = np.linalg.eigh(L_norm)

    # Use first k eigenvectors (after the trivial one)
    features = eigenvectors[:, 1:k+1]

    # K-means clustering on eigenvector features
    from scipy.cluster.vq import kmeans2
    _, labels = kmeans2(features, k, minit='++')

    return labels, eigenvalues

# Detect communities
communities_lp = label_propagation(adj)
n_communities_lp = len(np.unique(communities_lp))

# Spectral clustering
k_communities = 4
communities_spec, laplacian_eigenvalues = spectral_clustering(adj, k=k_communities)

# Compute modularities
mod_lp = compute_modularity(adj, communities_lp)
mod_spec = compute_modularity(adj, communities_spec)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Label propagation communities
colors_lp = plt.cm.Set1(communities_lp / (n_communities_lp - 1 + 0.01))
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 0].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.3)

axes[0, 0].scatter(pos[:, 0], pos[:, 1], s=100, c=communities_lp,
                   cmap='Set1', edgecolors='black', linewidth=0.5)
axes[0, 0].set_title(f'Label Propagation ({n_communities_lp} communities, Q={mod_lp:.3f})')
axes[0, 0].set_aspect('equal')
axes[0, 0].axis('off')

# Spectral clustering communities
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 1].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.3)

axes[0, 1].scatter(pos[:, 0], pos[:, 1], s=100, c=communities_spec,
                   cmap='Set2', edgecolors='black', linewidth=0.5)
axes[0, 1].set_title(f'Spectral Clustering ({k_communities} communities, Q={mod_spec:.3f})')
axes[0, 1].set_aspect('equal')
axes[0, 1].axis('off')

# Laplacian eigenvalues (spectral gap)
axes[1, 0].plot(laplacian_eigenvalues[:15], 'o-', markersize=8)
axes[1, 0].set_xlabel('Index')
axes[1, 0].set_ylabel('Eigenvalue')
axes[1, 0].set_title('Laplacian Eigenvalues (spectral gap)')
axes[1, 0].grid(True, alpha=0.3)

# Community size distribution
for i, (comm, name) in enumerate([(communities_lp, 'Label Prop'),
                                  (communities_spec, 'Spectral')]):
    unique, counts = np.unique(comm, return_counts=True)
    x_pos = np.arange(len(counts)) + i*0.4
    axes[1, 1].bar(x_pos, counts, width=0.35, alpha=0.7, label=name)

axes[1, 1].set_xlabel('Community')
axes[1, 1].set_ylabel('Size')
axes[1, 1].set_title('Community Size Distribution')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
save_plot('network_communities.pdf', 'Community detection using label propagation and spectral clustering')
\end{pycode}

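As an illustrative baseline, reusing \texttt{compute\_modularity} and the graph from the block above, the modularity of a random partition of the same size should sit near zero, which puts the detected values in context:

\begin{pycode}
# Illustrative baseline: modularity of a uniformly random partition into
# k_communities groups (expected to be close to zero for this ER graph).
# A separate generator is used so the global seed-42 stream is unaffected.
random_partition = np.random.default_rng(0).integers(0, k_communities, size=n)
mod_random = compute_modularity(adj, random_partition)
\end{pycode}
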
\section{Random Graph Models}

\begin{pycode}
# Compare Erdos-Renyi and Barabasi-Albert models
n_compare = 100

# Erdos-Renyi G(n, p)
p_er = 0.05
adj_er = np.zeros((n_compare, n_compare))
for i in range(n_compare):
    for j in range(i+1, n_compare):
        if np.random.rand() < p_er:
            adj_er[i, j] = adj_er[j, i] = 1

# Barabasi-Albert preferential attachment
def barabasi_albert(n, m):
    adj = np.zeros((n, n))
    # Start with m+1 fully connected nodes
    for i in range(m+1):
        for j in range(i+1, m+1):
            adj[i, j] = adj[j, i] = 1

    degrees = np.sum(adj, axis=1)

    # Add remaining nodes
    for new_node in range(m+1, n):
        # Preferential attachment: choose m distinct targets with probability
        # proportional to their current degree
        probs = degrees[:new_node] / np.sum(degrees[:new_node])
        targets = np.random.choice(new_node, size=m, replace=False, p=probs)
        for target in targets:
            adj[new_node, target] = adj[target, new_node] = 1
        degrees = np.sum(adj, axis=1)

    return adj

m_ba = 2
adj_ba = barabasi_albert(n_compare, m_ba)

# Compute metrics for both
degrees_er = np.sum(adj_er, axis=1).astype(int)
degrees_ba = np.sum(adj_ba, axis=1).astype(int)

avg_degree_er = np.mean(degrees_er)
avg_degree_ba = np.mean(degrees_ba)

# Clustering coefficients
def compute_clustering(adj):
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1).astype(int)
    clustering = np.zeros(n)
    for i in range(n):
        neighbors = np.where(adj[i] == 1)[0]
        k = len(neighbors)
        if k >= 2:
            edges = 0
            for j in range(len(neighbors)):
                for l in range(j+1, len(neighbors)):
                    if adj[neighbors[j], neighbors[l]] == 1:
                        edges += 1
            clustering[i] = 2 * edges / (k * (k - 1))
    return clustering

clustering_er = compute_clustering(adj_er)
clustering_ba = compute_clustering(adj_ba)

avg_clust_er = np.mean(clustering_er)
avg_clust_ba = np.mean(clustering_ba)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Degree distributions comparison
max_degree = max(max(degrees_er), max(degrees_ba))
bins = np.arange(0, max_degree + 2) - 0.5

axes[0, 0].hist(degrees_er, bins=bins, alpha=0.5, label=f'ER (avg={avg_degree_er:.1f})',
                edgecolor='blue', color='blue')
axes[0, 0].hist(degrees_ba, bins=bins, alpha=0.5, label=f'BA (avg={avg_degree_ba:.1f})',
                edgecolor='red', color='red')
axes[0, 0].set_xlabel('Degree $k$')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('Degree Distribution Comparison')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Log-log degree distribution for BA (scale-free test)
unique_ba, counts_ba = np.unique(degrees_ba, return_counts=True)
axes[0, 1].loglog(unique_ba[unique_ba > 0], counts_ba[unique_ba > 0], 'ro', markersize=8, label='BA Data')

# Power law fit
log_k = np.log(unique_ba[unique_ba > 1])
log_p = np.log(counts_ba[unique_ba > 1] / n_compare)
if len(log_k) > 2:
    coeffs = np.polyfit(log_k, log_p, 1)
    gamma = -coeffs[0]
    k_fit = np.logspace(np.log10(2), np.log10(max(degrees_ba)), 50)
    p_fit = np.exp(coeffs[1]) * n_compare * k_fit**(-gamma)
    axes[0, 1].loglog(k_fit, p_fit, 'b--', linewidth=2, label=f'Power law $\\gamma={gamma:.2f}$')

axes[0, 1].set_xlabel('Degree $k$')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('BA Degree Distribution (log-log)')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Clustering coefficient vs degree
axes[1, 0].scatter(degrees_er, clustering_er, alpha=0.4, label='Erdos-Renyi', s=30)
axes[1, 0].scatter(degrees_ba, clustering_ba, alpha=0.4, label='Barabasi-Albert', s=30)
axes[1, 0].set_xlabel('Degree $k$')
axes[1, 0].set_ylabel('Local Clustering $C_i$')
axes[1, 0].set_title('Clustering vs Degree')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Network metrics comparison
metrics_names = ['Nodes', 'Edges', 'Avg Degree', 'Avg Clustering']
metrics_er = [n_compare, int(np.sum(adj_er)/2), avg_degree_er, avg_clust_er * 10]
metrics_ba = [n_compare, int(np.sum(adj_ba)/2), avg_degree_ba, avg_clust_ba * 10]

x = np.arange(len(metrics_names))
width = 0.35
axes[1, 1].bar(x - width/2, metrics_er, width, label='Erdos-Renyi', alpha=0.7)
axes[1, 1].bar(x + width/2, metrics_ba, width, label='Barabasi-Albert', alpha=0.7)
axes[1, 1].set_ylabel('Value (clustering x10)')
axes[1, 1].set_xticks(x)
axes[1, 1].set_xticklabels(metrics_names)
axes[1, 1].set_title('Network Metrics Comparison')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
save_plot('network_random_models.pdf', 'Comparison of Erdos-Renyi and Barabasi-Albert random graph models')

# Store for summary
n_edges_er = int(np.sum(adj_er) / 2)
n_edges_ba = int(np.sum(adj_ba) / 2)
\end{pycode}

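For reference, a quick illustrative comparison with the closed-form expectations for $G(n, p)$, reusing the quantities computed above: the expected average degree is $(n-1)p$ and the expected clustering coefficient equals $p$.

\begin{pycode}
# Illustrative theory comparison for the G(n, p) sample above.
expected_avg_degree_er = (n_compare - 1) * p_er   # = 4.95 for n=100, p=0.05
clust_gap_er = abs(avg_clust_er - p_er)           # measured vs expected clustering
\end{pycode}
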
\section{Small-World Networks}

\begin{pycode}
# Watts-Strogatz small-world model
def watts_strogatz(n, k, p):
    """Generate Watts-Strogatz small-world network."""
    adj = np.zeros((n, n))

    # Create ring lattice
    for i in range(n):
        for j in range(1, k//2 + 1):
            adj[i, (i+j) % n] = adj[(i+j) % n, i] = 1

    # Rewire edges with probability p
    for i in range(n):
        for j in range(1, k//2 + 1):
            if np.random.rand() < p:
                neighbor = (i + j) % n
                if adj[i, neighbor] == 1:
                    adj[i, neighbor] = adj[neighbor, i] = 0
                    # Choose new target
                    possible = [x for x in range(n) if x != i and adj[i, x] == 0]
                    if possible:
                        new_target = np.random.choice(possible)
                        adj[i, new_target] = adj[new_target, i] = 1

    return adj

# Characteristic path length
def compute_path_length(adj):
    n = adj.shape[0]
    total_dist = 0
    count = 0
    for i in range(n):
        dist = bfs_shortest_paths(adj, i)
        for j in range(i+1, n):
            if dist[j] < np.inf:
                total_dist += dist[j]
                count += 1
    return total_dist / count if count > 0 else np.inf

# Generate networks for different rewiring probabilities
n_ws = 50
k_ws = 6
p_values = [0, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0]

path_lengths = []
clusterings = []

for p_ws in p_values:
    adj_ws = watts_strogatz(n_ws, k_ws, p_ws)
    pl = compute_path_length(adj_ws)
    clust = np.mean(compute_clustering(adj_ws))
    path_lengths.append(pl)
    clusterings.append(clust)

# Normalize by p=0 values
L0 = path_lengths[0]
C0 = clusterings[0]
L_norm = [L/L0 for L in path_lengths]
C_norm = [C/C0 for C in clusterings]

# Generate examples for visualization
adj_lattice = watts_strogatz(30, 4, 0)
adj_small_world = watts_strogatz(30, 4, 0.1)
adj_random = watts_strogatz(30, 4, 1.0)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Small-world transition
axes[0, 0].semilogx(p_values[1:], L_norm[1:], 'o-', markersize=8, label='$L(p)/L(0)$')
axes[0, 0].semilogx(p_values[1:], C_norm[1:], 's-', markersize=8, label='$C(p)/C(0)$')
axes[0, 0].axvspan(0.01, 0.1, alpha=0.2, color='yellow', label='Small-world regime')
axes[0, 0].set_xlabel('Rewiring probability $p$')
axes[0, 0].set_ylabel('Normalized value')
axes[0, 0].set_title('Small-World Transition')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Plot the three network types
for idx, (adj_plot, title) in enumerate([(adj_lattice, 'Ring Lattice (p=0)'),
                                         (adj_small_world, 'Small-World (p=0.1)'),
                                         (adj_random, 'Random (p=1)')]):
    if idx == 0:
        ax = axes[0, 1]
    elif idx == 1:
        ax = axes[1, 0]
    else:
        ax = axes[1, 1]

    n_plot = adj_plot.shape[0]
    theta = np.linspace(0, 2*np.pi, n_plot, endpoint=False)
    pos_plot = np.column_stack([np.cos(theta), np.sin(theta)])

    for i in range(n_plot):
        for j in range(i+1, n_plot):
            if adj_plot[i, j] == 1:
                ax.plot([pos_plot[i, 0], pos_plot[j, 0]],
                        [pos_plot[i, 1], pos_plot[j, 1]],
                        'gray', linewidth=0.5, alpha=0.5)

    ax.scatter(pos_plot[:, 0], pos_plot[:, 1], s=50, c='steelblue',
               edgecolors='black', linewidth=0.5)
    ax.set_title(title)
    ax.set_aspect('equal')
    ax.axis('off')

plt.tight_layout()
save_plot('network_small_world.pdf', 'Watts-Strogatz small-world network model')

# Small-world index at p=0.1: relative clustering divided by relative path
# length (values well above 1 indicate small-world structure)
sw_index = (C_norm[3] / C_norm[-1]) / (L_norm[3] / L_norm[-1]) if L_norm[-1] > 0 else 0
\end{pycode}

\section{Network Motifs and Structural Patterns}

\begin{pycode}
# Analyze network motifs
# Count common subgraph patterns

def count_triangles(adj):
    n = adj.shape[0]
    count = 0
    for i in range(n):
        for j in range(i+1, n):
            for k in range(j+1, n):
                if adj[i,j] == 1 and adj[j,k] == 1 and adj[i,k] == 1:
                    count += 1
    return count

def count_stars(adj, k=3):
    """Count k-stars (one hub connected to k nodes)."""
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1).astype(int)
    from math import comb
    count = sum(comb(d, k) for d in degrees if d >= k)
    return count

def count_paths(adj, length=2):
    """Count paths of given length."""
    if length == 2:
        return count_stars(adj, 2)  # 2-star = path of length 2
    else:
        return 0

# Analyze original ER network
tri_count = count_triangles(adj)
star3_count = count_stars(adj, 3)
star2_count = count_stars(adj, 2)

# Assortativity (degree correlation)
def compute_assortativity(adj):
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1)
    edges = []
    for i in range(n):
        for j in range(i+1, n):
            if adj[i, j] == 1:
                edges.append((degrees[i], degrees[j]))

    if len(edges) == 0:
        return 0

    edges = np.array(edges)
    x, y = edges[:, 0], edges[:, 1]

    # Pearson correlation of degrees at either end of each edge,
    # symmetrized over both orientations of the undirected edges
    r = np.corrcoef(np.concatenate([x, y]), np.concatenate([y, x]))[0, 1]
    return r

assortativity = compute_assortativity(adj)
assortativity_ba = compute_assortativity(adj_ba)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Motif counts
motifs = ['Triangles', '3-Stars', '2-Paths']
counts = [tri_count, star3_count, star2_count]
colors = ['coral', 'steelblue', 'seagreen']
axes[0, 0].bar(motifs, counts, color=colors, alpha=0.7, edgecolor='black')
axes[0, 0].set_ylabel('Count')
axes[0, 0].set_title('Network Motif Counts')
axes[0, 0].grid(True, alpha=0.3)

# Degree-degree correlation plot (assortativity visualization)
edges = []
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            edges.append((degrees[i], degrees[j]))

if edges:
    edges = np.array(edges)
    axes[0, 1].scatter(edges[:, 0], edges[:, 1], alpha=0.3, s=50)
axes[0, 1].set_xlabel('Degree of node $i$')
axes[0, 1].set_ylabel('Degree of node $j$')
axes[0, 1].set_title(f'Degree Correlation (assortativity r={assortativity:.3f})')
axes[0, 1].grid(True, alpha=0.3)

# Rich-club coefficient
def rich_club_coefficient(adj):
    degrees = np.sum(adj, axis=1).astype(int)
    unique_degrees = np.unique(degrees)
    phi = {}

    for k in unique_degrees:
        # Nodes with degree > k
        rich_nodes = np.where(degrees > k)[0]
        n_rich = len(rich_nodes)
        if n_rich < 2:
            continue

        # Edges among rich nodes
        edges_rich = 0
        for i in range(len(rich_nodes)):
            for j in range(i+1, len(rich_nodes)):
                if adj[rich_nodes[i], rich_nodes[j]] == 1:
                    edges_rich += 1

        max_edges = n_rich * (n_rich - 1) / 2
        if max_edges > 0:
            phi[k] = edges_rich / max_edges

    return phi

phi = rich_club_coefficient(adj)
k_vals = sorted(phi.keys())
phi_vals = [phi[k] for k in k_vals]

axes[1, 0].plot(k_vals, phi_vals, 'o-', markersize=6)
axes[1, 0].set_xlabel('Degree threshold $k$')
axes[1, 0].set_ylabel('Rich-club coefficient $\\phi(k)$')
axes[1, 0].set_title('Rich-Club Organization')
axes[1, 0].grid(True, alpha=0.3)

# Compare ER and BA assortativity
network_types = ['Erdos-Renyi', 'Barabasi-Albert']
assort_values = [assortativity, assortativity_ba]
colors = ['steelblue', 'coral']
axes[1, 1].bar(network_types, assort_values, color=colors, alpha=0.7, edgecolor='black')
axes[1, 1].axhline(y=0, color='black', linestyle='--', linewidth=1)
axes[1, 1].set_ylabel('Assortativity Coefficient')
axes[1, 1].set_title('Network Assortativity Comparison')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
save_plot('network_motifs.pdf', 'Network motifs, assortativity, and rich-club organization')
\end{pycode}

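As a final illustrative comparison, reusing $n$ and $p$ from the generation block, the expected number of triangles in $G(n, p)$ is $\binom{n}{3} p^3$, which can be set against the observed count:

\begin{pycode}
# Illustrative expectation for the triangle count in G(n, p).
from math import comb
expected_triangles = comb(n, 3) * p**3   # about 19.6 for n=50, p=0.1
\end{pycode}
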
\section{Results Summary}

\begin{pycode}
# Create comprehensive results table
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Network Analysis Results}')
print(r'\begin{tabular}{lcc}')
print(r'\toprule')
print(f'Metric & ER network ($n={n}$) & BA network ($n={n_compare}$) \\\\')
print(r'\midrule')
print(f'Nodes & {n} & {n_compare} \\\\')
print(f'Edges & {n_edges} & {n_edges_ba} \\\\')
print(f'Average Degree & {avg_degree:.2f} & {avg_degree_ba:.2f} \\\\')
print(f'Average Clustering & {avg_clustering:.4f} & {avg_clust_ba:.4f} \\\\')
print(f'Characteristic Path Length & {char_path_length:.2f} & -- \\\\')
print(f'Global Clustering & {global_clustering:.4f} & -- \\\\')
print(f'Assortativity & {assortativity:.3f} & {assortativity_ba:.3f} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')

# Centrality summary table
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Top Nodes by Centrality Measures}')
print(r'\begin{tabular}{lccc}')
print(r'\toprule')
print(r'Rank & Degree & Betweenness & Eigenvector \\')
print(r'\midrule')
for i in range(top_k):
    print(f'{i+1} & Node {top_degree[i]} & Node {top_between[i]} & Node {top_eigen[i]} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')

# Community detection results
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Community Detection Results}')
print(r'\begin{tabular}{lcc}')
print(r'\toprule')
print(r'Method & Communities & Modularity \\')
print(r'\midrule')
print(f'Label Propagation & {n_communities_lp} & {mod_lp:.4f} \\\\')
print(f'Spectral Clustering & {k_communities} & {mod_spec:.4f} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')

# Motif counts
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Network Motif Counts}')
print(r'\begin{tabular}{lc}')
print(r'\toprule')
print(r'Motif Type & Count \\')
print(r'\midrule')
print(f'Triangles & {tri_count} \\\\')
print(f'3-Stars & {star3_count} \\\\')
print(f'2-Paths & {star2_count} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')
\end{pycode}

\section{Statistical Summary}

Key metrics for the $n = 50$ Erdos-Renyi network analyzed above:
\begin{itemize}
\item Nodes: \py{f"{n}"}, Edges: \py{f"{n_edges}"}
\item Average degree: \py{f"{avg_degree:.2f}"}
\item Average clustering: \py{f"{avg_clustering:.4f}"}
\item Global clustering: \py{f"{global_clustering:.4f}"}
\item Characteristic path length: \py{f"{char_path_length:.2f}"}
\item Network density: \py{f"{density:.4f}"}
\item Number of triangles: \py{f"{triangles}"}
\item Assortativity coefficient: \py{f"{assortativity:.3f}"}
\item Communities detected (label propagation): \py{f"{n_communities_lp}"}
\item Modularity (label propagation): \py{f"{mod_lp:.4f}"}
\item Modularity (spectral clustering): \py{f"{mod_spec:.4f}"}
\end{itemize}

\section{Conclusion}

Network metrics reveal the structural properties of complex systems. This analysis demonstrated:

\begin{enumerate}
\item \textbf{Centrality measures}: Degree, betweenness, closeness, and eigenvector centrality capture different aspects of node importance. The measures correlate strongly in the homogeneous random graph studied here, although they can diverge in networks with pronounced hubs or bottlenecks.

\item \textbf{Community detection}: Label propagation and spectral clustering identify community structure, with modularity quantifying partition quality.

\item \textbf{Random graph models}: The Erdos-Renyi model produces an approximately Poisson (homogeneous) degree distribution, while the Barabasi-Albert model generates scale-free networks with power-law degree distributions through preferential attachment.

\item \textbf{Small-world networks}: The Watts-Strogatz model shows that a small rewiring probability creates networks with high clustering and short path lengths, a combination characteristic of many real-world networks.

\item \textbf{Network motifs}: Triangle counting, assortativity, and rich-club analysis reveal hierarchical organization and hub structure.
\end{enumerate}

These tools apply to social networks, biological systems (protein interactions, neural networks), infrastructure (power grids, transportation), and information networks (WWW, citations). Understanding network topology enables prediction of system behavior, identification of critical nodes, and optimization of network performance.

\end{document}