\documentclass[a4paper, 11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb}
\usepackage{graphicx}
\usepackage{siunitx}
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{float}
\usepackage[makestderr]{pythontex}

\title{Network Science: Graph Metrics and Community Detection}
\author{Computational Science Templates}
\date{\today}

\begin{document}
\maketitle

\begin{abstract}
Network science provides tools for analyzing complex systems through graph theory. This document demonstrates computational methods for calculating centrality measures, detecting community structure, analyzing random graph models, and characterizing small-world and scale-free properties. Using PythonTeX, we implement algorithms for betweenness centrality, community detection with modularity evaluation, Erdos-Renyi and Barabasi-Albert models, and compute characteristic path lengths and clustering coefficients.
\end{abstract}

\section{Introduction}
Network analysis characterizes complex systems through graph metrics. Networks appear throughout science: social networks, biological networks (protein interactions, neural connections), infrastructure networks (power grids, transportation), and information networks (the World Wide Web, citation graphs). This analysis computes centrality measures, identifies community structure, and compares random network models.

\section{Mathematical Framework}

\subsection{Centrality Measures}
Degree centrality measures local connectivity:
\begin{equation}
C_D(v) = \frac{\deg(v)}{n-1}
\end{equation}

Betweenness centrality quantifies how often a node lies on shortest paths:
\begin{equation}
C_B(v) = \sum_{s \neq v \neq t} \frac{\sigma_{st}(v)}{\sigma_{st}}
\end{equation}
where $\sigma_{st}$ is the number of shortest paths from $s$ to $t$, and $\sigma_{st}(v)$ is the number of those paths that pass through $v$.

Closeness centrality is the reciprocal of the average distance to all other nodes:
\begin{equation}
C_C(v) = \frac{n-1}{\sum_{u \neq v} d(v, u)}
\end{equation}

Eigenvector centrality assigns importance based on the importance of a node's neighbors:
\begin{equation}
x_v = \frac{1}{\lambda} \sum_{u \in N(v)} x_u
\end{equation}
Equivalently, $x$ is the leading eigenvector of the adjacency matrix $A$ (that is, $Ax = \lambda x$ with $\lambda$ the largest eigenvalue).

\subsection{Clustering and Transitivity}
The local clustering coefficient of node $i$ is the fraction of its neighbor pairs that are themselves connected:
\begin{equation}
C_i = \frac{2\,\bigl|\{\, e_{jk} : v_j, v_k \in N_i,\; e_{jk} \in E \,\}\bigr|}{k_i(k_i-1)}
\end{equation}

The global clustering coefficient (transitivity) is
\begin{equation}
C = \frac{3 \times \text{number of triangles}}{\text{number of connected triples}}
\end{equation}
For example, a triangle contains one triangle and three connected triples, giving $C = 3 \cdot 1 / 3 = 1$, while a 3-star (one hub joined to three leaves) contains no triangles and three triples, giving $C = 0$.

\subsection{Modularity}
Modularity measures the quality of a partition into communities:
\begin{equation}
Q = \frac{1}{2m} \sum_{ij} \left[ A_{ij} - \frac{k_i k_j}{2m} \right] \delta(c_i, c_j)
\end{equation}
where $A_{ij}$ is the adjacency matrix, $k_i$ is the degree of node $i$, $m$ is the total number of edges, and $\delta(c_i, c_j) = 1$ if nodes $i$ and $j$ belong to the same community and $0$ otherwise.

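As a small worked example of the formula, consider the four-node path graph $1$--$2$--$3$--$4$ ($m = 3$ edges) with the partition $\{1,2\}$, $\{3,4\}$. The degrees are $k_1 = k_4 = 1$ and $k_2 = k_3 = 2$, so
\begin{equation*}
Q = \frac{1}{6}\left[2\left(1 - \frac{2}{6}\right) - \frac{1}{6} - \frac{4}{6}\right]
  + \frac{1}{6}\left[2\left(1 - \frac{2}{6}\right) - \frac{4}{6} - \frac{1}{6}\right]
  = \frac{1}{6} \approx 0.17,
\end{equation*}
where the two brackets collect the terms for the communities $\{1,2\}$ and $\{3,4\}$, respectively.
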
\section{Environment Setup}

\begin{pycode}
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse
from collections import deque
import warnings
warnings.filterwarnings('ignore')

# usetex requires a LaTeX installation visible to matplotlib
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

np.random.seed(42)

def save_plot(filename, caption=""):
    """Save the current figure and emit a LaTeX figure environment."""
    plt.savefig(filename, bbox_inches='tight', dpi=150)
    print(r'\begin{figure}[htbp]')
    print(r'\centering')
    print(r'\includegraphics[width=0.95\textwidth]{' + filename + '}')
    if caption:
        print(r'\caption{' + caption + '}')
    print(r'\end{figure}')
    plt.close()
\end{pycode}

\section{Network Generation and Basic Metrics}

\begin{pycode}
# Generate random network (Erdos-Renyi model)
n = 50
p = 0.1
adj = np.zeros((n, n))
for i in range(n):
    for j in range(i+1, n):
        if np.random.rand() < p:
            adj[i, j] = adj[j, i] = 1

# Compute degrees
degrees = np.sum(adj, axis=1).astype(int)

# Degree centrality
degree_centrality = degrees / (n - 1)

# Compute shortest paths using BFS
def bfs_shortest_paths(adj, source):
    n = adj.shape[0]
    dist = np.full(n, np.inf)
    dist[source] = 0
    queue = deque([source])
    while queue:
        u = queue.popleft()
        for v in range(n):
            if adj[u, v] == 1 and dist[v] == np.inf:
                dist[v] = dist[u] + 1
                queue.append(v)
    return dist

# Compute all-pairs shortest paths
all_distances = np.zeros((n, n))
for i in range(n):
    all_distances[i] = bfs_shortest_paths(adj, i)

# Closeness centrality (only for reachable nodes)
closeness_centrality = np.zeros(n)
for i in range(n):
    reachable = all_distances[i] < np.inf
    n_reachable = np.sum(reachable) - 1
    if n_reachable > 0:
        closeness_centrality[i] = n_reachable / np.sum(all_distances[i][reachable])

# Local clustering coefficient
clustering = np.zeros(n)
for i in range(n):
    neighbors = np.where(adj[i] == 1)[0]
    k = len(neighbors)
    if k >= 2:
        edges = 0
        for j in range(len(neighbors)):
            for l in range(j+1, len(neighbors)):
                if adj[neighbors[j], neighbors[l]] == 1:
                    edges += 1
        clustering[i] = 2 * edges / (k * (k - 1))

# Network statistics
n_edges = int(np.sum(adj) / 2)
avg_degree = np.mean(degrees)
avg_clustering = np.mean(clustering)
density = 2 * n_edges / (n * (n - 1))

# Characteristic path length (average shortest path)
finite_distances = all_distances[all_distances < np.inf]
finite_distances = finite_distances[finite_distances > 0]
char_path_length = np.mean(finite_distances) if len(finite_distances) > 0 else np.inf

# Count triangles
triangles = 0
for i in range(n):
    neighbors = np.where(adj[i] == 1)[0]
    for j in range(len(neighbors)):
        for k in range(j+1, len(neighbors)):
            if adj[neighbors[j], neighbors[k]] == 1:
                triangles += 1
triangles = triangles // 3  # Each triangle counted 3 times

# Global clustering coefficient
connected_triples = sum(k*(k-1)//2 for k in degrees)
global_clustering = 3 * triangles / connected_triples if connected_triples > 0 else 0
\end{pycode}

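A minimal, illustrative sanity check, assuming the variables defined in the block above remain available in the same PythonTeX session: the handshake lemma ties the degree sum to the edge count, and for a $G(n, p)$ realization the measured density should lie close to the target probability $p$.

\begin{pycode}
# Illustrative consistency checks on the generated graph (reuses adj, degrees,
# n_edges, density and p from the block above).
assert int(np.sum(degrees)) == 2 * n_edges   # handshake lemma: degree sum = 2|E|
density_gap = abs(density - p)               # small for a typical G(n, p) sample
\end{pycode}
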
\section{Betweenness Centrality}

\begin{pycode}
# Compute betweenness centrality using Brandes algorithm
def compute_betweenness(adj):
    n = adj.shape[0]
    betweenness = np.zeros(n)

    for s in range(n):
        # Single-source shortest paths
        S = []  # Stack of vertices in order of non-increasing distance
        P = [[] for _ in range(n)]  # Predecessors
        sigma = np.zeros(n)  # Number of shortest paths
        sigma[s] = 1
        d = np.full(n, -1)  # Distance from source
        d[s] = 0

        Q = deque([s])
        while Q:
            v = Q.popleft()
            S.append(v)
            for w in range(n):
                if adj[v, w] == 1:
                    if d[w] < 0:
                        Q.append(w)
                        d[w] = d[v] + 1
                    if d[w] == d[v] + 1:
                        sigma[w] += sigma[v]
                        P[w].append(v)

        # Accumulation
        delta = np.zeros(n)
        while S:
            w = S.pop()
            for v in P[w]:
                delta[v] += (sigma[v] / sigma[w]) * (1 + delta[w])
            if w != s:
                betweenness[w] += delta[w]

    # Normalize: in an undirected graph each pair (s, t) is accumulated from
    # both endpoints, so dividing by (n-1)(n-2) maps values into [0, 1]
    betweenness = betweenness / ((n-1) * (n-2))
    return betweenness

betweenness_centrality = compute_betweenness(adj)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Network visualization with degree centrality
theta = np.linspace(0, 2*np.pi, n, endpoint=False)
pos = np.column_stack([np.cos(theta), np.sin(theta)])

for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 0].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.4)

sizes = 50 + 400 * degree_centrality
sc = axes[0, 0].scatter(pos[:, 0], pos[:, 1], s=sizes, c=degree_centrality,
                        cmap='viridis', edgecolors='black', linewidth=0.5)
plt.colorbar(sc, ax=axes[0, 0], label='Degree Centrality')
axes[0, 0].set_title('Network (node size = degree)')
axes[0, 0].set_aspect('equal')
axes[0, 0].axis('off')

# Degree distribution
unique_degrees, degree_counts = np.unique(degrees, return_counts=True)
axes[0, 1].bar(unique_degrees, degree_counts, alpha=0.7, edgecolor='black', color='steelblue')
axes[0, 1].set_xlabel('Degree $k$')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('Degree Distribution')
axes[0, 1].grid(True, alpha=0.3)

# Expected Poisson distribution for ER graph
k_range = np.arange(0, max(degrees)+5)
from scipy.stats import poisson
expected_freq = n * poisson.pmf(k_range, avg_degree)
axes[0, 1].plot(k_range, expected_freq, 'r--', linewidth=2, label='Poisson')
axes[0, 1].legend()

# Centrality comparison
axes[1, 0].scatter(degree_centrality, betweenness_centrality, alpha=0.6)
axes[1, 0].set_xlabel('Degree Centrality')
axes[1, 0].set_ylabel('Betweenness Centrality')
axes[1, 0].set_title('Degree vs Betweenness Centrality')
axes[1, 0].grid(True, alpha=0.3)

# Correlation coefficient
corr = np.corrcoef(degree_centrality, betweenness_centrality)[0, 1]
axes[1, 0].text(0.05, 0.95, f'$r = {corr:.3f}$', transform=axes[1, 0].transAxes,
                fontsize=10, verticalalignment='top')

# Clustering vs degree
axes[1, 1].scatter(degrees, clustering, alpha=0.6, color='green')
axes[1, 1].set_xlabel('Degree $k$')
axes[1, 1].set_ylabel('Local Clustering $C_i$')
axes[1, 1].set_title('Degree vs Clustering Coefficient')
axes[1, 1].grid(True, alpha=0.3)

# Expected clustering for ER graph
axes[1, 1].axhline(y=p, color='r', linestyle='--', label=f'Expected (p={p})')
axes[1, 1].legend()

plt.tight_layout()
save_plot('network_basic_metrics.pdf', 'Basic network metrics for Erdos-Renyi random graph')
\end{pycode}

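As an optional, illustrative cross-check, assuming a reasonably recent NetworkX is installed (the block is skipped silently otherwise), the hand-rolled Brandes implementation can be compared against \texttt{networkx.betweenness\_centrality}:

\begin{pycode}
# Optional cross-check against NetworkX (illustrative; requires networkx).
try:
    import networkx as nx
    G_check = nx.from_numpy_array(adj)
    bc_nx_dict = nx.betweenness_centrality(G_check, normalized=True)
    bc_nx = np.array([bc_nx_dict[i] for i in range(n)])
    betweenness_max_diff = np.max(np.abs(bc_nx - betweenness_centrality))
except ImportError:
    betweenness_max_diff = None
\end{pycode}
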
\section{Eigenvector Centrality and PageRank}

\begin{pycode}
# Eigenvector centrality using power iteration
def eigenvector_centrality(adj, max_iter=100, tol=1e-6):
    n = adj.shape[0]
    x = np.ones(n) / np.sqrt(n)

    for _ in range(max_iter):
        x_new = adj @ x
        norm = np.linalg.norm(x_new)
        if norm > 0:
            x_new = x_new / norm
        if np.linalg.norm(x_new - x) < tol:
            break
        x = x_new

    return x_new

# PageRank (random walk with damping)
def pagerank(adj, damping=0.85, max_iter=100, tol=1e-6):
    n = adj.shape[0]
    out_degree = np.sum(adj, axis=1)

    # Create transition matrix
    M = np.zeros((n, n))
    for i in range(n):
        if out_degree[i] > 0:
            M[:, i] = adj[:, i] / out_degree[i]
        else:
            M[:, i] = 1.0 / n  # Dangling nodes

    # Power iteration
    pr = np.ones(n) / n
    for _ in range(max_iter):
        pr_new = damping * M @ pr + (1 - damping) / n
        if np.linalg.norm(pr_new - pr) < tol:
            break
        pr = pr_new

    return pr_new

eig_centrality = eigenvector_centrality(adj)
pr = pagerank(adj)

# Katz centrality
def katz_centrality(adj, alpha=0.1, beta=1.0):
    n = adj.shape[0]
    I = np.eye(n)
    ones = np.ones(n)
    try:
        centrality = np.linalg.solve(I - alpha * adj.T, beta * ones)
    except np.linalg.LinAlgError:
        centrality = np.zeros(n)
    return centrality / np.max(centrality) if np.max(centrality) > 0 else centrality

katz_cent = katz_centrality(adj, alpha=0.05)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Compare centrality measures
centrality_measures = {
    'Degree': degree_centrality,
    'Betweenness': betweenness_centrality,
    'Closeness': closeness_centrality,
    'Eigenvector': eig_centrality
}

# Heatmap of centrality correlations
names = list(centrality_measures.keys())
measures = np.array([centrality_measures[name] for name in names])
corr_matrix = np.corrcoef(measures)

im = axes[0, 0].imshow(corr_matrix, cmap='RdYlBu_r', vmin=-1, vmax=1)
axes[0, 0].set_xticks(range(len(names)))
axes[0, 0].set_yticks(range(len(names)))
axes[0, 0].set_xticklabels(names, rotation=45)
axes[0, 0].set_yticklabels(names)
for i in range(len(names)):
    for j in range(len(names)):
        axes[0, 0].text(j, i, f'{corr_matrix[i, j]:.2f}', ha='center', va='center')
plt.colorbar(im, ax=axes[0, 0], label='Correlation')
axes[0, 0].set_title('Centrality Measure Correlations')

# Network with eigenvector centrality
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 1].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.4)

sizes = 50 + 400 * (eig_centrality / max(eig_centrality))
sc = axes[0, 1].scatter(pos[:, 0], pos[:, 1], s=sizes, c=eig_centrality,
                        cmap='plasma', edgecolors='black', linewidth=0.5)
plt.colorbar(sc, ax=axes[0, 1], label='Eigenvector Centrality')
axes[0, 1].set_title('Network (size = eigenvector centrality)')
axes[0, 1].set_aspect('equal')
axes[0, 1].axis('off')

# PageRank distribution
sorted_pr = np.sort(pr)[::-1]
axes[1, 0].bar(range(n), sorted_pr, alpha=0.7, color='coral')
axes[1, 0].set_xlabel('Node Rank')
axes[1, 0].set_ylabel('PageRank')
axes[1, 0].set_title('PageRank Distribution (sorted)')
axes[1, 0].grid(True, alpha=0.3)

# Eigenvector vs PageRank
axes[1, 1].scatter(eig_centrality, pr, alpha=0.6, color='purple')
axes[1, 1].set_xlabel('Eigenvector Centrality')
axes[1, 1].set_ylabel('PageRank')
axes[1, 1].set_title('Eigenvector Centrality vs PageRank')
axes[1, 1].grid(True, alpha=0.3)
corr_pr = np.corrcoef(eig_centrality, pr)[0, 1]
axes[1, 1].text(0.05, 0.95, f'$r = {corr_pr:.3f}$', transform=axes[1, 1].transAxes,
                fontsize=10, verticalalignment='top')

plt.tight_layout()
save_plot('network_centrality_advanced.pdf', 'Advanced centrality measures')

# Top nodes by centrality
top_k = 5
top_degree = np.argsort(degree_centrality)[-top_k:][::-1]
top_between = np.argsort(betweenness_centrality)[-top_k:][::-1]
top_eigen = np.argsort(eig_centrality)[-top_k:][::-1]
\end{pycode}

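A brief illustrative check of the power iteration, assuming \texttt{eig\_centrality} and \texttt{adj} from the block above are still in scope: the converged vector should approximately satisfy $Ax = \lambda x$, with $\lambda$ estimated by the Rayleigh quotient.

\begin{pycode}
# Illustrative convergence check for the power iteration above.
lambda_est = eig_centrality @ (adj @ eig_centrality) / (eig_centrality @ eig_centrality)
eig_residual = np.linalg.norm(adj @ eig_centrality - lambda_est * eig_centrality)
\end{pycode}
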
\section{Community Detection}

\begin{pycode}
# Simple community detection using label propagation
def label_propagation(adj, max_iter=100):
    n = adj.shape[0]
    labels = np.arange(n)

    for _ in range(max_iter):
        order = np.random.permutation(n)
        changed = False
        for node in order:
            neighbors = np.where(adj[node] == 1)[0]
            if len(neighbors) > 0:
                neighbor_labels = labels[neighbors]
                unique, counts = np.unique(neighbor_labels, return_counts=True)
                max_count = max(counts)
                candidates = unique[counts == max_count]
                new_label = np.random.choice(candidates)
                if new_label != labels[node]:
                    labels[node] = new_label
                    changed = True
        if not changed:
            break

    # Relabel communities to 0, 1, 2, ...
    unique_labels = np.unique(labels)
    label_map = {old: new for new, old in enumerate(unique_labels)}
    return np.array([label_map[l] for l in labels])

# Compute modularity
def compute_modularity(adj, communities):
    n = adj.shape[0]
    m = np.sum(adj) / 2
    degrees = np.sum(adj, axis=1)

    Q = 0
    for i in range(n):
        for j in range(n):
            if communities[i] == communities[j]:
                Q += adj[i, j] - degrees[i] * degrees[j] / (2 * m)

    return Q / (2 * m)

# Spectral clustering for community detection
def spectral_clustering(adj, k=3):
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1)
    D = np.diag(degrees)
    L = D - adj  # Laplacian

    # Normalized Laplacian
    D_inv_sqrt = np.diag(1.0 / np.sqrt(degrees + 1e-10))
    L_norm = D_inv_sqrt @ L @ D_inv_sqrt

    # Eigendecomposition
    eigenvalues, eigenvectors = np.linalg.eigh(L_norm)

    # Use first k eigenvectors (after the trivial one)
    features = eigenvectors[:, 1:k+1]

    # K-means clustering on eigenvector features
    from scipy.cluster.vq import kmeans2
    _, labels = kmeans2(features, k, minit='++')

    return labels, eigenvalues

# Detect communities
communities_lp = label_propagation(adj)
n_communities_lp = len(np.unique(communities_lp))

# Spectral clustering
k_communities = 4
communities_spec, laplacian_eigenvalues = spectral_clustering(adj, k=k_communities)

# Compute modularities
mod_lp = compute_modularity(adj, communities_lp)
mod_spec = compute_modularity(adj, communities_spec)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Label propagation communities
colors_lp = plt.cm.Set1(communities_lp / (n_communities_lp - 1 + 0.01))
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 0].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.3)

axes[0, 0].scatter(pos[:, 0], pos[:, 1], s=100, c=communities_lp,
                   cmap='Set1', edgecolors='black', linewidth=0.5)
axes[0, 0].set_title(f'Label Propagation ({n_communities_lp} communities, Q={mod_lp:.3f})')
axes[0, 0].set_aspect('equal')
axes[0, 0].axis('off')

# Spectral clustering communities
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            axes[0, 1].plot([pos[i, 0], pos[j, 0]], [pos[i, 1], pos[j, 1]],
                            'gray', linewidth=0.5, alpha=0.3)

axes[0, 1].scatter(pos[:, 0], pos[:, 1], s=100, c=communities_spec,
                   cmap='Set2', edgecolors='black', linewidth=0.5)
axes[0, 1].set_title(f'Spectral Clustering ({k_communities} communities, Q={mod_spec:.3f})')
axes[0, 1].set_aspect('equal')
axes[0, 1].axis('off')

# Laplacian eigenvalues (spectral gap)
axes[1, 0].plot(laplacian_eigenvalues[:15], 'o-', markersize=8)
axes[1, 0].set_xlabel('Index')
axes[1, 0].set_ylabel('Eigenvalue')
axes[1, 0].set_title('Laplacian Eigenvalues (spectral gap)')
axes[1, 0].grid(True, alpha=0.3)

# Community size distribution
for i, (comm, name) in enumerate([(communities_lp, 'Label Prop'),
                                  (communities_spec, 'Spectral')]):
    unique, counts = np.unique(comm, return_counts=True)
    x_pos = np.arange(len(counts)) + i*0.4
    axes[1, 1].bar(x_pos, counts, width=0.35, alpha=0.7, label=name)

axes[1, 1].set_xlabel('Community')
axes[1, 1].set_ylabel('Size')
axes[1, 1].set_title('Community Size Distribution')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
save_plot('network_communities.pdf', 'Community detection using label propagation and spectral clustering')
\end{pycode}

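As an illustrative baseline, reusing \texttt{compute\_modularity} and the graph from the block above, the modularity of a random partition of the same size should sit near zero, which puts the detected values in context:

\begin{pycode}
# Illustrative baseline: modularity of a uniformly random partition into
# k_communities groups (expected to be close to zero for this ER graph).
# A separate generator is used so the global seed-42 stream is unaffected.
random_partition = np.random.default_rng(0).integers(0, k_communities, size=n)
mod_random = compute_modularity(adj, random_partition)
\end{pycode}
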
\section{Random Graph Models}

\begin{pycode}
# Compare Erdos-Renyi and Barabasi-Albert models
n_compare = 100

# Erdos-Renyi G(n, p)
p_er = 0.05
adj_er = np.zeros((n_compare, n_compare))
for i in range(n_compare):
    for j in range(i+1, n_compare):
        if np.random.rand() < p_er:
            adj_er[i, j] = adj_er[j, i] = 1

# Barabasi-Albert preferential attachment
def barabasi_albert(n, m):
    adj = np.zeros((n, n))
    # Start with m+1 fully connected nodes
    for i in range(m+1):
        for j in range(i+1, m+1):
            adj[i, j] = adj[j, i] = 1

    degrees = np.sum(adj, axis=1)

    # Add remaining nodes
    for new_node in range(m+1, n):
        # Preferential attachment: choose m distinct targets with probability
        # proportional to their current degree
        probs = degrees[:new_node] / np.sum(degrees[:new_node])
        targets = np.random.choice(new_node, size=m, replace=False, p=probs)
        for target in targets:
            adj[new_node, target] = adj[target, new_node] = 1
        degrees = np.sum(adj, axis=1)

    return adj

m_ba = 2
adj_ba = barabasi_albert(n_compare, m_ba)

# Compute metrics for both
degrees_er = np.sum(adj_er, axis=1).astype(int)
degrees_ba = np.sum(adj_ba, axis=1).astype(int)

avg_degree_er = np.mean(degrees_er)
avg_degree_ba = np.mean(degrees_ba)

# Clustering coefficients
def compute_clustering(adj):
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1).astype(int)
    clustering = np.zeros(n)
    for i in range(n):
        neighbors = np.where(adj[i] == 1)[0]
        k = len(neighbors)
        if k >= 2:
            edges = 0
            for j in range(len(neighbors)):
                for l in range(j+1, len(neighbors)):
                    if adj[neighbors[j], neighbors[l]] == 1:
                        edges += 1
            clustering[i] = 2 * edges / (k * (k - 1))
    return clustering

clustering_er = compute_clustering(adj_er)
clustering_ba = compute_clustering(adj_ba)

avg_clust_er = np.mean(clustering_er)
avg_clust_ba = np.mean(clustering_ba)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Degree distributions comparison
max_degree = max(max(degrees_er), max(degrees_ba))
bins = np.arange(0, max_degree + 2) - 0.5

axes[0, 0].hist(degrees_er, bins=bins, alpha=0.5, label=f'ER (avg={avg_degree_er:.1f})',
                edgecolor='blue', color='blue')
axes[0, 0].hist(degrees_ba, bins=bins, alpha=0.5, label=f'BA (avg={avg_degree_ba:.1f})',
                edgecolor='red', color='red')
axes[0, 0].set_xlabel('Degree $k$')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('Degree Distribution Comparison')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Log-log degree distribution for BA (scale-free test)
unique_ba, counts_ba = np.unique(degrees_ba, return_counts=True)
axes[0, 1].loglog(unique_ba[unique_ba > 0], counts_ba[unique_ba > 0], 'ro', markersize=8, label='BA Data')

# Power law fit
log_k = np.log(unique_ba[unique_ba > 1])
log_p = np.log(counts_ba[unique_ba > 1] / n_compare)
if len(log_k) > 2:
    coeffs = np.polyfit(log_k, log_p, 1)
    gamma = -coeffs[0]
    k_fit = np.logspace(np.log10(2), np.log10(max(degrees_ba)), 50)
    p_fit = np.exp(coeffs[1]) * n_compare * k_fit**(-gamma)
    axes[0, 1].loglog(k_fit, p_fit, 'b--', linewidth=2, label=f'Power law $\\gamma={gamma:.2f}$')

axes[0, 1].set_xlabel('Degree $k$')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('BA Degree Distribution (log-log)')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Clustering coefficient vs degree
axes[1, 0].scatter(degrees_er, clustering_er, alpha=0.4, label='Erdos-Renyi', s=30)
axes[1, 0].scatter(degrees_ba, clustering_ba, alpha=0.4, label='Barabasi-Albert', s=30)
axes[1, 0].set_xlabel('Degree $k$')
axes[1, 0].set_ylabel('Local Clustering $C_i$')
axes[1, 0].set_title('Clustering vs Degree')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Network metrics comparison
metrics_names = ['Nodes', 'Edges', 'Avg Degree', 'Avg Clustering']
metrics_er = [n_compare, int(np.sum(adj_er)/2), avg_degree_er, avg_clust_er * 10]
metrics_ba = [n_compare, int(np.sum(adj_ba)/2), avg_degree_ba, avg_clust_ba * 10]

x = np.arange(len(metrics_names))
width = 0.35
axes[1, 1].bar(x - width/2, metrics_er, width, label='Erdos-Renyi', alpha=0.7)
axes[1, 1].bar(x + width/2, metrics_ba, width, label='Barabasi-Albert', alpha=0.7)
axes[1, 1].set_ylabel('Value (clustering x10)')
axes[1, 1].set_xticks(x)
axes[1, 1].set_xticklabels(metrics_names)
axes[1, 1].set_title('Network Metrics Comparison')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
save_plot('network_random_models.pdf', 'Comparison of Erdos-Renyi and Barabasi-Albert random graph models')

# Store for summary
n_edges_er = int(np.sum(adj_er) / 2)
n_edges_ba = int(np.sum(adj_ba) / 2)
\end{pycode}

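For reference, a quick illustrative comparison with the closed-form expectations for $G(n, p)$, reusing the quantities computed above: the expected average degree is $(n-1)p$ and the expected clustering coefficient equals $p$.

\begin{pycode}
# Illustrative theory comparison for the G(n, p) sample above.
expected_avg_degree_er = (n_compare - 1) * p_er   # = 4.95 for n=100, p=0.05
clust_gap_er = abs(avg_clust_er - p_er)           # measured vs expected clustering
\end{pycode}
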
\section{Small-World Networks}

\begin{pycode}
# Watts-Strogatz small-world model
def watts_strogatz(n, k, p):
    """Generate Watts-Strogatz small-world network."""
    adj = np.zeros((n, n))

    # Create ring lattice
    for i in range(n):
        for j in range(1, k//2 + 1):
            adj[i, (i+j) % n] = adj[(i+j) % n, i] = 1

    # Rewire edges with probability p
    for i in range(n):
        for j in range(1, k//2 + 1):
            if np.random.rand() < p:
                neighbor = (i + j) % n
                if adj[i, neighbor] == 1:
                    adj[i, neighbor] = adj[neighbor, i] = 0
                    # Choose new target
                    possible = [x for x in range(n) if x != i and adj[i, x] == 0]
                    if possible:
                        new_target = np.random.choice(possible)
                        adj[i, new_target] = adj[new_target, i] = 1

    return adj

# Characteristic path length
def compute_path_length(adj):
    n = adj.shape[0]
    total_dist = 0
    count = 0
    for i in range(n):
        dist = bfs_shortest_paths(adj, i)
        for j in range(i+1, n):
            if dist[j] < np.inf:
                total_dist += dist[j]
                count += 1
    return total_dist / count if count > 0 else np.inf

# Generate networks for different rewiring probabilities
n_ws = 50
k_ws = 6
p_values = [0, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0]

path_lengths = []
clusterings = []

for p_ws in p_values:
    adj_ws = watts_strogatz(n_ws, k_ws, p_ws)
    pl = compute_path_length(adj_ws)
    clust = np.mean(compute_clustering(adj_ws))
    path_lengths.append(pl)
    clusterings.append(clust)

# Normalize by p=0 values
L0 = path_lengths[0]
C0 = clusterings[0]
L_norm = [L/L0 for L in path_lengths]
C_norm = [C/C0 for C in clusterings]

# Generate examples for visualization
adj_lattice = watts_strogatz(30, 4, 0)
adj_small_world = watts_strogatz(30, 4, 0.1)
adj_random = watts_strogatz(30, 4, 1.0)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Small-world transition
axes[0, 0].semilogx(p_values[1:], L_norm[1:], 'o-', markersize=8, label='$L(p)/L(0)$')
axes[0, 0].semilogx(p_values[1:], C_norm[1:], 's-', markersize=8, label='$C(p)/C(0)$')
axes[0, 0].axvspan(0.01, 0.1, alpha=0.2, color='yellow', label='Small-world regime')
axes[0, 0].set_xlabel('Rewiring probability $p$')
axes[0, 0].set_ylabel('Normalized value')
axes[0, 0].set_title('Small-World Transition')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Plot the three network types
for idx, (adj_plot, title) in enumerate([(adj_lattice, 'Ring Lattice (p=0)'),
                                         (adj_small_world, 'Small-World (p=0.1)'),
                                         (adj_random, 'Random (p=1)')]):
    if idx == 0:
        ax = axes[0, 1]
    elif idx == 1:
        ax = axes[1, 0]
    else:
        ax = axes[1, 1]

    n_plot = adj_plot.shape[0]
    theta = np.linspace(0, 2*np.pi, n_plot, endpoint=False)
    pos_plot = np.column_stack([np.cos(theta), np.sin(theta)])

    for i in range(n_plot):
        for j in range(i+1, n_plot):
            if adj_plot[i, j] == 1:
                ax.plot([pos_plot[i, 0], pos_plot[j, 0]],
                        [pos_plot[i, 1], pos_plot[j, 1]],
                        'gray', linewidth=0.5, alpha=0.5)

    ax.scatter(pos_plot[:, 0], pos_plot[:, 1], s=50, c='steelblue',
               edgecolors='black', linewidth=0.5)
    ax.set_title(title)
    ax.set_aspect('equal')
    ax.axis('off')

plt.tight_layout()
save_plot('network_small_world.pdf', 'Watts-Strogatz small-world network model')

# Small-world index at p=0.1: relative clustering divided by relative path
# length (values well above 1 indicate small-world structure)
sw_index = (C_norm[3] / C_norm[-1]) / (L_norm[3] / L_norm[-1]) if L_norm[-1] > 0 else 0
\end{pycode}

\section{Network Motifs and Structural Patterns}

\begin{pycode}
# Analyze network motifs
# Count common subgraph patterns

def count_triangles(adj):
    n = adj.shape[0]
    count = 0
    for i in range(n):
        for j in range(i+1, n):
            for k in range(j+1, n):
                if adj[i,j] == 1 and adj[j,k] == 1 and adj[i,k] == 1:
                    count += 1
    return count

def count_stars(adj, k=3):
    """Count k-stars (one hub connected to k nodes)."""
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1).astype(int)
    from math import comb
    count = sum(comb(d, k) for d in degrees if d >= k)
    return count

def count_paths(adj, length=2):
    """Count paths of given length."""
    if length == 2:
        return count_stars(adj, 2)  # 2-star = path of length 2
    else:
        return 0

# Analyze original ER network
tri_count = count_triangles(adj)
star3_count = count_stars(adj, 3)
star2_count = count_stars(adj, 2)

# Assortativity (degree correlation)
def compute_assortativity(adj):
    n = adj.shape[0]
    degrees = np.sum(adj, axis=1)
    edges = []
    for i in range(n):
        for j in range(i+1, n):
            if adj[i, j] == 1:
                edges.append((degrees[i], degrees[j]))

    if len(edges) == 0:
        return 0

    edges = np.array(edges)
    x, y = edges[:, 0], edges[:, 1]

    # Pearson correlation of degrees at either end of each edge,
    # symmetrized over both orientations of the undirected edges
    r = np.corrcoef(np.concatenate([x, y]), np.concatenate([y, x]))[0, 1]
    return r

assortativity = compute_assortativity(adj)
assortativity_ba = compute_assortativity(adj_ba)

# Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Motif counts
motifs = ['Triangles', '3-Stars', '2-Paths']
counts = [tri_count, star3_count, star2_count]
colors = ['coral', 'steelblue', 'seagreen']
axes[0, 0].bar(motifs, counts, color=colors, alpha=0.7, edgecolor='black')
axes[0, 0].set_ylabel('Count')
axes[0, 0].set_title('Network Motif Counts')
axes[0, 0].grid(True, alpha=0.3)

# Degree-degree correlation plot (assortativity visualization)
edges = []
for i in range(n):
    for j in range(i+1, n):
        if adj[i, j] == 1:
            edges.append((degrees[i], degrees[j]))

if edges:
    edges = np.array(edges)
    axes[0, 1].scatter(edges[:, 0], edges[:, 1], alpha=0.3, s=50)
axes[0, 1].set_xlabel('Degree of node $i$')
axes[0, 1].set_ylabel('Degree of node $j$')
axes[0, 1].set_title(f'Degree Correlation (assortativity r={assortativity:.3f})')
axes[0, 1].grid(True, alpha=0.3)

# Rich-club coefficient
def rich_club_coefficient(adj):
    degrees = np.sum(adj, axis=1).astype(int)
    unique_degrees = np.unique(degrees)
    phi = {}

    for k in unique_degrees:
        # Nodes with degree > k
        rich_nodes = np.where(degrees > k)[0]
        n_rich = len(rich_nodes)
        if n_rich < 2:
            continue

        # Edges among rich nodes
        edges_rich = 0
        for i in range(len(rich_nodes)):
            for j in range(i+1, len(rich_nodes)):
                if adj[rich_nodes[i], rich_nodes[j]] == 1:
                    edges_rich += 1

        max_edges = n_rich * (n_rich - 1) / 2
        if max_edges > 0:
            phi[k] = edges_rich / max_edges

    return phi

phi = rich_club_coefficient(adj)
k_vals = sorted(phi.keys())
phi_vals = [phi[k] for k in k_vals]

axes[1, 0].plot(k_vals, phi_vals, 'o-', markersize=6)
axes[1, 0].set_xlabel('Degree threshold $k$')
axes[1, 0].set_ylabel('Rich-club coefficient $\\phi(k)$')
axes[1, 0].set_title('Rich-Club Organization')
axes[1, 0].grid(True, alpha=0.3)

# Compare ER and BA assortativity
network_types = ['Erdos-Renyi', 'Barabasi-Albert']
assort_values = [assortativity, assortativity_ba]
colors = ['steelblue', 'coral']
axes[1, 1].bar(network_types, assort_values, color=colors, alpha=0.7, edgecolor='black')
axes[1, 1].axhline(y=0, color='black', linestyle='--', linewidth=1)
axes[1, 1].set_ylabel('Assortativity Coefficient')
axes[1, 1].set_title('Network Assortativity Comparison')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
save_plot('network_motifs.pdf', 'Network motifs, assortativity, and rich-club organization')
\end{pycode}

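As a final illustrative comparison, reusing $n$ and $p$ from the generation block, the expected number of triangles in $G(n, p)$ is $\binom{n}{3} p^3$, which can be set against the observed count:

\begin{pycode}
# Illustrative expectation for the triangle count in G(n, p).
from math import comb
expected_triangles = comb(n, 3) * p**3   # about 19.6 for n=50, p=0.1
\end{pycode}
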
\section{Results Summary}

\begin{pycode}
# Create comprehensive results table
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Network Analysis Results}')
print(r'\begin{tabular}{lcc}')
print(r'\toprule')
print(f'Metric & ER network ($n={n}$) & BA network ($n={n_compare}$) \\\\')
print(r'\midrule')
print(f'Nodes & {n} & {n_compare} \\\\')
print(f'Edges & {n_edges} & {n_edges_ba} \\\\')
print(f'Average Degree & {avg_degree:.2f} & {avg_degree_ba:.2f} \\\\')
print(f'Average Clustering & {avg_clustering:.4f} & {avg_clust_ba:.4f} \\\\')
print(f'Characteristic Path Length & {char_path_length:.2f} & -- \\\\')
print(f'Global Clustering & {global_clustering:.4f} & -- \\\\')
print(f'Assortativity & {assortativity:.3f} & {assortativity_ba:.3f} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')

# Centrality summary table
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Top Nodes by Centrality Measures}')
print(r'\begin{tabular}{lccc}')
print(r'\toprule')
print(r'Rank & Degree & Betweenness & Eigenvector \\')
print(r'\midrule')
for i in range(top_k):
    print(f'{i+1} & Node {top_degree[i]} & Node {top_between[i]} & Node {top_eigen[i]} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')

# Community detection results
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Community Detection Results}')
print(r'\begin{tabular}{lcc}')
print(r'\toprule')
print(r'Method & Communities & Modularity \\')
print(r'\midrule')
print(f'Label Propagation & {n_communities_lp} & {mod_lp:.4f} \\\\')
print(f'Spectral Clustering & {k_communities} & {mod_spec:.4f} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')

# Motif counts
print(r'\begin{table}[H]')
print(r'\centering')
print(r'\caption{Network Motif Counts}')
print(r'\begin{tabular}{lc}')
print(r'\toprule')
print(r'Motif Type & Count \\')
print(r'\midrule')
print(f'Triangles & {tri_count} \\\\')
print(f'3-Stars & {star3_count} \\\\')
print(f'2-Paths & {star2_count} \\\\')
print(r'\bottomrule')
print(r'\end{tabular}')
print(r'\end{table}')
\end{pycode}

\section{Statistical Summary}

Key metrics for the $n = 50$ Erdos-Renyi network analyzed above:
\begin{itemize}
\item Nodes: \py{f"{n}"}, Edges: \py{f"{n_edges}"}
\item Average degree: \py{f"{avg_degree:.2f}"}
\item Average clustering: \py{f"{avg_clustering:.4f}"}
\item Global clustering: \py{f"{global_clustering:.4f}"}
\item Characteristic path length: \py{f"{char_path_length:.2f}"}
\item Network density: \py{f"{density:.4f}"}
\item Number of triangles: \py{f"{triangles}"}
\item Assortativity coefficient: \py{f"{assortativity:.3f}"}
\item Communities detected (label propagation): \py{f"{n_communities_lp}"}
\item Modularity (label propagation): \py{f"{mod_lp:.4f}"}
\item Modularity (spectral clustering): \py{f"{mod_spec:.4f}"}
\end{itemize}

\section{Conclusion}

Network metrics reveal the structural properties of complex systems. This analysis demonstrated:

\begin{enumerate}
\item \textbf{Centrality measures}: Degree, betweenness, closeness, and eigenvector centrality capture different aspects of node importance. The measures correlate strongly in the homogeneous random graph studied here, although they can diverge in networks with pronounced hubs or bottlenecks.

\item \textbf{Community detection}: Label propagation and spectral clustering identify community structure, with modularity quantifying partition quality.

\item \textbf{Random graph models}: The Erdos-Renyi model produces an approximately Poisson (homogeneous) degree distribution, while the Barabasi-Albert model generates scale-free networks with power-law degree distributions through preferential attachment.

\item \textbf{Small-world networks}: The Watts-Strogatz model shows that a small rewiring probability creates networks with high clustering and short path lengths, a combination characteristic of many real-world networks.

\item \textbf{Network motifs}: Triangle counting, assortativity, and rich-club analysis reveal hierarchical organization and hub structure.
\end{enumerate}

These tools apply to social networks, biological systems (protein interactions, neural networks), infrastructure (power grids, transportation), and information networks (WWW, citations). Understanding network topology enables prediction of system behavior, identification of critical nodes, and optimization of network performance.

\end{document}