Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
chinoogawa
GitHub Repository: chinoogawa/fbht
Path: blob/master/community.py
206 views
1
#!/usr/bin/python
2
# -*- coding: utf-8 -*-
3
"""
4
This module implements community detection.
5
"""
6
__all__ = ["partition_at_level", "modularity", "best_partition", "generate_dendogram", "induced_graph"]
7
__author__ = """Thomas Aynaud ([email protected])"""
8
# Copyright (C) 2009 by
9
# Thomas Aynaud <[email protected]>
10
# All rights reserved.
11
# BSD license.
12
13
__PASS_MAX = -1
14
__MIN = 0.0000001
15
16
import networkx as nx
17
import sys
18
import types
19
import array
20
21
22
def partition_at_level(dendogram, level) :
23
"""Return the partition of the nodes at the given level
24
25
A dendogram is a tree and each level is a partition of the graph nodes.
26
Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1.
27
The higher the level is, the bigger are the communities
28
29
Parameters
30
----------
31
dendogram : list of dict
32
a list of partitions, ie dictionnaries where keys of the i+1 are the values of the i.
33
level : int
34
the level which belongs to [0..len(dendogram)-1]
35
36
Returns
37
-------
38
partition : dictionnary
39
A dictionary where keys are the nodes and the values are the set it belongs to
40
41
Raises
42
------
43
KeyError
44
If the dendogram is not well formed or the level is too high
45
46
See Also
47
--------
48
best_partition which directly combines partition_at_level and generate_dendogram to obtain the partition of highest modularity
49
50
Examples
51
--------
52
>>> G=nx.erdos_renyi_graph(100, 0.01)
53
>>> dendo = generate_dendogram(G)
54
>>> for level in range(len(dendo) - 1) :
55
>>> print "partition at level", level, "is", partition_at_level(dendo, level)
56
"""
57
partition = dendogram[0].copy()
58
for index in range(1, level + 1) :
59
for node, community in partition.iteritems() :
60
partition[node] = dendogram[index][community]
61
return partition
62
63
64
def modularity(partition, graph) :
65
"""Compute the modularity of a partition of a graph
66
67
Parameters
68
----------
69
partition : dict
70
the partition of the nodes, i.e a dictionary where keys are their nodes and values the communities
71
graph : networkx.Graph
72
the networkx graph which is decomposed
73
74
Returns
75
-------
76
modularity : float
77
The modularity
78
79
Raises
80
------
81
KeyError
82
If the partition is not a partition of all graph nodes
83
ValueError
84
If the graph has no link
85
TypeError
86
If graph is not a networkx.Graph
87
88
References
89
----------
90
.. 1. Newman, M.E.J. & Girvan, M. Finding and evaluating community structure in networks. Physical Review E 69, 26113(2004).
91
92
Examples
93
--------
94
>>> G=nx.erdos_renyi_graph(100, 0.01)
95
>>> part = best_partition(G)
96
>>> modularity(part, G)
97
"""
98
if type(graph) != nx.Graph :
99
raise TypeError("Bad graph type, use only non directed graph")
100
101
inc = dict([])
102
deg = dict([])
103
links = graph.size(weight='weight')
104
if links == 0 :
105
raise ValueError("A graph without link has an undefined modularity")
106
107
for node in graph :
108
com = partition[node]
109
deg[com] = deg.get(com, 0.) + graph.degree(node, weight = 'weight')
110
for neighbor, datas in graph[node].iteritems() :
111
weight = datas.get("weight", 1)
112
if partition[neighbor] == com :
113
if neighbor == node :
114
inc[com] = inc.get(com, 0.) + float(weight)
115
else :
116
inc[com] = inc.get(com, 0.) + float(weight) / 2.
117
118
res = 0.
119
for com in set(partition.values()) :
120
res += (inc.get(com, 0.) / links) - (deg.get(com, 0.) / (2.*links))**2
121
return res
122
123
124
def best_partition(graph, partition = None) :
125
"""Compute the partition of the graph nodes which maximises the modularity
126
(or try..) using the Louvain heuristices
127
128
This is the partition of highest modularity, i.e. the highest partition of the dendogram
129
generated by the Louvain algorithm.
130
131
Parameters
132
----------
133
graph : networkx.Graph
134
the networkx graph which is decomposed
135
partition : dict, optionnal
136
the algorithm will start using this partition of the nodes. It's a dictionary where keys are their nodes and values the communities
137
138
Returns
139
-------
140
partition : dictionnary
141
The partition, with communities numbered from 0 to number of communities
142
143
Raises
144
------
145
NetworkXError
146
If the graph is not Eulerian.
147
148
See Also
149
--------
150
generate_dendogram to obtain all the decompositions levels
151
152
Notes
153
-----
154
Uses Louvain algorithm
155
156
References
157
----------
158
.. 1. Blondel, V.D. et al. Fast unfolding of communities in large networks. J. Stat. Mech 10008, 1-12(2008).
159
160
Examples
161
--------
162
>>> #Basic usage
163
>>> G=nx.erdos_renyi_graph(100, 0.01)
164
>>> part = best_partition(G)
165
166
>>> #other example to display a graph with its community :
167
>>> #better with karate_graph() as defined in networkx examples
168
>>> #erdos renyi don't have true community structure
169
>>> G = nx.erdos_renyi_graph(30, 0.05)
170
>>> #first compute the best partition
171
>>> partition = community.best_partition(G)
172
>>> #drawing
173
>>> size = float(len(set(partition.values())))
174
>>> pos = nx.spring_layout(G)
175
>>> count = 0.
176
>>> for com in set(partition.values()) :
177
>>> count = count + 1.
178
>>> list_nodes = [nodes for nodes in partition.keys()
179
>>> if partition[nodes] == com]
180
>>> nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20,
181
node_color = str(count / size))
182
>>> nx.draw_networkx_edges(G,pos, alpha=0.5)
183
>>> plt.show()
184
"""
185
dendo = generate_dendogram(graph, partition)
186
return partition_at_level(dendo, len(dendo) - 1 )
187
188
189
def generate_dendogram(graph, part_init = None) :
190
"""Find communities in the graph and return the associated dendogram
191
192
A dendogram is a tree and each level is a partition of the graph nodes. Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1. The higher the level is, the bigger are the communities
193
194
195
Parameters
196
----------
197
graph : networkx.Graph
198
the networkx graph which will be decomposed
199
part_init : dict, optionnal
200
the algorithm will start using this partition of the nodes. It's a dictionary where keys are their nodes and values the communities
201
202
Returns
203
-------
204
dendogram : list of dictionaries
205
a list of partitions, ie dictionnaries where keys of the i+1 are the values of the i. and where keys of the first are the nodes of graph
206
207
Raises
208
------
209
TypeError
210
If the graph is not a networkx.Graph
211
212
See Also
213
--------
214
best_partition
215
216
Notes
217
-----
218
Uses Louvain algorithm
219
220
References
221
----------
222
.. 1. Blondel, V.D. et al. Fast unfolding of communities in large networks. J. Stat. Mech 10008, 1-12(2008).
223
224
Examples
225
--------
226
>>> G=nx.erdos_renyi_graph(100, 0.01)
227
>>> dendo = generate_dendogram(G)
228
>>> for level in range(len(dendo) - 1) :
229
>>> print "partition at level", level, "is", partition_at_level(dendo, level)
230
"""
231
if type(graph) != nx.Graph :
232
raise TypeError("Bad graph type, use only non directed graph")
233
current_graph = graph.copy()
234
status = Status()
235
status.init(current_graph, part_init)
236
mod = __modularity(status)
237
status_list = list()
238
__one_level(current_graph, status)
239
new_mod = __modularity(status)
240
partition = __renumber(status.node2com)
241
status_list.append(partition)
242
mod = new_mod
243
current_graph = induced_graph(partition, current_graph)
244
status.init(current_graph)
245
246
while True :
247
__one_level(current_graph, status)
248
new_mod = __modularity(status)
249
if new_mod - mod < __MIN :
250
break
251
partition = __renumber(status.node2com)
252
status_list.append(partition)
253
mod = new_mod
254
current_graph = induced_graph(partition, current_graph)
255
status.init(current_graph)
256
return status_list[:]
257
258
259
def induced_graph(partition, graph) :
260
"""Produce the graph where nodes are the communities
261
262
there is a link of weight w between communities if the sum of the weights of the links between their elements is w
263
264
Parameters
265
----------
266
partition : dict
267
a dictionary where keys are graph nodes and values the part the node belongs to
268
graph : networkx.Graph
269
the initial graph
270
271
Returns
272
-------
273
g : networkx.Graph
274
a networkx graph where nodes are the parts
275
276
Examples
277
--------
278
>>> n = 5
279
>>> g = nx.complete_graph(2*n)
280
>>> part = dict([])
281
>>> for node in g.nodes() :
282
>>> part[node] = node % 2
283
>>> ind = induced_graph(part, g)
284
>>> goal = nx.Graph()
285
>>> goal.add_weighted_edges_from([(0,1,n*n),(0,0,n*(n-1)/2), (1, 1, n*(n-1)/2)])
286
>>> nx.is_isomorphic(int, goal)
287
True
288
"""
289
ret = nx.Graph()
290
ret.add_nodes_from(partition.values())
291
292
for node1, node2, datas in graph.edges_iter(data = True) :
293
weight = datas.get("weight", 1)
294
com1 = partition[node1]
295
com2 = partition[node2]
296
w_prec = ret.get_edge_data(com1, com2, {"weight":0}).get("weight", 1)
297
ret.add_edge(com1, com2, weight = w_prec + weight)
298
299
return ret
300
301
302
def __renumber(dictionary) :
303
"""Renumber the values of the dictionary from 0 to n
304
"""
305
count = 0
306
ret = dictionary.copy()
307
new_values = dict([])
308
309
for key in dictionary.keys() :
310
value = dictionary[key]
311
new_value = new_values.get(value, -1)
312
if new_value == -1 :
313
new_values[value] = count
314
new_value = count
315
count = count + 1
316
ret[key] = new_value
317
318
return ret
319
320
321
def __load_binary(data) :
322
"""Load binary graph as used by the cpp implementation of this algorithm
323
"""
324
if type(data) == types.StringType :
325
data = open(data, "rb")
326
327
reader = array.array("I")
328
reader.fromfile(data, 1)
329
num_nodes = reader.pop()
330
reader = array.array("I")
331
reader.fromfile(data, num_nodes)
332
cum_deg = reader.tolist()
333
num_links = reader.pop()
334
reader = array.array("I")
335
reader.fromfile(data, num_links)
336
links = reader.tolist()
337
graph = nx.Graph()
338
graph.add_nodes_from(range(num_nodes))
339
prec_deg = 0
340
341
for index in range(num_nodes) :
342
last_deg = cum_deg[index]
343
neighbors = links[prec_deg:last_deg]
344
graph.add_edges_from([(index, int(neigh)) for neigh in neighbors])
345
prec_deg = last_deg
346
347
return graph
348
349
350
def __one_level(graph, status) :
351
"""Compute one level of communities
352
"""
353
modif = True
354
nb_pass_done = 0
355
cur_mod = __modularity(status)
356
new_mod = cur_mod
357
358
while modif and nb_pass_done != __PASS_MAX :
359
cur_mod = new_mod
360
modif = False
361
nb_pass_done += 1
362
363
for node in graph.nodes() :
364
com_node = status.node2com[node]
365
degc_totw = status.gdegrees.get(node, 0.) / (status.total_weight*2.)
366
neigh_communities = __neighcom(node, graph, status)
367
__remove(node, com_node,
368
neigh_communities.get(com_node, 0.), status)
369
best_com = com_node
370
best_increase = 0
371
for com, dnc in neigh_communities.iteritems() :
372
incr = dnc - status.degrees.get(com, 0.) * degc_totw
373
if incr > best_increase :
374
best_increase = incr
375
best_com = com
376
__insert(node, best_com,
377
neigh_communities.get(best_com, 0.), status)
378
if best_com != com_node :
379
modif = True
380
new_mod = __modularity(status)
381
if new_mod - cur_mod < __MIN :
382
break
383
384
385
class Status :
386
"""
387
To handle several data in one struct.
388
389
Could be replaced by named tuple, but don't want to depend on python 2.6
390
"""
391
node2com = {}
392
total_weight = 0
393
internals = {}
394
degrees = {}
395
gdegrees = {}
396
397
def __init__(self) :
398
self.node2com = dict([])
399
self.total_weight = 0
400
self.degrees = dict([])
401
self.gdegrees = dict([])
402
self.internals = dict([])
403
self.loops = dict([])
404
405
def __str__(self) :
406
return ("node2com : " + str(self.node2com) + " degrees : "
407
+ str(self.degrees) + " internals : " + str(self.internals)
408
+ " total_weight : " + str(self.total_weight))
409
410
def copy(self) :
411
"""Perform a deep copy of status"""
412
new_status = Status()
413
new_status.node2com = self.node2com.copy()
414
new_status.internals = self.internals.copy()
415
new_status.degrees = self.degrees.copy()
416
new_status.gdegrees = self.gdegrees.copy()
417
new_status.total_weight = self.total_weight
418
419
def init(self, graph, part = None) :
420
"""Initialize the status of a graph with every node in one community"""
421
count = 0
422
self.node2com = dict([])
423
self.total_weight = 0
424
self.degrees = dict([])
425
self.gdegrees = dict([])
426
self.internals = dict([])
427
self.total_weight = graph.size(weight = 'weight')
428
if part == None :
429
for node in graph.nodes() :
430
self.node2com[node] = count
431
deg = float(graph.degree(node, weight = 'weight'))
432
self.degrees[count] = deg
433
self.gdegrees[node] = deg
434
self.loops[node] = float(graph.get_edge_data(node, node,
435
{"weight":0}).get("weight", 1))
436
self.internals[count] = self.loops[node]
437
count = count + 1
438
else :
439
for node in graph.nodes() :
440
com = part[node]
441
self.node2com[node] = com
442
deg = float(graph.degree(node, weigh = 'weight'))
443
self.degrees[com] = self.degrees.get(com, 0) + deg
444
self.gdegrees[node] = deg
445
inc = 0.
446
for neighbor, datas in graph[node].iteritems() :
447
weight = datas.get("weight", 1)
448
if part[neighbor] == com :
449
if neighbor == node :
450
inc += float(weight)
451
else :
452
inc += float(weight) / 2.
453
self.internals[com] = self.internals.get(com, 0) + inc
454
455
456
457
def __neighcom(node, graph, status) :
458
"""
459
Compute the communities in the neighborood of node in the graph given
460
with the decomposition node2com
461
"""
462
weights = {}
463
for neighbor, datas in graph[node].iteritems() :
464
if neighbor != node :
465
weight = datas.get("weight", 1)
466
neighborcom = status.node2com[neighbor]
467
weights[neighborcom] = weights.get(neighborcom, 0) + weight
468
469
return weights
470
471
472
def __remove(node, com, weight, status) :
473
""" Remove node from community com and modify status"""
474
status.degrees[com] = ( status.degrees.get(com, 0.)
475
- status.gdegrees.get(node, 0.) )
476
status.internals[com] = float( status.internals.get(com, 0.) -
477
weight - status.loops.get(node, 0.) )
478
status.node2com[node] = -1
479
480
481
def __insert(node, com, weight, status) :
482
""" Insert node into community and modify status"""
483
status.node2com[node] = com
484
status.degrees[com] = ( status.degrees.get(com, 0.) +
485
status.gdegrees.get(node, 0.) )
486
status.internals[com] = float( status.internals.get(com, 0.) +
487
weight + status.loops.get(node, 0.) )
488
489
490
def __modularity(status) :
491
"""
492
Compute the modularity of the partition of the graph faslty using status precomputed
493
"""
494
links = float(status.total_weight)
495
result = 0.
496
for community in set(status.node2com.values()) :
497
in_degree = status.internals.get(community, 0.)
498
degree = status.degrees.get(community, 0.)
499
if links > 0 :
500
result = result + in_degree / links - ((degree / (2.*links))**2)
501
return result
502
503
504
def __main() :
505
"""Main function to mimic C++ version behavior"""
506
try :
507
filename = sys.argv[1]
508
graphfile = __load_binary(filename)
509
partition = best_partition(graphfile)
510
print >> sys.stderr, str(modularity(partition, graphfile))
511
for elem, part in partition.iteritems() :
512
print str(elem) + " " + str(part)
513
except (IndexError, IOError):
514
print "Usage : ./community filename"
515
print "find the communities in graph filename and display the dendogram"
516
print "Parameters:"
517
print "filename is a binary file as generated by the "
518
print "convert utility distributed with the C implementation"
519
520
521
522
if __name__ == "__main__" :
523
__main()
524
525
526