Graphviz 12.0.1~dev.20240716.0800
Loading...
Searching...
No Matches
clustering.c
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (c) 2011 AT&T Intellectual Property
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * https://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors: Details at https://graphviz.org
9 *************************************************************************/
10
11#define STANDALONE
12#include <cgraph/alloc.h>
13#include <math.h>
14#include <sparse/general.h>
15#include <sparse/SparseMatrix.h>
16#include <sparse/clustering.h>
17#include <stdbool.h>
18
19
21 int n = A->n, i, j;
22
23 assert(A->type == MATRIX_TYPE_REAL);
24 assert(SparseMatrix_is_symmetric(A, false));
25
26 if (!A) return NULL;
27 assert(A->m == n);
29 grid->level = level;
30 grid->n = n;
31 grid->A = A;
32 grid->P = NULL;
33 grid->next = NULL;
34 grid->prev = NULL;
35 grid->delete_top_level_A = false;
36 grid->matching = gv_calloc(n, sizeof(double));
37 grid->deg = NULL;
38 grid->agglomerate_regardless = false;
39
40 if (level == 0){
41 double modularity = 0;
42 int *ia = A->ia, *ja = A->ja;
43 double deg_total = 0;
44 double *deg, *a = A->a;
45
46 grid->deg_total = 0.;
47 grid->deg = gv_calloc(n, sizeof(double));
48 deg = grid->deg;
49
50 double *indeg = gv_calloc(n, sizeof(double));
51 for (i = 0; i < n; i++){
52 deg[i] = 0;
53 indeg[i] = 0.;
54 for (j = ia[i]; j < ia[i+1]; j++){
55 deg[i] += a[j];
56 if (ja[j] == i) indeg[i] = a[j];
57 }
58 deg_total += deg[i];
59 }
60 deg_total = fmax(deg_total, 1);
61 for (i = 0; i < n; i++){
62 modularity += (indeg[i] - deg[i]*deg[i]/deg_total)/deg_total;
63 }
64 grid->deg_total = deg_total;
65 grid->deg = deg;
66 grid->modularity = modularity;
67 free(indeg);
68 }
69
70
71 return grid;
72}
73
75 if (!grid) return;
76 if (grid->A){
77 if (grid->level == 0) {
78 if (grid->delete_top_level_A) SparseMatrix_delete(grid->A);
79 } else {
81 }
82 }
84 free(grid->matching);
85 free(grid->deg);
86
88 free(grid);
89}
90
92 int *matching = grid->matching;
93 SparseMatrix A = grid->A;
94 int n = grid->n, level = grid->level, nc = 0;
95 double modularity = 0;
96 int *ia = A->ia, *ja = A->ja;
97 double *deg = grid->deg;
98 int i, j, jj, jc, jmax;
99 double inv_deg_total = 1./ grid->deg_total;
100 double gain;
101 double maxgain;
102 double total_gain = 0;
103 modularity = grid->modularity;
104
105 double *deg_new = gv_calloc(n, sizeof(double));
106 double *deg_inter = gv_calloc(n, sizeof(double));
107 int *mask = gv_calloc(n, sizeof(int));
108 for (i = 0; i < n; i++) mask[i] = -1;
109
110 assert(n == A->n);
111 for (i = 0; i < n; i++) matching[i] = UNMATCHED;
112
113 /* gain in merging node i into cluster j is
114 deg(i,j)/deg_total - 2*deg(i)*deg(j)/deg_total^2
115 */
116 double *a = A->a;
117 for (i = 0; i < n; i++){
118 if (matching[i] != UNMATCHED) continue;
119 /* accumulate connections between i and clusters */
120 for (j = ia[i]; j < ia[i+1]; j++){
121 jj = ja[j];
122 if (jj == i) continue;
123 if ((jc=matching[jj]) != UNMATCHED){
124 if (mask[jc] != i) {
125 mask[jc] = i;
126 deg_inter[jc] = a[j];
127 } else {
128 deg_inter[jc] += a[j];
129 }
130 }
131 }
132
133 maxgain = 0;
134 jmax = -1;
135 for (j = ia[i]; j < ia[i+1]; j++){
136 jj = ja[j];
137 if (jj == i) continue;
138 if ((jc=matching[jj]) == UNMATCHED){
139 /* the first 2 is due to the fact that deg_iter gives edge weight from i to jj, but there are also edges from jj to i */
140 gain = (2*a[j] - 2*deg[i]*deg[jj]*inv_deg_total)*inv_deg_total;
141 } else {
142 if (deg_inter[jc] > 0){
143 /* the first 2 is due to the fact that deg_iter gives edge weight from i to jc, but there are also edges from jc to i */
144 gain = (2*deg_inter[jc] - 2*deg[i]*deg_new[jc]*inv_deg_total)*inv_deg_total;
145 // printf("mod = %f deg_inter[jc] =%f, deg[i] = %f, deg_new[jc]=%f, gain = %f\n",modularity, deg_inter[jc],deg[i],deg_new[jc],gain);
146 deg_inter[jc] = -1; /* so that we do not redo the calulation when we hit another neighbor in cluster jc */
147 } else {
148 gain = -1;
149 }
150 }
151 if (jmax < 0 || gain > maxgain){
152 maxgain = gain;
153 jmax = jj;
154 }
155
156 }
157
158 /* now merge i and jmax */
159 if (maxgain > 0 || grid->agglomerate_regardless){
160 total_gain += maxgain;
161 jc = matching[jmax];
162 if (jc == UNMATCHED){
163 //fprintf(stderr, "maxgain=%f, merge %d, %d\n",maxgain, i, jmax);
164 matching[i] = matching[jmax] = nc;
165 deg_new[nc] = deg[i] + deg[jmax];
166 nc++;
167 } else {
168 //fprintf(stderr, "maxgain=%f, merge with existing cluster %d, %d\n",maxgain, i, jc);
169 deg_new[jc] += deg[i];
170 matching[i] = jc;
171 }
172 } else {
173 assert(maxgain <= 0);
174 matching[i] = nc;
175 deg_new[nc] = deg[i];
176 nc++;
177 }
178
179 }
180
181 if (Verbose) fprintf(stderr,"modularity = %f new modularity = %f level = %d, n = %d, nc = %d, gain = %g\n", modularity, modularity + total_gain,
182 level, n, nc, total_gain);
183
184 if (ncluster_target > 0){
185 if (nc <= ncluster_target && n >= ncluster_target){
186 if (n - ncluster_target > ncluster_target - nc){/* ncluster = nc */
187
188 } else if (n - ncluster_target <= ncluster_target - nc){/* ncluster_target close to n */
189 fprintf(stderr,"ncluster_target = %d, close to n=%d\n", ncluster_target, n);
190 for (i = 0; i < n; i++) matching[i] = i;
191 free(deg_new);
192 goto RETURN;
193 }
194 } else if (n < ncluster_target){
195 fprintf(stderr,"n < target\n");
196 for (i = 0; i < n; i++) matching[i] = i;
197 free(deg_new);
198 goto RETURN;
199 }
200 }
201
202 if (nc >= 1 && (total_gain > 0 || nc < n)){
203 /* now set up restriction and prolongation operator */
204 SparseMatrix P, R, R0, B, cA;
205 double one = 1.;
207
209 for (i = 0; i < n; i++){
210 jj = matching[i];
212 }
218 if (!B) {
219 free(deg_new);
220 goto RETURN;
221 }
222 cA = SparseMatrix_multiply(B, P);
224 if (!cA) {
225 free(deg_new);
226 goto RETURN;
227 }
228 grid->P = P;
229 level++;
230 cgrid = Multilevel_Modularity_Clustering_init(cA, level);
231 cgrid->deg = deg_new;
232 cgrid->modularity = grid->modularity + total_gain;
233 cgrid->deg_total = grid->deg_total;
234 cgrid = Multilevel_Modularity_Clustering_establish(cgrid, ncluster_target);
235 grid->next = cgrid;
236 cgrid->prev = grid;
237 } else {
238 /* if we want a small number of cluster but right now we have too many, we will force agglomeration */
239 if (ncluster_target > 0 && nc > ncluster_target && !(grid->agglomerate_regardless)){
241 free(deg_inter);
242 free(mask);
243 free(deg_new);
244 return Multilevel_Modularity_Clustering_establish(grid, ncluster_target);
245 }
246 /* no more improvement, stop and final clustering found */
247 for (i = 0; i < n; i++) matching[i] = i;
248 free(deg_new);
249 }
250
251 RETURN:
252 free(deg_inter);
253 free(mask);
254 return grid;
255}
256
258 /* ncluster_target is used to specify the target number of cluster desired, e.g., ncluster_target=10 means that around 10 clusters
259 is desired. The resulting clustering will give as close to this number as possible.
260 If this number != the optimal number of clusters, the resulting modularity may be lower, or equal to, the optimal modularity.
261 . Agglomeration will be forced even if that reduces the modularity when there are too many clusters. It will stop when nc <= ncluster_target <= nc2,
262 . where nc and nc2 are the number of clusters in the current and next level of clustering. The final cluster number will be
263 . selected among nc or nc2, which ever is closer to ncluster_target.
264 Default: ncluster_target <= 0 */
265
267 SparseMatrix A = A0;
268
269 if (!SparseMatrix_is_symmetric(A, false) || A->type != MATRIX_TYPE_REAL){
271 }
273
275
276 if (A != A0) grid->delete_top_level_A = true; // be sure to clean up later
277 return grid;
278}
279
280
281static void hierachical_modularity_clustering(SparseMatrix A, int ncluster_target,
282 int *nclusters, int **assignment, double *modularity){
283 /* find a clustering of vertices by maximize modularity
284 A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1.
285
286 ncluster_target: is used to specify the target number of cluster desired, e.g., ncluster_target=10 means that around 10 clusters
287 is desired. The resulting clustering will give as close to this number as possible.
288 If this number != the optimal number of clusters, the resulting modularity may be lower, or equal to, the optimal modularity.
289 . Agglomeration will be forced even if that reduces the modularity when there are too many clusters. It will stop when nc <= ncluster_target <= nc2,
290 . where nc and nc2 are the number of clusters in the current and next level of clustering. The final cluster number will be
291 . selected among nc or nc2, which ever is closer to ncluster_target.
292 Default: ncluster_target <= 0
293
294 nclusters: on output the number of clusters
295 assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters
296 */
297
299 int *matching, i;
300 SparseMatrix P;
301 assert(A->m == A->n);
302
303 *modularity = 0.;
304
305 grid = Multilevel_Modularity_Clustering_new(A, ncluster_target);
306
307 /* find coarsest */
308 cgrid = grid;
309 while (cgrid->next){
310 cgrid = cgrid->next;
311 }
312
313 /* project clustering up */
314 double *u = gv_calloc(cgrid->n, sizeof(double));
315 for (i = 0; i < cgrid->n; i++) u[i] = (double) (cgrid->matching)[i];
316 *nclusters = cgrid->n;
317 *modularity = cgrid->modularity;
318
319 while (cgrid->prev){
320 double *v = NULL;
321 P = cgrid->prev->P;
323 free(u);
324 u = v;
325 cgrid = cgrid->prev;
326 }
327
328 if (*assignment){
329 matching = *assignment;
330 } else {
331 matching = gv_calloc(grid->n, sizeof(int));
332 *assignment = matching;
333 }
334 for (i = 0; i < grid->n; i++) (matching)[i] = (int) u[i];
335 free(u);
336
338}
339
340
341
342void modularity_clustering(SparseMatrix A, bool inplace, int ncluster_target,
343 int *nclusters, int **assignment, double *modularity){
344 /* find a clustering of vertices by maximize modularity
345 A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1.
346 inplace: whether A can e modified. If true, A will be modified by removing diagonal.
347 ncluster_target: is used to specify the target number of cluster desired, e.g., ncluster_target=10 means that around 10 clusters
348 is desired. The resulting clustering will give as close to this number as possible.
349 If this number != the optimal number of clusters, the resulting modularity may be lower, or equal to, the optimal modularity.
350 . Agglomeration will be forced even if that reduces the modularity when there are too many clusters. It will stop when nc <= ncluster_target <= nc2,
351 . where nc and nc2 are the number of clusters in the current and next level of clustering. The final cluster number will be
352 . selected among nc or nc2, which ever is closer to ncluster_target.
353 Default: ncluster_target <= 0
354 nclusters: on output the number of clusters
355 assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters
356 */
358
359 assert(A->m == A->n);
360
361 B = SparseMatrix_symmetrize(A, false);
362
363 if (!inplace && B == A) {
365 }
366
368
370
371 hierachical_modularity_clustering(B, ncluster_target, nclusters, assignment, modularity);
372
373 if (B != A) SparseMatrix_delete(B);
374
375}
SparseMatrix SparseMatrix_from_coordinate_format(SparseMatrix A)
SparseMatrix SparseMatrix_transpose(SparseMatrix A)
SparseMatrix SparseMatrix_symmetrize(SparseMatrix A, bool pattern_symmetric_only)
SparseMatrix SparseMatrix_coordinate_form_add_entry(SparseMatrix A, int irn, int jcn, const void *val)
bool SparseMatrix_is_symmetric(SparseMatrix A, bool test_pattern_symmetry_only)
void SparseMatrix_multiply_vector(SparseMatrix A, double *v, double **res)
SparseMatrix SparseMatrix_multiply(SparseMatrix A, SparseMatrix B)
void SparseMatrix_delete(SparseMatrix A)
SparseMatrix SparseMatrix_copy(SparseMatrix A)
SparseMatrix SparseMatrix_set_entries_to_real_one(SparseMatrix A)
SparseMatrix SparseMatrix_get_real_adjacency_matrix_symmetrized(SparseMatrix A)
SparseMatrix SparseMatrix_remove_diagonal(SparseMatrix A)
SparseMatrix SparseMatrix_new(int m, int n, int nz, int type, int format)
@ FORMAT_COORD
@ MATRIX_TYPE_REAL
Memory allocation wrappers that exit on failure.
static void * gv_calloc(size_t nmemb, size_t size)
Definition alloc.h:26
static void * gv_alloc(size_t size)
Definition alloc.h:47
static Multilevel_Modularity_Clustering Multilevel_Modularity_Clustering_establish(Multilevel_Modularity_Clustering grid, int ncluster_target)
Definition clustering.c:91
void modularity_clustering(SparseMatrix A, bool inplace, int ncluster_target, int *nclusters, int **assignment, double *modularity)
Definition clustering.c:342
static Multilevel_Modularity_Clustering Multilevel_Modularity_Clustering_new(SparseMatrix A0, int ncluster_target)
Definition clustering.c:257
static Multilevel_Modularity_Clustering Multilevel_Modularity_Clustering_init(SparseMatrix A, int level)
Definition clustering.c:20
static void hierachical_modularity_clustering(SparseMatrix A, int ncluster_target, int *nclusters, int **assignment, double *modularity)
Definition clustering.c:281
static void Multilevel_Modularity_Clustering_delete(Multilevel_Modularity_Clustering grid)
Definition clustering.c:74
#define A(n, t)
Definition expr.h:76
@ UNMATCHED
Definition general.h:85
static int Verbose
Definition gml2gv.c:22
void free(void *)
node NULL
Definition grammar.y:149
@ grid
Definition gvgen.c:32
#define B
Definition hierarchy.c:117
#define RETURN(v)
Definition strmatch.c:144
Multilevel_Modularity_Clustering prev
Definition clustering.h:27
Multilevel_Modularity_Clustering next
Definition clustering.h:26