Graphviz 13.0.0~dev.20250204.0809
Loading...
Searching...
No Matches
clustering.c
Go to the documentation of this file.
1/*************************************************************************
2 * Copyright (c) 2011 AT&T Intellectual Property
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * https://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors: Details at https://graphviz.org
9 *************************************************************************/
10
11#define STANDALONE
12#include <math.h>
13#include <sparse/general.h>
14#include <sparse/SparseMatrix.h>
15#include <sparse/clustering.h>
16#include <stdbool.h>
17#include <util/alloc.h>
18
20 int n = A->n, i, j;
21
22 assert(A->type == MATRIX_TYPE_REAL);
23 assert(SparseMatrix_is_symmetric(A, false));
24
25 if (!A) return NULL;
26 assert(A->m == n);
28 grid->level = level;
29 grid->n = n;
30 grid->A = A;
31 grid->P = NULL;
32 grid->next = NULL;
33 grid->prev = NULL;
34 grid->delete_top_level_A = false;
35 grid->matching = gv_calloc(n, sizeof(double));
36 grid->deg = NULL;
37 grid->agglomerate_regardless = false;
38
39 if (level == 0){
40 double modularity = 0;
41 int *ia = A->ia, *ja = A->ja;
42 double deg_total = 0;
43 double *deg, *a = A->a;
44
45 grid->deg_total = 0.;
46 grid->deg = gv_calloc(n, sizeof(double));
47 deg = grid->deg;
48
49 double *indeg = gv_calloc(n, sizeof(double));
50 for (i = 0; i < n; i++){
51 deg[i] = 0;
52 indeg[i] = 0.;
53 for (j = ia[i]; j < ia[i+1]; j++){
54 deg[i] += a[j];
55 if (ja[j] == i) indeg[i] = a[j];
56 }
57 deg_total += deg[i];
58 }
59 deg_total = fmax(deg_total, 1);
60 for (i = 0; i < n; i++){
61 modularity += (indeg[i] - deg[i]*deg[i]/deg_total)/deg_total;
62 }
63 grid->deg_total = deg_total;
64 grid->deg = deg;
65 grid->modularity = modularity;
66 free(indeg);
67 }
68
69
70 return grid;
71}
72
74 if (!grid) return;
75 if (grid->A){
76 if (grid->level == 0) {
77 if (grid->delete_top_level_A) SparseMatrix_delete(grid->A);
78 } else {
80 }
81 }
83 free(grid->matching);
84 free(grid->deg);
85
87 free(grid);
88}
89
91 int *matching = grid->matching;
92 SparseMatrix A = grid->A;
93 int n = grid->n, level = grid->level, nc = 0;
94 double modularity = 0;
95 int *ia = A->ia, *ja = A->ja;
96 double *deg = grid->deg;
97 int i, j, jj, jc, jmax;
98 double inv_deg_total = 1./ grid->deg_total;
99 double gain;
100 double maxgain;
101 double total_gain = 0;
102 modularity = grid->modularity;
103
104 double *deg_new = gv_calloc(n, sizeof(double));
105 double *deg_inter = gv_calloc(n, sizeof(double));
106 int *mask = gv_calloc(n, sizeof(int));
107 for (i = 0; i < n; i++) mask[i] = -1;
108
109 assert(n == A->n);
110 for (i = 0; i < n; i++) matching[i] = UNMATCHED;
111
112 /* gain in merging node i into cluster j is
113 deg(i,j)/deg_total - 2*deg(i)*deg(j)/deg_total^2
114 */
115 double *a = A->a;
116 for (i = 0; i < n; i++){
117 if (matching[i] != UNMATCHED) continue;
118 /* accumulate connections between i and clusters */
119 for (j = ia[i]; j < ia[i+1]; j++){
120 jj = ja[j];
121 if (jj == i) continue;
122 if ((jc=matching[jj]) != UNMATCHED){
123 if (mask[jc] != i) {
124 mask[jc] = i;
125 deg_inter[jc] = a[j];
126 } else {
127 deg_inter[jc] += a[j];
128 }
129 }
130 }
131
132 maxgain = 0;
133 jmax = -1;
134 for (j = ia[i]; j < ia[i+1]; j++){
135 jj = ja[j];
136 if (jj == i) continue;
137 if ((jc=matching[jj]) == UNMATCHED){
138 /* the first 2 is due to the fact that deg_iter gives edge weight from i to jj, but there are also edges from jj to i */
139 gain = (2*a[j] - 2*deg[i]*deg[jj]*inv_deg_total)*inv_deg_total;
140 } else {
141 if (deg_inter[jc] > 0){
142 /* the first 2 is due to the fact that deg_iter gives edge weight from i to jc, but there are also edges from jc to i */
143 gain = (2*deg_inter[jc] - 2*deg[i]*deg_new[jc]*inv_deg_total)*inv_deg_total;
144 // printf("mod = %f deg_inter[jc] =%f, deg[i] = %f, deg_new[jc]=%f, gain = %f\n",modularity, deg_inter[jc],deg[i],deg_new[jc],gain);
145 deg_inter[jc] = -1; /* so that we do not redo the calulation when we hit another neighbor in cluster jc */
146 } else {
147 gain = -1;
148 }
149 }
150 if (jmax < 0 || gain > maxgain){
151 maxgain = gain;
152 jmax = jj;
153 }
154
155 }
156
157 /* now merge i and jmax */
158 if (maxgain > 0 || grid->agglomerate_regardless){
159 total_gain += maxgain;
160 jc = matching[jmax];
161 if (jc == UNMATCHED){
162 //fprintf(stderr, "maxgain=%f, merge %d, %d\n",maxgain, i, jmax);
163 matching[i] = matching[jmax] = nc;
164 deg_new[nc] = deg[i] + deg[jmax];
165 nc++;
166 } else {
167 //fprintf(stderr, "maxgain=%f, merge with existing cluster %d, %d\n",maxgain, i, jc);
168 deg_new[jc] += deg[i];
169 matching[i] = jc;
170 }
171 } else {
172 assert(maxgain <= 0);
173 matching[i] = nc;
174 deg_new[nc] = deg[i];
175 nc++;
176 }
177
178 }
179
180 if (Verbose) fprintf(stderr,"modularity = %f new modularity = %f level = %d, n = %d, nc = %d, gain = %g\n", modularity, modularity + total_gain,
181 level, n, nc, total_gain);
182
183 if (ncluster_target > 0){
184 if (nc <= ncluster_target && n >= ncluster_target){
185 if (n - ncluster_target > ncluster_target - nc){/* ncluster = nc */
186
187 } else if (n - ncluster_target <= ncluster_target - nc){/* ncluster_target close to n */
188 fprintf(stderr,"ncluster_target = %d, close to n=%d\n", ncluster_target, n);
189 for (i = 0; i < n; i++) matching[i] = i;
190 free(deg_new);
191 goto RETURN;
192 }
193 } else if (n < ncluster_target){
194 fprintf(stderr,"n < target\n");
195 for (i = 0; i < n; i++) matching[i] = i;
196 free(deg_new);
197 goto RETURN;
198 }
199 }
200
201 if (nc >= 1 && (total_gain > 0 || nc < n)){
202 /* now set up restriction and prolongation operator */
203 SparseMatrix P, R, R0, B, cA;
204 double one = 1.;
206
208 for (i = 0; i < n; i++){
209 jj = matching[i];
211 }
217 if (!B) {
218 free(deg_new);
219 goto RETURN;
220 }
221 cA = SparseMatrix_multiply(B, P);
223 if (!cA) {
224 free(deg_new);
225 goto RETURN;
226 }
227 grid->P = P;
228 level++;
229 cgrid = Multilevel_Modularity_Clustering_init(cA, level);
230 cgrid->deg = deg_new;
231 cgrid->modularity = grid->modularity + total_gain;
232 cgrid->deg_total = grid->deg_total;
233 cgrid = Multilevel_Modularity_Clustering_establish(cgrid, ncluster_target);
234 grid->next = cgrid;
235 cgrid->prev = grid;
236 } else {
237 /* if we want a small number of cluster but right now we have too many, we will force agglomeration */
238 if (ncluster_target > 0 && nc > ncluster_target && !(grid->agglomerate_regardless)){
240 free(deg_inter);
241 free(mask);
242 free(deg_new);
243 return Multilevel_Modularity_Clustering_establish(grid, ncluster_target);
244 }
245 /* no more improvement, stop and final clustering found */
246 for (i = 0; i < n; i++) matching[i] = i;
247 free(deg_new);
248 }
249
250 RETURN:
251 free(deg_inter);
252 free(mask);
253 return grid;
254}
255
257 /* ncluster_target is used to specify the target number of cluster desired, e.g., ncluster_target=10 means that around 10 clusters
258 is desired. The resulting clustering will give as close to this number as possible.
259 If this number != the optimal number of clusters, the resulting modularity may be lower, or equal to, the optimal modularity.
260 . Agglomeration will be forced even if that reduces the modularity when there are too many clusters. It will stop when nc <= ncluster_target <= nc2,
261 . where nc and nc2 are the number of clusters in the current and next level of clustering. The final cluster number will be
262 . selected among nc or nc2, which ever is closer to ncluster_target.
263 Default: ncluster_target <= 0 */
264
266 SparseMatrix A = A0;
267
268 if (!SparseMatrix_is_symmetric(A, false) || A->type != MATRIX_TYPE_REAL){
270 }
272
274
275 if (A != A0) grid->delete_top_level_A = true; // be sure to clean up later
276 return grid;
277}
278
279
280static void hierachical_modularity_clustering(SparseMatrix A, int ncluster_target,
281 int *nclusters, int **assignment, double *modularity){
282 /* find a clustering of vertices by maximize modularity
283 A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1.
284
285 ncluster_target: is used to specify the target number of cluster desired, e.g., ncluster_target=10 means that around 10 clusters
286 is desired. The resulting clustering will give as close to this number as possible.
287 If this number != the optimal number of clusters, the resulting modularity may be lower, or equal to, the optimal modularity.
288 . Agglomeration will be forced even if that reduces the modularity when there are too many clusters. It will stop when nc <= ncluster_target <= nc2,
289 . where nc and nc2 are the number of clusters in the current and next level of clustering. The final cluster number will be
290 . selected among nc or nc2, which ever is closer to ncluster_target.
291 Default: ncluster_target <= 0
292
293 nclusters: on output the number of clusters
294 assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters
295 */
296
298 int *matching, i;
299 SparseMatrix P;
300 assert(A->m == A->n);
301
302 *modularity = 0.;
303
304 grid = Multilevel_Modularity_Clustering_new(A, ncluster_target);
305
306 /* find coarsest */
307 cgrid = grid;
308 while (cgrid->next){
309 cgrid = cgrid->next;
310 }
311
312 /* project clustering up */
313 double *u = gv_calloc(cgrid->n, sizeof(double));
314 for (i = 0; i < cgrid->n; i++) u[i] = (double) (cgrid->matching)[i];
315 *nclusters = cgrid->n;
316 *modularity = cgrid->modularity;
317
318 while (cgrid->prev){
319 double *v = NULL;
320 P = cgrid->prev->P;
322 free(u);
323 u = v;
324 cgrid = cgrid->prev;
325 }
326
327 if (*assignment){
328 matching = *assignment;
329 } else {
330 matching = gv_calloc(grid->n, sizeof(int));
331 *assignment = matching;
332 }
333 for (i = 0; i < grid->n; i++) (matching)[i] = (int) u[i];
334 free(u);
335
337}
338
339
340
341void modularity_clustering(SparseMatrix A, bool inplace, int ncluster_target,
342 int *nclusters, int **assignment, double *modularity){
343 /* find a clustering of vertices by maximize modularity
344 A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1.
345 inplace: whether A can e modified. If true, A will be modified by removing diagonal.
346 ncluster_target: is used to specify the target number of cluster desired, e.g., ncluster_target=10 means that around 10 clusters
347 is desired. The resulting clustering will give as close to this number as possible.
348 If this number != the optimal number of clusters, the resulting modularity may be lower, or equal to, the optimal modularity.
349 . Agglomeration will be forced even if that reduces the modularity when there are too many clusters. It will stop when nc <= ncluster_target <= nc2,
350 . where nc and nc2 are the number of clusters in the current and next level of clustering. The final cluster number will be
351 . selected among nc or nc2, which ever is closer to ncluster_target.
352 Default: ncluster_target <= 0
353 nclusters: on output the number of clusters
354 assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters
355 */
357
358 assert(A->m == A->n);
359
360 B = SparseMatrix_symmetrize(A, false);
361
362 if (!inplace && B == A) {
364 }
365
367
369
370 hierachical_modularity_clustering(B, ncluster_target, nclusters, assignment, modularity);
371
372 if (B != A) SparseMatrix_delete(B);
373
374}
SparseMatrix SparseMatrix_from_coordinate_format(SparseMatrix A)
SparseMatrix SparseMatrix_transpose(SparseMatrix A)
SparseMatrix SparseMatrix_symmetrize(SparseMatrix A, bool pattern_symmetric_only)
SparseMatrix SparseMatrix_coordinate_form_add_entry(SparseMatrix A, int irn, int jcn, const void *val)
bool SparseMatrix_is_symmetric(SparseMatrix A, bool test_pattern_symmetry_only)
void SparseMatrix_multiply_vector(SparseMatrix A, double *v, double **res)
SparseMatrix SparseMatrix_multiply(SparseMatrix A, SparseMatrix B)
void SparseMatrix_delete(SparseMatrix A)
SparseMatrix SparseMatrix_copy(SparseMatrix A)
SparseMatrix SparseMatrix_set_entries_to_real_one(SparseMatrix A)
SparseMatrix SparseMatrix_get_real_adjacency_matrix_symmetrized(SparseMatrix A)
SparseMatrix SparseMatrix_remove_diagonal(SparseMatrix A)
SparseMatrix SparseMatrix_new(int m, int n, int nz, int type, int format)
@ MATRIX_TYPE_REAL
@ FORMAT_COORD
Memory allocation wrappers that exit on failure.
static void * gv_calloc(size_t nmemb, size_t size)
Definition alloc.h:26
static void * gv_alloc(size_t size)
Definition alloc.h:47
static Multilevel_Modularity_Clustering Multilevel_Modularity_Clustering_establish(Multilevel_Modularity_Clustering grid, int ncluster_target)
Definition clustering.c:90
void modularity_clustering(SparseMatrix A, bool inplace, int ncluster_target, int *nclusters, int **assignment, double *modularity)
Definition clustering.c:341
static Multilevel_Modularity_Clustering Multilevel_Modularity_Clustering_new(SparseMatrix A0, int ncluster_target)
Definition clustering.c:256
static Multilevel_Modularity_Clustering Multilevel_Modularity_Clustering_init(SparseMatrix A, int level)
Definition clustering.c:19
static void hierachical_modularity_clustering(SparseMatrix A, int ncluster_target, int *nclusters, int **assignment, double *modularity)
Definition clustering.c:280
static void Multilevel_Modularity_Clustering_delete(Multilevel_Modularity_Clustering grid)
Definition clustering.c:73
#define A(n, t)
Definition expr.h:76
@ UNMATCHED
Definition general.h:81
static bool Verbose
Definition gml2gv.c:23
void free(void *)
node NULL
Definition grammar.y:163
@ grid
Definition gvgen.c:32
#define B
Definition hierarchy.c:117
#define RETURN(v)
Definition strmatch.c:144
Multilevel_Modularity_Clustering prev
Definition clustering.h:27
Multilevel_Modularity_Clustering next
Definition clustering.h:26