mq.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /*************************************************************************
  2. * Copyright (c) 2011 AT&T Intellectual Property
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the Eclipse Public License v1.0
  5. * which accompanies this distribution, and is available at
  6. * https://www.eclipse.org/legal/epl-v10.html
  7. *
  8. * Contributors: Details at https://graphviz.org
  9. *************************************************************************/
  10. /* Modularity Quality definition:
  11. We assume undirected graph. Directed graph should be converted by summing edge weights.
  12. Given a partition P of V into k clusters.
  13. Let E(i,j) be the set of edges between cluster i and j.
  14. Let |E(i,j)| be the sum of edge weights of edges in E(i,j).
  15. Let E(i,i) be the set of edges within cluster i, but excluding self-edges.
  16. Let |E(i,i)| be the sum of edge weights of edges in E(i,i).
  17. Let V(i) be the sets of vertices in i
  18. The intra-cluster edges concentration for a cluster i is
  19. (the denominator could be |V(i)|*(|V(i)-1)/2 strictly speaking as we exclude self-edges):
  20. |E(i,i)|
  21. -----------
  22. (|V(i)|^2/2)
  23. The inter-cluster edges concentration between cluster i and j is
  24. |E(i,j)|
  25. ------------
  26. |V(i)|*|V(j)|
  27. So the cluster index is defined as the average intra cluster edge concentration, minus
  28. the inter-cluster edge concentration:
  29. . |E(i,i)| |E(i,j)|
  30. MQ(P) = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1)/2)) * \sum_{i<j} ------------------- = mq_in/k - mq_out/(k*(k-1)/2)
  31. . (|V(i)|^2/2) |V(i)|*|V(j)|
  32. or
  33. . |E(i,i)| |E(i,j)|
  34. MQ(P)/2 = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1))) * \sum_{i<j} ------------------ = mq_in/k - mq_out/(k*(k-1))
  35. . |V(i)|^2 |V(i)|*|V(j)|
  36. Notice that if we assume the graph is unweights (edge weights = 1), then 0<= MQ <= 1.
  37. For weighted graph, MQ may not be within 0 to 1. We could normalized it, but
  38. for comparing clustering quality of the same graph but different partitioning, this
  39. unnormalized quantity is not a problem.
  40. */
  41. #define STANDALONE
  42. #include <cgraph/list.h>
  43. #include <limits.h>
  44. #include <sparse/general.h>
  45. #include <sparse/SparseMatrix.h>
  46. #include <sparse/mq.h>
  47. #include <stdbool.h>
  48. #include <string.h>
  49. #include <util/alloc.h>
  50. static double get_mq(SparseMatrix A, int *assignment, int *ncluster0, double *mq_in0, double *mq_out0, double **dout0){
  51. /* given a symmetric matrix representation of a graph and an assignment of nodes into clusters, calculate the modularity quality.
  52. assignment: assignment[i] gives the cluster assignment of node i. 0 <= assignment[i] < ncluster.
  53. ncluster: number of clusters
  54. mq_in: the part of MQ to do with intra-cluster edges, before divide by 1/k
  55. mq_out: the part of MQ to do with inter-cluster edges, before divide by 1/(k*(k-1))
  56. mq = 2*(mq_in/k - mq_out/(k*(k-1)));
  57. */
  58. int ncluster = 0;
  59. int n = A->m;
  60. bool test_pattern_symmetry_only = false;
  61. int *counts, *ia = A->ia, *ja = A->ja, k, i, j, jj;
  62. double mq_in = 0, mq_out = 0, *a = NULL, Vi, Vj;
  63. int c;
  64. double *dout;
  65. assert(SparseMatrix_is_symmetric(A, test_pattern_symmetry_only));
  66. (void)test_pattern_symmetry_only;
  67. assert(A->n == n);
  68. if (A->type == MATRIX_TYPE_REAL) a = A->a;
  69. counts = gv_calloc(n, sizeof(int));
  70. for (i = 0; i < n; i++){
  71. assert(assignment[i] >= 0 && assignment[i] < n);
  72. if (counts[assignment[i]] == 0) ncluster++;
  73. counts[assignment[i]]++;
  74. }
  75. k = ncluster;
  76. assert(ncluster <= n);
  77. for (i = 0; i < n; i++){
  78. assert(assignment[i] < ncluster);
  79. c = assignment[i];
  80. Vi = counts[c];
  81. for (j = ia[i] ; j < ia[i+1]; j++){
  82. /* ASSUME UNDIRECTED */
  83. jj = ja[j];
  84. if (jj >= i) continue;
  85. assert(assignment[jj] < ncluster);
  86. Vj = counts[assignment[jj]];
  87. if (assignment[jj] == c){
  88. if (a) {
  89. mq_in += a[j]/(Vi*Vi);
  90. } else {
  91. mq_in += 1./(Vi*Vi);
  92. }
  93. } else {
  94. if (a) {
  95. mq_out += a[j]/(Vi*Vj);
  96. } else {
  97. mq_out += 1./(Vi*Vj);
  98. }
  99. }
  100. }
  101. }
  102. /* calculate scaled out degree */
  103. dout = gv_calloc(n, sizeof(double));
  104. for (i = 0; i < n; i++){
  105. for (j = ia[i]; j < ia[i+1]; j++){
  106. jj = ja[j];
  107. if (jj == i) continue;
  108. if (a){
  109. dout[i] += a[j]/(double) counts[assignment[jj]];
  110. } else {
  111. dout[i] += 1./(double) counts[assignment[jj]];
  112. }
  113. }
  114. }
  115. *ncluster0 = k;
  116. *mq_in0 = mq_in;
  117. *mq_out0 = mq_out;
  118. *dout0 = dout;
  119. free(counts);
  120. if (k > 1){
  121. return 2*(mq_in/k - mq_out/(k*(k-1)));
  122. } else {
  123. return 2*mq_in;
  124. }
  125. }
  126. static Multilevel_MQ_Clustering Multilevel_MQ_Clustering_init(SparseMatrix A, int level){
  127. Multilevel_MQ_Clustering grid;
  128. int n = A->n, i;
  129. int *matching;
  130. assert(A->type == MATRIX_TYPE_REAL);
  131. assert(SparseMatrix_is_symmetric(A, false));
  132. if (!A) return NULL;
  133. assert(A->m == n);
  134. grid = gv_alloc(sizeof(struct Multilevel_MQ_Clustering_struct));
  135. grid->level = level;
  136. grid->n = n;
  137. grid->A = A;
  138. grid->P = NULL;
  139. grid->next = NULL;
  140. grid->prev = NULL;
  141. grid->delete_top_level_A = false;
  142. matching = grid->matching = gv_calloc(n, sizeof(double));
  143. grid->deg_intra = NULL;
  144. grid->dout = NULL;
  145. grid->wgt = NULL;
  146. if (level == 0){
  147. double mq = 0, mq_in, mq_out;
  148. int ncluster;
  149. double *deg_intra, *wgt, *dout;
  150. grid->deg_intra = gv_calloc(n, sizeof(double));
  151. deg_intra = grid->deg_intra;
  152. grid->wgt = gv_calloc(n, sizeof(double));
  153. wgt = grid->wgt;
  154. for (i = 0; i < n; i++){
  155. deg_intra[i] = 0;
  156. wgt[i] = 1.;
  157. }
  158. for (i = 0; i < n; i++) matching[i] = i;
  159. mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout);
  160. fprintf(stderr,"ncluster = %d, mq = %f\n", ncluster, mq);
  161. grid->mq = mq;
  162. grid->mq_in = mq_in;
  163. grid->mq_out = mq_out;
  164. grid->dout = dout;
  165. grid->ncluster = ncluster;
  166. }
  167. return grid;
  168. }
  169. static void Multilevel_MQ_Clustering_delete(Multilevel_MQ_Clustering grid){
  170. if (!grid) return;
  171. if (grid->A){
  172. if (grid->level == 0) {
  173. if (grid->delete_top_level_A) SparseMatrix_delete(grid->A);
  174. } else {
  175. SparseMatrix_delete(grid->A);
  176. }
  177. }
  178. SparseMatrix_delete(grid->P);
  179. free(grid->matching);
  180. free(grid->deg_intra);
  181. free(grid->dout);
  182. free(grid->wgt);
  183. Multilevel_MQ_Clustering_delete(grid->next);
  184. free(grid);
  185. }
  186. DEFINE_LIST(ints, int)
  187. static Multilevel_MQ_Clustering Multilevel_MQ_Clustering_establish(Multilevel_MQ_Clustering grid, int maxcluster){
  188. int *matching = grid->matching;
  189. SparseMatrix A = grid->A;
  190. int n = grid->n, level = grid->level, nc = 0, nclusters = n;
  191. double mq = 0, mq_in = 0, mq_out = 0, mq_new, mq_in_new, mq_out_new, mq_max = 0, mq_in_max = 0, mq_out_max = 0;
  192. int *ia = A->ia, *ja = A->ja;
  193. double amax = 0;
  194. double *deg_intra = grid->deg_intra, *wgt = grid->wgt;
  195. int i, j, k, jj, jc, jmax;
  196. double gain = 0, *dout = grid->dout, deg_in_i, deg_in_j, wgt_i, wgt_j, a_ij, dout_i, dout_j, dout_max = 0, wgt_jmax = 0;
  197. double maxgain = 0;
  198. double total_gain = 0;
  199. ints_t *neighbors = gv_calloc(n, sizeof(ints_t));
  200. mq = grid->mq;
  201. mq_in = grid->mq_in;
  202. mq_out = grid->mq_out;
  203. double *deg_intra_new = gv_calloc(n, sizeof(double));
  204. double *wgt_new = gv_calloc(n, sizeof(double));
  205. double *deg_inter = gv_calloc(n, sizeof(double));
  206. int *mask = gv_calloc(n, sizeof(int));
  207. double *dout_new = gv_calloc(n, sizeof(double));
  208. for (i = 0; i < n; i++) mask[i] = -1;
  209. assert(n == A->n);
  210. for (i = 0; i < n; i++) matching[i] = UNMATCHED;
  211. /* gain in merging node A into cluster B is
  212. mq_in_new = mq_in - |E(A,A)|/(V(A))^2 - |E(B,B)|/(V(B))^2 + (|E(A,A)|+|E(B,B)|+|E(A,B)|)/(|V(A)|+|V(B)|)^2
  213. . = mq_in - deg_intra(A)/|A|^2 - deg_intra(B)/|B|^2 + (deg_intra(A)+deg_intra(B)+a(A,B))/(|A|+|B|)^2
  214. mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|)
  215. . + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|)
  216. . = mq_out + a(A,B)/(|A|*|B|)-\sum_{C and A connected} a(A,C)/(|A|*|C|)-\sum_{C and B connected} a(B,C)/(|B|*|C|)
  217. . + \sum_{C connected to A or B, C!=A, C!=B} (a(A,C)+a(B,C))/(|C|*(|A|+|B|))
  218. Denote:
  219. dout(i) = \sum_{j -- i} a(i,j)/|j|
  220. then
  221. mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|)
  222. . + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|)
  223. . = mq_out + a(A,B)/(|A|*|B|)-dout(A)/|A| - dout(B)/|B|
  224. . + (dout(A)+dout(B))/(|A|+|B|) - (a(A,B)/|A|+a(A,B)/|B|)/(|A|+|B|)
  225. . = mq_out -dout(A)/|A| - dout(B)/|B| + (dout(A)+dout(B))/(|A|+|B|)
  226. after merging A and B into cluster AB,
  227. dout(AB) = dout(A) + dout(B);
  228. dout(C) := dout(C) - a(A,C)/|A| - a(B,C)/|B| + a(A,C)/(|A|+|B|) + a(B, C)/(|A|+|B|)
  229. mq_new = mq_in_new/(k-1) - mq_out_new/((k-1)*(k-2))
  230. gain = mq_new - mq
  231. */
  232. double *a = A->a;
  233. for (i = 0; i < n; i++){
  234. if (matching[i] != UNMATCHED) continue;
  235. /* accumulate connections between i and clusters */
  236. for (j = ia[i]; j < ia[i+1]; j++){
  237. jj = ja[j];
  238. if (jj == i) continue;
  239. if ((jc=matching[jj]) != UNMATCHED){
  240. if (mask[jc] != i) {
  241. mask[jc] = i;
  242. deg_inter[jc] = a[j];
  243. } else {
  244. deg_inter[jc] += a[j];
  245. }
  246. }
  247. }
  248. deg_in_i = deg_intra[i];
  249. wgt_i = wgt[i];
  250. dout_i = dout[i];
  251. maxgain = 0;
  252. jmax = -1;
  253. for (j = ia[i]; j < ia[i+1]; j++){
  254. jj = ja[j];
  255. if (jj == i) continue;
  256. jc = matching[jj];
  257. if (jc == UNMATCHED){
  258. a_ij = a[j];
  259. wgt_j = wgt[jj];
  260. deg_in_j = deg_intra[jj];
  261. dout_j = dout[jj];
  262. } else if (deg_inter[jc] < 0){
  263. continue;
  264. } else {
  265. a_ij = deg_inter[jc];
  266. wgt_j = wgt_new[jc];
  267. deg_inter[jc] = -1; // so that we do not redo the calculation when we hit another neighbor in cluster jc
  268. deg_in_j = deg_intra_new[jc];
  269. dout_j = dout_new[jc];
  270. }
  271. mq_in_new = mq_in - deg_in_i/pow(wgt_i, 2) - deg_in_j/pow(wgt_j,2)
  272. + (deg_in_i + deg_in_j + a_ij)/pow(wgt_i + wgt_j,2);
  273. mq_out_new = mq_out - dout_i/wgt_i - dout_j/wgt_j + (dout_i + dout_j)/(wgt_i + wgt_j);
  274. if (nclusters > 2){
  275. mq_new = 2*(mq_in_new/(nclusters - 1) - mq_out_new/((nclusters - 1)*(nclusters - 2)));
  276. } else {
  277. mq_new = 2*mq_in_new/(nclusters - 1);
  278. }
  279. #ifdef DEBUG
  280. {int ncluster;
  281. double mq2, mq_in2, mq_out2, *dout2;
  282. int nc2 = nc;
  283. int *matching2 = gv_calloc(A->m, sizeof(int));
  284. memcpy(matching2, matching, sizeof(double)*A->m);
  285. if (jc != UNMATCHED) {
  286. matching2[i] = jc;
  287. } else {
  288. matching2[i] = nc2;
  289. matching2[jj] = nc2;
  290. nc2++;
  291. }
  292. for (k = 0; k < n; k++) if (matching2[k] == UNMATCHED) matching2[k] =nc2++;
  293. mq2 = get_mq(A, matching2, &ncluster, &mq_in2, &mq_out2, &dout2);
  294. fprintf(stderr," {dout_i, dout_j}={%f,%f}, {predicted, calculated}: mq = {%f, %f}, mq_in ={%f,%f}, mq_out = {%f,%f}\n",dout_i, dout_j, mq_new, mq2, mq_in_new, mq_in2, mq_out_new, mq_out2);
  295. mq_new = mq2;
  296. }
  297. #endif
  298. gain = mq_new - mq;
  299. if (Verbose) fprintf(stderr,"gain in merging node %d with node %d = %f-%f = %f\n", i, jj, mq, mq_new, gain);
  300. if (j == ia[i] || gain > maxgain){
  301. maxgain = gain;
  302. jmax = jj;
  303. amax = a_ij;
  304. dout_max = dout_j;
  305. wgt_jmax = wgt_j;
  306. mq_max = mq_new;
  307. mq_in_max = mq_in_new;
  308. mq_out_max = mq_out_new;
  309. }
  310. }
  311. /* now merge i and jmax */
  312. if (maxgain > 0 || (nc >= 1 && nc > maxcluster)){
  313. total_gain += maxgain;
  314. jc = matching[jmax];
  315. if (jc == UNMATCHED){
  316. fprintf(stderr, "maxgain=%f, merge %d, %d\n",maxgain, i, jmax);
  317. ints_append(&neighbors[nc], jmax);
  318. ints_append(&neighbors[nc], i);
  319. dout_new[nc] = dout_i + dout_max;
  320. matching[i] = matching[jmax] = nc;
  321. wgt_new[nc] = wgt[i] + wgt[jmax];
  322. deg_intra_new[nc] = deg_intra[i] + deg_intra[jmax] + amax;
  323. nc++;
  324. } else {
  325. fprintf(stderr,"maxgain=%f, merge with existing cluster %d, %d\n",maxgain, i, jc);
  326. ints_append(&neighbors[jc], i);
  327. dout_new[jc] = dout_i + dout_max;
  328. wgt_new[jc] += wgt[i];
  329. matching[i] = jc;
  330. deg_intra_new[jc] += deg_intra[i] + amax;
  331. }
  332. mq = mq_max;
  333. mq_in = mq_in_max;
  334. mq_out = mq_out_max;
  335. nclusters--;
  336. } else {
  337. fprintf(stderr,"gain: %f -- no gain, skip merging node %d\n", maxgain, i);
  338. assert(maxgain <= 0);
  339. ints_append(&neighbors[nc], i);
  340. matching[i] = nc;
  341. deg_intra_new[nc] = deg_intra[i];
  342. wgt_new[nc] = wgt[i];
  343. nc++;
  344. }
  345. /* update scaled outdegree of neighbors of i and its merged node/cluster jmax */
  346. jc = matching[i];
  347. for (size_t l = ints_size(&neighbors[jc]) - 1; l != SIZE_MAX; --l) {
  348. mask[ints_get(&neighbors[jc], l)] = n + i;
  349. }
  350. for (size_t l = ints_size(&neighbors[jc]) - 1; l != SIZE_MAX; --l) {
  351. k = ints_get(&neighbors[jc], l);
  352. for (j = ia[k]; j < ia[k+1]; j++){
  353. jj = ja[j];
  354. if (mask[jj] == n+i) continue;/* link to within cluster */
  355. if ((jc = matching[jj]) == UNMATCHED){
  356. if (k == i){
  357. dout[jj] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax);
  358. } else {
  359. dout[jj] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax);
  360. }
  361. } else {
  362. if (k == i){
  363. dout_new[jc] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax);
  364. } else {
  365. dout_new[jc] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax);
  366. }
  367. }
  368. }
  369. }
  370. }
  371. fprintf(stderr,"verbose=%d\n",Verbose);
  372. if (Verbose) fprintf(stderr,"mq = %f new mq = %f level = %d, n = %d, nc = %d, gain = %g, mq_in = %f, mq_out = %f\n", mq, mq + total_gain,
  373. level, n, nc, total_gain, mq_in, mq_out);
  374. #ifdef DEBUG
  375. {int ncluster;
  376. mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout);
  377. fprintf(stderr," mq = %f\n",mq);
  378. }
  379. #endif
  380. if (nc >= 1 && (total_gain > 0 || nc < n)){
  381. /* now set up restriction and prolongation operator */
  382. SparseMatrix P, R, R0, B, cA;
  383. double one = 1.;
  384. Multilevel_MQ_Clustering cgrid;
  385. R0 = SparseMatrix_new(nc, n, 1, MATRIX_TYPE_REAL, FORMAT_COORD);
  386. for (i = 0; i < n; i++){
  387. jj = matching[i];
  388. SparseMatrix_coordinate_form_add_entry(R0, jj, i, &one);
  389. }
  390. R = SparseMatrix_from_coordinate_format(R0);
  391. SparseMatrix_delete(R0);
  392. P = SparseMatrix_transpose(R);
  393. B = SparseMatrix_multiply(R, A);
  394. SparseMatrix_delete(R);
  395. if (!B) {
  396. free(deg_intra_new);
  397. free(wgt_new);
  398. free(dout_new);
  399. goto RETURN;
  400. }
  401. cA = SparseMatrix_multiply(B, P);
  402. SparseMatrix_delete(B);
  403. if (!cA) {
  404. free(deg_intra_new);
  405. free(wgt_new);
  406. free(dout_new);
  407. goto RETURN;
  408. }
  409. grid->P = P;
  410. level++;
  411. cgrid = Multilevel_MQ_Clustering_init(cA, level);
  412. deg_intra_new = gv_recalloc(deg_intra_new, n, nc, sizeof(double));
  413. wgt_new = gv_recalloc(wgt_new, n, nc, sizeof(double));
  414. cgrid->deg_intra = deg_intra_new;
  415. cgrid->mq = grid->mq + total_gain;
  416. cgrid->wgt = wgt_new;
  417. dout_new = gv_recalloc(dout_new, n, nc, sizeof(double));
  418. cgrid->dout = dout_new;
  419. cgrid = Multilevel_MQ_Clustering_establish(cgrid, maxcluster);
  420. grid->next = cgrid;
  421. cgrid->prev = grid;
  422. } else {
  423. /* no more improvement, stop and final clustering found */
  424. for (i = 0; i < n; i++) matching[i] = i;
  425. free(deg_intra_new);
  426. free(wgt_new);
  427. free(dout_new);
  428. }
  429. RETURN:
  430. for (i = 0; i < n; i++) ints_free(&neighbors[i]);
  431. free(neighbors);
  432. free(deg_inter);
  433. free(mask);
  434. return grid;
  435. }
  436. static Multilevel_MQ_Clustering Multilevel_MQ_Clustering_new(SparseMatrix A0, int maxcluster){
  437. /* maxcluster is used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters
  438. is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 */
  439. Multilevel_MQ_Clustering grid;
  440. SparseMatrix A = A0;
  441. if (maxcluster <= 0) maxcluster = A->m;
  442. if (!SparseMatrix_is_symmetric(A, false) || A->type != MATRIX_TYPE_REAL){
  443. A = SparseMatrix_get_real_adjacency_matrix_symmetrized(A);
  444. }
  445. grid = Multilevel_MQ_Clustering_init(A, 0);
  446. grid = Multilevel_MQ_Clustering_establish(grid, maxcluster);
  447. if (A != A0) grid->delete_top_level_A = true; // be sure to clean up later
  448. return grid;
  449. }
  450. static void hierachical_mq_clustering(SparseMatrix A, int maxcluster,
  451. int *nclusters, int **assignment, double *mq){
  452. /* find a clustering of vertices by maximize mq
  453. A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1.
  454. maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters
  455. . is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0
  456. nclusters: on output the number of clusters
  457. assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters
  458. */
  459. Multilevel_MQ_Clustering grid, cgrid;
  460. int *matching, i;
  461. SparseMatrix P;
  462. assert(A->m == A->n);
  463. *mq = 0.;
  464. grid = Multilevel_MQ_Clustering_new(A, maxcluster);
  465. /* find coarsest */
  466. cgrid = grid;
  467. while (cgrid->next){
  468. cgrid = cgrid->next;
  469. }
  470. /* project clustering up */
  471. double *u = gv_calloc(cgrid->n, sizeof(double));
  472. for (i = 0; i < cgrid->n; i++) u[i] = (double) (cgrid->matching)[i];
  473. *nclusters = cgrid->n;
  474. *mq = cgrid->mq;
  475. while (cgrid->prev){
  476. double *v = NULL;
  477. P = cgrid->prev->P;
  478. SparseMatrix_multiply_vector(P, u, &v);
  479. free(u);
  480. u = v;
  481. cgrid = cgrid->prev;
  482. }
  483. if (*assignment){
  484. matching = *assignment;
  485. } else {
  486. matching = gv_calloc(grid->n, sizeof(int));
  487. *assignment = matching;
  488. }
  489. for (i = 0; i < grid->n; i++) (matching)[i] = (int) u[i];
  490. free(u);
  491. Multilevel_MQ_Clustering_delete(grid);
  492. }
  493. void mq_clustering(SparseMatrix A, int maxcluster,
  494. int *nclusters, int **assignment, double *mq){
  495. /* find a clustering of vertices by maximize mq
  496. A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1.
  497. maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters
  498. . is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0
  499. nclusters: on output the number of clusters
  500. assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters
  501. */
  502. SparseMatrix B;
  503. assert(A->m == A->n);
  504. B = SparseMatrix_symmetrize(A, false);
  505. if (B == A) {
  506. B = SparseMatrix_copy(A);
  507. }
  508. B = SparseMatrix_remove_diagonal(B);
  509. if (B->type != MATRIX_TYPE_REAL) B = SparseMatrix_set_entries_to_real_one(B);
  510. hierachical_mq_clustering(B, maxcluster, nclusters, assignment, mq);
  511. if (B != A) SparseMatrix_delete(B);
  512. }