dotmcl.pl 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!perl -w
  2. # dotmcl.pl <factor> <in.gv> <out.gv>
  3. # <factor> the bigger, the more clusters (values from 1.2 to 3.0)
  4. # <in.gv> dot in file to clusterize
  5. # <out.gv> dot out file with clusters added
  6. # Vladimir Alexiev <[email protected]>
  7. # This quick hack takes a dot graph description file, adds clusters using the
  8. # mcl utility, and writes to stdout a dot graph amended with "subgraph
  9. # cluster_n" declarations that put the nodes in clusters.
  10. # dot: http://www.research.att.com/sw/tools/graphviz
  11. # mcl: http://members.ams.chello.nl/svandong/thesis/
  12. # It passes the input file through "dot -Tplain" so it can parse it more
  13. # easily. Restrictions on the input file:
  14. # - node names must be plain \w+
  15. # - the closing "}" must be on the last line, in first position, and
  16. # MUST be the only closing brace in first position in the whole file
  17. my $factor = shift;
  18. my $dotin = shift; # why not stdin: because need to read twice
  19. my $dotout = shift; # why not stdout: because DOS commingles mcl's stderr into
  20. # dotmcl's stdout
  21. my (@nodes, %node_index, @graph);
  22. for (`dot -Tplain $dotin`) {
  23. /^node (\w+)/ and do {
  24. push @nodes, $1;
  25. $node_index{$1} = $#nodes;
  26. next
  27. };
  28. /^edge (\w+) (\w+)/ and do {
  29. $graph[$node_index{$1}][$node_index{$2}] = 1;
  30. $graph[$node_index{$2}][$node_index{$1}] = 1;
  31. # dot handles digraphs but mcl handles undirected graphs
  32. next
  33. }
  34. }
  35. my $nodes = @nodes;
  36. my $mclin = "dotmcl-in.tmp";
  37. my $mclout = "dotmcl-out.tmp";
  38. open (MCLIN, ">$mclin") or die "can't create $mclin: $!\n";
  39. # mcl is a nice program but its input format sucks!
  40. print MCLIN << "MCLHEADER";
  41. (mclheader
  42. mcltype matrix
  43. dimensions $ {nodes}x$ {nodes}
  44. )
  45. (mclmatrix
  46. begin
  47. MCLHEADER
  48. for (my $i=0; $i<$nodes; $i++) {
  49. print MCLIN $i," ";
  50. for (my $j=0; $j<$nodes; $j++)
  51. {print MCLIN $j," " if $graph[$i][$j]};
  52. print MCLIN "\$\n";
  53. }
  54. print MCLIN << "MCLFOOTER";
  55. )
  56. MCLFOOTER
  57. close(MCLIN);
  58. system("mcl $mclin --silent -v mcl -I $factor -o $mclout")==0 or
  59. die "can't run 'mcl $mclin -o $mclout: $!\n";
  60. # read in clusters
  61. my @cluster;
  62. # mcl output format sucks even worse because it can have "continuation lines"
  63. # like this:
  64. # (mclmatrix
  65. # begin
  66. # 0 0 1 2 3 4 5 6 7 8 9 12 13 14 15 16 18 19 20
  67. # 86 87 88 89 90 91 92 94 95 96 98 105 106 107 109 110 112 113
  68. # 115 116 117 118 121 125 127 128 131 132 133 137 138 145 $
  69. # 1 28 29 31 32 33 34 35 36 78 93 129 130 134 135 136 $
  70. # )
  71. open (MCLOUT, $mclout) or die "can't open $mclout: $!\n";
  72. my $line = '';
  73. for (<MCLOUT>) {
  74. /^\d+(.+)/ and $line = $1;
  75. /^ +\d+/ and $line .= $_;
  76. /\$$/ and do {
  77. $line =~ s/\$$//;
  78. my @cl = split' ', $line;
  79. push @cluster, [@cl]
  80. unless @cl <= 1; # don't want trivial clusters
  81. }
  82. }
  83. close(MCLOUT);
  84. open (DOTIN, $dotin) or die "can't open $dotin: $!\n";
  85. open (DOTOUT, ">$dotout") or die "can't create $dotout: $!\n";
  86. for (<DOTIN>) {
  87. /^\}$/ and last;
  88. print DOTOUT;
  89. }
  90. for (my $i=0; $i<@cluster; $i++) {
  91. print DOTOUT " subgraph cluster_$i {label=\"\" ";
  92. for (@{$cluster[$i]}) {print DOTOUT " $nodes[$_]"};
  93. print DOTOUT "}\n";
  94. }
  95. print DOTOUT "}\n";
  96. close(DOTOUT);
  97. close(DOTIN);