distribution.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
  9. * by the Xiph.Org Foundation https://xiph.org/ *
  10. * *
  11. ********************************************************************
  12. function: utility for finding the distribution in a data set
  13. ********************************************************************/
  14. #include <stdlib.h>
  15. #include <stdio.h>
  16. #include <math.h>
  17. #include <string.h>
  18. #include <errno.h>
  19. #include "bookutil.h"
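/* command line:
     distribution data.vqd [bins]   (scan a data file; histogram it if bins is given)
     distribution book.vqh          (describe the value mapping of a codebook)
*/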
  23. int ascend(const void *a,const void *b){
  24. return(**((long **)a)-**((long **)b));
  25. }
  26. int main(int argc,char *argv[]){
  27. FILE *in;
  28. long lines=0;
  29. float min;
  30. float max;
  31. long bins=-1;
  32. int flag=0;
  33. long *countarray;
  34. long total=0;
  35. char *line;
  36. if(argv[1]==NULL){
  37. fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n");
  38. exit(1);
  39. }
  40. if(argv[2]!=NULL)
  41. bins=atoi(argv[2])-1;
  42. in=fopen(argv[1],"r");
  43. if(!in){
  44. fprintf(stderr,"Could not open input file %s\n",argv[1]);
  45. exit(1);
  46. }
  47. if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){
  48. /* load/decode a book */
  49. codebook *b=codebook_load(argv[1]);
  50. static_codebook *c=(static_codebook *)(b->c);
  51. float delta;
  52. int i;
  53. fclose(in);
  54. switch(c->maptype){
  55. case 0:
  56. printf("entropy codebook only; no mappings\n");
  57. exit(0);
  58. break;
  59. case 1:
  60. bins=_book_maptype1_quantvals(c);
  61. break;
  62. case 2:
  63. bins=c->entries*c->dim;
  64. break;
  65. }
  66. max=min=_float32_unpack(c->q_min);
  67. delta=_float32_unpack(c->q_delta);
  68. for(i=0;i<bins;i++){
  69. float val=c->quantlist[i]*delta+min;
  70. if(val>max)max=val;
  71. }
  72. printf("Minimum scalar value: %f\n",min);
  73. printf("Maximum scalar value: %f\n",max);
  74. switch(c->maptype){
  75. case 1:
  76. {
  77. /* lattice codebook. dump it. */
  78. int j,k;
  79. long maxcount=0;
  80. long **sort=calloc(bins,sizeof(long *));
  81. long base=c->lengthlist[0];
  82. countarray=calloc(bins,sizeof(long));
  83. for(i=0;i<bins;i++)sort[i]=c->quantlist+i;
  84. qsort(sort,bins,sizeof(long *),ascend);
  85. for(i=0;i<b->entries;i++)
  86. if(c->lengthlist[i]>base)base=c->lengthlist[i];
  87. /* dump a full, correlated count */
  88. for(j=0;j<b->entries;j++){
  89. if(c->lengthlist[j]){
  90. int indexdiv=1;
  91. printf("%4d: ",j);
  92. for(k=0;k<b->dim;k++){
  93. int index= (j/indexdiv)%bins;
  94. printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+
  95. _float32_unpack(c->q_min));
  96. indexdiv*=bins;
  97. }
  98. printf("\t|");
  99. for(k=0;k<base-c->lengthlist[j];k++)printf("*");
  100. printf("\n");
  101. }
  102. }
  103. /* do a rough count */
  104. for(j=0;j<b->entries;j++){
  105. int indexdiv=1;
  106. for(k=0;k<b->dim;k++){
  107. if(c->lengthlist[j]){
  108. int index= (j/indexdiv)%bins;
  109. countarray[index]+=(1<<(base-c->lengthlist[j]));
  110. indexdiv*=bins;
  111. }
  112. }
  113. }
  114. /* dump the count */
  115. {
  116. long maxcount=0,i,j;
  117. for(i=0;i<bins;i++)
  118. if(countarray[i]>maxcount)maxcount=countarray[i];
  119. for(i=0;i<bins;i++){
  120. int ptr=sort[i]-c->quantlist;
  121. int stars=rint(50./maxcount*countarray[ptr]);
  122. printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]);
  123. for(j=0;j<stars;j++)printf("*");
  124. printf("\n");
  125. }
  126. }
  127. }
  128. break;
  129. case 2:
  130. {
  131. /* trained, full mapping codebook. */
  132. printf("Can't do probability dump of a trained [type 2] codebook (yet)\n");
  133. }
  134. break;
  135. }
  136. }else{
  137. /* load/count a data file */
  138. /* do it the simple way; two pass. */
  139. line=setup_line(in);
  140. while(line){
  141. float code;
  142. char buf[80];
  143. lines++;
  144. sprintf(buf,"getting min/max (%.2f::%.2f). lines...",min,max);
  145. if(!(lines&0xff))spinnit(buf,lines);
  146. while(!flag && sscanf(line,"%f",&code)==1){
  147. line=strchr(line,',');
  148. min=max=code;
  149. flag=1;
  150. }
  151. while(line && sscanf(line,"%f",&code)==1){
  152. line=strchr(line,',');
  153. if(line)line++;
  154. if(code<min)min=code;
  155. if(code>max)max=code;
  156. }
  157. line=setup_line(in);
  158. }
  159. if(bins<1){
  160. if((int)(max-min)==min-max){
  161. bins=max-min;
  162. }else{
  163. bins=25;
  164. }
  165. }
  166. printf("\r \r");
  167. printf("Minimum scalar value: %f\n",min);
  168. printf("Maximum scalar value: %f\n",max);
  169. if(argv[2]){
  170. printf("\n counting hits into %ld bins...\n",bins+1);
  171. countarray=calloc(bins+1,sizeof(long));
  172. rewind(in);
  173. line=setup_line(in);
  174. while(line){
  175. float code;
  176. lines--;
  177. if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines);
  178. while(line && sscanf(line,"%f",&code)==1){
  179. line=strchr(line,',');
  180. if(line)line++;
  181. code-=min;
  182. code/=(max-min);
  183. code*=bins;
  184. countarray[(int)rint(code)]++;
  185. total++;
  186. }
  187. line=setup_line(in);
  188. }
  189. /* make a pretty graph */
  190. {
  191. long maxcount=0,i,j;
  192. for(i=0;i<bins+1;i++)
  193. if(countarray[i]>maxcount)maxcount=countarray[i];
  194. printf("\r \r");
  195. printf("Total scalars: %ld\n",total);
  196. for(i=0;i<bins+1;i++){
  197. int stars=rint(50./maxcount*countarray[i]);
  198. printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]);
  199. for(j=0;j<stars;j++)printf("*");
  200. printf("\n");
  201. }
  202. }
  203. }
  204. fclose(in);
  205. }
  206. printf("\nDone.\n");
  207. exit(0);
  208. }