reformat_dat_file.pl 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. #!/usr/bin/perl
  2. #----------------------------------------------------------------------
  3. #
  4. # reformat_dat_file.pl
  5. # Perl script that reads in catalog data file(s) and writes out
  6. # functionally equivalent file(s) in a standard format.
  7. #
  8. # In each entry of a reformatted file, metadata fields (if present)
  9. # come first, with normal attributes starting on the following line,
  10. # in the same order as the columns of the corresponding catalog.
  11. # Comments and blank lines are preserved.
  12. #
  13. # Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  14. # Portions Copyright (c) 1994, Regents of the University of California
  15. #
  16. # src/include/catalog/reformat_dat_file.pl
  17. #
  18. #----------------------------------------------------------------------
  19. use strict;
  20. use warnings;
  21. use FindBin;
  22. use Getopt::Long;
  23. # If you copy this script to somewhere other than src/include/catalog,
  24. # you'll need to modify this "use lib" or provide a suitable -I switch.
  25. use lib "$FindBin::RealBin/../../backend/catalog/";
  26. use Catalog;
  # Names of the metadata fields of a catalog entry, in output order.
  # Note: oid is a normal column from a storage perspective, but it's more
  # important than the rest, so it's listed first among the metadata fields.
  # Note: line_number is also a metadata field, but we never write it out,
  # so it's not listed here.
  my @METADATA = qw(oid oid_symbol array_type_oid descr autogenerated);

  # Command line switches and their defaults.
  #   --output PATH   directory to write the reformatted files into
  #   --full-tuples   keep default values instead of stripping them
  my $output_path = '';
  my $full_tuples = 0;

  my $options_ok = GetOptions(
      'output=s'    => \$output_path,
      'full-tuples' => \$full_tuples);
  usage() unless $options_ok;

  # Sanity check arguments: whatever remains in @ARGV are the input files.
  @ARGV or die "No input files.\n";

  # Make sure a non-empty output_path ends in a slash, so that it can be
  # concatenated directly with a file name later on.
  $output_path .= '/'
    if length($output_path) > 0 && $output_path !~ m{/\z};
  # Read all the input files into internal data structures.
  # We pass data file names as arguments and then look for matching
  # headers to parse the schema from.
  #
  #   %catalogs      catalog name => parsed header (from Catalog::ParseHeader)
  #   %catalog_data  catalog name => parsed data   (from Catalog::ParseData)
  #   @catnames      catalog names, in the order given on the command line
  my %catalogs;
  my %catalog_data;
  my @catnames;
  foreach my $datfile (@ARGV)
  {
      # Capture the part before ".dat" into a named variable right away,
      # rather than relying on $1 staying intact further down.
      my ($basename) = $datfile =~ /(.+)\.dat$/
        or die "Input files need to be data (.dat) files.\n";

      # The schema lives in the header sitting next to the data file.
      my $header = "$basename.h";
      die "There is no header file corresponding to $datfile"
        unless -e $header;

      my $catalog = Catalog::ParseHeader($header);
      my $catname = $catalog->{catname};
      my $schema  = $catalog->{columns};

      push @catnames, $catname;
      $catalogs{$catname} = $catalog;

      # NOTE(review): the third argument appears to be the
      # "preserve_formatting" flag mentioned in the comment block that
      # follows this loop -- confirm against Catalog::ParseData.
      $catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1);
  }
  67. ########################################################################
  68. # At this point, we have read all the data. If you are modifying this
  69. # script for bulk editing, this is a good place to build lookup tables,
  70. # if you need to. In the following example, the "next if !ref $row"
  71. # check below is a hack to filter out non-hash objects. This is because
  72. # we build the lookup tables from data that we read using the
  73. # "preserve_formatting" parameter.
  74. #
  75. ##Index access method lookup.
  76. #my %amnames;
  77. #foreach my $row (@{ $catalog_data{pg_am} })
  78. #{
  79. # next if !ref $row;
  80. # $amnames{$row->{oid}} = $row->{amname};
  81. #}
  82. ########################################################################
  # Write the data: one reformatted .dat file per catalog that was read.
  foreach my $catname (@catnames)
  {
      my $catalog = $catalogs{$catname};
      my $schema  = $catalog->{columns};

      # We may have ordinary columns at the storage level that we still
      # want to format as a special value.  Exclude these from the column
      # list so they are not written twice.
      my %is_metadata = map { $_ => 1 } @METADATA;
      my @attnames = grep { !$is_metadata{$_} } map { $_->{name} } @$schema;

      # Write output files to specified directory.
      my $datfile = "$output_path$catname.dat";
      open my $dat, '>', $datfile
        or die "can't open $datfile: $!";

      foreach my $data (@{ $catalog_data{$catname} })
      {
          # Hash ref representing a data entry.
          if (ref $data eq 'HASH')
          {
              # Work on a copy so the parsed data stays untouched.
              my %values = %$data;

              ############################################################
              # At this point we have the full tuple in memory as a hash
              # and can do any operations we want. As written, it only
              # removes default values, but this script can be adapted to
              # do one-off bulk-editing.
              ############################################################

              if (!$full_tuples)
              {
                  # If it's an autogenerated entry, drop it completely.
                  next if $values{autogenerated};

                  # Else, just drop any default/computed fields.
                  strip_default_values(\%values, $schema, $catname);
              }

              print $dat "{";

              # Separate out metadata fields for readability.
              my $metadata_str = format_hash(\%values, @METADATA);
              if ($metadata_str)
              {
                  print $dat $metadata_str;

                  # User attributes start on next line.
                  print $dat ",\n ";
              }

              my $data_str = format_hash(\%values, @attnames);
              print $dat $data_str;
              print $dat " },\n";
          }

          # Preserve blank lines.
          elsif ($data =~ /^\s*$/)
          {
              print $dat "\n";
          }

          # Preserve comments or brackets that are on their own line.
          elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/)
          {
              print $dat "$1\n";
          }
      }

      # Buffered write errors (e.g. a full disk) only surface at close time,
      # so a failed close means the output file may be incomplete.
      close $dat
        or die "can't close $datfile: $!";
  }
  # Remove column values for which there is a matching default,
  # or if the value can be computed from other columns.
  #
  # Arguments:
  #   $row     - hash ref of attribute name => value; modified in place
  #   $schema  - array ref of column hashes; the 'name' and (optional)
  #              'default' keys of each are consulted
  #   $catname - catalog name, used for catalog-specific rules and in the
  #              error message
  #
  # Dies if any column other than oid has no defined value in $row.
  # Returns nothing.
  sub strip_default_values
  {
      my ($row, $schema, $catname) = @_;

      # Delete values that match defaults.
      foreach my $column (@$schema)
      {
          my $attname = $column->{name};

          if (!defined $row->{$attname})
          {
              # It's okay if we have no oid value, since it will be assigned
              # automatically before bootstrap.
              die "strip_default_values: $catname.$attname undefined\n"
                if $attname ne 'oid';

              # Nothing to compare for a missing oid; skipping here avoids
              # an "uninitialized value" warning from the eq below.
              next;
          }

          if (defined $column->{default}
              and $row->{$attname} eq $column->{default})
          {
              delete $row->{$attname};
          }
      }

      # Delete computed values.  See AddDefaultValues() in Catalog.pm.
      # Note: This must be done after deleting values matching defaults.
      if ($catname eq 'pg_proc')
      {
          delete $row->{pronargs} if defined $row->{proargtypes};
      }

      # If a pg_type entry has an auto-generated array type, then its
      # typarray field is a computed value too (see GenerateArrayTypes).
      if ($catname eq 'pg_type')
      {
          delete $row->{typarray} if defined $row->{array_type_oid};
      }

      return;
  }
  # Format the individual elements of a Perl hash into a valid string
  # representation.  We do this ourselves, rather than use native Perl
  # facilities, so we can keep control over the exact formatting of the
  # data files.
  #
  # Arguments:
  #   $data          - hash ref holding the values to print
  #   @orig_attnames - attribute names to emit, in output order
  #
  # Returns the formatted "key => 'value'" pairs as one string, wrapped so
  # that lines stay within 80 characters, or '' if none of the requested
  # attributes has a defined value.
  sub format_hash
  {
      my ($data, @orig_attnames) = @_;

      # Keep only the attributes that actually have a value, so that we
      # know which element is the last one.  We do this because we may have
      # default values or empty metadata fields.
      my @attnames = grep { defined $data->{$_} } @orig_attnames;
      my $last_index = $#attnames;

      # When calling this function, we either have an open-bracket or a
      # leading space already, hence the current line starts at length 1.
      my $line_length = 1;
      my @pieces;

      foreach my $index (0 .. $last_index)
      {
          my $attname  = $attnames[$index];
          my $is_first = ($index == 0);

          # To limit the line to 80 chars, we need to account for the
          # trailing characters: ' },' after the last element, or just a
          # comma after any other element.
          my $limit = ($index == $last_index) ? 77 : 79;

          # Separate this element from the previous one.
          if (!$is_first)
          {
              push @pieces, ',';
              $line_length++;
          }

          # Escape single quotes in a copy of the value.
          (my $quoted = $data->{$attname}) =~ s/'/\\'/g;

          # Include a leading space in the key-value pair, since this will
          # always go after either a comma or an additional padding space
          # on the next line.
          my $pair        = " $attname => '$quoted'";
          my $pair_length = length($pair);

          # If adding the pair to the current line would expand the line
          # beyond 80 chars, put it on the next line.  We don't do this for
          # the first element, since that would create a blank line.
          if (!$is_first && $line_length + $pair_length > $limit)
          {
              # Put on next line with an additional space preceding.  There
              # are now two spaces in front of the key-value pair, lining
              # it up with the line above it.
              push @pieces, "\n $pair";
              $line_length = $pair_length + 1;
          }
          else
          {
              push @pieces, $pair;
              $line_length += $pair_length;
          }
      }

      return join('', @pieces);
  }
  # Print the command line help text and terminate the script.
  # Takes no arguments and never returns: the message is raised via die.
  sub usage
  {
      my @help_lines = (
          "Usage: reformat_dat_file.pl [options] datafile...",
          "Options:",
          "--output PATH output directory (default '.')",
          "--full-tuples write out full tuples, including default values",
          "Non-option arguments are the names of input .dat files.",
          "Updated files are written to the output directory,",
          "possibly overwriting the input files.",
      );

      # Every line, including the last, ends in a newline; the trailing
      # newline also keeps die from appending the script location.
      die join("\n", @help_lines) . "\n";
  }