#!/usr/bin/perl
#----------------------------------------------------------------------
#
# reformat_dat_file.pl
# Perl script that reads in catalog data file(s) and writes out
# functionally equivalent file(s) in a standard format.
#
# In each entry of a reformatted file, metadata fields (if present)
# come first, with normal attributes starting on the following line,
# in the same order as the columns of the corresponding catalog.
# Comments and blank lines are preserved.
#
# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/include/catalog/reformat_dat_file.pl
#
#----------------------------------------------------------------------
use strict;
use warnings;

use FindBin;
use Getopt::Long;

# If you copy this script to somewhere other than src/include/catalog,
# you'll need to modify this "use lib" or provide a suitable -I switch.
use lib "$FindBin::RealBin/../../backend/catalog/";
use Catalog;
# Names of the metadata fields of a catalog entry.
# Note: oid is a normal column from a storage perspective, but it's more
# important than the rest, so it's listed first among the metadata fields.
# Note: line_number is also a metadata field, but we never write it out,
# so it's not listed here.
my @METADATA =
  ('oid', 'oid_symbol', 'array_type_oid', 'descr', 'autogenerated');

# Process command line switches.
#   --output PATH   directory the reformatted files are written to
#   --full-tuples   keep default/computed values instead of stripping them
my $output_path = '';
my $full_tuples = 0;

GetOptions(
    'output=s'    => \$output_path,
    'full-tuples' => \$full_tuples) || usage();

# Sanity check arguments.
die "No input files.\n" unless @ARGV;

# Make sure output_path ends in a slash, so that a file name can simply
# be appended to it.  An empty path is left alone: it means "write into
# the current directory".
if ($output_path ne '' && substr($output_path, -1) ne '/')
{
    $output_path .= '/';
}
# Read all the input files into internal data structures.
# We pass data file names as arguments and then look for matching
# headers to parse the schema from.
my %catalogs;        # catalog name => parsed header
my %catalog_data;    # catalog name => array ref of parsed data entries
my @catnames;        # catalog names, in command-line order

foreach my $datfile (@ARGV)
{
    $datfile =~ /(.+)\.dat$/
      or die "Input files need to be data (.dat) files.\n";

    # The header is expected to sit next to the data file, with the same
    # base name.
    my $header = "$1.h";
    die "There is no header file corresponding to $datfile"
      if !-e $header;

    my $catalog = Catalog::ParseHeader($header);
    my $catname = $catalog->{catname};
    my $schema  = $catalog->{columns};

    push @catnames, $catname;
    $catalogs{$catname} = $catalog;

    # NOTE(review): the trailing 1 is presumably ParseData's
    # "preserve_formatting" flag; the writer below relies on comments and
    # blank lines coming back as plain (non-hash) entries.
    $catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1);
}
########################################################################
# At this point, we have read all the data. If you are modifying this
# script for bulk editing, this is a good place to build lookup tables,
# if you need to. In the following example, the "next if !ref $row"
# check below is a hack to filter out non-hash objects. This is because
# we build the lookup tables from data that we read using the
# "preserve_formatting" parameter.
#
##Index access method lookup.
#my %amnames;
#foreach my $row (@{ $catalog_data{pg_am} })
#{
#    next if !ref $row;
#    $amnames{$row->{oid}} = $row->{amname};
#}
########################################################################
# Write the data.
foreach my $catname (@catnames)
{
    my $catalog = $catalogs{$catname};
    my @attnames;
    my $schema = $catalog->{columns};

    foreach my $column (@$schema)
    {
        my $attname = $column->{name};

        # We may have ordinary columns at the storage level that we still
        # want to format as a special value. Exclude these from the column
        # list so they are not written twice.
        push @attnames, $attname
          if !(grep { $_ eq $attname } @METADATA);
    }

    # Write output files to specified directory.
    my $datfile = "$output_path$catname.dat";
    open my $dat, '>', $datfile
      or die "can't open $datfile: $!";

    foreach my $data (@{ $catalog_data{$catname} })
    {
        # Hash ref representing a data entry.
        if (ref $data eq 'HASH')
        {
            # Work on a copy so the parsed data stays intact.
            my %values = %$data;

            ############################################################
            # At this point we have the full tuple in memory as a hash
            # and can do any operations we want. As written, it only
            # removes default values, but this script can be adapted to
            # do one-off bulk-editing.
            ############################################################

            if (!$full_tuples)
            {
                # If it's an autogenerated entry, drop it completely.
                next if $values{autogenerated};

                # Else, just drop any default/computed fields.
                strip_default_values(\%values, $schema, $catname);
            }

            print $dat "{";

            # Separate out metadata fields for readability.
            my $metadata_str = format_hash(\%values, @METADATA);
            if ($metadata_str)
            {
                print $dat $metadata_str;

                # User attributes start on next line.
                print $dat ",\n ";
            }

            my $data_str = format_hash(\%values, @attnames);
            print $dat $data_str;
            print $dat " },\n";
        }

        # Preserve blank lines.
        elsif ($data =~ /^\s*$/)
        {
            print $dat "\n";
        }

        # Preserve comments or brackets that are on their own line.
        elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/)
        {
            print $dat "$1\n";
        }
    }

    # Buffered write errors (e.g. a full disk) only surface when the
    # handle is closed, so check the result instead of silently leaving
    # a truncated data file behind.
    close $dat
      or die "can't close $datfile: $!";
}
# Remove column values for which there is a matching default,
# or if the value can be computed from other columns.
#
# Arguments:
#   $row     - hash ref holding one catalog entry; modified in place
#   $schema  - array ref of column descriptions; each is a hash ref with
#              a 'name' and, optionally, a 'default'
#   $catname - catalog name, used for the catalog-specific rules and in
#              the error message
#
# Dies if any column other than oid has no value in $row.
# Returns nothing.
sub strip_default_values
{
    my ($row, $schema, $catname) = @_;

    # Delete values that match defaults.
    foreach my $column (@$schema)
    {
        my $attname = $column->{name};

        if (!defined $row->{$attname})
        {
            # It's okay if we have no oid value, since it will be assigned
            # automatically before bootstrap.
            die "strip_default_values: $catname.$attname undefined\n"
              if $attname ne 'oid';

            # Nothing to compare with a default; skipping here avoids an
            # "uninitialized value" warning from the eq below.
            next;
        }

        if (defined $column->{default}
            and ($row->{$attname} eq $column->{default}))
        {
            delete $row->{$attname};
        }
    }

    # Delete computed values. See AddDefaultValues() in Catalog.pm.
    # Note: This must be done after deleting values matching defaults.
    if ($catname eq 'pg_proc')
    {
        delete $row->{pronargs} if defined $row->{proargtypes};
    }

    # If a pg_type entry has an auto-generated array type, then its
    # typarray field is a computed value too (see GenerateArrayTypes).
    if ($catname eq 'pg_type')
    {
        delete $row->{typarray} if defined $row->{array_type_oid};
    }

    return;
}
# Format the individual elements of a Perl hash into a valid string
# representation. We do this ourselves, rather than use native Perl
# facilities, so we can keep control over the exact formatting of the
# data files.
#
# Arguments:
#   $data - hash ref holding the values of one entry
#   @_    - names of the fields to emit, in output order; fields with no
#           defined value in $data are skipped
#
# Returns the "key => 'value'" pairs joined by commas and wrapped to stay
# within 80 columns, or the empty string if no field has a value. The
# surrounding braces are the caller's business.
sub format_hash
{
    my $data          = shift;
    my @orig_attnames = @_;

    # Copy attname to new array if it has a value, so we can determine
    # the last populated element. We do this because we may have default
    # values or empty metadata fields.
    my @attnames = grep { defined $data->{$_} } @orig_attnames;

    # When calling this function, we either have an open-bracket or a
    # leading space already.
    my $char_count = 1;

    my $hash_str      = '';
    my $element_count = 0;

    foreach my $attname (@attnames)
    {
        $element_count++;

        # To limit the line to 80 chars, we need to account for the
        # trailing characters: ' },' after the last element, otherwise
        # just the trailing comma.
        my $threshold = ($element_count == @attnames) ? 77 : 79;

        if ($element_count > 1)
        {
            $hash_str .= ',';
            $char_count++;
        }

        my $value = $data->{$attname};

        # Inside a single-quoted literal a backslash is only special in
        # front of another backslash or a quote, and it must not be the
        # last character (it would swallow the closing quote). Double it
        # in exactly those positions; any other backslash already reads
        # back as itself and is left alone so existing output is stable.
        $value =~ s/\\(?=[\\']|\z)/\\\\/g;

        # Escape single quotes.
        $value =~ s/'/\\'/g;

        # Include a leading space in the key-value pair, since this will
        # always go after either a comma or an additional padding space on
        # the next line.
        my $element        = " $attname => '$value'";
        my $element_length = length($element);

        # If adding the element to the current line would expand the line
        # beyond 80 chars, put it on the next line. We don't do this for
        # the first element, since that would create a blank line.
        if ($element_count > 1 and $char_count + $element_length > $threshold)
        {
            # Put on next line with an additional space preceding. There
            # are now two spaces in front of the key-value pair, lining
            # it up with the line above it.
            $hash_str .= "\n $element";
            $char_count = $element_length + 1;
        }
        else
        {
            $hash_str .= $element;
            $char_count += $element_length;
        }
    }
    return $hash_str;
}
# Report how to invoke the script and abort.
# The help text is raised with die, so this sub never returns normally;
# the text ends in a newline, so Perl does not append a file/line suffix.
sub usage
{
    die <<EOM;
Usage: reformat_dat_file.pl [options] datafile...
Options:
--output PATH output directory (default '.')
--full-tuples write out full tuples, including default values
Non-option arguments are the names of input .dat files.
Updated files are written to the output directory,
possibly overwriting the input files.
EOM
}
|