#!/usr/bin/perl -w
# $Id: pks-db-lister,v 1.13 2004/09/23 21:14:40 jason Exp jason $

# This program can print words, times, fingerprints, and keyids from pks
# databases, fetch a list of keys to a pipe/file, and print the sizes and
# SHA-1 hashes over each keydb entry (usually single keys, except where
# short (32bit) keyids collide).

# NB: This program doesn't implement transactions or locking.  It doesn't
# update the dbs and opens them in read-only mode, so this should be OK.
# Feel free to kill or suspend this program at any time.  Want to see
# just the first few records?  Pipe the output through head(1) and
# don't worry about it.

use strict;
use warnings;

use Getopt::Std;
use BerkeleyDB;
use Digest::SHA1;
use IO::Handle;    # STDOUT->flush / STDERR->flush (replaces the perl4 flush.pl)

###############################################################################
# State shared between main and the subs below.  Everything is a file-scoped
# lexical, declared here so that every sub defined further down can see it.

my $db_dir = $ENV{'pk'};    # we need a path to the dbs (if -d isn't given...)

my $record_count         = 0;    # records seen, across all db files
my $grand_total_key_size = 0;    # bytes of key data seen, across all keydbs
my @db_th;                       # one tied-hash reference per open keydb file
my %bypass_keyid;                # keyids listed in the -b file (feature unfinished)
my $pipe_fh;                     # output handle for -o, once opened

# arg processing...
my %opts = ();
getopts('d:kf:b:o:ptc:wm:nsviFxe:', \%opts)
    or usage("unrecognized option or missing option argument");

my $db_file              = $opts{'d'};
my $do_keys              = defined $opts{'k'} ? 1 : 0;
my $do_times             = defined $opts{'t'} ? 1 : 0;
my $do_words             = defined $opts{'w'} ? 1 : 0;
my $do_prints            = defined $opts{'p'} ? 1 : 0;
my $verbose              = defined $opts{'v'} ? 1 : 0;
my $extrapolate          = defined $opts{'x'} ? 1 : 0;
my $no_output            = defined $opts{'n'} ? 1 : 0;
my $skip_fetch           = defined $opts{'s'} ? 1 : 0;
my $interpret_timestamps = defined $opts{'i'} ? 1 : 0;
my $find_v4_rsa          = 0;
my $word                 = $opts{'m'};
my $fetch                = $opts{'f'};
my $bypass               = $opts{'b'};
my $pipe                 = $opts{'o'};
my $delta_file           = $opts{'e'};

if (defined $bypass) {
    die "error: '-b <file>' to bypass keys is not fully implemented yet, sorry. exiting.\n";
}

# -c takes a number with an optional unit suffix.  With a d/h/m/s suffix the
# number is an age relative to program start; without one it is taken as an
# absolute timestamp (no multiplier is set).
my ($time_compare, $time_multiplier);
if (defined $opts{'c'}) {
    my %seconds_per = (d => 86400, h => 3600, m => 60, s => 1);
    if ($opts{'c'} =~ /^(\d+)(\D?)/) {
        $time_compare    = $1 + 0;
        $time_multiplier = $seconds_per{$2};    # undef => absolute timestamp
    }
    else {
        usage("-c needs a number, optionally followed by d, h, m or s");
    }
}

if (defined $opts{'F'}) {
    $find_v4_rsa = 1;
    $skip_fetch  = 0;    # must fetch keys from db to see if they're v4 RSA
    undef $fetch;        # if we knew the keyids, we wouldn't be using -F...
    $no_output   = 1;    # don't want regular output
}

die("\$pk not set in envt.") if (!defined($db_dir) && !defined($db_file));

# exactly one of -k, -t, -w, -p must be chosen
usage() if (($do_keys + $do_times + $do_words + $do_prints) != 1);

print STDERR "info: started at ", $^T, " seconds, ", scalar gmtime($^T), " UTC\n";

if    ($do_keys)  { do_keys(); }
elsif ($do_times) { do_times(); }
elsif ($do_words) { do_words(); }
else              { do_prints(); }

# buffered write errors on the -o file/pipe only surface at close()
if (defined $pipe_fh) {
    close($pipe_fh) or die "error closing $pipe: $!";
}

stats();
exit(0);

# end of main()

###############################################################################
# usage([$msg]): print an optional error message followed by the option
# summary on stdout, then exit with status 1.  Never returns.

sub usage {
    my ($msg) = @_;

    print "error: $msg\n" if defined($msg);

    print "usage: $0 \\\n";
    print "  [-k [-d <keydb>] [-f <file> [-b <file>]] [-e <file>] [-o <file>] [-F]] OR \\\n";
    print "  [-p [-x]] OR \\\n";
    print "  [-t [-d <timedb>] [-c #{d,h,m,s}]] OR \\\n";
    print "  [-w [-m <word>] [-d <worddb>]] AND \\\n";
    print "  [-n] [-s] [-v] [-i]\n";
    print "  where: -k [-d <keydb>] = process keys from all keydbs or <keydb>\n";
    print "  where: -f <file> = fetch (short/32bit) keyids listed in <file>\n";
    print "  where: -b <file> = bypass fetching (short/32bit) keyids listed in <file>\n";
    print "  where: -e <file> = with -k -d <keydb>, report keys that are new or changed\n";
    print "                     relative to an earlier '-k -v' listing in <file>\n";
    print "  where: -o <file> = output keys to named file/pipe\n";
    print "  where: -F = find v4 RSA keys\n";
    print "  where: -p = print (raw) fingerprint data\n";
    print "  where: -x = extrapolate dupes and key versions\n";
    print "  where: -t [-d <timedb>] = print/count times from timedb or <timedb>\n";
    print "  where: -c #[d,h,m,s] = only show times from absolute timestamp or\n";
    print "                         last n days/hours/mins/secs\n";
    print "  where: -w [-m <word>] [-d <worddb>] = print/count <word> or all words\n";
    print "                                        from worddb or <worddb>\n";
    print "  where: -n = do not print keyids/times/words to stdout (stderr w/-o)\n";
    print "  where: -s = skip all data (record) fetches, just provide limited stats\n";
    print "  where: -v = be verbose when outputting data\n";
    print "  where: -i = interpret/convert (some) timestamps\n";
    print "\n";

    exit(1);
}    # usage()

###############################################################################
# stats(): report the completion time and total run time on stderr, followed
# by a best-effort ps(1) listing for this process.

sub stats {
    my $now     = time();
    my $runtime = $now - $^T;

    print STDERR "info: completed at ", $now, " seconds, ", scalar gmtime($now), " UTC\n";
    print STDERR "info: completed in $runtime second(s)\n\n";

    # try to print some process status information... times() isn't enough.
    # for more data, use time(1) from the shell.
    # FIXME: make this optional and/or work on systems with different ps args?
    # List-form pipe open: no shell is involved.  Failure is deliberately
    # ignored, the ps output is purely informational.
    if (open(my $ps, '-|', 'ps', '-uxwp', $$)) {
        while (my $line = <$ps>) {
            print STDERR "info: ps: $line";
        }
        close($ps);
        print STDERR "\n";
    }
}    # stats()

###############################################################################
# open_output(): open the -o file for writing and remember the handle in
# $pipe_fh.  Dies if the file cannot be opened.

sub open_output {
    open($pipe_fh, '>', $pipe) or die "can't open $pipe for writing: $!";
    binmode($pipe_fh);    # key records are binary data
}    # open_output()

###############################################################################
# do_keys(): the -k mode.  Opens either the single keydb given with -d or
# every keydbNNN file counted by $db_dir/num_keydb, then fetches (-f),
# diffs against an earlier listing (-e) or lists every key.

sub do_keys {
    # -e compares exactly one keydb file against an earlier listing
    die "error: -e <file> requires -d <keydb>\n"
        if (defined($delta_file) && !defined($db_file));

    # make a list of keydb files to operate on...
    my @db_files;
    if (!defined($db_file)) {
        my $file = "$db_dir/num_keydb";
        open(my $fh, '<', $file) or die "can't open $file for reading: $!";
        my $line = <$fh>;
        close($fh);

        $line = '' if (!defined($line));
        chomp $line;
        my ($count) = $line =~ /^\s*(\d+)/;
        die "need a number from 1-999 (not $line) in $file"
            if (!defined($count) || $count < 1 || $count > 999);

        for my $i (0 .. $count - 1) {
            push @db_files, sprintf("%s/keydb%03d", $db_dir, $i);
        }
    }
    else {
        push @db_files, $db_file;
    }

    # NB: main currently refuses -b, so this branch is dormant until the
    # bypass feature is finished.
    if (defined($bypass)) {
        my $bp_count = 0;
        open(my $bp_fh, '<', $bypass) or die "error opening $bypass: $!";
        while (my $line = <$bp_fh>) {
            chomp $line;
            next if ($line =~ /^#/);
            next if ($line =~ /^$/);
            $bypass_keyid{$line} = 1;
            $bp_count++;
        }
        close($bp_fh);
        print STDERR "info: bypassing $bp_count keys\n";
    }

    # tie contents of all db files to anonymous hashes stored in @db_th
    foreach my $file (@db_files) {
        my %keydb;
        tie %keydb, 'BerkeleyDB::Hash',
            -Filename => $file,
            -Flags    => DB_RDONLY
            or die "can't open $file: $! $BerkeleyDB::Error";
        push @db_th, \%keydb;
    }

    if (defined($fetch)) {
        fetch_keys();
        return;
    }

    open_output() if (defined($pipe));

    if (defined($delta_file)) {
        delta_keys($db_th[0]);
        return;
    }

    foreach my $th (@db_th) {
        process_keydb($th);
    }

    printf STDERR "info: found %7d keys total\n", $record_count;
    printf STDERR "info: grand total key size: $grand_total_key_size bytes (%1.2f MB)\n",
        $grand_total_key_size / (1024 * 1024)
        if (!$skip_fetch);
}    # do_keys()

###############################################################################
# process_keydb($th): walk every record of one tied keydb hash.  Counts the
# records and, unless -s was given, accumulates size statistics, optionally
# looks for v4 RSA keys (-F), copies the raw record to the -o handle and
# prints the keyid (plus size and SHA-1 with -v).  Per-file statistics go
# to stderr.

sub process_keydb {
    my ($th) = @_;

    my $total_key_size = 0;
    my $key_count      = 0;
    my ($key_smallest, $key_smallest_keyid);
    my ($key_largest,  $key_largest_keyid);

    while (my ($key) = each %{$th}) {
        $key_count++;
        $record_count++;
        next if ($skip_fetch);

        # fetch the record once; every lookup on the tied hash hits the db
        my $data  = $th->{$key};
        my $len   = length $data;
        my $keyid = unpack("N", $key);    # keys are only indexed by their 32b keyid

        $total_key_size       += $len;
        $grand_total_key_size += $len;

        # Two independent tests: the first record is both the smallest and
        # the largest seen so far.
        if (!defined($key_smallest) || $len < $key_smallest) {
            $key_smallest       = $len;
            $key_smallest_keyid = $keyid;
        }
        if (!defined($key_largest) || $len > $key_largest) {
            $key_largest       = $len;
            $key_largest_keyid = $keyid;
        }

        if ($find_v4_rsa) {
            my $keyid_s = sprintf("%08X", $keyid);
            my @bytes   = unpack("C9", $data);
            die "error: not enough bytes in record/key $keyid_s,\nexiting"
                if (@bytes != 9);    # severely hosed record?
            if ($bytes[0] != 0x99) {    # flag any "non-standard" keys...
                printf STDERR "warning: first byte != 0x99, $keyid_s = 0x%02X\n", $bytes[0];
                next;
            }
            next if ($bytes[3] != 0x04);    # not a v4 key
            next if ($bytes[8] > 0x03);     # not an RSA key
            print "$keyid_s, v4 RSA, type $bytes[8]\n";
            STDOUT->flush();
        }

        print {$pipe_fh} $data if (defined($pipe_fh));
        next if ($no_output);

        if (!$verbose) {
            printf "%08X\n", $keyid;
        }
        else {
            # NB: the data is simply the entire PGP key in binary format.
            # FUTURE: we could extract multiple keys given keyids, use the
            #         timedb data to update a fingerprint db and a
            #         64b <-> 32b keyid map, and use the timedb to return
            #         keys updated during certain time windows.
            printf "%08X\t%0d\t%s\n", $keyid, $len, Digest::SHA1::sha1_hex($data);
        }
    }

    printf STDERR "info: found %7d keys this keydb file\n", $key_count;
    return if ($skip_fetch);

    printf STDERR "info: total key size: $total_key_size bytes (%1.2f MB)\n",
        $total_key_size / (1024 * 1024);
    return if ($key_count == 0);    # nothing to average over

    printf STDERR
        "info: keys: smallest = $key_smallest, largest = $key_largest, average = %1.2f bytes\n",
        $total_key_size / $key_count;
    printf STDERR "info: keys: smallest = 0x%08X, largest = 0x%08X\n",
        $key_smallest_keyid, $key_largest_keyid;
}    # process_keydb()

###############################################################################
# fetch_keys(): read keyids (one per line, with or without a 0x prefix, long
# or short) from the -f file, look each one up in the keydb selected by its
# low 16 bits, write found records to the -o file and report the outcome of
# every lookup on stdout.  Blank lines and lines starting with '#' are
# skipped.

sub fetch_keys {
    die "fetching keys requires -o (for now...)" if (!defined($pipe));
    open_output();
    open(my $fetch_fh, '<', $fetch) or die "can't open $fetch for reading: $!";

    my $fetched     = 0;
    my $not_fetched = 0;

    while (my $id = <$fetch_fh>) {
        chomp $id;
        $id =~ s/^\s+|\s+$//g;
        next if ($id eq '' || $id =~ /^#/);

        $id =~ s/^(?:0x)?/0x/i;                        # normalize the prefix
        $id =~ s/^0x.{8}/0x/ if (length($id) == 18);   # long keyid -> short keyid

        if ($id !~ /^0x[0-9A-Fa-f]{1,8}$/) {
            print STDERR "warning: '$id' is not a valid keyid\n";
            print "$id, not fetched\n";
            $not_fetched++;
            next;
        }

        my $val      = hex($id);
        my $selector = ($val & 0x0000FFFF) % scalar(@db_th);
        my $keyid    = pack("N", $val);

        if (exists($db_th[$selector]->{$keyid})) {
            print {$pipe_fh} $db_th[$selector]->{$keyid};
            print "$id, fetched\n";
            $fetched++;
        }
        else {
            print "$id, not fetched\n";
            $not_fetched++;
        }
    }
    close($fetch_fh);

    my $requested = $fetched + $not_fetched;
    printf STDERR
        "info: %d keys requested, $fetched fetched, $not_fetched not fetched, %3.2f%% found.\n",
        $requested, ($requested ? $fetched / $requested * 100.0 : 0);
}    # fetch_keys()

###############################################################################
# delta_keys($th): compare every record of one tied keydb hash against the
# "keyid<TAB>size<TAB>sha1" lines of the -e file.  For each key that is new,
# has a different size, or has the same size but a different SHA-1, a
# description goes to stderr, the raw record to the -o handle (if any) and a
# fresh listing line to stdout.  Unchanged keys produce no output.

sub delta_keys {
    my ($th) = @_;

    # keyid -> "size\thash" from the previous listing; kept as one string
    # per key to limit memory use on large listings
    my %last;
    open(my $delta_fh, '<', $delta_file) or die "error opening $delta_file: $!";
    while (my $line = <$delta_fh>) {
        my @tokens = split ' ', $line;
        next if (@tokens < 3);    # blank or malformed line
        # FIXME - look for dupes
        $last{$tokens[0]} = $tokens[1] . "\t" . $tokens[2];
    }
    close($delta_fh);

    while (my ($key) = each %{$th}) {
        my $keyid    = sprintf("%08X", unpack("N", $key));
        my $data     = $th->{$key};
        my $len      = length $data;
        my $new_hash = Digest::SHA1::sha1_hex($data);
        my $report;

        if (!exists($last{$keyid})) {    # new key
            $report = "$keyid\tsize\t0\t$len\n";
        }
        else {                           # existing key
            my ($last_size, $last_hash) = split /\t/, $last{$keyid};
            my $old_len = $last_size + 0;
            if ($old_len != $len) {
                $report = sprintf("%s\tsize\t%d\t%d\t%d\n",
                    $keyid, $old_len, $len, $len - $old_len);
            }
            elsif ($last_hash ne $new_hash) {
                $report = "$keyid\tsha-1\t$last_hash\t$new_hash\n";
            }
            else {
                next;                    # unchanged
            }
        }

        print STDERR $report;
        print {$pipe_fh} $data if (defined($pipe_fh));
        printf "%s\t%0d\t%s\n", $keyid, $len, $new_hash;
    }    # while each key
}    # delta_keys()

###############################################################################
# do_words(): the -w mode.  Walks the worddb btree (or only the duplicate
# records stored under the -m word), counting the records and printing each
# word unless -n or -s was given.

sub do_words {
    $db_file = "$db_dir/worddb" if (!defined($db_file));
    my $words = BerkeleyDB::Btree->new(
        -Filename => $db_file,
        -Flags    => DB_RDONLY
    ) or die "can't open $db_file: $! $BerkeleyDB::Error";

    # see dump_worddb() in pks_dump.c...  the db key is the word from the uid.
    # the data is the same as in timedb (see do_times()).
    $record_count = 0;
    my $cursor = $words->db_cursor();
    my ($key, $value) = ("", "");
    my ($status, $flag);

    if (defined($word)) {
        $key    = $word;
        $status = $cursor->c_get($key, $value, DB_SET);
        $flag   = DB_NEXT_DUP;
    }
    else {
        $status = $cursor->c_get($key, $value, DB_FIRST);
        $flag   = DB_NEXT;
    }

    # A C-style for loop, so that "next" still advances the cursor.
    for (; $status == 0; $status = $cursor->c_get($key, $value, $flag)) {
        $record_count++;
        next if ($no_output || $skip_fetch);

        if (!$verbose) {
            print "$key\n";
        }
        else {
            my @data = unpack("NNN", $value);
            printf "%19s, 0x%08X%08X 0x%08X %12ld\n",
                $key, $data[1], $data[2], $data[2], $data[0];
        }
    }

    STDOUT->flush();
    print STDERR "info: found $record_count word record(s)\n";
}    # do_words()

###############################################################################
# do_times(): the -t mode.  Walks the timedb, counting the records and
# printing each timestamp (converted to UTC with -i); -c restricts printing
# to records at or after the cutoff, -v adds the keyids stored under each
# timestamp.

sub do_times {
    # FUTURE: add "-since <timestamp>" option?
    $db_file = "$db_dir/timedb" if (!defined($db_file));
    my %times;
    tie %times, 'BerkeleyDB::Btree',
        -Filename => $db_file,
        -Flags    => DB_RDONLY
        or die "can't open $db_file: $! $BerkeleyDB::Error";

    # calculate time cutoff if an absolute timestamp wasn't given
    if (defined($time_compare) && defined($time_multiplier)) {
        $time_compare = $^T - ($time_multiplier * $time_compare);
    }

    # see dump_timedb() in pks_dump.c...  the db key is a standard timestamp
    # stored in big-endian order, db data is one or more 12 byte blocks.
    # data[0] is the key creation time, data[1] is the upper 32b of the keyid,
    # data[2] is the lower 32b of the keyid
    $record_count = 0;
    my $time_from_today = 0;

    while (my ($key) = each %times) {
        $record_count++;
        my $time = unpack("N", $key);

        if (defined($time_compare)) {
            # FIXME: really should do a btree search for this...
            next if ($time < $time_compare);
            $time_from_today++;
        }
        next if ($no_output);

        if (!$interpret_timestamps) {
            print "time $time\n";
        }
        else {
            my $gmtime = gmtime($time);
            printf "time %12ld = $gmtime UTC\n", $time;
        }

        next if ($skip_fetch);
        next if (!$verbose);

        # only complete 3-word blocks are reported
        my @data = unpack("N*", $times{$key});
        for (my $i = 0; $i + 2 < @data; $i += 3) {
            next if ($data[$i] == 0);    # don't bother with null records...
            # FUTURE: count regular and null records, count unique
            #         keyids.
            printf "\tkeyid 0x%08X%08X 0x%08X created %12ld\n",
                $data[$i + 1], $data[$i + 2], $data[$i + 2], $data[$i];
        }
    }    # while each key

    STDOUT->flush();
    print STDERR "\ninfo: found $record_count time records\n";
    print STDERR
        "info: found $time_from_today time record(s) matching cutoff criteria (since $time_compare)\n"
        if (defined($time_compare));
    STDERR->flush();
}    # do_times()

###############################################################################
# do_prints(): the -p mode.  Walks the printdb btree, counting the records
# and printing each keyid (plus the stored long keyid and fingerprint with
# -v).  With -x it instead tallies v3 keys, v4 keys and subkeys and lists
# the keyids that occur more than once.

sub do_prints {
    my %dupes   = ();
    my $v3      = 0;
    my $v4      = 0;
    my $subkeys = 0;
    my $last;    # keyid of the previous record; undef before the first one

    $db_file = "$db_dir/printdb" if (!defined($db_file));
    my $prints = BerkeleyDB::Btree->new(
        -Filename => $db_file,
        -Flags    => DB_RDONLY
    ) or die "can't open $db_file: $! $BerkeleyDB::Error";

    # see ???() in ???.c...  the db key is a short keyid for v4 keys
    # stored in big-endian order, db data is the primary's long keyid and
    # the pubkey or subkey's fp.
    $record_count = 0;
    my $cursor = $prints->db_cursor();
    my ($key, $value) = ("", "");

    # A C-style for loop, so that "next" still advances the cursor.
    for (
        my $status = $cursor->c_get($key, $value, DB_FIRST);
        $status == 0;
        $status = $cursor->c_get($key, $value, DB_NEXT)
        )
    {
        $record_count++;
        next if ($no_output || $skip_fetch);

        my $keyid = unpack("N", $key);
        if (defined($last) && $last == $keyid) {
            $dupes{$keyid}++;
        }
        else {
            $last = $keyid;
        }

        if ($extrapolate) {
            my @fpwords = unpack("N7", $value);
            die "bad unpack" if (@fpwords != 7);
            if ($fpwords[1] != $keyid) {
                $subkeys++;
            }
            elsif ($fpwords[2] || $fpwords[3] || $fpwords[4] || $fpwords[5] || $fpwords[6]) {
                $v4++;
            }
            else {
                $v3++;
            }
            next;
        }

        printf "%08X", $keyid;
        if ($verbose) {
            my @data = unpack("N*", $value);
            printf "\t%08X%08X", $data[0], $data[1];
            printf "\t%08X%08X%08X%08X%08X",
                $data[2], $data[3], $data[4], $data[5], $data[6];
        }
        print "\n";
    }
    STDOUT->flush();

    if ($extrapolate) {
        # keyids are numbers: sort them numerically, not as strings
        foreach my $keyid (sort { $a <=> $b } keys %dupes) {
            printf "%08X\t%d\n", $keyid, $dupes{$keyid};
        }
        my $dupe_count = scalar keys %dupes;
        STDOUT->flush();
        print STDERR
            "\ninfo: $v3 v3 pubkeys, $v4 v4 pubkeys, $subkeys subkeys, $dupe_count duplicate keyids\n";
    }

    print STDERR "\ninfo: found $record_count fingerprint records\n";
    STDERR->flush();
}    # do_prints()

###############################################################################