#!PERL
#       cleandb.pl - clean up the database, ie remove all the entries
#			that have been deleted on the fly (*$DELETEDOBJECT:)
#                       and rebuild the index.
#
#       $RCSfile: cleandb.pl,v $
#       $Revision: 0.46 $
#       $Author: ripe-dbm $
#       $Date: 1995/11/17 14:43:00 $

# $opt_v = 1;

USE_DBM

@INC = ("LIBDIR", @INC);

require "rconf.pl";
require "dblock.pl";
require "dbopen.pl";
require "dbclose.pl";
require "donetdbm.pl";
require "enread.pl";
require "enwrite.pl";
require "encmp.pl";
require "syslog.pl";
require "getopts.pl";

# Boundaries of the classful address space, as integers. Within this
# file they are only referenced by the commented-out dosplit routine,
# which treats addresses >= $CLASSB as class C, addresses >= $CLASSA as
# class B and everything below as class A.
# quad2int is not defined in this file; presumably it converts a dotted
# quad to an integer (the meaning of the second argument is not visible
# here - TODO confirm in the required libraries).
$CLASSA = &quad2int("128.0.0.0",0);
$CLASSB = &quad2int("192.0.0.0",0);

# cleandbadd
#
# Stripped-down variant of dbadd, with some checks left out for speed.
# It relies on two assumptions: no two entries share the same key, and
# only one process uses this file at a time. Because of the latter no
# seek is done before writing - the current file position is already
# the place where the entry has to go.
#
# Arguments: *db - typeglob of the database to append to
#            *en - typeglob of the entry to store
# Returns $OK.

sub cleandbadd {
    local(*db, *en) = @_;

    local($unikey) = &enukey(*en);

    # From here on the default output handle is the database file; the
    # selection is deliberately not restored.
    select(db);
    print "\n";

    # The entry starts right behind the separating blank line; remember
    # that position before writing the entry itself.
    local($offset) = tell(db);
    &enwrite(*en);

    # Index the entry under its unique key first ...
    &addkey(*db, $unikey, $offset);

    local($key);

    # ... and then under every other key, leaving out anything that
    # contains an a.b.c.d/len pattern.
    foreach (&enkeys(*en)) {
	&addkey(*db, $_, $offset) unless /\d+\.\d+\.\d+\.\d+\/\d+/;
    }

    return $OK;
}

# parsech
#
# Parse the value of a changed field and return the e-mail address that
# goes with the most recent change date. Used to notify the last changer
# of a block that his block has been split because of guarded field
# additions.
#
# Argument: $line - the changed field; one "e-mail date" pair per line,
#                   lines separated by "\n"
# Returns the e-mail address of the line with the greatest date, or
# $DEFMAIL when no line carries a date or when $TESTMODE is set.
#
# NOTE(review): dates are compared as strings (gt), which only orders
# them correctly as long as all dates use the same fixed-width format.

sub parsech {

    local($line) = @_;
    local($em, $chdate);
    local($rtem, $rtch) = ("", "");

    foreach (split(/\n/, $line)) {
	# split(' ') skips leading whitespace; with /\s+/ an indented
	# line would yield an empty first field and shift the date into
	# the e-mail slot.
	($em, $chdate) = split(' ', $_);
	if ($chdate gt $rtch) {
	    $rtch = $chdate;
	    $rtem = $em;
	}
    }

    $rtem = "$DEFMAIL" if $rtem eq "";
    $rtem = "$DEFMAIL" if $TESTMODE;
    return $rtem;
}

# AddConflict
#
# Record a conflict in the global %conflict table, which MailConflicts
# later walks and turns into mails.
#
# $conflict{$key} is a string of groups "@attr%value,value,...", one
# group per attribute. New values are inserted at the front of the
# group of their attribute; a missing group is appended.
#
# Arguments: $attr     - guarded attribute (short form)
#            $key      - key of the object the conflict is about
#            $curvalue - value known so far; the marker "CONFLICT" is
#                        not stored again for a key that already has
#                        an entry
#            $newvalue - value to add

sub AddConflict {

    local($attr, $key, $curvalue, $newvalue) = @_;

    if (!$conflict{$key}) {
	# First conflict for this key: start with a single group.
	$conflict{$key} = "\@$attr\%$curvalue,$newvalue";
    }
    else {
	unless ($curvalue eq "CONFLICT") {
	    $conflict{$key} =~ s/(\@$attr\%)/$1$curvalue,/
		|| ($conflict{$key} .= "\@$attr\%$curvalue");
	}
	$conflict{$key} =~ s/(\@$attr%)/$1$newvalue,/
	    || ($conflict{$key} .= "\@$attr\%$newvalue");
    }

    print STDERR "cleandb - \$conflict{$key} = $conflict{$key}\n" if $opt_v;
}

# MailConflicts
#
# Mail out the conflicts collected in %conflict by AddConflict.
#
# Every value in a "@attr%val,val,..." group that is not one of the
# markers NONEXIST or CONFLICT is taken to be a guardian file name. For
# each such guardian one temporary file is created (ConflictTmpFile,
# ConflictMailHeaders), one line per conflicting key is appended to it,
# and at the end every file is fed to $MAILCMD and removed.
#
# Mind you, this routine makes the assumption that the file name, which
# is the guarded value, is also the e-mail address to mail the conflicts
# to! You'd better make sure these mailboxes exist.
#
# Takes no arguments; reads %conflict, %GUARDTYPE, $MAILCMD and $opt_v.

sub MailConflicts {

    local($i, $j, $k, $n);
    local($attr, $vals);
    local(%guardianfile) = ();
    local($nonexist) = 0;
    local($newval, $kpat, $shfile);

    if ($opt_v) {
	print STDERR "cleandb - mailing conflicts\n";
    }

    foreach $i (sort keys %conflict) {
	foreach $j (split(/\@/, $conflict{$i})) {
	    next if $j eq "";
	    ($attr, $vals) = split(/%/, $j);
	    foreach $k (split(/,/, $vals)) {
		next if $k eq "NONEXIST";
		next if $k eq "CONFLICT";
		if (!$guardianfile{$k}) {
		    $guardianfile{$k} = &ConflictTmpFile($k);
		    &ConflictMailHeaders($guardianfile{$k}, $k);
		}

		# The guardian name is matched literally below, so quote
		# every regex metacharacter in it (addresses contain "."
		# and may contain "+").
		($kpat = $k) =~ s/(\W)/\\$1/g;

		# Build the list of the *other* guardians: drop the
		# markers and the guardian we are writing to.
		$newval = $vals;
		$nonexist = 0;

		if ($newval =~ /NONEXIST/) {
		    $newval =~ s/^NONEXIST,|,NONEXIST$|^NONEXIST$//;
		    $newval =~ s/,NONEXIST,/,/g;
		    $nonexist = 1;
		}
		# Anchored at both ends so that a name merely starting
		# with CONFLICT is left alone.
		$newval =~ s/^CONFLICT,|,CONFLICT$|^CONFLICT$//;
		$newval =~ s/,CONFLICT,/,/g;
		$newval =~ s/^$kpat,|,$kpat$//;
		$newval =~ s/,$kpat,/,/g;

		# Turn "a,b,c" into "a, b and c".
		$newval =~ s/(.*),([^,]+)$/$1 and $2/;
		$newval =~ s/,/, /g;

		if (!open(TMP, ">>$guardianfile{$k}")) {
		    &syslog("ERRLOG", "cleandb cannot append to conflict file $guardianfile{$k}");
		    next;
		}

		print TMP "\"$i\" also appears in guardian files: $newval\n" if $newval && ($newval ne $k) && ($GUARDTYPE{$attr} eq "SINGLE");
		if ($nonexist) {
		    print TMP "\"$i\" not found in database !!\n";
		}
		close(TMP);
	    }
	}
    }

    foreach $i (keys %guardianfile) {
	if ($opt_v) {
	    print STDERR "cleandb - send mail to $i\n";
	}
	# The file name contains the guardian name verbatim; single-quote
	# it so the shell cannot interpret any character in it.
	($shfile = $guardianfile{$i}) =~ s/'/'\\''/g;
	system("$MAILCMD < '$shfile'");
#	system("cat $guardianfile{$i}");
	unlink($guardianfile{$i});
    }
}

# ConflictTmpFile
#
# Return the name of the temporary conflict file for a guardian:
# /tmp/dbconfl.<seed>.<process id>
#
# NOTE(review): the name is fully predictable from the seed and the
# process id.

sub ConflictTmpFile {

    local($seed) = $_[0];

    return join(".", "/tmp/dbconfl", $seed, $$);
}

# ConflictMailHeaders
#
# Create the conflict mail file for one guardian and write its start:
# a "To:" line followed by the text in $GRDCONFLICT.
#
# Arguments: $filename     - file to create (an existing file is
#                            truncated)
#            $guardedvalue - the guarded value, used lower-cased as the
#                            guardian's name and e-mail address
# With $TESTMODE set the mail is addressed to $DEFMAIL instead.
# A file that cannot be created is logged and nothing is written.
# The caller's selected output handle is left as it was. Returns nothing.

sub ConflictMailHeaders {

    local($filename, $guardedvalue) = @_;
    local($oldfh);

    $guardedvalue =~ tr/A-Z/a-z/;

    if (!open(TMP, ">$filename")) {
	&syslog("ERRLOG", "cleandb cannot create conflict file $filename");
	return;
    }

    # The eval'ed print below has no explicit handle, so TMP has to be
    # the selected handle while it runs.
    $oldfh = select(TMP);
    if ($TESTMODE) {
	print TMP "To: \"$guardedvalue Guardian\" <$DEFMAIL>\n";
    } else {
	print TMP "To: \"$guardedvalue Guardian\" <$guardedvalue>\n";
    }

    # $GRDCONFLICT is interpolated a second time by the eval, so
    # variables named in that text are expanded here.
    # NOTE(review): this executes the text as Perl source; it must come
    # from a trusted source and must not contain an unescaped '"'.
    eval "print \"$GRDCONFLICT\n\";";
    close(TMP);
    select($oldfh);
    return;
}


# readguard
#
# Read all the guarded values for a certain object type.
#
# For every guarded attribute listed in $GRD{$type} the directory
# $GUARD{attribute} is scanned; every file in it (dot files excepted) is
# a guardian file whose *name* is the guarded value and whose lines are
# the keys it applies to. Lines starting with ";" or "#" and empty lines
# are skipped, a leading "inetnum:" tag is dropped and all whitespace is
# removed from the key. A key repeated within one file counts once.
#
# The values are stored in the associative array %guarded, indexed by
# "guarded attribute (short form)" . "%" . key. The value is the file
# name, which is the guarded value.
#
# So, $guarded{"as%192.87.45.0"}="AS1104" would mean that the guarded
# attribute "as" for the object with key "192.87.45.0" should be
# "AS1104".
#
# There are two types of guarded attributes ($GUARDTYPE{attribute}):
# SINGLE guarded attributes can only have one value. If a key appears in
# more than one guardian file the conflict is recorded with AddConflict
# and the value is replaced by the special marker CONFLICT.
# MULTIPLE guarded attributes are not checked for conflicts; all file
# names are concatenated, separated by a space.
#
# Argument: $type - object type. Sets $guarddone{$type} when done.

sub readguard {

    local($type) = @_;
    local($i);
    local(%done) = ();
    local(@allfiles);
    local($slot);

    foreach $i (split(/\s+/, $GRD{$type})) {
	$curfield = $i;
	if (!opendir(A, "$GUARD{$i}")) {
	    &syslog("ERRLOG", "cleandb cannot opendir $GUARD{$i}");
	    next;
	}
	@allfiles = sort grep(!/^\./,readdir(A));
	closedir(A);
	foreach $curfile (@allfiles) {
	    # Explicit read mode, so that no character in the file name
	    # can turn this into a write or a pipe open.
	    if (!open(TMP2, "< $GUARD{$i}/$curfile")) {
		&syslog("ERRLOG","cleandb cannot open $GUARD{$i}/$curfile");
		next;
	    }

	    # Duplicates are only suppressed within a single file.
	    %done = ();

	    while (<TMP2>) {
		# Strip the newline only; chop would eat the last
		# character of a final line without a trailing newline.
		s/\n$//;

		next if /^;/;
		next if /^#/;

		s/^inetnum:\s*//;

		next if /^\s*$/;

		s/\s*$//;

		$value = $_;
		$value =~ s/\s//g;

		next if $done{$value};

		$slot = "$curfield%$value";
		if ($GUARDTYPE{$curfield} eq "MULTIPLE") {
		    if ($guarded{$slot}) {
			$guarded{$slot} .= " ".$curfile;
		    } else {
			$guarded{$slot} = $curfile;
		    }
		} else {
		    if ($guarded{$slot}) {
			&AddConflict($curfield,
				     $value,
				     $guarded{$slot},
				     $curfile);
			$guarded{$slot} = "CONFLICT";
		    } else {
			$guarded{$slot} = $curfile;
		    }
		}
		$done{$value} = 1;
	    }
	    close(TMP2);
	}
    }
    $guarddone{$type} = 1;
}

# dosplit
# 
# Kind of yucky routine needed for block splits. This is called when
# a block split is needed. It will make a block from $from to $to, and
# will reset all guarded attributes according to the guardian files.

# -------------------
# Not needed any more in new database software!!!
# -------------------
# sub dosplit {
#
#    local(*en, $from, $to) = @_;
#
#    local($begin) = &int2quad($from);
#    local($end) = &int2quad($to);
#
#    if ($from eq $to) {
#	$en{"in"} = "$begin";
#    }
#    else {
#	$en{"in"} = "$begin - $end";
#    }
#
#    foreach $j (split(/\s+/, $GRD{$type})) {
#	$en{"$j"} = $guarded{"$j%$begin"};
#	if ($from >= $CLASSB) {$inc = 256;}
#	elsif ($from >= $CLASSA) {$inc = 256**2;}
#	else {$inc = 256**3;}
#	for ($p=$from;$p<=$to;$p+=$inc) {
#	    local($num) = &int2quad($p);
#	    delete $guarded{"$j%$num"};
#	}
#    }
#
#    &cleandbadd(*newdb, *en);
#
#    if ($domail && $firstmail) {
#	$firstmail = 0;
#	$BEGINADDRESS = $beginaddr;
#	$ENDADDRESS = $endaddr;
#	open(MAIL, ">/tmp/domail.$$");
#	$to = &parsech($en{"ch"});
#	print MAIL "To: $to\n";
#	select(MAIL);
#	eval "print \"$SPLITMSG\n\";";
#	select(STDOUT);
#    }
#    if ($domail) {
#	select(MAIL);
#	print "\n" if &enwrite(*en, 1);
#	select(STDOUT);
#    }
#}
		

	
# checkguardandprint
#
# This is the routine where guarded fields are checked and deleted or
# added if necessary, after which the entry is written to the new
# database (*newdb) with cleandbadd.
#
# Arguments: *en   - typeglob of the entry
#            $type - object type of the entry; $en{$type} holds its key
# Returns 0 for an empty entry (nothing is written), 1 otherwise.

sub checkguardandprint {

    local(*en, $type) = @_;

    return 0 unless scalar(%en);

    # No guarded fields for this type, just print and leave.
    if (!$GRD{$type}) {
	&cleandbadd(*newdb, *en);
	return 1;
    }

    # The guardian files are read once per type, on first use.
    &readguard($type) unless $guarddone{$type};

    # The lookup key is the value of the type attribute without any
    # whitespace.
    ($key = $en{$type}) =~ s/\n*$//;
    $key =~ s/\s//g;

    foreach $j (split(/\s+/, $GRD{$type})) {
	# Set the attribute to the guarded value if there is no
	# conflict; with a conflict the value in the entry is kept.
	# Conflicts themselves are reported in readguard().
	$en{$j} = $guarded{"$j%$key"} unless $guarded{"$j%$key"} eq "CONFLICT";

	# Whatever is left in %guarded after the whole database has been
	# processed belongs to keys that were never seen.
	delete $guarded{"$j%$key"};
    }

    &cleandbadd(*newdb, *en);
    return 1;
}

# Main program

&Getopts('cvVp');

# -v and -V are treated as the same switch.
$opt_v = 1 if $opt_V;
$opt_V = 1 if $opt_v;

# Exactly one thing is required: the name of an existing database file.
# NOTE(review): $PROGRAMNAME is not assigned anywhere in this file;
# presumably one of the required libraries sets it - confirm.
if ((!$ARGV[0]) || (! -f $ARGV[0])) {
    # Only complain about a missing file when a name was given at all.
    if ($ARGV[0]) {
	print STDERR "Database file \"$ARGV[0]\" doesn't exist\n\n";
    }
    print STDERR "Usage: $PROGRAMNAME [-c] database\n";
    # A usage error is a failure; report it to the caller as such.
    exit 1;
}

# Read the config file named in the environment variable RIPEDBCNF, or
# fall back to the default.
# NOTE(review): "DEFCONFIG" looks like a placeholder that is replaced
# with a real path when the script is installed - confirm.

if ($opt_v) {
    print STDERR "cleandb - reading config\n";
}
$conffile=$ENV{"RIPEDBCNF"};
$conffile= "DEFCONFIG" unless $conffile;
&rconf($conffile);

# Now we open the database file given in $ARGV[0].
# Maybe later it would be nice to open based on the SOURCE instead of
# hard file names.

if ($opt_v) {
    print STDERR "cleandb - opening previous database\n";
}

# Alias the glob "i" to "curdb": from here on the file handle i (and
# likewise $i, @i and %i) is the same thing as curdb.
local(*i) = 'curdb';

%nothing=();
%en=();
$type="";

&dbopen(i, *nothing, 1, "$ARGV[0]") || die "Cannot open $ARGV[0]";

# Split the database path into its directory part (including the
# trailing slash) and its file name; without a slash in the path the
# current directory is used.
if ($ARGV[0] =~ /^(.*\/)([^\s\/]+)\s*$/) {
   $NEWDIR=$1;
   $filename=$2;
} 
else {
   $NEWDIR="./";
   $filename=$ARGV[0];
}

# We will create a temporary directory, next to the database, to build
# the new database and indexes in: <dir>/<filename>.new.<pid>/<filename>

$NEWDIR.=$filename.".new.".$$;
$NEWDB=$NEWDIR."\/".$filename;

# print STDERR "$NEWDB $NEWDIR\n";

if (-d $NEWDIR) {
    &dbclose(*i);
    die "Temporary directory \"$NEWDIR\" already exists!";
}

if (!mkdir($NEWDIR, 0750)) {
    # Save the error text first; dbclose may overwrite $!.
    $mkdirerr = "$!";
    &dbclose(*i);
    die "Failed to create temporary directory ($mkdirerr): $NEWDIR";
}

print STDERR "cleandb - opening new database\n" if ($opt_v);

# Alias the glob "newdb" to "new", like i/curdb for the old database.
local(*newdb) = 'new';

# Create the new file and print the copyright notice to it.

print STDERR "cleandb - printing rights\n" if ($opt_v);
if (!open(newdb, ">$NEWDB")) {
    # Without the file there is nothing to build; undo what was done
    # so far and stop.
    $createerr = "$!";
    &dbclose(*i);
    rmdir($NEWDIR);
    die "Cannot create $NEWDB ($createerr)";
}
&printrights(newdb);
close(newdb);

&dbopen(newdb, *nothing, 1, $NEWDB ) || die "Cannot open $NEWDB";

# Create the lock file, so that dbupdates are put on hold, and put the
# process ID in it. This lock file is created to avoid "rename" and
# "open" race conditions between cleandb and dbupdate, or basically
# anything that wishes to write to the database file.
# dbopen in write mode checks to see if this file is there, and then
# holds the open.
# NOTE(review): $LOCKDIR and $CLEANLOCK are not assigned in this file;
# presumably they come from the configuration read by rconf - confirm.
# NOTE(review): the existence test and the creation below are two
# separate steps, so two cleandb runs started at the same moment could
# both get past the test.

if ($opt_v) {
    print STDERR "cleandb - checking for lockfile\n";
}

if (-e $LOCKDIR.$CLEANLOCK.".".$filename) {
    &syslog("ERRLOG", "$LOCKDIR$CLEANLOCK.$filename already exists");
    die "lockfile $LOCKDIR$CLEANLOCK.$filename already exists";
}

if ($opt_v) {
    print STDERR "cleandb - creating lock file $LOCKDIR$CLEANLOCK.$filename\n";
}

open(LOCKFILE, ">$LOCKDIR$CLEANLOCK.$filename") || die "cannot create $LOCKDIR$CLEANLOCK.$filename";
print LOCKFILE "$$\n";
close(LOCKFILE);

# Print generation date and copyright to the new database.
# NOTE(review): printrights was already called once when the new file
# was created, before it was opened with dbopen - confirm that this
# second call is intended.

if ($opt_v) {
    print STDERR "cleandb - printing rights\n";
}

&printrights(newdb);

# Now we can simply read the database using enread(), since it will skip
# objects that are not defined (like *$DELETEDOBJECT:). That is the
# basic cleanup operation.

# Make sure the enwrite() output goes to the new file.

select(newdb);

# Since we are going to clean up the database, we do not want some
# daemon to alter it while we are working on it, so we lock both the
# old and the new database.

if ($opt_v) {
    print STDERR "cleandb - locking files\n";
}

&dblock(*i);
&dblock(*newdb);

if ($opt_v) {
    print STDERR "cleandb - main loop\n";
}

# Copy loop: read every entry from the old database and write it, with
# its guarded attributes checked, to the new one. enread returns the
# type of the entry it read; the loop ends on a false return value.
while ($type=&enread(*en,i)) {
    &checkguardandprint(*en, $type);
}

print STDERR "cleandb - done with main loop\n" if $opt_v;

# The new database is complete; release and close it. The old database
# stays locked until the new files have been moved into place.
&dbunlock(*newdb);
&dbclose(*newdb);

# With -c (classless) we have to re-open the newly created database and
# do classless indexing on it.
# NOTE(review): if the dbopen below fails the script dies while the
# lock file still exists and the old database is still locked.

if ($opt_c) {
    print STDERR "cleandb - classless index\n" if $opt_v;
    &dbopen(newdb, *nothing, 0, $NEWDB) ||
	die "Cannot open $NEWDB for classless index";
    &dbclopen(*nothing, 1, $NEWDB);
    &dblock(*newdb);
    
    &docldbm(*newdb);
    
    &dbunlock(*newdb);
    &dbclose(*newdb);
    &dbclclose();

    print STDERR "cleandb - moving new index files\n" if $opt_v;

    # Same call as for the main files below, with the classless
    # extension appended to both names.
    &delormoveindices($NEWDB.".".$CLASSLESSEXT, $ARGV[0].".".$CLASSLESSEXT);
}

print STDERR "cleandb - moving index files\n" if $opt_v;

# delormoveindices is not defined in this file; presumably it replaces
# the files of the old database (second argument) with the newly built
# ones (first argument) - confirm.
&delormoveindices($NEWDB, $ARGV[0]);

print STDERR "cleandb - removing temporary directory\n" if $opt_v;

# rmdir only succeeds on an empty directory; a failure is reported but
# does not stop the script.
if (!rmdir($NEWDIR)) {
   warn "could\'t remove directory ($NEWDIR) errorcode:$!\n";
}

# That's it, we can safely unlock and close the old database now (the
# new one was already closed above).

&dbunlock(*i);
&dbclose(*i);

# Now we can remove the lock file.

print STDERR "cleandb - removing lock directory\n" if $opt_v;

unlink($LOCKDIR.$CLEANLOCK.".".$filename);

if ($opt_v) {
    print STDERR "cleandb - sending NONEXIST guardian messages\n";
}

# Every entry still present in %guarded belongs to a key that was not
# found in the database (checkguardandprint deletes the ones it has
# handled). Report each guardian value of such an entry as NONEXIST.
foreach $i (keys %guarded) {
    local($attr, $key) = split(/%/, $i);
    foreach $gv (split(/\s+/, $guarded{$i})) {
	&AddConflict($attr, $key, "NONEXIST", $gv);
    }
}

# Send out everything that was collected, here and in readguard.
&MailConflicts();

if ($opt_v) {
    print STDERR "cleandb - all done\n";
}
