########################## -*- Mode: Perl -*- ##########################
##
## File             : Databases.pm
##
## Description      : Handling of WAIS databases
##
#
# Copyright (C) 1996 Ulrich Pfeifer, Norbert Goevert
#
# This file is part of SFgate.
#
# SFgate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SFgate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SFgate; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#
##
## Author           : Norbert Goevert
## Created On       : Thu Feb 15 14:37:11 1996
##
## Last Modified By : Norbert Goevert
## Last Modified On : Wed Feb 12 16:44:37 1997
##
## $State: Exp $
##
## $Id: Databases.pm,v 5.1.1.3 1997/04/04 17:30:34 goevert Exp goevert $
##
## $Log: Databases.pm,v $
## Revision 5.1.1.3  1997/04/04 17:30:34  goevert
## patch11: back to FileHandle instead of IO::File
##
## Revision 5.1.1.2  1997/02/17 12:56:23  goevert
## patch10: local retrieve reimplemented
##
## Revision 5.1.1.1  1996/12/23 12:49:57  goevert
## patch6: handling for WAIT databases
##
## Revision 5.1  1996/11/05 16:55:19  goevert
## *** empty log message ***
##
## Revision 5.0.1.6  1996/11/04 13:10:04  goevert
## patch21: cons instead of MakeMaker
## patch21: conditions, empty queries
##
## Revision 5.0.1.5  1996/07/03 13:27:53  goevert
## patch19: database specification bug fixed
##
## Revision 5.0.1.4  1996/05/31 15:43:55  goevert
## patch14: reading of database files changed (SFgate-server)
##
## Revision 5.0.1.3  1996/05/23 17:07:47  goevert
## patch13: fixed bug with database specification
##
## Revision 5.0.1.2  1996/05/13 11:28:27  goevert
## patch1:
##
########################################################################


use strict;


package SFgate::Databases::Databases;


use File::Basename;
use FileHandle;
require SFgate::Databases::Database;


sub new
{
    my $class = shift;
    my $self  = {};
    bless $self, $class;

    $self->initialize(@_);

    return $self;
}


sub initialize
{
    my $self = shift;

    my($databases,
       $database_dir,
       $application_dir,
       $use_attributes)    = @_;
    ## local variables
    local($_);
    my($ref, $path);
    my($server, $port, $name, $description);
    my($attributes, $converter, $diagnostic);

    $self->{'attributes'} = $use_attributes;

    my(%databases, @databases);
    if ($databases) {
        # delete database doubles 
        foreach (@$databases) {
            next if defined($databases{$_});
            $databases{$_} = 1;
            push(@databases, $_);
        }
    }

    # parse databases
    foreach (@databases) {
        # test if there is a database configuration file

        my $database_file;
        if (m:^/: && -r $_) {
            $database_file = $_;
        }
        elsif ($_ !~ m:/: && -r $application_dir . '/' . $_) {
            $database_file = $application_dir . '/' . $_;
        }

        if ($database_file) {
            # read database configuration file
            ($server,
             $port,
             $name,
             $description,
             $attributes,
             $converter,
             $diagnostic) = $self->read_database_file($database_file);

            next if $diagnostic;

            if ($name) {
                $path = $database_dir if $server eq 'local' || $server eq 'wait';
                if ($name =~ m:^/:) {
                    ($name, $path) = fileparse($name);
                }
                if (!$port && $server ne 'local') {
                    $port = '210';
                    ($server, $port) = split(/:/, $server, 2);
                }
            }
            next;
        }
        else {
            $server = $_;
        }

        # parse $server to get port, name, etc.
        if ($server =~ m:^/:) {
            # local search
            ($name, $path) = fileparse($server);
            $server = 'local';
        }
        elsif ($server =~ m:^local/(.*):) {
            # local search
            $path = $1;
            if ($path =~ m:^/:) {
                ($name, $path) = fileparse($path);
            }
            else {
                $name = $path;
                $path = $database_dir;
            }
            $server = 'local';
        }
        elsif ($server =~ m:^wait/(.*):) {
            # search with WAIT
            $path = $1;
            if ($path =~ m:^/:) {
                ($name, $path) = fileparse($path);
            }
            else {
                require WAIT::Config;
                $name = $path;
                $path = $WAIT::Config->{WAIT_home};
            }
            $server = 'wait';
        }
        elsif ($server =~ m:([^/]+)/(.*):) {
            # global search
            $server = $1;
            $name   = $2;
            if ($server =~ /([^:]+):(\d+)/) {
                $server = $1;
                $port   = $2;
            }
            else {
                $port   = 210;
            }
        }
        else {
            $diagnostic = "Couldn't find specification for database $name";
        }
    }
    continue {
        $path =~ s:/$::;
        $name =~ s:\.(doc|src)$::;
        $port = '' if $server eq 'local';

        # generate the database object
        $ref = new SFgate::Databases::Database ($_,
                                                $server,
                                                $port,
                                                $name,
                                                $path,
                                                $description,
                                                $attributes,
                                                $converter,
                                                $diagnostic);

        $self->{'database'}->{$_} = $ref;
        push(@{$self->{'databases'}}, $ref);

        $server     = '';
        $port       = '';
        $name       = '';
        $path       = '';
        $attributes = undef;
        $converter  = undef;
        $diagnostic = '';
    }
}


sub display
{
    my $self = shift;

    ## local variables
    local($_);

    foreach (@{$self->{'databases'}}) {
        $_->display;
    }
}


sub get_queries
{
    my $self = shift;

    ## local variables
    local($_);
    ## return value
    my($queries) = '';

    foreach (@{$self->{'databases'}}) {
        my $query = $_->get_query;
        next unless $query;
        $queries .= $_->get_description . ': <B>';
        $query =~ s/&/&amp;/g;
        $query =~ s/</&lt;/g;
        $query =~ s/>/&gt;/g;
        $queries .= $query . "</B><BR>\n";
    }

    return $queries;
}


sub get_databases
{
    my $self = shift;

    ## local variables
    local($_);
    ## return values
    my(@databases);

    foreach (@{$self->{'databases'}}) {
        push(@databases, $_->get_database);
    }

    return @databases;
}


sub get_waisqueries
{
    my $self = shift;

    ## local variables
    local($_);
    my($waisquery);
    ## return values
    my(@waisqueries);

    foreach (@{$self->{'databases'}}) {
        $waisquery = $_->get_waisquery;
        push(@waisqueries, $waisquery) if defined($waisquery);
    }

    return @waisqueries;
}


sub get_waitqueries
{
    my $self = shift;

    ## local variables
    local($_);
    my($waitquery);
    ## return values
    my(@waitqueries);

    foreach (@{$self->{'databases'}}) {
        $waitquery = $_->get_waitquery;
        push(@waitqueries, $waitquery) if defined($waitquery);
    }

    return @waitqueries;
}


sub set_conditions
{
    my $self = shift;
    my($tag, $conditions) = @_;

    $self->{'databases'}->{$tag}->set_conditions($conditions);
}


sub get_conditions
{
    my $self = shift;
    my $tag = shift;

    return $self->{'database'}->{$tag}->get_conditions;
}


sub get_database_file
{
    my $self = shift;
    my $tag = shift;

    return $self->{'database'}->{$tag}->get_database_file;
}


sub get_encoded_conditions
{
    my $self = shift;
    my $tag = shift;
    ## local variables
    local($_);
    ## return value
    my $encoded_conditions;

    if ($tag) {
        $encoded_conditions = $self->{'database'}->{$tag}->get_encoded_conditions;
    }
    else {
        foreach (keys %{$self->{'database'}}) {
            my $db_cond;
            if ($db_cond = $self->{'database'}->{$_}->get_encoded_conditions) {
                $encoded_conditions .= '|||' .
                    $self->{'database'}->{$_}->{'database_id'} . $db_cond;
            }
        }
        $encoded_conditions =~ s/^\|{3}//;
    }

    return $encoded_conditions;
}


sub get_descriptions
{
    my $self = shift;

    ## local variables
    local($_);
    ## return values
    my(@descriptions);

    foreach (@{$self->{'databases'}}) {
        push(@descriptions, $_->get_description);
    }

    return @descriptions;
}


sub get_map_databases
{
    my $self = shift;

    ## local variables
    local($_);
    my(@diagnostics);
    ## return values
    my(@map_databases);

    return () if !$self->{'attributes'};
    
    foreach (@{$self->{'databases'}}) {
        @diagnostics = $_->get_diagnostics;
        next if @diagnostics;
        next if !keys(%{$_->{'attributes'}});
        push(@map_databases, $_);
    }
    
    return @map_databases;
}


sub set_query
{
    my $self = shift;

    my($query) = @_;
    ## local variables
    local($_);

    foreach (@{$self->{'databases'}}) {
        if ($query) {
            $_->set_query($query) unless $_->get_query || $_->get_diagnostics;
        }
        else {
            $_->add_diagnostic("Empty query");
        }
    }
}


sub set_diagnostics
{
    my $self = shift;

    my($tag, @diagnostics) = @_;
    ## local variables
    local($_);

    foreach (@diagnostics) {
        if ($_->[0] ne 'document') {
            $tag = $_->[0];
        }
        $self->{'database'}->{$tag}->add_diagnostic($_->[2]);
    }
}


sub get_diagnostics
{
    my $self = shift;

    ## local variables
    local($_);
    my($database, $diagnostic, $server, $text);
    ## return value
    my(%diagnostics);

    foreach (@{$self->{'databases'}}) {
        $server = $_->get_server;
        $database = $_->get_description;
        $text = '';
        foreach $diagnostic ($_->get_diagnostics) {
            $text .= "<B>$database</B>: $diagnostic\n";
        }
        push(@{$diagnostics{$server}}, $text) if $text;
    }

    return %diagnostics;
}
        

sub get_converter
{
    my $self = shift;

    my($tag, $converter) = @_;

    return $self->{'database'}->{$tag}->get_converter($converter);
}


sub get_server
{
    my $self = shift;

    my($tag) = @_;
    
    return $self->{'database'}->{$tag}->get_server;
}


sub get_name
{
    my $self = shift;

    my($tag) = @_;
    
    return $self->{'database'}->{$tag}->get_name;
}


sub get_documentrequest
{
    my $self = shift;

    my($tag, $docid, $type) = @_;
    
    return $self->{'database'}->{$tag}->get_documentrequest($docid, $type);
}

    
sub read_database_file
{
    my $self = shift;
    my($database_file) = @_;

    my $package = $database_file;
    $package =~ s/.*\///;
    $package =~ s/-//g;

    no strict 'refs';
    
    my $package_exists = $package . '::exists';

    eval qq[
            package $package;

            no strict 'vars';

            \$exists = 1;
            require '$database_file';
            
            1;
           ] unless $$package_exists;

    my $package_error = $package . '::error';
    if ($@) {
        $$package_error = $@;
    }
    if ($$package_error) {
        die "Error with $database_file: $$package_error\n";
        undef $@;
    }
    
    my $server      = $package . '::server';
    my $converter   = $package . '::converter';
    my $name        = $package . '::name';
    my $attributes  = $package . '::attributes';
    my $description = $package . '::description';
    my $port        = $package . '::port';

    return($$server,
           $$port,
           $$name,
           $$description,
           $$attributes,
           $$converter,
           '');
}


sub Search
{
    my $self = shift;
    my($query, $maxhits, $directget, $_lines, $format) = @_;
    
    my @databases_wais = $self->get_waisqueries;
    my @databases_wait = $self->get_waitqueries;

    # the %tags hash is used to determine those databases
    # which do not give result documents
    my %tags;
    foreach (@databases_wais, @databases_wait) {
        $tags{$_->{'tag'}} = 1;
    }

    # get search results for WAIS and WAIT
    my(@waisresults, @waitresults);
    my $normalize = 1;
    if (@databases_wais) {
        # WAIS
        require Wais;
        $Wais::maxdoc = 0; # make perl -w happy
        $Wais::maxdoc = $maxhits;
        my $result = &Wais::Search(@databases_wais);
        $self->set_diagnostics('', $result->diagnostics);
        @waisresults = $result->header;
        if (@waisresults && $waisresults[0]->[1] > 1000) {
            $normalize = 1000/$waisresults[0]->[1];
        }
    }
    if (@databases_wait) {
        # WAIT
        require WAIT::Wais;
        my $result = &WAIT::Wais::Search(@databases_wait);
        $self->set_diagnostics('', $result->diagnostics);
        @waitresults = $result->header;
        @waitresults = sort {$b->[1] <=> $a->[1]} @waitresults;
        if (@waitresults && $waitresults[0]->[1] > 1/$normalize) {
            $normalize = 1/$waitresults[0]->[1];
        }
    }
    
    # merge results and normalize RSVs
    my($header, $i);
    my @headlines;
    while (@waisresults || @waitresults) {
        if (@waisresults
            && (!@waitresults
                || $waisresults[0]->[1] * $normalize > $waitresults[0]->[1] * $normalize * 1000)) {
            $header = shift @waisresults;
        }
        elsif (@waitresults) {
            $header = shift @waitresults;
            $header->[1] *= 1000;
        }
        else {
            last;
        }
        next if $header->[4] =~ /Search produced no result\. Here/;
        delete($tags{$header->[0]});
        next if $i++ >= $maxhits;
        $header->[1] = int($header->[1] * $normalize);
        $headlines[$i - 1] = [$header];
    }

    local($_);
    my($http_failure, $type, @types, $types);
    foreach (@headlines) {

        @types = @{$_->[0]->[5]};
        $types = ':' . join(':', @types) . ':';
        if ($self->get_server($_->[0]->[0]) eq 'wait') {
            # select appropriate type for WAIT documents
            my $dbname = $self->get_name($_->[0]->[0]);
            $dbname =~ s/.*?(\w+)$/$1/;
            my $type = 'HTML';
            $type = $dbname if $types =~ /:$dbname:/i;
            $type = $format if $format && $types =~ /:$format:/i;
            @types = ($type);
            $_->[0]->[5] = \@types;
            $types = ":$type:";
        }
        # get document directly?
        if ($directget) {
            foreach $type (@types) {
                ($_->[1]->{$type}, $http_failure) = $self->Retrieve($_->[0]->[0],
                                                                    $_->[0]->[6],
                                                                    $type,
                                                                    $query);
                if ($http_failure) {
                    $_->[0]->[4] .= "\n(Error while retrieving document: $http_failure)";
                }
            }
        }
        elsif (index($types, ':WSRC:') > -1) {
            ($_->[1]->{'WSRC'}) = $self->Retrieve($_->[0]->[0],
                                                  $_->[0]->[6],
                                                  'WSRC',
                                                  '',
                                                  $_lines);
        }
    }

    foreach (keys %tags) {
        $self->set_diagnostics($_, ([$_, '', 'Search produced no result']));
    }

    return @headlines;
}


sub Retrieve
{
    my $self = shift;
    my($tag, $docid, $type, $query, $_lines) = @_;

    my($text, $http_failure);
    
    if ($type eq 'URL') {
        # fetch document via http
        require SFgate::Http_client;
        my $url = (split(' ', ($docid->split)[2]))[2];
        ($text, $http_failure) = &SFgate::Http_client::do_http($url);
        $self->set_diagnostics($tag, ([$tag, '', $http_failure])) if $http_failure;
    }
    else {
        # fetch document via WAIS or WAIT
        my $result;
        if ($self->{'database'}->{$tag}->get_server eq 'wait') {
            # WAIT
            require WAIT::Wais;
            my $waitdocid = WAIT::Wais::Docid->new ($docid->split);
            my %request = $self->get_documentrequest($tag, $waitdocid, $type);
            $request{query} = $query;
            $request{lines} = $_lines;
            $result = WAIT::Wais::Retrieve(%request);
            $self->set_diagnostics($tag, $result->diagnostics);
            $text = $result->text;
        }
        else {
            # WAIS
            if ($self->{'database'}->{$tag}->get_server eq 'local') {
                my $id = ($docid->split)[2];
                my($start, $end, $file) = split(' ', $id);
                my $FH = new FileHandle("< $file");
                if (defined $FH) {
                    seek($FH, $start, 0);
                    read($FH, $text, $end - $start);
                    $FH->close;
                }
            }
            unless ($text) {
                require Wais;
                my %request = $self->get_documentrequest($tag, $docid, $type);
                $result = Wais::Retrieve(%request);
                $self->set_diagnostics($tag, $result->diagnostics);
                $text = $result->text;
            }
        }
    }
    
    return($text, $http_failure);
}


1;
