#!/usr/bin/perl
# -*- fill-column: 78 -*-

# tag2upload-oracled -- tag2upload simple Oracle protocol communicator

# Copyright (C) 2024-2026  Sean Whitton
# Copyright (C) 2025-2026  Ian Jackson
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# usage:
#   tag2upload-oracled [-D] [--force-production]			\
#                      [--workers=WORKERS] [--no-restart-workers]	\
#                      [--worker-restart-timeout=SECONDS]               \
#                      [--processing-timeout=SECONDS]                   \
#                      [--ssh=SSH]					\
#                      [--autopkgtest-virt=autopkgtest-virt-SERVER]	\
#                      [--autopkgtest-arg=VIRT-SERVER-ARG] ...		\
#                      [--retain-tmp]					\
#                      --manager=[USER@]MNGR-HOST			\
#                      --manager-socket=MNGR-SOCK			\
#                      --builder=[USER@]BLDR-HOST			\
#                      --from=FROM					\
#                      --reply-to=REPLY-TO				\
#                      --copies=COPIES					\
#                      [--] DISTRO DISTRO-DIR AUTH-SPEC [<settings>]
#  tag2upload-oracled --version
#
# Option -D may be repeated, e.g. -DDD, to increase the debug level.
# --processing-timeout=0 means no timeout, and is the default.
# If --version is present then all other arguments are ignored.
#
# Uses whatever one ambient gpg key is available.

use 5.028;
use warnings;
use POSIX qw(:errno_h :signal_h strftime WNOHANG);
use IPC::Open3;
use Symbol qw(gensym);
use URI::Escape;
use Getopt::Long;
use Fcntl qw(:flock);

use Debian::Dgit::Infra;	# must precede Debian::Dgit
use Debian::Dgit qw(!fail);
use Debian::Dgit::ProtoConn;

sub fail ($);
sub test_signing_key ();
sub report_reaped_worker ($);
sub block_signals ();
sub unblock_signals();
sub get_dgit_version(@);

our ($production, $force_production) = (0, 0);
our ($workers_n, $restart_workers, $ssh, $adt_virt)
  = (1, 1, "ssh", "autopkgtest-virt-null");
our ($processing_timeout, $worker_restart_timeout) = (0, 20);
our ($retain_tmp, $manager, $socket, $builder,
     $from, $reply_to, $copies, $version_opt, @adt_args);
our $our_version = $ENV{DGIT_VERSION} // "UNRELEASED"; ###substituted###

Getopt::Long::Configure "bundling";
GetOptions
  # Optional arguments.
  "D+"				=> \$debuglevel,
  "force-production!"		=> \$force_production,
  "workers=i"			=> \$workers_n,
  "ssh=s"			=> \$ssh,
  "autopkgtest-virt|adt-virt=s" => \$adt_virt,
  "processing-timeout=i"	=> \$processing_timeout,
  "worker-restart-timeout=i"	=> \$worker_restart_timeout,
  "retain-tmp"			=> \$retain_tmp,
  "autopkgtest-arg=s"		=> \@adt_args,
  "restart-workers!"		=> \$restart_workers,
  "version!"			=> \$version_opt,

  # Required arguments.
  "manager=s"			=> \$manager,
  "manager-socket=s"		=> \$socket,
  "builder=s"			=> \$builder,
  "from=s"			=> \$from,
  "reply-to=s"			=> \$reply_to,
  "copies=s"			=> \$copies;
$manager && $socket && $builder && $from && $reply_to && $copies
  || $version_opt
  or fail "not enough arguments";

if ($version_opt) {
    say "tag2upload-oracled version $our_version";
    exit;
}

@ARGV >= 3 or fail "not enough arguments for dgit-repos-server";
our @drs_args = @ARGV;

initdebug "tag2upload-oracled ";
enabledebug if $debuglevel;

our @fatal_signals = qw(HUP TERM INT QUIT);
our $sigset
  = POSIX::SigSet->new(map { no strict; &{"SIG$_"} } @fatal_signals);

our $signing_keyid;
our $production_string;

# We are expecting to be on a LAN with the Manager & Builder, so be fairly
# intolerant of connection issues.
our @ssh_opts = qw( -oBatchMode=yes -oConnectTimeout=30
		    -oServerAliveInterval=120 -oServerAliveCountMax=8 );

sub me () { (my $b = $builder) =~ s/^.+@//; "$b,$$" }
sub say_log (@) {
    # We output to STDERR and let systemd pick it up for its journal.
    # In the future, in addition, some kind of remote syslogging would be good
    # so that we can inspect the live logs without shell access to the host
    # running this daemon.
    #
    # Given these outputs, only do whole lines at once.
    printf STDERR "[t2u-oracled %s][%s] %s\n",
      me, strftime("%FT%T", gmtime), $_
      for @_
}

sub fail ($) {
    # Use this function in preference to using die() directly.
    my $msg = shift;
    $msg .= " at line ".(caller)[2];
    $builder //= "none";
    say_log "ERROR: $msg";
    die $msg."\n";
}

sub warn_log ($) {
    # Use this function in preference to warn().
    say_log "WARNING: $_[0] at line ".(caller)[2];
}

# Main procedure.
{
    # Decide whether we are a production or testing instance.
    # The Manager should not send ordinary user jobs to a testing instance
    # without manual intervention.
    # Normally, only a clean install running everything out of dgit.deb and
    # dgit-infrastructure.deb counts as a production instance.
    # The output of systemctl's 'show' subcommand is a stable interface.
    if ($force_production) {
	$production = 1;
    } elsif ($ENV{DBUS_SESSION_BUS_ADDRESS} && $ENV{XDG_RUNTIME_DIR}) {
	my @wanted = qw(MainPID FragmentPath DropInPaths);
	my $ret = open my $systemctl, "-|",
	  qw(systemctl --user show tag2upload-oracled.service),
	  map "--property=$_", @wanted;
	if (!$ret) {
	    $! == ENOENT or fail "'systemctl show': $!";
	} else {
	    chomp(my @lines = <$systemctl>);
	    close $systemctl
	      or fail "systemctl failed: ".failedcmd_waitstatus();
	    @lines == @wanted
	      or fail "unexpected number of systemctl output lines";

	    my %vals;

	    for (@lines) {
		my ($k, $v) = split /=/, $_, 2;
		exists $vals{$k}
		  and fail "unexpected systemctl output: repeated $k field";
		$vals{$k} = $v // "";
	    }
	    $vals{$_} // fail "expected $_ in systemctl output" for @wanted;

	    $production = $vals{MainPID} == $$
	      && $vals{FragmentPath} =~ m#^(?:/usr)?/lib/#
	      && $vals{DropInPaths} eq "";
	}
    }
    $production_string = $production ? "production" : "testing";
    say_log sprintf "instance fidelity=%s", $production_string;

    -d or mkdir or fail $! for "worker-cwd";
    test_signing_key();

    # WARNING!  Be careful manipulating this without signals blocked!
    # This variable is used by our signal handlers.
    # (Right here is OK because we haven't set up the signal handlers yet.)
    #
    # Invariants:
    #   1. Every one of our unreaped children is in this array,
    #      except briefly with signals blocked (while we're forking).
    #   2. The converse is NOT true -- this may contain pids of
    #      workers that we have already reaped!
    #   3. But *at the start of each iteration of the main loop*,
    #      it contains only (and therefore precisely) our unreaped children.
    #   4. We reap only (a) in the main loop or (b) with signals blocked,
    #      in a signal handler which will definitely exit rather than return.
    #      Therefore code in the main loop can assume no children
    #      have been reaped other than by the main loop.
    my @worker_slots = (undef)x$workers_n;

    foreach my $sig (@fatal_signals) {
	$SIG{$sig} = sub {
	    say_log "group_leader: received SIG$sig; shutting down workers";
	    # See the comment for @worker_slots, notably the invariants.
	    #
	    # We mustn't kill anything that isn't actually one of our
	    # children.  @worker_slots might contain already-reaped pids.
	    # We can check a pid with waitpid, because we know that
	    # no-one else is reaping in between (given that we block signals).
	    #
	    # We might run this code more than once.  So we might send
	    # multiple signals each child.  That's OK and intended.
	    block_signals();
	    kill $sig => grep {
		# waitpid returns:
		#   -1   Not our child, or doesn't exist.  This is normal!
		#   >0   Was our child but we just reaped it.
		#   0    Is our unfinished. unreaped, child..
		# Only in the final case do we want to kill.
		my $child = waitpid $_, WNOHANG;
		if ($child > 0) {
		    fail "$child != $_" unless $child == $_;
		    report_reaped_worker($child);
		    # The pid remains in @worker_slots, despite being reaped.
		    # This is OK according to our invariants.
		}
		!$child
	    } grep defined, @worker_slots;
	    unblock_signals();
	    exit 0;
	};
    }

    my $start_worker = sub {
	# We're forking, and manipulating @worker_slots.
	# Also, avoid entering our (parent-appropriate) signal handler in
	# the child right after fork, before the child has reset %SIG.
	block_signals();

	my $free_slot;
	for my $i (0..$#worker_slots) {
	    if (!defined $worker_slots[$i]) {
		$free_slot = $i;
		last;
	    }
	}
	$free_slot // fail "No free slot to start worker -- shouldn't happen";

	if (my $child = fork // fail $!) {
	    $worker_slots[$free_slot] = $child;
	    unblock_signals();
	} else {
	    $SIG{$_} = "DEFAULT" for @fatal_signals;
	    @worker_slots = (); # just in case
	    unblock_signals();
	    # Jump out of the parent process's lexical scope.
	    worker($free_slot);
	    # worker() should never return, but ensure no grandchild workers.
	    exit 255;
	}
    };

    for (;;) {
	# Particularly useful in the test suite: leaked oracleds will die.
	stat '.' or fail "parent cwd has become inaccessibe: $!";
	(stat _)[3] or fail "parent cwd deleted (link count 0), quitting";

	# If we have empty worker slots, (re)start worker(s).
	#
	# We don't modify @worker_slots in this test,
	# so this access with signals unblocked is OK.
	$start_worker->() while grep !defined, @worker_slots;

	# Now we do nothing until after at least one worker dies, then wait
	# for a bit longer before going round again to start up a replacement.
	# We start up one replacement at a time.
	#
	# If the worker died then it's probably because either the SSH
	# connection failed, or there was a bug triggered by the particular
	# manager request the worker was trying to handle.  In both cases it
	# is fine to restart workers: in the latter case, it's okay because no
	# state is shared between workers, and the manager shouldn't send the
	# bug-triggering request again immediately.
	#
	# In both cases, though, we want a delay.  In the second case this is
	# to prevent us getting stuck in a pointless tight forking loop if
	# workers are dying over and over again in quick succession.

	my $child = wait;
	$child == -1 and fail "No workers to reap -- shouldn't be possible";

	# We're manipulating worker_slots.
	# We must block signals only now, *after* the wait,
	# because we need such signals to interrupt the wait.
	# Hence the possible presence of reaped pids in @worker_slots.
	block_signals();

	my $child_i;
	for my $i (0..$#worker_slots) {
	    if ((defined $worker_slots[$i]) && $worker_slots[$i] == $child) {
		$child_i = $i;
		last;
	    }
	}
	if (defined $child_i) {
	    $worker_slots[$child_i] = undef;
	    unblock_signals();
	    report_reaped_worker($child);
	    # This could become more sophisticated (e.g. exponential backoff)
	    # if necessary, but hopefully things will be reliable enough.
	    fail "group leader: restarting workers disabled"
	      unless $restart_workers;
	    sleep $worker_restart_timeout;
	} else {
	    unblock_signals();
	    say_log "group_leader: wait(2) returned unexpected PID $child";
	}
    }
}

sub worker ($) {
    my $slot = shift;

    # say_log will include our identity.
    say_log "worker: new worker starting up";

    # Try to establish a connection to the builder right away.  If we can't,
    # then we don't even want to make ourselves available to the manager.
    my ($virt, $virt_dir, $virt_cmd_enclist, @virt_cmd, $virt_dgit_vstr);
    my $run_cmd = sub {
	# Check return value or $?, which are zero on success.
	# Otherwise, use failedcmd_waitstatus to report the status.
	$? = -1;
	system $ssh, @ssh_opts, $builder, shellquote @virt_cmd, @_;
    };
    my $new_virt = sub {
	# Use autopkgtest's virtualisation server protocol so that we can
	# easily upgrade the isolation.  Spec.:
	# /usr/share/doc/autopkgtest/README.virtualisation-server.rst.gz
	#
	# The protocol requires that we ensure here, in this call to
	# Debian::Dgit::ProtoConn::open2, that the way we invoke the
	# virtualisation server will ensure that we have exclusive use of the
	# testbed.
	$virt = Debian::Dgit::ProtoConn->open2(
	    $ssh, @ssh_opts, $builder, $adt_virt, @adt_args);

	$virt->set_description('virt');
	$virt->set_fail_hook(sub {
	    (waitpid $virt->get_pid(), WNOHANG) == 0
	      or say_log "virt-server: ".waitstatusmsg;
	});

	$virt->expect(sub { /^ok$/ });
	$virt->send("open");
	($virt_dir) = $virt->expect(sub { /^ok (\S+)$/ });
	$virt->send("print-execute-command");
	($virt_cmd_enclist) = $virt->expect(sub { /^ok (\S+)/ });
	@virt_cmd = map uri_unescape($_), split /,/, $virt_cmd_enclist;

	$run_cmd->("true");
	$? == 0 or fail "Cannot execute commands in builder virt: "
	  .failedcmd_waitstatus();

	$virt_dgit_vstr = get_dgit_version $ssh, @ssh_opts, $builder,
	  shellquote @virt_cmd, $ENV{DGIT_DRS_DGIT} // "dgit", "--version";

	say_log "worker: established builder virt environment";
    };
    my $quit_virt = sub {
	unless ($retain_tmp) {
	    # Most virtualisation backends will take care of this, but
	    # it's not guaranteed by the protocol.
	    $run_cmd->(qw(rm -rf), $virt_dir);
	    $? == 0 or fail "failed to remove $virt_dir in builder virt: "
	      .failedcmd_waitstatus;
	}

	$virt->send("quit");

	# Spec says we should expect `ok` but many autopkgtest-virt-*
	# don't send it.  #1092808.  Anyway, we can safely waitpid without
	# risk of deadlock -- the pipe would fit an ok if it sent one.
	(waitpid $virt->get_pid(), 0) == $virt->get_pid() or fail $!;
	fail sprintf "autopkgtest virt server: %s", waitstatusmsg() if $?;

	undef $virt;
    };
    $new_virt->();

    # Need our own cwd -- see dgit-repos-server's file header.
    my $wcwd = "worker-cwd/w$slot";
    -d or mkdir or fail $! for $wcwd;
    chdir $wcwd or fail $!;

    my $mngr = Debian::Dgit::ProtoConn->open2(
	$ssh, @ssh_opts, $manager,
        shellquote qw(nc.openbsd -U -N), $socket
    );
    $mngr->set_description('manager');
    $mngr->set_fail_hook(
	sub {
	    my $msg = shift;
            (waitpid $mngr->get_pid(), WNOHANG) == 0
		or say_log "worker: ssh to manager: ".waitstatusmsg;
	    eval { $mngr->send("protocol-violation $msg") };
	    say_log sprintf "worker: %s to inform manager: %s",
	      ($@ ? "failed" : "attempted"), $msg;
	});
    $mngr->expect(sub { /^t2u-manager-ready$/ });
    say_log "worker: established connection to Manager";
    $mngr->send("t2u-oracle-version 8");
    $mngr->send(sprintf "worker-id %s,w%s %s", me, $slot, $production_string);

    for (;;) {
	my ($msg, $payld_id, $payld_rs, $payld_pkg, $payld_url)
	  = $mngr->expect(sub {/^(?|
	    (ayt)
	   |(restart-worker)
	   |(job)
	       \ ([[:alnum:]][[:alnum:],-.]*)
	       \ (last-attempt|not-last-attempt)
	       \ ($package_re)
	       \ ([[:graph:]]+)
	   )$/ax});
	if ($msg eq "ayt") {
	    # Check the connection to the builder is still up.
	    $virt->send("capabilities");
	    $virt->expect(sub { /^ok(?: |$)/ });
	    # Check the hardware token is still working.
	    test_signing_key();
	    my $orac_dgit_vstr
	      = get_dgit_version $ENV{DGIT_DRS_DGIT} // "dgit", "--version";
	    # In both ad-hoc from-git and .deb deployments, d-r-s is the
	    # same version as oracled.  Report that for the admins' benefit.
	    my $infra_version_str = "oracled/d-r-s version $our_version";
	    $mngr->send(sprintf "software-versions %s, Oracle %s, Builder %s",
			$infra_version_str, $orac_dgit_vstr, $virt_dgit_vstr);
	    $mngr->send("ack");
	} elsif ($msg eq "restart-worker") {
	    $quit_virt->();
	    exit;
	} elsif ($msg eq "job") {
	    my $last_attempt = $payld_rs eq "last-attempt";
	    my $tag = $mngr->receive_data_block;
	    my ($user_email) =
	      $mngr->expect(sub { /^user-email ([\t -\x7e]+)$/ });
	    my ($last_attempt_msg) =
	      $mngr->expect(sub { /^last-attempt-message (.+)$/ })
	      if $last_attempt;

	    my $lock_fail = sub {
		my $msg = shift;
		$mngr->send($_)
		  for "message $msg", "email unreported", "retriable";
		fail $msg;
	    };

	    # Block fatal signals to avoid interrupting actual builds.
	    # (So not to protect @worker_slots -- we're the child.)
	    block_signals();
	    # Take DSA's reboot locks (RT ticket #9884) on both hosts to
	    # request waiting for this job to be complete before rebooting.
	    # If we can't get a lock immediately, assume that means a reboot
	    # is imminent, and so give up.  The Manager will retry the job.
	    open my $oracle_flock, "<",
	      $ENV{DGIT_TEST_REBOOT_LOCK_1} // "/var/run/reboot-lock"
	      or $lock_fail->("open oracle host reboot lock: $!");
	    flock $oracle_flock, LOCK_SH|LOCK_NB
	      or $lock_fail->("take oracle host reboot lock: $!");
	    my ($bflock_child, $bflock_in, $bflock_out);
	    my $bflock_err = gensym; # see IPC::Open3 docs
	    eval {
		$bflock_child
		  = open3($bflock_in, $bflock_out, $bflock_err,
			  $ssh, @ssh_opts, $builder, shellquote
			  qw(flock --verbose -sn),
			  $ENV{DGIT_TEST_REBOOT_LOCK_2} // "/var/run/reboot-lock",
			  qw(sh -ec), "echo yes; read l")
	      };
	    $lock_fail->("Failed to start builder reboot lock script: $@")
	      if $@;
	    unless ($bflock_out && <$bflock_out> =~ /^yes$/) {
		chomp(my @lines = <$bflock_err>);
		say_log "builder flock(1) stderr: $_\n" for @lines;
		@lines or @lines = ("<unknown error>");
		$lock_fail->("Couldn't take builder reboot lock: "
			     .join " // ", @lines);
	    }
	    # warn_log() not fail() for locking issues from now on because
	    # there is no sense making the job irrecoverable if got this far.
	    close $bflock_out or warn_log $!;
	    handle_job($mngr, $virt_dir, $virt_cmd_enclist,
		       $payld_id, $user_email,
		       ($last_attempt && $last_attempt_msg),
		       $tag, $payld_pkg, $payld_url);
	    # Release locks as soon as we're finished with the critical part.
	    print $bflock_in "\n" or warn_log $!;
	    (waitpid $bflock_child, 0) == $bflock_child or warn_log $!;
	    warn_log sprintf "builder reboot lock script %s", waitstatusmsg()
	      if $?;
	    close $oracle_flock or warn_log $!;
	    unblock_signals();
	    # Now stop the autopkgtest-virt-* process, and bring up another
	    # one.  This means that we don't have to assume anything about
	    # what capabilities are available, which is more flexible.
	    $quit_virt->();
	    $new_virt->();
	} else {
	    fail "ProtoConn's expect() has failed us";
	}
    }
}

sub handle_job ($$$$$$$$) {
    my ($mngr, $virt_dir, $virt_cmd_enclist,
	$id, $user_email, $retry_msg, $tag, $putative_pkg, $url) = @_;

    # Parse it just enough to log something useful.
    # Leave the real parsing, and emailing, to dgit-repos-server.
    my ($tag_name) = $tag =~ /^tag (\S+)$/m or fail "couldn't find tag name";
    my $log_info = sprintf "job=%s last_attempt=%d package=%s tag=%s",
      $id, (defined $retry_msg), $putative_pkg, $tag_name;
    say_log "$log_info url=$url starting";

    # dgit-tmp is in trusted, and not in a .git, unlike elsewhere.
    # This is a bit confusing but it means readtag etc. in dgit-repos-server
    # can just use 'dgit-tmp' from their cwd.
    rmdir_r "dgit-tmp";
    mkdir "dgit-tmp" or fail $!;

    # dgit-repos-server expects to find the tag here.
    open my $wholetag_fh, ">dgit-tmp/wholetag" or fail $!;
    print $wholetag_fh $tag;
    close $wholetag_fh or fail $!;

    # The diversion of the code path into dgit-repos-server now is for
    # historical reasons.  While invoking 'dgit rpush-source' is essential to
    # the design, the parts of dgit-repos-server we use could be refactored
    # and moved here.
    my @drs
      = ($ENV{DGIT_REPOS_SERVER_TEST} // qw(dgit-repos-server), @drs_args,
	 qw(--tag2upload11), $ssh, $builder, $virt_dir, $virt_cmd_enclist,
	 $from, $reply_to, $copies, $processing_timeout, $signing_keyid,
	 qw(--), $id, $url, $tag_name, $putative_pkg, $user_email,
	 $retry_msg // "");
    say_log "worker: invoking <<@drs>>";
    my $drs_child;
    unless ($drs_child = fork // fail $!) {
	# dgit-repos-server generates the remainder of the protocol messages.
	# It needs both directions because, all being well, it will need to
	# receive a 'go-ahead' from the Manager.
	open STDIN, "<&=", $mngr->get_fh_r->fileno or fail $!;
	open STDOUT, ">&=", $mngr->get_fh_w->fileno or fail $!;
	exec @drs;
    }

    (waitpid $drs_child, 0) == $drs_child or fail $!;

    fail sprintf "dgit-repos-server %s", waitstatusmsg() if $?;
}

sub test_signing_key () {
    # debsign, which dgit-repos-server's dgit call will use, defaults
    # to looking at the changelog to find a -u option to pass to gnupg,
    # and there's no way to tell it to not pass any such option.
    #
    # Also it's probably a good idea to make sure that we're not implicitly
    # doing something surprising.
    #
    # So, list our secret keys, and insist that there's exactly one,
    # and pass its keyid to dgit-repos-server to pass to dgit to
    # pass to debsign.
    my $keys = cmdoutput qw(gpg --list-secret --with-colons);
    my @keys = $keys =~ m{^fpr:.*}mg;
    @keys or fail "no signing keys available";
    @keys == 1 or fail "multiple signing keys available";
    $signing_keyid = (split /:/, $keys[0])[9];
    (defined $signing_keyid) && $signing_keyid =~ m{^[0-9a-f]+$}i
      or fail "bad output from gnupg $keys[0]";

    open my $gpg_in, "|gpg -u$signing_keyid --clearsign >/dev/null"
      or fail $!;
    print $gpg_in "Test of signing key.";
    close $gpg_in
      or fail "Signing key is not usable: ".failedcmd_waitstatus();
}

sub block_signals () { sigprocmask(SIG_BLOCK, $sigset) or fail $! }
sub unblock_signals () { sigprocmask(SIG_UNBLOCK, $sigset) or fail $! }

sub report_reaped_worker ($) {
    # Logs a message about worker $pid, using $?.
    # Doesn't update @worker_slots.
    say_log sprintf "group_leader worker=%s: %s", shift, waitstatusmsg;
}

sub get_dgit_version (@) {
    my @lines = split /\n/, cmdoutput @_;
    @lines == 1
      or fail "unexpected 'dgit --version' output: ".join " // ", @lines;
    return $lines[0];
}
