#! /bin/sh

# arbitron -- this program produces rating sweeps for USENET.
# This version, and the accompanying program, are for NOTES systems
# based on the Lou Salkind notes that keeps sequencers as editable
# ascii in ~/.nfseq with ARPA-style dates.
#
# "Notes 1.7" keeps sequencers in a binary format.  Some intermediate
# versions ("NYU B Notes 1.6" and "Notes 2.7") use ascii sequencers
# with ctime(3) format dates.  You will need to modify the date-parsing
# part of the awk script to use this arbitron with those versions.
#
# To participate in the international monthly ratings sweeps, run
# "arbitron" every month, ON OR AFTER the first. Brian Reid combines the
# results and posts the information to news.groups.
#
# This script is based on one written by Brian Reid at Stanford, and
# updated and bugfixed by Spencer Thomas at Univ. of Utah and Geoff
# Kuenning at SAH Consulting.  It was written by Rich $alz at Mirror
# Systems.
#
# This version was rewritten more-or-less from the ground up by
# Tw Cook at Hewlett-Packard to correspond to HP Notes, and later
# modified by Jacob Gore at Northwestern University to make it portable enough
# to run on 4.3BSD with some customization.
#
# This script will likely have to be run by root or daemon so that
# it will be able to read everyone's .nfseq files.
#
# The "-l" flag is used inside Hewlett-Packard to run an internal
# survey similar to Brian's net-wide survey.  Feel free to modify it 
# for use in your own organization-wide surveys.

######## Customization ###########################################

# Name of the special notes user on your system.  Used to find ~notes/config,
# and to receive a local copy of the arbitron message.

notesusr=__NOTES_UID__

# lowUID and highUID should be set to the lowest and highest uid of real users
# on your system.  Nonusers like "rje" and "daemon" don't count.  UUCP logins
# don't count either.

lowUID=100
highUID=999

# newsgroups is the list of groups you want to report on; this should be
# a pattern suitable for awk (including the surrounding slashes).  For
# normal usenet use there is no need to change this.
#
# Inside Hewlett-Packard, localgroups should include the usual HP notesfiles.
# Feel free to include categories that do not get wide distribution, if
# you are interested in their readership.

newsgroups='/^soc|^talk|^comp|^sci|^rec|^misc|^news|^alt/'
localgroups='/^hp|^hpf|^hug|^hpic|^hpl/'

# These are the mail addresses the output is sent to.  To participate in
# the normal Usenet ratings sweep, the report should eventually reach:
#   uucp path: {sun, pyramid, hplabs, bellcore}!decwrl!netsurvey
#   internet: netsurvey@decwrl.DEC.COM
#
# To disable a destination, set it to the empty string.  Note that a "#"
# in the middle of a word does NOT start a shell comment, so writing
# hpaddress=#"..." would mail to the literal address "#...".

netaddress="netsurvey@decwrl.dec.com"		# Brian Reid's survey
hpaddress=""					# HP survey: "hpnetsurvey@hpcea"
localaddress="$notesusr"			# local only

# deliver is the mail command to get these bits there.  For
# debugging, you may wish to make this just "more".

#deliver="Mail -s Arbitron-data"		# BSD
#deliver="mailx -s Arbitron-data"		# System V
deliver="v6mail -s Arbitron-data -t"		# MMDF
debugdeliver="more"

# SysV systems should have "/usr/bin:/bin" in their path;
# BSD systems should also include "/usr/ucb".

PATH=/usr/bin:/bin:/usr/ucb

# Go down to the line after the "CHECK HERE" comment, and fix up the awk
# variable assignments.

######## End Customization #######################################

# Run-time state: default readership cutoff, host name for the report
# header, and the two user counters filled in by the passwd scan.
cutoff=1
hostname=$(hostname)
totalusers=0
notesusers=0

# Scratch files.  This script normally runs as root, so the files are
# created with mktemp (unpredictable names, mode 0600) rather than with
# guessable /tmp/...$$ names that another user could pre-create or symlink.
#   nfseq  - date/cutoff line followed by every collected .nfseq file
#   arbout - the finished report
nfseq=$(mktemp /tmp/nfseq.XXXXXX) || exit 1
arbout=$(mktemp /tmp/arb.XXXXXX) || { rm -f "$nfseq"; exit 1; }

# Remove the scratch files on every exit path, including usage errors
# and interrupts.
trap 'rm -f "$nfseq" "$arbout"' 0
trap 'exit 1' 1 2 15

# Home directory of the notes user, taken from the passwd file.
libdir=$(awk -F: -v user="$notesusr" '$1 == user { print $6; exit }' /etc/passwd)

# Version of this arbitron program.

arbversion="Notes-2.9-arbitron-v1.1"

# Default recipients: the net-wide survey, the HP survey and the local
# notes user.  Empty entries simply disappear when the list is word-split.
address="$netaddress $hpaddress $localaddress"

# Print the usage message on stderr and abort.
usage() {
    echo "Usage: arbitron [-c #] [-x] [-z] [-l]" >&2
    exit 1
}

# Store $1 as the readership cutoff; it must be a non-empty string of
# digits, otherwise the awk comparison later on would silently degrade
# to a string comparison.
set_cutoff() {
    case $1 in
	'' | *[!0-9]*)
	    usage ;;
    esac
    cutoff=$1
}

# Process the command-line options.
#   -c N / -cN  cutoff value below which nf readership is not reported
#   -t*         rejected: the 14-day window cannot be changed
#   -x          debugging: page the report instead of mailing it
#   -z          debugging: mail the report to the local address only
#   -l          report on the local groups and send to the HP/local addresses
# Sets: cutoff, deliver, address, newsgroups.
# Exits with status 1 on an unknown or malformed option.
parse_args() {
    while [ $# -gt 0 ]
    do
	case $1 in

	    -c)	# Cutoff value given as a separate word
		[ $# -ge 2 ] || usage
		set_cutoff "$2"
		shift
		shift ;;

	    -c*)	# ditto, attached to the flag; keep ALL the digits
		set_cutoff "${1#-c}"
		shift ;;

	    -t*)	# Set time limit to other than 14 days
		echo "Sorry, the -t option is not supported" >&2
		exit 1 ;;

	    -x*)	# Turn on debugging - just means don't mail it.
		deliver=$debugdeliver
		address=""
		shift ;;

	    -z*)	# Different kind of debugging -- just mail locally
		address="$localaddress"
		shift ;;

	    -l*)	# Show local groups only
		newsgroups=$localgroups
		address="$hpaddress $localaddress"
		shift ;;

	    *)
		usage ;;
	esac
    done
}

parse_args "$@"

# First line of the awk input: the current date in GMT (the sample
# sequencer entries carry GMT timestamps) followed by the cutoff value.
echo "$(TZ=GMT0 date) $cutoff" > "$nfseq"

# Print the home directory of every "real" user in a passwd-format file.
#   $1 - lowest uid to include
#   $2 - highest uid to include
#   $3 - passwd file to read (default /etc/passwd)
# An account is listed only when its uid lies within [$1, $2] and its
# login shell ENDS in "sh".  The limits are passed with -v instead of
# being spliced into the awk program text.
list_homedirs() {
    awk -F: -v lo="$1" -v hi="$2" '
	($3 + 0) >= (lo + 0) && ($3 + 0) <= (hi + 0) && $7 ~ /sh$/ { print $6 }
    ' "${3:-/etc/passwd}"
}

# Cruise everyone's home directory to see if they have a .nfseq file.  If
# so, then assume they're a notes reader - count them as such, and save a
# copy of the .nfseq file for subsequent crunching.  Users with a shell not
# ending in "sh" are skipped, as are those outside lowUID <= uid <= highUID.
#
# The list is fed through a here-document so that the loop runs in this
# shell (the counters survive) and each directory is read as one whole
# line, even if it contains blanks.

while IFS= read -r homedir
do
    # An empty list still yields one blank line from the here-document.
    [ -n "$homedir" ] || continue

    totalusers=$((totalusers + 1))

    if [ -r "$homedir/.nfseq" ]
    then
	cat "$homedir/.nfseq" >> "$nfseq"
	notesusers=$((notesusers + 1))
    fi
done <<EOF
$(list_homedirs "$lowUID" "$highUID" /etc/passwd)
EOF

# Begin the report with the site summary.  The whole group is redirected
# once, which truncates any previous contents of the report file; the
# per-notesfile tallies are appended to it afterwards.
{
    echo "Host        $hostname"
    echo "Users       $totalusers"
    echo "NetReaders  $notesusers"
    echo "ReportDate  $(date)"
    echo "SystemType  $arbversion"
} > "$arbout"

# Read the collected sequencer data on stdin and print "count name" for
# every notesfile matching $newsgroups that at least "cutoff" users have
# read within the last 14 days.  Output order is unspecified; the caller
# sorts it.  $newsgroups must be an awk regular expression including its
# surrounding slashes; it is spliced into the program text.
tally_readers() {
    awk '
# Arbitron script to find out how many notesfiles users have read each group
# within the previous two weeks.  The first line of input to this awk script
# should be the current date in the form "Wed Oct 22 14:00:00 CDT 1986 9",
# where the 9 is the cutoff value below which notesfiles should not be
# reported.  The rest should be the concatenation of all the ~/.nfseq files
# on the system.
#
# Sample .nfseq entry:
# hp.graphics: Tue, 24 Nov 1987 19:43:09 GMT

BEGIN	{
    FS = " ";
    split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", name, " ");
    split("31 28 31 30 31 30 31 31 30 31 30 31", days, " ");
    for (i = 1; i <= 12; i++) {
	monthnum[name[i]] = i;		# month name -> 1..12
	monthlen[i] = days[i] + 0;	# days in a non-leap year
    }
}

##############  Check the form of the output which your date command
#            #  produces, and set the parameter numbers accordingly.
# CHECK HERE #  Then set cutoff to the parameter right after the last
#            #  word of the date output (since the cutoff number will
##############  follow the date output in the input line).

# Parse: "Wed Oct 22 14:00:00 CDT 1986 cutoff#" -- BSD 4.3
# The "+ 0" forces numeric values so later comparisons are never done
# as strings.
NR == 1 {
    now_dayofmonth = $3 + 0;
    now_month = $2;
    now_year = $6 + 0;
    cutoff = $7 + 0;
    next;
}

$1 ~ '"$newsgroups"' {
    nfname = substr($1, 1, length($1) - 1);	# drop the trailing colon
    dayofmonth = $3 + 0;
    month = $4;
    year = $5 + 0;

#   Innocent until proven guilty.

    recent = 0;

    if (month == now_month && year == now_year) {

#	Same month: read recently if no more than 14 days ago.

	recent = (dayofmonth + 14 >= now_dayofmonth);

    } else if ((month in monthnum) && (now_month in monthnum)) {

#	Otherwise the entry can only be recent if it is dated in the month
#	immediately before the current one.  December is followed by January
#	of the NEXT year.  Unrecognised month names never count.

	m = monthnum[month];
	follows = (monthnum[now_month] == (m % 12) + 1);
	expected_year = (m == 12) ? year + 1 : year;

	if (follows && now_year == expected_year) {

#	    Days elapsed = (days left in the previous month) + now_dayofmonth,
#	    so "elapsed <= 14" becomes the test below.

	    len = monthlen[m];
	    if (m == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
		len = 29;
	    recent = (dayofmonth + 14 >= now_dayofmonth + len);
	}
    }

#   If this notesfile has been read recently, then increment the
#   readership count for it.  Otherwise, skip it.

    if (recent)
	active[nfname]++;
}

#   Print the summary readership information - ignore any notesfiles
#   with fewer readers than "cutoff" (default 1).

END	{
    for (nf in active)
	if (active[nf] >= cutoff)
	    printf "%d %s\n", active[nf], nf;
}
'
}

# Append the tallies to the report, most-read notesfiles first.
tally_readers < "$nfseq" | sort -n -r >> "$arbout"

# Hand the finished report to the delivery command on its standard input.
# $deliver and $address are left unquoted on purpose: they must split into
# the command, its options and the individual recipients.

$deliver $address < $arbout

# Both scratch files have served their purpose; get rid of them.

rm -f $nfseq $arbout
