#! /bin/sh
# @(#)arbitron	2.4.4	10/20/89
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl.dec.com,
# with a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the first
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
# Various suggestions provided by:
#	Karl Tombre, CRIN, Vandoeuvre France
#	Dan Kolkowitz, Stanford
#	Paul Eggert, Unisys
# Fixes for YP and NN by
#	Dan Kolkowitz, Stanford
#	Brent Chapman, Capital Market Technology
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
# Directories used by the survey: scratch space for temporary files, the
# news library directory, and the news article spool.
TMPDIR="/tmp"
NEWS="/usr/local/lib/news"
SPOOL="/usr/spool/news"

# Crude guess at the system type: "bsd" when /usr/ucb exists, "usg"
# otherwise.  If your installation has only one type of system, you can
# replace the test with a plain assignment of the correct value.
STYPE="usg"
if test -d /usr/ucb; then
    STYPE="bsd"
fi

# Password entries are counted as actual people only when their UID lies in
# the range lowUID..highUID; anything outside that range is taken to be a
# maintenance account, a daemon, or the like.
lowUID="5"
highUID="9999"

# Host from which a copy of the active file can be fetched with rcp when you
# run a distributed news system (nntpd & rrn, usually).  Leave NEWSHOST
# blank if you are not running one.
NEWSHOST="tahoma"

# Recipients of the finished survey.
# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
#summarypath="netsurvey@decwrl.dec.com $USER"
summarypath="${USER}"


# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="decwrl"
#
# Each arm is a fallback chain; the first command that succeeds supplies the
# name.  The configuration section only ever sets STYPE to "bsd" or "usg",
# so the System V arm has to match "usg" ("sysv" is still accepted for
# hand-edited configurations).  Unknown types try uuname first and then fall
# back to the other two commands.

case "$STYPE" in
	bsd)      cmd='hostname || uuname -l';;
	usg|sysv) cmd='uname -n || uuname -l || hostname';;
	*)        cmd='uuname -l || uname -n || hostname';;
esac;

# Discard stderr (instead of closing it) so "not found" noise from the
# fallback chain is hidden without running the commands with fd 2 closed.
hostname=`sh -c "$cmd" 2>/dev/null`

PATH=$NEWS:$PATH
############################################################################
export PATH
# ---------------------------------------------------------------------------
# On HUP, INT, QUIT or TERM simply exit; the exit trap below then cleans up.
trap exit 1 2 3 15
# On any exit, remove this run's temporary files ($TMPDIR/arb.*.<pid>) and
# leave with the status that was current when the trap fired.
trap 's=$?; rm -f $TMPDIR/arb.*.$$; exit $s' 0
# Report date: abbreviated English month name followed by the 4-digit year,
# e.g. "Oct1989".  Ask date for exactly those fields in the C locale rather
# than picking words 2 and 6 out of the default output, whose layout varies
# with locale (12-hour clocks, day-before-month order, ...).
dat=`LC_ALL=C date '+%b%Y'`
# Command the finished report is piped into; $MAILER overrides "mail".
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# Snapshot the password database into a scratch file: the output of
# "ypcat passwd" when that succeeds, otherwise the local /etc/passwd.
PASSWD=$TMPDIR/arb.passwd.$$
{ ypcat passwd || cat /etc/passwd; } > $PASSWD

# # ###### Scheme #1: fast but usually returns too big a number
# Count the password entries whose UID (field 3) lies in lowUID..highUID.
count_prog='BEGIN {N=0}$3>='"$lowUID"' && $3<='"$highUID"'{N=N+1}END{print N}'
nusers=`awk -F: "$count_prog" <$PASSWD`

# # ###### Scheme #2 (works with BSD systems)
#nusers=`last | sed '/^wtmp begins/d; s/ .*//; /^$/d' | sort -u | wc -l`

# # ###### Scheme #3 (works with USG systems)
#nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`

################################
#
# Set up awk scripts;  these are too large to pass as arguments on most
# systems.
#
# This first awk script, written to $TMPDIR/arb.fmt.$$, generates the actual
# output report.  The here-document below is filtered through 'sed', which
# deletes every line starting with '#' and replaces the placeholders NUSERS,
# HOSTNAME and DATE with the values of $nusers, $hostname and $dat.  We use
# 'sed' to substitute in the shell variables to save ourselves endless
# hassle trying to find quoting/backslashing problems.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#	(1) Active-file lines.  These have four fields:  newsgroup name,
#	    last article number, first existing article, 'y' or 'n'
#	    to allow/disallow posting.  (The script takes the last article
#	    number from field 2 and the first from field 3.)
#			mod.mac 00001 00001 y
#
#	(2) .newsrc-derived lines.  These have three fields:  the newsgroup
#	    name, the user name and the articles-read information.  The latter
#	    can be arbitrarily complex.  It can also be arbitrarily long;
#	    this can potentially break either awk or sed, in which
#	    case the script will not work.
#			mod.map joe 1-199
#
#	The script uses the type 1 lines to define the newsgroups
#	and their active article ranges.  The .newsrc (type 2) lines are
#	then used to deduce which users are reading that group (a group
#	is being read if the last article seen is in that group's active
#	article range, and the group's first and last article numbers
#	differ).  A type 2 line whose group name does not match the most
#	recent type 1 line is skipped.
#
#	Output is one "count groupname" line per group, plus header lines
#	prefixed with 9999..9995 (host, user count, news readers, report
#	date, system type).  The variables rdrcount and reader are
#	initialised in BEGIN but not used anywhere else in the script.
#
sed "/^#/d
     s/NUSERS/$nusers/g
     s/HOSTNAME/$hostname/g
     s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

# Write the awk script arb.pwd.<pid> into $TMPDIR (defaulting to /tmp if
# TMPDIR is empty).  Fed colon-separated password-file lines, the script
# prints a small shell fragment for each home directory (field 6) it has not
# seen before; "/" and the empty string are pre-marked as seen and therefore
# skipped.  When run, each fragment does the following:
#
#   - if <home>/.nn/rc is readable, prints "<rest> <user> <number>" for
#     every line that starts with "+" and has the form
#     "<char> <digits> <rest>";
#   - otherwise, if <home>/.newsrc is readable, prints every line containing
#     ": <digit>" with its first ":" replaced by " <user>".
#
# The backslashes of the sed expression are doubled because they sit inside
# an awk string literal: awk itself interprets "\1" and "\2" as octal
# escapes and may drop the backslash of "\(" and "\)", which would corrupt
# the generated sed command.  "\\" reliably yields one literal backslash.
cat >"${TMPDIR:-/tmp}/arb.pwd.$$" <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.nn/rc ] ; then ", $6)
		printf("sed -n '/^+/s/^. \\([0-9]*\\) \\(.*\\)/\\2 %s \\1/p'",$1)
		printf(" <%s/.nn/rc;\n",$6)
		printf("elif [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file.  With NEWSHOST empty the local
# $NEWS/active is used directly; otherwise a copy is fetched from NEWSHOST
# with rcp.  The copy is named $TMPDIR/arb.active.<pid> so that it matches
# the exit trap's "rm -f $TMPDIR/arb.*.$$" pattern and is cleaned up even
# when TMPDIR has been edited to something other than /tmp.
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=$TMPDIR/arb.active.$$
     rcp "$NEWSHOST:$NEWS/active" "$ACTIVE"
fi

# A failed rcp leaves no (or an empty) file, so this one test covers both
# the local and the remote case.
if [ ! -s "$ACTIVE" ]
then
    echo arbitron: ACTIVE file missing or empty. Cannot continue. >&2
    # Set $? to 1 first and then exit with it -- presumably so that the exit
    # trap sees the failure status on shells where "exit 1" would not.
    (exit 1); exit
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.  The arb.pwd awk script turns the password file into shell
# commands; running them through sh yields "group user articles" lines.
awk -F: -f $TMPDIR/arb.pwd.$$ <$PASSWD | sh >$TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2).
     # The leading "x" (shifted away again) keeps "set" from dumping every
     # variable when the first line is blank, or treating it as options.
     set x `sed 1q < $ACTIVE`
     shift
     if [ $# -eq 2 ]
     then # Two-field active file: synthesize a four-field line per group,
	  # taking the lowest article number from the group's spool directory.
	  grep -E '^[a-z][-0-9_a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     # Only names made up entirely of digits are articles.  A missing
	     # directory just leaves $first empty.
	     first=`ls $SPOOL/$dir 2>/dev/null | grep '^[0-9][0-9]*$' | sort -n | sed 1q`
	     # $first is always set but may be empty, so ":-" is needed on
	     # every system type; an empty field would yield a three-field
	     # line that the report script mistakes for a .newsrc line.
	     echo "$group $last ${first:-$last} X"
	  done
     else grep -E '^[a-z][-0-9_a-z]*\.' $ACTIVE
     fi |
     # Merge active lines with the .newsrc lines.  Sort in the C locale so
     # the order is plain byte order and does not change with the caller's
     # locale collation rules.
     LC_ALL=C sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     # Highest counts first; the 999x header lines therefore come out on top,
     # after which their numeric prefix is stripped.
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     # Deliberately unquoted: $destination is a command plus its arguments.
     $destination
else echo arbitron: Unable to find any readable .newsrc files >&2
     (exit 1); exit
fi


