# iffincidences.awk
#
#/*****************************************************************************
#                Copyright Carnegie Mellon University 1992
#
#                      All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of CMU not be
# used in advertising or publicity pertaining to distribution of the
# software without specific, written prior permission.
#
# CMU DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
# CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
#*****************************************************************************/
#
#
# Gawk(1) program to print number of arrows incident on each box.
#
# $Header: iffincidences.awk,v 1.2 91/10/24 17:03:02 heydon Exp $
#
# Written by Allan Heydon for the Miro project at Carnegie Mellon
#
# SYNTAX
#   gawk -f iffincidences.awk [ +u[num1] ] [ +f[num2] ] [ +n|+N ] file
#
# SYNOPSIS
#   Process the IFF file 'file' (or the standard input if 'file' is given as
#   "-") representing an instance picture, and print the number of arrows
#   incident on each box; counts are given for each arrow type and in total.
#
#   By default, incidence information is printed for every box. If "+u[num1]"
#   is specified, only user boxes having >= 'num1' arrows (in total) incident
#   on them are printed. If 'num1' is omitted, *no* user boxes are considered.
#   The "+f[num2]" option is treated similarly with respect to file boxes.
#
#   Also by default, the boxes are identified by sysname. If "+n" is
#   specified, both user boxes and file boxes are identified by name
#   instead. If "+N" is specified, user boxes are identified by name and
#   file boxes by their *full* pathnames (a full pathname is the
#   concatenation of names of boxes on *some* path from the box to a root).
#
# BUGS
#   The options cannot be specified with a "-" prefix because bawk
#   mysteriously gobbles any of its command-line arguments starting with "-".
#
#   There are restrictions on the input iff file.
#
# FILES
#   ~miro/bin/iffincidences	script using this awk program
#
# SEE ALSO
#   fs2iff(1), iff2ciff(1), iffstats(1)

# GLOBAL VARIABLES ============================================================
#
#   bit error_found		indicates if END code should be skipped
#   int user_cnt		no. of user boxes
#   int user[1-user_cnt]	sysnames of user boxes
#   int file_cnt		no. of file boxes
#   int file[1-file_cnt]	sysnames of file boxes
#   String names[sysname]	name of box with 'sysname'
#   int inc_cnt[sysname,type]	no. of arrows of 'type' incident on 'sysname'
#   int parent[sysname]		sysname of one of the parent boxes of
#				  'sysname'; empty string if 'sysname' a root
#   void arrow_types[type]	array indexed by all arrow types

# PRINT_TYPE_HEADER() =========================================================
#
# Prints the column-heading line for an incidence table: a leading column
# labelled "Name" (when 'names_flag' is set) or "Sys_Nm" (otherwise), then
# one column per index of 'arrow_types[]', then a final "Total" column.
# Columns are visited in whatever order awk's for-in loop yields.
#
function print_type_header(  t) {
  # leading column identifies the box, either by sysname or by name
  if (!names_flag) {
    printf("Sys_Nm");
  } else {
    printf(Name_Fmt,"Name");
  }

  # one heading per arrow type seen in the input
  for (t in arrow_types) {
    printf(Type_Fmt,t);
  }

  # the last heading also terminates the line
  printf(Type_Fmt "\n", "Total");
}

# FULL_NAME(sysname) ==========================================================
#
# Returns the "full" name of the box 'sysname': the concatenation (with no
# separator added) of names[] for every box on the parent[] chain from a
# root down to 'sysname' itself, root first.
#
# The walk up the chain stops at the first box that has no parent[] entry
# (or an empty one). It also stops if a box is reached a second time, so
# that cyclic INSIDE data (e.g. a box listed as its own child) cannot make
# the function loop forever; in that case the name is built from the boxes
# visited before the repeat.
#
function full_name(sysname,  i,sn_path,result,seen) {
  # build sn_path[] array from node up to root, remembering visited boxes
  for (i=1; sysname != "" && !(sysname in seen); i++) {
    seen[sysname] = 1;
    sn_path[i] = sysname;
    # membership test avoids creating empty parent[] entries for roots
    sysname = ((sysname in parent) ? parent[sysname] : "");
  }
  # work down to produce the result in the correct order (root first);
  # on entry 'i' is one past the last filled slot of sn_path[]
  result = "";
  while (--i > 0) { result = result names[sn_path[i]]; }
  return(result);
}

# PRINT_COUNTS(sysname,min,flag) ==============================================
#
# Prints one table row for the box 'sysname' if the total number of arrows
# incident on it (summed over every index of arrow_types[]) is >= 'min'.
#
# The row starts with the box identifier: the sysname when 'names_flag' is
# 0; otherwise names[sysname], except that the full name is used when
# 'names_flag' is 2 and 'flag' is non-zero. It is followed by one count per
# arrow type and the total.
#
# 'min' is compared numerically. It may arrive as a string (the threshold
# is cut out of a command-line argument with substr()), and comparing a
# number with a string in awk is a *string* comparison, under which e.g.
# 10 >= "5" is false. Adding 0 forces the numeric comparison.
#
function print_counts(sysname,min,flag,  type,total) {
  total = 0;
  for (type in arrow_types) { total += inc_cnt[sysname,type]; }
  if (total >= min + 0) {
    if (names_flag) {
      if (names_flag==2 && flag) { printf(Name_Fmt,full_name(sysname)); }
      else { printf(Name_Fmt,names[sysname]); }
    } else {
      printf(Sysname_Fmt,sysname);
    }
    # a type with no arrows on this box has an empty count, printed as 0
    for (type in arrow_types) { printf(Cnt_Fmt,inc_cnt[sysname,type]); }
    printf(Cnt_Fmt "\n",total);
  }
}

# PRINT_INCIDENCES(cnt,sysnames,min,flag) =====================================
#
# Prints a heading line followed by the incidence rows for the boxes listed
# in 'sysnames[1]' through 'sysnames[cnt]'. Only boxes with a total of
# >= 'min' incident arrows produce a row. 'flag' is passed through to
# print_counts(); if 'flag' != 0, then the "full" names of the boxes are
# requested; otherwise, only the tail names are.
#
function print_incidences(cnt,sysnames,min,flag,  k) {
  print_type_header();
  k = 1;
  while (k <= cnt) {
    print_counts(sysnames[k],min,flag);
    k++;
  }
}

# PRINT_ALL_INCIDENCES() ======================================================
#
# Prints the user-box table and then the file-box table. A table is left
# out entirely when its threshold ('user_min' or 'file_min') is the empty
# string. File boxes are printed with the "full name" flag set, user boxes
# without it.
#
function print_all_incidences() {
  # user boxes: a blank line after the table separates it from the next one
  if (length(user_min) > 0) {
    printf("USER BOXES:\n\n");
    print_incidences(user_cnt,user,user_min,0);
    printf("\n");
  }

  # file boxes
  if (length(file_min) > 0) {
    printf("FILE BOXES:\n\n");
    print_incidences(file_cnt,file,file_min,1);
  }
}

# SHIFT_ARGV() ================================================================
#
# Drops ARGV[1]: each of ARGV[2] through ARGV[ARGC-1] is copied one slot to
# the left and ARGC is then decremented. Nothing happens unless ARGC > 2,
# so a lone ARGV[1] is never removed. The old last slot is not cleared; it
# simply falls outside the new ARGC.
#
function shift_argv(i) {
  # leave ARGV alone when there is nothing to shift into slot 1
  if (ARGC <= 2) { return; }

  for (i = 2; i < ARGC; i++) {
    ARGV[i-1] = ARGV[i];
  }
  ARGC--;
}

# BEGIN =======================================================================
#
# Initializes the globals, consumes the leading "+..." command-line flags
# from ARGV, and sets up the print formats and the field separator.
#
# Flags:
#   +u[num]  sets user_min; with no digits user_min becomes "" (no user
#            boxes are printed)
#   +f[num]  same for file_min / file boxes
#   +n       names_flag = 1
#   +N       names_flag = 2
# Any other argument starting with "+" is reported and ends the run with
# exit status 1.
#
# Thresholds are stored as numbers so that later ">=" tests against them
# are numeric; substr() alone yields a string, and awk compares a number
# with a string as strings.
#
# If a flag is the last argument there is no file operand left. Since
# shift_argv() does not remove a lone ARGV[1], the flag is replaced by "-"
# so that standard input is read; previously this case looped forever.

BEGIN {
  # initialize global variables when possible
  error_found = 0;
  user_cnt = 0;
  file_cnt = 0;
  user_min = 0;
  file_min = 0;
  names_flag = 0;

  # parse command-line arguments; stop at the first non-flag argument
  while (ARGC > 1 && ARGV[1] ~ /^\+/) {
    if (ARGV[1] ~ /^\+u([0-9])*$/) {
      user_min = substr(ARGV[1],3);
      if (user_min != "") { user_min += 0; }	# keep "" meaning "omitted"
    } else if (ARGV[1] ~ /^\+f([0-9])*$/) {
      file_min = substr(ARGV[1],3);
      if (file_min != "") { file_min += 0; }
    }
    else if (ARGV[1] ~ /^\+n$/)         { names_flag = 1; }
    else if (ARGV[1] ~ /^\+N$/)         { names_flag = 2; }
    else {
      printf("Unrecognized command-line flag '%s'.\n",ARGV[1]);
      error_found = 1; exit(1);
    }

    if (ARGC > 2) {
      shift_argv();			# drop the flag just handled
    } else {
      ARGV[1] = "-";			# no file operand: read standard input
      break;
    }
  }

  # print formats
  Sysname_Width = 6;
  Name_Width = (names_flag == 2 ? 40 : 15);
  Col_Width = 5;
  Sysname_Fmt = "%" Sysname_Width "d";
  Name_Fmt = "%-" Name_Width "." Name_Width "s";
  Type_Fmt = " %" Col_Width "." Col_Width "s";
  Cnt_Fmt = " %" Col_Width "d";

  # set field separator so "=", ";" separate fields
  FS = "[ \t]*[;=][ \t]*"
}

# BOX =========================================================================
#
# Handles a ">BOX" entry. After the ">BOX" prefix is removed from $1, the
# fields are read as attribute/value pairs (the field separator splits on
# "=" and ";"). The "role" and "sysname" attributes are always picked up;
# "name" only when 'names_flag' is set.
#
# The sysname is appended to user[] or file[] according to the role; any
# other role (including a missing one) is reported and ends the run with
# exit status 1. When 'names_flag' is set, names[sysname] receives the name
# with one pair of surrounding double quotes removed.
#
# The attribute variables are globals, so they are cleared for every entry;
# otherwise a BOX line lacking an attribute would silently inherit the
# value from the previous BOX line.

/^[ \t]*>[ \t]*BOX/ {
  match($1,"^[ \t]*>[ \t]*BOX[ \t]*");
  $1 = substr($1,RLENGTH+1);

  # forget the previous entry's attributes
  role = ""; sysname = ""; name = "";

  for (i=1; i < NF; i+=2) {
    if      ($i == "role")               { role = $(i+1); }
    else if ($i == "sysname")            { sysname = $(i+1); }
    else if (names_flag && $i == "name") { name = $(i+1); }
  }

  if (names_flag) {
    # strip off ".." chars from name if necessary
    if (substr(name,1,1)=="\"" && substr(name,length(name))=="\"") {
      name = substr(name,2,length(name)-2);
    }
    # assign to names[]
    names[sysname] = name;
  }

  # increment proper count
  if      (role == "user") { user[++user_cnt] = sysname; }
  else if (role == "file") { file[++file_cnt] = sysname; }
  else {
    print "Unknown role: " role;
    error_found = 1; exit(1);
  }
  next;
}

# ARROW =======================================================================
#
# Handles a ">ARROW" entry. After the ">ARROW" prefix is removed from $1,
# the fields are read as attribute/value pairs and the "from", "to" and
# "type" values are recorded. The incidence count of that type is then
# bumped for both end boxes, and the type is entered in arrow_types[].
#
# NOTE(review): 'from', 'to' and 'type' are globals that are not cleared
# between entries, so an ARROW line lacking one of them reuses the value
# from the previous ARROW line -- confirm whether that is intended.

/^[ \t]*>[ \t]*ARROW/ {
  # drop the entry keyword so that $1 holds the first attribute name
  sub("^[ \t]*>[ \t]*ARROW[ \t]*", "", $1);

  # walk the attribute/value pairs
  i = 1;
  while (i < NF) {
    if ($i == "from") {
      from = $(i+1);
    } else if ($i == "to") {
      to = $(i+1);
    } else if ($i == "type") {
      type = $(i+1);
    }
    i += 2;
  }

  arrow_types[type] = "";		# remember that this type occurs
  inc_cnt[from,type]++;			# one more arrow on the 'from' box
  inc_cnt[to,type]++;			# one more arrow on the 'to' box
  next;					# nothing else applies to this line
}

# INSIDE ======================================================================
#
# Handles a ">INSIDE" entry; only active when 'names_flag' is 2, the one
# mode in which parent[] is consulted. After the ">INSIDE" prefix is removed
# from $1, the fields are read as attribute/value pairs. The "children"
# value must have the form "{c1,c2,...}"; each listed child gets the
# "parent" value stored in parent[child]. Any other form of the children
# value (including a missing one) is reported and ends the run with exit
# status 1.
#
# Whitespace around the commas and just inside the braces is ignored, so
# that a list written as "{1, 2}" yields the keys "1" and "2" rather than
# "1" and " 2". The attribute variables are globals and are cleared for
# every entry so that nothing leaks in from the previous INSIDE line.

names_flag==2 && /^[ \t]*>[ \t]*INSIDE/ {
  match($1,"^[ \t]*>[ \t]*INSIDE[ \t]*");
  $1 = substr($1,RLENGTH+1);

  # forget the previous entry's attributes
  pindex = ""; clist = "";

  for (i=1; i < NF; i = i+2) {
    if ($i == "parent")        { pindex = $(i+1); }
    else if ($i == "children") { clist = $(i+1);  }
  }
  if (substr(clist,1,1) == "{" && substr(clist,length(clist)) == "}") {
    # split the list of children up to form the "children[]" array
    clist = substr(clist,2,length(clist)-2);
    gsub(/^[ \t]+|[ \t]+$/, "", clist);
    child_cnt = split(clist,children,/[ \t]*,[ \t]*/);

    # install "pindex" as each child's parent
    for (i=1; i <= child_cnt; i++) { parent[children[i]] = pindex; }
  } else {
    print "Children string '" clist "' of improper form.";
    error_found = 1; exit(1);
  }
  next;
}

# END =========================================================================
#
# Prints the incidence tables once all input has been read. Nothing is
# printed when 'error_found' was set by an earlier rule.

END {
  if (error_found == 0) {
    print_all_incidences();
  }
}
