#!/usr/bin/perl -w
use Socket;
use strict;

my $word = $ARGV[0] or die "Usage: mw word\n";
my $host = "proxy.kla-tencor.com";
my $port = 80;
my $socketaddr;
my $content = "jump=$word"; # This seems to work with white space in word
my $wholePage = "";
my $form = "";
my $buf = "";
my @listvalue = ();
my @option = ();
my $selections = 0;
my $count = 0;

while ($content) {

openSock();
post ($content);

$wholePage = "";
while ( <SOCK> ) {
	$wholePage .= $_;
}
close SOCK;

$wholePage =~ /(<form .*<\/form>)/gs;
$form = $1 or die "Can not find the word\n";

# this is heavy duty kludge, geared toward www.m-w.com, needs maintenance
# find out if the form has a selection of options
$selections = 0;
if ($form =~ s/^To view.*?GO TO.$//m) {
	$selections = 1;
	@option = ($form =~ /^<option.*>(.*)$/mg);
	@listvalue = ($form =~ /name=list value="(.*)">/g);
}

# convert html into something more readable
$form =~ s/<br>/\n/g;       # change html linebreak to newline
$form =~ s/<option.*?\n//mg;# delete the selection list, to be shown later
$form =~ s/<[^>]*>//g;      # delete all the other html tags
$form =~ s/&gt;/>/g;        # make visible the greater-than sign
$form =~ s/&lt;/</g;        # make visible the less-than sign
$form =~ s/&amp;/&/g;       # make visible the less-than sign
$form =~ s/\n+/\n/g;       # delete multiple newlines
print $form;
print "\n";

# prompt the user for further actions: look up another word or stop here
$content = "";
if ($selections) {
	print "Here are the related words:\n";
	for (my $i=0;$i<@option;$i++){
		print "$i: $option[$i]\n";
	}
	print "\nEnter a number to select from the list, or enter . to quit\n";

	$buf = <STDIN>; # don't know how to use "read"
	chomp $buf;
	if ($buf eq '.') {
		$content = "";
	}
	elsif ($buf !~ /\d/ or $buf >= @option) {
		print "What did you just do?\n";
		$content = "";
	}
	else{
		$content = "hdwd=$word&book=Dictionary&jump=";
		$content .= urlencode ($option[$buf]);
		$content .= "&list=";
		$content .= urlencode ($listvalue[0]);
	}
} # end of if selections

} # end of while content


###########
# subroutine: open a socket at SOCK
###########

sub openSock {
$socketaddr= sockaddr_in $port, inet_aton $host or die "Bad hostname\n";
socket SOCK, PF_INET, SOCK_STREAM, getprotobyname('tcp') or die "Bad socket\n";
connect SOCK, $socketaddr or die "Bad connection\n";
select((select(SOCK), $| = 1)[0]);
}


###########
# subroutine: urlencode a string
###########

sub urlencode {

my $ask = shift @_;
my @a2 = unpack "C*", $ask;
my $s2 = "";
while (@a2) {
    $s2 .= sprintf "%%%X", shift @a2;
}
return $s2;

}


###########
# subroutine: send post request to target web site
###########

sub post {

my $content = shift @_;
print SOCK "POST http://www.m-w.com/cgi-bin/dictionary HTTP/1.0\n";
print SOCK "Content-type: application/x-www-form-urlencoded\n";
my $contentLength = length $content;
print SOCK "Content-length: $contentLength\n";
print SOCK "\n";
print SOCK "$content";
}

=head1 NAME
Save this file to "mw", which stands for merriam-webster, then you can run it as
"mw word" or "perl mw word"

=head1 DESCRIPTION
a simple web robot to look up a word from Merriam-Webster site using POST 
method, and print the text response to STDOUT. 

=head1 README
A special-purpose simple script that looks up a word from Merriam-Webster site.
This script only uses Socket and no other external modules or packages, and it 
demonstrates the use of POST method to submit a FORM. However, the specific use 
of this script is limited to talking to www.m-w.com, and the fact that many 
parameters are hard-coded makes it dependent on the stability of that web site. 
Nonetheless, since everything is explicitly written, it is very easy to manually
change those hard-coded strings

=head1 PREREQUISITES
requires strict module and Socket module

=head1 SCRIPT CATEGORIES
Web

=cut