#! /usr/bin/perl -w
#
#	Search for Jerome in Bede
#		copyright Steve Harris and Paul Early, 2003-05
#		Amherst, MA
use diagnostics;
use strict;

# -------------------------------declarations
my @wdhier; my @wdbeda; my @setpron;
my %wdchecked;
my $a=0; my $b = 1; my $i =0; my $x=0; my $this=0;
my $numwdhier =0; my $numwdbeda=0;
my $abstr = ""; my $tempwd = "";
my $hier=""; my $beda="";

sub abstractword($);
sub checkword($$);
sub checkpron($$);
sub checkabstr($);
sub srchbeda($$);

open RES, "results.txt" or die $!;

# -------------------------------load grammar stuff
open PRON, "pron.txt" or die $!;
        my @pron = ;
        my $numprons = @pron;
close (PRON);

open UNDEC, "undec.txt" or die $!;
	my @undec = ;
        my $numundecs = @undec;
close (UNDEC);

# -------------------------------load Jerome text
if (-e "HIER.txt"){
	open HIER, "HIER.txt" or die $!;
	$hier = ;
}
close (HIER);

if (-e "BEDA.txt"){
        open BEDA, "BEDA.txt" or die $!;
        $beda = ;
}
close (BEDA);
# ------------------------just filled two arrays w texts
# --------now split them into single words

@wdhier = split /s+/, $hier;
  $numwdhier = @wdhier;
@wdbeda = split /s+/, $beda;
  $numwdbeda = @wdbeda;

# --------------------------------------------------------------MAIN
# iterates by distance, ie 10+ away from target word

foreach $tempwd (@wdhier){
	$abstr = abstractword($tempwd);		

  # ------check to see if pair has already been searched
	if ( defined $wdchecked{$tempwd} ){next;}
	else { $wdchecked{$tempwd} = $wdhier[$a + 1]; }
		# --this won't work, value needs to be ref to array
		# or each value will be replaced anew

  # ------iterate ten forward
	for ($b; $b <10; $b++){
	  checkword ($tempwd,$wdhier[$this+$b]) if defined $wdhier[$this+$b];
	}
	$this++;
}

# -----------------------------------ABSTRACTWORD()
sub abstractword($){
	my $thiswd = shift;
	my $numsetpron = 0;

 # ----split pronouns by group
	for ($i; $i < $numprons; $i++){
		@setpron = split /s/, $pron[$i];
		$numsetpron = @setpron;
		for ($x; $x < $numsetpron; $x++){
		  if ($thiswd == $setpron[$x]) {
			return $i+1;
			# -this returns the index of prons to search, no 0 
			}
		}
	}
	$i =0;
 # ----check the undeclinables
	for ($i; $i < $numundecs; $i++){
		if ($thiswd == $undec[$i]){
		 	$i = 0;
			return 99;
                        # -this says search only this term 
			}
	}
	$i=0;
	return 0;
}

# -----------------------------------CHECKWORD()
sub checkword($$){
	my $one = shift;
	my $two = shift;

	if (defined $abstr){
		for ($abstr){
		  $_ == 1 && checkpron(0, $two);
		  $_ == 2 && checkpron(1, $two);
                  $_ == 3 && checkpron(2, $two);
                  $_ == 4 && checkpron(3, $two);
                  $_ == 5 && checkpron(4, $two);
                  $_ == 6 && checkpron(5, $two);
                  $_ == 7 && checkpron(6, $two);
                  $_ == 8 && checkpron(7, $two);
                  $_ == 9 && checkpron(8, $two);
                  $_ == 10 && checkpron(9, $two);
                  $_ == 11 && checkpron(10, $two);
                  $_ == 12 && checkpron(11, $two);
                  $_ == 13 && checkpron(12, $two);
                  $_ == 14 && checkpron(13, $two);
                  $_ == 15 && checkpron(14, $two);
		}
	}
	if ($abstr == 99){checkabstr($two);}
	else { srchbeda($two,0) };
}

# -----------------------------------CHECKPRON()
sub checkpron($$){

        my $thispronset = shift;
        my $otherwd = shift;
	my $c = 0; my $d=0; my $dd=1; 

 

	my @thispron = split /s/, $pron[$thispronset];
	my $numthispron = @thispron;	
	for ($c; $c < $numthispron; $c++){
		srchbeda($thispron[$c],$c);
	}
}

# ------------------------------------CHECKABSTR()
sub checkabstr($){
	my $thisabstr = SHIFT;
	my $thisabstrnum = SHIFT;
	srchbeda(0,0);
}

# ------------------------------------SRCHBEDA()
sub srchbeda($$){
	my $thing = SHIFT;
	my $thisnum = SHIFT;
	my $f=0; my $ff =0;

	foreach (@wdbeda){
		$ff++;
                if ($thing == $_){
                  for ($f; $f <10; $f++){
		  if (defined $wdbeda[$f + $ff]){
                    if ($thing == $wdbeda[$f + $ff]){
                      print RES "\n$thispron[$f] $otherwd";
                    }
		  }
                  }
                }
          }
}