#!/usr/bin/perl -w
#
# parsescr v0.501, Harry Plantinga, Nov. 18, 1998. This program may be 
# distributed and used under the terms of the Artistic License.
#
# 5.01 -- fixed to handle scripture references without passage
#         attribute but with enclosed ref: <scripRef>john 1:1</scripRef>
#      -- fixed handling of references like Psalms 75, 76, and 77
#      -- also added debug output
# 
# parseScr: find all <scripRef>, <scripCom>, or <scripture> elements
# and add parsed=".." attribute based on passage=".." attribute or
# the element contents, if there's no passage attribute.
#
# bugs:  should handle Roman numerals in scripture references. 
# Doesn't handle scripContext right -- context is set for whole file
#
use strict;
my %bookID;		#book name or alias => bookID 
my @bookName;		#gives official book name for bookID
my @chapters;		#gives number of chapters for bookID
my @bookaliases;	#book aliases reverse sorted by length
my $debug=0;

my $input = "";
while (<>) { 
  $input .= $_; 
}
print STDERR "\nparsescr: " . length($input) . " bytes read\n";

&readbookID;		#read in Bible book names, IDs, aliases, chapters
sub bylength { length($b) <=> length($a) }
foreach $b (sort bylength keys %bookID) {
  push @bookaliases, $b;
}

# get default version and book, if any (bug: should really parse through
# and see when defaults apply, not just use them for whole file...)
#
my ($versionDefault) = $input =~ m|<scripContext[^>]*version="(.*?)"|;
my ($bookDefault) = $input =~ m|<scripContext[^>]*passage="(^ )*|;

# first we should copy enclosed scripture passage in scripRef tag to
# passage="..." attribute, if it isn't already there.
$input =~ s|(<scripRef[^>]*)>([^<]*?)(</scripRef>)|$1 passage="$2">$2$3|gsi;
#print $input;
$input =~ s|(<scripRef[^>]*passage="[^>]*?"[^>]*)passage="[^>]*?">|$1>|gsi;
#print $input;

# now parse the passage in the passage attribute and put result in 
# parsed attribute.
$input =~ s#(<(scripRef|scripCom|scripture)[^>]*>)#&parsetag($1)#gse;

print $input;	
exit(0);
  

#-----------------------index subroutines-----------------------

sub parsetag
{
  my $tag = shift;
  print "--------\nIn parsetag: got $tag\n" if $debug;
  my ($passage) = $tag =~ m|passage="(.*?)"|;
  my ($version) = $tag =~ (m|version="(.*?)"|);
  $version ||= $versionDefault || "";
  my $parsed = parse($passage,$version) if $passage;
  $tag =~ s|\s*(/?>)| parsed="$parsed"$1| if $parsed;
  print "Parsetag: returning $tag\n" if $debug;
  return $tag;
}



#-------------------------subroutines------------------
#
# this subroutine finds all of the attributes of the form xxx="yyy" and
# returns them as a reference to a hash.
sub attributes
{
  my $tag = shift;
  my $rec = {};
  my $s;

  while ($tag =~ s|(\w+)="(.*?)"||s) {		#find all attributes
    $rec->{$1} = $2; }

  return $rec;
}
  

#parse a scripref, returning parsed format, 
#(version|book|fch|fv|tch|tv) [; (version|book|fch|fv|tch|tv)*]
sub parse
{
  my $passage = shift;
  my $orig = $passage;
  my $version = shift;
  my ($parsed, $chaps, $chap, $bookcomma);
  print "Parse: got $version $passage\n" if $debug;
  my $bk = $bookName[$bookID{$bookDefault}] if $bookDefault;
  my ($fch, $tch, $fv, $tv);
  $fch=$tch=$fv=$tv=0;

  $passage =~ s/ff//g;			#delete ff in v. 37ff
  $passage =~ s/etc\.?//g;		#delete etc
  $passage =~ s/[\.,]$//;		#delete trailing dot or comma
  while (length($passage)) {
    $passage =~ s/\s*and\s*//;
    $passage =~ s/^III\s*/3/;
    $passage =~ s/^II\s*/2/;
    $passage =~ s/^I\s*/1/ unless $passage =~ m|^Is|; #don't mangle Isaiah
    $passage =~ s/^([1-3])\s+/$1/;

    foreach $b (@bookaliases) {		#search for book among bookaliases
      $chaps ||= 0;
      if ($passage =~ s|^$b||i) {
        $bk = $bookName[$bookID{$b}];
        $chaps = $chapters[$bookID{$b}];
        print "  Found $b => $bookName[$bookID{$b}], $chaps chs\n" if $debug;
      }
    }
    $passage =~ s/^[. ]*//;

    if ($passage =~ s#^(verse|ver|vs|vv|v)s?\.?\s*##) {
      print "Found vv. --  looking for a verse\n" if $debug;
      $chap=$fch;
      }
    if ($passage =~ s#^(chapter|chapt|chap|ch|c)s?\.?\s*##) {
      print "Found chs. --  looking for a chapter\n" if $debug;
      $chap=0;
      }

    print "  Passage remaining: $passage\n" if $debug;
  
    # look for references like 3:16-4:2
    if ($passage =~ s|^\s*(\d+)[:.](\d+)\s*-\s*(\d+)[:.](\d+)\s*||) {
      print "  case 1. ch:v-ch:v\n" if $debug;
      $fch=$1; $fv  = $2; $tch=$3; $tv=$4; }
    
    # look for 3:16-18
    elsif ($passage =~ s|^\s*(\d+)[:.](\d+)\s*-\s*(\d+)\s*||) {
      print "  case 2. ch:v-v\n" if $debug;
      $fch=$1; $fv=$2; $tch=$1; $tv=$3;}

    # look for 3:16
    elsif ($passage =~ s|^\s*(\d+)[:.](\d+)\s*||) {
      print "  case 3. ch:v\n" if $debug;
      $fch=$1; $fv=$2; $tch=0; $tv=0; }

    # look for Phm. 5-7 (one-chapter book)
    elsif ($chaps==1 && ($passage =~ s#^(\d+)-(\d+)\s*##)) {
      print "  case 4. 1ch bk, v-v\n" if $debug;
      $fch=1; $fv=$1; $tch=1; $tv=$2; }

    # look for John 5-7 (expecting a verse, e.g John 3:3,5-7)
    elsif ($chap && ($passage =~ s#^(\d+)-(\d+)\s*##)) {
      print "  case 5 v-v.\n" if $debug;
      $fv=$1; $tv=$2; $tch=$fch; }

    # look for John 3--4 (multi-chapter books)
    elsif ($chaps > 1 && $passage =~ s|^(\d+)--(\d+)\s*||) {
      print "  case 6. ch--ch\n" if $debug;
      $fch=$1; $tch=$2; }

    # look for John ch. 3-4 (multi-chapter books)
    elsif (!$chap && $chaps > 1 && $passage =~ s#^(\d+)--?(\d+)\s*##) {
      print "  case 7. ch. ch-ch\n" if $debug;
      $fch=$1; $tch=$2; }
   
    # John 5 (multi-chapter books, not expecting a verse, e.g. John 3:16; 5
    # then set bookcomma: even if we get a comma, we're expecting a book.
    # e.g. Psalms 75, 76, and 77.
    elsif (!$chap && $chaps > 1 && $passage =~ s#^(\d+)\s*##) {
      print "  case 8. ch\n" if $debug;
      $fch=$1; $fv=0; $tch=0; $tv=0; $bookcomma=1; }
   
    # John 5 (expecting a verse, e.g. John 3:3, 5
    elsif ($chap && ($chaps > 1) && $passage =~ s#^(\d+)\s*##) {
      print "  case 9. v\n" if $debug;
      $fv=$1; $tch=0; $tv=0; $chap=0; }

    # Phm. 3 or Phm. v.3 (one-chapter book)
    elsif ($chaps==1 && $passage=~s#^(\d+)\s*##) {
      print "  case 10. (1ch bk) v\n" if $debug;
      $fch=1; $fv=$1; $tch=0; $tv=0; }

    # otherwise, punt
    else {
      warn "Couldn't parse scripref $orig, near $passage\n";
      print "Couldn't parse scripref $orig, near $passage\n" if $debug;
      return "";
    }

    print "  found $version|$bk|$fch|$fv|$tch|$tv\n" if $debug;
    $parsed .= "$version|$bk|$fch|$fv|$tch|$tv;";
    
    $passage =~ s|^\s*||;
    if ($passage =~ s|^;\s*||) {
      print "Found ';' -- looking for chapter\n" if $debug;
      $fv=0; 
      $chap=0;
    }
    if ($passage =~ s|^,\s*||) {
      if (!$bookcomma) {
        print "Found ',' --  looking for a verse\n" if $debug;
        $chap=$fch;
      }
      else
      {
        print "Found ',', but looking for a chapter anyway\n" if $debug;
        $chap=0;
        $bookcomma=0;
      }
    }
  }
  $parsed =~ s/;$//;
  return $parsed;
}


sub readbookID
{
  open BOOKID, "</home/hplantin/bin/bookID.unl" or die "!$\n";
  my $n;
  while (<BOOKID>) {
    chop;
    s|#.*||;
    next if m|^\s*$|;
#   print "\nRead $_\n";

    my @names  = split /:/, $_;
    my $bkID   = shift @names;
    my $bkname = shift @names;
    my $chaps  = shift @names;
    $bookName[$bkID] = $bkname;
    $chapters[$bkID] = $chaps;
    $bookID{$bkname} = $bkID;
    foreach $n (@names) {
      $bookID{$n} = $bkID; }

#   print "Book ID=$bkID name=$bookName[$bkID]; chaps=$chapters[$bkID]\n";
#   print "Aliases: ";
#   foreach $n (keys %bookID) {
#     print "key $n -> $bookID{$n}\n";
#     print " $n" if $bookID{$n} eq $bkID;
#   }
  }	#end while

  close BOOKID;
}

