#!/usr/local/bin/perl -w
use strict;
# File-level state shared with the subs below:
#   $dc   - text found between <DC> and </DC> (handed to bib)
#   $head - everything up to and including </ThML.head>
#   %info - read by bib, filled by getInfo
#   %dc   - not used by the code in this part of the file
my ($head, $dc, %dc, %info);

# Slurp every file named on the command line (or STDIN) into one string.
my $input = "";
while (<>)
  { $input .= $_; }

# A failed match leaves $1 holding the capture of the previous successful
# match, so "m/.../; $x = $1;" can silently pick up a stale value.  Every
# capture below is therefore taken from the match in list context, which
# yields undef when the pattern does not match.
($dc)   = $input =~ m|<DC>(.*?)</DC>|s;
($head) = $input =~ m|(.*?</ThML.head>)|s;
$dc   = "" unless defined $dc;
$head = "" unless defined $head;


my ($publisherID, $authorID, $bookID, $version);
($publisherID) = $head =~ m|<publisherID>(.*?)</publisherID>|;
($authorID)    = $head =~ m|<authorID>(.*?)</authorID>|;
($bookID)      = $head =~ m|<bookID>(.*?)</bookID>|;
($version)     = $head =~ m|<version>(.*?)</version>|;
die "Missing publisherID, authorID, or bookID\n" 
	unless $publisherID and $authorID and $bookID;
my $url = "http://$publisherID/$authorID/$bookID.htm";

# The open mode is passed separately so that the messages show the real
# file name.  OF stays a bareword handle because every sub prints to it.
my $filename = "dbimport.sql";
print STDERR "Opening $filename...\n";
open(OF, '>', $filename) or die "Cannot open $filename for writing: $!\n";

# The leading & is required for index: a plain index(...) would call
# Perl's built-in string function instead of the sub defined in this
# file.  The other calls keep the same form for uniformity.
&book();
&bib($dc);
&index($input);
&scripRef($input);
&scripCom($input);
&hymn($input);

# Buffered write errors only surface at close, so check it.
close(OF) or die "Error closing $filename: $!\n";
print "Database import file: $filename\n";

exit(0);


# book: replace this book's row in the "book" table -- a delete of any
# existing row followed by a fresh insert stamped with DATE(TODAY).
# Takes no arguments; reads the file-level $publisherID, $authorID and
# $bookID and writes to the OF handle.
sub book
{
  my $sql = join "",
    "delete from book where publisherID='$publisherID' and \n",
    "	authorID='$authorID' and bookID='$bookID';\n\n",
    "insert into book values('$publisherID', '$authorID',",
    "'$bookID', DATE(TODAY));\n\n";
  print OF $sql;
}
  
#--------------------------------------------------------------------
# bib: output bibliographic information about the book and authors
#
# Argument:
#   the text of the DC record (the caller passes what it found between
#   <DC> and </DC>); undef is treated as an empty record.
#
# Output, written to the OF handle:
#   - one "insert into table DCelt" statement for every <DC.xxx> element
#     that has content;
#   - one "insert into author" statement for every name listed in
#     $info{author}, $info{translator} and $info{editor}, in that order
#     (names within an entry are separated by "|").
#
# Reads the file-level $publisherID, $authorID, $bookID and %info.
#
# NOTE(review): "insert into table DCelt" is worded differently from the
# other insert statements in this file, and authorType is written without
# quotes -- both are left as they were; confirm against the schema.
sub bib
{
  my $text = shift;
  $text = "" unless defined $text;

  # The element name is [^\s>]+ and the attributes are [^>]* so neither
  # can run past the end of the opening tag.  With \S+ and .*? the name
  # could swallow ">" and span several tags that sit on one line.
  while ($text =~ s|<DC\.([^\s>]+)([^>]*)>(.*?)</.*?>||) {
    my ($elt, $atts, $content) = ($1, $2, $3);
    my $scheme = ($atts =~ m/scheme="(.*?)"/) ? $1 : "";
    my $sub    = ($atts =~ m/sub="(.*?)"/)    ? $1 : "";
    # Only add the sub-element when there is one, so that a plain
    # element is not stored with a trailing dot.
    $elt .= ".$sub" if length $sub;
    # Skip empty elements, but keep content that is merely "0".
    next unless length $content;
#   print "--processing elt=$elt sub=$sub scheme=$scheme content=$content\n";

    print OF "insert into table DCelt values(\n";
    print OF qq!	"$publisherID",		#publisherID\n!;
    print OF qq!	"$authorID",	#authorID\n! ;
    print OF qq!	"$bookID",	#bookID\n! ;
    print OF qq!	"$elt",	#element\n! ;
    print OF qq!	"$scheme",		#scheme\n! ;
    print OF qq!	"$content");	#contents of element\n\n! ;
  }

  # now write insert rows in "author" table -- authors, translators, editors
  foreach my $role ('author', 'translator', 'editor') {
    # %info may have no entry for this role; testing for that directly
    # avoids having to silence warnings through $SIG{__WARN__}.
    next unless defined $info{$role};
    foreach my $name (split(/\|/, $info{$role})) {
      # An empty name is skipped on its own; it must not end the loop
      # and drop the names that follow it.
      next unless length $name;
#     print "Found $name--$role\n";
      print OF "\ninsert into author values(\n";
      print OF qq!	"$publisherID",		#publisherID\n!;
      print OF qq!	"$authorID",	#authorID\n!;
      print OF qq!	"$bookID",	#bookID\n!;
      print OF qq!	$role,	#authorType\n!;
      print OF qq!	select personID from personAuth where\n!;
      print OF qq!		name="$name"); #authorID\n!;
    }
  }
}


# index: write one "Insert into subject" statement to OF for every
# <index ... type="globalSubject" ...> tag found in the text.
#
# Argument: the document text to scan (undef is treated as empty).
#
# Each statement carries the file-level $publisherID, $authorID and
# $bookID, the string "$url|<id>", the tag's id, its title (the id is
# used when the tag has no title) and its subject1..subject4 attributes.
# An attribute that is absent from the tag is written as an empty string.
# Progress lines are also printed to STDOUT.
#
# Because this sub shares its name with Perl's built-in index function,
# it has to be invoked as &index(...).
sub index
{
  my $text = shift;
  $text = "" unless defined $text;

  print "In index\n";
  # Walk the tags with //g on a private copy instead of repeatedly
  # deleting them from $_ and rescanning from the start.
  while ($text =~ m|(<index[^>]*type="globalSubject".*?>)|sig)
  {
    my $idx = $1;
    print "Index: found $idx\n";

    # "my $x = $1 if COND" has undefined behaviour in Perl, so every
    # attribute is extracted with an explicit default instead.  The \b
    # keeps a pattern from matching inside a longer attribute name.
    my $id    = ($idx =~ m|\bid="(.*?)"|i)   ? $1 : "";
    my $title = ($idx =~ m|\btitle="(.*?)"|) ? $1 : "";
    $title = $id unless length $title;

    my @subject;
    foreach my $n (1 .. 4) {
      my $value = ($idx =~ m|\bsubject${n}="(.*?)"|) ? $1 : "";
      push @subject, $value;
    }

    print OF "\nInsert into subject values(\n";
      print OF qq!	"$publisherID",		#publisherID\n!;
      print OF qq!	"$authorID",	#authorID\n!;
      print OF qq!	"$bookID",	#bookID\n!;
      print OF qq!	"$url|$id",	#URL\n!;
      print OF qq!	"$id",	#id\n!;
      print OF qq!	"$title",	#title\n!;
      print OF qq!	"$subject[0]",	#subject1\n!;
      print OF qq!	"$subject[1]",	#subject2\n!;
      print OF qq!	"$subject[2]",	#subject3\n!;
      print OF qq!	"$subject[3]"	#subject4\n!;
      print OF "	);\n";
  }
}


# scripRef: write "insert into scripRef" statements to OF for every
# <scripRef ... parsed="..." ...> tag found in the text.
#
# Argument: the document text to scan (undef is treated as empty).
#
# The parsed attribute holds one or more references separated by ";".
# Each reference is a "|"-separated list of fields; the first two fields
# are written as quoted strings and the remaining ones bare.  One
# statement is written per reference, carrying the file-level
# $publisherID, $authorID and $bookID and the string "$url|<id>", where
# <id> is the tag's id attribute (empty when the tag has none).
sub scripRef
{
  my $text = shift;
  $text = "" unless defined $text;

  while ($text =~ m|(<scripRef[^>]*parsed="([^>]*?)"[^>]*?)>|g)
  {
#   print OF "\n\n------\nIn scripRef -- found $1 $2\n";
    my ($ref, $parsed) = ($1, $2);
    # "my $id = $1 if COND" has undefined behaviour in Perl; take the
    # capture with an explicit default instead.
    my $id = ($ref =~ m|\bid="(.*?)"|) ? $1 : "";

    foreach my $r (split /;/, $parsed) {
      # An empty reference (e.g. from ";;") carries nothing to insert.
      next unless length $r;

      # Quote the first two fields and leave the rest bare.  Building
      # the list field by field keeps the quotes balanced even when a
      # reference has fewer than three fields.
      my @field = split /\|/, $r, -1;
      my @value;
      foreach my $i (0 .. $#field) {
        my $sql = ($i < 2) ? qq!"$field[$i]"! : $field[$i];
        push @value, $sql;
      }
      my $values = join ", ", @value;

      print OF "\ninsert into scripRef values (\n";
      print OF qq!	"$publisherID",	#publisherID\n!;
      print OF qq!	"$authorID",	#authorID\n!;
      print OF qq!	"$bookID",	#bookID\n!;
      print OF qq!	"$url|$id",	#URL\n!;
      # No comma after the last value: it is followed directly by ");".
      print OF qq!	$values	#reference\n!;
      print OF "	);\n";
    }
  }
}


# scripCom: write "insert into scripCom" statements to OF for every
# <scripCom ... parsed="..." ...> tag found in the text.
#
# Argument: the document text to scan (undef is treated as empty).
#
# The parsed attribute holds one or more references separated by ";".
# Each reference is a "|"-separated list of fields; the first two fields
# are written as quoted strings and the remaining ones bare.  One
# statement is written per reference, carrying the file-level
# $publisherID, $authorID and $bookID, the string "$url#<id>", the tag's
# title (its id when there is no title) and its type ("commentary" when
# there is no type).
sub scripCom
{
  my $text = shift;
  $text = "" unless defined $text;

  while ($text =~ m|(<scripCom[^>]*parsed="([^>]*?)"[^>]*?)>|g)
  {
#   print OF "\n\n------\nIn scripCom -- found $1 $2\n";
    my ($ref, $parsed) = ($1, $2);
    # "my $x = $1 if COND" has undefined behaviour in Perl; take each
    # capture with an explicit default instead.  The \b keeps a pattern
    # from matching inside a longer attribute name.
    my $id    = ($ref =~ m|\bid="(.*?)"|)    ? $1 : "";
    my $title = ($ref =~ m|\btitle="(.*?)"|) ? $1 : "";
    my $type  = ($ref =~ m|\btype="(.*?)"|)  ? $1 : "";
    $title = $id          unless length $title;
    $type  = "commentary" unless length $type;

    foreach my $r (split /;/, $parsed) {
      # An empty reference (e.g. from ";;") carries nothing to insert.
      next unless length $r;

      # Quote the first two fields and leave the rest bare.  Building
      # the list field by field keeps the quotes balanced even when a
      # reference has fewer than three fields.
      my @field = split /\|/, $r, -1;
      my @value;
      foreach my $i (0 .. $#field) {
        my $sql = ($i < 2) ? qq!"$field[$i]"! : $field[$i];
        push @value, $sql;
      }
      my $values = join ", ", @value;

      print OF "\ninsert into scripCom values (\n";
      print OF qq!	"$publisherID",	#publisherID\n!;
      print OF qq!	"$authorID",	#authorID\n!;
      print OF qq!	"$bookID",	#bookID\n!;
      print OF qq!	"$url#$id",	#URL\n!;
      print OF qq!	"$title",	#title\n!;
      print OF qq!	"$type",	#type\n!;
      # No comma after the last value: it is followed directly by ");".
      print OF qq!	$values	#reference\n!;
      print OF "	);\n";
    }
  }
}


# hymn: placeholder for finding hymn-related info.
# Not implemented yet: any arguments are ignored and nothing is written.
sub hymn
{
  return;
}


#------------------------------------------------------
#
# getInfo: gets the info out of the header text passed as the first
# argument and puts it into the file-level hash, $info{$key}.
#
# The wrapper tags generalInfo, electronicEdInfo, printSourceInfo and DC
# are stripped first.  After that every element of the form
# <name atts>text</name> whose text contains no further markup is
# recorded under its name.
# Repeated entries are separated with a vertical bar (|).
# In the DC record, subelements are added to the element name
# (name.sub) and scheme is added after a "#" (name#scheme).
#
# An undef argument is treated as empty text.  Nothing is returned;
# the result is the contents of %info.
#
# NOTE(review): nothing in this part of the file calls getInfo, so %info
# stays empty unless it is filled elsewhere -- confirm.
#
sub getInfo
{
  my $text = shift;
  $text = "" unless defined $text;
  my ($elt, $att, $cont);
  $text =~ s/<\/?(generalInfo|electronicEdInfo|printSourceInfo|DC)\>//g;
 
  # The element name is [^\s>]* so that it can never include the ">"
  # that closes the opening tag.
  while ($text =~ s|<([^\s>]*)([^>]*)>([^<]*)</\1>||s)
  {
    ($elt, $att, $cont) = ($1, $2, $3);
    if ($att =~ m/sub="(.*?)"/) 
      { $elt .= ".$1"; }
    if ($att =~ m/scheme="(.*?)"/) 
      { $elt .= "#$1"; }
    # Test for a stored non-empty value rather than for truth, so that
    # a first value of "0" is kept when a repeated entry arrives.
    if (defined $info{$elt} and length $info{$elt})
      { $info{$elt} .= "|$cont"; }
    else
      { $info{$elt} = $cont; }
#   print "  $elt -> $info{$elt}\n";
  }
}

# parseDC: debugging aid that prints, to the currently selected output
# handle, the DC text it was given followed by one "--processing" line
# for every <DC.xxx> element found in it, showing the element name, its
# sub and scheme attributes (empty when absent) and its content.
#
# Argument: the DC text to scan (undef is treated as empty).
# Nothing is written to OF and no file-level variable is changed.
sub parseDC
{
  my $text = shift;
  $text = "" unless defined $text;

  print "In parseDC: parsing\n$text\n";
  # The element name is [^\s>]+ and the attributes are [^>]* so neither
  # can run past the end of the opening tag.  With \S+ and .*? the name
  # could swallow ">" and span several tags that sit on one line.
  while ($text =~ s|<DC\.([^\s>]+)([^>]*)>(.*?)</.*?>||) {
    my ($element, $atts, $content) = ($1, $2, $3);
    my $scheme = ($atts =~ m/scheme="(.*?)"/) ? $1 : "";
    my $sub    = ($atts =~ m/sub="(.*?)"/)    ? $1 : "";
    print "--processing $element sub=$sub scheme=$scheme content=$content\n";
  }
}
  

