#!/usr/bin/perl -w
# 
# thm2htm,  Harry Plantinga.  This program may be copied
# under the terms of the Artistic License.
#
# This script is a (second) attempt at ThML to HTML conversion.
#   (I still hope that this will be done eventually with an XSL stylesheet.)
#
# v0.32, 00-03-11  Bug fixes
# v0.31, 00-03-04  Incorporate Skip Gaede's table improvements and date
#   improvement.  Fix name computation bug.
# v0.30, 00-03-01  Make a dbmfile/hash to store the id -> filename 
#   mapping, since now the filename can't be derived from ID. Add the
#   filebreak="nnn" attribute to <divn> elements. Use filebreak="0" to
#   force a break after a div, filebreak="999999" to prevent one, or some
#   other value to set a minimum div size.
# v0.29, 00-02-21  Arrow keys to navigate in IE; fixed bugs; went 
#   back to putting all the files in one directory (htm); start at
#   "TitlePage.htm" if it exists -- else TOC.htm; make a hash of
#   filenames for each ID and store in idfile dbm file. Use javascript
#   /ss/util.js script. Move ThML10.css to ss directory.
# v0.28, 00-02-15  Added page icons; expandable outline; don't put
#   really small sections into separate files
# v0.27, 99-11-23  Add search; put "about" page (as index.html) in 
#   current directory and other files down one level; better doc titles;
#   metadata in index.html file; create il and nl files containing link info
# v0.26, 99-11-04  Changes from Stephen Hutcheson rolled in -- dumb
#   down mdash and ndash entities; footnote enhancements
# v0.25, 99-11-03  Various bugfixes; meta tags in HTML.  This version
#   handles stylesheets in a way that Netscape understands.
# v0.24, 99-01-30  Fixed various bugs to make it work better with Voyager
#   version of DTD
# v0.23, 99-01-12  Fixed bug that prevented scriprefs in footnotes from
#   being linked to bible gateway.
# v0.23, 99-01-02  Modified to work with ThML0.99 and DC header
# v0.22, 12/7/98.  Modified to work with new, ccel-style URLs
# v0.21, 12/1/98.  Modified to use the division ID as its name, rather
#   than trying to recompute what the ID should be.  Hopefully this
#   will make it work when div elements have manually-inserted ids.
# v0.2, 11/25/98. This version makes a number of unspecified improvements.
#   The program works reasonably well for one or two files, but it has 
#   not been tested very well yet and needs added features.
#
use strict;
use ThMLutil;
use Getopt::Long;

#
# File-scope state.  These lexicals are shared by the subroutines below:
# getInfo fills in the bibliographic fields, processBody/processDiv track
# the current file name and footnotes, and %idfile is the dbm-backed
# id -> filename map.
#
my ($bookID, $author, $authorID, $publisherID, $version, $title, $input);
my ($head,$body,$prev,$filename,$notenum,$footnotes,$rights, $status);
my ($DCpublisher, $DCdate, $URL, $description, $meta);
my ($digitized, $typed, $thml, $display, %idfile, $id);
my ($debug, $MinDivSizeDefault, $ccelhacks);
my $unknowns = 0;		# number of ids uri2url could not resolve
my $divid = "1";		# counter for the collapsible TOC <div> ids

GetOptions('debug' => \$debug,			# turn on debugging
           'ccel' => \$ccelhacks,		# turn on ccel hacks
           'size=i' => \$MinDivSizeDefault);	# set min div size default

$MinDivSizeDefault ||= 3000;	# --size=nnnn to set the minimum div size
$debug ||= 0;
$ccelhacks ||= 0;


while (<>) 				#read entire file into $input
  { $input .= $_; }
$_ = $input;			# getInfo matches against $_ (the raw input)
die "Uuurrgh! empty input file\n" unless $input;
print "\nthm2htm: " . length($input) . " bytes read\n";

$input =~ s|<deleted.*?</deleted>||gsi;	#delete deleted stuff
$input = dumbquo($input);		#dumb down quotes, etc for HTML

getInfo();			#get author, title, etc out of header

#make directory for html; stop right away if that is impossible
unless (-d "htm") {
  mkdir("htm", 0777) or die "Can't create htm directory-$!\n";
}
dbmopen(%idfile, "htm/idfile", 0666)
  or die "Can't open idfile dbm file-$!\n";

$input =~ s|(href=")#(.*?)(\.p.*?")|$1$2.htm#$2$3|gsi;		#fix URIs
$input =~ s|(<pb\s*n="([^"]*)"[^>]*>)|&pageref($1,$2)|gsie;	#page images
$input =~ s|(<index[^>]*>)(.*?)(</index>)|&linkIndex($1,$2,$3)|gsie; #index

# separate out head and body; without both elements there is nothing
# sensible to convert, so say so instead of using stale captures
$input =~ m|^(.*</ThML.head\s*>).*?(<ThML.body\s*>.*)$|si
  or die "Couldn't find both a ThML.head and a ThML.body element.\n";
($head, $body) = ($1, $2);
die "Hey -- there's no <div1> element.\n" . 
    "You should be using thm2htm-nodiv instead.\n" if ($body !~ m|<div1|);


processHead($head);		#process ThML.head -- make info page
processBody($body);		#process ThML.body -- make web
makeLinks() if $ccelhacks;	#make il, nl link files

dbmclose(%idfile);		#flush the id -> filename map to disk

# TitlePage.htm only exists if the document has a division that maps to
# that name; otherwise the table of contents is the entry point.
my $startpage = (-f "htm/TitlePage.htm") ? "TitlePage.htm" : "TOC.htm";
print "HTML starts at \"htm/$startpage\"\n";
print "$unknowns id mappings were unresolved: " .
        "it might help to re-run this program.\n" if $unknowns;
exit(0);


#-------------------------subroutines----------------------------
#
# This subroutine converts ccel-style URIs to URLs that will work
# with the multiple htm files generated by this program.
#
# A URI is treated as local when it is either "|id" or
# "/ccel/<authorID>/<bookID>.htm|id".  Local ids are looked up in the
# %idfile map and become "filename#id"; ids that are not (yet) in the
# map become "_unknown#id" and are counted in $unknowns.
#
# The URIs that are not to this authorID/bookID are left in place.
# They can be interpreted on the fly by the CCEL server (or perhaps
# converted by another program reading the idfile dbm to get the
# filename).
#
# Parameter: the value of an href attribute.
# Returns:   a complete href="..." attribute.
#
sub uri2url
{
  my $uri = shift;
  my $url;

  # Prefix that identifies this very book.  The IDs are quoted below so
  # that "." and other metacharacters in them are matched literally.
  my $thisbook = "/ccel/" . (defined $authorID ? $authorID : "")
               . "/"      . (defined $bookID   ? $bookID   : "") . ".htm";

  # Anchored at the start: a "|" somewhere inside a foreign URI must not
  # be stripped, or the URI would no longer be left in place.
  if ($uri =~ s{^(?:\Q$thisbook\E)?\|}{}) {
    $url = $idfile{$uri} || "_unknown";
    if ($url eq "_unknown") {
      $unknowns++;
      print "  Couldn't find ID $uri\n";
    }
    $url .= "#$uri";
  } else {
    $url = $uri;
  }
  return "href=\"$url\"";
}


# get some important info from the header: 
# title, author, bookID, authorID, etc.
#
# Reads the document from $_ and stores the results in the file-scope
# variables.  The Dublin Core fields default to "" when the element is
# missing; the ID fields, version and status are only assigned when
# their element is found.
#
sub getInfo
{
  # Match in list context so that a failed match yields nothing rather
  # than whatever $1 still holds from the previous successful match.
  ($title)       = m|<DC.Title.*?>(.*?)</DC.Title\s*>|is;
  ($author)      = m|<DC.Creator.*?>(.*?)</DC.Creator\s*>|is;
  ($rights)      = m|<DC.Rights.*?>(.*?)</DC.Rights\s*>|is;
  ($DCpublisher) = m|<DC.Publisher.*?>(.*?)</DC.Publisher\s*>|is;
  ($DCdate)      = m|<DC.Date.*?>(.*?)</DC.Date\s*>|is;
  ($URL)         = m|<DC.Identifier.*?>(http.*?)</DC.Identifier\s*>|is;
  foreach my $field ($title, $author, $rights, $DCpublisher, $DCdate, $URL) {
    $field = "" unless defined $field;	# foreach aliases the variable itself
  }

  $bookID=$1        if m|<bookID\s*>(.*?)</bookID\s*>|is; 
  $authorID=$1      if m|<authorID\s*>(.*?)</authorID\s*>|is; 
  $publisherID=$1   if m|<publisherID\s*>(.*?)</publisherID\s*>|is; 
  $version=$1       if m|<version\s*>(.*?)</version\s*>|is; 
  $status=$1        if m|<status\s*>(.*?)</status\s*>|is; 

  # prefer <description>, fall back to <DC.description>
  $description=$1   if m|<description\s*>(.*?)</description\s*>|is;
  $description||=$1 if m|<DC.description\s*>(.*?)</DC.description\s*>|is;
  $description||="";
  print "Making HTML version of $title by $author\n";
}
  

#
#  Make an info page out of the ThML.head information
#
#  Parameter: the text of the document up to and including </ThML.head>.
#  Writes htm/styles.css (the document's own <style> rules) and
#  htm/About.htm (title, search form, links to the other files found in
#  the current directory, and the formatted header sections).
#  Works on $_, which retag (not defined in this file; presumably from
#  ThMLutil) appears to operate on as well.
#
sub processHead
{
  $_ = shift;

  #
  # output the stylesheet which is common for all sections of this doc
  #
  my $name=">htm/styles.css";
  open STYLES, $name or die "Can't write htm/styles.css: $!\n";
  my $styles="";
  $styles = $1 if s|<style.*?>(.*?)</style\s*>||is;
  # remove <link> elements one tag at a time; a greedy ".*" with /s
  # would swallow everything up to the last ">" of the header
  s|<link[^>]*>||gi;
  print STYLES $styles;
  close STYLES;

  #
  # create the About.htm file, which has info about the book.
  # It should have meta tags, doc info, links to other formats.
  # About.htm lives in htm/ next to styles.css, so the stylesheet links
  # match the ones produced by head().
  #
  $name=">htm/About.htm";
  open INFO, $name or die "Can't write htm/About.htm: $!\n";

  my $infosect = "
<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"
    \"http://www.w3c.org/TR/REC-html40/loose.dtd\">
<html><head>
<title>$title</title>
<link rel=\"stylesheet\" type=\"text/css\" href=\"/ss/ThML10.css\">
<link rel=\"stylesheet\" type=\"text/css\" href=\"styles.css\">
==metadata==
</head><body>
<h1 class=\"title\">$title</h1>
<h3 class=\"subhead\">by</h3>
<h2 class=\"subhead\">$author</h2>
<h3 class=\"subhead\"><i>CCEL Edition v$version</i></h3>
<p>&nbsp;</p>
$description
<hr>
<h3 class=\"subhead\"><a class=\"TOC\" href=\"TOC.htm\">Table 
  of Contents</a></h3>
<p style=\"text-align: center\">
<form method=\"get\" action=\"/cgi-bin/htsearch\"> 
<input type=\"hidden\" name=\"config\" value=\"htdig\">
<input type=\"hidden\" name=\"restrict\" value=\"/$authorID/htm\">
<b>Search</b> <i>$title</i>: &nbsp;
<br><input type=\"text\" size=\"20\" name=\"words\" value=\"\">
Match: <select name=\"method\">
<option value=\"and\">All
<option value=\"or\">Any
</select>&nbsp;&nbsp;&nbsp;
<input type=\"submit\" value=\"Search\">
</form></p>
<hr>
<p><b>Also available:</b>
<ul>
";

  opendir DIR, "." or die "Couldn't open current directory!\n";
  my @files = readdir DIR;
  closedir DIR;
  my $f;			# add a link to non-.htm files in the same
  foreach $f (@files) {		# directory (e.g. xml, thm, txt versions of
    next if $f =~ m/^(il|nl|htm|t2h)$/;	# this program's own output/helpers
    next if $f =~ m/^\./;		# ".", ".." and hidden files
    next if $f =~ m/old$/;
    next if $f =~ m/_files$/;
    next if $f =~ m/\.[a-g]$/;

    my $size = -s "$f";
    if (!$size) { $size = ""; }		# empty (or unreadable) file
    elsif ($size < 1024) { $size = "[$size bytes]"; }
    else {
      $size >>= 10;		#divide by 1024
      $size = "[$size KB]";
    }
    $size = " [directory]" if -d "$f";	# don't report size for directories
    
    $infosect .= "<li><a href=\"../$f\">$f</a> $size\n";
    $infosect .= "(TIFF original page images)\n" if $f eq "tif";
    $infosect .= "(PNG reduced page images)\n" if $f eq "png";
    $infosect .= "(ThML -- Theological Markup Language [base version])\n" 
				if $f =~ m/\.thm$/;
    $infosect .= "(RTF -- word processor version)\n" if $f =~ m/\.rtf$/;
    $infosect .= "(TXT -- text file)\n" if $f =~ m/\.txt$/;
    $infosect .= "(HTML [gnarly, from Microsoft Word 2000])\n" 
                                if $f =~ m/\.htm$/;
  }
  $infosect .= "</ul><hr>\n";

  s|<(.*?)></\1\s*>\s*||gm;  #delete empty <x></x> tags
  retag("!DOCTYPE",	"_detag");
  retag("ThML",	"_detag");
  retag("ThML.head",	"_detag");
  retag("meta",	"_detag");

  #
  # process each of generalInfo, printSourceInfo, elecEdInfo separately
  #
  s@<(generalInfo|printSourceInfo|electronicEdInfo)\s*>(.*?)</\1\s*>@headsect($1,$2)@egs;

  # $meta is filled in by processDC while the sections above are
  # processed; it stays undefined when the header has no <DC> record.
  my $metatags = defined $meta ? $meta : "";
  $infosect =~ s/==metadata==/$metatags/;
  print INFO $infosect . $_ . "</body></html>\n";
  close INFO;
}


# 
#  Put the Table of Contents and each <divn>...</divn> into a 
#  separate file.
#
#  Parameter: the text of the document from <ThML.body> on.
#  Walks the <div1>..<div7> start tags in document order.  Every
#  division gets a TOC entry; a division shorter than its file break
#  size (filebreak="nnn" attribute, else $MinDivSizeDefault) is held
#  back and merged into the following division's file unless the
#  following division is at a deeper level.  Writes htm/<name> for each
#  file and finally htm/TOC.htm.  getName (not defined in this file;
#  presumably from ThMLutil) supplies (id, title, filename) for a tag.
#
sub processBody
{
  $_ = shift;
  $filename = "TOC.htm";
  
  retag('attr',	      'p class="Attribution"', "p");
  retag('argument',   'p class="Argument"', "p");
  retag('meter',      'p class="meter"', "p");
  retag('sectionInfo','p class="sectionInfo"', 'p');
  retag('name',       'span class="Name"', 'span');
  retag('date',       'span class="Date"', 'span');
  retag('unclear',    'span class="unclear"', 'span');
  retag('l',          'p', 'p');
  retag('verse',      'div class="Verse"', 'div');

  my $toc = head("$title - TOC");
  $toc .= 
  	 "<h1 class=\"title\">$title</h1>\n" .
  	 "<h3 class=\"subhead\">by</h3>\n" .
  	 "<h2 class=\"subhead\">$author</h2>\n" .
  	 "<hr><h1 class=\"title\">Table of Contents</h1>\n" .
  	 "<p class=\"TOC1\"><a class=\"TOC\" href=\"About.htm\">" .
       	 "<i>About This Book</i></a></p>\n";
  
  #
  # For each divn tag we add a TOC entry, then put the contents
  # into a separate file.
  #
  my ($oldlevel, $level, $rest, $content, $shortdiv);
  my ($divID, $divtitle, $nextID, $nexttitle, $next, $fname);
  my ($levelup, $leveldown, $nextlevel);
  my $add_to_div = "";		# text of short divisions awaiting a file

  $level="0";
  while (m|<div[1-7]|)		# while there is a remaining <div
  {
    $oldlevel = $level;
    if (m|<div[1-7].*?<div[1-7]|s) 	#if there are two divns left
    { 
      # cut off everything up to the next <divn, keeping that tag in $_
      s|.*?<div([1-7])(.*?)>(.*?)(<div[1-7])|$4|s; 
      $level = $1;
      $rest = $2;
      $content = $3;
    } 
    else
    { 
      s|.*?<div([1-7])(.*?)>||s; 
      $level = $1;
      $rest = $2;
      $content = $_;
    }

    my $fileBreakSize = $MinDivSizeDefault;
    $fileBreakSize = $1 if $rest =~ m/filebreak="([^"]*)"/i;

    $content = "$add_to_div" . "\n<p>&nbsp;</p>\n" . $content if $add_to_div;
    $shortdiv = (length($content) < $fileBreakSize);
    $levelup   = ($level < $oldlevel); 
    $leveldown = ($level > $oldlevel); 

    ($divID, $divtitle, $fname) = getName($rest);
    if (!$add_to_div) {		# if not merging this div with the last
        $prev = $filename;	# remember last division
        $filename = $fname;
    }

    # look ahead at the following division, if any.  Level 0 means
    # "no following division", which forces the last one to be written.
    $next = "TOC.htm";
    $nextlevel = 0;
    if (m|.*?<div([1-7])(.*?)>|s)
    { 
      $nextlevel = $1;
      ($nextID, $nexttitle, $next) = getName($2);
    }

    # print div tags needed for collapsable outline
    if ($leveldown && $oldlevel ne "0") {
	$display = "block";
	$display = "none" if $level > 2;
        $toc .= "<div id=\"d$divid\" style=\"display:$display\">\n";
	$divid++;
    }
    if ($levelup) {
        my $ol = $oldlevel;
	while ($level < $ol) {
	    $toc .= "</div>\n";
	    $ol--
	}
    }

    $toc .= "<p class=\"TOC$level\"><a class=\"TOC\" href=\"$filename\">";
    $toc .= "$divtitle</a></p>\n";

    if ($shortdiv and ($level<=$nextlevel)) {  # if division is too short and 
        $add_to_div = $content;		       # next div is not lower level
        $add_to_div = "  " unless $add_to_div; # just remember this division
	$idfile{$divID} = $filename;
    } else {				       # else write out file
        $add_to_div = "";
        open OF, ">htm/$filename" or die $!;
        print OF processDiv($content,$divID,$divtitle,$prev,$next, $filename);
        close OF;
    }
  }
  if ($add_to_div) {		#stuff left over -- process it
    # Safety net: pass the file name too, so that processDiv records the
    # ids of this text under the right file instead of an undefined one.
    open OF, ">htm/$filename" or die $!;
    print OF processDiv($content, $divID, $divtitle, $prev, $next, $filename);
    close OF;
  }
  
  $toc .= "\n<hr><p style=\"font-size:11pt\"><a href=";
  $toc .= "\"/info/nav.htm\">Navigation and searching hints</a>\n";
  $toc .= "</body></html>\n";

  # now add expandability to the TOC: an entry directly followed by a
  # <div id="dN"> gets an open/shut icon that toggles that div
  $toc =~ s|(<p class="TOC[^>]*>)(<a class="TOC[^>]*>[^<]*</a></p>\s*<div id="d([^"]*)" style="display:([^"]*)")|$1<a href="javascript:t(d$3,p$3)"><img border=0 src="/pix/$4.png" id="p$3"></a> $2|gsi;
  $toc =~ s/block.png/open.gif/g;
  $toc =~ s/none.png/shut.gif/g;

  open TOC, ">htm/TOC.htm" or die $!;
  print TOC $toc;
  close TOC;

}


#
# handle one divn of the document
#
# Parameters: division text, division id, division title, previous
# file name, next file name, and the name of the file being written.
# Resets the footnote counter and footnote text, records every id found
# in the text in %idfile, rewrites links, scripture references,
# footnotes and tables, and returns the complete HTML page.
#
sub processDiv
{
  my ($text, $divID, $divtitle, $prev, $next, $fn) = @_;

  $notenum   = 1;
  $footnotes = "";

  my $front = head("$divtitle", $prev, $next);	#construct HTML head
  my $nav   = navbar($prev, $next);
  my $back  = "</body></html>\n";

  # here we attempt to make a hash of the file containing each ID;
  # the scan eats a scratch copy so the real text stays untouched
  $idfile{$divID} = $fn;
  my $scratch = $text;
  while ($scratch =~ s/^.*?id="([^"]*)"//is) {
    $idfile{$1} = $fn;
  }

  #use real filenames in urls
  $text =~ s/href="(.*?)"/uri2url($1)/gsie;

  # link scripture references to the bible gateway
  $text =~ s|<scripRef(\s+[^>]*parsed="(.*?)"[^>]*)>(.*?)</scripRef>|bglink($1,$2,$3)|gsie;

  # process footnotes
  $text =~ s|(<note.*?>.*?</note>)|note($1,$notenum)|gsie;

  #change &line; to <br>
  $text =~ s|\&line;|<br>|g;
  #add space to blank paragraphs
  $text =~ s|(<p\s[^>]*>)(</p>)|$1&nbsp;$2|gs;

#sg added
  $text =~ s|(<table[^>]*>)(.*?)(</table>)|$1.&formatTable($2).$3|gsie;
#sg end added 

  #now for something really nasty: lists were generated as 
  #  <li><ul>: the <li> is required in valid HTML4.
  #But it looks terrible, with blank lines where they're not wanted.
  #This hack deletes that extra <li>, resulting in invalid (but better?)
  #HTML.
  $text =~ s/<li>(<ul class="Index)/$1/g;

  return join("", $front, $nav, "\n", $text, "\n", $footnotes, $nav, $back);
}


#
# process footnotes -- change to linked superscripted number, and
# store up the footnote bodies in $footnotes
#
# Parameter: one complete <note ...>...</note> element.  (Callers also
# pass the note number, but the file-scope $notenum is what is used and
# incremented.)
# Returns: the superscripted link that replaces the note in the text.
#
sub note
{
  my $note=shift;
  my $num = $notenum;
  $footnotes = "\n<hr class=\"Note\">\n" if $footnotes eq "";

  # the caller finds the element case-insensitively, so strip it that way
  $note =~ s|</?note[^>]*>||gi;

  # link from the footnote back to its place in the text
  my $bref="<a class=\"Note\" name=\"_fnf$num\" " .
     "href=\"#_fnb$num\"><sup class=\"Note\">$num</sup></a>";

  # Put the number inside the first paragraph when the note starts with
  # one (leading whitespace allowed, <pb>/<pre> do not count); otherwise
  # put it in front, so that no footnote ends up without its number.
  $note = "$bref $note" unless $note =~ s|^(\s*<p\b[^>]*>)|$1$bref |;

  $note = "\n<div class=\"Note\" id=\"_fnf$num\">" . $note . "</div>\n";
  $footnotes .= $note;

  # link from the text to the footnote
  my $fref="<a class=\"Note\" name=\"_fnb$num\" " .
     "href=\"#_fnf$num\"><sup class=\"Note\">$num</sup></a>";

  $notenum++;
  return $fref; 
}

#sg added
# subroutine for cleaning up the formatting of tables with a class
# and valign attribute.
#
# Parameter: the text between <table ...> and </table>.
# Returns:   that text with newlines removed and with
#   - a colspan on the single cell of the first row when the second row
#     has more cells (the first row is then a caption), and
#   - valign="top" on cells of the later rows that start with a
#     <p class="TableText"> paragraph.
# Text that is not part of a row (an empty table, whitespace or other
# residue after the last </tr>) is passed through unchanged.
#
sub formatTable($)
{
    my $table = shift;
    $table = "" unless defined $table;
    $table =~ s|\n||g;

    my @rows = split m|</tr>|, $table;
    return $table unless @rows;		# nothing that looks like a row

    # number of cells in the first row and, if there is one, the second
    my @capcells = split m|</td>|, $rows[0];
    my $caps = scalar @capcells;
    my $cols = 0;
    if (defined $rows[1]) {
        my @cells = split m|</td>|, $rows[1];
        $cols = scalar @cells;
    }

    if (($caps == 1) && ($caps < $cols)) {
        # works for "<td>" as well as "<td attr...>"
        $rows[0] =~ s|<td(?=[\s>])|<td colspan="$cols"|;
    }

    for my $j (0 .. $#rows) {
        # split removed the </tr> of real rows only; do not invent one
        # for leftover text that contains no row or cell tag
        next unless $rows[$j] =~ m/<t[rdh]\b/;
        $rows[$j] .= '</tr>';
        next if $j == 0;
        $rows[$j] =~ 
          s|<td ([^>]*>\s?<p class="TableText")|<td valign="top" $1|gs;
    }
    return join("",@rows);
}
#sg end added
 

#
# link scripture references to bible gateway
#
# Parameters: the attribute text of the <scripRef> tag, the value of its
# parsed="..." attribute, and the element content.
# parsed is a ";"-separated list of "version|book|fch|fv|tch|tv"
# records (from/to chapter and verse); each record is appended to the
# passage string followed by ",".  The version of the last record is
# used for the query, except "VUL", which is dropped.
# Returns: an <a> element around the content, carrying the id="..."
# attribute of the original tag when it had one.
#
sub bglink
{
  my ($atts, $parsed, $text) = @_;
  my ($s, $bg, $version, $book, $fch, $fv, $tch, $tv);

  # Take the id only from a successful match; after a failed match $1
  # still holds an unrelated earlier capture.
  my $id = (defined $atts && $atts =~ m|(\bid=".*?")|) ? $1 : "";
  $bg="";

  foreach $s (split /;/, (defined $parsed ? $parsed : "")) {
    ($version, $book, $fch, $fv, $tch, $tv) = split /\|/, $s;
    $bg .= $book if defined $book;
    $bg .= "+$fch" if $fch;
    $bg .= ":$fv" if $fv;
    $bg .= "-" if $tch;
    $bg .= "$tch:" if $tch && (!defined $fch || $tch ne $fch);
    $bg .= $tv if $tv;
    $bg .= ",";
  }
  $version = "" unless defined $version;
  $bg =~ s/ //g;
  $bg = "<a href=\"http://bible.gospelcom.net/bible?passage=" . $bg ;
# $version = "Vulgate&language=Latin" if $version eq "VUL";
  $version = "" if $version eq "VUL";	#would like English vulgate...
  $bg .= "&version=$version" if $version;
  $bg .= "\" class=\"scripRef\" $id>$text</a>";
  return $bg;
}


#------------tag-hack subroutines-----------------

#
# Process a head section: generalInfo, printSourceInfo, or electronicEdInfo
# First parameter is tag; second is contents of element
#
# Returns the HTML for the section: a heading, for generalInfo a few
# summary lines built from the values getInfo collected, and then the
# contents with each element run through headItem / emptyHeadItem.
#
sub headsect
{
  my ($section, $contents) = @_;
  print "Processing $section\n" if $debug;

  # what the heading calls each kind of section
  my %subject = (
    generalInfo      => "Book",
    printSourceInfo  => "Print Source",
    electronicEdInfo => "Electronic Edition",
  );
  my $result = "<h3>Information on the "
             . (exists $subject{$section} ? $subject{$section} : "")
             . "</h3>\n";

  if ($section eq "generalInfo") {
    $result .= "<p class=\"HeadItem\"><b>Copyright</b>: $rights</p>\n"
            .  "<p class=\"HeadItem\"><b>CCEL Identifier</b>: $publisherID/"
            .  "$authorID/$bookID/$version</p>\n"
            .  "<p class=\"HeadItem\"><b>How to Reference This Edition</b>: "
            .  "$DCpublisher, $DCdate, v$version, URL $URL</p>\n";
  }

  # elements with content first, then the empty <x/> ones
  $contents =~ s|<([^>]*?)>(.*?)</\1>|headItem($1,$2)|gse;
  $contents =~ s|<([^>]*?)/>|emptyHeadItem($1)|gse;

  return $result . $contents . "\n\n";
}


#
#  Process one header element inside generalInfo, etc:
#  Modify element names to make nice titles: 
#    printSourceInfo --> Print Source Info 
#
#  Parameters: the element name and the element content.
#  The ID and version elements produce nothing (headsect reports them
#  itself for generalInfo) and a DC element is handed to processDC.
#  Everything else becomes one "HeadItem" paragraph; elements with a
#  "Flow" content model (revisionHistory, status, editorialComments)
#  may bring their own <p> but are currently formatted the same way.
#
sub headItem
{
  my ($tag, $item) = @_;

  my %silent = (bookID => 1, authorID => 1, version => 1, publisherID => 1);
  return "" if $silent{$tag};
  return processDC($item) if $tag eq "DC";

  my $label = $tag;
  $label =~ s|([A-Z]{2})([a-z])|$1 $2|g;  #space after acronym
  $label =~ s|([a-z])([A-Z])|$1 $2|g;	  #add in spaces
  $label = ucfirst($label);		  #make first letter upper case

  return "<p class=\"HeadItem\"><b>" . $label . "</b>: " . $item . "</p>";
}


#
# Process one empty header element (<x/>): the parameter is everything
# between "<" and "/>".  Elements whose text contains "image" are
# dropped; anything else is returned as a plain <x> tag.
#
sub emptyHeadItem
{
  my ($tag) = @_;
  return ($tag =~ m/image/) ? "" : "<" . $tag . ">";
}


#
# Process the Dublin Core record (the content of the <DC> element).
# Appends one <meta> tag per DC.* element to the file-scope $meta, which
# processHead puts into the head of About.htm, and returns the record
# formatted as an HTML table (element, scheme, content).
#
sub processDC
{
  my $DC=shift;
  $meta = "" unless defined $meta;

# convert to meta tags for HTML metadata
  my $DC1=$DC; 	
  $DC1 =~ s|</?DC>||gi;
  my ($tag,$content,$scheme);
  while ($DC1 =~ s|<((DC\.\w+).*?)>(.*?)</\2>||si)
  {
      $tag = $1;
      $content = $3;
      $tag =~ s|\s*sub="(\w+)"\s*|.$1|gsi;	# sub="x" becomes a ".x" suffix
      $scheme="";
      $scheme = " $1" if $tag =~ s|\s*(scheme\s*=\s*".*?")\s*||is;
      # HTML's META element carries its value in "content"; a double
      # quote inside the value would end the attribute early
      $content =~ s|"|&quot;|g;
      $meta .= "<meta name=\"$tag\"$scheme content=\"$content\">\n";
  }

# convert to table rows for the visible record
  $DC =~ s|text/xml|text/html|;
  $DC =~ s|\s+sub="(\w+)"|.$1|g;			#for DC subelement
  $DC =~ s|<(DC[A-Za-z._]+)\s+scheme="(.*?)"\s*>(.*?)</DC.*?>|<tr><td>$1<td>$2<td>$3</tr>|gs;
  $DC =~ s|<(DC[A-Za-z._]+)\s*>(.*?)</DC.*?>|<tr><td>$1<td><td>$2</tr>|gs;
  my $ret = "<h3>Dublin Core Record</h3>\n";
  $ret .= "<center><table border=\"2\">\n";
  $ret .= "<tr><td><b>Element</b><td><b>Scheme</b><td><b>Content</b></tr>\n";
  $ret .= "$DC</table></center>\n";
  return $ret;
}
  


#
#given a title, return head of document, through <body>.
#
# Parameters: page title, previous file name, next file name.  The two
# file names are optional and fall back to "TOC.htm".
#
sub head
{
  my ($title, $prev, $next) = @_;
  $prev ||= "TOC.htm";
  $next ||= "TOC.htm";

  # the page starts with an empty line
  return "\n" . <<"END_OF_HEAD";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
    "http://www.w3c.org/TR/REC-html40/loose.dtd">
<html><head>
<title>$title</title>
<link rel="stylesheet" type="text/css" href="/ss/ThML10.css">
<link rel="stylesheet" type="text/css" href="styles.css">
<link rel="prev" type="text/html" href="$prev">
<link rel="toc"  type="text/html" href="TOC.htm">
<link rel="next" type="text/html" href="$next">
<script language="javascript" src="/ss/util.js"></script>
</head><body>
END_OF_HEAD
}


#
#build a nav bar
#
# Parameters: previous file name, next file name.
# Returns a centered paragraph with three image links: back, table of
# contents, next.
#
sub navbar
{
  my ($prev, $next) = @_;

  # (link target, icon, alternate text) for each button, left to right
  my @buttons = (
    [ $prev,     "mroonppv.gif", "Back"     ],
    [ "TOC.htm", "mroontoc.gif", "Contents" ],
    [ $next,     "mroonpnx.gif", "Next"     ],
  );

  my $nav = "<p class=\"Center\">";
  foreach my $button (@buttons) {
    my ($href, $icon, $alt) = @$button;
    $nav .= "<a href=\"$href\">"
         .  "<img src=\"/pix/$icon\" alt=\"$alt\" border=\"0\"></a>";
  }
  return $nav . "</p>\n";
}


#
# make il, nl files with HTML to import into CCEL index, whats-new page.
#
# Writes two files into the current directory: "il" (an index entry for
# the book) and "nl" (a dated "what's new" entry).  Both link to
# htm/TitlePage.htm when that file exists, else to the htm directory.
#
sub makeLinks
{
  # The URL contains the first character of the author ID.  Take it with
  # substr: after a failed pattern match $1 would still hold an
  # unrelated earlier capture.
  my $aid     = defined $authorID ? $authorID : "";
  my $bid     = defined $bookID   ? $bookID   : "";
  my $initial = length($aid) ? substr($aid, 0, 1) : "";

  my $url = "http://www.ccel.org/$initial/$aid/$bid/htm/";
  $url .= "TitlePage.htm" if -f "htm/TitlePage.htm";

  open IL, ">il" or die "Can't write il: $!\n";
  print IL "<dt>$author\n<dd><a href=\"$url\">$title</a>\n";
  close IL or die "Can't write il: $!\n";

  my ($day,$month,$year) = (localtime)[3,4,5];
  my $date = sprintf "%04d-%02d-%02d",$year+1900,$month+1,$day;

  open NL, ">nl" or die "Can't write nl: $!\n";
  print NL "\n$date<ul>\n<li>Added <a href=\"$url\">$title</a>\n";
  print NL "by $author.\n";
  # these three are declared at file scope but not set in this file
  print NL "Digitized by $digitized.\n" if $digitized;
  print NL "Typed by $typed.\n" if $typed;
  print NL "ThML markup by $thml.\n" if $thml;
  print NL "$status\n" if $status;
  print NL "</ul>\n";
  close NL or die "Can't write nl: $!\n";
}


#
# if you find a <pb n="xxx"> element, try to link it to a page image, which 
# should be located at png/nnnn=xxx.htm
#
# Parameters: the complete <pb> element and the value of its n attribute.
# Returns: the <pb> element followed by a small page icon.  With the
# ccel hacks turned on, the icon links to the first (in sorted order)
# file in png/ whose name ends in "=<page>.htm", if there is one.
#
sub pageref
{
  my $pb = shift;		#<pb> element
  my $file = "";
  my $page = shift;		#the page number of the <pb>
  my $pagenum = $page;
  $pagenum =~ s/^.*\.//;	#change i.3 to 3

  # Find the page image by reading the directory.  The page number comes
  # from the document, so it must neither reach a shell nor be treated
  # as a pattern; reading the names also avoids a trailing newline.
  if ($ccelhacks && opendir(PNGDIR, "png")) {
    my @names = sort readdir PNGDIR;
    closedir PNGDIR;
    foreach my $name (@names) {
      next if $name =~ m/^\./;
      next unless $name =~ m/=\Q$page\E\.htm$/;
      $file = "png/$name";
      last;
    }
  }
  
  $file = "../" . $file if $file;	# the pages are written into htm/
  my $pref = "<img class=page border=0 src=http://www.ccel.org/pix/" .
      "p$pagenum.gif title=\"Page $page\" align=left width=20 height=22>";
  $pref = "<a href=\"$file\">$pref</a>" if $file;
  $pref = "$pb\n$pref\n";
  return $pref;
}


# this routine takes an <index>....</index> element and assumes that
# all the numbers listed therein are page numbers. They are linked to
# the Page_nnn ids of this document.
#
# Parameters: the <index> start tag, the element content, the end tag.
# Only numbers that directly follow a space are linked.  The links use
# the "|id" form that uri2url resolves to a file name later on.
sub linkIndex
{
  my ($front, $index, $rear) = @_;

  $index =~ s! (\d+)! <a class="TOC" href="|Page_$1">$1</a>!gsi;

  return join("", $front, $index, $rear);
}
