#!\c:\apps\perl\bin\perl.exe

# for generating readable html transcriptions from sgm files 
# of already reviewed or partly reviewed complete books.

# Emit the fixed page header.  Everything printed from here down to the
# closing </STYLE> is a literal stylesheet; each rule is printed with a
# leading "\n" so that the rules land on separate lines of the output.
# The class names used in the selectors (argument, epigraph, closer, ...)
# are the CLASS values written by the substitutions in the main loop.
print ("<HTML><HEAD><TITLE>HTML version</TITLE>\n");
print ('<STYLE type="text/css">');
print ("\n  DIV.argument {display: block; margin-left: 20%; margin-right:20%; background-color: #FFCC66; margin-top: 1em; margin-bottom: 1em; }");
print ("\n  DIV.epigraph {display: block; margin-left: 20%; margin-right:20%; background-color: #FFCC99; margin-top: 1em; margin-bottom: 1em; }");
print ("\n  DIV.closer {display: block; margin-top: 1em; margin-bottom: 1em; font-family: \"Comic Sans\"; font-size: 14pt;}");
print ("\n  DIV.opener {display: block; margin-top: 1em; margin-bottom: 1em; font-family: \"Comic Sans\"; font-size: 14pt;}");
print ("\n  DIV.figblock {display: block; background: yellow; margin-top: 1em; margin-bottom: 1em; margin-left: 20%; 	 margin-right: 20%;  padding: 3px 3px 3px 3px; }");
print ("\n  DIV.figinline {display: inline; background-color: yellow; margin-top: 0; margin-bottom: 0; margin-left: 0; 	margin-right: 0; padding: 0; } ");
print ("\n  DIV.figblock:before {content: \"FIGURE\" ; text-decoration: underline;} ");
print ("\n  DIV.figinline:before {content: \"FIGURE\" ; text-decoration: underline;} ");
print ("\n  DIV[CLASS=\"figblock\"] > H2 { display: block; color: blue; font-size: 12pt; } ");
print ("\n  DIV[CLASS=\"figinline\"] > H2 { display: inline; color: blue; font-size: 12pt; } ");
print ("\n  DIV[CLASS=\"figblock\"] > SPAN.figdesc { display: block; font-size: small; color: brown; } ");
print ("\n  DIV[CLASS=\"figinline\"] > SPAN.figdesc { display: inline; font-size: small; color: brown; } ");
print ("\n  SPAN.figdesc:before { content: \"[\" ; } ");
print ("\n  SPAN.figdesc:after  { content: \"]\" ; } ");
print ("\n  DIV.text:before {content: \"Primary language is \" attr(LANG) \".\"; display: block; font-size: 8pt; color: gray; } ");
print ("\n  DIV.speech {display: block; margin-top: 1em; } ");
print ("\n  DIV.letter {display: block; margin-top: 1em; margin-bottom: 1em; margin-left: 10%; margin-right: 10%; background-color: #CCFFFF; } ");
print ("\n  DIV.license {display: block; margin-top: 1em; margin-bottom: 1em; margin-left: 10%; margin-right: 10%; background-color: #FFCC99; } ");
print ("\n  DIV.floatext {display: block; margin-top: 1em; margin-bottom: 1em; margin-left: 10%; margin-right: 10%; background-color: #FFFFCC; } ");


print ("\n  SPAN.speaker {display: inline; background-color: #CCCCFF; } ");
print ("\n  SPAN.stage  {display: inline; font-size: smaller; font-style: italic; color: brown; }  ");
print ("\n  SPAN.stage:before {	content: \"[\"; } ");
print ("\n  SPAN.stage:after { content: \"]\"; } ");

print ("\n  DIV.list { display: block; margin-top: 1em; margin-bottom: 1em; margin-left: 1em; }");
print ("\n  SPAN.item { display: list-item; margin-bottom: 1.5em; margin-left: 2em; list-style-type: disc; } ");
print ("\n  SPAN[CLASS=\"item\"] SPAN.item { display: list-item; margin-bottom: 1.5em; margin-left: 2em; list-style-type: circle; } ");
print ("\n  SPAN[CLASS=\"item\"] SPAN[CLASS=\"item\"] SPAN.item { display: list-item; margin-bottom: 1.5em; margin-left: 2em; list-style-type: square; } ");
print ("\n  SPAN[CLASS=\"item\"] SPAN[CLASS=\"item\"] SPAN[CLASS=\"item\"] SPAN.item { display: list-item; margin-bottom: 1.5em; margin-left: 2em; list-style-type: disc; } ");
print ("\n  SPAN[CLASS=\"label\"] + SPAN.item  { display: block; text-indent: 0; margin-left: 3em; font-size: smaller; } ");
print ("\n  DIV[CLASS=\"list\"] > SPAN.label { display: list-item; font-weight: bold; margin-left: 1em; margin-right: 40%; margin-top: 1em; list-style-type: none; } ");
print ("\n  SPAN[CLASS=\"label\"] + SPAN.item[ROLE=\"label\"] { display: block; text-indent: 0; margin-left: 3em; color: green; } ");
print ("\n  SPAN[CLASS=\"label\"] + SPAN.item[ROLE=\"total\"] { display: block; text-indent: 0; margin-left: 3em; font-weight: bold; background-color: gray; }  ");
print ("\n  DIV.list > SPAN.label[ROLE=\"label\"] { display: list-item; font-weight: bold; margin-left: 1em; margin-right: 40%; margin-top: 1em; list-style-type: none; color: green; } ");
print ("\n  SPAN[CLASS=\"item\"] > SPAN.label { display: inline; font-style: italic ; font-weight: bold ; } ");
print ("\n  DIV[CLASS=\"note\"] > SPAN.label { display: inline; font-style: italic ; font-weight: bold ; } ");
print ("\n  P > SPAN.label { display: inline; font-style: italic ; font-weight: bold ; } ");
print ("\n  TD > SPAN.label { display: inline; font-style: italic; font-weight: bold; color: green; } ");
print ("\n  SPAN.milestone {display : inline ; font-size: 9pt; color: green; } ");
print ("\n  SPAN.pb {display: block; text-align: center; margin-bottom: 3px; margin-top: 3px; color: gray; font-size: 8pt; } ");
print ("\n  SPAN.pb:before {content: \"----------page \"attr(N)\" on ref \"attr(REF)\"---------\"; } ");
print ("\n  P.italic {font-style: italic;}");
print ("\n  P.small  {font-size: smaller;}");
print ("\n  P.commentary  {font-size: smaller;}");
print ("\n  P.byline {background-color: #CCFFCC; }");
print ("\n  P.trailer {display: block; color: brown; text-align: center; font-size: 14pt; } ");
print ("\n  P.stanza {display: block; margin-left: 10%; margin-right:10%; } ");
print ("\n  P.margdot { padding-left: 20px; background-image: url(http://www.textcreationpartnership.org/docs/pix/other/dots1.gif); background-position: left; background-repeat: repeat-y; } ");
print ("\n  SPAN.line { display: block;  margin-left: 10%; margin-right:10%; } ");
print ("\n  SPAN.indentline { display: block;  margin-left: 4%; margin-right:10%; } ");
print ("\n  BLOCKQUOTE[CLASS=\"italinline\"]  SPAN.line { display: block;  margin-left: 10%; margin-right:10%; background-color: #FFFFCC; } ");
print ("\n  BLOCKQUOTE[CLASS=\"italblock\"]  SPAN.line { display: block;  margin-left: 10%; margin-right:10%; background-color: #FFFFCC; } ");
print ("\n  SPAN.rline { display: block;  margin-left: 10%; margin-right:15%; text-align: right; } ");
print ("\n  P[CLASS=\"stanza\"] > P.stanza {margin-left: 5%; margin-right: 5%; } ");
print ("\n  P[CLASS=\"stanza\"] > SPAN.line {margin-left: 0%; margin-right: 0%; } ");
print ("\n  P[CLASS=\"stanza\"] > SPAN.rline {margin-left: 0%; margin-right: 5%; text-align: right;} ");
print ("\n  SPAN.add:before {content: \"{\"; } ");
print ("\n  SPAN.add {display: inline ; text-decoration: line-through; } ");
print ("\n  SPAN.add:after {content: \"}\"; } ");
print ("\n  SPAN.abbr {display: inline; text-decoration: underline; } ");
print ("\n  SPAN.abbr[EXPAN]:after {content: \" (\" attr(EXPAN) \") \"; color: green; font-size: 8pt; } ");
print ("\n  SPAN.gap {color: #CC6600; } ");
print ("\n  SPAN.above {line-height: 1.6; margin-left: -5; margin-right: -5; vertical-align: 100%; } ");

        
print ("\n  BLOCKQUOTE {display: block;	margin-top: 1em; margin-bottom: 1em; margin-left: 10%; margin-right: 10%; background-color: #FFFFCC; }");
print ("\n  BLOCKQUOTE.margquot {padding-left: 20px; background-image: url(http://www.textcreationpartnership.org/docs/pix/other/commas1.gif); background-position: left; background-repeat: repeat-y; } ");
print ("\n  SPAN.margquot {display: block; padding-left: 20px; background-image: url(http://www.textcreationpartnership.org/docs/pix/other/commas1.gif); background-position: left; background-repeat: repeat-y; } ");

print ("\n  BLOCKQUOTE.inline   {display: inline; background-color: #FFFFCC; }");
print ("\n  BLOCKQUOTE.italinline {display: inline; font-style: italic; background-color: #FFFFCC; margin-left: 0%; margin-right: 0%; }");
print ("\n  BLOCKQUOTE.italblock {display: block; font-style: italic; background-color: #FFFFCC; margin-left: 10%; margin-right: 10%; }");

print ("\n  SPAN.exposed  {color: green;}");
print ("\n  SPAN.signed   {display: block; color: blue;}");
print ("\n  SPAN.salute   {display: block; color: orange;}");
print ("\n  DIV[CLASS=\"opener\"] SPAN.signed   {display: inline;}");
print ("\n  DIV[CLASS=\"closer\"] SPAN.signed   {display: inline;}");
print ("\n  DIV[CLASS=\"opener\"] SPAN.salute   {display: inline;}");
print ("\n  DIV[CLASS=\"closer\"] SPAN.salute   {display: inline;}");
print ("\n  SPAN.bibl {display: inline;	color: #CC0099; } ");
print ("\n  DIV[CLASS=\"epigraph\"] SPAN.bibl:before {content: \"--\" ; }");
print ("\n  DIV[CLASS=\"epigraph\"] > SPAN.bibl {display: block; text-align: right; color: #CC0099; } ");
print ("\n  DIV.note {display: inline; font-size: smaller; color: #999900; } ");
print ("\n  DIV.note:before {content: \" [\"attr(PLACE)\" note \"attr(N) \":\"; } ");
print ("\n  DIV.note:after {content:  \"] \" } ");
print ("\n  DIV.temphead {display: none;	} ");
print ("\n  DIV.idg  {display: block; font-size: 8pt; color: gray; margin-bottom: 3em; } ");
print ("\n  DIV.idg:before {content: \"TCP id: \" attr(IDD)	} ");
print ("\n  SPAN.bibno:before { content: \"bib no. (\" attr(T) \") \" } ");	
print ("\n  SPAN.stc:before { content: \"STC: (\" attr(T) \") \" } ");
print ("\n  SPAN.vid:before { content: \"image set: \" } ");
print ("\n  SPAN.date {display: inline; color: magenta; } ");
print ("\n  SPAN.dateline {display: block; text-align: right; font-size: 9pt; } ");
print ("\n  SPAN.unclear {display: inline; color: #999999 ; background-color: #CCCCCC; } ");
print ("\n  SPAN.hi {font-style: italic;} ");
print ("\n  SPAN[CLASS=\"hi\"] > SPAN.hi {font-style: italic; font-weight: bold; } ");
print ("\n  SPAN[CLASS=\"signed\"] > SPAN.hi {font-style: italic; font-weight: bold; } ");
print ("\n  SPAN[CLASS=\"salute\"] > SPAN.hi {font-style: italic; font-weight: bold; } ");
print ("\n  BLOCKQUOTE[CLASS=\"italinline\"] > SPAN.hi {font-style: normal; } ");
print ("\n  BLOCKQUOTE[CLASS=\"italblock\"] > SPAN.hi {font-style: normal; } ");
print ("\n  P[CLASS=\"italic\"] > SPAN.hi {font-style: normal; } ");
print ("\n  SPAN[CLASS=\"stage\"] > SPAN.hi {font-style: italic; font-weight: bold; } ");
print ("\n  SPAN.hi[REND=\"blackletterType\"] {font-style: normal; font-weight: bolder;} ");
print ("\n  SPAN.numeral {font-family: Georgia, Candara, Constantia, Corbel ; } ");
print ("\n  DIV.headnote, DIV.tailnote {display: block; margin-left: 20%; margin-right:20%; background-color: #FFCC66; margin-top: 1em; margin-bottom: 1em; } ");
print ("\n  DIV[CLASS=\"headnote\"] > H2 {display: block; font-size: 14pt; color: green; text-align: center; } ");
print ("\n  DIV[CLASS=\"tailnote\"] > H2 {display: block; font-size: 14pt; color: green; text-align: center; } ");
print ("\n  DIV.postscript {display: block; background-color: #FFCCFF ; font-size: smaller; margin-left: 10%; margin-right: 10%; } ");      
print ("\n  BLOCKQUOTE > SPAN.bibl:before { content: \"--\" ; } ");
print ("\n  BLOCKQUOTE > SPAN.bibl {display: block; text-align: right; color: #CC0099; } ");
print ("\n  TR[ROLE=\"label\"] > TD {text-align: center; background-color: yellow; font-weight: bold; } ");
print ("\n  TR[ROLE=\"total\"] > TD {background-color: gray; font-weight: bold; } ");
print ("\n  H2 {display: block; color: #FF0000;  font-size: 18pt; } ");
print ("\n  DIV[CLASS=\"TEI2\"] > H2 {display: block; color: #FF0000;  font-size: 16pt; } ");
print ("\n  DIV[CLASS=\"TEI3\"] > H2 {display: block; color: #6666FF;  font-size: 16pt; } ");
print ("\n  DIV[CLASS=\"TEI4\"] > H2 {display: block; color: #FF3300;  font-size: 14pt; } ");
print ("\n  DIV[CLASS=\"TEI5\"] > H2 {display: block; color: #FF9966;  font-size: 14pt; } ");
print ("\n  DIV[CLASS=\"TEI6\"] > H2 {display: block; color: #FF9966;  font-size: 12pt; } ");
print ("\n  DIV[CLASS=\"TEI7\"] > H2 {display: block; color: #FF3399;  font-size: 12pt; } ");
print ("\n  P[CLASS=\"stanza\"] > H2 { color: #FF0000; font-size: 14pt; } ");
print ("\n  DIV[CLASS=\"list\"] > H2 { color: #FF0000; font-size: 14pt; } ");

print ("\n</STYLE>\n");
print ("</HEAD><body bgcolor=\"#F5F5F0\" text=\"#000000\" link=\"#FF0000\" alink=\"#FFFFFF\" vlink=\"#FF0000\" STYLE=\"font-family: 'arial unicode ms'; margin-left: 10px; \"> \n");

 # Input record separator: <> returns text up to and including the next
 # literal "</EEBO" instead of one line at a time, so the substitutions
 # below can match across line breaks.  Whatever follows "</EEBO" in the
 # input (the rest of that end tag) begins the next record.
 $/= "</EEBO";
# None of the s### statements in this loop names a target, so each one
# edits the current record in $_.
while (<>) {
s,<!DOCTYPE[^>]+>,,g;
s#<TEMPHEAD>#<DIV CLASS="temphead">#g;
s#<IDG[^>]+ID="([^"]+)"[^>]*>#<DIV CLASS="idg" IDD="$1">#g;
s#</TEMPHEAD>#</DIV>#g;
s#</IDG>#</DIV>#g;
s#<STC#<BR><SPAN CLASS="stc"#g;
s#<BIBNO#<BR><SPAN CLASS="bibno"#g;
s#<VID SET="([^"]+)">#<BR><SPAN CLASS="vid">[set $1] #g;
s#<VID>#<BR><SPAN CLASS="vid">#g;
s#</VID>#</SPAN>#g;
s#</BIBNO>#</SPAN>#g;
s#</STC>#</SPAN>#g;

# expands some CSS RENDs into STYLE attributes

s# [rR][eE][nN][dD]="CSS\(([^:\)]+):([^;\)]+);([^:\)]+):([^;\)]+);([^:\)]+):([^;\)]+);\)"# STYLE="$1: $2; $3: $4; $5: $6;"#g;
s# [rR][eE][nN][dD]="CSS\(([^:\)]+):([^;\)]+);([^:\)]+):([^;\)]+);\)"# STYLE="$1: $2; $3: $4;"#g;
s# [rR][eE][nN][dD]="CSS\(([^:\)]+):([^;\)]+);\)"# STYLE="$1: $2;"#g;


# makes fancy display of decorated initials

s#_([a-zA-Z\$])#<FONT SIZE="7" FACE="Script MT Bold">$1</FONT>#g;
s#_(&[^;]+;)#<FONT SIZE="7" FACE="Script MT Bold">$1</FONT>#g;

# makes fancy display of numerals
# this causes some problems so is commented out for the moment.
#
# s,([^"0-9V])([0-9]+)([^">0-9]),$1<SPAN CLASS="numeral">$2</SPAN>$3,g;

# moves IDs out of elements

s# ID="([^"]+)"([^>]*)>#$2> <SPAN CLASS="exposed"><A NAME="$1">[id:$1]</A></SPAN>#g;

# closes off stray interrupted HIs (Emma's suggestion)
# no longer (?) needed for complete books instead of samples.

# s,<PB,</I></TD></TH></TR></TABLE></LI></UL></H2><PB,g;
# s#<PB([^>]+)>([^<]*)</CELL>#<PB$1><TABLE><ROW><CELL>$2</CELL>#g;
# s#<PB([^>]+)>([^<]*)</ITEM>#<PB$1><LIST><ITEM>$2</ITEM>#g;
# s#<PB([^>]+)>([^<]*)<ROW#<PB$1><TABLE><ROW#g;

# pull at least some of the PBs out of list items. Cells and rows too?

s,</ITEM>,\@ITEM\@,g;
s,<ITEM>([^@]*)(<PB[^>]+>),$2<ITEM>$1,g;
s,\@ITEM\@,</ITEM>,g;

# fixes label misname and empty cells

s,ROLE="[lL][aA]",ROLE="label",g;
s,<CELL([^>]*)></CELL>,<CELL$1>&nbsp;</CELL>,g;

# upper-case element and attribute names

s#<(/?)([a-zA-Z0-9]+)#<$1\U$2\E#g;
s# ([a-zA-Z0-9]+)="# \U$1\E="#g;
s#REND="([^"]+)"#REND="\L$1\E"#g;
s#REND="marginal dots"#REND="margdot"#g;
s#REND="marginal quotes"#REND="margquot"#g;

# P: rends roles and n's
#
# REND and ROLE values on <P> become the CLASS of the HTML paragraph.  The
# CLASS value is closed right after the captured value; whatever attributes
# stood in front of REND/ROLE are re-emitted after it as ordinary attributes.
# (Previously they were written inside the quotes, which produced values such
# as CLASS="italic " or CLASS="italic LANG="lat" " and kept the exact-match
# CSS selectors like P[CLASS="italic"] from applying.)

s#<P ([^>]*)REND="([^"]+)"#<P CLASS="$2" $1#g;
s#<P ([^>]*)ROLE="([^"]+)"#<P CLASS="$2" $1#g;
# The two rules above can leave blanks in front of ">"; tidy them (this
# applies to every tag in the record, not only <P>).
s# +>#>#g;
# A paragraph that had both ROLE and REND now carries two CLASS attributes;
# fold them into a single CLASS="role_rend".
s#<P ([^>]*)CLASS="([^"]+)"([^>]+)CLASS="([^"]+)"#<P $1CLASS="$2_$4"$3#g;
# Expose N as visible bracketed text at the start of the paragraph.
s#<P ([^>]*)N="([^"]+)"([^>]*)>#<P $1$3><SPAN CLASS="exposed">[$2]</SPAN> #g;

# eliminate TCP divs to make room for HTML divs
# at the moment, these are simply removed and marked by flags
# in the text. We may find reason instead to turn them into proper
# HTML divs. But I can't think why at the moment.

# Temporarily turn these into HTML divs:

# Drop the MS="y" flag wherever it appears.
s# MS="[yY]"##g;
# Lift LANG out of a DIVn start tag and leave it as a {LANG="..."} flag right
# after the tag.  The braces in the replacement must be escaped: unescaped,
# "$3{$2}" is interpolated as a lookup in the hash %3, which yields an empty
# string and silently drops the rest of the tag (including its ">") together
# with the flag.  The blank in front of LANG is consumed so that the
# remaining tag still matches the exact-form patterns below.
s#(<DIV[1-7][^>]*?) +(LANG="[^"]+")([^>]*>)#$1$3\{$2\}#g;
# Replace each DIVn start tag by a rule, an HTML DIV of class TEIn, and a
# {DIVn ...} flag recording its TYPE and/or N.
s#<DIV([1-7]) TYPE="([^"]+)" N="([^"]+)">#<HR><DIV CLASS="TEI$1">{DIV$1: $2 $3}#g;
s#<DIV([1-7]) N="([^"]+)" TYPE="([^"]+)">#<HR><DIV CLASS="TEI$1">{DIV$1: $3 $2}#g;
s#<DIV([1-7]) TYPE="([^"]+)">#<HR><DIV CLASS="TEI$1">{DIV$1: $2}#g;
s#<DIV([1-7])>#<HR><DIV CLASS="TEI$1">{DIV$1 (type unspecified)}#g;
s#<DIV([1-7]) N="([^"]+)">#<HR><DIV CLASS="TEI$1">{DIV$1 number $2}#g;
s#</DIV[1234567]>#</DIV>#g;
# Merge the two flags into one, e.g. {LANG="lat" in DIV1: poem}.  After the
# steps above the DIV flag comes first and the LANG flag second, so that is
# the order matched here.
s#\{(DIV[1-7][^{}]*)\}\{(LANG="[^"]+")\}#{$2 in $1}#g;

# elements

s#<ARGUMENT>#<DIV CLASS="argument">#g;
s#</ARGUMENT>#</DIV>#g;
s#<BACK>#<HR><STRONG>{BACK MATTER}</STRONG>#g;
s#</BACK>##g;
s#<BODY>#<HR><STRONG>{BODY}</STRONG>#g;
s#</BODY>##g;
s#<BYLINE>#<P CLASS="byline">#g;
s#</BYLINE>#</P>#g;
# Table cells.  A CELL with ROLE="label" is parked under the temporary name
# CCELL (losing its ROLE attribute) so that the generic CELL -> TD rule does
# not touch it; it is then turned into a yellow TH.
s#<CELL([^>]+)ROLE="label"#<CCELL$1#g;
s#<CELL#<TD#g;
s#</CELL>#</TD>#g;
s#ROWS="#ROWSPAN="#g;
s#COLS="#COLSPAN="#g;
s#<CCELL#<TH BGCOLOR="yellow"#g;
# Every </CELL> became </TD> above, including the ones that close a label
# cell; give TH cells their matching </TH>.  The class must be [^<] (the
# original [<] could never match).  Only cells without nested markup are
# repaired, since the match stops at the first "<".
s#<TH([^<]+)</TD>#<TH$1</TH>#g;
# Applies to ROLE="total" on any element, not only on cells.
s#ROLE="total"#bgcolor="gray"#g;

s#<CLOSER>#<DIV CLASS="closer">#g;
s#</CLOSER>#</DIV>#g;
s#<OPENER>#<DIV CLASS="opener">#g;
s#</OPENER>#</DIV>#g;
s#<EPIGRAPH>#<DIV CLASS="epigraph">#g;
s#</EPIGRAPH>#</DIV>#g;

s#<SIGNED>#<SPAN CLASS="signed">#g;
s#</SIGNED>#</SPAN>#g;
s#<SALUTE>#<SPAN CLASS="salute">#g;
s#</SALUTE>#</SPAN>#g;


s#<BIBL>#<SPAN CLASS="bibl">#g;
s#</BIBL>#</SPAN>#g;

s#<BYLINE>#<SPAN CLASS="byline">#g;
s#</BYLINE>#</SPAN>#g;

s,<Q REND="italic">,<BLOCKQUOTE CLASS="italinline">,g;
s,<Q REND="block italic">,<BLOCKQUOTE CLASS="italblock">,g;
s,<Q REND="inline italic">,<BLOCKQUOTE CLASS="italinline">,g;
s,<Q REND="inline">,<BLOCKQUOTE CLASS="inline">,g;


s#<Q REND="margquot">#<BLOCKQUOTE CLASS="margquot">#g;
s#<HI REND="margquot">#<SPAN CLASS="margquot">#g;
s#<Q>#<BLOCKQUOTE>#g;
s#</Q>#</BLOCKQUOTE>#g;
s,<Q LANG="[A-Za-z]+">,<BLOCKQUOTE>,g;

s#<FIGURE REND="inline">#<DIV CLASS="figinline">#g;
s#<FIGURE>#<DIV CLASS="figblock">#g;
s#</FIGURE>#</DIV>#g;
s#<FIGDESC>#<SPAN CLASS="figdesc">#g;
s#</FIGDESC>#</SPAN>#g;


s#<FRONT>#<P>{FRONT MATTER}<P>#g;
s#</FRONT>##g;
s#<HEAD>#<H2>#g;
s#</HEAD>#</H2>#g;
s,<REF TARGET="([^"]+)">,<A HREF="#$1" TARGET="display">{xref: ,g;
s,<PTR TARGET="([^"]+)">,<A HREF="#$1" TARGET="display">{xref}</A>,g;
s#</REF># }</A>#g;


s#<LABEL#<SPAN CLASS="label"#g;
s#</LABEL>#</SPAN>#g;
s#<ITEM#<SPAN CLASS="item"#g;
s#</ITEM>#</SPAN>#g;
s#<LIST#<DIV CLASS="list"#g;
s#</LIST>#</DIV>#g;


s#<L N="([^"]+)">#<SPAN CLASS="exposed">[line $1]</SPAN> <SPAN CLASS="line">#g;
s#<L>#<SPAN CLASS="line">#g;
s#<L REND="rightJustify">#<SPAN CLASS="rline">#g;
s#<L REND="indent">#<SPAN CLASS="indentline">#g;
s#</L>#</SPAN>#g;
s#<LB>#<BR>#g;
s#<LG N="([^"]+)">#<P CLASS="stanza"><SPAN CLASS="exposed">[stanza $1]</SPAN>#g;
s#<LG#<P CLASS="stanza"#g;
s#</LG>#</P>#g;

# Milestones become small inline markers showing UNIT and/or N in brackets;
# REND="hr" becomes a half-width rule.  Every pattern accepts optional blanks
# or a "/" before the closing ">" -- including the UNIT+N form, which used to
# be the only one that did not and so left such tags unconverted.
s#<MILESTONE[ /]*>#<SPAN CLASS="milestone"> [ ] </SPAN>#g;
s#<MILESTONE REND="hr"[ /]*>#<HR WIDTH="50%">#g;
s#<MILESTONE REND="([^"]+)"[ /]*>#<SPAN CLASS="milestone" REND="$1"> [ ] </SPAN>#g;
s#<MILESTONE UNIT="([^"]+)"[ /]*>#<SPAN CLASS="milestone"> [$1] </SPAN>#g;
s#<MILESTONE N="([^"]+)"[ /]*>#<SPAN CLASS="milestone"> [$1] </SPAN>#g;
s#<MILESTONE UNIT="([^"]+)" N="([^"]+)"[ /]*>#<SPAN CLASS="milestone"> [$1 $2] </SPAN>#g;
s#<MILESTONE N="([^"]+)" UNIT="([^"]+)"[ /]*>#<SPAN CLASS="milestone"> [$2 $1] </SPAN>#g;

s#<NOTE#<DIV CLASS="note"#g;
s#</NOTE>#</DIV>#g;


s# MS="y"##g;
s# MS="Y"##g;
s#<PB([^>]*)>#<SPAN CLASS="pb"$1></SPAN>#g;

# s#<PB N="([^"]+)">#<SPAN CLASS="pb">-------------------page $1--------------------</SPAN>#g;
# s#<PB>#<SPAN CLASS="pb">----------------------page __ ------------------------</SPAN>#g;
# s#<PB REF="([^"]*)" N="([^"]+)">#<SPAN CLASS="pb">-------------------page $2 on image $1--------------------</SPAN>#g;
# s#<PB N="([^"]*)" REF="([^"]+)">#<SPAN CLASS="pb">-------------------page $1 on image $2--------------------</SPAN>#g;
# s#<PB REF="([^"]*)">#<SPAN CLASS="pb">----------------------page __ (on image $1)-----------------</SPAN>#g;

s#<ROW#<TR#g;
s#</ROW>#</TR>#g;
s#<TABLE#<TABLE BORDER="1"#g;

# s#<GAP># {omitted} #g;
# s#<GAP DESC="([^"]+)"># {omitted because: $1} #g;


s#<ADD>#<SPAN CLASS="add">#g;
s#</ADD>#</SPAN>#g;
s#<ABBR#<SPAN CLASS="abbr"#g;
s#</ABBR>#</SPAN>#g;

s#<HI#<SPAN CLASS="hi"#g;
s#</HI>#</SPAN>#g;

  s#REND="blackletter[^ "]*"#STYLE="font-style: normal; font-weight: bolder;"#g;
  s#REND="small"#STYLE="font-size: smaller;"#g;
  s#REND="large"#STYLE="font-size: larger;"#g;
  s#REND="italic"#STYLE="font-style: italic;"#g;
  s#REND="rightJustify"#STYLE="text-align: right;"#g;
  
s#<LETTER#<DIV CLASS="letter"#g;
s#</LETTER>#</DIV>#g;

s#<LICENSE#<DIV CLASS="license"#g;
s#</LICENSE>#</DIV>#g;

s#<FLOATEXT#<DIV CLASS="floatext"#g;
s#</FLOATEXT>#</DIV>#g;

s#<SP>#<DIV CLASS="speech">#g;
s#</SP>#</DIV>#g;

s#<SPEAKER>#<SPAN CLASS="speaker">#g;
s#</SPEAKER>#</SPAN>#g;

s#<STAGE#<SPAN CLASS="stage"#g;
s#</STAGE>#</SPAN>#g;

s#<TEXT#<DIV CLASS="text"#g;
s#</TEXT>#</DIV>#g;

s#<TRAILER#<P CLASS="trailer"#g;
s#</TRAILER>#</P>#g;

s#<UNCLEAR>#<SPAN CLASS="unclear">#g;
s#</UNCLEAR>#</SPAN>#g;

s#<DATE>#<SPAN CLASS="date">#g;
s#</DATE>#</SPAN>#g;

s#<DATELINE>#<SPAN CLASS="dateline">#g;
s#</DATELINE>#</SPAN>#g;

s#<HEADNOTE>#<DIV CLASS="headnote">#g;
s#<TAILNOTE>#<DIV CLASS="tailnote">#g;
s#<POSTSCRIPT>#<DIV CLASS="postscript">#g;
s#</POSTSCRIPT>#</DIV>#g;
s#</HEADNOTE>#</DIV>#g;
s#</TAILNOTE>#</DIV>#g;

s#<ABOVE>#<SPAN CLASS="above">#g;
s#<BELOW>#<SUB>#g;
s#</ABOVE>#</SPAN>#g;
s#</BELOW>#</SUB>#g;

# removed this old stuff 
# updated GAP DESCs (12/06/03) to deal with AEL data.

# s,DESC="MISSING",DESC="missing",g;
# s,REASON="DAMAGE",REASON="damage",g;
# 
# s,<GAP DESC="missing" REASON="damage" EXTENT="1 character">,\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="1 line">,\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="1 Wards">,\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="1 word">,\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="11 characters">,\$span\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="12 characters">,\$span\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="14 characters">,\$span\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="15 characters">,\$span\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="15 lines">,\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="2 character">,\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="2 characters">,\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="2 line">,\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="2 lines">,\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="2 word">,\$word\$\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="2 words">,\$word\$\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="3 character">,\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="3 characters">,\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="3 lines">,\$line\$\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="3 word">,\$word\$\$word\$\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="3 words">,\$word\$\$word\$\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="4 characters">,\$\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="4 lines">,\$line\$\$line\$\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="5 characters">,\$\$\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="5 words">,\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="55 characters">,\$span\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="6 character">,\$\$\$\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="6 characters">,\$\$\$\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="6 lines">,\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="6 words">,\$word\$\$word\$\$word\$\$word\$\$word\$\$word\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="8 characters">,\$\$\$\$\$\$\$\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="8 lines">,\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$\$line\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="9 character">,\$span\$,g;
# s,<GAP DESC="missing" REASON="damage" EXTENT="9 characters">,\$span\$,g;

# additions to deal with reviewed text

# SLIGHTLY MODIFIED FORM OF GENERAL EXTENTS SCRIPT to tidy up odd extent values
# There is a *LOT* of unexamined old code here. Not sure what it is all doing.

# normalize case (first three already done above?) 

s#reason="#REASON="#g;
s#extent="#EXTENT="#g;
s#desc="#DESC="#g;
s,DESC="FOREIGN",DESC="foreign",g;
s,DESC="ILLEGIBLE",DESC="illegible",g;
s,DESC="MUSIC",DESC="music",g;
s,DESC="MATH",DESC="math",g;
s,DESC="MISSING",DESC="missing",g;
s,DESC="DUPLICATE",DESC="duplicate",g;
s,DESC="REPLACEMENT",DESC="replacement",g;
s,DESC="SYMBOL",DESC="symbol",g;
s,DESC="INTRUDER",DESC="intruder",g;
s,DESC="BLANK",DESC="blank",g;

# repair selective uppercase values

s#EXTENT="WORD"#EXTENT="1 word"#g;
s#EXTENT="1 Page"#EXTENT="1 page"#g;
s#EXTENT="1 WORD"#EXTENT="1 word"#g;
s#EXTENT="([0-9]+)\+ PAGES"#EXTENT="$1+ pages"#g;
s#EXTENT="([2-9]) Pages"#EXTENT="$1 pages"#g;
s#EXTENT="SPAN"#EXTENT="1 span"#g;

# repair odd intrusive characters and spaces

s#EXTENT=" +#EXTENT="#g;
s#EXTENT="'([0-9]+)"#EXTENT="$1"#g;
s#EXTENT="([0-9]+)'"#EXTENT="$1"#g;
s#EXTENT="([0-9]+) +\+#EXTENT="$1+#g;
s#EXTENT="1 line "#EXTENT="1 line"#g;

# assume that non-specific duplicates are all pages in length

s#<GAP DESC="duplicate" EXTENT="1">#<GAP DESC="duplicate" EXTENT="1 page">#g;
s#<GAP DESC="duplicate" EXTENT="([23456789])">#<GAP DESC="duplicate" EXTENT="$1 pages">#g;
s#<GAP DESC="duplicate" EXTENT="([123456789][0-9]+)">#<GAP DESC="duplicate" EXTENT="$1 pages">#g;
s#<GAP DESC="duplicate">#<GAP DESC="duplicate" EXTENT="1 page">#g;

# assume that non-specific symbols are all 1 character in length

s#<GAP DESC="symbol">#<GAP DESC="symbol" EXTENT="1">#g;

# convert $s to GAPs
#
# First shield attribute values: inside every ="..." the characters $ and #
# are swapped for {ILLEGMARKER} and {SYMBOLMARKER}, so the GAP rules that
# follow only see the $ and # that occur in running text.  The markers are
# turned back into $ and # further down.
#
# This is done in one pass over the record.  The earlier version replaced one
# attribute at a time through a "<spot>" placeholder, rescanning the record
# from the start for each attribute and going wrong if the text itself
# contained "<spot>"; it also needed an {EQUALSQUOTE} marker purely to make
# that loop advance, which is no longer required.

s{(="[^"]+")}{
    my $quoted = $1;
    $quoted =~ s/\$/{ILLEGMARKER}/g;
    $quoted =~ s/\#/{SYMBOLMARKER}/g;
    $quoted;
}ge;

# change word, span, line, page, para(graph) to appropriate GAPs

s,\$[wW]ord\$,<GAP DESC="illegible" EXTENT="1 word" RESP="">,gi;
s,\$[sS]pan\$,<GAP DESC="illegible" EXTENT="1 span" RESP="">,gi;
s,\$[lL]ine\$,<GAP DESC="illegible" EXTENT="1 line" RESP="">,gi;
s,\$[pP]age\$,<GAP DESC="illegible" EXTENT="1 page" RESP="">,gi;
s,\$[pP]ara\$,<GAP DESC="illegible" EXTENT="1 paragraph" RESP="">,gi;
s,\$[pP]aragraph\$,<GAP DESC="illegible" EXTENT="1 paragraph" RESP="">,gi;

# Turn every remaining run of dollar signs into one illegible-text GAP: a run
# of more than 12 becomes a single "1 span" GAP, a shorter run becomes a GAP
# whose EXTENT is the length of the run in letters.

s{(\$+)}{
    my $dollars = length $1;
    my $extent  = $dollars > 12 ? '1 span'
                : $dollars == 1 ? '1 letter'
                :                 "$dollars letters";
    qq{<GAP DESC="illegible" EXTENT="$extent" RESP="">};
}ge;

# Runs of hash marks become symbol GAPs in the same way, except that there is
# no span form: a run is consumed at most twelve marks at a time, so a longer
# run yields several GAPs in a row.

s{(\#{1,12})}{
    my $hashes = length $1;
    my $extent = $hashes == 1 ? '1 letter' : "$hashes letters";
    qq{<GAP DESC="symbol" EXTENT="$extent">};
}ge;

# combine up to seven adjacent word GAPs into single multi-word GAP 

s,<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*>,<GAP DESC="illegible" EXTENT="7 words" RESP="">,g;
s,<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*>,<GAP DESC="illegible" EXTENT="6 words" RESP="">,g;
s,<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*>,<GAP DESC="illegible" EXTENT="5 words" RESP="">,g;
s,<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*>,<GAP DESC="illegible" EXTENT="4 words" RESP="">,g;
s,<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*>,<GAP DESC="illegible" EXTENT="3 words" RESP="">,g;
s,<GAP DESC="illegible" EXTENT="1 word"[^>]*> *<GAP DESC="illegible" EXTENT="1 word"[^>]*>,<GAP DESC="illegible" EXTENT="2 words" RESP="">,g;

# restore the {ILLEGMARKER} to $ and {SYMBOLMARKER} to #

s,{ILLEGMARKER},\$,g;
s,{SYMBOLMARKER},#,g;


# expand bare (non-specific) numbers for non-duplicate GAPs to letter(s)

s#EXTENT="1"#EXTENT="1 letter"#g;
s#EXTENT="([1-9][0-9]+)"#EXTENT="$1 letters"#g;
s#EXTENT="([2-9])"#EXTENT="$1 letters"#g;

s#EXTENT="few words"#EXTENT="1+ words"#g;
s#EXTENT="line"#EXTENT="1 line"#g;
s#EXTENT="([2-9]) line"#EXTENT="$1 lines"#g;
s#EXTENT="three words"#EXTENT="3 words"#g;
s#EXTENT="word"#EXTENT="1 word"#g;
s#EXTENT="span"#EXTENT="1 span"#g;
s#EXTENT="SPAN"#EXTENT="1 span"#g;
s#EXTENT="page"#EXTENT="1 page"#g;
s#EXTENT="1 word "#EXTENT="1 word"#g;
s#EXTENT="([0-9]+) PAGES"#EXTENT="$1 pages"#g;
s#EXTENT="1 span +"#EXTENT="1 span"#g;
s#EXTENT="1 sspan"#EXTENT="1 span"#g;
s#EXTENT="1 ?\+"#EXTENT="1+ letters"#g;





s#EXTENT="1 lines"#EXTENT="1 line"#g;
s#EXTENT="1 letters"#EXTENT="1 letter"#g;
s#EXTENT="2 letter"#EXTENT="2 letters"#g;
s#EXTENT="1 page \+"#EXTENT="1+ pages"#g;
s#EXTENT="2 words "#EXTENT="2 words"#g;
s#EXTENT="3 word"#EXTENT="3 words"#g;
s#EXTENT="1 pages"#EXTENT="1 page"#g;
s#EXTENT="1 ?PAGE"#EXTENT="1 page"#g;
s#EXTENT="note"#EXTENT="1 span"#g;
s#EXTENT="1 number"#EXTENT="1 letter"#g;
s#EXTENT="([0-9]+\+?) PAGES"#EXTENT="$1 pages"#g;
s#EXTENT="page 1"#EXTENT="1 page"#g;
s#EXTENT="page 1"#EXTENT="1 page"#g;
s#EXTENT="para"#EXTENT="1 paragraph"#g;
s#EXTENT="1 lspan"#EXTENT="1 span"#g;
s#EXTENT="1 wprd"#EXTENT="1 word"#g;
s#EXTENT="1 or more pages"#EXTENT="1+ pages"#g;
s#EXTENT="1 Page"#EXTENT="1 page"#g;
s#EXTENT="0.5 pages"#EXTENT="half a page"#g;
s#EXTENT="1 note"#EXTENT="1 span"#g;
s#EXTENT="several words"#EXTENT="2+ words"#g;
s#EXTENT="several letters"#EXTENT="2+ letters"#g;

s#EXTENT="1\+ letter"#EXTENT="1+ letters"#g;
s#EXTENT="1\+ line"#EXTENT="1+ lines"#g;

s#EXTENT="[cC][hH][uU][nN][kK]"#EXTENT="1 chunk"#g;
s#EXTENT="1 [cC][hH][uU][nN][kK]"#EXTENT="1 chunk"#g;



# fill in some superscripts automatically. do away with superscripted words
#
# These rules look at an illegible GAP that directly follows "w^".
# A pattern that starts with a blank needs a blank before the "w"; a pattern
# that starts with ^ (no /m flag is used) can only match at the very start
# of the record.

# A one-word illegible GAP right after "^" is narrowed to a one-letter GAP.
s#\^<GAP DESC="illegible"[^>]+EXTENT="1 word"[^>]*>#^<GAP DESC="illegible" EXTENT="1 letter">#g;
# "w^" + GAP standing alone (followed by a blank or the end of the record)
# is written out as "w^t".
s# w\^<GAP DESC="illegible[^>]+> # w^t #g;
s# w\^<GAP DESC="illegible[^>]+>$# w^t #g;
s#^w\^<GAP DESC="illegible[^>]+> # w^t #g;

# "w^" + GAP followed by out / stand / in / al: the GAP is removed.
# NOTE(review): only the "out" rules put a "t" in place of the GAP (w^tout);
# the "stand", "in" and "al" rules leave nothing in its place (w^stand, w^in,
# w^al).  Presumably w^tstand, w^tin and w^tal were meant -- confirm before
# changing, since this alters the transcribed text.
s# w\^<GAP DESC="illegible[^>]+>out# w^tout#g;
s#^w\^<GAP DESC="illegible[^>]+>out# w^tout#g;
s# w\^<GAP DESC="illegible[^>]+>stand# w^stand#g;
s#^w\^<GAP DESC="illegible[^>]+>stand#w^stand#g;
s#notw\^<GAP DESC="illegible[^>]+>stand#notw^stand#g;

s# w\^<GAP DESC="illegible[^>]+>in# w^in#g;
s#^w\^<GAP DESC="illegible[^>]+>in#w^in#g;
s# w\^<GAP DESC="illegible[^>]+>al# w^al#g;
s#^w\^<GAP DESC="illegible[^>]+>al#w^al#g;

# Wrap every GAP tag (with its attributes) in <SPAN CLASS="gap">...</SPAN>.
s#(<GAP[^>]+>)#<SPAN CLASS="gap">$1</SPAN>#g;

# Second pass of EXTENT clean-up: supply missing counts and units, fix
# spacing and letter case, and move a detached "+" next to the number.
# Order matters: the bare-number rules ("1", "2".."9") must run after the
# more specific spellings above them.
s#EXTENT="paragraph"#EXTENT="1 paragraph"#g;
s#EXTENT="1 para"#EXTENT="1 paragraph"#g;
s#EXTENT="1\+"#EXTENT="1+ letters"#g;
s#EXTENT="1span"#EXTENT="1 span"#g;
s#EXTENT="2 word"#EXTENT="2 words"#g;
s#EXTENT="1 words"#EXTENT="1 word"#g;
s#EXTENT="1 PAGE"#EXTENT="1 page"#g;
s#EXTENT="1page"#EXTENT="1 page"#g;

# A bare number is taken to be a count of letters.
s#EXTENT="1 "#EXTENT="1 letter"#g;
s#EXTENT="1 note"#EXTENT="1 span"#g;
s#EXTENT="1  span"#EXTENT="1 span"#g;
s#EXTENT="1"#EXTENT="1 letter"#g;
s#EXTENT="1\+"#EXTENT="1+ letters"#g;
s#EXTENT="1word"#EXTENT="1 word"#g;
s#EXTENT="([2-9])"#EXTENT="$1 letters"#g;
s#EXTENT="([2-9]\+)"#EXTENT="$1 letters"#g;
s#EXTENT="1 plus pages"#EXTENT="1+ pages"#g;
# The "+" in the next two patterns is a literal plus sign and must be
# escaped.  Unescaped it was a quantifier on the preceding space, so the
# rules never matched "2 pages +" / "1 + pages" and instead rewrote values
# that merely contained stray spaces ("2 pages ", "1  pages") to "N+ pages".
s#EXTENT="2 pages \+"#EXTENT="2+ pages"#g;
s#EXTENT="1 \+ pages"#EXTENT="1+ pages"#g;
s#EXTENT="1\+ page"#EXTENT="1+ pages"#g;
s#EXTENT="1\+ word"#EXTENT="1+ words"#g;
s#EXTENT="([2-9]) page"#EXTENT="$1 pages"#g;
s#EXTENT="([2-9]) pages *\+"#EXTENT="$1+ pages"#g;
s#EXTENT="([1-9][0-9]+) pages *\+"#EXTENT="$1+ pages"#g;
# Insert the missing space between a count and a unit starting with
# p, l, w or s (e.g. "3pages", "2words").
s#EXTENT="([0-9]+)([plws])#EXTENT="$1 $2#g;

s#EXTENT="([0-9]+)pages"#EXTENT="$1 pages"#g;
s#EXTENT="1 san"#EXTENT="1 span"#g;
# Letter-sized "missing" gaps are re-labelled as illegible gaps that carry
# REASON="missing"; any other attributes of the original tag are dropped.
s#<GAP DESC="missing"[^>]+EXTENT="1 letter"[^>]*>#<GAP DESC="illegible" REASON="missing" EXTENT="1 letter">#g;
s#<GAP DESC="missing"[^>]+EXTENT="([0-9]+ letters)"[^>]*>#<GAP DESC="illegible" REASON="missing" EXTENT="$1">#g;

# END OF GENERAL EXTENTS SCRIPT

#  taken from tcp-convert-view:
#  pfs:2010-03  End-of-line hyphen routine.
#
#  "|" and "+" in the transcription may mark end-of-line breaks.  Both are
#  first swapped for brace-delimited placeholders so that the selective
#  rules below can tell a break marker from an ordinary character.

s/\|/{pfseol}/g;
s/\+/{pfsEOL}/g;

#  Set aside the plus that belongs to an extent value (EXTENT="<digits>+...).

s/(EXTENT="[0-9]+)\{pfsEOL\}/$1\{pfsplus\}/g;

#  A pipe between the listed left-hand and right-hand characters becomes
#  &EOLhyphen; ...

s/([a-zA-Z~\;>'\]])\{pfseol\}([a-zA-Z&< \(\^\[])/$1&EOLhyphen;$2/g;

#  ... and a plus in the corresponding position becomes &EOLunhyphen;.

s/([a-zA-Z~\;>'])\{pfsEOL\}([a-zA-Z&< \(\[\^])/$1&EOLunhyphen;$2/g;

#  Every placeholder that is still present goes back to the character it
#  stood for: both plus placeholders to "+", the pipe placeholder to "|".

s/\{pfsplus\}|\{pfsEOL\}/+/g;
s/\{pfseol\}/|/g;

#  pfs:2010-03   END OF HYPHEN ROUTINE


# GAP preliminaries
#
# Rename each GAP according to its DESC value, giving every kind its own
# temporary tag.  The DESC attribute itself is dropped; whatever stood
# before and after it inside the tag is kept.  Matching is case-insensitive.

for my $kind (
    [ illegible   => 'GUP' ],
    [ symbol      => 'GIP' ],
    [ missing     => 'GYP' ],
    [ duplicate   => 'GQP' ],
    [ replacement => 'GWP' ],
    [ intruder    => 'GOP' ],
    [ blank       => 'GZP' ],
    [ foreign     => 'GEP' ],
) {
    my ($desc, $tag) = @$kind;
    s,<GAP([^>]+)DESC="$desc"([^>]*)>,<$tag$1$2>,gi;
}

# Removing DESC leaves doubled or trailing blanks behind: squeeze every run
# of spaces to one and drop spaces in front of ">".
s/ {2,}/ /g;
s/ +>/>/g;

# Music and math gaps go straight to their final text, since they do not
# normally carry any EXTENT to worry about.

s{<GAP[^>]+DESC="music"[^>]*>}{&#x2308; &#x266B; &#x2309;}gi;
s{<GAP[^>]+DESC="math"[^>]*>}{&#x2308; math &#x2309;}gi;


# SUB and SUPERSCRIPTS

# "^" marks the next character as superscript and "^^" as subscript.
# Super- and sub-scripted items are assumed to be exactly one character, as
# they are supposed to be: illegible, foreign and symbol gaps count as one
# character, and so does a character entity.

# Gap tags were wrapped in <SPAN CLASS="gap">...</SPAN> earlier in the
# script, which puts the SPAN between the caret and the gap tag and stops
# the four gap rules below from ever matching.  Unwrap a gap that directly
# follows a caret so that those rules see the tag again.
s,(\^)<SPAN CLASS="gap">(<G[UIE]P[^>]*>)</SPAN>,$1$2,g;

# Illegible gaps become a bullet, symbol and foreign gaps a ballot box.
# The bullet must be written &#x2022; -- the former "&x#2022;" is not a
# valid character reference and was shown literally.
s,\^\^<GUP[^>]*>,<SUB>&#x2022;</SUB>,g;
s,\^\^<G[IE]P[^>]*>,<SUB>&#x2610;</SUB>,g;
s,\^<GUP[^>]*>,<SUP>&#x2022;</SUP>,g;
s,\^<G[IE]P[^>]*>,<SUP>&#x2610;</SUP>,g;
# Character entities.  Numeral SPANs after a caret are unwrapped so that the
# single-character rules below can apply to their digits.  The subscript
# ("^^") form of each rule has to run before the superscript ("^") form.
s,\^\^(&[^ ;]+;),<SUB>$1</SUB>,g;
s,\^\^<SPAN CLASS="numeral">([0-9]+)</SPAN>,^^$1,g;
s,\^<SPAN CLASS="numeral">([0-9]+)</SPAN>,^$1,g;
s,\^(&[^ ;]+;),<SUP>$1</SUP>,g;
# Plain single characters.
s,\^\^([a-zA-Z0-9]),<SUB>$1</SUB>,g;
s#\^([\(\)\*\.:;,a-zA-Z0-9])#<SUP>$1</SUP>#g;

# GAP nitty gritty

# Illegible gaps (<GUP ...>).  Letters, words, spans and chunks get a rough
# symbolic equivalent; every other extent is spelled out as plain text.

# Letters: one bullet per illegible letter, for 1 to 15 letters.

for my $count (1 .. 15) {
    my $extent  = $count == 1 ? '1 letter' : "$count letters";
    my $bullets = '&#x2022;' x $count;
    s,<GUP[^>]*EXTENT="$extent"[^>]*>,$bullets,gi;
}

# "N+ letters", for N from 1 to 10: bullets followed by an ellipsis.
# The bullet count is N-1 for N up to 3 and N from 4 upwards.
# NOTE(review): that jump between "3+" and "4+" is reproduced exactly as
# the table was written; confirm whether it is intended.

for my $count (1 .. 10) {
    my $bullet_count = $count < 4 ? $count - 1 : $count;
    my $glyphs       = ('&#x2022;' x $bullet_count) . '&#x2026;';
    s,<GUP[^>]*EXTENT="$count\+ letters"[^>]*>,$glyphs,gi;
}

# Spans: an ellipsis between ceiling brackets.

s{<GUP[^>]*EXTENT="1 span"[^>]*>}{&#x2308; &#x2026; &#x2309;}gi;

# Chunk: a bare ellipsis.

s{<GUP[^>]*EXTENT="1 chunk"[^>]*>}{&#x2026;}gi;

# Words, 1 to 10: one lozenge per word, between ceiling brackets.

for my $count (1 .. 10) {
    my $extent   = $count == 1 ? '1 word' : "$count words";
    my $lozenges = '&#x2308;' . ('&#x25CA;' x $count) . '&#x2309;';
    s,<GUP[^>]*EXTENT="$extent"[^>]*>,$lozenges,gi;
}

# Any other illegible extent (including every value with an incorporated
# "+" that is not handled above, e.g. "2+ words") is left as plain text.

s{<GUP[^>]*EXTENT="([^"]+)"[^>]*>}{&#x2308;$1 illegible&#x2309;}gi;

# Symbol gaps (<GIP ...>) of 1 to 6 letters: one ballot box per letter.
# A GIP tag without any attribute is shown as a single box.

for my $count (1 .. 6) {
    my $extent = $count == 1 ? '1 letter' : "$count letters";
    my $boxes  = '&#x2610;' x $count;
    s,<GIP[^>]*EXTENT="$extent"[^>]*>,$boxes,gi;
}
s{<GIP>}{&#x2610;}gi;

# Gaps without a special representation are described in words between
# ceiling brackets.

# With an extent value: the extent is quoted in the description.

s{<GIP[^>]*EXTENT="([^"]+)"[^>]*>}{&#x2308;symbol ($1)&#x2309;}gi;

for my $kind (
    [ GYP => 'missing' ],
    [ GQP => 'duplicate' ],
    [ GWP => 'supplied later' ],
    [ GOP => 'inserted from a different book' ],
    [ GZP => 'left blank' ],
    [ GEP => 'in non-Latin alphabet' ],
) {
    my ($tag, $what) = @$kind;
    s,<(?:$tag)[^>]*EXTENT="([^"]+)"[^>]*>,&#x2308;$1 $what&#x2309;,gi;
}

# Without an extent value: a fixed description per kind.  A blank gap is
# shown as a rule of underscores instead; the bracketed alternative
# "&#x2308;left blank&#x2309;" is not in use.

for my $kind (
    [ GYP => '&#x2308;missing page&#x2309;' ],
    [ GQP => '&#x2308;duplicate page&#x2309;' ],
    [ GWP => '&#x2308;page supplied later&#x2309;' ],
    [ GOP => '&#x2308;page inserted from different book&#x2309;' ],
    [ GZP => ' _____ ' ],
    [ GEP => '&#x2308;foreign&#x2309;' ],
) {
    my ($tag, $text) = @$kind;
    s,<$tag *>,$text,gi;
}

# Any temporary gap tag that survived all of the above is turned back
# into {GAP ...}, attributes included.

s/<G[UIYQWOZE]P([^>]*)>/{GAP$1}/g;

# Convert tilde to combining macron

s/~/&#x0304;/g;





# characters

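# All mappings except long s are taken directly from charmap.htm (the table
# of displayable equivalents used by the 'displayable' form of the XML
# output).  Long s is kept distinct here, whereas the regular displayable
# output merges it with ordinary round s.
# NOTE(review): the long-s mapping below targets U+0283 (LATIN SMALL LETTER
# ESH); the dedicated long-s code point is U+017F -- confirm that the esh
# look-alike is intentional.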

s,&s;,&#x0283;,g;

# from charmap.htm (itself auto-generated from charmap.sgm)
# updated 2010-06

s/&ahacek;/&#x01CE;/g;
s/&bstrok;/&#x0180;/g;
s/&cmacr;/&#x0063;&#x0304;/g;
s/&Ddotb;/&#x1E0C;/g;
s/&ddotb;/&#x1E0D;/g;
s/&ebreve;/&#x0115;/g;
s/&Ghacek;/&#x01E6;/g;
s/&ghacek;/&#x01E7;/g;
s/&Ibreve;/&#x012C;/g;
s/&ibreve;/&#x012D;/g;
s/&Ldotb;/&#x1E36;/g;
s/&ldotb;/&#x1E37;/g;
s/&obreve;/&#x014F;/g;
s/&ohacek;/&#x01D2;/g;
s/&pgrave;/&#x0070;&#x0300;/g;
s/&qacute;/&#x0071;&#x0301;/g;
s/&Udotb;/&#x1EE4;/g;
s/&udotb;/&#x1EE5;/g;
s/&uhacek;/&#x01D4;/g;
s/&vbreve;/&#x0076;&#x0306;/g;
s/&ydot;/&#x1E8F;/g;
s/&yhook;/&#x01B4;/g;
s/&Yhook;/&#x01B3;/g;
s/&ymacr;/&#x0079;&#x0304;/g;
s/&ohkact;/&#x01EB;&#x0301;/g;
s/&usd5;/{inverted 5}/g;
s/&YOGH;/&#x01B7;/g;
s/&yogh;/&#x0292;/g;
s/&wyn;/&#x01BF;/g;
s/&WYN;/&#x01F7;/g;
s/&w;/{w}/g;
s/&W;/{W}/g;
s/&y;/{y}/g;
s/&Y;/{Y}/g;
s/&V;/&#x01B2;/g;
s/&alef;/&#x05D0;/g;
s/&ayin;/&#x05E2;/g;
s/&bet;/&#x05D1;/g;
s/&dalet;/&#x05D3;/g;
s/&finalkaf;/&#x05DA;/g;
s/&finalmem;/&#x05DD;/g;
s/&finalnun;/&#x05DF;/g;
s/&finalpe;/&#x05E3;/g;
s/&finaltsadi;/&#x05E5;/g;
s/&gimel;/&#x05D2;/g;
s/&he;/&#x05D4;/g;
s/&het;/&#x05D7;/g;
s/&kaf;/&#x05DB;/g;
s/&lamed;/&#x05DC;/g;
s/&mem;/&#x05DE;/g;
s/&nun;/&#x05E0;/g;
s/&pe;/&#x05E4;/g;
s/&qof;/&#x05E7;/g;
s/&resh;/&#x05E8;/g;
s/&samekh;/&#x05E1;/g;
s/&shin;/&#x05E9;/g;
s/&tav;/&#x05EA;/g;
s/&tsadi;/&#x05E6;/g;
s/&tet;/&#x05D8;/g;
s/&vav;/&#x05D5;/g;
s/&yod;/&#x05D9;/g;
s/&zayin;/&#x05D6;/g;
s/&ougr;/{ou}/g;
s/&Slungr;/&#x0297;/g;
s/&finallamed;/&#xFB25;/g;
s/&DIGgr;/&#x03DC;/g;
s/&KOgr;/&#x03DE;/g;
s/&STgr;/&#x03DA;/g;
s/&SAMgr;/&#x03E0;/g;
s/&keraia;/&#x0374;/g;
s/&lkeraia;/&#x0375;/g;
s/&thscrgr;/&#x03D1;/g;
s/&Sgr-rev;/&#x03A3;/g;
s/&Ggr-rev;/&#x0393;/g;
s/&Rgr-rev;/&#x03A1;/g;
s/&abbus;/{bus}/g;
s/&abbrapo;/&#x2019;/g;
s/&abcon;/{con}/g;
s/&aber;/{er}/g;
s/&abis;/{is}/g;
s/&abper;/{per}/g;
s/&abpr;/&#x2118;/g;
s/&abpre;/{pre}/g;
s/&abpri;/{pri}/g;
s/&abPRI;/{PRI}/g;
s/&abpro;/{pro}/g;
s/&abqu;/{qu}/g;
s/&abquam;/{quam}/g;
s/&abque;/{que}/g;
s/&abQUE;/{QUE}/g;
s/&abqui;/{qui}/g;
s/&abQUOD;/{QUOD}/g;
s/&abquod;/{quod}/g;
s/&abris;/{ris}/g;
s/&abrum;/{rum}/g;
s/&absed;/{sed}/g;
s/&abser;/{ser}/g;
s/&abur;/{ur}/g;
s/&abus;/{us}/g;
s/&abUS;/{US}/g;
s/&dhook;/&#x0064;&#x0314;/g;
s/&es;/{es}/g;
s/&etc;/{etc}/g;
s/&qbar;/&#x0071;&#x0332;/g;
s/&sloop;/s&#x0304;/g;
s/&that;/{that}/g;
s/&z;/&#x0290;/g;
s/&chirho;/&#x2627;/g;
s/&schwa;/&#x0259;/g;
s/&yhwh;/&#x05D9;&#x05D4;&#x05D5;&#x05D4;/g;
s/&shilling;/&#x0283;/g;
s/&Xbar;/&#x0058;&#x0336;/g;
s/&resp;/&#x211F;/g;
s/&vers;/&#x2123;/g;
s/&cmbacute;/&#x0301;/g;
s/&cmbbreve;/&#x0306;/g;
s/&cmbcaron;/&#x030C;/g;
s/&cmbcedil;/&#x0327;/g;
s/&cmbcirc;/&#x0302;/g;
s/&cmbdblac;/&#x030B;/g;
s/&cmbdot;/&#x0307;/g;
s/&cmbdotb;/&#x0323;/g;
s/&cmbgrave;/&#x0300;/g;
s/&cmbmacr;/&#x0304;/g;
s/&cmbogon;/&#x0328;/g;
s/&cmbring;/&#x030A;/g;
s/&cmbtilde;/&#x0303;/g;
s/&cmbuml;/&#x0308;/g;
s/&cmbtildeover;/&#x0334;/g;
s/&cmbstrokeover;/&#x0335;/g;
s/&cmbSTROKEover;/&#x0336;/g;
s/&cmbvirguleover;/&#x0337;/g;
s/&cmbVIRGULEover;/&#x0338;/g;
s/&cmbperispo;/&#x0342;/g;
s/&cmbcommaa;/&#x0313;/g;
s/&cmbrcomma;/&#x0314;/g;
s/&spcacute;/&#x00B4;/g;
s/&spcbreve;/&#x02D8;/g;
s/&spccaron;/&#x02C7;/g;
s/&spccedil;/&#x00B8;/g;
s/&spccirc;/&#x005E;/g;
s/&spcdblac;/&#x02DD;/g;
s/&spcdot;/&#x02D9;/g;
s/&spcdotb;/&#x2024;/g;
s/&spcgrave;/&#x0060;/g;
s/&spcmacr;/&#x00AF;/g;
s/&spcogon;/&#x02DB;/g;
s/&spcring;/&#x02DA;/g;
s/&spctilde;/&#x02DC;/g;
s/&spcuml;/&#x00A8;/g;
s/&spcperispo;/&#x1FC0;/g;
s/&spccommaa;/&#x1FBF;/g;
s/&spcrcomma;/&#x1FFE;/g;
s/&anchor;/{anchor}/g;
s/&circdot;/&#x2299;/g;
s/&circle;/&#x25E6;/g;
s/&biglsquo;/&#x275B;/g;
s/&circledplus;/&#x2295;/g;
s/&circledtimes;/&#x2297;/g;
s/&cross;/&#x271A;/g;
s/&diamond;/&#x25C6;/g;
s/&die1;/{&#x00B7;}/g;
s/&die2;/{&#x02D9;.}/g;
s/&die3;/{&#x02D9;&#x00B7;.}/g;
s/&die4;/{::}/g;
s/&die5;/{:&#x00B7;:}/g;
s/&die6;/{:::}/g;
s/&dindx;/&#x261F;/g;
s/&dtridot;/&#x2235;/g;
s/&dtristar;/{inverted &#x2042;}/g;
s/&fivedash;/{fivedash}/g;
s/&fivedot;/{fivedot}/g;
s/&fleurdelys;/{fleur-de-lys}/g;
s/&flower;/&#x2740;/g;
s/&flowerf;/&#x273F;/g;
s/&hDagger;/{horizontal &#x2021;}/g;
s/&heart;/&#x2661;/g;
s/&idagger;/{inverted &#x2020;}/g;
s/&latcross;/&#x271D;/g;
s/&ldagger;/{left &#x2020;}/g;
s/&leaf;/&#x2767;/g;
s/&lindx;/&#x261C;/g;
s/&quaddot;/&#x2237;/g;
s/&quaddotl;/{quaddotl}/g;
s/&quaddotr;/{quaddotr}/g;
s/&rdagger;/{right &#x2020;}/g;
s/&rindx;/&#x261E;/g;
s/&rsect;/{reversed &#x00A7;}/g;
s/&saltire;/&#x2613;/g;
s/&tdagger;/&#x2021;&#x0336;/g;
s/&trefoil;/&#x2663;/g;
s/&triapost;/&#x0027;&#x002C;&#x0027;/g;
s/&tridot;/&#x2234;/g;
s/&tristar;/&#x2042;/g;
s/&triwdot;/&#x25EC;/g;
s/&uindx;/&#x261D;/g;
s/&whbull;/&#x25E6;/g;
s/&endq;/&#x201D;/g;
s/&lpara;/{reversed &#x00B6;}/g;
s/&lquest;/{reversed ?}/g;
s/&lpunctel;/&#x061B;/g;
s/&lsemicol;/{reversed ;}/g;
s/&punc;/&#x25AA;/g;
s/&punctel;/{punctel}/g;
s/&startq;/&#x201C;/g;
s/&leftblank;/{left blank}/g;
s/&closeup;/&#x00A0;&#x00A0;&#x0311;&#x032E;/g;
s/&Earth;/&#x2641;/g;
s/&Jupit;/&#x2643;/g;
s/&Mars;/&#x2642;/g;
s/&Merc;/&#x263F;/g;
s/&Moon;/&#x263D;/g;
s/&Saturn;/&#x2644;/g;
s/&Sun;/&#x2609;/g;
s/&rayedSun;/&#x263C;/g;
s/&Venus;/&#x2640;/g;
s/&Aquar;/&#x2652;/g;
s/&Aries;/&#x2648;/g;
s/&Cancer;/&#x264B;/g;
s/&Capri;/&#x2651;/g;
s/&Gemini;/&#x264A;/g;
s/&Leo;/&#x264C;/g;
s/&Libra;/&#x264E;/g;
s/&Pisces;/&#x2653;/g;
s/&Sagitt;/&#x2650;/g;
s/&Scorp;/&#x264F;/g;
s/&Taurus;/&#x2649;/g;
s/&Virgo;/&#x264D;/g;
s/&conjunction;/&#x260C;/g;
s/&Moonfirst;/&#x263D;/g;
s/&Moonlast;/&#x263E;/g;
s/&Moonnew;/&#x25CF;/g;
s/&Moonround;/&#x274D;/g;
s/&northnode;/&#x260A;/g;
s/&opposition;/&#x260D;/g;
s/&quadrine;/&#x25A1;/g;
s/&semisextile;/&#x22BB;/g;
s/&sextile;/&#x2736;/g;
s/&southnode;/&#x260B;/g;
s/&trine;/&#x25B3;/g;
s/&trine2;/&#x22A6;/g;
s/&stella;/&#x2734;/g;
s/&halfcross;/&#x22A2;/g;
s/&rhalfcross;/&#x22A3;/g;
s/&dram;/&#x0292;/g;
s/&ounce;/&#x2125;/g;
s/&scruple;/&#x2108;/g;
s/&abprecipi;/{precipi}/g;
s/&absubli;/{subli}/g;
s/&afortis;/{aqua fortis}/g;
s/&air;/{air}/g;
s/&alembic;/{alembic}/g;
s/&alum;/{alum}/g;
s/&alum2;/{alum}/g;
s/&antimony;/{antimony}/g;
s/&aregis;/{aqua regis}/g;
s/&arsenic;/{arsenic}/g;
s/&arsenic2;/{arsenic}/g;
s/&ashes;/{ashes}/g;
s/&blood;/{blood}/g;
s/&cinnabar2;/{cinnabar}/g;
s/&cinnabar3;/{cinnabar}/g;
s/&day;/{day}/g;
s/&earth;/{earth}/g;
s/&fire;/{fire}/g;
s/&glass;/{glass}/g;
s/&night;/{night}/g;
s/&nitre;/{nitre}/g;
s/&oil;/{oil}/g;
s/&potash;/{potash}/g;
s/&purify;/{purify}/g;
s/&quicklime;/&#x2295;&#x031F;/g;
s/&salarmon;/{sal armoniac}/g;
s/&salarmon2;/&#x2295;/g;
s/&salt;/{salt}/g;
s/&saltgemme;/&#x2649;/g;
s/&sulphur;/{sulphur}/g;
s/&talc;/{talc}/g;
s/&tartar;/{tartar}/g;
s/&urine;/&#x22A1;/g;
s/&vinedist;/{vinegar distilled}/g;
s/&vinegar;/{vinegar}/g;
s/&vitriol;/{vitriol}/g;
s/&vitriol2;/{vitriol}/g;
s/&vitriol3;/{vitriol}/g;
s/&water;/{water}/g;
s/&wax;/{wax}/g;
s/&ang;/&#x2220;/g;
s/&angulum;/{angulum}/g;
s/&arc;/&#x2312;/g;
s/&decimalc;/&#x002E;/g;
s/&decimalL;/&#x230A;/g;
s/&divisor;/{divisor}/g;
s/&geoprop;/&#x223A;/g;
s/&higherterm;/{higher term}/g;
s/&higherthan;/&#x2227;/g;
s/&langle;/{left angle}/g;
s/&lazyS;/&#x223D;/g;
s/&lessthan;/&#x2039;/g;
s/&logarithm;/{logarithm}/g;
s/&lowerterm;/&#x22BD;/g;
s/&lowerthan;/&#x2228;/g;
s/&morethan;/&#x203A;/g;
s/&multiplier;/{multiplier}/g;
s/&potestas;/{potestas}/g;
s/&powerof1;/{powerof1}/g;
s/&powerof2;/{powerof2}/g;
s/&powerof3;/{powerof3}/g;
s/&powerof4;/{powerof4}/g;
s/&powerof5;/{powerof5}/g;
s/&powerof6;/{powerof6}/g;
s/&pprime;/&#x0027;&#x0027;&#x0027;&#x0027;&#x0027;/g;
s/&product;/{product}/g;
s/&proportion;/&#x2237;/g;
s/&proportion2;/&#x221D;/g;
s/&qprime;/&#x0027;&#x0027;&#x0027;&#x0027;/g;
s/&quotient;/{quotient}/g;
s/&ratio;/&#x2236;/g;
s/&ration;/&#x211B;/g;
s/&revC;/&#x0186;/g;
s/&rn10000;/&#x2182;/g;
s/&rn100000;/{roman 100000}/g;
s/&rn1000000;/{roman 1000000}/g;
s/&rn5000;/&#x2181;/g;
s/&rn50000;/{roman 50000}/g;
s/&rn500000;/{roman 500000}/g;
s/&barline;/{barline}/g;
s/&Barline;/{double barline}/g;
s/&barlinef;/{final barline}/g;
s/&cclef;/{C-clef}/g;
# Common-time signatures are spelled out in braces, with the tempo in
# parentheses.  The largo entry used to emit the misspelling "largro".
# NOTE(review): this table is described as generated from charmap.htm; if
# it is regenerated, the same typo may need fixing at the source.
s/&commonTime-adagio;/{common time (adagio)}/g;
s/&commonTime-allegro;/{common time (allegro)}/g;
s/&commonTime-largo;/{common time (largo)}/g;
s/&direct;/{direct}/g;
s/&fclef;/{F-clef}/g;
s/&fermata;/{fermata}/g;
s/&fermatab;/{fermata below}/g;
s/&gclef;/{G-clef}/g;
s/&lrepeat;/{left repeat}/g;
s/&musicBrace;/{musical brace}/g;
s/&musicBracket;/{musical bracket}/g;
s/&musicNote;/&#x2669;/g;
s/&notebreve;/{breve note}/g;
s/&notecrotchet;/{crotchet note}/g;
s/&notedemisemiquaver;/{demisemiquaver note}/g;
s/&noteEighth;/&#x266A;/g;
s/&noteHalf;/{half note}/g;
s/&notelarge;/{large note}/g;
s/&notelong;/{long note}/g;
s/&noteminim;/{minim note}/g;
s/&noteQuarter;/&#x2669;/g;
s/&notequaver;/{quaver note}/g;
s/&notesemibreve;/{semibreve note}/g;
s/&notesemiquaver;/{semiquaver note}/g;
s/&noteWhole;/{whole note}/g;
s/&repeat;/{repeat}/g;
s/&restbreve;/{breve rest}/g;
s/&restcrotchet;/{crotchet rest}/g;
s/&restdemisemiquaver;/{demisemiquaver rest}/g;
s/&restlarge;/{large rest}/g;
s/&restlong;/{long rest}/g;
s/&restminim;/{minim rest}/g;
s/&restquaver;/{quaver rest}/g;
s/&restsemibreve;/{semibreve rest}/g;
s/&restsemiquaver;/{semiquaver rest}/g;
s/&rrepeat;/{right repeat}/g;
s/&sharp2;/&#x266F;/g;
s/&sharpb;/{sharpb}/g;
s/&slur;/&#x00A0;&#x035C;&#x00A0;/g;
s/&timeimperf-prolatimperf;/{timeimperf-prolatimperf}/g;
s/&timeimperf-prolatimperf-rev;/{timeimperf-prolatimperf-rev}/g;
s/&timeimperf-prolatimperf-rev-str;/{timeimperf-prolatimperf-rev-str}/g;
s/&timeimperf-prolatimperf-str;/{timeimperf-prolatimperf-str}/g;
s/&timeimperf-prolatimperf-x;/{timeimperf-prolatimperf-x}/g;
s/&timeimperf-prolatperf;/{timeimperf-prolatperf}/g;
s/&timeimperf-prolatperf-rev;/{timeimperf-prolatperf-rev}/g;
s/&timeimperf-prolatperf-rev-str;/{timeimperf-prolatperf-rev-str}/g;
s/&timeimperf-prolatperf-str;/{timeimperf-prolatperf-str}/g;
s/&timeperf-prolatimperf;/{timeperf-prolatimperf}/g;
s/&timeperf-prolatimperf-str;/{timeperf-prolatimperf-str}/g;
s/&timeperf-prolatimperf-x;/{timeperf-prolatimperf-x}/g;
s/&timeperf-prolatperf;/{timeperf-prolatperf}/g;
s/&timeperf-prolatperf-str;/{timeperf-prolatperf-str}/g;
s/&vocaljoin;/{vocal join}/g;
s/&aacute;/&#x00E1;/g;
s/&Aacute;/&#x00C1;/g;
s/&acirc;/&#x00E2;/g;
s/&Acirc;/&#x00C2;/g;
s/&agrave;/&#x00E0;/g;
s/&Agrave;/&#x00C0;/g;
s/&aring;/&#x00E5;/g;
s/&Aring;/&#x00C5;/g;
s/&atilde;/&#x00E3;/g;
s/&Atilde;/&#x00C3;/g;
s/&auml;/&#x00E4;/g;
s/&Auml;/&#x00C4;/g;
s/&aelig;/&#x00E6;/g;
s/&AElig;/&#x00C6;/g;
s/&ccedil;/&#x00E7;/g;
s/&Ccedil;/&#x00C7;/g;
s/&eth;/&#x00F0;/g;
s/&ETH;/&#x00D0;/g;
s/&eacute;/&#x00E9;/g;
s/&Eacute;/&#x00C9;/g;
s/&ecirc;/&#x00EA;/g;
s/&Ecirc;/&#x00CA;/g;
s/&egrave;/&#x00E8;/g;
s/&Egrave;/&#x00C8;/g;
s/&euml;/&#x00EB;/g;
s/&Euml;/&#x00CB;/g;
s/&iacute;/&#x00ED;/g;
s/&Iacute;/&#x00CD;/g;
s/&icirc;/&#x00EE;/g;
s/&Icirc;/&#x00CE;/g;
s/&igrave;/&#x00EC;/g;
s/&Igrave;/&#x00CC;/g;
s/&iuml;/&#x00EF;/g;
s/&Iuml;/&#x00CF;/g;
s/&ntilde;/&#x00F1;/g;
s/&Ntilde;/&#x00D1;/g;
s/&oacute;/&#x00F3;/g;
s/&Oacute;/&#x00D3;/g;
s/&ocirc;/&#x00F4;/g;
s/&Ocirc;/&#x00D4;/g;
s/&ograve;/&#x00F2;/g;
s/&Ograve;/&#x00D2;/g;
s/&oslash;/&#x00F8;/g;
s/&Oslash;/&#x00D8;/g;
s/&otilde;/&#x00F5;/g;
s/&Otilde;/&#x00D5;/g;
s/&ouml;/&#x00F6;/g;
s/&Ouml;/&#x00D6;/g;
s/&szlig;/&#x00DF;/g;
s/&thorn;/&#x00FE;/g;
s/&THORN;/&#x00DE;/g;
s/&uacute;/&#x00FA;/g;
s/&Uacute;/&#x00DA;/g;
s/&ucirc;/&#x00FB;/g;
s/&Ucirc;/&#x00DB;/g;
s/&ugrave;/&#x00F9;/g;
s/&Ugrave;/&#x00D9;/g;
s/&uuml;/&#x00FC;/g;
s/&Uuml;/&#x00DC;/g;
s/&yacute;/&#x00FD;/g;
s/&Yacute;/&#x00DD;/g;
s/&yuml;/&#x00FF;/g;
s/&abreve;/&#x0103;/g;
s/&Abreve;/&#x0102;/g;
s/&amacr;/&#x0101;/g;
s/&Amacr;/&#x0100;/g;
s/&aogon;/&#x0105;/g;
s/&Aogon;/&#x0104;/g;
s/&cacute;/&#x0107;/g;
s/&Cacute;/&#x0106;/g;
s/&ccaron;/&#x010D;/g;
s/&Ccaron;/&#x010C;/g;
s/&ccirc;/&#x0109;/g;
s/&Ccirc;/&#x0108;/g;
s/&cdot;/&#x010B;/g;
s/&Cdot;/&#x010A;/g;
s/&dcaron;/&#x010F;/g;
s/&Dcaron;/&#x010E;/g;
s/&dstrok;/&#x0111;/g;
s/&Dstrok;/&#x0110;/g;
s/&ecaron;/&#x011B;/g;
s/&Ecaron;/&#x011A;/g;
s/&edot;/&#x0117;/g;
s/&Edot;/&#x0116;/g;
s/&emacr;/&#x0113;/g;
s/&Emacr;/&#x0112;/g;
s/&eogon;/&#x0119;/g;
s/&Eogon;/&#x0118;/g;
s/&gacute;/&#x01F5;/g;
s/&gbreve;/&#x011F;/g;
s/&Gbreve;/&#x011E;/g;
s/&Gcedil;/&#x0122;/g;
s/&gcirc;/&#x011D;/g;
s/&Gcirc;/&#x011C;/g;
s/&gdot;/&#x0121;/g;
s/&Gdot;/&#x0120;/g;
s/&hcirc;/&#x0125;/g;
s/&Hcirc;/&#x0124;/g;
s/&hstrok;/&#x0127;/g;
s/&Hstrok;/&#x0126;/g;
s/&Idot;/&#x0130;/g;
s/&Imacr;/&#x012A;/g;
s/&imacr;/&#x012B;/g;
s/&ijlig;/&#x0133;/g;
s/&IJlig;/&#x0132;/g;
s/&inodot;/&#x0131;/g;
s/&iogon;/&#x012F;/g;
s/&Iogon;/&#x012E;/g;
s/&itilde;/&#x0129;/g;
s/&Itilde;/&#x0128;/g;
s/&jcirc;/&#x0135;/g;
s/&Jcirc;/&#x0134;/g;
s/&kcedil;/&#x0137;/g;
s/&Kcedil;/&#x0136;/g;
s/&kgreen;/&#x0138;/g;
s/&lacute;/&#x013A;/g;
s/&Lacute;/&#x0139;/g;
s/&lcaron;/&#x013E;/g;
s/&Lcaron;/&#x013D;/g;
s/&lcedil;/&#x013C;/g;
s/&Lcedil;/&#x013B;/g;
s/&lmidot;/&#x0140;/g;
s/&Lmidot;/&#x013F;/g;
s/&lstrok;/&#x0142;/g;
s/&Lstrok;/&#x0141;/g;
s/&nacute;/&#x0144;/g;
s/&Nacute;/&#x0143;/g;
s/&eng;/&#x014B;/g;
s/&ENG;/&#x014A;/g;
s/&napos;/&#x0149;/g;
s/&ncaron;/&#x0148;/g;
s/&Ncaron;/&#x0147;/g;
s/&ncedil;/&#x0146;/g;
s/&Ncedil;/&#x0145;/g;
s/&odblac;/&#x0151;/g;
s/&Odblac;/&#x0150;/g;
s/&Omacr;/&#x014C;/g;
s/&omacr;/&#x014D;/g;
s/&oelig;/&#x0153;/g;
s/&OElig;/&#x0152;/g;
s/&racute;/&#x0155;/g;
s/&Racute;/&#x0154;/g;
s/&rcaron;/&#x0159;/g;
s/&Rcaron;/&#x0158;/g;
s/&rcedil;/&#x0157;/g;
s/&Rcedil;/&#x0156;/g;
s/&sacute;/&#x015B;/g;
s/&Sacute;/&#x015A;/g;
s/&scaron;/&#x0161;/g;
s/&Scaron;/&#x0160;/g;
s/&scedil;/&#x015F;/g;
s/&Scedil;/&#x015E;/g;
s/&scirc;/&#x015D;/g;
s/&Scirc;/&#x015C;/g;
s/&tcaron;/&#x0165;/g;
s/&Tcaron;/&#x0164;/g;
s/&tcedil;/&#x0163;/g;
s/&Tcedil;/&#x0162;/g;
s/&tstrok;/&#x0167;/g;
s/&Tstrok;/&#x0166;/g;
s/&ubreve;/&#x016D;/g;
s/&Ubreve;/&#x016C;/g;
s/&udblac;/&#x0171;/g;
s/&Udblac;/&#x0170;/g;
s/&umacr;/&#x016B;/g;
s/&Umacr;/&#x016A;/g;
s/&uogon;/&#x0173;/g;
s/&Uogon;/&#x0172;/g;
s/&uring;/&#x016F;/g;
s/&Uring;/&#x016E;/g;
s/&utilde;/&#x0169;/g;
s/&Utilde;/&#x0168;/g;
s/&wcirc;/&#x0175;/g;
s/&Wcirc;/&#x0174;/g;
s/&ycirc;/&#x0177;/g;
s/&Ycirc;/&#x0176;/g;
s/&Yuml;/&#x0178;/g;
s/&zacute;/&#x017A;/g;
s/&Zacute;/&#x0179;/g;
s/&zcaron;/&#x017E;/g;
s/&Zcaron;/&#x017D;/g;
s/&zdot;/&#x017C;/g;
s/&Zdot;/&#x017B;/g;
s/&frac12;/&#x00BD;/g;
s/&frac14;/&#x00BC;/g;
s/&frac34;/&#x00BE;/g;
s/&frac18;/&#x215B;/g;
s/&frac38;/&#x215C;/g;
s/&frac58;/&#x215D;/g;
s/&frac78;/&#x215E;/g;
s/&sup1;/&#x00B9;/g;
s/&sup2;/&#x00B2;/g;
s/&sup3;/&#x00B3;/g;
s/&plus;/&#x002B;/g;
s/&plusmn;/&#x00B1;/g;
s/&equals;/&#x003D;/g;
s/&divide;/&#x00F7;/g;
s/&times;/&#x00D7;/g;
s/&curren;/&#x00A4;/g;
s/&pound;/&#x00A3;/g;
s/&dollar;/&#x0024;/g;
s/&cent;/&#x00A2;/g;
s/&yen;/&#x00A5;/g;
s/&num;/&#x0023;/g;
s/&percnt;/&#x0025;/g;
s/&ast;/&#x002A;/g;
s/&commat;/&#x0040;/g;
s/&lsqb;/&#x005B;/g;
s/&bsol;/&#x005C;/g;
s/&rsqb;/&#x005D;/g;
s/&lcub;/&#x007B;/g;
s/&horbar;/&#x2015;/g;
s/&verbar;/&#x007C;/g;
s/&rcub;/&#x007D;/g;
s/&micro;/&#x00B5;/g;
s/&ohm;/&#x2126;/g;
s/&deg;/&#x00B0;/g;
s/&ordm;/&#x00BA;/g;
s/&ordf;/&#x00AA;/g;
s/&sect;/&#x00A7;/g;
s/&para;/&#x00B6;/g;
s/&middot;/&#x00B7;/g;
s/&larr;/&#x2190;/g;
s/&rarr;/&#x2192;/g;
s/&uarr;/&#x2191;/g;
s/&darr;/&#x2193;/g;
s/&copy;/&#x00A9;/g;
s/&reg;/&#x00AE;/g;
s/&trade;/&#x2122;/g;
s/&not;/&#x00AC;/g;
s/&sung;/&#x2669;/g;
s/&excl;/&#x0021;/g;
s/&iexcl;/&#x00A1;/g;
s/&lpar;/&#x0028;/g;
s/&rpar;/&#x0029;/g;
s/&comma;/&#x002C;/g;
s/&lowbar;/&#x005F;/g;
s/&hyphen;/&#x002D;/g;
s/&period;/&#x002E;/g;
s/&sol;/&#x002F;/g;
s/&colon;/&#x003A;/g;
s/&semi;/&#x003B;/g;
s/&quest;/&#x003F;/g;
s/&iquest;/&#x00BF;/g;
s/&laquo;/&#x00AB;/g;
s/&raquo;/&#x00BB;/g;
s/&lsquo;/&#x2018;/g;
s/&rsquo;/&#x2019;/g;
s/&ldquo;/&#x201C;/g;
s/&rdquo;/&#x201D;/g;
s/&nbsp;/&#x00A0;/g;
s/&shy;/&#x00AD;/g;
s/&emsp;/&#x2003;/g;
s/&ensp;/&#x2002;/g;
s/&emsp13;/&#x2004;/g;
s/&emsp14;/&#x2005;/g;
s/&numsp;/&#x2007;/g;
s/&puncsp;/&#x2008;/g;
s/&thinsp;/&#x2009;/g;
s/&hairsp;/&#x200A;/g;
s/&mdash;/&#x2014;/g;
s/&ndash;/&#x2013;/g;
s/&dash;/&#x2010;/g;
s/&blank;/&#x2423;/g;
s/&hellip;/&#x2026;/g;
s/&nldr;/&#x2025;/g;
s/&frac13;/&#x2153;/g;
s/&frac23;/&#x2154;/g;
s/&frac15;/&#x2155;/g;
s/&frac25;/&#x2156;/g;
s/&frac35;/&#x2157;/g;
s/&frac45;/&#x2158;/g;
s/&frac16;/&#x2159;/g;
s/&frac56;/&#x215A;/g;
s/&incare;/&#x2105;/g;
s/&block;/&#x2588;/g;
s/&uhblk;/&#x2580;/g;
s/&lhblk;/&#x2584;/g;
s/&blk14;/&#x2591;/g;
s/&blk12;/&#x2592;/g;
s/&blk34;/&#x2593;/g;
s/&marker;/&#x25AE;/g;
s/&cir;/&#x25CB;/g;
s/&rect;/&#x25AD;/g;
s/&utri;/&#x25B5;/g;
s/&dtri;/&#x25BF;/g;
s/&star;/&#x22C6;/g;
s/&bull;/&#x2022;/g;
s/&squf;/&#x25A0;/g;
s/&utrif;/&#x25B4;/g;
s/&dtrif;/&#x25BE;/g;
s/&ltrif;/&#x25C2;/g;
s/&rtrif;/&#x25B8;/g;
s/&clubs;/&#x2663;/g;
s/&diams;/&#x2666;/g;
s/&hearts;/&#x2665;/g;
s/&spades;/&#x2660;/g;
s/&malt;/&#x2720;/g;
s/&dagger;/&#x2020;/g;
s/&Dagger;/&#x2021;/g;
s/&check;/&#x2713;/g;
s/&sharp;/&#x266F;/g;
s/&flat;/&#x266D;/g;
s/&phone;/&#x260E;/g;
s/&telrec;/&#x2315;/g;
s/&copysr;/&#x2117;/g;
s/&caret;/&#x2041;/g;
s/&lsquor;/&#x201A;/g;
s/&ldquor;/&#x201E;/g;
s/&fflig;/&#xFB00;/g;
s/&filig;/&#xFB01;/g;
s/&ffilig;/&#xFB03;/g;
s/&fjlig;/{fj}/g;
s/&ffllig;/&#xFB04;/g;
s/&fllig;/&#xFB02;/g;
s/&vellip;/&#x22EE;/g;
s/&hybull;/&#x2043;/g;
s/&loz;/&#x25CA;/g;
s/&lozf;/&#x2726;/g;
s/&ltri;/&#x25C3;/g;
s/&rtri;/&#x25B9;/g;
s/&starf;/&#x2605;/g;
s/&natur;/&#x266E;/g;
s/&rx;/&#x211E;/g;
s/&sext;/&#x2736;/g;
s/&target;/&#x2316;/g;
s/&dlcrop;/&#x230D;/g;
s/&drcrop;/&#x230C;/g;
s/&ulcrop;/&#x230F;/g;
s/&urcrop;/&#x230E;/g;
s/&aleph;/&#x2135;/g;
s/&and;/&#x2227;/g;
s/&ang90;/&#x221F;/g;
s/&angsph;/&#x2222;/g;
s/&ap;/&#x2248;/g;
s/&becaus;/&#x2235;/g;
s/&cap;/&#x2229;/g;
s/&cong;/&#x2245;/g;
s/&conint;/&#x222E;/g;
s/&cup;/&#x222A;/g;
s/&equiv;/&#x2261;/g;
s/&exist;/&#x2203;/g;
s/&forall;/&#x2200;/g;
s/&fnof;/&#x0192;/g;
s/&ge;/&#x2265;/g;
s/&iff;/&#x21D4;/g;
s/&infin;/&#x221E;/g;
s/&int;/&#x222B;/g;
s/&isin;/&#x220A;/g;
s/&lang;/&#x3008;/g;
s/&lArr;/&#x21D0;/g;
s/&le;/&#x2264;/g;
s/&minus;/&#x2212;/g;
s/&mnplus;/&#x2213;/g;
s/&nabla;/&#x2207;/g;
s/&ne;/&#x2260;/g;
s/&ni;/&#x220D;/g;
s/&or;/&#x2228;/g;
s/&par;/&#x2225;/g;
s/&part;/&#x2202;/g;
s/&permil;/&#x2030;/g;
s/&perp;/&#x22A5;/g;
s/&prime;/&#x2032;/g;
s/&Prime;/&#x2033;/g;
s/&prop;/&#x221D;/g;
s/&radic;/&#x221A;/g;
s/&rang;/&#x3009;/g;
s/&rArr;/&#x21D2;/g;
s/&sim;/&#x223C;/g;
s/&sime;/&#x2243;/g;
s/&square;/&#x25A1;/g;
s/&sub;/&#x2282;/g;
s/&sube;/&#x2286;/g;
s/&sup;/&#x2283;/g;
s/&supe;/&#x2287;/g;
s/&there4;/&#x2234;/g;
s/&Verbar;/&#x2016;/g;
s/&angst;/&#x212B;/g;
s/&bernou;/&#x212C;/g;
s/&compfn;/&#x2218;/g;
s/&DotDot;/&#x20DC;/g;
s/&hamilt;/&#x210B;/g;
s/&lagran;/&#x2112;/g;
s/&lowast;/&#x2217;/g;
s/&notin;/&#x2209;/g;
s/&order;/&#x2134;/g;
s/&phmmat;/&#x2133;/g;
s/&tdot;/&#x20DB;/g;
s/&tprime;/&#x2034;/g;
s/&wedgeq;/&#x2259;/g;
s/&agr;/&#x03B1;/g;
s/&Agr;/&#x0391;/g;
s/&bgr;/&#x03B2;/g;
s/&Bgr;/&#x0392;/g;
s/&ggr;/&#x03B3;/g;
s/&Ggr;/&#x0393;/g;
s/&dgr;/&#x03B4;/g;
s/&Dgr;/&#x0394;/g;
s/&egr;/&#x03B5;/g;
s/&Egr;/&#x0395;/g;
s/&zgr;/&#x03B6;/g;
s/&Zgr;/&#x0396;/g;
s/&eegr;/&#x03B7;/g;
s/&EEgr;/&#x0397;/g;
s/&thgr;/&#x03B8;/g;
s/&THgr;/&#x0398;/g;
s/&igr;/&#x03B9;/g;
s/&Igr;/&#x0399;/g;
s/&kgr;/&#x03BA;/g;
s/&Kgr;/&#x039A;/g;
s/&lgr;/&#x03BB;/g;
s/&Lgr;/&#x039B;/g;
s/&mgr;/&#x03BC;/g;
s/&Mgr;/&#x039C;/g;
s/&ngr;/&#x03BD;/g;
s/&Ngr;/&#x039D;/g;
s/&xgr;/&#x03BE;/g;
s/&Xgr;/&#x039E;/g;
s/&ogr;/&#x03BF;/g;
s/&Ogr;/&#x039F;/g;
s/&pgr;/&#x03C0;/g;
s/&Pgr;/&#x03A0;/g;
s/&rgr;/&#x03C1;/g;
s/&Rgr;/&#x03A1;/g;
s/&sfgr;/&#x03C2;/g;
s/&sgr;/&#x03C3;/g;
s/&Sgr;/&#x03A3;/g;
s/&tgr;/&#x03C4;/g;
s/&Tgr;/&#x03A4;/g;
s/&ugr;/&#x03C5;/g;
s/&Ugr;/&#x03A5;/g;
s/&phgr;/&#x03C6;/g;
s/&PHgr;/&#x03A6;/g;
s/&khgr;/&#x03C7;/g;
s/&KHgr;/&#x03A7;/g;
s/&psgr;/&#x03C8;/g;
s/&PSgr;/&#x03A8;/g;
s/&ohgr;/&#x03C9;/g;
s/&OHgr;/&#x03A9;/g;
s/&acy;/&#x0430;/g;
s/&Acy;/&#x0410;/g;
s/&bcy;/&#x0431;/g;
s/&Bcy;/&#x0411;/g;
s/&vcy;/&#x0432;/g;
s/&Vcy;/&#x0412;/g;
s/&gcy;/&#x0433;/g;
s/&Gcy;/&#x0413;/g;
s/&dcy;/&#x0434;/g;
s/&Dcy;/&#x0414;/g;
s/&iecy;/&#x0435;/g;
s/&IEcy;/&#x0415;/g;
s/&iocy;/&#x0451;/g;
s/&IOcy;/&#x0401;/g;
s/&zhcy;/&#x0436;/g;
s/&ZHcy;/&#x0416;/g;
s/&zcy;/&#x0437;/g;
s/&Zcy;/&#x0417;/g;
s/&icy;/&#x0438;/g;
s/&Icy;/&#x0418;/g;
s/&jcy;/&#x0439;/g;
s/&Jcy;/&#x0419;/g;
s/&kcy;/&#x043A;/g;
s/&Kcy;/&#x041A;/g;
s/&lcy;/&#x043B;/g;
s/&Lcy;/&#x041B;/g;
s/&mcy;/&#x043C;/g;
s/&Mcy;/&#x041C;/g;
s/&ncy;/&#x043D;/g;
s/&Ncy;/&#x041D;/g;
s/&ocy;/&#x043E;/g;
s/&Ocy;/&#x041E;/g;
s/&pcy;/&#x043F;/g;
s/&Pcy;/&#x041F;/g;
s/&rcy;/&#x0440;/g;
s/&Rcy;/&#x0420;/g;
s/&scy;/&#x0441;/g;
s/&Scy;/&#x0421;/g;
s/&tcy;/&#x0442;/g;
s/&Tcy;/&#x0422;/g;
s/&ucy;/&#x0443;/g;
s/&Ucy;/&#x0423;/g;
s/&fcy;/&#x0444;/g;
s/&Fcy;/&#x0424;/g;
s/&khcy;/&#x0445;/g;
s/&KHcy;/&#x0425;/g;
s/&tscy;/&#x0446;/g;
s/&TScy;/&#x0426;/g;
s/&chcy;/&#x0447;/g;
s/&CHcy;/&#x0427;/g;
s/&shcy;/&#x0448;/g;
s/&SHcy;/&#x0428;/g;
s/&shchcy;/&#x0449;/g;
s/&SHCHcy;/&#x0429;/g;
s/&hardcy;/&#x044A;/g;
s/&HARDcy;/&#x042A;/g;
s/&ycy;/&#x044B;/g;
s/&Ycy;/&#x042B;/g;
s/&softcy;/&#x044C;/g;
s/&SOFTcy;/&#x042C;/g;
s/&ecy;/&#x044D;/g;
s/&Ecy;/&#x042D;/g;
s/&yucy;/&#x044E;/g;
s/&YUcy;/&#x042E;/g;
s/&yacy;/&#x044F;/g;
s/&YAcy;/&#x042F;/g;
s/&numero;/&#x2116;/g;
# Ethiopic entities are NOT converted to character references. Each entity
# listed below, &ETHIOPIC-xxx;, is rewritten as the plain-text placeholder
# {ETHIOPIC-xxx} (ampersand and semicolon replaced by braces), so the entity
# name itself appears in the generated HTML. These lines only touch the names
# spelled out here; any other &ETHIOPIC-...; string passes through unchanged.
s/&ETHIOPIC-HA;/{ETHIOPIC-HA}/g;
s/&ETHIOPIC-HU;/{ETHIOPIC-HU}/g;
s/&ETHIOPIC-HI;/{ETHIOPIC-HI}/g;
s/&ETHIOPIC-HAA;/{ETHIOPIC-HAA}/g;
s/&ETHIOPIC-HEE;/{ETHIOPIC-HEE}/g;
s/&ETHIOPIC-HE;/{ETHIOPIC-HE}/g;
s/&ETHIOPIC-HO;/{ETHIOPIC-HO}/g;
s/&ETHIOPIC-HOA;/{ETHIOPIC-HOA}/g;
s/&ETHIOPIC-LA;/{ETHIOPIC-LA}/g;
s/&ETHIOPIC-LU;/{ETHIOPIC-LU}/g;
s/&ETHIOPIC-LI;/{ETHIOPIC-LI}/g;
s/&ETHIOPIC-LAA;/{ETHIOPIC-LAA}/g;
s/&ETHIOPIC-LEE;/{ETHIOPIC-LEE}/g;
s/&ETHIOPIC-LE;/{ETHIOPIC-LE}/g;
s/&ETHIOPIC-LO;/{ETHIOPIC-LO}/g;
s/&ETHIOPIC-LWA;/{ETHIOPIC-LWA}/g;
s/&ETHIOPIC-HHA;/{ETHIOPIC-HHA}/g;
s/&ETHIOPIC-HHU;/{ETHIOPIC-HHU}/g;
s/&ETHIOPIC-HHI;/{ETHIOPIC-HHI}/g;
s/&ETHIOPIC-HHAA;/{ETHIOPIC-HHAA}/g;
s/&ETHIOPIC-HHEE;/{ETHIOPIC-HHEE}/g;
s/&ETHIOPIC-HHE;/{ETHIOPIC-HHE}/g;
s/&ETHIOPIC-HHO;/{ETHIOPIC-HHO}/g;
s/&ETHIOPIC-HHWA;/{ETHIOPIC-HHWA}/g;
s/&ETHIOPIC-MA;/{ETHIOPIC-MA}/g;
s/&ETHIOPIC-MU;/{ETHIOPIC-MU}/g;
s/&ETHIOPIC-MI;/{ETHIOPIC-MI}/g;
s/&ETHIOPIC-MAA;/{ETHIOPIC-MAA}/g;
s/&ETHIOPIC-MEE;/{ETHIOPIC-MEE}/g;
s/&ETHIOPIC-ME;/{ETHIOPIC-ME}/g;
s/&ETHIOPIC-MO;/{ETHIOPIC-MO}/g;
s/&ETHIOPIC-MWA;/{ETHIOPIC-MWA}/g;
s/&ETHIOPIC-SZA;/{ETHIOPIC-SZA}/g;
s/&ETHIOPIC-SZU;/{ETHIOPIC-SZU}/g;
s/&ETHIOPIC-SZI;/{ETHIOPIC-SZI}/g;
s/&ETHIOPIC-SZAA;/{ETHIOPIC-SZAA}/g;
s/&ETHIOPIC-SZEE;/{ETHIOPIC-SZEE}/g;
s/&ETHIOPIC-SZE;/{ETHIOPIC-SZE}/g;
s/&ETHIOPIC-SZO;/{ETHIOPIC-SZO}/g;
s/&ETHIOPIC-SZWA;/{ETHIOPIC-SZWA}/g;
s/&ETHIOPIC-RA;/{ETHIOPIC-RA}/g;
s/&ETHIOPIC-RU;/{ETHIOPIC-RU}/g;
s/&ETHIOPIC-RI;/{ETHIOPIC-RI}/g;
s/&ETHIOPIC-RAA;/{ETHIOPIC-RAA}/g;
s/&ETHIOPIC-REE;/{ETHIOPIC-REE}/g;
s/&ETHIOPIC-RE;/{ETHIOPIC-RE}/g;
s/&ETHIOPIC-RO;/{ETHIOPIC-RO}/g;
s/&ETHIOPIC-RWA;/{ETHIOPIC-RWA}/g;
s/&ETHIOPIC-SA;/{ETHIOPIC-SA}/g;
s/&ETHIOPIC-SU;/{ETHIOPIC-SU}/g;
s/&ETHIOPIC-SI;/{ETHIOPIC-SI}/g;
s/&ETHIOPIC-SAA;/{ETHIOPIC-SAA}/g;
s/&ETHIOPIC-SEE;/{ETHIOPIC-SEE}/g;
s/&ETHIOPIC-SE;/{ETHIOPIC-SE}/g;
s/&ETHIOPIC-SO;/{ETHIOPIC-SO}/g;
s/&ETHIOPIC-SWA;/{ETHIOPIC-SWA}/g;
s/&ETHIOPIC-SHA;/{ETHIOPIC-SHA}/g;
s/&ETHIOPIC-SHU;/{ETHIOPIC-SHU}/g;
s/&ETHIOPIC-SHI;/{ETHIOPIC-SHI}/g;
s/&ETHIOPIC-SHAA;/{ETHIOPIC-SHAA}/g;
s/&ETHIOPIC-SHEE;/{ETHIOPIC-SHEE}/g;
s/&ETHIOPIC-SHE;/{ETHIOPIC-SHE}/g;
s/&ETHIOPIC-SHO;/{ETHIOPIC-SHO}/g;
s/&ETHIOPIC-SHWA;/{ETHIOPIC-SHWA}/g;
s/&ETHIOPIC-QA;/{ETHIOPIC-QA}/g;
s/&ETHIOPIC-QU;/{ETHIOPIC-QU}/g;
s/&ETHIOPIC-QI;/{ETHIOPIC-QI}/g;
s/&ETHIOPIC-QAA;/{ETHIOPIC-QAA}/g;
s/&ETHIOPIC-QEE;/{ETHIOPIC-QEE}/g;
s/&ETHIOPIC-QE;/{ETHIOPIC-QE}/g;
s/&ETHIOPIC-QO;/{ETHIOPIC-QO}/g;
s/&ETHIOPIC-QOA;/{ETHIOPIC-QOA}/g;
s/&ETHIOPIC-QWA;/{ETHIOPIC-QWA}/g;
s/&ETHIOPIC-QWI;/{ETHIOPIC-QWI}/g;
s/&ETHIOPIC-QWAA;/{ETHIOPIC-QWAA}/g;
s/&ETHIOPIC-QWEE;/{ETHIOPIC-QWEE}/g;
s/&ETHIOPIC-QWE;/{ETHIOPIC-QWE}/g;
s/&ETHIOPIC-QHA;/{ETHIOPIC-QHA}/g;
s/&ETHIOPIC-QHU;/{ETHIOPIC-QHU}/g;
s/&ETHIOPIC-QHI;/{ETHIOPIC-QHI}/g;
s/&ETHIOPIC-QHAA;/{ETHIOPIC-QHAA}/g;
s/&ETHIOPIC-QHEE;/{ETHIOPIC-QHEE}/g;
s/&ETHIOPIC-QHE;/{ETHIOPIC-QHE}/g;
s/&ETHIOPIC-QHO;/{ETHIOPIC-QHO}/g;
s/&ETHIOPIC-QHWA;/{ETHIOPIC-QHWA}/g;
s/&ETHIOPIC-QHWI;/{ETHIOPIC-QHWI}/g;
s/&ETHIOPIC-QHWAA;/{ETHIOPIC-QHWAA}/g;
s/&ETHIOPIC-QHWEE;/{ETHIOPIC-QHWEE}/g;
s/&ETHIOPIC-QHWE;/{ETHIOPIC-QHWE}/g;
s/&ETHIOPIC-BA;/{ETHIOPIC-BA}/g;
s/&ETHIOPIC-BU;/{ETHIOPIC-BU}/g;
s/&ETHIOPIC-BI;/{ETHIOPIC-BI}/g;
s/&ETHIOPIC-BAA;/{ETHIOPIC-BAA}/g;
s/&ETHIOPIC-BEE;/{ETHIOPIC-BEE}/g;
s/&ETHIOPIC-BE;/{ETHIOPIC-BE}/g;
s/&ETHIOPIC-BO;/{ETHIOPIC-BO}/g;
s/&ETHIOPIC-BWA;/{ETHIOPIC-BWA}/g;
s/&ETHIOPIC-VA;/{ETHIOPIC-VA}/g;
s/&ETHIOPIC-VU;/{ETHIOPIC-VU}/g;
s/&ETHIOPIC-VI;/{ETHIOPIC-VI}/g;
s/&ETHIOPIC-VAA;/{ETHIOPIC-VAA}/g;
s/&ETHIOPIC-VEE;/{ETHIOPIC-VEE}/g;
s/&ETHIOPIC-VE;/{ETHIOPIC-VE}/g;
s/&ETHIOPIC-VO;/{ETHIOPIC-VO}/g;
s/&ETHIOPIC-VWA;/{ETHIOPIC-VWA}/g;
s/&ETHIOPIC-TA;/{ETHIOPIC-TA}/g;
s/&ETHIOPIC-TU;/{ETHIOPIC-TU}/g;
s/&ETHIOPIC-TI;/{ETHIOPIC-TI}/g;
s/&ETHIOPIC-TAA;/{ETHIOPIC-TAA}/g;
s/&ETHIOPIC-TEE;/{ETHIOPIC-TEE}/g;
s/&ETHIOPIC-TE;/{ETHIOPIC-TE}/g;
s/&ETHIOPIC-TO;/{ETHIOPIC-TO}/g;
s/&ETHIOPIC-TWA;/{ETHIOPIC-TWA}/g;
s/&ETHIOPIC-CA;/{ETHIOPIC-CA}/g;
s/&ETHIOPIC-CU;/{ETHIOPIC-CU}/g;
s/&ETHIOPIC-CI;/{ETHIOPIC-CI}/g;
s/&ETHIOPIC-CAA;/{ETHIOPIC-CAA}/g;
s/&ETHIOPIC-CEE;/{ETHIOPIC-CEE}/g;
s/&ETHIOPIC-CE;/{ETHIOPIC-CE}/g;
s/&ETHIOPIC-CO;/{ETHIOPIC-CO}/g;
s/&ETHIOPIC-CWA;/{ETHIOPIC-CWA}/g;
s/&ETHIOPIC-XA;/{ETHIOPIC-XA}/g;
s/&ETHIOPIC-XU;/{ETHIOPIC-XU}/g;
s/&ETHIOPIC-XI;/{ETHIOPIC-XI}/g;
s/&ETHIOPIC-XAA;/{ETHIOPIC-XAA}/g;
s/&ETHIOPIC-XEE;/{ETHIOPIC-XEE}/g;
s/&ETHIOPIC-XE;/{ETHIOPIC-XE}/g;
s/&ETHIOPIC-XO;/{ETHIOPIC-XO}/g;
s/&ETHIOPIC-XOA;/{ETHIOPIC-XOA}/g;
s/&ETHIOPIC-XWA;/{ETHIOPIC-XWA}/g;
s/&ETHIOPIC-XWI;/{ETHIOPIC-XWI}/g;
s/&ETHIOPIC-XWAA;/{ETHIOPIC-XWAA}/g;
s/&ETHIOPIC-XWEE;/{ETHIOPIC-XWEE}/g;
s/&ETHIOPIC-XWE;/{ETHIOPIC-XWE}/g;
s/&ETHIOPIC-NA;/{ETHIOPIC-NA}/g;
s/&ETHIOPIC-NU;/{ETHIOPIC-NU}/g;
s/&ETHIOPIC-NI;/{ETHIOPIC-NI}/g;
s/&ETHIOPIC-NAA;/{ETHIOPIC-NAA}/g;
s/&ETHIOPIC-NEE;/{ETHIOPIC-NEE}/g;
s/&ETHIOPIC-NE;/{ETHIOPIC-NE}/g;
s/&ETHIOPIC-NO;/{ETHIOPIC-NO}/g;
s/&ETHIOPIC-NWA;/{ETHIOPIC-NWA}/g;
s/&ETHIOPIC-NYA;/{ETHIOPIC-NYA}/g;
s/&ETHIOPIC-NYU;/{ETHIOPIC-NYU}/g;
s/&ETHIOPIC-NYI;/{ETHIOPIC-NYI}/g;
s/&ETHIOPIC-NYAA;/{ETHIOPIC-NYAA}/g;
s/&ETHIOPIC-NYEE;/{ETHIOPIC-NYEE}/g;
s/&ETHIOPIC-NYE;/{ETHIOPIC-NYE}/g;
s/&ETHIOPIC-NYO;/{ETHIOPIC-NYO}/g;
s/&ETHIOPIC-NYWA;/{ETHIOPIC-NYWA}/g;
s/&ETHIOPIC-GLOTL-A;/{ETHIOPIC-GLOTL-A}/g;
s/&ETHIOPIC-GLOTL-U;/{ETHIOPIC-GLOTL-U}/g;
s/&ETHIOPIC-GLOTL-I;/{ETHIOPIC-GLOTL-I}/g;
s/&ETHIOPIC-GLOTL-AA;/{ETHIOPIC-GLOTL-AA}/g;
s/&ETHIOPIC-GLOTL-EE;/{ETHIOPIC-GLOTL-EE}/g;
s/&ETHIOPIC-GLOTL-E;/{ETHIOPIC-GLOTL-E}/g;
s/&ETHIOPIC-GLOTL-O;/{ETHIOPIC-GLOTL-O}/g;
s/&ETHIOPIC-GLOTL-WA;/{ETHIOPIC-GLOTL-WA}/g;
s/&ETHIOPIC-KA;/{ETHIOPIC-KA}/g;
s/&ETHIOPIC-KU;/{ETHIOPIC-KU}/g;
s/&ETHIOPIC-KI;/{ETHIOPIC-KI}/g;
s/&ETHIOPIC-KAA;/{ETHIOPIC-KAA}/g;
s/&ETHIOPIC-KEE;/{ETHIOPIC-KEE}/g;
s/&ETHIOPIC-KE;/{ETHIOPIC-KE}/g;
s/&ETHIOPIC-KO;/{ETHIOPIC-KO}/g;
s/&ETHIOPIC-KOA;/{ETHIOPIC-KOA}/g;
s/&ETHIOPIC-KWA;/{ETHIOPIC-KWA}/g;
s/&ETHIOPIC-KWI;/{ETHIOPIC-KWI}/g;
s/&ETHIOPIC-KWAA;/{ETHIOPIC-KWAA}/g;
s/&ETHIOPIC-KWEE;/{ETHIOPIC-KWEE}/g;
s/&ETHIOPIC-KWE;/{ETHIOPIC-KWE}/g;
s/&ETHIOPIC-KXA;/{ETHIOPIC-KXA}/g;
s/&ETHIOPIC-KXU;/{ETHIOPIC-KXU}/g;
s/&ETHIOPIC-KXI;/{ETHIOPIC-KXI}/g;
s/&ETHIOPIC-KXAA;/{ETHIOPIC-KXAA}/g;
s/&ETHIOPIC-KXEE;/{ETHIOPIC-KXEE}/g;
s/&ETHIOPIC-KXE;/{ETHIOPIC-KXE}/g;
s/&ETHIOPIC-KXO;/{ETHIOPIC-KXO}/g;
s/&ETHIOPIC-KXWA;/{ETHIOPIC-KXWA}/g;
s/&ETHIOPIC-KXWI;/{ETHIOPIC-KXWI}/g;
s/&ETHIOPIC-KXWAA;/{ETHIOPIC-KXWAA}/g;
s/&ETHIOPIC-KXWEE;/{ETHIOPIC-KXWEE}/g;
s/&ETHIOPIC-KXWE;/{ETHIOPIC-KXWE}/g;
s/&ETHIOPIC-WA;/{ETHIOPIC-WA}/g;
s/&ETHIOPIC-WU;/{ETHIOPIC-WU}/g;
s/&ETHIOPIC-WI;/{ETHIOPIC-WI}/g;
s/&ETHIOPIC-WAA;/{ETHIOPIC-WAA}/g;
s/&ETHIOPIC-WEE;/{ETHIOPIC-WEE}/g;
s/&ETHIOPIC-WE;/{ETHIOPIC-WE}/g;
s/&ETHIOPIC-WO;/{ETHIOPIC-WO}/g;
s/&ETHIOPIC-WOA;/{ETHIOPIC-WOA}/g;
s/&ETHIOPIC-PHARGL-A;/{ETHIOPIC-PHARGL-A}/g;
s/&ETHIOPIC-PHARGL-U;/{ETHIOPIC-PHARGL-U}/g;
s/&ETHIOPIC-PHARGL-I;/{ETHIOPIC-PHARGL-I}/g;
s/&ETHIOPIC-PHARGL-AA;/{ETHIOPIC-PHARGL-AA}/g;
s/&ETHIOPIC-PHARGL-EE;/{ETHIOPIC-PHARGL-EE}/g;
s/&ETHIOPIC-PHARGL-E;/{ETHIOPIC-PHARGL-E}/g;
s/&ETHIOPIC-PHARGL-O;/{ETHIOPIC-PHARGL-O}/g;
s/&ETHIOPIC-ZA;/{ETHIOPIC-ZA}/g;
s/&ETHIOPIC-ZU;/{ETHIOPIC-ZU}/g;
s/&ETHIOPIC-ZI;/{ETHIOPIC-ZI}/g;
s/&ETHIOPIC-ZAA;/{ETHIOPIC-ZAA}/g;
s/&ETHIOPIC-ZEE;/{ETHIOPIC-ZEE}/g;
s/&ETHIOPIC-ZE;/{ETHIOPIC-ZE}/g;
s/&ETHIOPIC-ZO;/{ETHIOPIC-ZO}/g;
s/&ETHIOPIC-ZWA;/{ETHIOPIC-ZWA}/g;
s/&ETHIOPIC-ZHA;/{ETHIOPIC-ZHA}/g;
s/&ETHIOPIC-ZHU;/{ETHIOPIC-ZHU}/g;
s/&ETHIOPIC-ZHI;/{ETHIOPIC-ZHI}/g;
s/&ETHIOPIC-ZHAA;/{ETHIOPIC-ZHAA}/g;
s/&ETHIOPIC-ZHEE;/{ETHIOPIC-ZHEE}/g;
s/&ETHIOPIC-ZHE;/{ETHIOPIC-ZHE}/g;
s/&ETHIOPIC-ZHO;/{ETHIOPIC-ZHO}/g;
s/&ETHIOPIC-ZHWA;/{ETHIOPIC-ZHWA}/g;
s/&ETHIOPIC-YA;/{ETHIOPIC-YA}/g;
s/&ETHIOPIC-YU;/{ETHIOPIC-YU}/g;
s/&ETHIOPIC-YI;/{ETHIOPIC-YI}/g;
s/&ETHIOPIC-YAA;/{ETHIOPIC-YAA}/g;
s/&ETHIOPIC-YEE;/{ETHIOPIC-YEE}/g;
s/&ETHIOPIC-YE;/{ETHIOPIC-YE}/g;
s/&ETHIOPIC-YO;/{ETHIOPIC-YO}/g;
s/&ETHIOPIC-YOA;/{ETHIOPIC-YOA}/g;
s/&ETHIOPIC-DA;/{ETHIOPIC-DA}/g;
s/&ETHIOPIC-DU;/{ETHIOPIC-DU}/g;
s/&ETHIOPIC-DI;/{ETHIOPIC-DI}/g;
s/&ETHIOPIC-DAA;/{ETHIOPIC-DAA}/g;
s/&ETHIOPIC-DEE;/{ETHIOPIC-DEE}/g;
s/&ETHIOPIC-DE;/{ETHIOPIC-DE}/g;
s/&ETHIOPIC-DO;/{ETHIOPIC-DO}/g;
s/&ETHIOPIC-DWA;/{ETHIOPIC-DWA}/g;
s/&ETHIOPIC-DDA;/{ETHIOPIC-DDA}/g;
s/&ETHIOPIC-DDU;/{ETHIOPIC-DDU}/g;
s/&ETHIOPIC-DDI;/{ETHIOPIC-DDI}/g;
s/&ETHIOPIC-DDAA;/{ETHIOPIC-DDAA}/g;
s/&ETHIOPIC-DDEE;/{ETHIOPIC-DDEE}/g;
s/&ETHIOPIC-DDE;/{ETHIOPIC-DDE}/g;
s/&ETHIOPIC-DDO;/{ETHIOPIC-DDO}/g;
s/&ETHIOPIC-DDWA;/{ETHIOPIC-DDWA}/g;
s/&ETHIOPIC-JA;/{ETHIOPIC-JA}/g;
s/&ETHIOPIC-JU;/{ETHIOPIC-JU}/g;
s/&ETHIOPIC-JI;/{ETHIOPIC-JI}/g;
s/&ETHIOPIC-JAA;/{ETHIOPIC-JAA}/g;
s/&ETHIOPIC-JEE;/{ETHIOPIC-JEE}/g;
s/&ETHIOPIC-JE;/{ETHIOPIC-JE}/g;
s/&ETHIOPIC-JO;/{ETHIOPIC-JO}/g;
s/&ETHIOPIC-JWA;/{ETHIOPIC-JWA}/g;
s/&ETHIOPIC-GA;/{ETHIOPIC-GA}/g;
s/&ETHIOPIC-GU;/{ETHIOPIC-GU}/g;
s/&ETHIOPIC-GI;/{ETHIOPIC-GI}/g;
s/&ETHIOPIC-GAA;/{ETHIOPIC-GAA}/g;
s/&ETHIOPIC-GEE;/{ETHIOPIC-GEE}/g;
s/&ETHIOPIC-GE;/{ETHIOPIC-GE}/g;
s/&ETHIOPIC-GO;/{ETHIOPIC-GO}/g;
s/&ETHIOPIC-GOA;/{ETHIOPIC-GOA}/g;
s/&ETHIOPIC-GWA;/{ETHIOPIC-GWA}/g;
s/&ETHIOPIC-GWI;/{ETHIOPIC-GWI}/g;
s/&ETHIOPIC-GWAA;/{ETHIOPIC-GWAA}/g;
s/&ETHIOPIC-GWEE;/{ETHIOPIC-GWEE}/g;
s/&ETHIOPIC-GWE;/{ETHIOPIC-GWE}/g;
s/&ETHIOPIC-GGA;/{ETHIOPIC-GGA}/g;
s/&ETHIOPIC-GGU;/{ETHIOPIC-GGU}/g;
s/&ETHIOPIC-GGI;/{ETHIOPIC-GGI}/g;
s/&ETHIOPIC-GGAA;/{ETHIOPIC-GGAA}/g;
s/&ETHIOPIC-GGEE;/{ETHIOPIC-GGEE}/g;
s/&ETHIOPIC-GGE;/{ETHIOPIC-GGE}/g;
s/&ETHIOPIC-GGO;/{ETHIOPIC-GGO}/g;
s/&ETHIOPIC-GGWAA;/{ETHIOPIC-GGWAA}/g;
s/&ETHIOPIC-THA;/{ETHIOPIC-THA}/g;
s/&ETHIOPIC-THU;/{ETHIOPIC-THU}/g;
s/&ETHIOPIC-THI;/{ETHIOPIC-THI}/g;
s/&ETHIOPIC-THAA;/{ETHIOPIC-THAA}/g;
s/&ETHIOPIC-THEE;/{ETHIOPIC-THEE}/g;
s/&ETHIOPIC-THE;/{ETHIOPIC-THE}/g;
s/&ETHIOPIC-THO;/{ETHIOPIC-THO}/g;
s/&ETHIOPIC-THWA;/{ETHIOPIC-THWA}/g;
s/&ETHIOPIC-CHA;/{ETHIOPIC-CHA}/g;
s/&ETHIOPIC-CHU;/{ETHIOPIC-CHU}/g;
s/&ETHIOPIC-CHI;/{ETHIOPIC-CHI}/g;
s/&ETHIOPIC-CHAA;/{ETHIOPIC-CHAA}/g;
s/&ETHIOPIC-CHEE;/{ETHIOPIC-CHEE}/g;
s/&ETHIOPIC-CHE;/{ETHIOPIC-CHE}/g;
s/&ETHIOPIC-CHO;/{ETHIOPIC-CHO}/g;
s/&ETHIOPIC-CHWA;/{ETHIOPIC-CHWA}/g;
s/&ETHIOPIC-PHA;/{ETHIOPIC-PHA}/g;
s/&ETHIOPIC-PHU;/{ETHIOPIC-PHU}/g;
s/&ETHIOPIC-PHI;/{ETHIOPIC-PHI}/g;
s/&ETHIOPIC-PHAA;/{ETHIOPIC-PHAA}/g;
s/&ETHIOPIC-PHEE;/{ETHIOPIC-PHEE}/g;
s/&ETHIOPIC-PHE;/{ETHIOPIC-PHE}/g;
s/&ETHIOPIC-PHO;/{ETHIOPIC-PHO}/g;
s/&ETHIOPIC-PHWA;/{ETHIOPIC-PHWA}/g;
s/&ETHIOPIC-TSA;/{ETHIOPIC-TSA}/g;
s/&ETHIOPIC-TSU;/{ETHIOPIC-TSU}/g;
s/&ETHIOPIC-TSI;/{ETHIOPIC-TSI}/g;
s/&ETHIOPIC-TSAA;/{ETHIOPIC-TSAA}/g;
s/&ETHIOPIC-TSEE;/{ETHIOPIC-TSEE}/g;
s/&ETHIOPIC-TSE;/{ETHIOPIC-TSE}/g;
s/&ETHIOPIC-TSO;/{ETHIOPIC-TSO}/g;
s/&ETHIOPIC-TSWA;/{ETHIOPIC-TSWA}/g;
s/&ETHIOPIC-TZA;/{ETHIOPIC-TZA}/g;
s/&ETHIOPIC-TZU;/{ETHIOPIC-TZU}/g;
s/&ETHIOPIC-TZI;/{ETHIOPIC-TZI}/g;
s/&ETHIOPIC-TZAA;/{ETHIOPIC-TZAA}/g;
s/&ETHIOPIC-TZEE;/{ETHIOPIC-TZEE}/g;
s/&ETHIOPIC-TZE;/{ETHIOPIC-TZE}/g;
s/&ETHIOPIC-TZO;/{ETHIOPIC-TZO}/g;
s/&ETHIOPIC-TZOA;/{ETHIOPIC-TZOA}/g;
s/&ETHIOPIC-FA;/{ETHIOPIC-FA}/g;
s/&ETHIOPIC-FU;/{ETHIOPIC-FU}/g;
s/&ETHIOPIC-FI;/{ETHIOPIC-FI}/g;
s/&ETHIOPIC-FAA;/{ETHIOPIC-FAA}/g;
s/&ETHIOPIC-FEE;/{ETHIOPIC-FEE}/g;
s/&ETHIOPIC-FE;/{ETHIOPIC-FE}/g;
s/&ETHIOPIC-FO;/{ETHIOPIC-FO}/g;
s/&ETHIOPIC-FWA;/{ETHIOPIC-FWA}/g;
s/&ETHIOPIC-PA;/{ETHIOPIC-PA}/g;
s/&ETHIOPIC-PU;/{ETHIOPIC-PU}/g;
s/&ETHIOPIC-PI;/{ETHIOPIC-PI}/g;
s/&ETHIOPIC-PAA;/{ETHIOPIC-PAA}/g;
s/&ETHIOPIC-PEE;/{ETHIOPIC-PEE}/g;
s/&ETHIOPIC-PE;/{ETHIOPIC-PE}/g;
s/&ETHIOPIC-PO;/{ETHIOPIC-PO}/g;
s/&ETHIOPIC-PWA;/{ETHIOPIC-PWA}/g;
s/&ETHIOPIC-RYA;/{ETHIOPIC-RYA}/g;
s/&ETHIOPIC-MYA;/{ETHIOPIC-MYA}/g;
s/&ETHIOPIC-FYA;/{ETHIOPIC-FYA}/g;
# Marks and punctuation (same placeholder treatment).
s/&ETHIOPIC-GEM-MRK;/{ETHIOPIC-GEM-MRK}/g;
s/&ETHIOPIC-SECTION-MRK;/{ETHIOPIC-SECTION-MRK}/g;
s/&ETHIOPIC-WORDSPACE;/{ETHIOPIC-WORDSPACE}/g;
s/&ETHIOPIC-STOP;/{ETHIOPIC-STOP}/g;
s/&ETHIOPIC-COMMA;/{ETHIOPIC-COMMA}/g;
s/&ETHIOPIC-SEMICOLON;/{ETHIOPIC-SEMICOLON}/g;
s/&ETHIOPIC-COLON;/{ETHIOPIC-COLON}/g;
s/&ETHIOPIC-PREF-COLON;/{ETHIOPIC-PREF-COLON}/g;
s/&ETHIOPIC-QUESTION-MRK;/{ETHIOPIC-QUESTION-MRK}/g;
s/&ETHIOPIC-PARAGRAPH;/{ETHIOPIC-PARAGRAPH}/g;
# Digits one to nine.
s/&ETHIOPIC-DIGIT-ONE;/{ETHIOPIC-DIGIT-ONE}/g;
s/&ETHIOPIC-DIGIT-TWO;/{ETHIOPIC-DIGIT-TWO}/g;
s/&ETHIOPIC-DIGIT-THREE;/{ETHIOPIC-DIGIT-THREE}/g;
s/&ETHIOPIC-DIGIT-FOUR;/{ETHIOPIC-DIGIT-FOUR}/g;
s/&ETHIOPIC-DIGIT-FIVE;/{ETHIOPIC-DIGIT-FIVE}/g;
s/&ETHIOPIC-DIGIT-SIX;/{ETHIOPIC-DIGIT-SIX}/g;
s/&ETHIOPIC-DIGIT-SEVEN;/{ETHIOPIC-DIGIT-SEVEN}/g;
s/&ETHIOPIC-DIGIT-EIGHT;/{ETHIOPIC-DIGIT-EIGHT}/g;
s/&ETHIOPIC-DIGIT-NINE;/{ETHIOPIC-DIGIT-NINE}/g;
# Number signs: the tens, hundred and ten thousand.
s/&ETHIOPIC-NUM-TEN;/{ETHIOPIC-NUM-TEN}/g;
s/&ETHIOPIC-NUM-TWENTY;/{ETHIOPIC-NUM-TWENTY}/g;
s/&ETHIOPIC-NUM-THIRTY;/{ETHIOPIC-NUM-THIRTY}/g;
s/&ETHIOPIC-NUM-FORTY;/{ETHIOPIC-NUM-FORTY}/g;
s/&ETHIOPIC-NUM-FIFTY;/{ETHIOPIC-NUM-FIFTY}/g;
s/&ETHIOPIC-NUM-SIXTY;/{ETHIOPIC-NUM-SIXTY}/g;
s/&ETHIOPIC-NUM-SEVENTY;/{ETHIOPIC-NUM-SEVENTY}/g;
s/&ETHIOPIC-NUM-EIGHTY;/{ETHIOPIC-NUM-EIGHTY}/g;
s/&ETHIOPIC-NUM-NINETY;/{ETHIOPIC-NUM-NINETY}/g;
s/&ETHIOPIC-NUM-HUNDRED;/{ETHIOPIC-NUM-HUNDRED}/g;
s/&ETHIOPIC-NUM-TEN-THOUSAND;/{ETHIOPIC-NUM-TEN-THOUSAND}/g;
# Polytonic Greek entities -> hexadecimal numeric character references, almost
# all in the Greek Extended block (U+1F00-U+1FFF).
# NOTE(review): judging by the target code points, the entity names appear to
# be built as <letter><diacritics>gr, where
#   letter:          a, e, ee (eta), i, o, u (upsilon), oh (omega), r (rho);
#                    a capitalised letter part selects the capital form
#   rou / smo:       rough / smooth breathing alone
#   gra / acu / cir: grave / acute / circumflex alone
#   r or s + a/g/c:  rough or smooth breathing plus acute/grave/circumflex
#   iot, or i before "gr": iota subscript
#   d before "gr":   dialytika (diaeresis)
# Confirm against the entity set definition before relying on this legend.

# --- alpha ---
s/&aiotgr;/&#x1FB3;/g;
s/&Aiotgr;/&#x1FBC;/g;
s/&arigr;/&#x1F81;/g;
s/&Arigr;/&#x1F89;/g;
s/&asigr;/&#x1F80;/g;
s/&Asigr;/&#x1F88;/g;
s/&aaigr;/&#x1FB4;/g;
# Two references are emitted here: U+1FBB followed by the combining
# iota subscript U+0345.
s/&Aaigr;/&#x1FBB;&#x0345;/g;
s/&agigr;/&#x1FB2;/g;
s/&acigr;/&#x1FB7;/g;
s/&araigr;/&#x1F85;/g;
s/&Araigr;/&#x1F8D;/g;
s/&asaigr;/&#x1F84;/g;
s/&Asaigr;/&#x1F8C;/g;
s/&argigr;/&#x1F83;/g;
s/&Argigr;/&#x1F8B;/g;
s/&asgigr;/&#x1F82;/g;
s/&Asgigr;/&#x1F8A;/g;
s/&arcigr;/&#x1F87;/g;
s/&Arcigr;/&#x1F8F;/g;
s/&ascigr;/&#x1F86;/g;
s/&Ascigr;/&#x1F8E;/g;
s/&arougr;/&#x1F01;/g;
s/&Arougr;/&#x1F09;/g;
s/&asmogr;/&#x1F00;/g;
s/&Asmogr;/&#x1F08;/g;
s/&agragr;/&#x1F70;/g;
s/&Agragr;/&#x1FBA;/g;
s/&aacugr;/&#x1F71;/g;
s/&Aacugr;/&#x1FBB;/g;
s/&acirgr;/&#x1FB6;/g;
# Emitted as capital alpha U+0391 plus the combining circumflex
# (perispomeni) U+0342 rather than a single code point.
s/&Acirgr;/&#x0391;&#x0342;/g;
s/&aragr;/&#x1F05;/g;
s/&Aragr;/&#x1F0D;/g;
s/&asagr;/&#x1F04;/g;
s/&Asagr;/&#x1F0C;/g;
s/&arggr;/&#x1F03;/g;
s/&Arggr;/&#x1F0B;/g;
s/&asggr;/&#x1F02;/g;
s/&Asggr;/&#x1F0A;/g;
s/&arcgr;/&#x1F07;/g;
s/&Arcgr;/&#x1F0F;/g;
s/&ascgr;/&#x1F06;/g;
s/&Ascgr;/&#x1F0E;/g;
# --- epsilon ---
s/&erougr;/&#x1F11;/g;
s/&Erougr;/&#x1F19;/g;
s/&esmogr;/&#x1F10;/g;
s/&Esmogr;/&#x1F18;/g;
s/&egragr;/&#x1F72;/g;
s/&Egragr;/&#x1FC8;/g;
s/&eacugr;/&#x1F73;/g;
s/&Eacugr;/&#x1FC9;/g;
s/&eragr;/&#x1F15;/g;
s/&Eragr;/&#x1F1D;/g;
s/&esagr;/&#x1F14;/g;
s/&Esagr;/&#x1F1C;/g;
s/&erggr;/&#x1F13;/g;
s/&Erggr;/&#x1F1B;/g;
s/&esggr;/&#x1F12;/g;
s/&Esggr;/&#x1F1A;/g;
# --- eta ---
s/&eeiotgr;/&#x1FC3;/g;
s/&EEiotgr;/&#x1FCC;/g;
s/&eerigr;/&#x1F91;/g;
s/&EErigr;/&#x1F99;/g;
s/&eesigr;/&#x1F90;/g;
s/&EEsigr;/&#x1F98;/g;
s/&eeaigr;/&#x1FC4;/g;
s/&eegigr;/&#x1FC2;/g;
s/&eecigr;/&#x1FC7;/g;
s/&eeraigr;/&#x1F95;/g;
s/&EEraigr;/&#x1F9D;/g;
s/&eesaigr;/&#x1F94;/g;
s/&EEsaigr;/&#x1F9C;/g;
s/&eergigr;/&#x1F93;/g;
s/&EErgigr;/&#x1F9B;/g;
s/&eesgigr;/&#x1F92;/g;
s/&EEsgigr;/&#x1F9A;/g;
s/&eercigr;/&#x1F97;/g;
s/&EErcigr;/&#x1F9F;/g;
s/&eescigr;/&#x1F96;/g;
s/&EEscigr;/&#x1F9E;/g;
s/&eerougr;/&#x1F21;/g;
s/&EErougr;/&#x1F29;/g;
s/&eesmogr;/&#x1F20;/g;
s/&EEsmogr;/&#x1F28;/g;
s/&eegragr;/&#x1F74;/g;
s/&EEgragr;/&#x1FCA;/g;
s/&eeacugr;/&#x1F75;/g;
s/&EEacugr;/&#x1FCB;/g;
s/&eecirgr;/&#x1FC6;/g;
# Emitted as capital eta U+0397 plus the combining circumflex
# (perispomeni) U+0342 rather than a single code point.
s/&EEcirgr;/&#x0397;&#x0342;/g;
s/&eeragr;/&#x1F25;/g;
s/&EEragr;/&#x1F2D;/g;
s/&eesagr;/&#x1F24;/g;
s/&EEsagr;/&#x1F2C;/g;
s/&eerggr;/&#x1F23;/g;
s/&EErggr;/&#x1F2B;/g;
s/&eesggr;/&#x1F22;/g;
s/&EEsggr;/&#x1F2A;/g;
s/&eercgr;/&#x1F27;/g;
s/&EErcgr;/&#x1F2F;/g;
s/&eescgr;/&#x1F26;/g;
s/&EEscgr;/&#x1F2E;/g;
# --- iota ---
s/&irougr;/&#x1F31;/g;
s/&Irougr;/&#x1F39;/g;
s/&ismogr;/&#x1F30;/g;
s/&Ismogr;/&#x1F38;/g;
s/&igragr;/&#x1F76;/g;
s/&Igragr;/&#x1FDA;/g;
s/&iacugr;/&#x1F77;/g;
s/&Iacugr;/&#x1FDB;/g;
s/&icirgr;/&#x1FD6;/g;
s/&iragr;/&#x1F35;/g;
s/&Iragr;/&#x1F3D;/g;
s/&isagr;/&#x1F34;/g;
s/&Isagr;/&#x1F3C;/g;
s/&irggr;/&#x1F33;/g;
s/&Irggr;/&#x1F3B;/g;
s/&isggr;/&#x1F32;/g;
s/&Isggr;/&#x1F3A;/g;
s/&ircgr;/&#x1F37;/g;
s/&Ircgr;/&#x1F3F;/g;
s/&iscgr;/&#x1F36;/g;
s/&Iscgr;/&#x1F3E;/g;
s/&igdgr;/&#x1FD2;/g;
s/&iadgr;/&#x1FD3;/g;
s/&icdgr;/&#x1FD7;/g;
# --- omicron ---
s/&orougr;/&#x1F41;/g;
s/&Orougr;/&#x1F49;/g;
s/&osmogr;/&#x1F40;/g;
s/&Osmogr;/&#x1F48;/g;
s/&ogragr;/&#x1F78;/g;
s/&Ogragr;/&#x1FF8;/g;
s/&oacugr;/&#x1F79;/g;
s/&Oacugr;/&#x1FF9;/g;
s/&oragr;/&#x1F45;/g;
s/&Oragr;/&#x1F4D;/g;
s/&osagr;/&#x1F44;/g;
s/&Osagr;/&#x1F4C;/g;
s/&orggr;/&#x1F43;/g;
s/&Orggr;/&#x1F4B;/g;
s/&osggr;/&#x1F42;/g;
s/&Osggr;/&#x1F4A;/g;
# --- upsilon ---
s/&urougr;/&#x1F51;/g;
s/&Urougr;/&#x1F59;/g;
s/&usmogr;/&#x1F50;/g;
s/&ugragr;/&#x1F7A;/g;
s/&Ugragr;/&#x1FEA;/g;
s/&uacugr;/&#x1F7B;/g;
s/&Uacugr;/&#x1FEB;/g;
s/&ucirgr;/&#x1FE6;/g;
s/&uragr;/&#x1F55;/g;
s/&Uragr;/&#x1F5D;/g;
s/&usagr;/&#x1F54;/g;
s/&urggr;/&#x1F53;/g;
s/&Urggr;/&#x1F5B;/g;
s/&usggr;/&#x1F52;/g;
s/&urcgr;/&#x1F57;/g;
s/&Urcgr;/&#x1F5F;/g;
s/&uscgr;/&#x1F56;/g;
s/&ugdgr;/&#x1FE2;/g;
s/&ucdgr;/&#x1FE7;/g;
s/&uadgr;/&#x1FE3;/g;
# --- omega ---
s/&ohigr;/&#x1FF3;/g;
s/&OHigr;/&#x1FFC;/g;
s/&ohrigr;/&#x1FA1;/g;
s/&OHrigr;/&#x1FA9;/g;
s/&ohsigr;/&#x1FA0;/g;
s/&OHsigr;/&#x1FA8;/g;
s/&ohaigr;/&#x1FF4;/g;
s/&ohgigr;/&#x1FF2;/g;
s/&ohcigr;/&#x1FF7;/g;
s/&ohraigr;/&#x1FA5;/g;
s/&OHraigr;/&#x1FAD;/g;
s/&ohsaigr;/&#x1FA4;/g;
s/&OHsaigr;/&#x1FAC;/g;
s/&ohrgigr;/&#x1FA3;/g;
s/&OHrgigr;/&#x1FAB;/g;
s/&ohsgigr;/&#x1FA2;/g;
s/&OHsgigr;/&#x1FAA;/g;
s/&ohrcigr;/&#x1FA7;/g;
s/&OHrcigr;/&#x1FAF;/g;
s/&ohscigr;/&#x1FA6;/g;
s/&OHscigr;/&#x1FAE;/g;
s/&ohrougr;/&#x1F61;/g;
s/&OHrougr;/&#x1F69;/g;
s/&ohsmogr;/&#x1F60;/g;
s/&OHsmogr;/&#x1F68;/g;
s/&ohgragr;/&#x1F7C;/g;
s/&OHgragr;/&#x1FFA;/g;
s/&ohacugr;/&#x1F7D;/g;
s/&OHacugr;/&#x1FFB;/g;
s/&ohcirgr;/&#x1FF6;/g;
s/&ohragr;/&#x1F65;/g;
s/&OHragr;/&#x1F6D;/g;
s/&ohsagr;/&#x1F64;/g;
s/&OHsagr;/&#x1F6C;/g;
s/&ohrggr;/&#x1F63;/g;
s/&OHrggr;/&#x1F6B;/g;
s/&ohsggr;/&#x1F62;/g;
s/&OHsggr;/&#x1F6A;/g;
s/&ohrcgr;/&#x1F67;/g;
s/&OHrcgr;/&#x1F6F;/g;
s/&ohscgr;/&#x1F66;/g;
s/&OHscgr;/&#x1F6E;/g;
# --- rho with rough breathing ---
s/&rrougr;/&#x1FE5;/g;
s/&Rrougr;/&#x1FEC;/g;
# --- archaic letters (digamma U+03DC, koppa U+03DE, stigma U+03DA,
# sampi U+03E0) and the Greek question mark U+037E ---
s/&diggr;/&#x03DC;/g;
s/&kogr;/&#x03DE;/g;
s/&stgr;/&#x03DA;/g;
s/&samgr;/&#x03E0;/g;
s/&qmgr;/&#x037E;/g;
# Non-standard entities mapped to visible stand-in characters:
#   &EOLhyphen;   -> U+2223 (DIVIDES)
#   &EOLunhyphen; -> U+00A6 (BROKEN BAR)
#   &spcDash;     -> U+254C (BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL)
# NOTE(review): the names suggest end-of-line hyphenation markers and a
# special dash; confirm the intended meaning against the source DTD.
s/&EOLhyphen;/&#x2223;/g;
s/&EOLunhyphen;/&#x00A6;/g;
s/&spcDash;/&#x254C;/g;

# lattaj customizations

# s#\n#\n<BR>#g;
# s#<BR><BR>#<BR>#g;
# s#<BR><P>#<P>#g;
# s#<P><BR>#<P>#g;
# s#<BR><TD#<TD#g;
# s#<BR><TH#<TH#g;
# s#<BR><LI#<LI#g;
# s#<BR><TR#<TR#g;
# s#<LI><BR>#<LI>#g;
# s#<BR></TR>#</TR>#g;
# s#<BR></LI>#</LI>#g;
# s#<BR></TD>#</TD>#g;
# s#<BR></TH>#</TH>#g;
# s#<BR></TABLE>#</TABLE>#g;
# s#<BR></I>#</I>#g;

# Remove incompatible STYLE/REND combinations: where a BORDER="1" attribute
# (matched in any letter case, with its leading space) is followed, before
# the next '>', by a "border-style: none;" declaration, drop the BORDER
# attribute and keep the declaration, written with one space after the colon.
$_ =~ s{ [bB][oO][rR][dD][eE][rR]="1"([^>]+)border-style: ?none;}{$1border-style: none;}g;

# Write out the converted text held in $_.
print $_;
}

# End of document: emit a fixed run of closing tags, the end-of-file marker
# and the BODY/HTML end tags. The closing tags are written unconditionally,
# and no trailing newline is added.
print '</I></TD></TH></TR></TABLE></LI></UL><BR><BR><HR><BR><ADDRESS>END OF HTML VERSION</ADDRESS></BODY></HTML>';
