#!/usr/bin/perl -w
# Finds multiple hyphens not inside a verbatim environment (or \verb).
# Places these inside a \verb{--{ construct so they will not be converted
# to a single hyphen by latex or latex2html.
use strict;
# Command-line options collected by check_arguments(). Keys are the option
# names with their leading hyphens removed; values are empty strings.
my %args;
# The following builds the test string to identify and change multiple
# hyphens in the tex files. Several constructs are identified but only
# multiple hyphens are changed; the others are fed to the output
# unchanged.
#
# The fragment variables deliberately avoid the names $a and $b: a lexical
# of that name masks the special variables used by sort() and makes any
# "sort { $a <=> $b }" in this file fail to compile.
my $begin_env = '\\\\begin\\*?\\s*\\{\\s*'; # \begin{
my $end_env = '\\\\end\\*?\\s*\\{\\s*'; # \end{
my $close_brace = '\\s*\\}'; # closing curly brace
# This captures entire verbatim environments. These are passed to the output
# file unchanged.
my $verbatimenv = $begin_env . "verbatim" . $close_brace . ".*?" . $end_env . "verbatim" . $close_brace;
# This captures \verb{..{ constructs (any delimiter character; the same
# character opens and closes). They are passed to the output unchanged.
# NOTE: the \1 backreference relies on (.) being the first capture group of
# the combined pattern, so no capture groups may be added before it.
my $verb = '\\\\verb\\*?(.).*?\\1';
# This captures multiple hyphens with a leading and trailing space. These
# are not changed, because the match starts with whitespace, not a hyphen.
my $hyphsp = '\\s\\-{2,}\\s';
# This identifies other multiple hyphens.
my $hyphens = '\\-{2,}';
# This protects "protected" hyphen strings, such as for mdash and ndash.
my $protected_hyphens = '\\{\\-{2,}\\}';
# This identifies \hyperpage{..} commands, which should be ignored.
my $hyperpage = '\\\\hyperpage\\*?\\{.*?\\}';
# This builds the actual test string from the above strings. Order matters:
# $hyphens comes last so every protecting construct gets a chance to match
# first at the same position.
my $teststr = "$verbatimenv|$verb|$hyphsp|$protected_hyphens|$hyperpage|$hyphens";
sub get_includes {
    # Build the list of tex files to check.
    #
    # Arguments: a list of top-level tex file names. Undefined or empty
    #            entries are skipped.
    # Returns:   a list holding, for each top-level file in turn, the file
    #            itself followed by "<name>.tex" for the first
    #            \include{<name>} found on each of its lines.
    # Dies if a top-level file cannot be opened.
    my @list;
    foreach my $filename (@_) {
        next unless defined $filename and length $filename;
        # Start with the top-level latex file so it gets checked too.
        push @list, $filename;
        # Three-argument open with a lexical handle: the file name is taken
        # literally (no mode characters or surrounding whitespace are
        # interpreted) and the handle cannot collide with other code.
        open(my $in, '<', $filename)
            or die "Cannot open input file $filename: $!";
        # A lexical line variable keeps the caller's $_ untouched.
        while (my $line = <$in>) {
            push @list, "$1.tex" if $line =~ /\\include\{(.*?)\}/;
        }
        close $in;
    }
    return @list;
}
sub convert_hyphens {
    # Scan each file for runs of two or more hyphens that are outside the
    # constructs protected by $teststr (verbatim environments, \verb,
    # space-delimited hyphens, {--} and \hyperpage{..}).
    #
    # When the 'change' option is present in %args, every such run is
    # rewritten as \verb{<hyphens>{ and the file is saved back in place if
    # anything changed. Otherwise each run is only reported on stdout with
    # its line number.
    #
    # Arguments: list of file names to process.
    # Returns:   total number of hyphen runs found (or changed).
    # Dies if a file cannot be read or written.
    my (@files) = @_;
    my $change = exists $args{'change'};
    # Compile the combined pattern once; /s lets .*? span lines so that
    # multi-line verbatim environments are captured whole.
    my $pattern = qr/$teststr/s;
    my $cnt = 0;
    foreach my $file (@files) {
        # Load the whole file. A bit wasteful but easier to deal with, and
        # speed is not a concern here.
        open(my $in, '<', $file)
            or die "Cannot open input file $file: $!";
        my $filedata = do { local $/; <$in> };
        close $in;
        $filedata = "" unless defined $filedata;

        my $out = "";
        my $thiscnt = 0;
        my $linecnt = 1;
        my $last = 0;    # offset just past the previous match

        # Walk the matches left to right using offsets instead of $&, $`
        # and $', which would re-copy the rest of the file on every match.
        while ($filedata =~ /$pattern/g) {
            # Save the offsets first: the matches below overwrite @- and @+.
            my ($start, $end) = ($-[0], $+[0]);
            my $before = substr($filedata, $last, $start - $last);
            my $this = substr($filedata, $start, $end - $start);
            $last = $end;

            $linecnt += ($before =~ tr/\n//);
            $linecnt += ($this =~ tr/\n//);

            # Only the bare multiple-hyphen alternative starts with a
            # hyphen; every other alternative is passed through unchanged.
            if ($change) {
                # This is where the actual conversion is done.
                $thiscnt++ if $this =~ s/^(\-+)/\\verb\{$1\{/;
            } elsif ($this =~ /^\-+/) {
                $thiscnt++;
                print "Multiple hyphen found at line $linecnt in $file\n";
            }

            # Put what came before and our (possibly) changed string into
            # the output buffer.
            $out .= $before . $this;
        }

        # If any hyphens were converted, save the file.
        if ($thiscnt and $change) {
            open(my $of, '>', $file)
                or die "Cannot open output file $file: $!";
            print {$of} $out, substr($filedata, $last);
            # Buffered write errors only surface at close time.
            close $of
                or die "Cannot write output file $file: $!";
        }
        $cnt += $thiscnt;
    }
    return $cnt;
}
sub check_arguments {
    # Extract option flags from the command line.
    #
    # Every element of @ARGV that starts with one or more hyphens is
    # treated as an option: the leading hyphens are stripped and the rest
    # becomes a key (with an empty-string value) in the hash referenced by
    # the first argument. Options are removed from @ARGV, leaving only the
    # remaining arguments in their original order.
    #
    # Arguments: reference to the hash that receives the options.
    # Returns:   nothing.
    my $args = shift;
    my @remaining;
    # Examine every argument, including the last one.
    foreach my $arg (@ARGV) {
        if (defined $arg and $arg =~ /^\-+(.*)/s) {
            $args->{$1} = "";
        } else {
            push @remaining, $arg;
        }
    }
    # Rebuild @ARGV rather than delete()-ing elements, which would leave
    # undefined holes in the array.
    @ARGV = @remaining;
    return;
}
##################################################################
# MAIN ####
##################################################################
# Pull the option flags out of @ARGV first, so that only file names remain.
check_arguments(\%args);
# Each remaining argument is a top-level tex file; expand it into the file
# itself plus the files it includes, then scan (or convert) them all.
my @includes = get_includes(@ARGV);
my $cnt = convert_hyphens(@includes);
# Report the total, pluralised, with a verb that reflects the mode.
my $plural = ($cnt == 1) ? "" : "s";
my $action = exists $args{'change'} ? "Changed" : "Found";
print "$cnt Multiple hyphen$plural $action\n";
# syntax highlighted by Code2HTML, v. 0.9.1