# ports//sysutils/apcupsd/work/apcupsd-3.14.2/doc/latex/link

#!/usr/bin/perl -w
use strict;
#
# Straightens out links in multiple latex source files. Uses a master tex
#  file (first argument) and straightens out all the links. The problem is that
#  links in each tex file may point to other tex files.  But 
#  we aren't going to generate multiple dvi files in the end, so the links all
#  need to work in the same source file. This will be resolved by making a list
#  of links, a hash with keys being the original link targets. 
#  Then global link names will be created. The hash will contain the
#  new link name as the value. A reverse hash will also be created at the
#  same time.

# In addition to the above situation, it is possible that tex filenames have changed
#  since the links were created. To handle this, parameters can be entered
#  from the command line and links will be translated. Multiple translation arguments
#  can be entered at the command line to translate any number of file links.

# The top-level tex file is read in and all includes saved. These files are then
#  examined to find all link anchors. The list of links is processed to make them 
#  all unique across all source files. This is done by appending an integer to each
#  link name, using the lowest integer that will generate a unique link name.
#
# The tex files are then processed again, dropping the filename in the link, and
#  putting in the unique link name determined above. The new files are named
#  foo.linked.tex, where foo is the name of the original tex file.

# Unresolved links are listed to STDOUT, and forward and reverse lists of links
#  are output to files at the end.

# Invocation syntax is as follows:
#
#  link_resolver.pl -f infile.tex [ -t oldfile=newfile]...
#
#

# Filename translations given with -t on the command line: keys are the old
#  file names, values the new ones. Filled in by parse_cmdline and applied to
#  the file part of each link target in changeFileLinks.
my %translation;

sub max {
	# Returns the numerically larger of the two arguments; when neither is
	#  larger than the other, the second one is returned.
	my ($first,$second) = @_;
	if ($first > $second) { return $first; }
	return $second;
}

sub get_includes {
	# Get a list of include files from the top-level tex file.
	#
	# Argument: the name of the master tex file.
	# Returns:  a list with one "name.tex" entry for every line that starts with
	#  \include{name}, in file order. A trailing ".linked" on the included name
	#  is dropped first, since that refers to files already linked.
	#  Names listed on an earlier "% nolinks" line are left out.
	# Dies if the master file cannot be opened.
	my $masterFile = shift;
	my (@list,%excludeList);

	# Three-argument open keeps odd characters in the file name from being
	#  read as an open mode; the lexical handle is closed even on early exit.
	open(my $master,'<',$masterFile)
		or die "Cannot open master file: $masterFile ($!)";
	while (my $line = <$master>) {
		chomp $line;
		# If the special code '% nolinks' is found, parse the rest of the line for
		#   a list of files to not bring in to resolve links.
		if ($line =~ /^\%\s+nolinks\s+(.*)/i) {
			foreach my $skip (split(/\s+/,$1)) { $excludeList{$skip} = ""; }
		}

		# [^}]* stops at the first closing brace, so a trailing comment that
		#  itself contains braces does not end up in the file name.
		next unless $line =~ /^\\include\{([^}]*)\}/;
		my $file = $1;
		# Remove the .linked part of the filename, that refers to files already linked.
		$file =~ s/\.linked$//;
		push(@list,"$file.tex") unless exists($excludeList{$file});
	}
	close $master;
	return @list;
}

sub get_anchors {
	# Collects the anchors of every file named in the argument list.
	# Returns a reference to a hash with one key per file; each value is the
	#  anchors hash that load_anchors produced for that file.
	my %anchorsOf;

	for my $texFile (@_) {
		# load_anchors also hands back an anchor count; only the hash is kept.
		my ($found) = load_anchors($texFile);
		$anchorsOf{$texFile} = $found;
	}
	return \%anchorsOf;
}

sub load_anchors {
	# Loads the anchors from the given file into a hash, and returns that hash.
	#
	# Argument: the name of the tex file to scan.
	# Returns:  a two-element list: a reference to a hash whose keys (and values)
	#  are the anchor names found, and the number of anchors seen (duplicates
	#  are counted each time).
	# Dies if the file cannot be opened or is empty.
	#
	# Anchors have the form \label{foo} or \special{html:<a name="foo">}.
	# Note that anchors may be duplicated in the two types of references, but that
	#  doesn't matter.
	my ($filename) = shift;
	my $anchors = {};
	my $cnt = 0;

	# Load the whole file into a buffer, since an anchor may span lines.
	open(my $in,'<',$filename)
		or die "Cannot open file $filename for reading ($!)";
	my $bfr = do { local $/; <$in> };
	close $in;

	(defined($bfr) and length($bfr)) or die "No data in Input File: $filename";

	# A single left-to-right scan over both forms. Looking for \label first and
	#  then continuing behind it would skip any \special anchor that sits
	#  in front of that label.
	while ($bfr =~ /\\label\{(.*?)\}|\\special\n*\{html:<a\s+name=\"(.*?)\">\}/sg) {
		my $name = defined($1) ? $1 : $2;
		# Drop trailing spaces.
		$name =~ s/\s+$//;
		$anchors->{$name} = $name;
		$cnt++;
	}
	return ($anchors,$cnt);
}


# convert_links
#  Go through the list of anchors for each file and convert the anchors to unique ones.
#  Save the new anchor in its original place and also into a hash of new anchors.
sub convert_links {
	# Converts the anchors to global (source-independent) versions.
	#
	# Argument: reference to the hash of per-file anchor hashes. It is changed in
	#  place: the value stored for each anchor becomes the name chosen by
	#  get_newlink, which is unique across all files.
	# Returns nothing.
	my ($links) = shift;
	my %linksList;	# every global name handed out so far

	# Files and anchors are visited in sorted order. Perl's hash order differs
	#  from run to run, and without sorting the choice of which duplicate gets
	#  a number appended would differ between runs as well.
	foreach my $file (sort(keys(%$links))) {
		foreach my $anchor (sort(keys(%{$links->{$file}}))) {
			my $newanchor = get_newlink($anchor,\%linksList);
			$links->{$file}{$anchor} = $newanchor;
			$linksList{$newanchor} = "";
		}
	}
	return;
}

sub get_newlink {
	# Makes up a new anchor from the old one.
	#
	# Arguments: the anchor name, and a reference to a hash whose keys are the
	#  anchor names already taken.
	# Returns:   the anchor itself when it is not taken yet; otherwise the anchor
	#  with its trailing number replaced by the lowest higher number that is
	#  still free (an anchor without a trailing number starts at 1).
	my ($anchor,$linkhash) = @_;

	# If the anchor isn't already in the hash, it's unique so return it.
	return $anchor unless exists($linkhash->{$anchor});

	# Split off the run of digits at the end, which may be empty.
	my ($base,$number) = $anchor =~ /^(.*?)(\d*)\z/s;
	$number = 0 if $number eq '';

	# Start one higher than the existing number and keep incrementing until a
	#  free name is found. The number is attached to the base, not to the
	#  whole anchor, so "sec2" becomes "sec3" rather than "sec23".
	$number++;
	$number++ while exists($linkhash->{$base . $number});
	return $base . $number;
}

# changeOldLinks
#  For each source file in the list of files to process,
#    Process the file, writing links into an output file foo.linked.tex.
sub changeOldLinks {
	# Runs changeFileLinks over every file in the list.
	#
	# Arguments: reference to the hash of per-file anchor hashes, and a reference
	#  to the list of tex files to process.
	# Returns:   the sum of the link counts reported by changeFileLinks.
	my ($links,$filelist) = @_;
	my $cnt = 0;

	foreach my $infile (@$filelist) {
		my $outfile = $infile;
		# Only the extension at the very end is rewritten, so a name such as
		#  my.text.tex becomes my.text.linked.tex. A name without that
		#  extension gets the suffix appended, so the output can never be the
		#  input file itself.
		$outfile =~ s/\.tex$/.linked.tex/ or $outfile .= ".linked.tex";
		$cnt += changeFileLinks($infile,$outfile,$links);
	}
	return $cnt;
}

# Open the tex file for reading, and the output file for writing.
# Read the entire target file into a buffer.
# Pass everything from the target file to the output file, until we
#   get to a link or a label
# If the reference is an external one (http:, ftp:, or mailto:) ignore
#   it and pass it unchanged to the output file
# If it is a label, save the command and grab the first argument as the target.
# If it is a hyperref, save the command and grab the fourth argument as the target
# Separate the filename from the anchor in the target.
# Translate any filenames that have changed.
# If the reference has no corresponding anchor, output a warning.
# If the reference is to a local file that is not a tex file, output a warning.
# Output the reference to the output file, using the new reference name. No line wrapping
#  occurs for outputting a reference, so the lines in the tex file may get longer.

sub changeFileLinks {
	# Copies one tex file to its output file, replacing link targets and anchor
	#  names with the global names from the links table.
	#
	# Arguments: input file name, output file name, and a reference to the hash
	#  of per-file anchor hashes (file name => { anchor => global name }).
	# Returns:   the number of link/anchor commands found in the file. \elink
	#  commands and external references are counted too, although they are
	#  copied through unchanged.
	# Dies if either file cannot be opened or the output cannot be closed.
	# Warns on STDERR for references without a matching anchor, and for
	#  references that name no anchor at all.
	my ($infile,$outfile,$links) = @_;
	my ($bfr,$cmd,$type,$target,$refFile,$pointer,$newtarget);
	my ($url,$category,$name,$temp);
	my ($text);
	my $cnt = 0;

	my @regLink;

	# In every pattern the first group is the command up to and including the
	#  opening delimiter; the remaining groups are its arguments.
	$regLink[0] = '(\\\\elink\\{)(.*?)\\}';
	$regLink[1] = '(\\\\label\\{)(.*?)\\}';
	$regLink[2] = '(\\\\hyperref\\{)(.*?)\\}\\{(.*?)\\}\\{(.*?)\\}\\{(.*?)\\}';
	$regLink[3] = '(\\\\special\\n*\\{html:<a\\s+href=\\")(.*?)\\">\\}';
	$regLink[4] = '(\\\\special\\n*\\{html:<a\\s+name=\\")(.*?)\\">\\}';
	$regLink[5] = '(\\\\ilink\\{)(.*?)\\}\\{(.*?)\\}';
	my $linkTest = "(" . join(")|(",@regLink) . ")";

	# Read the entire input before the output is opened for writing.
	open(my $tex,'<',$infile) or die "Cannot open $infile for reading: $!\n";
	$bfr = do { local $/; <$tex> };
	close $tex;
	$bfr = '' unless defined($bfr);
	open(my $out,'>',$outfile) or die "Cannot open $outfile for writing: $!\n";

	while ($bfr =~ /$linkTest/so) {
		# Pass everything in front of the command through unchanged, then let
		#  the buffer start at the command so the patterns below can be
		#  anchored with ^.
		print $out $`;
		$bfr = $& . $';
		$cnt++;
		if (($cmd,$target) = ($bfr =~ /^$regLink[0]/so)) {
			# \elink is passed to the output untouched.
			print $out $&;
			$bfr = $';
			next;
		} elsif (($cmd,$target) = $bfr =~ /^$regLink[1]/so) {
			$bfr = $';
			$refFile = $infile; $pointer = $target; 
			$type = 1;
		} elsif (($cmd,$text,$category,$name,$url) =  $bfr =~ /^$regLink[2]/so ) {
			# The fourth argument is used as the file and the second as the
			#  anchor within it.
			$bfr = $';
			$refFile = $url;
			$pointer = $category;
			$type = 2;
		} elsif (($cmd,$target) =  $bfr =~ /^$regLink[3]/so) {
			$temp = $&;
			$bfr = $';
			# External references are passed through unchanged.
			if ($target =~ /^(http:|ftp:|mailto:)/) {
				print $out $temp;
				next;
			}
			($refFile,$pointer) = split (/\#+/,$target);
			$type = 3;
		} elsif (($cmd,$target) =  $bfr =~ /^$regLink[4]/so) {
			$bfr = $';
			$refFile = $infile; $pointer = $target; 
			$type = 4;
		} elsif (($cmd,$text,$target) =  $bfr =~ /^$regLink[5]/so ) {
			$bfr = $';
			($refFile,$pointer) = split (/\#+/,$target);
			$type = 5;
		} else {
			# Error.
			die "Parsing Error";	
		}
		$cmd =~ s/\n/ /sg;

		# An empty target leaves split with nothing to return.
		$refFile = '' unless defined($refFile);

		# Translate any filenames that have changed. Each key is applied as a
		#  pattern, in sorted order so that the result is the same on every run.
		foreach (sort(keys(%translation))) { $refFile =~ s/$_/$translation{$_}/; }

		if (defined($pointer) and $pointer) {
			$pointer =~ s/\n//g;
			$pointer =~ s/\s+$//;
			# Test the file level first: asking for a nested key directly
			#  would create an empty entry for every unknown file in $links.
			if (exists($links->{$refFile}) and defined($links->{$refFile}{$pointer})) {
				$newtarget = $links->{$refFile}{$pointer};
			} else {
				warn "Warning: Reference not Found- $refFile#$pointer in  $infile\n";
				$newtarget = "";
			}
		} else {
			warn "Warning: Outside Reference: $refFile in  $infile\n";
			$newtarget = $refFile;
		}

		# Rebuild the command around the new target. No line wrapping is done.
		if ($type == 1) {
			$cmd .= "$newtarget\}";
		} elsif ($type == 2) {
			$cmd .= "$text\}\{\}\{\}\{$newtarget\}";
		} elsif ($type == 3) {
			$cmd .= "$newtarget\">\}";
		} elsif ($type == 4) {
			$cmd .= "$newtarget\">\}";
		} elsif ($type == 5) {
			$cmd .= "$text\}\{$newtarget\}";
		} else {
			die "Parsing Error";
		}
		print $out $cmd;
	}
	# Whatever follows the last command.
	print $out $bfr;
	# Buffered write errors only show up when the handle is closed.
	close($out) or die "Cannot close $outfile: $!\n";
	return $cnt;
}


# Write a list of links to the indicated file.
sub write_links_file {
	# Writes the list of links out to the file links.out in the current
	#  directory, one line per anchor: "file#anchor", padding, new name.
	#
	# Argument: reference to the hash of per-file anchor hashes.
	# Dies if the file cannot be opened or closed.
	my ($links) = shift;
	my $outlinks = "links.out";
	my ($strlen,$longest,$spaces);

	open(my $of,'>',$outlinks) or die "Cannot open $outlinks for writing: $!\n";

	# Find the longest key, so the new names can be lined up in one column.
	$longest = 0;
	foreach my $filename (keys(%$links)) {
		foreach my $pointer (keys(%{$links->{$filename}})) {
			$strlen = length($filename) + length($pointer);
			$longest = max($longest,$strlen);
		}
	}

	foreach my $filename (sort(keys(%$links))) {
		foreach my $pointer (sort(keys(%{$links->{$filename}}))) {
			$strlen = length($filename) + length($pointer);
			$spaces = " " x ($longest - $strlen);
			print $of "$filename#$pointer $spaces $links->{$filename}{$pointer}\n";
		}
	}
	# Buffered write errors only show up when the handle is closed.
	close($of) or die "Cannot close $outlinks: $!\n";
}

# Write a list of reverse links to the indicated file.
sub writeRevLinks {
	# Writes the list of reverse links out to the file linksr.out in the
	#  current directory, one line per new name: new name, padding,
	#  "file#anchor".
	#
	# Argument: reference to the hash of per-file anchor hashes.
	# Dies if the file cannot be opened or closed.
	my ($links) = shift;
	my $outlinks = "linksr.out";
	my (%revlinks,$longest,$spaces);

	open(my $of,'>',$outlinks) or die "Cannot open $outlinks for writing: $!\n";

	# Turn the table around: new name => original "file#anchor".
	foreach my $filename (sort(keys(%$links))) {
		foreach my $pointer (sort(keys(%{$links->{$filename}}))) {
			$revlinks{$links->{$filename}{$pointer}} = "$filename#$pointer";
		}
	}

	# Find the longest new name, so the originals can be lined up in one column.
	$longest = 0;
	foreach my $newname (keys(%revlinks)) {
		$longest = max($longest,length($newname));
	}

	foreach my $newname (sort(keys(%revlinks))) {
		$spaces = " " x ($longest - length($newname));
		print $of "$newname $spaces $revlinks{$newname}\n";
	}
	# Buffered write errors only show up when the handle is closed.
	close($of) or die "Cannot close $outlinks: $!\n";
}

# Look for arguments in the command line, and decode them into a hash.
sub parse_cmdline {
	# Look for arguments in the command line, and decode them into a hash.
	#
	#   -f infile            stored under the key "infile" of the returned hash
	#   -t oldfile=newfile   stored in %translation (oldfile => newfile); may be
	#                        given any number of times
	#
	# Returns a reference to the hash of decoded arguments.
	# Dies when -f or -t is not followed by a value, or when the value of -t
	#  contains no "=". Any other argument is reported on STDERR and skipped.
	my $cmds = {};
	my $cnt = 0;

	while (defined(my $arg = $ARGV[$cnt++])) {
		# The switches are compared exactly. A pattern match such as /-f/ would
		#  also fire on a stray argument like "my-file.tex" and then swallow
		#  the argument after it.
		if ($arg eq '-f') {
			my $value = $ARGV[$cnt++];
			defined($value) or die "No Input File given on Command line\n";
			$cmds->{infile} = $value;
		} elsif ($arg eq '-t') {
			my $value = $ARGV[$cnt++];
			defined($value) or die "No Translation argument given for -t argument";
			# Split at the first "=" only; the new name may contain more.
			my ($old,$new) = split(/=/,$value,2);
			defined($new) or die "Invalid translation Given $value";
			$translation{$old} = $new;
		} else {
			warn "Ignoring unrecognized argument: $arg\n";
		}
	}
	return $cmds;
}


##################################################################
#                       MAIN                                  ####
##################################################################

my (@includes,%pointers,%pointersRev,$anchors);

# Parse the command-line arguments and put them into a hash.
my $args = parse_cmdline();

if (!defined($args->{infile})) {
	die "Master File to Process must be given with -f parameter\n";
}

foreach (sort(keys(%translation))) {
	print "Filename Translation: $_ -> $translation{$_}\n";
}

# Read in the list of files to be included
# Get the links from each file.
# Convert the link format from local to global; check for duplicates.
# Change all the links in the source files to global ones, and change
# 	the names (labels) to global ones.
# Write out the list of links and reverse links to a text file.
#
@includes = get_includes($args->{infile});

$anchors = get_anchors(@includes);
convert_links($anchors);
my $link_cnt = changeOldLinks($anchors,\@includes);
#write_links_file($anchors);
#writeRevLinks($anchors);

# The top level of $anchors has one key per file, so the anchors have to be
#  counted inside each file's hash; counting the top-level keys would report
#  the number of files instead.
my $anchor_cnt = 0;
foreach my $file (keys(%$anchors)) {
	$anchor_cnt += scalar(keys(%{$anchors->{$file}}));
}
print "Finished -- $anchor_cnt Anchors Found  $link_cnt Links Resolved\n";
# syntax highlighted by Code2HTML, v. 0.9.1