#!/usr/bin/perl --
#-*-perl-*-
# Alf-Christian Achilles
#
# This filter script expands string macro definitions in BibTeX files
#
# To use this script you must have "bibclean" installed in your PATH
#   ftp://ftp.math.utah.edu/pub/tex/bib/bibclean
#
# Usage: expand.abbrevs [macro-file ...] < file.bib > file.expanded.bib
#
# BibTeX data to be expanded must be provided on standard input and the
# results will be output on standard output, that is, use expand.abbrevs
# as a filter.
# The @String macros can of course also appear in the BibTeX data.
#

use strict;
use warnings;

# Decide whether you want every '#' join operator to also introduce a
# space to offset margin whitespace elimination by bibclean.
our $join_space = ' ';

# List of macros that must not be expanded.
# qw() is required here: as barewords, `oct` would be parsed as a call to
# the builtin oct() and the list would contain 0 instead of 'oct'.
our @noexpand = qw(jan feb mar apr may jun jul aug sep oct nov dec);
our $noexpand_regexp = '^(' . join('|', @noexpand) . ')$';

# Standard, predefined BibTeX macros.
our %abbrev = (
    'jan'      => '"January"',
    'feb'      => '"February"',
    'mar'      => '"March"',
    'apr'      => '"April"',
    'may'      => '"May"',
    'jun'      => '"June"',
    'jul'      => '"July"',
    'aug'      => '"August"',
    'sep'      => '"September"',
    'oct'      => '"October"',
    'nov'      => '"November"',
    'dec'      => '"December"',
    'acmcs'    => '"ACM Computing Surveys"',
    'acta'     => '"Acta Informatica"',
    'cacm'     => '"Communications of the ACM"',
    'compj'    => '"Computer Journal"',
    'ibmjrd'   => '"IBM Journal of Research and Development"',
    'ibmsj'    => '"IBM Systems Journal"',
    'ieeese'   => '"IEEE Transactions on Software Engineering"',
    'ieeetc'   => '"IEEE Transactions on Computers"',
    'ieeetcad' => '"IEEE Transactions on Computer-Aided Design of Integrated Circuits"',
    'ipl'      => '"Information Processing Letters"',
    'jacm'     => '"Journal of the ACM"',
    'jcss'     => '"Journal of Computer and System Sciences"',
    'scp'      => '"Science of Computer Programming"',
    'sicomp'   => '"SIAM Journal on Computing"',
    'tocs'     => '"ACM Transactions on Computer Systems"',
    'tods'     => '"ACM Transactions on Database Systems"',
    'tog'      => '"ACM Transactions on Graphics"',
    'toms'     => '"ACM Transactions on Mathematical Software"',
    'toois'    => '"ACM Transactions on Office Information Systems"',
    'toplas'   => '"ACM Transactions on Programming Languages and Systems"',
    'tcs'      => '"Theoretical Computer Science"',
);

# NOTE(review): the predefined table above is discarded immediately, so only
# macros read from the macro files or from the input are ever expanded.
# This is kept exactly as found; confirm whether it is intentional.
%abbrev = ();

# Run the filter only when executed as a script, so that the helper
# subroutines can be loaded (e.g. by a test) without spawning bibclean.
main() unless caller;

# Entry point: load the macro files named on the command line, then filter
# the BibTeX data on standard input to standard output.
# Dies if a pipe cannot be opened or if bibclean exits with a non-zero code.
sub main {
    for my $file (@ARGV) {
        load_macro_file($file);
    }

    # bibclean inherits our STDIN (first pipe) and our STDOUT (second pipe).
    open(my $in, '-|', 'bibclean -no-check-values -max-width 0')
        or die "Could not open pipe from bibclean";
    open(my $out, '|-', 'bibclean -no-check-values')
        or die "Could not open pipe to bibclean";

    my $in_entry = 0;
    while (my $line = <$in>) {
        if (!$in_entry && $line =~ /^\@String\{(\S+) += +(.+)\}$/) {
            # @String definition: remember it, do not copy it to the output
            define_macro($1, $2);
        }
        elsif ($line =~ /^\@[A-Z][-a-zA-Z]+\{(\S+,)?$/) {
            # first line of an entry
            $in_entry = 1;
            print {$out} $line;
        }
        elsif ($line =~ /^\}$/) {
            # last line of an entry
            print {$out} $line;
            $in_entry = 0;
        }
        elsif ($in_entry) {
            # Only fields that contain a join operator or no quoted string
            # at all can reference a macro.  Leading whitespace is matched
            # with \s+ so that any field indentation is accepted.
            if (($line =~ /#/ || $line !~ /"/)
                && $line =~ /^(\s+\S+ =\s+)(.*),$/) {
                my ($prefix, $value) = ($1, $2);
                expand_value($value);
                print {$out} $prefix . $value . ",\n";
            }
            else {
                print {$out} $line;
            }
        }
        # uncomment the three following lines if you do not want non-BibTeX
        # data to be omitted in the output
        # else {
        #     print {$out} $line;
        # }
    }

    close($in);
    if ($? >> 8) {
        die "bibclean exits with code " . ($? >> 8);
    }
    close($out);
    if ($? >> 8) {
        die "bibclean exits with code " . ($? >> 8);
    }
    exit(0);
}

# Read every @String definition from one macro file (optionally compressed,
# recognised by a .z/.gz/.Z suffix) after normalising it with bibclean.
sub load_macro_file {
    my ($file) = @_;

    # The command is run by the shell, so the file name must be quoted.
    my $quoted = shell_quote($file);
    my $strings;
    if ($file =~ /\.(z|gz|Z)$/) {
        open($strings, '-|',
             "gunzip -c $quoted | bibclean -no-check-values -max-width 0")
            or die "Could not open pipe from \"gunzip -c $file | bibclean |\"";
    }
    else {
        open($strings, '-|',
             "bibclean -no-check-values -max-width 0 $quoted")
            or die "Could not open pipe from \"bibclean $file\"";
    }

    while (my $line = <$strings>) {
        # handle @string entries
        if ($line =~ /^\@String\{(\S+) += +(.+)\}$/) {
            define_macro($1, $2);
        }
    }
    close($strings);
    return;
}

# Quote a word so the shell passes it on as a single literal argument.
sub shell_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Store a macro in %abbrev and expand any macros its value refers to.
sub define_macro {
    my ($name, $value) = @_;
    $abbrev{$name} = $value;
    expand_value($abbrev{$name});
    return;
}

# Legacy line-based expander operating on $_; not called anywhere in this
# file and kept unchanged.
# NOTE(review): the `next` leaves the *caller's* loop iteration when $_ is
# not a field line, so this sub can only be called from inside a loop.
sub expand_line {
    next if (!/^ \S+ =\s+\S.*,$/);
    # substitute the macro if it is the first or only item in the field
    s/^( \S+ =\s+)([^\" \t\n,]+)( # |,$)/$1"$abbrev{$2}"$3/g;
    # substitute macros that follow a text
    while (s/^( \S+ =\s+.*" # )([^\" \t\n,]+)( # |,$)/$1"$abbrev{$2}"$3/g) {};
    s/\" # \"//g;
}

# Expand all macros in a field value.  The argument is modified IN PLACE
# (callers pass the variable holding the value).
#   $_[0]  field value without the trailing comma, e.g.  foo # " text"
sub expand_value {
    # introduce a fake empty string at the beginning to get things started
    $_[0] = '"" # ' . $_[0];
    # Substitute macros that follow a text.  One /g pass consumes the
    # trailing ' # ' of each match, so directly adjacent macros need
    # another pass: repeat until nothing changes.
    1 while $_[0] =~ s/(" # )([^\" \t\n#,]+)( # |$)/_expand_macro($1, $2, $3)/eg;
    # get rid of the dummy empty string
    $_[0] =~ s/^\"\" # //;
    # unexpand the strings with the magic cookies
    $_[0] =~ s/\"\000([^\000#, \t]+)\000\"/$1/g;
    # remove join operator between string values
    $_[0] =~ s/\" # \"/$join_space/g;
}

# Build the replacement for one matched macro.  The captures are copied
# into lexicals before abbrev() runs its own regex match.
sub _expand_macro {
    my ($lead, $name, $trail) = @_;
    return $lead . abbrev($name) . $trail;
}

# Return the replacement text for one macro name:
#   - names in @noexpand: the name wrapped in NUL "magic cookies" inside
#     quotes, which expand_value later turns back into the bare name;
#   - names defined in %abbrev: the stored (quoted) value;
#   - anything else: the name itself as a quoted string.
sub abbrev {
    # Copy the argument first: callers pass a capture variable, and the
    # regex match below would otherwise change what $_[0] refers to.
    my ($name) = @_;
    if ($name =~ /$noexpand_regexp/) {
        return "\"\000$name\000\"";
    }
    elsif (defined($abbrev{$name})) {
        return $abbrev{$name};
    }
    else {
        # return "\"\000$name\000\"";
        return "\"$name\"";
    }
}