#!/usr/bin/perl # # bids.to.bibtex is based on a perl script written by Jonathan Swinton, # js@ic.ac.uk, and subsequently modified by Ben Bolker and Anthony Stone, # ajs1@cam.ac.uk. # This version last modified on 29 January 1998. # Usage: bids.to.bibtex # # will take input from a BIDS email message in in the source directory # $sourcedir and produce a BibTeX bibfile .bib in the output directory # $bibdir. # If both files are to be in the current directory, replace the definitions # below by # $sourcedir = ""; # $bibdir = ""; # If the output file exists, the new references are appended to it. Otherwise # a new file is created. # You will need to change the first line of this file if perl lives somewhere # other than /usr/bin/perl on your system. # The BIDS message should have been generated using one of the downloading # formats. Other bibliography database output can be processed provided that # it is in the same (fairly standard) form, i.e. a sequence of fields each # beginning with a two-letter flag at the start of a line, followed by "- ", # with continuation lines starting with four spaces. The whole entry is # terminated by a blank line. Any lines in the file that do not conform to # this pattern are ignored. An example of such an entry is # TI- THE NATURE AND GEOMETRY OF INTERMOLECULAR INTERACTIONS BETWEEN # HALOGENS AND OXYGEN OR NITROGEN # AU- LOMMERSE, JPM;STONE, AJ;TAYLOR, R;ALLEN, FH # NA- CAMBRIDGE CRYSTALLOG DATA CTR,12 UNION RD,CAMBRIDGE CB2 1EZ,ENGLAND # CAMBRIDGE CRYSTALLOG DATA CTR,CAMBRIDGE CB2 1EZ,ENGLAND # UNIV CAMBRIDGE,CHEM LAB,CAMBRIDGE CB2 1EW,ENGLAND # JN- JOURNAL OF THE AMERICAN CHEMICAL SOCIETY # PY- 1996 # VO- 118 # NO- 13 # PG- 3108-3116 # IS- 0002-7863 # # Minor variations in this format can be dealt with by changing the "elsif"s # near the end of this file. # Some work will be needed to deal with capitalization and accents in the # BibTeX file. The title is converted to lower-case, unless it already # contains any lower-case characters, and a few common acronyms are converted # back to upper case by subroutine special. Use the examples there as # models for your own if you like. An attempt is made to convert chemical # formulae, but it is usually necessary to do this by hand. Note that any # upper-case material in the title needs to be enclosed in braces, or BibTeX # will force it to lower-case again. If your citation format doesn't include # the title, then of course this isn't a problem. # You may wish also to modify the "journal" subroutine, which replaces the # full journal title by a conventional abbreviation. This almost certainly # doesn't deal with all the journals that you might wish to cite. # This version assumes perl 5, but it should work with perl 4. # Please address comments and bug reports to Anthony Stone (ajs1@cam.ac.uk), # who will be pleased to receive them but does not promise to do anything # about them. $sourcedir = "/users/anthony/Mail/BIDS/"; $bibdir = "/users/anthony/bibliography/bibfiles/"; $, = ' '; # set output field separator $\ = "\n"; # set output record separator format OUT = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $line ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $line . %name = ( 'TI', 'title', 'AU', 'author', 'NA', 'address', 'JN', 'journal', 'PY', 'year', 'VO', 'volume', 'NO', 'issue', 'PG', 'pages', 'AB', 'abstract', 'KP', 'keywords', # 'NA', 'ignore', # 'CR', 'ignore', # 'RF', 'ignore', # 'KP', 'ignore', # 'PA', 'ignore' ); @entry{'header','author','title','journal','volume','year','issue','pages', 'keywords','abstract','none'} = ('','','','','','','','','','',''); $type = 'none'; sub initcaps # Capitalise initial letter of every word, lower-case the rest { local ($arg) = $_[0]; $arg =~ s/([A-Za-z])([A-Za-z]+)/\U$1\E\L$2\E/g; $arg; } sub firstupper # Capitalise initial letter of every sentence, lower-case the rest { local ($arg) = $_[0]; $arg =~ s/(.)(.*)/\U$1\E\L$2\E/; $arg =~ s/([.?!]\s+)([a-z])/$1\u$2/g; $arg; } sub lowercase { local ($arg) = $_[0]; $arg =~ s/(.*)/\L$1\E/; $arg; } sub lowertrivial { local ($arg) = $_[0]; $arg =~ s/\ba\b/a/gi; $arg =~ s/\ban\b/an/gi; $arg =~ s/\band\b/and/gi; $arg =~ s/\bthe\b/the/gi; $arg =~ s/\bof\b/of/gi; $arg =~ s/\bto\b/to/gi; $arg =~ s/\bfrom\b/from/gi; $arg =~ s/\bin\b/in/gi; $arg =~ s/\bwith\b/with/gi; $arg; } sub special # Add your own acronyms. Note that the upper-case replacement is enclosed # in braces so that BibTeX doesn't put it back into lower-case. { local ($arg) = $_[0]; $arg =~ s/\bscf\b/{SCF}/g; $arg =~ s/\bmc(-*)scf\b/{MC$1SCF}/g; $arg =~ s/\bci\b/{CI}/g; $arg =~ s/\b([0-9]-[0-9]+)g\b/{$1G}/g; $arg =~ s/\bdna\b/{DNA}/g; $arg =~ s/vanderwaals/{Van} der {Waals}/g; $arg =~ s/moller-plesset/{M}{\\o}ller--{Plesset}/g; $arg; } sub formulae # Chemical formulae -- impossible to get them all right { local ($arg) = $_[0]; $arg =~ s/(\b[a-z]+[1-9]+[a-z0-9]+\b)/{\U$1\E}/gi; $arg; } sub strip { local ($val) = $_[0]; $val =~ s/....//; # chop off initial flag and surrounding spaces $val =~ s/ *$//; # some entries have trailing spaces $val; } sub header { $line = "\@article{" . $flag . ","; write(OUT); } sub authors { # Transform author entry into BibTeX format and construct flag. # The flag is made up from the first author's surname, followed by the # first letter of the surname of each remaining author, followed by the # last two digits of the year. This is usually unique, but not always. @arr = split(/;/,$entry{'author'}); $flag = ""; foreach $auth ( @arr) { ($sname,$fname) = split(/, /, $auth); $fname =~ s/\w/$&. /g; $fname =~ s/ *$//; $sname = &initcaps($sname); $sname =~ s/^(Mac|Mc|O')([a-z])/$1\u$2/; # print $fname . " " . $sname . "\n"; if ($flag eq "") {$flag=$sname; $flag =~ s/ //g;} else {$flag .= substr($sname,0,1);} $auth = $sname . ', ' . $fname ; } $entry{'author'} = join (" and ", @arr); $flag .= substr($entry{'year'},-2); # append last two digits of year to flag $flag =~ s/ //g; } sub author { $line = ' Author = {' . $entry{'author'} . '},'; $line = &texsafety ($line); write(OUT); } sub address { $entry{'address'} =~ s/,([^ ])/, $1/g; $entry{'address'} = &lowertrivial(&initcaps($entry{'address'})); $line = ' Address = {' . $entry{'address'} . '},'; $line = &texsafety ($line); write(OUT); } sub title { # If the title contains any lowercase characters already, leave it # unchanged. Otherwise convert to lowercase except for first letter of # each sentence, and attempt to uppercase chemical formulae and common # acronyms. if ( $entry{'title'}=~/[a-z]/ ) { $line = $entry{'title'};} else { $line = &formulae(&special(&firstupper($entry{'title'})));} $line = &texsafety($line); $line = ' Title = {' . $line . '},'; # print $line; write(OUT); } sub journal { $journal = &initcaps($entry{'journal'}); $journal =~ s/Journal/J./; $journal =~ s/Chemical|Chemistry/Chem./; $journal =~ s/Physics|Physical/Phys./; $journal =~ s/Society/Soc./; $journal =~ s/Communications/Comm./; $journal =~ s/Transactions/Trans./; $journal =~ s/Reviews/Rev./; $journal =~ s/-Chemical/ Chem./; $journal =~ s/-Faraday/ Faraday/; $journal =~ s/Discussions/Disc./; $journal =~ s/American/Amer./; $journal =~ s/(\s)a\s/$1/gi; $journal =~ s/(\s)an\s/$1/gi; $journal =~ s/(\s)and\s/$1/gi; $journal =~ s/(\s)the\s/$1/gi; $journal =~ s/(\s)of\s/$1/gi; $journal =~ s/(\s)to\s/$1/gi; $journal =~ s/(\s)from\s/$1/gi; $journal =~ s/(\s)in\s/$1/gi; $journal =~ s/(\s)with\s/$1/gi; $line = ' Journal = {' . $journal . '},'; write(OUT); } sub year { $line = ' Year = {' . $entry{'year'} . '},'; write(OUT); } sub volume { $line = ' Volume = {' . $entry{'volume'} . '},'; write(OUT); } sub pages { $line = ' Pages = {' . $entry{'pages'} . '}'; if ( $entry{'abstract'} ne "" || $entry{'keywords'} ne "" ) {$line .= ',';} $line = &texsafety ($line); write(OUT); } sub keywords { if ( $entry{'keywords'} ne "" ) { $line = ' Keywords = {' . &lowercase($entry{'keywords'}) . '}'; if ( $entry{'abstract'} ne "" ) {$line .= ',';} write(OUT);} } sub abstract { if ( $entry{'abstract'} ne "" ) { $line = ' Abstract = {' . $entry{'abstract'} . '}'; $line = &texsafety ($line); write(OUT);} } sub issue { } sub terminator { $line = "}"; write(OUT); print OUT "\n"; } sub clear { $entry{'author'} = ''; $entry{'title'} = ''; $entry{'journal'} = ''; $entry{'volume'} = ''; $entry{'year'} = ''; $entry{'page'} = ''; $entry{'issue'} = ''; $entry{'abstract'} = ''; } sub texsafety { # Replace % and & by \% and \&. $arg = $_[0]; $arg =~ s/\&/\\&/g; $arg =~ s/\%/\\%/g; $arg; } while ($file = shift) { open(IN,$sourcedir.$file) || die "Can't open ".$sourcedir.$file."\n"; if ( -e $bibdir.$file.".bib" ) { warn $file.".bib exists -- appending\n"; open(OUT,">>" . $bibdir.$file . ".bib");} else { open(OUT,">" . $bibdir.$file . ".bib");} while () { # print; chop; # strip record separator if (/^ *$/ && $type ne 'none' && $type ne 'header' ) { # Entry is complete. Process and print it. & authors; & header; & author; & address; & title; & journal; & year; & volume; & issue; & pages; & keywords; & abstract; & terminator; $type = 'none'; & clear; } else { if (/^record/i) { $type = 'header'; $entry{$type} = $_; } elsif (/^\(([0-9]+)\)/) { die "Input appears to be in the wrong format. You need to use 'downloading' format.\n"; } elsif (/^ / && $type ne 'none' && $type ne 'ignore') { $entry{$type} .= " " . & strip($_); } elsif (/^([A-Z][A-Z])- /i) {$tag=$1; if ( defined($name{$tag}) ) { $type = $name{$tag}; $entry{$type} = & strip ($_) ; } else { $type = "ignore"; $entry{$type} = ""; } } # print "$type : $entry{$type}\n"; } } close IN; close OUT; }