#!/usr/bin/perl

# ris2eprints.pl
#
# Andy Powell 
# UKOLN, University of Bath
# 20 June 2002
#
# Parse Reference Manager RIS export file to produce XML suitable for
# importing into eprints.org software.

use XML::Writer;

# Everything below runs under strictures: all variables are declared and
# suspicious constructs are reported instead of silently misbehaving.
use strict;
use warnings;

# Maps each RIS tag to the name of the eprints field it is written to.
# Tags mapped to the empty string are recognised but not exported.
# Modify this to change/add field names...
my %name = (
  "M3" => "", "N2" => "abstract", "SN" => "", "VL" => "", "RP" => "",
  "KW" => "keywords", "SP" => "", "T1" => "title", "T2" => "", "UR" => "",
  "T3" => "", "U5" => "", "Y1" => "year", "Y2" => "", "TY" => "type",
  "ID" => "", "JF" => "", "A1" => "authors", "PB" => "", "A2" => "",
  "A3" => "", "EP" => "", "ER" => "", "AV" => "", "IS" => "",
  "CY" => "", "M1" => "", "M2" => "", "N1" => ""
);

# Maps the value of the RIS TY tag to the type name written to the output.
# Values that are not listed here are passed through unchanged.
# Modify this to change/add types...
my %type = (
  "ADVS" => "other", "BOOK" => "book", "CHAP" => "bookchapter",
  "CONF" => "confpaper", "JOUR" => "journalp", "MGZN" => "other",
  "MPCT" => "other", "PCOMM" => "other", "RPRT" => "techreport",
  "SER" => "other", "SOUND" => "other", "THES" => "thesis",
  "UNBILL" => "other", "UNPB" => "other"
);

my $writer = XML::Writer->new(
  OUTPUT      => \*STDOUT,
  DATA_MODE   => 1,
  DATA_INDENT => 2,
);
# NOTE(review): field values are copied from the input without any
# re-encoding, so the encoding named in the XML declaration is presumably
# only correct if the RIS file is already in that encoding -- confirm.
$writer->xmlDecl();
$writer->startTag("eprintsdata");

# True while a <record> element is open.
my $inrecord = 0;

# Read the RIS data from the files named on the command line (or standard
# input) and write one <record> per reference.
while (my $line = <>) {
  # chomp only removes a newline that is really there, so a final line
  # without a terminator keeps its last character.
  chomp $line;
  # Handle DOS end of line character :-(
  $line =~ s/\r\z//;

  # A blank line, or the RIS end-of-reference tag, finishes the current
  # record.  ER is matched strictly (tag, whitespace, hyphen) so that text
  # such as "ER-positive" on an untagged line cannot end a record early.
  if ($line eq '' || $line =~ /^ER\s+-/) {
    if ($inrecord) {
      $writer->endTag("record");
      $inrecord = 0;
    }
    next;
  }

  # Split "TAG  - value" at the FIRST hyphen only, so hyphens inside the
  # value (page ranges, ISSNs, hyphenated words) are kept.
  my ($tag, $value) = split(/-/, $line, 2);
  next unless defined $value;    # no hyphen: not a tagged line
  $tag   =~ s/\s+//g;
  $value =~ s/^\s+//;

  # Skip unknown tags and tags that are deliberately not exported.
  my $field = $name{$tag};
  next unless $field;

  # Keep only the part of the date before the first slash.
  $value =~ s{/.*}{} if $tag eq 'Y1';
  $value = $type{$value} if $tag eq 'TY' && $type{$value};

  # Open the record lazily, on its first exported field.  This way input
  # that does not start with a blank line is still wrapped in a <record>,
  # and repeated or trailing blank lines do not produce empty records.
  if (!$inrecord) {
    $writer->startTag("record");
    $inrecord = 1;
    # Always uses userid 1 currently.  Need to modify this to associate
    # different userids with each record.
    $writer->dataElement('field', '1', 'name' => 'userid');
  }

  if ($tag eq 'A1') {
    $writer->startTag('field', 'name' => $field);
    if ($value =~ /,/) {
      # "Family, Given[, Suffix]": trim the whitespace around the commas and
      # use the first two parts.  The -1 limit keeps empty trailing parts,
      # so both variables are always defined here.
      my ($family, $given) = split(/\s*,\s*/, $value, -1);
      $writer->dataElement('part', $given, 'name' => 'given')
        if $given ne '';
      $writer->dataElement('part', $family, 'name' => 'family');
    }
    else {
      $writer->dataElement('part', $value, 'name' => 'family');
    }
    $writer->endTag('field');
  }
  else {
    $writer->dataElement('field', $value, 'name' => $field);
  }
}

# Close the last record only if one is actually open (the input may have
# been empty or may have ended with a blank line or an ER tag).
$writer->endTag("record") if $inrecord;
$writer->endTag("eprintsdata");
$writer->end();

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA