#!/usr/bin/perl

# $Id: iafa2spt.pl,v 1.2 2002/09/30 12:04:54 martin Exp $

# The template currently being assembled: one "Attribute: value" string
# per element, with continuation lines already folded in.  dump_record()
# reads this file-scoped lexical directly, so it has to be declared here,
# ahead of the sub definition.
my @rec = ();
use Getopt::Std;

# -d turns on debugging output; getopts() stores the flag in $opt_d.
our $opt_d;
getopts("d");

# Index in @rec of the attribute most recently read (-1 while @rec is empty).
my $count = -1;

# Emit the pending template, if there is one, and start a fresh one.
# Doing nothing for an empty @rec keeps runs of blank lines from
# producing empty output records.
my $flush_record = sub {
    return unless @rec;
    dump_record();
    @rec   = ();
    $count = -1;
};

while (<>) {
    # Strip the line terminator.  Unlike chop(), this leaves a final line
    # that has no trailing newline intact, and it also copes with CRLF.
    s/\r?\n\z//;

    # A blank line ends the current template.
    if (/^$/) {
        $flush_record->();
        next;
    }

    # A leading space marks a continuation of the previous attribute.
    if (/^ (.*)/) {
        if ($count < 0) {
            # Nothing to continue; appending to $rec[-1] of an empty
            # array would be a fatal error, so report it and carry on.
            warn "$0: ignoring continuation line with no attribute to continue (input line $.)\n";
            next;
        }
        print ">> [$count] got continuation line .. $1\n" if $opt_d;
        $rec[$count] .= $1;
        next;
    }

    # A Template-Type attribute also closes whatever template is pending,
    # so templates that were concatenated without a blank line between
    # them are still split into separate records.
    # NOTE(review): assumes Template-Type is the first attribute of every
    # template -- confirm against the ROADS data being converted.
    $flush_record->() if /^Template-Type:/i;

    # valueless attribute...
    next if /^[^\s:]+:$/;

    if (/^[^\s:]+:\s/) {
        $count++;
        print ">> [$count] got a/v pair .. $_\n" if $opt_d;
        $rec[$count] = $_;
        next;
    }

    die "$0: how did I get here?  $_";
}

# The input need not end with a blank line: emit the final template too.
$flush_record->();


# dump_record - print the template held in @rec as one SPT record.
#
# Takes no arguments.  Reads the file-level @rec array, where each
# element is an "Attribute: value" string, and prints a single line of
# 18 tab-separated fields followed by a newline to standard output.
# Fields with no matching attribute are printed as empty strings.
#
# An attribute is recognised by the *end* of its name, so for example
# "Creation-Date" feeds the date fields just as "Date" does.  Only the
# attribute name is examined: text inside a value that happens to look
# like "Title: ..." can no longer overwrite another field.
sub dump_record {
    # Every variable is lexical, so nothing (the date in particular)
    # carries over from one record to the next.
    my ($Title, $AlternateTitle, $Description, $Url, $Source,
        $Relation, $Coverage, $Rights, $EmailAddress, $Date,
        $ControlledName, $ControlledNameTypeName,
        $ClassificationName, $ClassificationTypeId);

    foreach my $line (@rec) {
        # Separate the attribute name from its value.
        next unless $line =~ /^([^\s:]+):\s+(.*)/;
        my ($name, $value) = ($1, $2);

        next if $name eq 'Handle';

        if ($name =~ /Title\z/) {
            $Title = $value;
        }
        elsif ($name =~ /Creator\z/) {
            $ControlledName         = $value;
            $ControlledNameTypeName = "Creator";
        }
        elsif ($name =~ /Subject\z/) {
            $ClassificationName = $value;
        }
        elsif ($name =~ /Description\z/) {
            $Description = $value;
        }
        elsif ($name =~ /Publisher\z/) {
            # Shares the controlled-name slot with Creator; whichever
            # attribute comes last in the template wins.
            $ControlledName         = $value;
            $ControlledNameTypeName = "Publisher";
        }
        elsif ($name =~ /Date\z/) {
            $Date = $value;
        }
        elsif ($name =~ /Source\z/) {
            $Source = $value;
        }
        elsif ($name =~ /Identifier\z/) {
            # Keep only identifiers that are locators; URNs and anything
            # without a recognised scheme are dropped.
            $Url = $value
                if $value =~ /^(?:ftp|gopher|wais|https?|mailto|irc):/;
        }
        elsif ($name =~ /Relation\z/) {
            $Relation = $value;
        }
        elsif ($name =~ /Coverage\z/) {
            $Coverage = $value;
        }
        elsif ($name =~ /Rights\z/) {
            $Rights = $value;
        }

        # Skipping...
        # Language
        # Format
        # Contributors
        # Type
    }

    # The single date found is used for all four date columns, and the
    # verification-attempts column is always "0".
    # NOTE(review): the column order follows the variable list declared
    # at the top of this sub -- confirm it against the SPT import format.
    print join("\t", map { defined $_ ? $_ : "" }
        $Title, $AlternateTitle, $Description, $Url, $Source,
        $Relation, $Coverage, $Rights, $EmailAddress,
        $Date, $Date, $Date, $Date,
        "0", $ControlledName, $ControlledNameTypeName,
        $ClassificationName, $ClassificationTypeId) . "\n";
}



=head1 NAME

B<iafa2spt.pl> - Munge a collection of IAFA (ROADS) templates into SPT format

=head1 SYNOPSIS

  iafa2spt.pl <iafa.txt >spt.txt
  cat source/* | iafa2spt.pl >spt.txt

=head1 DESCRIPTION

The B<iafa2spt.pl> program accepts one or more IAFA templates (as used
by the ROADS software), and uses them as the basis for new records
created in the Scout Portal Toolkit (SPT) bulk import/export format
for merging into an SPT database.  This provides a potential migration
path from ROADS to the SPT software, and an easy way to offer service
using both packages in parallel.

Note that this program has been tested primarily with the
DUBLINCORESIMPLE IAFA template type, which was created in order that
the ROADS software could be used to manipulate Dublin Core metadata.
Some effort has been made to make it compatible with the attributes
found in other common ROADS templates, e.g. DOCUMENT and IMAGE, but it
may be necessary to hand edit the program in order to achieve the best
results when converting an arbitrary ROADS database.

The IAFA templates in the ROADS database need not be particularly
well-formed, and it should not be necessary to insert any blank lines
or other punctuation between them.  Be sure to check that continuation
lines have been preserved for multi-line attribute/value pairs, since
some versions of the ROADS metadata editor B<mktemp.pl> had problems
writing out correctly formatted templates.

=head1 OPTIONS

=over 4

=item B<-d>

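Turn on debugging output.  Debug messages are written to standard
output, interleaved with the converted records, so do not use this
option when generating a file for import.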

=back

=head1 EXAMPLE

  $ cat 0004
  Template-Type: DOCUMENT
  Handle: 0004
  Category: Book
  Title: The Unlimited Dream Company
  Author-Name-v1: J. G. Ballard
  Description: From the moment Blake crashes his stolen aircraft into
    the Thames, the unlimited dream company takes over and the town
    of Shepperton is transformed into an apocalyptic kingdom of desire
    and stunning imagination ruled over by Blake's messianic figure.
    Tropical flora and fauna appear; pan-sexual celebrations occur
    regularly; and in a final climax of liberation, the townspeople
    learn to fly.
  Publisher-Name-v1: Triad/Panther Books
  Creation-Date: 1979
  URI-v1: http://www.simons-rock.edu/~craigs/ballard.html
  ISBN-v1: 0-586-05205-4
  Subject-Descriptor-v1: 823
  Subject-Descriptor-Scheme-v1: DDC
  Comments: First published in Great Britain by Jonathan Cape.
    This edition published in 1991.

  $ iafa2spt.pl <0004
  The Unlimited Dream Company             From the moment Blake
  crashes his stolen aircraft into the Thames, the unlimited dream
  company takes over and the town of Shepperton is transformed
  into an apocalyptic kingdom of desire and stunning imagination
  ruled over by Blake's messianic figure. Tropical flora and fauna
  appear; pan-sexual celebrations occur regularly; and in a final
  climax of liberation, the townspeople learn to fly.           
                             1979     1979    1979    1979    0

(SPT records use the tab character as a delimiter - but the above has
been word wrapped for readability)

=head1 SEE ALSO

L<spt2iafa.pl>

=head1 COPYRIGHT

Copyright (c) 2002, Martin Hamilton E<lt>imeshtk-utils@martinh.netE<gt>
All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

It was developed at the Department of Computer Science at Loughborough
University, as part of the joint JISC/NSF IMesh Toolkit project.

=head1 AUTHOR

Martin Hamilton E<lt>imeshtk-utils@martinh.netE<gt>

