#!/usr/bin/perl

# $Id: ldif2spt.pl,v 1.2 2002/09/30 12:04:54 martin Exp $

use Getopt::Std;
getopts("d");

# Logical lines ("name: value", continuation lines already folded in) of the
# LDIF record currently being read.  File-scoped because dump_record reads
# it directly.
my @rec = ();

# True while the most recent physical line belongs to a "#" comment, so that
# the continuation lines of a folded comment are discarded along with it.
my $in_comment = 0;

while (<>) {
    # chomp rather than chop: a final line with no trailing newline must
    # keep its last character.  Also shed the CR of CRLF line endings,
    # which would otherwise stop blank lines from being recognised.
    chomp;
    s/\r\z//;

    if (/^$/) {
        # Blank line: end of record.  Runs of blank lines must not emit an
        # empty output row each.
        $in_comment = 0;
        if (@rec) {
            dump_record();
            @rec = ();
        }
    } elsif (/^#/) {
        # LDIF comment line - carries no data.
        $in_comment = 1;
    } elsif (/^ (.*)/) {
        # Continuation line: its text (minus the leading space) is appended
        # to the logical line before it.
        next if $in_comment;
        die "$0: continuation line with nothing to continue:  $_"
            unless @rec;
        my $count = $#rec;
        print ">> [$count] got continuation line .. $1\n" if $opt_d;
        $rec[$count] .= $1;
    } elsif (/^[^\s:]+:/) {
        # Start of a new attribute.  Only the "name:" prefix is required, so
        # an empty value or a value with no space after the colon does not
        # abort the whole run.
        $in_comment = 0;
        push @rec, $_;
        my $count = $#rec;
        print ">> [$count] got a/v pair .. $_\n" if $opt_d;
    } else {
        die "$0: how did I get here?  $_";
    }
}

# Flush the final record when the input does not end with a blank line.
dump_record() if @rec;


# dump_record - print one SPT bulk-import row for a single LDIF record.
#
# Arguments: the record's logical lines ("name: value", continuation lines
#            already joined).  When called with no arguments the lines are
#            taken from the file-level @rec, which is how the main loop
#            calls it.
# Output:    one line on STDOUT holding 18 tab-separated fields, in the
#            order listed at the print statement below.
#
# When an attribute occurs more than once, the last usable occurrence wins.
sub dump_record {
    my @lines = @_ ? @_ : @rec;

    # Every value is local to this call so that nothing (notably the date)
    # carries over from one record into the next.
    my ($Title, $Description, $Url, $Source, $Relation, $Coverage,
        $Rights, $Date, $ClassificationName) = ('') x 9;

    # Attributes copied verbatim into an output field, keyed by lower-cased
    # attribute name because LDAP attribute names are case-insensitive.
    my %field_for = (
        dctitle       => \$Title,
        dcsubject     => \$ClassificationName,
        dcdescription => \$Description,
        dcdate        => \$Date,
        dcsource      => \$Source,
        dcrelation    => \$Relation,
        dccoverage    => \$Coverage,
        dcrights      => \$Rights,
    );

    foreach my $line (@lines) {
        # Take the attribute name from the start of the line only, so that
        # text such as "dcTitle: ..." inside another attribute's value is
        # never mistaken for an attribute.  Base64 ("name:: ...") and URL
        # ("name:< ...") values are not decoded here and are skipped.
        next unless $line =~ /^([^\s:]+):(?![:<])\s*(.*)/;
        my ($attr, $value) = (lc $1, $2);

        # A literal tab inside a value would split the output field in two.
        $value =~ tr/\t/ /;

        if ($attr eq 'dcidentifier') {
            # Only identifiers with one of these locator schemes become the
            # URL; URNs and anything else are dropped.
            $Url = $value
                if $value =~ /^(?:ftp|gopher|wais|https?|mailto|irc):/;
        } elsif (my $field = $field_for{$attr}) {
            $$field = $value;
        }

        # Everything else is skipped, including:
        #   cn, dcCreator, dcPublisher, dcLanguage, dcFormat, dcType,
        #   dcContributors
        # dcCreator and dcPublisher are not written out: the ControlledName
        # columns below are always left empty.
    }

    print join("\t",
        $Title,                 # Title
        '',                     # AlternateTitle
        $Description,           # Description
        $Url,                   # Url
        $Source,                # Source
        $Relation,              # Relation
        $Coverage,              # Coverage
        $Rights,                # Rights
        '',                     # EmailAddress
        $Date,                  # DateIssued
        $Date,                  # DateOfRecordCreation
        $Date,                  # DateRecordChecked
        $Date,                  # DateLastModified
        "0",                    # VerificationAttempts
        '',                     # ControlledName
        '',                     # ControlledNameTypeName
        $ClassificationName,    # ClassificationName
        '') . "\n";             # ClassificationTypeId
}


=head1 NAME

B<ldif2spt.pl> - Munge a collection of LDIF records into SPT format

=head1 SYNOPSIS

  ldif2spt.pl <ldif.txt >spt.txt

=head1 DESCRIPTION

The B<ldif2spt.pl> program accepts one or more LDIF records in the
Dublin Core schema (included in the I<support> subdirectory of this
distribution), and uses them as the basis for new records created in
the Scout Portal Toolkit (SPT) bulk import/export format for merging
into an SPT database.  This provides a potential migration path from
LDAP-based subject gateways to the SPT software, and an easy way to
offer a service using both packages in parallel.

=head1 OPTIONS

=over 4

=item B<-d>

Turn on debugging output

=back

=head1 EXAMPLE

  $ cat foo.ldif
  dn: cn=0001,dc=martinh,dc=net
  objectclass: dublincoreobject
  cn: 0001
  dcType: Book
  dcTitle: Cities of the Red Night
  dcCreator: William S. Burroughs
  dcDescription: Burroughs is an awe-inspiring poetic magician.  I 
   believe Cities of the Red Night is his masterpiece - Christopher 
   Isherwood
  dcPublisher: Picador
  dcDate: 1981
  dcIdentifier: http://www.hyperreal.com/wsb/
  dcIdentifier: urn:ddc:813
  dcPublisher: First published in Great Britain by John Calder 
   (Publishers) Ltd.  This edition 1982.

  $ ldif2spt.pl <foo.ldif
  Cities of the Red Night         Burroughs is an awe-inspiring
  poetic magician.  I believe Cities of the Red Night is his
  masterpiece - Christopher Isherwood
  http://www.hyperreal.com/wsb/  1981    1981    1981    1981    0

(SPT records use the tab character as a delimiter - but the above has
been word wrapped for readability)

=head1 SEE ALSO

L<spt2ldif.pl>

=head1 COPYRIGHT

Copyright (c) 2002, Martin Hamilton E<lt>imeshtk-utils@martinh.netE<gt>
All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

It was developed at the Department of Computer Science at Loughborough
University, as part of the joint JISC/NSF IMesh Toolkit project.

=head1 AUTHOR

Martin Hamilton E<lt>imeshtk-utils@martinh.netE<gt>

