#!/usr/bin/perl

# $Id: spt2centroid.pl,v 1.2 2002/09/30 12:04:54 martin Exp $

use strict;
use warnings;

# Build a WHOIS++ centroid from a tab-separated SPT export.
#
# Usage: spt2centroid.pl [server-handle] [file ...]
#
# The first command-line argument, if present, is used as the value of the
# "Server-handle" header.  The export itself is read from any remaining file
# arguments, or from standard input when there are none.

# Timestamp reported as both Start-time and End-time of the centroid.
my $now = time;

# The handle is optional; fall back to the empty string so that the header
# below never interpolates an undefined value.
my $handle = shift;
$handle = '' unless defined $handle;

# Zero-based column positions within an SPT record, each paired with the
# centroid field its words are collected under.  Title (0) and
# AlternateTitle (1) are merged into the single "Title" field.  Column 3
# (Url) is not indexed.
my @indexed_columns = (
    [ 0, 'Title' ],
    [ 1, 'Title' ],
    [ 2, 'Description' ],
    [ 4, 'Source' ],
    [ 5, 'Relation' ],
    [ 6, 'Coverage' ],
    [ 7, 'Rights' ],
);

# XXX: columns that are not mapped to centroid fields yet:
#   dcCreator:                    EmailAddress           (column 8)
#   dcDate:                       DateIssued             (column 9)
#   dc<ControlledNameTypeName>:   ControlledName         (columns 15, 14)
#   dcSubject:                    ClassificationName     (column 16)

# $centroid{FIELD}{WORD} = 1 for every distinct word seen in FIELD.
my %centroid;

while (my $record = <>) {
    # chomp (not chop) so that a final record without a trailing newline
    # does not lose its last character.
    chomp $record;

    my @columns = split /\t/, $record;

    for my $mapping (@indexed_columns) {
        my ($index, $field) = @{$mapping};

        # Short records (and trailing empty columns, which split drops)
        # simply contribute nothing for the missing columns.
        my $text = $columns[$index];
        next unless defined $text;

        # Every maximal run of word characters is one centroid entry.
        $centroid{$field}{$_} = 1 for $text =~ /\w+/g;
    }
}

# NOTE(review): the envelope lines below are emitted with bare LF while the
# per-field lines use CRLF.  This mismatch is inherited and kept unchanged
# here - confirm which line ending the consuming software expects.
print <<EOF;
# CENTROID-CHANGES
 Version-number: 1.0
 Start-time: $now
 End-time: $now
 Case-Sensitive: FALSE
 Server-handle: $handle
# BEGIN TEMPLATE
Template: DUBLINCORESIMPLE
Any-Field: FALSE
EOF

# Fields and words are sorted so that identical input always produces
# identical output, independent of Perl's hash ordering.
for my $field (sort keys %centroid) {
    print "# BEGIN FIELD\r\n";
    print " Field: $field\r\n";

    # The first word is introduced by " Data: "; every following word is
    # written as a "-" continuation line.
    my $prefix = ' Data: ';
    for my $word (sort keys %{ $centroid{$field} }) {
        print "$prefix$word\r\n";
        $prefix = '-';
    }

    print "# END FIELD\r\n";
}

print <<EOF;
# END TEMPLATE
# END CENTROID-CHANGES
EOF



=head1 NAME

B<spt2centroid.pl> - Munge an SPT export into a WHOIS++ centroid

=head1 SYNOPSIS

  spt2centroid.pl [server-handle] <spt.txt >centroid.txt

=head1 DESCRIPTION

The B<spt2centroid.pl> program takes a bulk export produced by the
Scout Portal Toolkit (SPT) software, and uses it to generate an RFC
1913 style WHOIS++ "centroid".  Centroids are the WHOIS++ forward
knowledge summary mechanism, and are supported by default in the ROADS
software's cross-searching subsystem.

Note that the centroids produced by this program are based on the
DUBLINCORESIMPLE ROADS template type, which encapsulates unqualified
Dublin Core elements.

=head1 EXAMPLE

  $ tail -1 bar.spt
  Cities of the Red Night         Burroughs is an awe-inspiring
  poetic magician.  I believe Cities of the Red Night is his
  masterpiece - Christopher Isherwood
  http://www.hyperreal.com/wsb/                                
            1981   1981     1981    1981    0

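(SPT records use the tab character as a delimiter - but the above has
been word wrapped for readability.  In the output below, the order of
the fields and of the data values within each field is not significant
and may differ from what is shown.)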

  $ tail -1 bar.spt | spt2centroid.pl 
  # CENTROID-CHANGES
   Version-number: 1.0
   Start-time: 1033386070
   End-time: 1033386070
   Case-Sensitive: FALSE
   Server-handle: 
  # BEGIN TEMPLATE
  Template: DUBLINCORESIMPLE
  Any-Field: FALSE
  # BEGIN FIELD
   Field: Description
   Data: Isherwood
  -believe
  -poetic
  -masterpiece
  -his
  -of
  -is
  -I
  -Red
  -Night
  -an
  -magician
  -inspiring
  -Burroughs
  -Cities
  -Christopher
  -the
  -awe
  # END FIELD
  # BEGIN FIELD
   Field: Title
   Data: Night
  -of
  -Cities
  -the
  -Red
  # END FIELD
  # END TEMPLATE
  # END CENTROID-CHANGES

=head1 SEE ALSO

L<spt2iafa.pl>, L<iafa2spt.pl>, L<spt2cip.pl>

=head1 COPYRIGHT

Copyright (c) 2002, Martin Hamilton E<lt>imeshtk-utils@martinh.netE<gt>
All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

It was developed at the Department of Computer Science at Loughborough
University, as part of the joint JISC/NSF IMesh Toolkit project.

=head1 AUTHOR

Martin Hamilton E<lt>imeshtk-utils@martinh.netE<gt>

