#!/usr/local/bin/perl

# soif2metadc - convert SOIF record on STDIN to Dublin Core embedded
#               in HTML META tags
#               Look for SERVER_SOFTWARE environment variable... to indicate
#               we're being called from Apache
#
# Author: andy powell <a.powell@ukoln.ac.uk>
#
# $Id: soif2metadc,v 1.12 1998/04/07 13:34:27 lisap Exp $

# Fall back to the standard Harvest install location when the caller's
# environment does not say where Harvest lives.
unless (defined $ENV{'HARVEST_HOME'}) {
    $ENV{'HARVEST_HOME'} = "/usr/local/harvest";
}

# Search the Harvest library directory first so that its soif.pl is the one
# picked up by the require below.
unshift @INC, "$ENV{'HARVEST_HOME'}/lib";      # use local files
require "soif.pl";
use HTML::Entities (); # from LWP

# Not referenced anywhere else in this script; presumably a knob read by
# other Harvest code -- TODO confirm before removing.
$MaxSummaryLength = 0;

# SOIF			Dublin Core
# ----			-----------
# Abstract		?
# Author		Creator
# Body
# Description		Description
# File-Size
# Full-Text
# Gatherer-Host
# Gatherer-Name
# Gatherer-Port
# Gatherer-Version
# Headings
# Update-Time
# Keywords		Subject
# Last-Modification-Time	Date (TYPE=modified)
# MD5
# Refresh-Rate
# Time-to-Live
# Title			Title
# Type			Format
# URL-References
# URL			Identifier

# Object-Type		Type
# Rights		Rights
# Creator[-.]		Creator
# Date			Date

# NewsAgent-Topic	NewsAgent.topic
# NewsAgent-Contact	NewsAgent.contact
# NewsAgent-Contact-Email NewsAgent.contact.email
# NewsAgent-Date-valid-to NewsAgent.date.validto
# NewsAgent-Coverage	NewsAgent.coverage


# Mappings taken from
#    <URL:http://www.ukoln.ac.uk/metadata/interoperability/soif_dc.html>
# Lookup table from lower-cased SOIF attribute name to the META NAME to emit.
# Each value has the form 'Element;SCHEME'; the part after the ';' is the
# optional scheme (empty or absent for every entry below).
#
# NOTE(review): several SOIF names share one target (e.g. creator, creator-2
# and creator-3 all map to 'DC.Creator;'), so a table keyed by these values
# can hold only one of them at a time -- confirm that is intended.
%Mappings = (
    # Creator
    'author'                  => 'DC.Creator;',
    'creator'                 => 'DC.Creator;',
    'dc-creator'              => 'DC.Creator;',
    'creator-address'         => 'DC.Creator.Address;',
    'dc-creator-address'      => 'DC.Creator.Address;',
    'creator-2'               => 'DC.Creator;',
    'creator-address-2'       => 'DC.Creator.Address;',
    'creator-3'               => 'DC.Creator;',
    'creator-address-3'       => 'DC.Creator.Address;',

    # Description
    'description'             => 'DC.Description;',
    'dc-description'          => 'DC.Description;',

    # Subject
    'keywords'                => 'DC.Subject;',
    'subject'                 => 'DC.Subject;',
    'dc-subject'              => 'DC.Subject;',

    # Date (modification time)
    'last-modification-time'  => 'DC.Date;',

    # Title
    'title'                   => 'DC.Title;',
    'dc-title'                => 'DC.Title;',

    # Format
    'type'                    => 'DC.Format;',
    'dc-format'               => 'DC.Format;',

    # Identifier
    'url'                     => 'DC.Identifier;',
    'dc-identifier'           => 'DC.Identifier;',

    # Type
    'object-type'             => 'DC.Type;',
    'dc-type'                 => 'DC.Type;',

    # Rights
    'rights'                  => 'DC.Rights;',
    'dc-rights'               => 'DC.Rights;',

    # Date
    'date'                    => 'DC.Date;',
    'dc-date'                 => 'DC.Date;',

    # NewsAgent extensions (no trailing ';', hence no scheme part)
    'newsagent-topic'         => 'NewsAgent.Topic',
    'newsagent-contact'       => 'NewsAgent.Contact',
    'newsagent-contact-email' => 'NewsAgent.Contact.Email',
    'newsagent-date-valid-to' => 'NewsAgent.Date.ValidTo',
    'newsagent-coverage'      => 'NewsAgent.Coverage',
);

# Choose the input source and parse the SOIF record.
#
# When SERVER_SOFTWARE is set we are running as a server-side include: the
# record is read from "<SCRIPT_FILENAME>.soif" instead of the parser's
# default input, and $ssi is set so later output is wrapped in marker
# comments.
if ($ENV{'SERVER_SOFTWARE'}) { # we've been called as an SSI by Apache
    $ssi = 1;
    $htmlfile = $ENV{'SCRIPT_FILENAME'};
    $soiffile = $htmlfile . '.soif';
    # Three-argument open: the file name is taken literally, so unusual
    # characters or surrounding whitespace in the path cannot change how the
    # file is opened.  A page without a .soif companion simply produces no
    # META tags, hence the silent exit.
    open(SOIF, '<', $soiffile) || exit;
    # '::' replaces the legacy "'" package separator.
    $soif::input = \*SOIF;
}

# soif::parse returns the template type, the URL and the attribute/value
# pairs of the record.
($ttype, $url, %SOIF) = soif::parse();

# Decode %XX escapes in the URL into the bytes they stand for.
$url =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

close(SOIF) if ($ssi);

# Convert every SOIF attribute that has an entry in %Mappings into a cleaned,
# HTML-escaped value stored in %DC under the mapped META name.
# (The hash needs its '%' sigil here: a bare "keys SOIF" is rejected by
# current Perl releases.)
foreach my $attr (keys %SOIF) {
    $value = $SOIF{$attr};

    # %Mappings is keyed by lower-case names; the SOIF value was fetched
    # above with the attribute name exactly as parsed.
    (my $name = $attr) =~ tr/A-Z/a-z/;
    next unless ($Mappings{$name});

    # NOTE(review): the original statement here was "s// /" with an empty
    # pattern, which makes Perl re-apply the last successfully matched regex
    # instead of matching anything specific.  It looks as if a literal
    # control character was lost from the pattern -- confirm.  Non-whitespace
    # control characters are blanked out instead; whitespace ones are
    # collapsed further down.
    $value =~ tr/\x00-\x08\x0e-\x1f\x7f/ /;

    # Keywords arrive one per line; turn them into a comma-separated list.
    if ($name eq 'keywords') {
        $value =~ s/\n+/, /g;
    }

    # Collapse whitespace runs, then drop a trailing space and a trailing
    # comma (left over from the keyword join above).
    $value =~ s/\s+/ /g;
    $value =~ s/ $//;
    $value =~ s/,$//;

    # Attributes whose name ends in "-time" are passed through localtime()
    # to obtain a human-readable date string.
    if ($name =~ /-time$/) {
        $value = localtime($value);
    }

    # Values end up inside a CONTENT="..." attribute, so escape them.
    $value = HTML::Entities::encode_entities($value);
    $DC{$Mappings{$name}} = $value;
}

# The record's own URL always wins as the identifier, replacing anything a
# url/dc-identifier attribute may have supplied.  It is HTML-escaped like
# every other value because it is written into a CONTENT="..." attribute
# (after %XX decoding it may contain quotes or angle brackets).
$DC{'DC.Identifier;'} = defined($url)
    ? HTML::Entities::encode_entities($url)
    : $url;

# No usable title: fall back to the first line of the headings attribute.
if (!$DC{'DC.Title;'} && $SOIF{'headings'}) {
    my $title = $SOIF{'headings'};
    # /s lets '.' cross newlines so that everything after the first line is
    # removed, not just the second line.
    $title =~ s/\n.*//s;
    $DC{'DC.Title;'} = HTML::Entities::encode_entities($title);
}

# Write one META tag per non-empty %DC entry.  In SSI mode the output is
# bracketed by marker comments, and plain "Description" / "Keywords" tags are
# emitted alongside DC.Description / DC.Subject.
print "<!-- start of soif2metadc generated META tags -->\n" if ($ssi);
# Sorted so the tag order is the same on every run; the hash needs its '%'
# sigil (a bare "keys DC" is rejected by current Perl releases).
foreach my $key (sort keys %DC) {
    next unless ($DC{$key});

    # Keys have the form 'Element;SCHEME'; the scheme part may be absent.
    my ($dc, $scheme) = split(/;/, $key);
    $scheme = '' unless defined $scheme;
    $dc =~ s/-[0-9]+$//;        # drop a numeric "-N" suffix from the name

    my $content = $DC{$key};
    if ($scheme) {
        # A scheme is rendered as a "(SCHEME=...) " prefix inside CONTENT,
        # with the value itself preceded by a further space.
        $scheme  = "(SCHEME=$scheme) ";
        $content = " $content";
    }

    _emit_meta_tag($dc, "$scheme$content");

    # The plain HTML equivalents are only produced in SSI mode and only for
    # scheme-less entries.
    next unless ($ssi && $scheme eq '');
    _emit_meta_tag('Description', $content) if ($dc eq 'DC.Description');
    _emit_meta_tag('Keywords',    $content) if ($dc eq 'DC.Subject');
}
print "<!-- end of soif2metadc generated META tags -->\n" if ($ssi);

exit 0;

# Print a single <META NAME=... CONTENT=...> tag.  Tags longer than 100
# characters go through write(), i.e. the STDOUT format defined at the end of
# this file, which wraps them over several lines; shorter tags are printed
# on one line.
#   $name    - value for the NAME attribute
#   $content - already-escaped value for the CONTENT attribute
sub _emit_meta_tag {
    my ($name, $content) = @_;

    # $meta must be the package variable: the format reads (and consumes) it.
    $meta = "<META NAME=\"$name\" CONTENT=\"$content\">";
    if (length($meta) > 100) {
        write;
    }
    else {
        print "$meta\n";
    }
}

# Output format used by write() for long META tags.  With no name given the
# format belongs to STDOUT.  The "^<<<" field prints as much of $meta as fits
# on one line, left-justified, and removes that text from $meta; the
# trailing "~~" repeats the line until $meta is empty, so a long tag is
# wrapped over as many lines as it needs.
format =
^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
$meta
.
