#!/usr/local/bin/perl
# soif2metadc - convert SOIF record on STDIN to Dublin Core embedded
#               in HTML META tags
# Look for SERVER_SOFTWARE environment variable... to indicate
# we're being called from Apache
#
# Author: andy powell
#
# $Id: soif2metadc,v 1.12 1998/04/07 13:34:27 lisap Exp $

use strict;
use warnings;

$ENV{'HARVEST_HOME'} = "/usr/local/harvest"
    if (!defined($ENV{'HARVEST_HOME'}));
unshift(@INC, "$ENV{'HARVEST_HOME'}/lib");   # use local files

require "soif.pl";
use HTML::Entities ();                       # from LWP

# Package variables (not lexicals): soif.pl is not visible from this file,
# so anything the original exposed in package main stays reachable.
our $MaxSummaryLength = 0;   # not referenced anywhere in this script

# Buffer consumed by the STDOUT format at the bottom of the file. It is a
# package variable so the format can see it regardless of lexical scoping.
our $meta = '';

# SOIF                      Dublin Core
# ----                      -----------
# Abstract                  ?
# Author                    Creator
# Body
# Description               Description
# File-Size
# Full-Text
# Gatherer-Host
# Gatherer-Name
# Gatherer-Port
# Gatherer-Version
# Headings
# Update-Time
# Keywords                  Subject
# Last-Modification-Time    Date (TYPE=modified)
# MD5
# Refresh-Rate
# Time-to-Live
# Title                     Title
# Type                      Format
# URL-References
# URL                       Identifier
# Object-Type               Type
# Rights                    Rights
# Creator[-.]               Creator
# Date                      Date
# NewsAgent-Topic           NewsAgent.topic
# NewsAgent-Contact         NewsAgent.contact
# NewsAgent-Contact-Email   NewsAgent.contact.email
# NewsAgent-Date-valid-to   NewsAgent.date.validto
# NewsAgent-Coverage        NewsAgent.coverage

# Mappings taken from
# (NOTE(review): the reference that followed is missing from this copy.)
#
# Keys are lower-cased SOIF attribute names. Values are
# "<META name>[-<n>];<scheme>": the part before ';' becomes the META NAME
# (after any trailing "-<n>" is stripped), the part after ';' is an
# optional scheme.
our %Mappings = (
    # Column-heading row. Lookups use lower-cased names, so this
    # upper-case key can never match a real attribute.
    'SOIF'                    => 'Dublin_Core;SCHEME',

    'author'                  => 'DC.Creator;',
    'creator'                 => 'DC.Creator;',
    'dc-creator'              => 'DC.Creator;',
    'creator-address'         => 'DC.Creator.Address;',
    'dc-creator-address'      => 'DC.Creator.Address;',
    # Numbered creators get distinct %DC keys so they no longer overwrite
    # the first creator; the "-<n>" suffix is stripped again on output.
    'creator-2'               => 'DC.Creator-2;',
    'creator-address-2'       => 'DC.Creator.Address-2;',
    'creator-3'               => 'DC.Creator-3;',
    'creator-address-3'       => 'DC.Creator.Address-3;',
    'description'             => 'DC.Description;',
    'dc-description'          => 'DC.Description;',
    'keywords'                => 'DC.Subject;',
    'subject'                 => 'DC.Subject;',
    'dc-subject'              => 'DC.Subject;',
    'last-modification-time'  => 'DC.Date;',
    'title'                   => 'DC.Title;',
    'dc-title'                => 'DC.Title;',
    'type'                    => 'DC.Format;',
    'dc-format'               => 'DC.Format;',
    'url'                     => 'DC.Identifier;',
    'dc-identifier'           => 'DC.Identifier;',
    'object-type'             => 'DC.Type;',
    'dc-type'                 => 'DC.Type;',
    'rights'                  => 'DC.Rights;',
    'dc-rights'               => 'DC.Rights;',
    'date'                    => 'DC.Date;',
    'dc-date'                 => 'DC.Date;',
    'newsagent-topic'         => 'NewsAgent.Topic',
    'newsagent-contact'       => 'NewsAgent.Contact',
    'newsagent-contact-email' => 'NewsAgent.Contact.Email',
    'newsagent-date-valid-to' => 'NewsAgent.Date.ValidTo',
    'newsagent-coverage'      => 'NewsAgent.Coverage',
);

# Print one META tag. Tags longer than 100 characters go through the
# STDOUT format below, which wraps them; shorter ones are printed as-is.
sub emit_meta {
    my ($tag) = @_;
    if (length($tag) > 100) {
        $meta = $tag;    # the format's ^<<< field consumes $meta as it wraps
        write;
    }
    else {
        print "$tag\n";
    }
    return;
}

# Split a %DC key into (META name without "-<n>" suffix, sequence number)
# so numbered elements sort directly after their unnumbered sibling.
sub meta_sort_key {
    my ($key) = @_;
    my ($name) = split(/;/, $key);
    $name = '' unless defined $name;
    my $seq = ($name =~ s/-([0-9]+)$//) ? $1 : 1;
    return ($name, $seq);
}

# ---------------------------------------------------------------------
# Input selection: when run under a web server, read "<page>.soif"
# instead of STDIN.
# ---------------------------------------------------------------------
my $ssi = 0;
if ($ENV{'SERVER_SOFTWARE'}) {
    # we've been called as an SSI by Apache
    $ssi = 1;
    my $htmlfile = $ENV{'SCRIPT_FILENAME'};
    exit unless (defined($htmlfile) && length($htmlfile));
    my $soiffile = $htmlfile . '.soif';

    # A page without a SOIF companion is not an error: emit nothing.
    open(SOIF, '<', $soiffile) || exit;
    {
        no warnings 'once';
        $soif::input = \*SOIF;
    }
}

# $ttype is returned by the parser but not used by this script.
my ($ttype, $url, %SOIF) = soif::parse();
$url = '' unless defined $url;
$url =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;   # undo %XX escapes
close(SOIF) if ($ssi);

# ---------------------------------------------------------------------
# SOIF attributes -> %DC (keyed by the %Mappings value)
# ---------------------------------------------------------------------
my %DC;

# Sorted so that, when several SOIF attributes map to the same element
# (e.g. author / creator / dc-creator), the winner is deterministic.
foreach my $attribute (sort keys %SOIF) {
    my $value = $SOIF{$attribute};
    (my $name = $attribute) =~ tr/A-Z/a-z/;

    # if ($soif::debug == 1) {
    #     $value =~ s/\s+/ /g;
    #     print "SOIF-$name = $value\n";
    # }

    my $element = $Mappings{$name};
    next unless defined $element;
    next unless defined $value;

    if ($name eq 'keywords') {
        $value =~ s/\n+/, /g;    # one keyword per line -> comma list
    }
    $value =~ s/\s+/ /g;
    $value =~ s/ $//;
    $value =~ s/,$//;

    # An empty attribute must not clobber a non-empty alias.
    next unless length $value;

    # "*-time" attributes are turned into a readable local date; anything
    # that is not a plain integer is passed through untouched.
    if ($name =~ /-time$/ && $value =~ /^[0-9]+$/) {
        $value = localtime($value);
    }

    $DC{$element} = HTML::Entities::encode_entities($value);
    # print "DC-$element = $value\n" if ($soif::debug == 1);
}

# The record's own URL always wins as the identifier. It ends up inside an
# HTML attribute, so it is entity-encoded like every other value.
$DC{'DC.Identifier;'} = HTML::Entities::encode_entities($url);

# No title: fall back to the first line of the headings attribute.
unless (defined($DC{'DC.Title;'}) && length($DC{'DC.Title;'})) {
    # Attribute-name case depends on soif.pl, so match case-insensitively.
    my ($headings_key) = grep { lc($_) eq 'headings' } sort keys %SOIF;
    if (defined($headings_key) && defined($SOIF{$headings_key})) {
        my $title = $SOIF{$headings_key};
        $title =~ s/^\s+//;
        $title =~ s/\n.*//s;     # /s so that *everything* after line 1 goes
        $DC{'DC.Title;'} = HTML::Entities::encode_entities($title)
            if length $title;
    }
}

# ---------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------
my @ordered = sort {
    my ($name_a, $seq_a) = meta_sort_key($a);
    my ($name_b, $seq_b) = meta_sort_key($b);
    $name_a cmp $name_b || $seq_a <=> $seq_b || $a cmp $b
} keys %DC;

print "\n" if ($ssi);

foreach my $key (@ordered) {
    my $content = $DC{$key};
    next unless (defined($content) && length($content));

    my ($dc, $scheme) = split(/;/, $key);
    $dc =~ s/-[0-9]+$//;

    my $scheme_prefix = '';
    if (defined($scheme) && length($scheme)) {
        $scheme_prefix = "(SCHEME=$scheme) ";
        $content       = " $content";
    }

    # NOTE(review): in the copy under review every tag string was empty,
    # so only blank lines were printed. The tag is reconstructed from the
    # header's stated purpose and from $dc / $scheme, which were computed
    # but unused -- confirm the exact markup against the upstream script.
    emit_meta(qq{<META NAME="$dc" CONTENT="$scheme_prefix$content">});

    # Under SSI, scheme-less descriptions and subjects are emitted a second
    # time. NOTE(review): presumably as the plain "description"/"keywords"
    # tags; the NAME values below are an assumption -- confirm.
    if ($ssi && $scheme_prefix eq '') {
        if ($dc eq 'DC.Description') {
            emit_meta(qq{<META NAME="description" CONTENT="$content">});
        }
        if ($dc eq 'DC.Subject') {
            emit_meta(qq{<META NAME="keywords" CONTENT="$content">});
        }
    }
}

print "\n" if ($ssi);

exit 0;

format =
^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
$meta
.