#!/usr/bin/perl

# images2mylibrary.pl - harvest images metadata and import it into MyLibrary

# Eric Lease Morgan <emorgan@nd.edu>

# 2004-10-12 - first cut


=head1 NAME

images2mylibrary.pl - harvest images metadata and import it into MyLibrary

=head1 DESCRIPTION

The primary purpose of this program is to populate your MyLibrary database with up to the first 1,000 records harvested from each of two specific OAI-PMH data repositories: one at infomotions.com and the other at memory.loc.gov.

If it doesn't already exist, the program will automatically create a facet/term combination called Formats/Images.

Only very basic metadata is saved to the MyLibrary database, such as title, publisher, description, and identifier.

=head1 TODO

This program includes many hard-coded values, and the program could be improved through a configuration section.

=head1 AUTHOR

Eric Lease Morgan

=cut



# include the necessary modules
use lib '../lib/';
use MyLibrary::Facet;
use MyLibrary::Resource;
use MyLibrary::Term;
use Net::OAI::Harvester;
use strict;
require 'subroutines.pl';

# clear the screen, explain what is about to happen, and wait for the user
clearScreen();
print "\nIf they don't exist, this script will first create a facet called Formats.\n",
      "It will then create term called Images. The script will then harvest the\n",
      "image metadata from the Library of Congress as well as Infomtions and import\n",
      "it into MyLibrary according.\n\n",
      "Press enter (or return) to begin. ";

# block until a line is read from standard input; the input itself is discarded
<STDIN>;

# the repositories to harvest: each key is an OAI-PMH base URL and each
# value is the set name associated with that repository
my %repositories = (
	'http://infomotions.com/gallery/oai/index.pl' => '',
	'http://memory.loc.gov/cgi-bin/oai2_0'        => 'lcphotos',
);

# make sure a facet named Formats is available, creating it when necessary
my $facet = MyLibrary::Facet->new;
if (MyLibrary::Facet->get_facets(value => 'Formats', field => 'name')) {

	# found; replace the blank object with the existing facet
	$facet = MyLibrary::Facet->new(name => 'Formats');
	print "\nThe facet Formats already exists.\n";

}

else {

	# not found; fill in the blank facet and save it
	$facet->facet_name('Formats');
	$facet->facet_note('This list of list of physical items embodying information.');
	$facet->commit;
	print "\nThe facet Formats was created.\n";

}

# remember the facet's identifier; the Images term is attached to it
my $facetID = $facet->facet_id;

# make sure a term named Images is available, creating it when necessary
my $term = MyLibrary::Term->new;
if (MyLibrary::Term->get_terms(value => 'Images', field => 'name')) {

	# found; replace the blank object with the existing term
	$term = MyLibrary::Term->new(name => 'Images');
	print "The term Images already exists.\n";

}

else {

	# not found; fill in the blank term, attach it to the Formats facet, and save it
	$term->term_name('Images');
	$term->term_note('These are things like photographs or paintings.');
	$term->facet_id($facetID);
	$term->commit;
	print "The term Images was created.\n";

}

# remember the term's identifier; every imported resource is related to it
my $imageTermID = $term->term_id;

# Harvest Dublin Core records from each configured repository and add every
# record not already present (matched on its OAI identifier) to MyLibrary
# as a resource related to the Images term.

# don't get too many from any one repository; this is only a demonstration
my $maximum = 1000;

# process each repository
foreach my $repository (keys(%repositories)) {

	# get the set configured for this repository; the lookup must use the
	# loop variable as the hash key
	my $set = $repositories{$repository};

	# build the request, restricting it to a set only when one is configured
	my %request = ('metadataPrefix' => 'oai_dc');
	$request{set} = $set if (defined($set) && length($set));

	# create a harvester and get the data
	my $harvester = Net::OAI::Harvester->new('baseURL' => $repository);
	my $records = $harvester->listAllRecords(%request);

	# report a failed request instead of silently importing nothing
	if ($records->errorCode()) {

		warn "Unable to harvest $repository: ", $records->errorString(), "\n";
		next;

	}

	# initialize a counter
	my $counter = 0;

	# process each record
	while ( my $record = $records->next() ) {

		$counter = $counter + 1;

		# extract the fields of interest; the subjects are appended to the
		# description so both end up in the resource's note
		# NOTE(review): the record's creator is not stored in MyLibrary
		my $FKey         = $record->header->identifier;
		my $metadata     = $record->metadata();
		my $name         = $metadata->title();
		my @description  = $metadata->description();
		my @subject      = $metadata->subject();
		my $description  = join (' ', @description, @subject);
		my $publisher    = $metadata->publisher();
		my $location     = $metadata->identifier();
		print "$name...";

		# check to see if it already exists
		if (! MyLibrary::Resource->new(fkey => $FKey)) {

			# create it
			my $resource = MyLibrary::Resource->new;
			$resource->name($name);
			$resource->publisher($publisher);
			$resource->note($description);
			$resource->fkey($FKey);
			$resource->related_terms(new => [$imageTermID]);
			$resource->add_location(location => $location, location_type => 1);
			$resource->commit;
			print "added (", $resource->id, ").\n";

		}

		else {

			# already got it
			print "already exists.\n";

		}

		# stop once the per-repository limit has been reached
		last if ($counter >= $maximum);

	}

}

# done
print "\nDone\n";
exit;





