#!/usr/bin/perl
package Pubmed;

##########################################################################
##### Author :	Shuai Weng
##### Date   :  Jan. 2001
##### Description : This package contains all the methods dictyBase
#####               curators need to create a PubMed reference.
#####
#####  Usage: 
#####  use Pubmed;
#####  my $refObject = new Pubmed(database=>$database,
#####                             pubmed=>$pubmed,    
#####                             user=>$user,
#####                             source=>$source);
#####
#####  my $refObject = new Pubmed(dbh=>$dbh,
#####                             pubmed=>$pubmed,    
#####                             user=>$user,
#####                             source=>$source);
#####
#####  pass dbh or database
#####  The parameter source is optional. The default source is 'PubMed'
#####  
#####  my $refNo = $refObject->referenceNo;           
#######################################################################
use strict;
use DBI;
use lib "/usr/local/dicty/www_dictybase/db/lib/common";
use Login qw (ConnectToDatabase);
use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase";
use dictyBaseCentralMod qw(:getInfo);
use MedlineParse qw( GetMedlineContent ParseTitle ParseAuthors ParseAbstract ParseDatePublished ParseEntryDate ParseYear ParseJournal ParseVolume ParsePages ParseIssue ParseMedlineID ParsePubMed ParsePubTypes ParsePST ParseLastRevision ParseUrl CreateCitation );
use TextUtil qw( DeleteUnwantedChar );

#######################################################################
#################### global variables #################################
#######################################################################

# Database handle used by every method in this package.  It is a file-scoped
# lexical assigned in new(), so all Pubmed objects created in the same
# process share whichever handle was assigned most recently.
my $dbh;
# Not referenced anywhere else in the visible part of this file.
my $createRefUrl;
# Medline URL prefix and suffix; assigned in createReference according to
# the reference source.
my $begMedlineUrl;
my $endMedlineUrl;

# The next three hashes are declared here but are not read or written by
# any code visible in this file.
my %AuthorNo4Name;
my %JournalNo4Abbrev;
my %ReferenceNo4Pubmed;
# Upper-cased journal abbreviation => ISSN; filled by parseJournalFile.
my %ISSN4Abbrev;
# Upper-cased journal abbreviation => full journal title; filled by
# parseJournalFile.
my %FullName4Abbrev;
# Publication-type code value => table name, as read from the code table by
# getPubTypeList.
my %foundPubType;

# Medline journal list read by parseJournalFile.
my $journalFile = "/share/dictyBase/data/J_Medline.txt";

#######################################################################
sub new {      ############ constructor ###############################
#######################################################################
    # Builds a Pubmed object and immediately runs createReference for the
    # given PubMed id.
    #
    # Named arguments:
    #   pubmed   - PubMed id of the reference.
    #   source   - reference source; defaults to "PubMed" when false.
    #   dbh      - database handle to use; when it is false the following
    #              two are used to open a connection instead:
    #   database - database name.
    #   user     - user name (upper-cased before the credential lookup).
    #
    # Returns the new object.  Returns nothing, after printing a message,
    # when no handle was passed and either the user name or the looked-up
    # credentials are missing.
    #
    # NOTE: the handle is kept in the file-scoped $dbh, so it is shared by
    # every object of this package, not stored per object.
    my ($class, %args) = @_;

    # Two-argument bless: the object belongs to the class new() was invoked
    # on (or the class of the invoking object), which keeps subclasses and
    # $obj->new(...) working.  Pubmed->new(...) behaves as before.
    my $self = bless {}, ref($class) || $class;

    $self->{'_pubmed'} = $args{'pubmed'};
    $self->{'_source'} = $args{'source'} || "PubMed";

    $dbh = $args{'dbh'};
    if (!$dbh) {
        $self->{'_database'} = $args{'database'};

        my $user = uc(defined $args{'user'} ? $args{'user'} : '');
        if (!$user) {
            print "You have to pass the user name to this object.";
            return;
        }

        my ($dbuser, $dbpasswd) = &getUsernamePassword($user,
                                                       $self->database);
        if (!$dbuser || !$dbpasswd) {
            print "You may need to login to curator central page before you can use this object.";
            return;
        }
        $dbh = &ConnectToDatabase($self->database, $dbuser, $dbpasswd);
    }

    $self->createReference;
    return $self;
}

sub database {
    # Accessor: the database name stored by new() (only set when new() was
    # not given a dbh).
    my ($self) = @_;
    return $self->{'_database'};
}

######################################################################
sub createReference {
######################################################################
    # Creates the reference for this object's PubMed id unless one is
    # already stored.
    #
    # When getRefNoBYpubmed finds an existing reference, its number is
    # stored in _referenceNo, _error is set to "::refexist" and nothing
    # else happens.  Otherwise the journal file and the publication-type
    # list are loaded, the Medline content is fetched with
    # GetMedlineContent, and parseDataInsertData does the parsing and the
    # inserts.
    my ($self) = @_;

    my $existingRefNo = $self->getRefNoBYpubmed($self->pubmed);
    if ($existingRefNo) {
        $self->{'_referenceNo'} = $existingRefNo;
        $self->{'_error'} = "::refexist";
        return;
    }

    # The Medline URL pieces depend on the source; they are only assigned
    # here and are not used again within this method.
    if ($self->{'_source'} =~ /^YPD/i) {
        $begMedlineUrl = "http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=m&form=6&uid=";
        $endMedlineUrl = '&dopt=l';
    }
    else {
        $begMedlineUrl = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&uid=';
        $endMedlineUrl = '&dopt=Medline';
    }

    $self->parseJournalFile;
    $self->getPubTypeList;

    my $medlineText = &GetMedlineContent($self->pubmed);
    $self->parseDataInsertData($self->pubmed, \$medlineText);
}

########################################################################
sub parseDataInsertData {
########################################################################
    # Parses one Medline record and stores it in the database.
    #
    # Arguments:
    #   $pubmed     - the PubMed id that was requested.
    #   $contentRef - reference to the Medline text of the record.
    #
    # Outcome is reported through the object, not the return value (which
    # is always empty):
    #   _referenceNo - number of the new (or already existing) reference.
    #   _pubmed      - replaced by the id found in the record when that
    #                  differs from the requested one.
    #   _medline     - Medline id parsed from the record.
    #   _error       - "::refexist", "::notitle" or "::nocitation" when the
    #                  record is rejected, plus whatever the insert methods
    #                  append.
    #
    # Nothing is written to the database until the record has passed the
    # duplicate, title and citation checks, so a failed fetch does not
    # leave a stray journal row behind.
    my ($self, $pubmed, $contentRef) = @_;

    my $paperTitle = &ParseTitle($contentRef);
    &DeleteUnwantedChar(\$paperTitle);

    # Parsed and cleaned, but not stored anywhere by this method.
    my $pst = &ParsePST($contentRef);
    &DeleteUnwantedChar(\$pst) if $pst;

    my $datePublished = &ParseDatePublished($contentRef);
    &DeleteUnwantedChar(\$datePublished);

    # Parsed and cleaned, but not stored anywhere by this method.
    my $entryDate = &ParseEntryDate($contentRef);
    &DeleteUnwantedChar(\$entryDate);

    my $journal = &ParseJournal($contentRef);
    &DeleteUnwantedChar(\$journal);

    my @authors = &ParseAuthors($contentRef);

    my $year = &ParseYear($contentRef);
    &DeleteUnwantedChar(\$year);

    my $volume = &ParseVolume($contentRef);
    &DeleteUnwantedChar(\$volume);

    my $issue = &ParseIssue($contentRef);
    &DeleteUnwantedChar(\$issue);

    my $page = &ParsePages($contentRef);
    &DeleteUnwantedChar(\$page);

    my $dateRevised = &ParseLastRevision($contentRef);
    &DeleteUnwantedChar(\$dateRevised);

    # The record may carry a different PubMed id than the one asked for.
    # From here on the record's own id is used, and it is checked against
    # the reference table only (the "pubmed" flag skips the Medline lookup).
    my $thisPubmed = &ParsePubMed($contentRef);
    if ($thisPubmed != $pubmed) {
        $pubmed = $thisPubmed;
        my $existingRefNo = $self->getRefNoBYpubmed($thisPubmed, "pubmed");
        $self->{'_pubmed'} = $thisPubmed;
        if ($existingRefNo) {
            $self->{'_referenceNo'} = $existingRefNo;
            $self->{'_error'} .= "::refexist";
            return;
        }
    }

    if (!$paperTitle) {
        $self->{'_error'} .= "::notitle";
        return;
    }

    my $citation = &CreateCitation(\$year, \$paperTitle, \$journal,
                                   \$volume, \$issue, \$page, \@authors);
    if (!$citation) {
        $self->{'_error'} .= "::nocitation";
        return;
    }

    ############ Insert into journal table
    my $journalNo = $self->insertJournal($journal);

    ############ Insert into reference table
    my $refNo = $self->insertReference($self->{'_source'},
                                       "Published", $citation, $year,
                                       $pubmed, $datePublished,
                                       $dateRevised, $issue, $page,
                                       $volume, $paperTitle, $journalNo);
    if (!$refNo) {
        return;
    }
    $self->{'_referenceNo'} = $refNo;

    ############ Insert into author and author_editor tables
    # Authors are numbered from 1 in the order they were parsed; an author
    # whose insert fails still consumes its position.
    my $authorOrder = 0;
    foreach my $author (@authors) {
        &DeleteUnwantedChar(\$author);
        $authorOrder++;
        my $authorNo = $self->insertAuthor($author);
        if ($authorNo) {
            $self->insertAuthorEditor($authorNo, $refNo, "Author",
                                      $authorOrder);
        }
    }

    ############ Insert into abstract table
    my $abstract = &ParseAbstract($contentRef);
    &DeleteUnwantedChar(\$abstract);
    if ($abstract) {
        $self->insertAbstract($refNo, $abstract);
    }

    ############ Insert into publication_type table
    my @pubTypes = &ParsePubTypes($contentRef);
    foreach my $pubtype (@pubTypes) {
        &DeleteUnwantedChar(\$pubtype);
        # Normalise to "Capitalised lower case" before the lookup/insert.
        $pubtype = "\u\L$pubtype";
        $self->insertPublicationType($refNo, $pubtype);
    }

    ############ Insert into external_id table
    my $medlineID = &ParseMedlineID($contentRef);
    &DeleteUnwantedChar(\$medlineID);
    $self->{'_medline'} = $medlineID;
    $self->insertExternalID($refNo, $pubmed, $medlineID);

    ############ Insert into url and ref_url tables
    my $url = &ParseUrl($contentRef);
    &DeleteUnwantedChar(\$url);
    if ($url) {
        $self->insertUrl($refNo, $url);
    }
    return;
}

########################################################################


########################################################################
sub parseJournalFile {
########################################################################
    # Loads the Medline journal list ($journalFile) into %ISSN4Abbrev and
    # %FullName4Abbrev, both keyed by the upper-cased journal abbreviation.
    #
    # The file is treated as a sequence of records separated by "JrId:".
    # Within a record the lines "JournalTitle: ...", "MedAbbr: ..." and
    # "ISSN: ..." (matched case-insensitively) supply the values.  Records
    # without a MedAbbr line are skipped.
    #
    # Dies when the journal file cannot be opened.
    #
    # The file is read directly; no external command or temporary file in
    # /var/tmp is involved.
    my ($self) = @_;

    open(my $in, '<', $journalFile)
        or die "Can't open '$journalFile' for reading:$!\n";
    my $content = do { local $/; <$in> };
    close($in);
    return if !defined $content;

    foreach my $record (split(/JrId:/, $content)) {
        my ($abbrev, $issn, $Jtitle);
        # Accept both LF and CR as line terminators.
        foreach my $line (split(/[\12\15]/, $record)) {
            if ($line =~ /^JournalTitle: (.+)$/i) {
                $Jtitle = $1;
            }
            elsif ($line =~ /^MedAbbr: (.+)$/i) {
                $abbrev = $1;
            }
            elsif ($line =~ /^ISSN: (.+)$/i) {
                $issn = $1;
            }
        }
        # Text before the first "JrId:" and incomplete records have no
        # abbreviation and therefore nothing to key on.
        next if !defined $abbrev;

        my $key = "\U$abbrev";
        $ISSN4Abbrev{$key}     = $issn;
        $FullName4Abbrev{$key} = $Jtitle;
    }
    return;
}

########################################################################
sub getAuthorNoBYauthorNm {
########################################################################
    # Looks up an author by name, comparing in upper case.
    # Returns the author_no fetched from CGM_DDB.author, or a false value
    # when no row matches.
    my ($self, $authorNm) = @_;

    my $lookup = $dbh->prepare("
        SELECT author_no
        FROM   CGM_DDB.author
        WHERE  upper(author_name) = ?
    ");
    $lookup->execute("\U$authorNm");

    my $found = $lookup->fetchrow;
    $lookup->finish;
    return $found;
}

########################################################################
sub getJournalNoBYabbrev {
########################################################################
    # Looks up a journal by its abbreviation, comparing in upper case.
    # Returns the journal_no fetched from CGM_DDB.journal, or a false value
    # when no row matches.
    my ($self, $abbrev) = @_;

    my $upperAbbrev = "\U$abbrev";
    my $query = $dbh->prepare("
        SELECT journal_no
        FROM   CGM_DDB.journal
        WHERE  upper(abbreviation) = ?
    ");
    $query->execute($upperAbbrev);

    my $matchedNo = $query->fetchrow;
    $query->finish;
    return $matchedNo;
}

########################################################################
sub getRefNoBYpubmed {
########################################################################
    # Finds the reference_no for an id.
    #
    # The id is first matched against the pubmed column of
    # CGM_DDB.reference.  If that finds nothing and $type is false, the id
    # is also tried as a Medline external id of a REFERENCE row.  A true
    # $type restricts the search to the first lookup.
    #
    # Returns the reference number, or a false value when nothing matches.
    my ($self, $pubmedORmedNo, $type) = @_;

    my $byPubmed = $dbh->prepare("
        SELECT reference_no
        FROM   CGM_DDB.reference
        WHERE  pubmed = ?
    ");
    $byPubmed->execute($pubmedORmedNo);
    my $refNo = $byPubmed->fetchrow;
    if ($refNo) {
        $byPubmed->finish;
        return $refNo;
    }

    # Caller asked for the PubMed column only.
    return $refNo if $type;

    my $byMedline = $dbh->prepare("
              SELECT primary_key
              FROM   CGM_DDB.external_id 
              WHERE  external_id = ?
              AND    source = 'Medline'
              AND    tab_name = 'REFERENCE'
              AND    primary_key in 
                     (select reference_no from CGM_DDB.reference)
        ");
    $byMedline->execute($pubmedORmedNo);
    $refNo = $byMedline->fetchrow;
    $byMedline->finish;
    return $refNo;
}

#########################################################################
sub getPubTypeList {
#########################################################################
    # Loads the PUB_TYPE codes defined for the PUBLICATION_TYPE and
    # RELATED_REF tables into %foundPubType (code value => table name).
    my ($self) = @_;

    my $codes = $dbh->prepare("
        SELECT tab_name, code_value
        FROM   CGM_DDB.code
        WHERE  col_name = 'PUB_TYPE'
        AND    tab_name IN ('PUBLICATION_TYPE', 'RELATED_REF')
    ");
    $codes->execute();

    while (my @row = $codes->fetchrow()) {
        my ($tableName, $codeValue) = @row;
        $foundPubType{$codeValue} = $tableName;
    }
    $codes->finish();
}

#########################################################################
sub getUrlNoByUrl {
#########################################################################
    # Looks up a 'Reference full text' URL, comparing in upper case.
    # Returns the url_no fetched from CGM_DDB.url, or a false value when no
    # row matches.
    my ($self, $url) = @_;

    my $query = $dbh->prepare("
        SELECT url_no
        FROM   CGM_DDB.url
        WHERE  upper(url) = ?
        AND    url_type = 'Reference full text'
    ");
    $query->execute("\U$url");

    my $matchedNo = $query->fetchrow;
    $query->finish;
    return $matchedNo;
}

########################################################################
sub insertAuthor {
########################################################################
    # Returns the author_no for an author name, inserting a new row into
    # CGM_DDB.author (and committing) when the name is not stored yet.
    #
    # On an insert failure "::author:<message>" is appended to _error and
    # nothing is returned.
    my ($self, $author) = @_;

    # One lookup is enough; its result is reused for the return value.
    my $existingNo = $self->getAuthorNoBYauthorNm($author);
    return $existingNo if $existingNo;

    my $insertSth = $dbh->prepare("
        INSERT INTO CGM_DDB.author(author_no, author_name)
        VALUES(CGM_DDB.authorno_seq.nextval, ?)
    ");
    eval { $insertSth->execute($author); };
    # Save the error before any other call gets a chance to reset $@.
    my $insertError = $@;
    if ($insertError) {
        $insertSth->finish;
        $self->{'_error'} .= "::author:$insertError";
        return;
    }
    $dbh->commit;
    $insertSth->finish;

    # The sequence's current value is the number just given to the new row.
    my $sth = $dbh->prepare("
            SELECT CGM_DDB.authorno_seq.currval
            FROM   dual
        ");
    $sth->execute;
    my $authorNo = $sth->fetchrow();
    $sth->finish;
    return $authorNo;
}

########################################################################
sub insertJournal {
########################################################################
    # Returns the journal_no for a journal abbreviation, inserting a new
    # row into CGM_DDB.journal (and committing) when the abbreviation is
    # not stored yet.  The full name and ISSN of a new row come from the
    # hashes filled by parseJournalFile and may be undefined.
    #
    # On an insert failure "::journal:<message>" is appended to _error and
    # nothing is returned.
    my ($self, $journal) = @_;

    # One lookup is enough; its result is reused for the return value.
    my $existingNo = $self->getJournalNoBYabbrev($journal);
    return $existingNo if $existingNo;

    my $key    = "\U$journal";
    my $issn   = $ISSN4Abbrev{$key};
    my $Jtitle = $FullName4Abbrev{$key};

    my $insertSth = $dbh->prepare("
        INSERT INTO CGM_DDB.journal(journal_no, full_name, abbreviation, issn)
        VALUES(CGM_DDB.journalno_seq.nextval, ?,?,?)
    ");
    eval { $insertSth->execute($Jtitle, $journal, $issn); };
    # Save the error before any other call gets a chance to reset $@.
    my $insertError = $@;
    if ($insertError) {
        $insertSth->finish;
        $self->{'_error'} .= "::journal:$insertError";
        return;
    }
    $dbh->commit;
    $insertSth->finish;

    # The sequence's current value is the number just given to the new row.
    my $sth = $dbh->prepare("
            SELECT CGM_DDB.journalno_seq.currval
            FROM   dual
        ");
    $sth->execute;
    my $journalNo = $sth->fetchrow();
    $sth->finish;
    return $journalNo;
}

#######################################################################
sub insertReference {
#######################################################################
    # Inserts one row into CGM_DDB.reference and commits.
    #
    # Arguments, in order: source, status, citation, year, PubMed id, date
    # published, date revised, issue, page, volume, title, journal number
    # and (optionally) book number.
    #
    # Returns the new reference_no read back from the sequence.  On an
    # insert failure "::reference:<message>" is appended to _error and
    # nothing is returned.
    my ($self, $refSrc, $status, $citation, $year, $pubmed,
        $datePublished, $dateRevised, $issue, $page, $volume,
        $title, $journalNo, $bookNo) = @_;

    # Bind values in the column order of the INSERT below.
    my @binds = ($refSrc, $status, $citation, $year, $pubmed,
                 $datePublished, $dateRevised, $issue, $page, $volume,
                 $title, $journalNo, $bookNo);

    my $insertSth = $dbh->prepare("
        INSERT INTO CGM_DDB.reference(reference_no, ref_source, status,
                 citation, year, pubmed, date_published, date_revised,
                 issue, page, volume, title, journal_no, book_no)
        VALUES(CGM_DDB.refno_seq.nextval, ?,?,?,?,?,?,?,?,?,?,?,?,?)
    ");
    eval { $insertSth->execute(@binds); };
    if ($@) {
        $insertSth->finish;
        $self->{'_error'} .= "::reference:$@";
        return;
    }

    $dbh->commit;
    $insertSth->finish;

    my $seqSth = $dbh->prepare("
            SELECT CGM_DDB.refno_seq.currval
            FROM   dual
        ");
    $seqSth->execute;
    my $newRefNo = $seqSth->fetchrow();
    $seqSth->finish;
    return $newRefNo;
}

########################################################################
sub insertAuthorEditor {
########################################################################
    # Links an author to a reference in CGM_DDB.author_editor with the
    # given author type and position, then commits.
    #
    # Returns 1 on success.  On failure "::editor:<message>" is appended to
    # _error and nothing is returned.
    my ($self, $authorNo, $refNo, $authorType, $order) = @_;

    my $linkSth = $dbh->prepare("
        INSERT INTO CGM_DDB.author_editor(author_no, reference_no, 
                 author_type, author_order)
        VALUES(?,?,?,?)
    ");
    eval { $linkSth->execute($authorNo, $refNo, $authorType, $order); };

    if (!$@) {
        $dbh->commit;
        $linkSth->finish;
        return 1;
    }

    $linkSth->finish;
    $self->{'_error'} .= "::editor:$@";
    return;
}


#######################################################################
sub insertAbstract {
#######################################################################
    # Stores the abstract text of a reference in CGM_DDB.abstract and
    # commits.
    #
    # Returns 1 on success.  On failure "::abstract:<message>" is appended
    # to _error and nothing is returned.
    my ($self, $refNo, $abstract) = @_;

    my $abstractSth = $dbh->prepare("
        INSERT INTO CGM_DDB.abstract(reference_no, abstract)
        VALUES(?,?)
    ");
    eval { $abstractSth->execute($refNo, $abstract); };

    if (!$@) {
        $dbh->commit;
        $abstractSth->finish;
        return 1;
    }

    $abstractSth->finish;
    $self->{'_error'} .= "::abstract:$@";
    return;
}

########################################################################
sub insertPublicationType {
########################################################################
    # Attaches a publication type to a reference in
    # CGM_DDB.publication_type and commits.
    #
    # Return values:
    #   1                                   - row inserted.
    #   "pub_type not found in code table"  - $type is not a key of
    #                                         %foundPubType; nothing is
    #                                         inserted.
    #   nothing                             - insert failed;
    #                                         "::pubtype:<message>" was
    #                                         appended to _error.
    my ($self, $refNo, $type) = @_;

    if (!$foundPubType{$type}) {
        return "pub_type not found in code table";
    }

    my $sth = $dbh->prepare("
        INSERT INTO CGM_DDB.publication_type(reference_no, pub_type)
        VALUES(?,?)
    ");
    eval { $sth->execute($refNo, $type); };
    # Save the error right away: $@ is a global that any later call
    # (including finish) may reset.
    my $insertError = $@;
    $sth->finish;

    if ($insertError) {
        $self->{'_error'} .= "::pubtype:$insertError";
        return;
    }
    $dbh->commit;
    return 1;
}

########################################################################
sub insertUrl {
########################################################################
    # Links a full-text URL to a reference.
    #
    # The URL is looked up with getUrlNoByUrl; when it is not stored yet a
    # new 'Reference full text' row is inserted into CGM_DDB.url and
    # committed.  The (reference_no, url_no) pair is then inserted into
    # CGM_DDB.ref_url and committed as well.
    #
    # Returns 1 on success.  On failure "::url:<message>" or
    # "::ref_url:<message>" is appended to _error and nothing is returned.
    my ($self, $refNo, $url) = @_;

    my $urlNo = $self->getUrlNoByUrl($url);
    if (!$urlNo) {
        my $urlSth = $dbh->prepare("
            INSERT INTO CGM_DDB.url(url_no, url, url_type)
            VALUES(CGM_DDB.urlno_seq.nextval,?, 'Reference full text')
        ");
        eval { $urlSth->execute($url); };
        # Save the error before finish gets a chance to reset $@.
        my $urlError = $@;
        $urlSth->finish;
        if ($urlError) {
            $self->{'_error'} .= "::url:$urlError";
            return;
        }
        $dbh->commit;

        # The sequence's current value is the number of the new url row.
        my $seqSth = $dbh->prepare("
                SELECT CGM_DDB.urlno_seq.currval
                FROM   dual
            ");
        $seqSth->execute;
        $urlNo = $seqSth->fetchrow();
        $seqSth->finish;
    }

    my $linkSth = $dbh->prepare("
        INSERT INTO CGM_DDB.ref_url(reference_no, url_no)
        VALUES (?,?)
    ");
    eval { $linkSth->execute($refNo, $urlNo); };
    my $linkError = $@;
    $linkSth->finish;
    if ($linkError) {
        $self->{'_error'} .= "::ref_url:$linkError";
        return;
    }

    # Commit the link like every other insert in this package; without
    # this the ref_url row stays in an open transaction.
    $dbh->commit;
    return 1;
}


########################################################################
sub insertExternalID {
########################################################################
    # Records the external ids of a reference in CGM_DDB.external_id:
    # a 'Medline' row when $medline is true, and always a 'PubMed' row.
    # Each successful insert is committed on its own.
    #
    # Returns 1 when the PubMed row was inserted.  When that insert fails,
    # "::externalid:<message>" is appended to _error and nothing is
    # returned.  A failure of the Medline insert is not recorded in _error
    # and does not affect the return value.
    my ($self, $refNo, $pubmed, $medline) = @_;

    if ($medline) {
        my $medlineSth = $dbh->prepare("
            INSERT into CGM_DDB.external_id (external_id_no, external_id,
                                          source, tab_name, primary_key)
            VALUES (CGM_DDB.extidno_seq.nextval, ?, 'Medline', 'REFERENCE', ?)
        ");
        eval { $medlineSth->execute($medline, $refNo); };
        # Save the error before finish gets a chance to reset $@.
        my $medlineError = $@;
        $medlineSth->finish;
        if (!$medlineError) {
            $dbh->commit;
        }
    }

    my $pubmedSth = $dbh->prepare("
        INSERT into CGM_DDB.external_id (external_id_no, external_id,
                                      source, tab_name, primary_key)
        VALUES (CGM_DDB.extidno_seq.nextval, ?, 'PubMed', 'REFERENCE', ?)
    ");
    eval { $pubmedSth->execute($pubmed, $refNo); };
    my $pubmedError = $@;
    $pubmedSth->finish;
    if ($pubmedError) {
        $self->{'_error'} .= "::externalid:$pubmedError";
        return;
    }
    $dbh->commit;
    return 1;
}
sub pubmed {
    # Accessor: the PubMed id held by this object.
    my ($self) = @_;
    return $self->{'_pubmed'};
}
sub medline {
    # Accessor: the Medline id held by this object.
    my ($self) = @_;
    return $self->{'_medline'};
}
sub referenceNo {
    # Accessor: the reference_no held by this object.
    my ($self) = @_;
    return $self->{'_referenceNo'};
}
sub error {
    # Accessor: the accumulated "::tag" error string held by this object.
    my ($self) = @_;
    return $self->{'_error'};
}
 
########################################################################
1;
########################################################################

=pod

=head1 Name

Pubmed.pm    

=head1 Description

This Perl object (Pubmed.pm) encapsulates the data and methods for creating all
reference-related information in the Oracle database for a specified PubMed reference. Once an object has been instantiated, the Medline text is retrieved from NCBI automatically, reformatted, and inserted into the various tables if the reference is not already in the database. The referenceNo method lets you retrieve the reference_no of the newly created or already existing reference.

=head1 Instantiating a New Pubmed Object

To instantiate a new Pubmed object, you may use one of the following syntaxes:

    my $refObject = Pubmed->new(dbh=>$dbh,
                                pubmed=>$pubmed,
                                source=>$source);

    my $refObject = Pubmed->new(database=>$database,
                                user=>$user,
                                pubmed=>$pubmed,
                                source=>$source);


The parameter source is optional. The default is 'PubMed'.


=head1 Accessor Methods

=head2 referenceNo 

Usage :

my $refNo = $refObject->referenceNo;

This accessor returns the reference_no of an existing or newly created reference; otherwise it returns a boolean false.

=head2 pubmed 

Usage :

my $pubmed = $refObject->pubmed;

This accessor returns the PubMed id of a newly created reference; otherwise it returns a boolean false.

=head2 medline 

Usage :

my $medline = $refObject->medline;

This accessor returns the Medline id of a newly created reference; otherwise it returns a boolean false.


=cut





