#!/usr/bin/perl
package sdevParagraphUI;

###########################################################################
#
# Script Name: ParagraphUI.pm
# Programmer:  Kane Tse (KT)
# Creation:    Aug/2001
# Rev./Ver.:   Sep/2001 v
#
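# Purpose:     Renders dictyBase gene summary paragraphs as HTML: converts
#                the paragraph's internal markup tags (locus, reference and
#                GO tags) into hyperlinks and lists the cited references.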
#
# IMPORTANT NOTE:
#              
#                
#
# Inputs:      1 
#                
#
#                Filehandle:
#
# Outputs:     1 
#                
#
#                Filehandle:
#
#              2 
#                
#
#                Filehandle:
#
# Algorithms:  
#                
#
# Project:     dictyBase / Gene Summary Paragraphs / Oracle Conversion
#
# Notes:       
#                
#
# Dependencies: 
#                
#
# Assumptions: 
#                
#
# Other Related Files:
#              Scripts whose output upon which this script relies
#               --> 
#
#              Scripts that are known to rely upon this script's output  --
#               --> 
#
#              Scripts in the same process
#               --> 
#
#              Scripts in the same project for other external databases
#               (None currently known)
#
###########################################################################
use strict;

use CGI qw/:all :html3 :noDebug/;
use CGI::Carp qw(fatalsToBrowser);

use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase";
use dictyBaseObject;

use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase/Objects";
use ParagraphDS;
use Reference;
use ConfigURLdictyBase;

use Exporter();
use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# $VERSION = 1.00;
# @ISA = qw( Exporter );
# @EXPORT = qw( displayParagraph renderText displayPhrases);


###########################################################################
# Global Variables - Script Configuration
###########################################################################
# Path component appended to the CGI root when building the URLs below.
my $dblink = "dictyBase";
# Configuration object that supplies the server and CGI root URLs.
my $configUrl = ConfigURLdictyBase->new();
# Server root; not referenced anywhere else in this file.
my $dictyBaseServer = $configUrl->dictyBaseServerRoot;
my $dictyBaseCGI = $configUrl->dictyBaseCGIRoot;

# URL prefixes; the relevant identifier is appended directly to each one.
# $refURL is not referenced anywhere else in this file.
my $refURL = $dictyBaseCGI . $dblink. "/reference/reference.pl?refNo=";
# Used by para2html() for loci tagged with a numeric locus number.
my $locusURL = $dictyBaseCGI . $dblink . "/locus.pl?locusNo=";
# Used by para2html() for loci tagged with '#' instead of a locus number;
#   the locus name is appended.
my $locusSearchURL = $dictyBaseCGI . $dblink . "/locus.pl?locus=";
# Used by para2html() for GO term tags.
my $goURL = $dictyBaseCGI . $dblink ."/GO/go.pl?goid=";

###########################################################################
# Global Variables - Used during program execution
###########################################################################
# Database handle shared by all subs in this file.  It is assigned by
#   displayParagraph() and setDbh() and passed to Reference->new().
my $dbh;
# Maps a reference number to the citation number it was given in the
#   paragraph text (filled in by para2html()).
my %refs;
# The next citation number para2html() will hand out.
# NOTE: nothing in this file resets %refs or $refCount, so citation
#   numbering carries on across successive para2html() calls.
my $refCount = 1;
# Cache of PubMed id => reference number lookups made by para2html().
my %pm2refno;

# Location of the word lists that drive automatic bolding/italicizing.
my $path = "/usr/local/dicty/www_dictybase/db/data/dictyBase";
my $boldFilename = "$path/interfaces/paragraphAutoBold.txt";
my $italicFilename = "$path/interfaces/paragraphAutoItalics.txt";

###########################################################################
# Static Class Variables - these are also global variables.
###########################################################################
# Keys are the words to wrap in <b>...</b>; loaded by new(), read by
#   para2html().
my %bold;
# Keys are the words to wrap in <i>...</i>; loaded by new(), read by
#   para2html().
my %italic;


###########################################################################
sub new {
###########################################################################
# Constructor.  Creates an object with no attributes of its own and loads
# the auto-bold and auto-italic word lists from $boldFilename and
# $italicFilename into the class-wide %bold and %italic hashes.
#
# Parameters: named arguments are accepted but currently ignored.
# Returns:    the new object.
# Dies:       if either word-list file cannot be opened.
    my ($class, %args) = @_;

    # Bless into the invoking class so that subclasses get objects of their
    #   own type.  Fall back to this package when new() is called as a
    #   plain function, which is what the old one-argument bless did.
    my $self = bless {}, (ref($class) || $class || __PACKAGE__);

    _loadWordList($boldFilename, \%bold,
                  "Unable to locate list of bold words in $boldFilename.\n");
    _loadWordList($italicFilename, \%italic,
                  "Unable to locate list of words to italicize in ".
                  "$italicFilename.\n");

    return $self;
}

###########################################################################
sub _loadWordList {
###########################################################################
# Private helper for new(): reads one word-list file and records every
# entry as a key of the given hash.
#
# Parameters: $filename - path of the word-list file
#             $wordsRef - reference to the hash that receives the entries
#             $errorMsg - message to die with if the file cannot be opened
    my ($filename, $wordsRef, $errorMsg) = @_;

    open(my $fh, '<', $filename) || die $errorMsg;

    while (my $line = <$fh>) {
        next if ($line =~ /^\#/);   # skip comment lines

        $line =~ s/\"//g;           # remove double-quotes
        $line =~ s/^\s+//;          # remove leading white space
        $line =~ s/\s+$//;          # remove trailing white space, \n and \r

        # Skip blank lines.  This is tested after trimming so that lines
        #   holding only white space, quotes or a "\r\n" line ending are
        #   skipped too.  An empty entry must never be stored: used as a
        #   search pattern, an empty string does not mean "match nothing".
        next if ($line eq '');

        $wordsRef->{$line} = 1;
    }

    close($fh);
}

###########################################################################
sub displayParagraph {
###########################################################################
# Builds the HTML for a summary paragraph: the paragraph text, its
# created/modified information and a table of its references.
#
# Named parameters:
#   dictyBaseObject - optional; when given, its locusNo() is used as the
#                     locus number and its dbh() is used when no 'dbh'
#                     argument was supplied
#   locusNo         - locus number, used when no dictyBaseObject is given
#   dbh             - database handle
#   paragraphObj    - optional paragraph object; when given, its dbh() is
#                     used as the database handle, otherwise a ParagraphDS
#                     object is created from dbh and locusNo
#
# Returns: the HTML string.  Returns nothing (bare return) when the
#          dictyBaseObject has no locus number, or when obtaining the
#          paragraph object throws an exception.
#
# Side effect: assigns the file-level $dbh, which para2html() reads.
    my ($self, %args) = @_;

    my $outputString = "";

    my $locusNo         = $args{'locusNo'};
    my $dictyBaseObject = $args{'dictyBaseObject'};
    my $paragraphObj    = $args{'paragraphObj'};
    $dbh = $args{'dbh'};

    if (defined $dictyBaseObject) {
        $locusNo = $dictyBaseObject->locusNo;
        return if (!defined $locusNo);

        if (!defined $dbh) {
            $dbh = $dictyBaseObject->dbh();
        }
    }

    eval {
        if (defined $paragraphObj) {
            $dbh = $paragraphObj->dbh;
        }
        else {
            $paragraphObj = ParagraphDS->new(dbh=>$dbh,
                                             locusNo=>$locusNo
                                            );
        }
    };
    if ($@) {
        # if script execution gets here, it probably means that there is no
        #   summary paragraph for this locus_no
        return;
    }

    # locusNo is passed as well as dictyBaseObject so that the primary locus
    #   is still recognized when the caller supplied only a locus number;
    #   para2html() prefers dictyBaseObject when it is defined.
    my $paragraphText = $self->para2html($paragraphObj->getParagraphText(),
                                         dictyBaseObject=>$dictyBaseObject,
                                         locusNo=>$locusNo);

    my $paragraph = (a({name=>"summaryParagraph"}) .
                     b("Summary: ") . "\n" .
                     $paragraphText . "\n"
                    );

    # modification information:
    my $mod_date = (b("Created: ") . "\n" .
                    $paragraphObj->getDateWritten()
                    . " " . $paragraphObj->getWrittenBy()
                    . br . "\n"
                   );

    $mod_date .= (b("Modified: ") . "\n" .
                  $paragraphObj->getLastModifiedDate()
                  . " " . $paragraphObj->getLastModifiedBy()
                  . br . "\n"
                  . br . "\n\n"
                 );

    # load references from the database
    my $referenceListRef = $paragraphObj->getReferences();
    my @referenceList = defined $referenceListRef ? @$referenceListRef : ();
    my $references = "";

    my $refno = 1;
    my @bgcolor = ("#FFFFFF", "#F5F5F5");

    # reference numbers that already have a row in the table
    my %listed;

    foreach my $ref (@referenceList) {
        next if (!defined $ref);

        my $refObj;
        my $reference_no;
        if ($ref =~ /RF:(\d+)/) {
            $reference_no = $1;
            # checked before the lookup to avoid fetching a duplicate
            next if ($listed{$reference_no});
            $refObj = Reference->new(dbh=>$dbh,
                                     reference_no=>$reference_no);
        }
        elsif ($ref =~ /PM:(\d+)/) {
            my $pubmed_id = $1;
            $refObj = Reference->new(dbh=>$dbh,
                                     pubmed=>$pubmed_id);
            $reference_no = $refObj->reference_no();
        }
        else {
            # neither a reference number nor a PubMed id: there is no
            #   reference object to format, so there is nothing to list
            next;
        }

        # Don't print a second line for a reference that has already been
        #   displayed, whichever kind of id (REF_NO or PUBMED) it was found
        #   under each time.  The reference number is also the row's anchor
        #   name, so a repeated row would repeat the anchor too.
        if (defined $reference_no) {
            next if ($listed{$reference_no});
            $listed{$reference_no} = 1;
        }

        my $currBgcolor = (($refno % 2) == 1) ? $bgcolor[1] : $bgcolor[0];

        $references .= Tr(td({bgcolor=>$currBgcolor,
                              valign=>"top",
                              align=>"left"},
                             "$refno)"),
                          td({bgcolor=>$currBgcolor},
                             a(
                               {name=>"$reference_no"}
                              ).
                             $refObj->formatedCitation()
                            )
                         );
        $refno++;
    }

    $references = (b("Summary References:") .
                   table({border=>0},
                         $references
                        )
                  );

    $outputString .= (
                      $paragraph . p . "\n"
                      . $mod_date . p . "\n"
                      . $references . "\n"
                     );

    return $outputString;
}

###########################################################################
sub setDbh {
###########################################################################
# Replaces the file-level database handle ($dbh) with the one supplied.
#
# Parameters: $newDbh - the handle to store
# Returns:    the handle that was stored.
    my (undef, $handle) = @_;

    return $dbh = $handle;
}



###########################################################################
sub renderText {
###########################################################################
# Positional-argument wrapper around para2html().
#
# Parameters: $pText       - paragraph text to convert
#             $locusNo     - passed to para2html() as 'locusNo'
#             $ref_numsRef - passed to para2html() as 'ref_numRef'
# Returns:    whatever para2html() returns for the text.
    my $self = shift;
    my ($text, $locus, $numbersRef) = @_;

    my $html = $self->para2html($text,
                                locusNo    => $locus,
                                ref_numRef => $numbersRef);

    return $html;
}

###########################################################################
sub displayPhrases {
###########################################################################
# Renders a paragraph phrase by phrase.  Each phrase is converted with
# para2html() and wrapped in small red markers showing its position
# (phrase order + 1).  The phrase whose order equals 'highlight' is given a
# highlighted background.
#
# Named parameters:
#   paragraphObj - object whose getPhrases() returns a reference to the
#                  list of phrase objects
#   locusNo      - passed on to para2html()
#   highlight    - order of the phrase to highlight; defaults to -1
#
# Returns: the concatenated HTML ("" when there are no phrases).

    my ($self, %args) = @_;

    my ($paragraphObj, $locusNo) = @args{'paragraphObj', 'locusNo'};

    my $highlight = -1;
    if (defined $args{'highlight'}) {
        $highlight = $args{'highlight'};
    }

    my %markerStyle = (-size => 2, -color => "RED");

    my @pieces;
    foreach my $phraseObj (@{ $paragraphObj->getPhrases() }) {
        my $html = $self->para2html($phraseObj->getPhraseText(),
                                    locusNo=>$locusNo);

        my $order = $phraseObj->getPhraseOrder();
        if ($highlight == $order) {
            $html = font({-style=>"color:black;background-color:#ffff66"},
                         $html
                        );
        }

        my $label = $order + 1;
        my $opening = font({%markerStyle}, " [". $label . code("... "));
        my $closing = font({%markerStyle}, code(" ..."). $label ."] ");

        push(@pieces, $opening . $html . $closing);
    }

    return join("", @pieces);
}

###########################################################################
sub para2html {
###########################################################################
# Converts the internal markup of a summary paragraph into HTML:
#   - the current locus is italicized (not hyperlinked)
#   - other loci are hyperlinked and italicized; a locus name followed
#     directly by 'p' (a protein name) loses its italics
#   - <ref_no:N> / <ref_pm:N> tags become numbered links to the #anchor of
#     the reference; numbers are handed out in order of first appearance
#   - GO term tags become hyperlinks
#   - words from the auto-italic and auto-bold lists are wrapped in
#     <i>/<b> tags
#
# Parameters:
#   $pText - the paragraph text
#   named arguments:
#     dictyBaseObject - optional; its locusNo() identifies the current locus
#     locusNo         - current locus number, used when no dictyBaseObject
#                       is given
#     ref_numRef      - optional array reference; the citation number of
#                       every reference tag found is pushed onto it
#
# Returns: the converted text (undef when $pText is undef).
#
# Uses the file-level %refs, $refCount, %pm2refno and $dbh; see
# _citationLink().
    my ($self, $pText, %args) = @_;

    return $pText if (!defined $pText);

    my $locusNo;
    if (defined $args{'dictyBaseObject'}) {
        $locusNo = $args{'dictyBaseObject'}->locusNo;
    }
    elsif (defined $args{'locusNo'}) {
        $locusNo = $args{'locusNo'};
    }

    my $ref_numsRef = defined $args{'ref_numRef'} ? $args{'ref_numRef'} : [];

    # italicize the primary locus, but don't hyperlink it to its locus page.
    #   The locus number is quoted so it is always matched literally.
    my $primaryLocus = defined $locusNo ? quotemeta($locusNo) : '';
    $pText =~ s/\<locus\d{2}:$primaryLocus\>([A-Za-z0-9\,\-\']+)\<\/locus\>/\<i\>$1\<\/i\>/g;

    # de-italicize the primary locus name where it is a protein name with a 'p'
    #   postscript appended to it.
    $pText =~ s/\<i\>([A-Za-z0-9\,\-\']+)\<\/i\>p/$1p/ig;

    # hyperlink primary and secondary loci (that is not the current locus)
    #   and italicize
    $pText =~ s/\<locus\d{2}:(\d+)\>/\<a href=\"$locusURL$1\"\>\<i\>/g;
    $pText =~ s/\<\/locus\>/\<\/i\>\<\/a\>/g;

    # remove italicization for protein names
    $pText =~ s/\<a href=\"([A-Za-z0-9\,\.\:\/\-\_\%\$\&\?\=]+)\"\>\<i\>([A-Za-z0-9\,\-\']+)<\/i\>\<\/a\>p/\<a href=\"$1\"\>$2\<\/a\>p/ig;

    # for loci with unknown locus_no
    $pText =~ s/\<locus\d{2}:\#\>([a-zA-Z0-9\-\'\,]*)\</\<a href=\"$locusSearchURL$1\"\>\<i\>$1\<\/i\>\</g;

    # references: every tag is converted in a single left-to-right pass, so
    #   tags that directly follow one another are all handled and citation
    #   numbers follow the order of first appearance in the text.
    $pText =~ s/\<ref_(no|pm):(\d+)\>/_citationLink($1, $2, $ref_numsRef)/ge;

    # identify GO terms
    $pText =~ s/\<go:(\d+)\>/<a href=\"$goURL$1\"\>/g;
    $pText =~ s/\<\/go\>/\<\/a\>/g;

    # NOTE(review): the two loops below substitute anywhere in the text,
    #   including inside the HTML tags generated above; the "/word/" test is
    #   the only guard against that.

    # italicized words.  A word is left alone when the text contains it
    #   between two slashes.  The words are matched literally (\Q...\E) and
    #   an empty word is skipped, because an empty pattern does not mean
    #   "match nothing".
    foreach my $italWord (keys %italic) {
        next if ($italWord eq '');
        next if ($pText =~ /\/\Q$italWord\E\//);
        $pText =~ s/\Q$italWord\E/\<i\>$italWord\<\/i\>/g;
    }

    # bolded words
    foreach my $boldWord (keys %bold) {
        next if ($boldWord eq '');
        $pText =~ s/\Q$boldWord\E/\<b\>$boldWord\<\/b\>/g;
    }

    return $pText;

}

###########################################################################
sub _citationLink {
###########################################################################
# Private helper for para2html(): returns the HTML link that replaces one
# reference tag and records the citation number that was used.
#
# Parameters:
#   $kind        - 'no' when $id is a reference number, 'pm' when it is a
#                  PubMed id
#   $id          - the number taken from the tag
#   $ref_numsRef - array reference onto which the citation number is pushed
#
# Returns: <a href="#REFERENCE_NO">CITATION_NO</a>
    my ($kind, $id, $ref_numsRef) = @_;

    my $reference_no = $id;
    if ($kind eq 'pm') {
        # we need to translate the pubmed id into a reference no to see if
        #   this reference has already been mentioned earlier in this
        #   paragraph or not.  Lookups are cached in %pm2refno.
        if (!defined $pm2refno{$id}) {
            my $referenceObj = Reference->new(dbh=>$dbh,
                                              pubmed=>$id);
            $pm2refno{$id} = $referenceObj->reference_no();
        }
        $reference_no = $pm2refno{$id};
    }

    # The citation number is not the same value as REFERENCE_NO in the
    #   database; it is the position of the reference within the paragraph
    #   text (i.e. this is the first reference, second... etc).  A
    #   reference that was cited before keeps the number it already has.
    if (!defined $refs{$reference_no}) {
        $refs{$reference_no} = $refCount;
        $refCount++;
    }
    my $refno = $refs{$reference_no};

    push(@$ref_numsRef, $refno);

    return "<a href=\"#$reference_no\">$refno</a>";
}

###########################################################################
sub displayAutoMarkedupTerms {
###########################################################################
# Returns HTML showing the raw contents of the auto-bold and auto-italic
# word-list files, each inside a <pre> block.  The first occurrence of the
# placeholder %INSERT_THIS_FILENAME_HERE% in each file is replaced by the
# path of that file.
#
# Parameters: named arguments are accepted but not used.
# Returns:    the HTML string.
# Dies:       if either word-list file cannot be opened.
    my ($self, %args) = @_;

    # auto-bolded strings.  join() yields "" for an empty file, so the
    #   buffer is always defined.
    open(my $boldFh, '<', $boldFilename)
        || die "Unable to locate list of bold words in $boldFilename.\n";
    my $boldFile = join('', <$boldFh>);
    close($boldFh);

    # auto-italicized strings
    open(my $italicFh, '<', $italicFilename)
        || die "Unable to locate list of words to italicize in $italicFilename.\n";
    my $italicFile = join('', <$italicFh>);
    close($italicFh);

    $boldFile =~ s/%INSERT_THIS_FILENAME_HERE%/$boldFilename/;
    $italicFile =~ s/%INSERT_THIS_FILENAME_HERE%/$italicFilename/;

    my $outputString = "";
    $outputString .= hr. b("List of bolded words"). br;
    $outputString .= pre($boldFile). hr;
    $outputString .= b("List of italicized words"). br;
    $outputString .= pre($italicFile);

    return $outputString;
}

1;
