#!/usr/bin/perl
package Motif;

##########################################################################
##### Author :	Shuai Weng
##### Date   :  Dec. 2001
##### Description : This object contains methods for 
#####               retrieving motif info for a given 
#####               sequence name.
#####
##### See documentation for usage details:
#####
##### http:///usr/local/dicty/www_dictybase/db/lib/staff/dictyBase/programmer/Motif.html
#####
#######################################################################
use strict;
use DBI;
use lib "/usr/local/dicty/www_dictybase/db/lib/common";
use TextUtil qw (DeleteUnwantedChar);
use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase/Objects";
use ConfigPathdictyBase;


#######################################################################
######################## global variables #############################
# Module-wide configuration, resolved once when this package is loaded.
# $dataDir is the directory prefix used by the methods in this file to
# locate the eMotif data file and the protein FASTA file.  The path is
# built by plain string concatenation, so dataDir() presumably returns a
# path that already ends in "/" -- TODO confirm against ConfigPathdictyBase.
my $configPath = ConfigPathdictyBase->new;
my $dataDir = $configPath->dataDir."sacch3d/emotif/";

#######################################################################
sub new {      ############ constructor ###############################
#######################################################################
    # Purpose : Create an object bound to a single sequence (ORF) name.
    # Usage   : my $obj = Motif->new(seqname => $seqname);
    # Args    : seqname - the sequence name to look up (required).
    # Returns : the new object; when seqname is missing or false, prints
    #           a message to the currently selected output handle and
    #           returns nothing (undef in scalar context).
    my ($class, %args) = @_;

    # Two-argument bless: honour the invocant so that subclasses and
    # $obj->new(...) calls get an object of their own class.  Fall back to
    # this package if the invocant is unusable, matching the old behaviour.
    my $self = bless {}, ref($class) || $class || __PACKAGE__;

    $self->{'_seqname'} = $args{'seqname'};
    if (!$self->{'_seqname'}) {
        print "You must pass a sequence name to Motif object.";
        return;
    }

    return $self;
}

#######################################################################
sub hasMotif {
#######################################################################
    # Purpose : Tell whether the eMotif data file has at least one row for
    #           this object's sequence name.
    # Usage   : my $hasMotif = $obj->hasMotif;
    # Returns : 1 if a row whose first tab-separated field equals the
    #           sequence name (case-insensitively) exists, undef otherwise.
    # Dies    : if the data file cannot be opened.
    # Note    : any extra arguments are ignored.
    my ($self) = @_;

    my $datafile = $dataDir."emotif.data";

    # Lexical handle + three-argument open: no global IN handle to clash
    # with other code, and the path can never be read as an open mode.
    open(my $in, '<', $datafile) ||
        die "Can't open '$datafile' for reading:$!\n";

    my $wanted = uc($self->{'_seqname'});
    my $found;
    while (my $line = <$in>) {
        # Strip the newline so a row consisting of only the ORF name
        # (no tab) still compares equal.
        chomp $line;
        my ($orf) = split(/\t/, $line, 2);
        next if !defined $orf;      # blank line
        if (uc($orf) eq $wanted) {
            $found++;
            last;
        }
    }
    close($in);
    return $found;
}

########################################################################
sub LastUpdate {
########################################################################
    # Purpose : Report when the eMotif data file was last modified.
    # Usage   : my $date = $obj->LastUpdate;
    # Returns : the file's modification date in local time, formatted as
    #           "YYYY-MM-DD"; an empty string if the file cannot be
    #           stat'ed (e.g. it does not exist).
    my ($self) = @_;

    my $datafile = $dataDir."emotif.data";

    # Ask the filesystem directly instead of parsing `ls -l` output: the
    # column layout of ls depends on locale and ls version, and for
    # recently modified files ls prints a time instead of the year, which
    # forced a guess ("the current year") that is wrong for a file
    # modified late in the previous year.
    my $mtime = (stat($datafile))[9];
    return '' if !defined $mtime;

    # localtime: day of month is 1-based, month is 0-based, year is
    # counted from 1900.
    my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];

    return sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday);
}

########################################################################
sub getSequence {
########################################################################
    # Purpose : Fetch the protein sequence for this object's sequence name
    #           from the FASTA file.
    # Usage   : my $sequence = $obj->getSequence;
    # Returns : the sequence lines of the first record whose header starts
    #           with ">ORFP:<seqname> " (case-insensitive, followed by a
    #           space), concatenated without newlines; undef if no such
    #           record with sequence data is found.
    # Dies    : if the FASTA file cannot be opened.
    my ($self) = @_;

    my $sequencefile = $dataDir."Dictyostelium.tfa";

    # Lexical handle + three-argument open instead of the global SEQ handle.
    open(my $seq, '<', $sequencefile) ||
        die "Can't open '$sequencefile' for reading:$!\n";

    # Escape the name so characters such as "." or "|" are matched
    # literally rather than being interpreted as regex syntax.
    my $name   = quotemeta($self->{'_seqname'});
    my $header = qr/^>ORFP:$name /i;

    my $found;
    my $sequence;
    while (my $line = <$seq>) {
        chomp $line;
        if ($line =~ /^>/) {
            # Any header after we have collected data ends the record, so
            # there is no need to read the rest of the file.
            last if $sequence;
            $found = ($line =~ $header) ? 1 : 0;
            next;
        }
        $sequence .= $line if $found;
    }
    close($seq);
    return $sequence;
}

########################################################################
sub getMotifInfo {
########################################################################
    # Purpose : Collect the motif rows for this object's sequence name and
    #           the rows of every other ORF that shares a motif with it.
    # Usage   : my ($motifRef, $relatedProteinMotifRef) = $obj->getMotifInfo;
    # Returns : (\@motif, \@relatedProteinMotif), where each element is one
    #           chomped, tab-separated line of the data file:
    #             @motif               - rows whose ORF name equals the
    #                                    sequence name (case-insensitive);
    #             @relatedProteinMotif - all rows of each other ORF that
    #                                    has at least one motif name in
    #                                    common, each row exactly once.
    #           Returns an empty list if the sequence has no motif rows.
    # Dies    : if the data file cannot be opened.
    # Note    : the whole data file is held in memory for the duration of
    #           the call so that it is read from disk only once.
    my ($self) = @_;

    my $datafile = $dataDir."emotif.data";

    # Lexical handle + three-argument open instead of the global DATA handle.
    open(my $data, '<', $datafile) ||
        die "Can't open '$datafile' for reading:$!\n";
    chomp(my @rows = <$data>);
    close($data);

    my $wanted = uc($self->{'_seqname'});

    # Pass 1: rows of the requested ORF and the set of motif names it has.
    # Columns are: ORF name, ORF length, motif name, p-value, begin, end,
    # link URL, motif description; only the first and third are needed.
    my (@motif, %foundMotif);
    for my $row (@rows) {
        my ($orfNm, undef, $motifNm) = split(/\t/, $row);
        next if !defined $orfNm || uc($orfNm) ne $wanted;
        push(@motif, $row);
        $foundMotif{$motifNm}++ if defined $motifNm;
    }

    return if !@motif;

    # Pass 2: rows of other ORFs.  Rows seen before an ORF is known to be
    # related are parked in %pending and flushed once on its first shared
    # motif; afterwards every further row of that ORF is taken directly.
    # This emits each row exactly once, even when an ORF shares several
    # motifs or its rows are not contiguous in the file.
    my (@relatedProteinMotif, %pending, %isRelated);
    for my $row (@rows) {
        my ($orfNm, undef, $motifNm) = split(/\t/, $row);
        next if !defined $orfNm || uc($orfNm) eq $wanted;

        if ($isRelated{$orfNm}) {
            push(@relatedProteinMotif, $row);
        }
        elsif (defined $motifNm && $foundMotif{$motifNm}) {
            $isRelated{$orfNm} = 1;
            my $earlier = delete($pending{$orfNm}) || [];
            push(@relatedProteinMotif, @$earlier, $row);
        }
        else {
            push(@{ $pending{$orfNm} }, $row);
        }
    }

    return (\@motif, \@relatedProteinMotif);
}



########################################################################
1;
########################################################################

=pod

=head1 Name

Motif.pm    

=head1 Description

This Perl module (Motif.pm) provides methods for retrieving motif information for a given sequence name.

=head1 Instantiating a New Motif Object

To instantiate a new Motif object, use the following syntax:

my $obj = Motif->new(seqname=>$seqname);


=head1 Accessor Methods


=head2 hasMotif

Usage:

my $hasMotif = $obj->hasMotif;

This method returns true if there is motif info associated with a given sequence (ORF) name, otherwise returns false. 


=head2 getSequence 

Usage:

my $sequence = $obj->getSequence;

This method returns protein sequence for a given sequence (ORF) name. 

=head2 getMotifInfo 

Usage:

my ($motifRef, $relatedProteinMotifRef) = $obj->getMotifInfo;

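This method returns two array refs: the motif rows for the given sequence (ORF) name, and the motif rows of related proteins (other ORFs that share a motif with it). If the sequence has no motif rows, it returns an empty list.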


=head1 Author

shuai@genome.stanford.edu

=cut








