package Sequence;

##########################################################################
##### Author : Mira Kaloper	
##### Date   : April. 29th 2001
##### Description : This object contains methods for 
#####               retrieving sequences for a given 
#####               sequence name or a given chrnum/start_coord/
#####               stop_coord.
#####
##### See documentation for usage details:
#####
##### http:///usr/local/dicty/www_dictybase/db/lib/staff/dictyBase/programmer/Sequence.html
#####
#######################################################################

use strict;
use Bio::SeqIO;
use Bio::Seq;

use DBI;
use lib "/usr/local/dicty/www_dictybase/db/lib/common";
use TextUtil qw (DeleteUnwantedChar);
use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase/Objects";
use Display_seq;
use Chromosome; 
use Feature;
use ConfigPathdictyBase;



#######################################################################
sub new {      ############ constructor ###############################
#######################################################################
       
    my ($self, %args) = @_;

    $self = {};
    bless $self;


    $self->{'_dbh'} = $args{'dbh'};
    $self->{'_feature_name'} = $args{'feature_name'};
    $self->{'_chrnum'} = $args{'chrnum'};
    $self->{'_type'} = $args{'type'} || 'DNA coding sequence';

    #upsteram/downstream flanking basepairs are only for coding sequence
    if ($self->{'_feature_name'} 
    && ($self->{'_type'} eq 'Genomic DNA')) {
       $self->{'_upstream'} = $args{'upstream'};
       $self->{'_downstream'} = $args{'downstream'};
    } 

    $self->{'_beg'} = $args{'beg'};
    $self->{'_end'} = $args{'end'};
    $self->{'_rev'} = $args{'rev'};
    $self->{'_trans'} = $args{'trans'};
    $self->{'_sequence'} = $args{'sequence'};
    $self->{'_desc'} = $args{'desc'};
    $self->{'_format'} = $args{'format'} || 'fasta';
    $self->{'_display_id'} = $args{'display_id'} || $self->{'_feature_name'};

    $self->{'_alphabet'} = $args{'alphabet'};
    $self->{'_accession_number'} = $args{'accession_number'} || 'none'; 
    
    if (!$self->{'_feature_name'} && !$self->{'_chrnum'} && 
	!$self->{'_sequence'}) {
	print "You must pass a feature name, a chromosome number or DNA sequence to Sequence object. \n";
	return;
    }

    if (!$self->{'_sequence'} && !$self->{'_dbh'}) {
       print "You must pass dbh handle to Sequence object. \n"; 
       return;
    }


    if ($self->{'_feature_name'}) {
      $self->_getFeatureInfo;
    }

    $self->_checkRequest();


    if ($self->{'_sequence'} && $self->{'_trans'}) {
       $self->_translateSequence;
       return $self;
    }


    if (!$self->{'_sequence'}) {
       $self->_getSequence;
    }

    if (!$self->{'_sequence'}) {
       return;
    }

    return $self;
}

#######################################################################
sub _getSequence {
#######################################################################
    my ($self) = @_;

    #create sequence object;
    my $seqObj;
    my $sequence;

    if ($self->{'_feature_name'} 
    && $self->{'_upstream'} 
    && $self->{'_downstream'} 
    && $self->{'_type'} eq 'Genomic DNA') {

       $self->_getChromosomeSequence;

    } elsif ($self->{'_feature_name'}) {

         $self->_getFeatureSequence;

    } elsif ($self->{'_chrnum'}){

        $self->_getChromosomeSequence;

    } 

   if (!$self->{'_sequence'}) {
      return;
   }
 
   my $seqObj = Bio::Seq->new('-seq'=>$self->{'_sequence'},
                            '-desc'=>$self->{'_desc'},
                            '-display_id'=>$self->{'_display_id'},
                            '-accession_number'=>$self->{'_accession_number'},
                            '-alphabet'=>$self->{'_alphabet'});

   #cut subsequence when sequence requested...
   if ($self->{'_beg'} && $self->{'_end'}) {
       $seqObj = $seqObj->trunc($self->{'_beg'},$self->{'_end'});
   }

  #reverse sequence  
  if ($self->{'_rev'} == 1 ) {
     $seqObj = $seqObj->revcom;
  }

  $self->{'_sequence'} = $seqObj->seq(); # string of sequence

  #translate sequence
  if ($self->{'_trans'} == 1){
     $self->_translateSequence;
  }


  return $self->{'_sequence'};

}


#######################################################################
sub _translateSequence {
#######################################################################
  my $self = shift;


  my $seqObj = Bio::Seq->new('-seq'=>$self->{'_sequence'},
                            '-desc'=>$self->{'_desc'},
                            '-display_id'=>$self->{'_display_id'},
                            '-accession_number'=>$self->{'_accession_number'},
                            '-alphabet'=>$self->{'_alphabet'});

  #use differente translation when Mito
  if ($self->{'_chrnum'} == 17) {
      $seqObj = $seqObj->translate(undef,undef,undef,3);
  } else {
          $seqObj = $seqObj->translate();
  }

  $self->{'_sequence'} = $seqObj->seq(); 

}


#######################################################################
sub _getFeatureSequence{
#######################################################################
  my $self = shift;

  my $seqObj = Display_seq->new(dbh=>$self->{'_dbh'},
                              feature_no=>$self->{'_feature_no'},
                              display_seq_type=>$self->{'_type'});

  if (!$seqObj) {
     return;
  }

  my $sequence = $seqObj->display_seq;

  if (!$sequence) {
      print "Sequence doesn't exist for $self->{'_feature_name'}.\n";
      return;
  }

  #remove * from the end of Sequence when protein sequence...
#  if ($self->{'_type'} eq 'Protein') {
#     my $star = chop($sequence);

#     if ($star ne '*') {
#        $sequence .= $star;
#     } else {
#       $self->{'_protein_end'} = '*';
#     }
#  }

  $self->{'_sequence'} = $sequence;

}

#######################################################################
sub _getChromosomeSequence {
#######################################################################

  my $self = shift;

  my $seqObj = Chromosome->new(dbh=>$self->{'_dbh'},
                               chromosome=>$self->{'_chrnum'});

  my $sequence = $seqObj->chr_seq;

  if (!$sequence) {
     print "Sequence doesn't exist for $self->{'_chrnum'}.\n";
     return;
  }

  $self->{'_sequence'} = $sequence;
}


#######################################################################
sub _checkRequest {
#######################################################################
  my $self = shift;

  if ($self->{'_type'} eq 'Protein'  
  && ($self->{'_trans'} == 1)
  && ($self->{'_rev'} == 1)) {
     print "Cannot translate or reverse sequence for Protein. \n";
     return;
  } 



}

#######################################################################
sub _getFeatureInfo {
#######################################################################
  my ($self) = @_;

  my $featureObj = Feature->new(dbh=>$self->{'_dbh'}, 
                                feature_name=>$self->{'_feature_name'}); 
  
  if (!$featureObj) {
     print "You must pass feature_name to Sequence object.";
     return;
  }

  $self->{'_feature_no'} = $featureObj->feature_no;

  $self->{'_start'} = $featureObj->start_coord;
  $self->{'_stop'} = $featureObj->stop_coord;

  if ($self->{'_start'} > $self->{'_stop'}) {
     ($self->{'_start'},$self->{'_stop'})=($self->{'_stop'},$self->{'_start'}); 
  }

  if (($self->{'_type'} eq 'Genomic DNA') 
  && $self->{'_upstream'} 
  && $self->{'_downstream'}) {

     $self->{'_beg'} = $self->{'_start'} - $self->{'_downstream'};
     $self->{'_end'} = $self->{'_stop'} + $self->{'_upstream'};
  }


  $self->{'_chrnum'} = $featureObj->chromosome;
 
}


#######################################################################
sub getSequence {
#######################################################################
  my ($self) = @_;

  return $self->{'_sequence'};

}



########################################################################
sub fastaFormatedSequence {
########################################################################
  my $self = shift;

  return $self->_reformatSequence('fasta');

}


########################################################################
sub gcgFormatedSequence {
########################################################################
  my $self = shift;

  return  $self->_reformatSequence('gcg');

}


########################################################################
sub noheaderFormatedSequence {
########################################################################
  my $self = shift;

  return  $self->_reformatSequence('noheader');

}


########################################################################
sub _reformatSequence {
########################################################################
  my ($self, $type) = @_;

 
  my $seqObj = Bio::Seq->new('-seq'=>$self->{'_sequence'},
                            '-desc'=>$self->{'_desc'},
                            '-display_id'=>$self->{'_display_id'},
                            '-accession_number'=>$self->{'_accession_number'},
                            '-alphabet'=>$self->{'_alphabet'});


  #in order to pass fomatted sequence back to calling program have to
  #write into file, and later to read from file. 
  #this should be improved by using SeqIO 
  my $configPath = ConfigPathdictyBase->new;
  my $tmpDir = $configPath->tmpDir;

  my $tmpfile = $tmpDir."bioperl.tmp.$$";   # temp file

  my $subtype;

  if ($type eq 'noheader') {
     $type = 'fasta';
     $subtype = 'noheader';
  }

  my $fh = Bio::SeqIO->newFh(-file=>">$tmpfile", '-format'=>$type);


  print $fh $seqObj;

  #close ($fh); <- not working
  undef ($fh);

  open (IN, "$tmpfile") || die "Could not open file $tmpfile: $!...\n";


  my $sequence;

  #read first line when no header requested i.e. remove > from gcg display
  if ($subtype eq 'noheader') {
     <IN>;
  }

  while (<IN>) {
    $sequence.= $_;
  }

  close IN;

  return $sequence;

}


########################################################################
1;
########################################################################

=pod

=head1 Name

Sequence.pm    

=head1 Description

This perl object (Sequence.pm) contains method for retrieving gcg DNA/protein sequences or for translating DNA sequence to protein sequence. 

=head1 Instantiating a New Sequence Object

To instantiate a new Sequence object, you may use one of following syntaxes:

my $obj = Sequence->new( dbh => $dbh,
                         feature_name => $feature_name,
                         type => $type,
                         beg => $beg,
                         end => $end,
                         rev => $rev,
                         upstream=>$up,
                         downstream=>$down,
                         desc => $desc,
                         display_id => $display_id,
                         alphabet => $alphabet,
                         accession_number => $accession_number);

OR


my $obj = Sequence->new( dbh => $dbh,
                         chrnum => $chrnum,
                         beg => $beg,
                         end => $end,
                         rev => 1,
                         trans => 1,
                         display_id => $display_id);


OR

my $obj = Sequence->new( sequence => $sequence,
                         trans => 1);


###### $type = "Protein", "DNA coding sequence", "Genomic DNA",
               "Genomic DNA +/- 1000bp"  


###### $chrnum = chromosome number (1->17)

###### $beg = start_coord

###### $end = stop_coord

###### $rev = '1' or ''

###### $sequence = DNA sequence -- for protein translation 
 

=head1 Accessor Methods


=head2 getSequence

Usage:

my $sequence = $obj->getSequence;

This method returns unformatted requested sequence.

=head2 fastaFormatedSequence

Usage:

my $sequence = $obj->fastaFormatedSequence;

This method returns fasta formated sequence.

=head2 gcgFormatedSequence

Usage:

my $sequence = $obj->gcgFormatedSequence;

This method returns gcg formated sequence.

=head2 noheaderFormatedSequence

Usage:

my $sequence = $obj->noheaderFormatedSequence;

This method returns formated sequence without header.


=head1 Author

mkaloper@genome.stanford.edu

=cut








