#!/usr/bin/perl
package Sequence;


#######################################################################
#    Author :  Mira Kaloper 
#    Date   :  Apr. 1st 2002
#    Description : This package contains the methods needed for
#                  formatting sequences
#
#######################################################################
use strict;
use DBI;
use CGI qw/:all/;
use CGI::Carp qw(fatalsToBrowser);

use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase/Objects";
use Reflink;

#######################################################################
sub new {     
#######################################################################
# Constructor.
#
# Usage   : my $seq = Sequence->new(dbh => $dbh, map => $map, ...);
# Args    : named arguments, each stored on the object under the same
#           name with a leading underscore:
#             dbh, map, chrnum, chr, beg, end, seqname, raw_seq_type, rev
# Returns : a new object blessed into the invoking class, so that
#           subclasses calling an inherited new() get an object of their
#           own class.
#######################################################################

  my ($proto, %args) = @_;

  # Accept both Class->new(...) and $object->new(...).
  my $class = ref($proto) || $proto || __PACKAGE__;

  my $self = {};

  bless $self, $class;

  $self->{'_dbh'}          = $args{'dbh'};
  $self->{'_map'}          = $args{'map'};
  $self->{'_chrnum'}       = $args{'chrnum'};
  $self->{'_chr'}          = $args{'chr'};
  $self->{'_beg'}          = $args{'beg'};
  $self->{'_end'}          = $args{'end'};
  $self->{'_seqname'}      = $args{'seqname'};
  $self->{'_raw_seq_type'} = $args{'raw_seq_type'};
  $self->{'_rev'}          = $args{'rev'};

  # NOTE(review): beg > end is labelled "watson" and everything else
  # "crick".  By the usual convention this looks inverted, but nothing in
  # this file reads _strand, so the historical values are kept as they
  # are -- confirm against external callers before changing.
  if ($self->{'_beg'} > $self->{'_end'}) {
     $self->{'_strand'} = "watson";
  } else {
     $self->{'_strand'} = "crick";
  }

  return $self;
}



########################################################################
sub formatSequence{
########################################################################
# Prints the header line returned by getFASTAHeader followed by the
# sequence, broken into lines of 60 residues.  Each line is written as
# its own <pre>...</pre> element.
#
# Args    : $sequence - the residue string to print
# Returns : nothing; output goes to the currently selected filehandle.
########################################################################
  my ($self, $sequence) = @_;

  my $header = $self->getFASTAHeader;

  print $header, br;

  my $size = 60;
  my $length = defined($sequence) ? length($sequence) : 0;

  # Walk by offset rather than testing the chunk for truth, so a chunk
  # that happens to be the string "0" cannot end the output early.
  for (my $offset = 0; $offset < $length; $offset += $size) {

    my $chunk = substr($sequence, $offset, $size);

    print "<pre>";
    print $chunk, br;
    # The closing tag needs a forward slash: "<\pre>" is an unrecognised
    # escape and would emit a second opening <pre> instead.
    print "</pre>";

  }

  return;
}


########################################################################
sub getFASTAHeader{
########################################################################
# Builds the one-line description printed above a formatted sequence.
#
# Reads   : _seqname (optional), _chrnum, _rev
# Returns : "<name>,  Chr <chrnum> " with ", reverse complement"
#           appended only when _rev is '-REV'.  The name part (and its
#           comma) is left out when no sequence name was supplied.
########################################################################
  my $self = shift;

  my $name = '';
  if ($self->{'_seqname'}) {
      $name = $self->{'_seqname'} . ", ";
  }

  my $chr = defined $self->{'_chrnum'} ? $self->{'_chrnum'} : '';

  # Only flag the reverse complement when it was actually requested.
  my $reverse = '';
  if (defined $self->{'_rev'} && $self->{'_rev'} eq '-REV') {
     $reverse = ", reverse complement";
  }

  return "$name Chr $chr $reverse";
}


########################################################################
sub formatSequenceGCG{
########################################################################
# Prints the sequence in a GCG-like layout as HTML tables.
#
# A header table built from the chromosome label (getChromosome), the
# citation (getCitation) and the checksum line (getCheckSum) is printed
# first.  The sequence follows in rows of five 10-residue blocks, each
# row led by the 1-based position of its first residue.  Rows are
# flushed as a separate table every 100 blocks (20 rows, 1000 residues).
#
# Args    : $sequence - the residue string to print
# Returns : nothing; output goes to the currently selected filehandle.
########################################################################
  my ($self,$sequence) = @_;

  my $chromosome = $self->getChromosome;
  my $citation = $self->getCitation;
  my $checkSum = $self->getCheckSum($sequence);


  print table({-border=>0,-width=>'600', -cellspacing=>0, -cellpadding=>0},
               Tr[(td([$chromosome]),
                  td([$citation]),
                  td([$checkSum]))]
              );
  print br;

  my $size = 10;             # residues per block
  my $blocksPerRow = 5;
  my $blocksPerTable = 100;

  my $length = defined($sequence) ? length($sequence) : 0;
  my $offset = 0;
  my $count = 0;

  my @row;
  my @rows;

  # Position label for the row being assembled.  Closing tags are
  # written as "</pre>": "<\pre>" is an unrecognised escape and would
  # emit another opening <pre>.
  my $head = "<pre>" . ($offset + 1) . "</pre>";

  # Walk by offset rather than testing the block for truth, so a block
  # equal to the string "0" cannot end the output early.
  while ($offset < $length) {

    my $chunk = substr($sequence, $offset, $size);
    $offset += $size;
    $count++;

    push(@row, "<pre>" . $chunk . "</pre>");

    # A row is complete: emit it and label the next one with the
    # position of its first residue.
    if ($count % $blocksPerRow == 0) {
       push(@rows, td({-align=>"right", -colspan=>"10"},$head) . td(\@row));
       @row = ();
       $head = "<pre>" . ($offset + 1) . "</pre>";
    }

    # Flush a table after every 100 blocks (20 rows).
    if ($count % $blocksPerTable == 0) {
         print table({-border=>0,-width=>'500', -cellspacing=>'6'},
               Tr(\@rows)
              );
         @rows = ();
    }
  }

  # Trailing partial row, if any.
  if (@row) {
     push(@rows, td({-align=>"right", -colspan=>"10"},$head) . td(\@row));
  }

  # Do not print an empty table when everything was already flushed.
  if (@rows) {
     print table({-border=>0,-width=>'500', -cellspacing=>'6'},
               Tr(\@rows)
              );
  }

  return;
}


########################################################################
sub getChromosome {
########################################################################
# Returns the display label "Chromosome <label> Sequence" for the
# chromosome number stored in _chrnum.  Numbers 1-16 map to Roman
# numerals and 17 maps to "Mito"; any other value gives an empty label.
########################################################################
  my ($self) = @_;

  # Labels in chromosome-number order; index 0 belongs to chromosome 1.
  my @labels = qw(I II III IV V VI VII VIII IX X
                  XI XII XIII XIV XV XVI Mito);

  my %labelFor;
  @labelFor{1 .. scalar(@labels)} = @labels;

  my $label = $labelFor{ $self->{'_chrnum'} };

  return "Chromosome $label Sequence";

}


########################################################################
sub getCitation{
########################################################################
# Looks up the reference linked to this object's chromosome and returns
# its citation.
#
# Reads   : _dbh, _chrnum
# Returns : whatever the Reference object's citation method returns.
#
# NOTE(review): the Reference class is not loaded by this file; it is
# presumably pulled in by Reflink -- verify.
########################################################################
  my $self = shift;

  my ($dbh, $chrNum) = @{$self}{'_dbh', '_chrnum'};

  # Reference number(s) attached to the CHROMOSOME table row for this
  # chromosome.
  my $refNos = Reflink->GetRefNoListBYtabNmPrikeyPrikeycol(
                   dbh=>$dbh,
                   tab_name=>'CHROMOSOME',
                   primary_key=>$chrNum,
                   primary_key_col=>'CHROMOSOME');

  my $reference = Reference->new(dbh=>$dbh, reference_no=>$refNos);

  my $citation = $reference->citation;

  return $citation;

}


###########################################################################
sub getCheckSum {
###########################################################################
# Builds the descriptor line shown above a GCG-formatted sequence:
#   "<name> Length: <n><date> Type: <P|N>  Check: <checksum> .."
#
# The checksum is the sum over the upper-cased sequence of each
# character code multiplied by a weight that cycles 1..57, taken
# modulo 10000.
#
# Args    : $sequence - the residue string
# Reads   : _map (a value starting with "p", in either case, gives type
#           P; anything else gives N) and _seqname (falls back to
#           "gcgseq.tmp.<pid>" when empty)
# Returns : the descriptor string
###########################################################################
  my ($self, $sequence) = @_;

  my $residues = uc($sequence);
  my $seqlen = length($residues);

  my $check = 0;
  foreach my $pos (0 .. $seqlen - 1) {
    my $weight = ($pos % 57) + 1;
    $check += $weight * ord(substr($residues, $pos, 1));
  }
  $check %= 10000;

  my $type = ($self->{'_map'} =~ /^p/i) ? 'P' : 'N';

  my $name = $self->{'_seqname'} || ('gcgseq.tmp.'. $$);

  my $date = $self->getDate;

  return join('', $name, " Length: $seqlen", $date, " Type: $type  Check: $check ..");
}


#########################################################################
sub getDate {
#########################################################################
# Returns the current local date and time formatted as
#   " <Month>  <day>, <year> <HH>:<MM> "
# with leading and trailing spaces, ready to be embedded in the GCG
# descriptor line.  Hour and minute are zero-padded to two digits, so
# five past nine reads "09:05" rather than "9:5".
#########################################################################
  my $self = shift;

  # localtime numbers months from 0.
  my @monthName = qw(January February March April May June July
                     August September October November December);

  my ($sec, $min, $hours, $mday, $month, $year) = localtime;

  # localtime returns the year as an offset from 1900.
  $year = 1900 + $year;

  return sprintf(" %s  %d, %d %02d:%02d ",
                 $monthName[$month], $mday, $year, $hours, $min);
}



####################################################################
sub getSubsequence{
####################################################################
# Cuts the region _beg.._end (1-based, inclusive) out of $sequence,
# optionally reverse-complements and translates it, then prints it.
#
# Steps, in order:
#   1. extract the region
#   2. reverse-complement it when _rev is '-REV'
#   3. translate it to protein when _map starts with "p" (either case)
#      and _chr is set
#   4. print with formatSequenceGCG unless _map contains a "3", in
#      which case formatSequence is used
#
# NOTE(review): the extraction assumes _beg <= _end; with _beg > _end
# the length passed to substr is zero or negative -- confirm callers
# never pass coordinates in that order.
####################################################################
  my ($self, $sequence) = @_;

  my $first = $self->{'_beg'};
  my $last  = $self->{'_end'};

  my $fragment = substr($sequence, $first - 1, $last - $first + 1);

  $fragment = $self->reverseCompl($fragment)
      if $self->{_rev} eq '-REV';

  # A protein was requested and a chromosome is selected, so the
  # nucleotide region has to be translated.
  $fragment = $self->doProteinTranslation($fragment)
      if ($self->{'_map'} =~ /^p/i) && ($self->{'_chr'});

  return ($self->{'_map'} !~ /3/i)
       ? $self->formatSequenceGCG($fragment)
       : $self->formatSequence($fragment);

}


####################################################################
sub reverseCompl{
####################################################################
# Returns the reverse complement of a nucleotide string.
#
# A/C/G/T are complemented with their case preserved.  Every other
# character (ambiguity codes such as N, gaps, ...) is copied through
# unchanged in its reversed position.
#
# Args    : $sequence - nucleotide string
# Returns : the reverse-complemented string ('' for empty or undefined
#           input)
####################################################################
  my ($self, $sequence) = @_;

  return '' unless defined $sequence;

  # Reverse the string first, then swap each base for its partner in a
  # single pass.  tr/// leaves unlisted characters untouched, which
  # gives the pass-through behaviour for unknown characters directly.
  my $complement = scalar reverse($sequence);
  $complement =~ tr/ACGTacgt/TGCAtgca/;

  return ($complement);

}


########################################################################
{
  # Complement of each unambiguous nucleotide, case preserved.  Built
  # once when the file is loaded rather than on every call.
  my %complementOf = (
      'T' => 'A', 't' => 'a',
      'C' => 'G', 'c' => 'g',
      'G' => 'C', 'g' => 'c',
      'A' => 'T', 'a' => 't',
  );

  sub getComplement {
  ########################################################################
  # Plain function (not a method): returns the complement of a single
  # nucleotide character.
  #
  # Args    : $char - one of A, C, G, T in either case
  # Returns : the complementary base in the same case.  For any other
  #           input nothing is returned (undef in scalar context), so
  #           callers can test the result for truth and fall back to
  #           the original character.
  #
  # Unknown characters are reported on STDERR.  They used to be printed
  # to STDOUT, and the true value returned by print leaked out as the
  # "complement".
  ########################################################################
      my ($char) = @_;

      return $complementOf{$char}
          if defined $char && exists $complementOf{$char};

      warn "ERROR: $char \n";
      return;
  }
}



########################################################################
sub doProteinTranslation{
########################################################################
# Translates a nucleotide string into one-letter amino acid codes,
# reading consecutive codons from the first position.
#
# Args    : $sequence - DNA string; case is ignored
# Reads   : _chr - when numerically equal to 17 the mitochondrial table
#           is used, otherwise the standard table
# Returns : the protein string ('' for empty input).  Stop codons are
#           written as '*'.  A complete codon that is not in the table
#           (for example one containing N) is written as 'X' so that
#           residue positions stay aligned with the DNA.  A trailing
#           partial codon is ignored.
#
# NOTE(review): getChromosome keys the "Mito" label on _chrnum == 17,
# while this method tests _chr -- confirm both fields carry the
# chromosome number.
########################################################################
  my ($self, $sequence) = @_;

  my %DNAtoAA = ('GCT'=>'A', 'GCC'=>'A', 'GCA'=>'A', 'GCG'=>'A', 'TGT'=>'C',
'TGC'=>'C', 'GAT'=>'D', 'GAC'=>'D', 'GAA'=>'E', 'GAG'=>'E', 'TTT'=>'F',
'TTC'=>'F', 'GGT'=>'G', 'GGC'=>'G', 'GGA'=>'G', 'GGG'=>'G', 'CAT'=>'H',
'CAC'=>'H', 'ATT'=>'I', 'ATC'=>'I', 'ATA'=>'I', 'AAA'=>'K', 'AAG'=>'K',
'TTG'=>'L', 'TTA'=>'L', 'CTT'=>'L', 'CTC'=>'L', 'CTA'=>'L', 'CTG'=>'L',
'ATG'=>'M', 'AAT'=>'N', 'AAC'=>'N', 'CCT'=>'P', 'CCC'=>'P', 'CCA'=>'P',
'CCG'=>'P', 'CAA'=>'Q', 'CAG'=>'Q', 'CGT'=>'R', 'CGC'=>'R', 'CGA'=>'R',
'CGG'=>'R', 'AGA'=>'R', 'AGG'=>'R', 'TCT'=>'S', 'TCC'=>'S', 'TCA'=>'S',
'TCG'=>'S', 'AGT'=>'S', 'AGC'=>'S', 'ACT'=>'T', 'ACC'=>'T', 'ACA'=>'T',
'ACG'=>'T', 'GTT'=>'V', 'GTC'=>'V', 'GTA'=>'V', 'GTG'=>'V', 'TGG'=>'W',
'TAT'=>'Y', 'TAC'=>'Y', 'TAA'=>'*', 'TAG'=>'*', 'TGA'=>'*');

  # The mitochondrial table is the standard table with six codons
  # reassigned: ATA -> M, CTT/CTC/CTA/CTG -> T, TGA -> W.
  # NOTE(review): these reassignments look like the yeast mitochondrial
  # code -- confirm that is the table intended for this organism.
  my %MtDNAtoAA = (%DNAtoAA,
                   'ATA'=>'M',
                   'CTT'=>'T', 'CTC'=>'T', 'CTA'=>'T', 'CTG'=>'T',
                   'TGA'=>'W');

  my $chr = $self->{_chr};
  my $table = (defined $chr && $chr == 17) ? \%MtDNAtoAA : \%DNAtoAA;

  # The tables are keyed in upper case; fold the input so lower-case
  # sequence is translated instead of silently producing nothing.
  $sequence = defined($sequence) ? uc($sequence) : '';

  my $len = length($sequence);
  my $protein = '';

  # Only complete codons are read; 1-2 leftover bases are ignored.
  for (my $offset = 0; $offset + 3 <= $len; $offset += 3) {
    my $codon = substr($sequence, $offset, 3);

    $protein .= exists $table->{$codon} ? $table->{$codon} : 'X';
  }

  return $protein;
}


########################################################################
1; 
########################################################################



