#!/usr/bin/perl
package sdevDatadump;
 
use strict;
use DBI;
use lib "/usr/local/dicty/www_dictybase/db/lib/common";
use Login qw (ConnectToDatabase);
use lib "/usr/local/dicty/www_dictybase/db/lib/dictyBase";
#use dictyBaseObject;

#######################################################################
#################### global variables #################################
#######################################################################

my $database = 'dictyBase';

my $dbuser = 'OTTO';

my $password = `/usr/local/bin/db-auth ${dbuser}\@${database}`;

chomp $password;

my $dbh = &ConnectToDatabase($database, $dbuser, $password);

my %locusdictyBaseid;
my %featdictyBaseid; 
my %dictyBaseidOtherFeat; 

#######################################################################
sub new {      ############ constructor ###############################
#######################################################################
       
    my ($class, %args) = @_;

    my $self = {};

    bless $self, $class;

    $self->get_dictyBaseid;

    return $self;
}

#######################################################################
sub get_dictyBaseid  {
#######################################################################

    my $locus_sth = $dbh->prepare("
        SELECT s.dictyBaseid, s.primary_key, f.feature_no
        FROM CGM_DDB.dictyBaseid s, CGM_DDB.feature f 
        WHERE s.dictyBaseid_type = ? and 
              s.tab_name = 'LOCUS' and 
              s.primary_key = f.locus_no (+)
              
    ");

    $locus_sth->execute('Primary');

    while (my ($dictyBaseid, $locusNo, $featNo) = $locus_sth->fetchrow()) {

        $locusdictyBaseid{$locusNo} = $dictyBaseid;

        $featdictyBaseid{$featNo} = $dictyBaseid;
    }

    my $feat_sth = $dbh->prepare("

        SELECT s.dictyBaseid, s.primary_key, f.locus_no
        FROM CGM_DDB.dictyBaseid s, CGM_DDB.feature f 
        WHERE s.dictyBaseid_type = ? and 
              s.tab_name = 'FEATURE' and 
              s.primary_key = f.feature_no
              
    ");

    $feat_sth->execute('Primary');

    while (my ($dictyBaseid, $featNo, $locusNo) = $feat_sth->fetchrow()) {

        $featdictyBaseid{$featNo} = $dictyBaseid;

        $locusdictyBaseid{$locusNo} = $dictyBaseid;
    }

    my $dictyBaseidOther = $dbh->prepare("

        SELECT primary_key, dictyBaseid
        FROM CGM_DDB.dictyBaseid
        WHERE tab_name = 'FEATURE' and 
              dictyBaseid_type in ('Secondary','Deleted') 
        ORDER by dictyBaseid
    ");

    $dictyBaseidOther->execute;

    while (my ($priKey, $dictyBaseid) = $dictyBaseidOther->fetchrow())  {

        if (my $exist = $dictyBaseidOtherFeat{$priKey})  {

            $dictyBaseidOtherFeat{$priKey} = $exist."|".$dictyBaseid;           
        }
        else  {
            $dictyBaseidOtherFeat{$priKey} = $dictyBaseid;
        }
    }

#debugging
#    my $logfile = '/share/dictyBase/data/data_dump/external_id/hash';
#    open (LOG, ">$logfile") || die;
#    foreach my $item (keys %locusdictyBaseid) {
#        print LOG "LOCUS: $item\t$locusdictyBaseid{$item}\n";
#    }
#    foreach my $item (keys %featdictyBaseid) {
#        print LOG "FEAT: $item\t$featdictyBaseid{$item}\n";
#    }
#    close LOG;

    return;
}

#######################################################################
sub get_external_id {
#######################################################################

    my ($self, $file) = @_;

    open (OUT, ">$file") || die "Can't open $file:$!\n";

    my $sth = $dbh->prepare("

        SELECT e.external_id, e.source, l.locus_name, l.locus_no, e.tab_name   
        FROM CGM_DDB.external_id e, CGM_DDB.locus l 
        WHERE tab_name = 'LOCUS' and  
              e.primary_key = l.locus_no 
        UNION
        SELECT e.external_id, e.source, f.feature_name, f.feature_no, e.tab_name  
        FROM CGM_DDB.external_id e, CGM_DDB.feature f 
        WHERE tab_name = 'FEATURE' and 
              e.primary_key = f.feature_no
    ");    

    $sth->execute();

    while (my @result = $sth->fetchrow()) {

        my $dictyBaseid;

        if ($result[4] eq 'LOCUS')  {

            $dictyBaseid = $locusdictyBaseid{$result[3]}; 
            
        }
        elsif ($result[4] eq 'FEATURE')  {

            $dictyBaseid = $featdictyBaseid{$result[3]};
        }

        print OUT "$result[0]\t$result[1]\t$result[2]\t$dictyBaseid\n";
    }

    close OUT;

    return;
}

###################################################################
sub get_goAnnotation  {
###################################################################

    my ($self, $file) = @_;

    open (OUT, ">$file") || die "Can't open $file:$!\n";
    
    my $sth = $dbh->prepare("

        SELECT f.feature_name, l.locus_name, f.feature_no, g.go_aspect, g.go_term, g.goid, ge.evidence_code, l.locus_no  
        FROM CGM_DDB.feature f, CGM_DDB.locus l, CGM_DDB.go_feat_goev gfg, CGM_DDB.go g, CGM_DDB.go_evidence ge 
        WHERE f.feature_no = gfg.feature_no and 
              gfg.goid = g.goid and 
              gfg.go_evidence_no = ge.go_evidence_no and 
              l.locus_no (+) = f.locus_no 
        UNION 
        SELECT f.feature_name, l.locus_name, f.feature_no, g.go_aspect, g.go_term, g.goid, ge.evidence_code, l.locus_no  
        FROM CGM_DDB.feature f, CGM_DDB.locus l, CGM_DDB.go_locus_goev glg, CGM_DDB.go g, CGM_DDB.go_evidence ge 
        WHERE l.locus_no = glg.locus_no and 
              glg.goid = g.goid and 
              glg.go_evidence_no = ge.go_evidence_no and 
              l.locus_no = f.locus_no (+)
    ");

    $sth->execute;

    while (my @result = $sth->fetchrow())  {

        if ($result[0])  {
            
            #get dictyBaseid with feature_no
            $result[2] = $featdictyBaseid{$result[2]};
        }
        elsif ($result[1])   {

            #get dictyBaseid with locus_no
            $result[2] = $locusdictyBaseid{$result[7]};
        }

        my $row = join("\t", @result[0,1,2,3,4,5,6]);

        print OUT "$row\n";
    }
    
    close OUT;

    return;
}

###################################################################
sub get_goTerm  {
###################################################################

    my ($self, $file) = @_;

    open (OUT, ">$file") || die "Can't open $file:$!\n";
    
    my $sth = $dbh->prepare("

        SELECT goid, go_term, go_aspect, go_definition 
        FROM CGM_DDB.go 
        ORDER BY goid
    ");

    $sth->execute;

    while (my @result = $sth->fetchrow())  {

        my $row = join("\t", @result);

        print OUT "$row\n";
    }
    
    close OUT;

    return;
}

###################################################################
sub get_phenotypes  {
###################################################################

    my ($self, $file) = @_;

    open (OUT, ">$file") || die "Can't open $file:$!\n";
    
    my $sth = $dbh->prepare("

        SELECT f.feature_name, l.locus_name, f.feature_no, lp.phenotype_type, p.phenotype, lp.sentence, l.locus_no  
        FROM CGM_DDB.feature f, CGM_DDB.locus l, CGM_DDB.locus_pheno lp, CGM_DDB.phenotype p  
        WHERE l.locus_no = f.locus_no (+) and
              l.locus_no = lp.locus_no and
              p.phenotype_no = lp.phenotype_no 
        UNION  
        SELECT f.feature_name, l.locus_name, f.feature_no, fp.phenotype_type, p.phenotype, fp.sentence, l.locus_no   
        FROM CGM_DDB.feature f, CGM_DDB.locus l,CGM_DDB.feat_pheno fp,CGM_DDB.phenotype p  
        WHERE l.locus_no (+) = f.locus_no and 
              f.feature_no = fp.feature_no and
              p.phenotype_no = fp.phenotype_no
    ");

    $sth->execute;

    while (my @result = $sth->fetchrow())  {

        if ($result[0])  {
            
            #get dictyBaseid with feature_no
            $result[2] = $featdictyBaseid{$result[2]};
        }
        elsif ($result[1])   {

            #get dictyBaseid with locus_no
            $result[2] = $locusdictyBaseid{$result[6]};
        }
        my $row = join("\t", @result[0,1,2,3,4,5]);

        print OUT "$row\n";
    }
    
    close OUT;

    return;
}

###################################################################
sub get_geneticMap  {
###################################################################

    my ($self, $file) = @_;

    open (OUT, ">$file") || die "Can't open $file:$!\n";
    
    my $sth = $dbh->prepare("

        SELECT tp.two_point_name, tp.parental_ditype, tp.nonparental_ditype, tp.tetratype, tp.first_division, tp.second_division, tp.distance, tp.std_err, tp.interference, tp.interference_std_err, tp.remark, l1.locus_name, f1.feature_name, l1.chromosome, l1.genetic_position, l1.locus_no, l2.locus_name, f2.feature_name, l2.chromosome, l2.genetic_position, l2.locus_no, f1.feature_no, f2.feature_no  
        FROM CGM_DDB.two_point tp, CGM_DDB.locus_twopt lt1, CGM_DDB.feature f1, 
          CGM_DDB.locus l1, CGM_DDB.locus_twopt lt2, CGM_DDB.feature f2, CGM_DDB.locus l2  
        WHERE tp.two_point_no = lt1.two_point_no and 
              lt1.locus_no = l1.locus_no and 
              l1.locus_no = f1.locus_no (+) and
              tp.two_point_no = lt2.two_point_no and
              lt2.locus_no = l2.locus_no and 
              l2.locus_no = f2.locus_no (+) and 
              l1.locus_no < l2.locus_no  
        ORDER BY tp.two_point_name 
    ");

    $sth->execute;

    while (my @result = $sth->fetchrow())  {

        if ($result[21])   {

            #get dictyBaseid with feature_no
            $result[15] = $featdictyBaseid{$result[21]};
        }
        elsif ($result[15])  {
            
            #get dictyBaseid with locus_no
            $result[15] = $locusdictyBaseid{$result[15]};
        }

        if ($result[22])  {

            $result[20] = $featdictyBaseid{$result[22]};
        }
        elsif ($result[20])  {

            $result[20] = $locusdictyBaseid{$result[20]};
        }

        #truncate the array
        $#result = 20;

        my $row = join("\t", @result);

        print OUT "$row\n";
    }
    
    close OUT;

    return;
}

###################################################################
sub get_chromFeature  {
###################################################################

    my ($self, $file) = @_;

    open (OUT, ">$file") || die "Can't open $file:$!\n";

    my $sth = $dbh->prepare("        
        SELECT distinct f.feature_name, l.locus_name, f.feature_no, ft.feature_type, f.chromosome, f.start_coord, f.stop_coord, f.strand, f.feature_no, f.feature_no, f.brief_id, f.date_created  
        FROM CGM_DDB.feature f, CGM_DDB.feature_type ft, CGM_DDB.locus l    
        WHERE f.feature_no = ft.feature_no  and 
              ft.feature_no not in (SELECT fe.feature_no FROM CGM_DDB.feature fe, CGM_DDB.feature_type fty WHERE fe.feature_no = fty.feature_no and fty.feature_type in ('Deleted', 'Merged')) and
              f.locus_no = l.locus_no (+)
    ");			    
   
    $sth->execute;

    my $arrayRef = $sth->fetchall_arrayref();

    #get aliases
    my $alias_sth = $dbh->prepare("

        SELECT f.feature_no, a.alias_name
        FROM CGM_DDB.feature f, CGM_DDB.locus l, CGM_DDB.locus_alias la, CGM_DDB.alias a 
        WHERE f.locus_no = l.locus_no and
              l.locus_no = la.locus_no (+) and 
              la.alias_no  = a.alias_no (+)
        UNION
        SELECT f.feature_no, a.alias_name
        FROM CGM_DDB.feature f, CGM_DDB.alias a
        WHERE f.feature_no = a.feature_no 
    ");

    $alias_sth->execute;

    my %aliasFeat;

    while (my ($featNo, $alias) = $alias_sth->fetchrow())  {

        next if (!$alias);

        if (!$aliasFeat{$featNo})  {
            $aliasFeat{$featNo} = $alias
        }
        else {
            $aliasFeat{$featNo} = $aliasFeat{$featNo}.'|'.$alias;
        }
    }

    my $formatAref = $self->execute_format($arrayRef);

    foreach my $lineAref (@$formatAref)   {

        my $featNo = $$lineAref[2];

        #get aliases
        $$lineAref[2] = $aliasFeat{$featNo};

        #get primarydictyBaseid
        $$lineAref[8] = $featdictyBaseid{$featNo};
        
        #get dictyBaseidOther
        $$lineAref[9] = $dictyBaseidOtherFeat{$featNo};

        my $row = join("\t", @$lineAref);

        print OUT "$row\n";
    }

    close OUT;

    return;
}


###################################################################
sub get_chromFeature1  {
###################################################################

    my ($self, $file) = @_;

    my $sth = $dbh->prepare("        
        SELECT distinct f.feature_name, l.locus_name, a.alias_name, ft.feature_type, f.chromosome, f.start_coord, f.stop_coord, f.strand, s1.dictyBaseid, s2.dictyBaseid, f.brief_id, f.date_created  
        FROM CGM_DDB.feature f, CGM_DDB.feature_type ft, CGM_DDB.locus l, CGM_DDB.alias a, CGM_DDB.locus_alias la, CGM_DDB.dictyBaseid s1, CGM_DDB.dictyBaseid s2   
        WHERE f.feature_no = ft.feature_no  and 
              f.feature_no not in (SELECT fe.feature_no FROM CGM_DDB.feature fe, CGM_DDB.feature_type fty WHERE fe.feature_no = fty.feature_no and fty.feature_type in ('Deleted', 'Merged')) and
              f.locus_no = l.locus_no (+)  and
              l.locus_no = la.locus_no (+) and 
              la.alias_no  = a.alias_no (+) and 
              s1.dictyBaseid_type = 'Primary' and
              s1.tab_name = 'FEATURE' and  
              s1.primary_key (+)  = f.feature_no and 
              s2.dictyBaseid_type in ('Secondary','Deleted') and 
              s2.tab_name = 'LOCUS' and 
              s2.primary_key (+) = l.locus_no 
        UNION  
        SELECT distinct f.feature_name, l.locus_name, a.alias_name, ft.feature_type, f.chromosome, f.start_coord, f.stop_coord, f.strand, s1.dictyBaseid, s2.dictyBaseid, f.brief_id, f.date_created  
        FROM CGM_DDB.feature f, CGM_DDB.feature_type ft, CGM_DDB.locus l, CGM_DDB.alias a, CGM_DDB.dictyBaseid s1, CGM_DDB.dictyBaseid s2   
        WHERE f.feature_no = ft.feature_no  and 
              f.feature_no not in (SELECT fe.feature_no FROM CGM_DDB.feature fe, CGM_DDB.feature_type fty WHERE fe.feature_no = fty.feature_no and fty.feature_type in ('Deleted', 'Merged')) and
              f.locus_no = l.locus_no (+)  and 
              f.feature_no = a.feature_no (+) and 
              s1.dictyBaseid_type = 'Primary' and 
              s1.tab_name = 'FEATURE' and 
              s1.primary_key (+) = f.feature_no and 
              s2.dictyBaseid_type in ('Secondary','Deleted') and 
              s2.tab_name = 'FEATURE' and 
              s2.primary_key (+) = f.feature_no          
    ");
			    
    $self->execute_format($sth, $file);

    return;
}

############################################################
sub execute_format {
############################################################

    my ($self, $arrayRef) = @_;

    my @formatArray;

#    open (OUT, ">$file") || die "Can't open $file:$!\n";

#   print "***$sql***\t$outfile\n";
#  my $sth = $dbh->prepare("$sql");

#    $sth->execute();

#    my @row = $sth->fetchrow();

    my $row;   #hold the array of the first result line

    foreach my $current (@$arrayRef) {

        if (!$row)  {
            $row = $current;
            next;
        }

        if ($$row[0] eq $$current[0]){

            for (my $i=1; $i<(scalar @$row); $i++) {

                if (($$row[$i] ne $$current[$i]) && ($$row[$i]) && ($$current[$i]))                {

                    #for duplication check
                    my $curr = $$current[$i];

                    if (!($$row[$i] =~ /$curr/)) {  
 
                        $$row[$i] = "$$row[$i]|$$current[$i]";
                    }
                }    
                elsif (!$$row[$i]) {

                    $$row[$i] = $$current[$i];    
                } 
           }
       }

       else {

#          my $line = join("\t", @row);

#          print OUT "$line\n";

           push (@formatArray, $row);

           $row = $current;
       }
    }

# print the last unique row in the result set
#    my $lastline = join("\t", @row);

    push (@formatArray, $row);;

#    print OUT "$lastline\n";
#   print "LAST!!  $lastline\n";

#    $sth->finish;

#    close OUT;

    return \@formatArray;
} 






#give the compiler sth to run.
1;
