package FeatureDataSource::MicadoFeatureDataSource;

use strict;

use DBI;
use Bio::Seq;
use Bio::SeqFeature::Generic;

use FeatureDataSource;

# This class inherits from FeatureDataSource.
@FeatureDataSource::MicadoFeatureDataSource::ISA=qw(FeatureDataSource);

# Connection settings read by init() when it opens the PostgreSQL
# connection.  The values below look like placeholders; presumably the
# caller overrides these package variables before calling init()
# (TODO confirm against callers).
$FeatureDataSource::MicadoFeatureDataSource::DBHOST='127.0.0.1';
$FeatureDataSource::MicadoFeatureDataSource::DBNAME='noname';
$FeatureDataSource::MicadoFeatureDataSource::DBUSER='nouser';
$FeatureDataSource::MicadoFeatureDataSource::DBPASSWD='nopasswd';

# Size argument passed to each lo_read call when a sequence large object
# is read.
my $BUFFSIZE=4096;

# Database handle shared by every sub in this file; set by init() and
# disconnected in the END block.
my $dbh=undef;

# Cache filled by _retrieve_complete_genome_names():
# "species, type" label => accession.
my %complete_genomes=();

# Open the shared database connection unless one already exists.
#
# The connection is built from the package variables DBHOST, DBNAME,
# DBUSER and DBPASSWD, with DBI's PrintError switched off.
#
# Returns 1 when a database handle is available afterwards, 0 otherwise.
sub init {
    if (!defined $dbh) {
        my $dsn=sprintf('dbi:Pg:host=%s;dbname=%s',
                        $FeatureDataSource::MicadoFeatureDataSource::DBHOST,
                        $FeatureDataSource::MicadoFeatureDataSource::DBNAME);
        $dbh=DBI->connect($dsn,
                          "$FeatureDataSource::MicadoFeatureDataSource::DBUSER",
                          "$FeatureDataSource::MicadoFeatureDataSource::DBPASSWD",
                          {PrintError=>0});
    }

    return defined $dbh ? 1 : 0;
}


# Build a Bio::Seq object (sequence plus features) for one accession
# number stored in the Micado database.
#
# Parameters:
#   $self         - the data source object (not used by this method itself).
#   $accessnumber - accession looked up in dna_seq.accession; rows of the
#                   locations, features and qualifiers tables are matched
#                   with code_feat LIKE "<accession>#%".
#   $start, $end  - when $start > 0 and $end > $start, only locations that
#                   start in, end in, or span [$start, $end] are kept;
#                   otherwise every location of the accession is loaded.
#   $qualifiers   - false: load only the 'gene' and 'product' qualifiers;
#                   true: load every qualifier.
#
# Returns the Bio::Seq object, or undef (after a warning) when the
# sequence query fails or reports no row for the accession.
#
# Every value is passed to the database as a bind value instead of being
# interpolated into the SQL text, so an accession containing a quote can
# neither break nor alter the statements.
sub _retrieve_by_access_number {
    my ($self, $accessnumber, $start, $end, $qualifiers) = @_;

    # Prepare and execute one statement with bound values.  Returns the
    # executed statement handle, or undef (after a warning) on failure, so
    # that no method is ever called on an undefined handle.
    my $run_query = sub {
        my ($sql, @bind_values) = @_;
        my $handle = $dbh->prepare($sql);
        if (!defined $handle || !defined $handle->execute(@bind_values)) {
            my $reason = $DBI::errstr;
            $reason = 'unknown error'
                if (!defined $reason);
            warn "MicadoFeatureDataSource: query failed: $reason\n";
            return undef;
        }
        return $handle;
    };

    # Locate the large object(s) holding the sequence.
    my $seq_sth = $run_query->(
        'select sequences from dna_seq where accession = ?',
        $accessnumber);
    if (!defined $seq_sth || !$seq_sth->rows) {
        warn "MicadoFeatureDataSource: unable to retrieve data for ",
            "accessnumber ", $accessnumber, "\n";
        return undef;
    }

    # Read each large object in $BUFFSIZE chunks.  The read is wrapped in
    # an explicit transaction: AutoCommit is switched off, the work is
    # committed, and AutoCommit is switched back on.
    my $seq = '';
    while (my $row = $seq_sth->fetchrow_hashref()) {
        $dbh->{AutoCommit} = 0;
        my $lobj_fd = $dbh->func($row->{sequences}, $dbh->{pg_INV_READ},
                                 'lo_open');
        if (defined $lobj_fd) {
            my $buffer = '';
            while ($dbh->func($lobj_fd, $buffer, $BUFFSIZE, 'lo_read')) {
                $seq .= $buffer;
                $buffer = '';
            }
            $dbh->func($lobj_fd, 'lo_close');
        }
        else {
            warn "MicadoFeatureDataSource: unable to open sequence object ",
                "for accessnumber ", $accessnumber, "\n";
        }
        $dbh->commit();
        $dbh->{AutoCommit} = 1;
    }

    # Pattern shared by all feature-related queries.
    my $like_pattern = $accessnumber . '#%';

    # Locations: one entry per code_feat, optionally restricted to the
    # requested window.
    my $location_sql = 'select code_feat, startpos_begin, stoppos_end, strand'
                     . ' from locations where code_feat like ?';
    my @location_bind = ($like_pattern);
    if ($start > 0 && $end > $start) {
        $location_sql .= ' and ((startpos_begin>=? and startpos_begin<=?)'
                       . ' or (stoppos_end>=? and stoppos_end<=?)'
                       . ' or (startpos_begin<? and stoppos_end>?))';
        push @location_bind, $start, $end, $start, $end, $start, $end;
    }

    my %features = ();
    if (my $location_sth = $run_query->($location_sql, @location_bind)) {
        while (my $row = $location_sth->fetchrow_hashref()) {
            $features{ $row->{code_feat} } = {
                start  => $row->{startpos_begin},
                end    => $row->{stoppos_end},
                strand => $row->{strand},
                tags   => {},
            };
        }
    }

    # Feature types, only for the features kept above.
    my $type_sth = $run_query->(
        'select code_feat, type_feat from features where code_feat like ?',
        $like_pattern);
    if (defined $type_sth) {
        while (my $row = $type_sth->fetchrow_hashref()) {
            my $feature = $features{ $row->{code_feat} };
            $feature->{primary} = $row->{type_feat}
                if (defined $feature);
        }
    }

    # Qualifiers.  Without $qualifiers only 'gene' and 'product' are
    # loaded (they are all the map display needs), using one query per
    # type: the original author found two separate queries faster than a
    # single 'OR' query.
    my $qualifier_sql = 'select code_feat, type_qual, qualifier'
                      . ' from qualifiers where code_feat like ?';
    my @qualifier_queries = ();
    if ($qualifiers) {
        push @qualifier_queries, [ $qualifier_sql, $like_pattern ];
    }
    else {
        foreach my $type_qual ('gene', 'product') {
            push @qualifier_queries,
                [ $qualifier_sql . ' and type_qual = ?',
                  $like_pattern, $type_qual ];
        }
    }
    foreach my $query (@qualifier_queries) {
        my $qualifier_sth = $run_query->(@{$query});
        next
            if (!defined $qualifier_sth);
        while (my $row = $qualifier_sth->fetchrow_hashref()) {
            my $feature = $features{ $row->{code_feat} };
            $feature->{tags}->{ $row->{type_qual} } = $row->{qualifier}
                if (defined $feature);
        }
    }

    my $seqobj = Bio::Seq->new(-seq => $seq,
                               -id => 'micado_retrieved_sequence',
                               -accession_number => $accessnumber);

    # Attach the features in code_feat order.
    foreach my $code_feat (sort keys %features) {
        my $feature = $features{$code_feat};
        my $seqfeature = Bio::SeqFeature::Generic->new(
            -start   => $feature->{start},
            -end     => $feature->{end},
            -strand  => $feature->{strand},
            -primary => $feature->{primary},
            -tag     => $feature->{tags});
        $seqobj->add_SeqFeature($seqfeature);
    }

    return $seqobj;
}

# Build the Bio::Seq object for a complete genome identified by its
# "species, type" label.
#
# Parameters:
#   $self       - the data source object.
#   $organism   - key of the complete-genome cache ("species, type").
#   $start, $end, $qualifiers
#               - passed unchanged to _retrieve_by_access_number.
#
# Returns whatever _retrieve_by_access_number returns for the matching
# accession, or undef when the organism is unknown.
sub _retrieve_by_organism {
    my ($self, $organism, $start, $end, $qualifiers) = @_;

    # Fill the cache on first use, as get_access_number and
    # get_complete_genome_names do; without this, a lookup made before
    # either of them was called could never succeed.
    _retrieve_complete_genome_names()
        if (!scalar(keys(%complete_genomes)));

    my $accession = $complete_genomes{$organism};
    return undef
        if (!defined $accession);

    return $self->_retrieve_by_access_number($accession, $start, $end,
                                             $qualifiers);
}

# Load the complete_genomes table into the %complete_genomes cache.
# Each row is stored under the key "species, type" with its accession as
# the value.
sub _retrieve_complete_genome_names {

    my $sth=$dbh->prepare("select species,accession,type from complete_genomes");
    $sth->execute();

    # Columns arrive in the order given in the select list.
    while (my ($species,$accession,$type)=$sth->fetchrow_array()) {
        my $label=join(', ',$species,$type);
        $complete_genomes{$label}=$accession;
    }
}

# Return the "species, type" labels of all complete genomes, sorted as
# strings.  The cache is filled from the database when it is empty.
sub get_complete_genome_names {

    if (keys(%complete_genomes) == 0) {
        _retrieve_complete_genome_names();
    }

    my @sorted_names=sort keys %complete_genomes;

    return @sorted_names;

}

# Return the accession stored for a "species, type" label, or undef when
# the label is unknown.  The cache is filled from the database when it is
# empty.
#
# The label is taken from the first argument, so this sub is written to be
# called as a plain function rather than as a method.
sub get_access_number {
    my ($organism)=@_;

    if (keys(%complete_genomes) == 0) {
        _retrieve_complete_genome_names();
    }

    return $complete_genomes{$organism};
}

# Constructor: create a data source and load the features selected by the
# given parameters.
#
# Parameters are name => value pairs; names are lower-cased before use.
#   organism     - "species, type" label of a complete genome.
#   gene         - with organism: restrict the retrieval to one gene.
#   accessnumber - accession to load, used when no organism is given.
#   start, end   - optional window (both default to -1).
#   qualifiers   - true to load every qualifier (default 0).
#
# Returns the new object, or undef when there is no database connection
# (a warning is issued) or when no sequence could be retrieved.
sub new {
    my $class=shift;

    my %params=(start => -1, end => -1, qualifiers => 0);
    while (my $paramname=shift) {
        $params{lc $paramname}=shift;
    }

    my $self=FeatureDataSource->_new();

    bless $self,$class;

    if (!defined $dbh) {
        warn "Trying to use Micado as data source without valid database connection.\n";
        return undef;
    }

    my $seqobj=undef;

    if (defined $params{organism}) {
        if (defined $params{gene}) {
            # NOTE(review): _retrieve_by_organism_gene is not defined in
            # this file; presumably it is inherited from
            # FeatureDataSource - confirm.
            $seqobj=$self->_retrieve_by_organism_gene($params{organism},
                                                      $params{gene},
                                                      $params{start},
                                                      $params{end},
                                                      $params{qualifiers});
        } else {
            $seqobj=$self->_retrieve_by_organism($params{organism},
                                                 $params{start},
                                                 $params{end},
                                                 $params{qualifiers});
        }
    } elsif (defined $params{accessnumber}) {
        $seqobj=$self->_retrieve_by_access_number($params{accessnumber},
                                                  $params{start},
                                                  $params{end},
                                                  $params{qualifiers});
    }

    # Nothing retrieved: no usable object can be built.
    return undef
        if (!defined $seqobj);

    $self->FeatureDataSource::_load_features($seqobj);

    return $self;
}

# Close the shared database connection, if one was opened, when the
# program exits.
END {
    if (defined $dbh) {
        $dbh->disconnect();
    }
}

1
