#!/usr/local/bin/perl -w use strict; use Bio::SeqIO; use Bio::DB::GenBank; use Bio::Seq; my $db = new Bio::DB::GenBank; my $file = shift @ARGV; my $in = new Bio::SeqIO(-format => 'genbank', -file => $file); my $out = new Bio::SeqIO(-format => 'fasta'); my %seqs; while( my $seq = $in->next_seq ) { my $num = 1; foreach my $feature ( $seq->get_SeqFeatures ) { if( $feature->primary_tag eq 'CDS' ) { my $cdsseq; my $strand = 1; if( $feature->has_tag('coded_by') ) { my ($tag) = $feature->get_tag_values('coded_by'); print "tag is $tag\n"; if( $tag =~ /complement/ ) { $strand = -1; } if( $tag =~ /join\(([^)]+)\)/ ) { my @pieces = split(/\s*\,\s*/,$1); foreach my $piece ( @pieces ) { my ($acc,$loc) = split(/\:/,$piece); print "$acc $loc\n"; my ($start,$stop) = ($loc =~ /(\d+)\.\.(\d+)/ ); print "want $acc $start $stop\n"; my $ntseq = $seqs{$acc}; if( ! $ntseq ) { $ntseq = $db->get_Seq_by_acc($acc); $seqs{$acc} = $ntseq; } my $subseqobj = $ntseq->trunc($start,$stop); my $subseq = $subseqobj->seq(); $cdsseq .= $subseq; } } my $cdseqobj = new Bio::Seq(-seq => $cdsseq, -display_id => "CDS$num"); $num++; if( $strand < 0 ) { $cdseqobj = $cdseqobj->revcom; } $out->write_seq($cdseqobj); $out->write_seq($cdseqobj->translate()); $out->write_seq($seq); } } } }