#!/usr/bin/perl -w
#
# Extract every CDS feature from a GenBank file.
#
# For each CDS the script writes two nucleotide records to the CDS FASTA
# file (one assembled by hand from the sub-locations, one produced by
# BioPerl's spliced_seq) and the translation of the hand-assembled record
# to the protein FASTA file.  Progress/debug information goes to STDOUT.
#
# Usage:
#   script.pl <genbank_in> <cds_fasta_out> <protein_fasta_out>

use strict;
use warnings;
use Bio::SeqIO;
use Bio::Seq;
use Data::Dumper;

# To run this, pass in three filenames: input GenBank, CDS FASTA output,
# protein FASTA output.
@ARGV == 3
    or die "Usage: $0 <genbank_in> <cds_fasta_out> <protein_fasta_out>\n";
my ( $file, $ofile, $ofile2 ) = @ARGV;

my $input = Bio::SeqIO->new( -format => 'genbank', -file => $file );
my $output = Bio::SeqIO->new( -format => 'fasta', -file => ">$ofile" );
my $output_protein
    = Bio::SeqIO->new( -format => 'fasta', -file => ">$ofile2" );

while ( my $seq = $input->next_seq ) {

    # get all the features from the sequence object
    my @features = $seq->get_SeqFeatures;

    FEATURE:
    foreach my $f (@features) {

        # only CDS features are of interest
        #print "feature object is $f\n";
        #print Dumper($f), "\n";
        next FEATURE if $f->primary_tag ne 'CDS';

        my @tags = $f->get_all_tags;
        print "the tags are @tags\n";

        # Not every CDS carries /gene; get_tag_values throws on a missing
        # tag, so probe with has_tag and fall back to other identifiers.
        my $name = first_tag_value( $f, qw(gene locus_tag protein_id) );
        if ( !defined $name ) {
            $name = join '_', $seq->display_id, $f->start, $f->end;
        }
        my $desc = first_tag_value( $f, 'product' );
        $desc = '' if !defined $desc;

        # process location information
        my $location = $f->location;
        print "location str is ", $location->to_FTstring(), "\n";

        my $cds_string = assemble_cds_string( $seq, $location );

        my $cds_seq2 = $f->spliced_seq;
        my $cds_seq  = Bio::Seq->new(
            -seq  => $cds_string,
            -id   => $name,
            -desc => $desc,
        );

        # Both nucleotide records are written, as the script always did:
        # the hand-assembled one and BioPerl's own spliced sequence.
        $output->write_seq($cds_seq);
        $output->write_seq($cds_seq2);

        # NOTE(review): translate is called with defaults, so /codon_start
        # and /transl_table on the feature are not taken into account --
        # confirm whether that matters for the input data.
        $output_protein->write_seq( $cds_seq->translate );

        #print $f->start, "..", $f->end, " for feature ", $name, "\n";
    }
}

# Return the first value of the first tag in @tag_names that the feature
# actually has, or undef when none of the tags is present.
sub first_tag_value {
    my ( $feature, @tag_names ) = @_;

    for my $tag (@tag_names) {
        next if !$feature->has_tag($tag);
        my ($value) = $feature->get_tag_values($tag);
        return $value if defined $value;
    }
    return;
}

# Build the coding sequence string for a location by concatenating the
# sub-sequence of every sub-location, honouring strand:
#   * each minus-strand piece is reverse-complemented;
#   * when the whole location is on the minus strand the pieces are
#     processed from highest to lowest coordinate so the result reads
#     5'->3' on the coding strand.
# Forward-strand and mixed-strand locations keep the order in which the
# sub-locations are listed.
sub assemble_cds_string {
    my ( $seq, $location ) = @_;

    my @sublocs = $location->each_Location;

    my $overall_strand = $location->strand;
    if ( defined $overall_strand && $overall_strand < 0 ) {
        @sublocs = sort { $b->start <=> $a->start } @sublocs;
    }

    my $cds_string = '';
    for my $subloc (@sublocs) {
        print $subloc->to_FTstring(), "\n";

        my $exon_seq = $seq->subseq( $subloc->start, $subloc->end );

        my $piece_strand = $subloc->strand;
        if ( defined $piece_strand && $piece_strand < 0 ) {

            # Let BioPerl do the reverse complement so IUPAC ambiguity
            # codes are handled as well as plain ACGT.
            $exon_seq = Bio::Seq->new(
                -seq      => $exon_seq,
                -alphabet => 'dna',
            )->revcom->seq;
        }

        print "exon_seq is $exon_seq\n";
        $cds_string .= $exon_seq;
    }
    return $cds_string;
}