#!/usr/bin/perl -w use strict; use Getopt::Long; use Bio::AlignIO; use Bio::PopGen::Statistics; use Bio::PopGen::Utilities; my (@outgroup,@ingroup,$polarized,$infile,$outfile,$verbose); # run with: perl MK.pl -i CG11099-PA-2R.EXON.fasaln -ig sim -og yak -og mel --polarized -o CG11099.out my $alnformat = 'fasta'; GetOptions( 'i|infile:s' => \$infile, 'ig|ingroup:s' => \@ingroup, 'og|outgroup:s' => \@outgroup, 'p|polarized!' => \$polarized, 'o|outfile:s' => \$outfile, 'af|alnformat|alignformat:s' => \$alnformat, 'v|verbose!' => \$verbose, ); if( ! defined $infile && ! @ARGV ) { usage(); die; } my $stats = Bio::PopGen::Statistics->new(-verbose => $verbose); my $alnio; if( ! $infile ) { # allow stdin/ARGV piping $alnio= Bio::AlignIO->new(-format => $alnformat, -fh => \*ARGV); } else { $alnio= Bio::AlignIO->new(-format => $alnformat, -file => $infile); } while( my $aln = $alnio->next_aln ) { my $pop = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln, -site_model=> 'codon'); my @ingroup_seqs; my @outgroup_seqs; for my $ind ( $pop->get_Individuals ) { my $id = $ind->unique_id; # do we allow ingroup to be a list as well? my $pushed = 0; for my $i ( @ingroup ) { if( $id =~ /\Q$i\E/ ) { push @ingroup_seqs, $ind; $pushed++; last; } } if( ! $pushed ) { for my $o ( @outgroup ) { if( $id =~ /\Q$o\E/ ) { push @outgroup_seqs, $ind; $pushed++; } } } if( ! $pushed ) { warn("sequence $id was not grouped, ignoring...\n"); # push @ingroup_seqs, $ind; } } my @counts = $stats->mcdonald_kreitman(-ingroup => \@ingroup_seqs, -outgroup => \@outgroup_seqs, -polarized => $polarized); warn(join("\t", qw(NSpoly NSfix SPoly Sfix)),"\n"); warn(join("\t",@counts),"\n"); my $mk = $stats->mcdonald_kreitman_counts(@counts); warn("mk is $mk\n"); } sub usage { warn("\t-i/--infile\tinput aligned file\n", "\t-ig/--ingroup\tinput ingroups\n", ); # 'og|outgroup:s' => \@outgroup, # 'p|polarized!' => \$polarized, # 'o|outfile:s' => \$outfile, # 'af|alnformat|alignformat:s' => \$alnformat, # 'v|verbose!' => \$verbose, }