基因家族分析流程
⼋、⾃动化构建系统发育树程序。
其实从多序列⽐对⼀直到构建系统进化树,已经是⼀套较为标准的流程了。之前也对这套流程做了个整合,写了个⼩程序。Phylogeny系统进化树的⼀键化构建——Perl_pipeline。之后对这个程序做了个优化,添加了可选参数以及氨基酸⽐对矩阵转为核酸⽐对矩阵 ( 流程图中的
fasta2tree .pl)。做⼀个分享~
fasta2tree.pl 帮助信息
#!/usr/bin/env perl
use strict;
use File::Copy;
use Getopt::Long;
perl下载安装教程my $usage = <<USAGE;
Usage:
perl $0 -in MultiSequences.fasta -out OutPrefix -type d/c/p
For example:
perl $0 -in AP3.fasta -out ap3_tree -type c
This Pipeline depends on the following software that can be run directly in terminal:
1. mafft (v7.310)
2. pal2nal (v14)
3. Gblocks (0.91b)
4. iqtree (1.55)
by Wang Tianpeng
Version 1.2
2018/11/12
USAGE
if (@ARGV==0){die $usage}
my($fasta,$out_prefix,$type);
GetOptions(
"in=s" => \$fasta,
"out=s" => \$out_prefix,
"type=s"=> \$type,
);
my $pwd0=`pwd`;
chomp $pwd0;
# Preliminary Test: Detecting the softwares dependency
## mafft
print STDERR "\nDetecting the dependency softwares\n\n";
my $software=`mafft --help 2>&1`;
if($software=~/MAFFT/){
print STDERR "MAFFT:\tOK\n";
}else{
die "Mafft\tfailed\n";
}
## pal2nal
my $software=`pal2nal.pl 2>&1`;
if($software=~/pal2nal.pl/){
print STDERR "PAL2NAL:\tOK\n";
}else{
die "pal2nal\tfailed\n";
}
## Gblocks
my $sofware1=`Gblocks -a -b >111`;
open IN,"111" or die "$!";
<IN>;my $software_info=<IN>;
if($software_info=~/^\*/){
print STDERR "Gblocks:\tOK\n";
}else{
die "Gblocks\tfailed\n";
}
close IN;system("rm 111");
## iqtree
$software=`iqtree 2>&1`;
if ($software=~/IQ-TREE/){
print STDERR "IQtree:\tOK\n";
}else{
die "iqtree\tfailed\n";
}
# step1 create temporary directory;
print "\n======================================\n";
mkdir "$p" unless -e "$p";
chdir "$p";
my $pwd1 = `pwd`;chomp($pwd1); #print STDERR "PWD:$pwd";
open IN,"$pwd0\/$fasta" or die "$!";
open OUT,">$fasta" or die "$!";
open OUT2, ">$fasta.pep" or die "$!";
my($seq,$seq_name,@seq_name,%hash_seq);
while(<IN>){
s/\r?\n//;
if(/^>/){
$seq_name=$_;
push @seq_name,$seq_name;
}else{
$hash_seq{$seq_name}.=$_;
}
}
foreach (@seq_name){
print OUT "$_\n$hash_seq{$_}\n";
print OUT2 "$_\n",&cds2pep($hash_seq{$_}),"\n";
}
close IN;close OUT;
#open OUT,">","genome.fasta" or die "cant create file genome.fasta,$!";
# step2 MultiSequences Alignment with mafft and pal2nal;
print "\n=====================================================\n";
print "STEP1 MultiSequences Alignment with mafft\n\n";
mkdir "1.Mafft" unless -e "1.Mafft";
unless (-e "1.Mafft.ok"){
chdir "1.Mafft";
my $pwd=`pwd`;print STDERR "PWD:$pwd";
my $command="mafft --auto $pwd1\/$fasta.pep >$out_prefix.aln.pep.fas 2>mafft.log";
print STDERR (localtime).": $command\n";
system($command)==0 or die "can not execute :$command\n";
my $command="pal2nal.pl $out_prefix.aln.pep.fas $pwd1\/$fasta -output fasta > $out_prefix.aln.cds.fas"; system($command)==0 or die "can not execute: $command\n";
my $pwd_mafft=`pwd`;chomp($pwd_mafft);
chdir "..";
open OUT,">1.Mafft.ok" or die "$!";close OUT;
}else{
print STDERR "CMD(skipped): mafft \n";
}
my $pwd_mafft="$pwd1\/1.Mafft";#print "$pwd_mafft\n";
# step3 informative alignment points filter with Gblocks
#my $pwd =`pwd`;print "$pwd\n";
print "\n===================================================\n";
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论