gff文件提取cds

 1 #!/usr/bin/perl
 2 
 3 use strict;
 4 use warnings;
 5 ########input########
 6 
 7 my $gff = $ARGV[0];my $cut = &cut($gff);my %cut = %$cut;
 8 
 9 my ($gene_number,$gene_id,$gene_name)  = ($ARGV[1],$ARGV[2],$ARGV[3]);
10 
11 #########main#######
12 
13 my %hash_2 = %{$cut{$gene_name}};
14 
15 foreach my $key_2(keys %hash_2)
16 {
17     my @arr = $hash_2{$key_2};my $exon_start = $arr[0][0]+30-1;my $exon_end =$arr[0][1]-30+1;
18 
19     print "$gene_name	$key_2	$arr[0][0]	$exon_start
$gene_name	$key_2	$exon_end	$arr[0][1]
";
20 }
21 
22 #############sub################
23 
24 sub cut
25 {
26     my %gene;
27 
28     my $exon = 1;my $start = 0;my $length = 0;
29 
30     my $gff = shift;open GFF,"$gff"; 
31     
32     while(my $line = <GFF>)
33     {
34         chomp $line;
35 
36         my @q = split /s/,$line;
37 
38         if($q[2] =~ /mRNA/)
39         {
40             $exon = 1;$start = $q[3];$length = 0;
41         }
42         elsif($q[2] =~ /CDS/)
43         {
44             $q[8] =~ /Parent=(.*);S/m;my $key = $1;  
45 
46             my $new_length = $q[4]-$q[3]; 
47 
48             $exon =~/(w*)/; $gene{$key}{$1} = [$length+1,$length+$new_length];
49 
50             $exon++; $length += $new_length;
51         }    
52     }   
53     return %gene;
54 }
原文地址:https://www.cnblogs.com/yuanjingnan/p/11087860.html