circRNA数据库的建立

  • circRNA数据库的建立
# Download the gzipped putative spliced-sequence FASTA files (human hg19 and
# mouse mm9) from circBase, then decompress both in place.
wget \
    http://circbase.org/download/human_hg19_circRNAs_putative_spliced_sequence.fa.gz \
    http://circbase.org/download/mouse_mm9_circRNAs_putative_spliced_sequence.fa.gz
gunzip \
    human_hg19_circRNAs_putative_spliced_sequence.fa.gz \
    mouse_mm9_circRNAs_putative_spliced_sequence.fa.gz
# Download the circRNA annotation tables for mouse and human.
wget \
    http://circbase.org/download/mmu_mm9_circRNA.txt \
    http://circbase.org/download/hsa_hg19_circRNA.txt
# Arguments: mouse FASTA, mouse annotation table, output FASTA name.
perl extract_circBase_seq.pl \
    mouse_mm9_circRNAs_putative_spliced_sequence.fa \
    mmu_mm9_circRNA.txt \
    mmu_mm9_circRNA.fa
  • 这个脚本是为了从可能的circRNA序列里面提取已知的circRNA序列
  • 下载坐标转换工具
# Fetch the UCSC liftOver binary. wget does not set the executable bit, so it
# must be added before the tool can be run as ./liftOver.
wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver
chmod +x liftOver
# Fetch and unpack the chain files (hg19 -> hg38, mm9 -> mm10) from the
# production UCSC download host rather than the hgdownload-test host.
wget https://hgdownload.soe.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg38.over.chain.gz
gunzip hg19ToHg38.over.chain.gz
wget https://hgdownload.soe.ucsc.edu/goldenPath/mm9/liftOver/mm9ToMm10.over.chain.gz
gunzip mm9ToMm10.over.chain.gz
# Columns 1-3 of the annotation table (presumably chrom/start/end) become the
# position file that is submitted for coordinate conversion.
cut -f1,2,3 mmu_mm9_circRNA.txt >mmu_mm9_circRNA_pos.txt
# Columns 4-13 are kept aside, one row per input row, so they can be pasted
# back next to the converted positions. \t escapes are used instead of literal
# tab characters so the command survives copy-and-paste.
perl -F'\t' -lane 'print join("\t",@F[3..12])' mmu_mm9_circRNA.txt >mmu_mm9_circRNA.info.txt
# Manual step: upload mmu_mm9_circRNA_pos.txt to
# http://genome.ucsc.edu/cgi-bin/hgLiftOver and save the converted result as
# mmu_mm10_circRNA_pos.txt.
# Caveat of the web route: only positions are submitted, so the strand of each
# record is unknown to the converter and some results may be wrong.
# NOTE(review): paste joins the two files row by row, so this is only valid if
# the converted file has exactly the same number and order of rows as the
# input -- confirm that no record was dropped during conversion.
paste mmu_mm10_circRNA_pos.txt mmu_mm9_circRNA.info.txt >mmu_mm10_circRNA.txt


# Command-line liftOver route.
# Step 1: turn the annotation table (header row skipped) into a 9-column BED
# file: col1-3 as position, col5 as the name, 0 as score, col4 as strand,
# col2/col3 repeated as thickStart/thickEnd, and a fixed RGB value. The input
# is split on tabs only, so a field containing spaces cannot shift columns.
awk -F'\t' -v OFS='\t' 'NR > 1 {print $1, $2, $3, $5, 0, $4, $2, $3, "255,0,0"}' \
    mmu_mm9_circRNA.txt >mmu_mm9_circRNA.pre2mm10.id.txt
# Step 2: convert coordinates; records that cannot be mapped go to "unmap".
./liftOver mmu_mm9_circRNA.pre2mm10.id.txt mm9ToMm10.over.chain mmu_mm10_circRNA.bed unmap
# Step 3: re-attach the annotation columns to the converted records, keyed by
# the name column (col5 of the table, col4 of the BED file).
# Output row layout: BED col1-3, BED col6 (strand), name, table col6-13.
# Lines of the table starting with "#" are echoed unchanged.
# NOTE(review): the echoed header keeps every original column while data rows
# keep only table columns 1-13 -- confirm whether the table has more columns.
perl -e '
    my ($table_path, $bed_path) = @ARGV;
    my %info_for;

    # Pass 1: remember table columns 6-13 for every record name.
    open my $table_fh, "<", $table_path or die "cannot open $table_path: $!\n";
    while (my $row = <$table_fh>) {
        chomp $row;
        if ($row =~ /^#/) {
            print "$row\n";
            next;
        }
        my @col = split /\t/, $row;
        $info_for{$col[4]} = join("\t", @col[5..12]);
    }
    close $table_fh;

    # Pass 2: emit each converted record whose name is known from pass 1.
    open my $bed_fh, "<", $bed_path or die "cannot open $bed_path: $!\n";
    while (my $row = <$bed_fh>) {
        chomp $row;
        my @bed = split /\t/, $row;
        next unless exists $info_for{$bed[3]};
        print join("\t", @bed[0..2], $bed[5], $bed[3], $info_for{$bed[3]}), "\n";
    }
    close $bed_fh;
' mmu_mm9_circRNA.txt mmu_mm10_circRNA.bed >mmu_mm10_circRNA2.txt



原文地址:https://www.cnblogs.com/raisok/p/10836402.html