OTU_Network&calc_otu

  1 # -*- coding: utf-8 -*-
  2 # __author__ = 'JieYap'
  3 from biocluster.agent import Agent
  4 from biocluster.tool import Tool
  5 import os
  6 import types
  7 import subprocess
  8 from biocluster.core.exceptions import OptionError
  9 
 10 
 11 class OtunetworkAgent(Agent):
 12     """
 13     需要calc_otu_network.py
 14     version 1.0
 15     author: JieYao
 16     last_modified:2016.8.1
 17     """
 18     
 19     def __init__(self, parent):
 20         super(OtunetworkAgent, self).__init__(parent)
 21         options = [
 22             {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"},
 23             {"name": "level", "type": "string", "default": "otu"},
 24             {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"},
 25             {"name": "envlabs", "type": "string", "default": ""}
 26             ]
 27         self.add_option(options)
 28         self.step.add_steps('OtunetworkAnalysis')
 29         self.on('start', self.step_start)
 30         self.on('end', self.step_end)
 31 
 32     def step_start(self):
 33         self.step.OtunetworkAnalysis.start()
 34         self.step.update()
 35         
 36     def step_end(self):
 37         self.step.OtunetworkAnalysis.finish()
 38         self.step.update()
 39         
 40     def gettable(self):
 41         """
 42         根据输入的otu表和分类水平计算新的otu表
 43         :return:
 44         """
 45         if self.option('otutable').format == "meta.otu.tax_summary_dir":
 46             return self.option('otutable').get_table(self.option('level'))
 47         else:
 48             return self.option('otutable').prop['path']
 49         
 50     def check_options(self):
 51         """
 52         重写参数检查
 53         """
 54         if not self.option('otutable').is_set:
 55             raise OptionError('必须提供otu表')
 56         self.option('otutable').get_info()
 57         if self.option('otutable').prop['sample_num'] < 2:
 58             raise OptionError('otu表的样本数目少于2,不可进行网络分析')
 59         if self.option('envtable').is_set:
 60             self.option('envtable').get_info()
 61             if self.option('envlabs'):
 62                 labs = self.option('envlabs').split(',')
 63                 for lab in labs:
 64                     if lab not in self.option('envtable').prop['group_scheme']:
 65                         raise OptionError('envlabs中有不在物种(环境因子)表中存在的因子:%s' % lab)
 66             else:
 67                 pass
 68             if len(self.option('envtable').prop['sample']) < 2:
 69                 raise OptionError('物种(环境因子)表的样本数目少于2,不可进行网络分析')
 70         samplelist = open(self.gettable()).readline().strip().split('	')[1:]
 71         if self.option('envtable').is_set:
 72             self.option('envtable').get_info()
 73             if len(self.option('envtable').prop['sample']) > len(samplelist):
 74                 raise OptionError('OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s' % (len(samplelist),
 75                                   len(self.option('envtable').prop['sample'])))
 76             for sample in self.option('envtable').prop['sample']:
 77                 if sample not in samplelist:
 78                     raise OptionError('物种(环境因子)表的样本中存在OTU表中未知的样本%s' % sample)
 79         table = open(self.gettable())
 80         if len(table.readlines()) < 4 :
 81             raise OptionError('数据表信息少于3行')
 82         table.close()
 83         return True
 84 
 85     def set_resource(self):
 86         """
 87         设置所需资源
 88         """
 89         self._cpu = 2
 90         self._memory = ''
 91         
 92     def end(self):
 93         result_dir = self.add_upload_dir(self.output_dir)
 94         result_dir.add_relpath_rules([
 95                 [".", "", "OTU网络分析结果输出目录"],
 96                 ["./real_node_table.txt", "txt", "OTU网络节点属性表"],
 97                 ["./real_edge_table.txt", "txt", "OTU网络边集属性表"],
 98                 ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"],
 99                 ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"],
100                 ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"],
101                 ["./network_centrality.txt", "txt", "OTU网络中心系数表"],
102                 ["./network_attributes.txt", "txt", "OTU网络单值属性表"],
103                 ])
104         print self.get_upload_files()
105         super(OtunetworkAgent, self).end()
106 
107 
108 class OtunetworkTool(Tool):
109     def __init__(self, config):
110         super(OtunetworkTool, self).__init__(config)
111         self._version = "1.0.1"
112         self.cmd_path = self.config.SOFTWARE_DIR + '/bioinfo/meta/scripts/calc_otu_network.py'
113         self.env_table = self.get_new_env()
114         self.otu_table = self.get_otu_table()
115         self.out_files = ['real_node_table.txt', 'real_edge_table.txt', 'real_dc_otu_degree.txt', 'real_dc_sample_degree.txt', 'real_dc_sample_otu_degree.txt', 'network_centrality.txt', 'network_attributes.txt']
116         
117         
118     def get_otu_table(self):
119         """
120         根据调用的level参数重构otu表
121         :return:
122         """
123         if self.option('otutable').format == "meta.otu.tax_summary_dir":
124             otu_path = self.option('otutable').get_table(self.option('level'))
125         else:
126             otu_path = self.option('otutable').prop['path']
127         if self.option('envtable').is_set:
128             return self.filter_otu_sample(otu_path, self.option('envtable').prop['sample'],
129                                           os.path.join(self.work_dir, 'temp_filter.otutable'))
130         else:
131             return otu_path
132     
133     def filter_otu_sample(self, otu_path, filter_samples, newfile):
134         if not isinstance(filter_samples, types.ListType):
135             raise Exception('过滤otu表样本的样本名称应为列表')
136         try:
137             with open(otu_path, 'rb') as f, open(newfile, 'wb') as w:
138                 one_line = f.readline()
139                 all_samples = one_line.rstrip().split('	')[1:]
140                 if not ((set(all_samples) & set(filter_samples)) == set(filter_samples)):
141                     raise Exception('提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s' % (all_samples, filter_samples))
142                 if len(all_samples) == len(filter_samples):
143                     return otu_path
144                 samples_index = [all_samples.index(i) + 1 for i in filter_samples]
145                 w.write('OTU	' + '	'.join(filter_samples) + '
')
146                 for line in f:
147                     all_values = line.rstrip().split('	')
148                     new_values = [all_values[0]] + [all_values[i] for i in samples_index]
149                     w.write('	'.join(new_values) + '
')
150                 return newfile
151         except IOError:
152             raise Exception('无法打开OTU相关文件或者文件不存在')
153 
154     def get_new_env(self):
155         """
156         根据envlabs生成新的envtable
157         """
158         if self.option('envlabs'):
159             new_path = self.work_dir + '/temp_env_table.xls'
160             self.option('envtable').sub_group(new_path, self.option('envlabs').split(','))
161             return new_path
162         else:
163             return self.option('envtable').path
164 
165     def run(self):
166         """
167         运行
168         """
169         super(OtunetworkTool, self).run()
170         self.run_otu_network_py()
171 
172     def formattable(self, tablepath):
173         alllines = open(tablepath).readlines()
174         if alllines[0][0] == '#':
175             newtable = open(os.path.join(self.work_dir, 'temp_format.table'), 'w')
176             newtable.write(alllines[0].lstrip('#'))
177             newtable.writelines(alllines[1:])
178             newtable.close()
179             return os.path.join(self.work_dir, 'temp_format.table')
180         else:
181             return tablepath
182 
183     def run_otu_network_py(self):
184         """
185         运行calc_otu_network.py
186         """
187         real_otu_path = self.formattable(self.otu_table)
188         cmd = self.config.SOFTWARE_DIR + '/program/Python/bin/python '
189         cmd += self.cmd_path
190         cmd += ' -i %s -o %s' % (real_otu_path, self.work_dir + '/otu_network')
191         if self.option('envtable').is_set:
192             cmd += ' -m %s' % (self.env_table)
193         self.logger.info('开始运行calc_otu_network生成OTU网络并进行计算')
194         
195         try:
196             subprocess.check_output(cmd, shell=True)
197             self.logger.info('OTU_Network计算完成')
198         except subprocess.CalledProcessError:
199             self.logger.info('OTU_Network计算失败')
200             self.set_error('运行calc_otu_network.py失败')
201         allfiles = self.get_filesname()
202         for i in range(len(self.out_files)):
203             self.linkfile(allfiles[i], self.out_files[i])
204         self.end()
205 
206     def linkfile(self, oldfile, newname):
207         """
208         link文件到output文件夹
209         :param oldfile: 资源文件路径
210         :param newname: 新的文件名
211         :return:
212         """
213         newpath = os.path.join(self.output_dir, newname)
214         if os.path.exists(newpath):
215             os.remove(newpath)
216         os.link(oldfile, newpath)
217 
218     def get_filesname(self):
219         files_status = [None, None, None, None, None, None, None]
220         for paths,d,filelist in os.walk(self.work_dir + '/otu_network'):
221             for filename in filelist:
222                 name = os.path.join(paths, filename)
223                 for i in range(len(self.out_files)):
224                     if self.out_files[i] in name:
225                         files_status[i] = name
226         for i in range(len(self.out_files)):
227             if not files_status[i]:
228                 self.set_error('未知原因,结果文件生成出错或丢失')
229         return files_status
View Code
  1 # -*- coding: utf-8 -*-
  2 # __author__ = 'JieYao'
  3 import os
  4 import argparse
  5 from biocluster.config import Config
  6 import shutil
  7 import networkx
  8 
  9 def make_env_table(inFile, outFile):
 10     with open(inFile, "r") as tmp_file:
 11         samples_name = tmp_file.readline().rstrip().split('	')
 12     with open('group.txt' , "w") as tmp_file:
 13         tmp_file.write("#sample	group
")
 14         for i in range(1,len(samples_name)):
 15             tmp_file.write(samples_name[i]+"	STD
") 
 16     return './group.txt'
 17 
 18 parser = argparse.ArgumentParser(description='输入OTU表格,生成OTU网络信息')
 19 parser.add_argument('-i', "--otu_matrix", help="输入的OTU表", required = True)
 20 parser.add_argument('-o', "--output", help="输出文件位置", required = True)
 21 parser.add_argument('-m', "--env_table", help="样本分类表", required = False)
 22 args = vars(parser.parse_args())
 23 
 24 flag = False
 25 inFile = args["otu_matrix"]
 26 outFile = args["output"]
 27 if not args["env_table"]:
 28     env_table = make_env_table(inFile, outFile)
 29     flag = True
 30 else:
 31     env_table = args["env_table"]
 32 if os.path.exists(outFile):
 33     shutil.rmtree(outFile)
 34     
 35 """
 36 执行make_otu_network.py 计算otu网络的相关信息并生成文件
 37 完成后由于make_otu_network.py生成的是一个文件夹,使用os和shutil的命令将文件全部移动到输出路径下
 38 """
 39 command = Config().SOFTWARE_DIR + '/program/Python/bin/python '
 40 command += Config().SOFTWARE_DIR + '/program/Python/bin/make_otu_network.py'
 41 command += ' -i %s -o %s -m %s' %(inFile, outFile, env_table)
 42 os.system(command)
 43 if flag:
 44     os.remove("./group.txt")
 45 for paths,d,filelist in os.walk(outFile):
 46     for filename in filelist:
 47         name = os.path.join(paths, filename)
 48         if "reduced" in name:
 49             os.remove(name)
 50         elif "/otu_network/" in name:
 51             shutil.move(name, outFile)
 52 shutil.rmtree(outFile + '/otu_network')
 53 for paths,d,filelist in os.walk(outFile):
 54     for filename in filelist:
 55         name = os.path.join(paths, filename)
 56         if "props" in name:
 57             os.remove(name)
 58 
 59 """
 60 根据node表建立{节点名字---节点编号}的字典
 61 """
 62 node_name = [""]
 63 node_dictionary = {}
 64 with open(outFile + '/real_node_table.txt', "r") as node_file:
 65     informations = node_file.readlines()
 66     for i in range(1, len(informations)):
 67         tmp = informations[i].rstrip().split("	")
 68         node_dictionary[tmp[0]] = i
 69         node_name += [tmp[0]]
 70 """
 71 开始使用Networkx包建立图
 72 计算OTU网络的属性信息
 73 """
 74 G = networkx.Graph()
 75 with open(outFile + "/real_edge_table.txt" , "r") as edge_file:
 76     informations = edge_file.readlines()
 77     for i in range(1, len(informations)):
 78         tmp = informations[i].rstrip().split("	")
 79         G.add_edge(node_dictionary[tmp[0]], node_dictionary[tmp[1]], weight = eval(tmp[2]))
 80 
 81 
 82 """
 83 用实践测试单独对Sample或者是OTU构图的构图方法,
 84 结果证明这样的构图出来的结果基本上Sample是完全图,
 85 OTU单独构图的意义则不大,所以这种想法……失败了。
 86 """
 87 """
 88 H = networkx.Graph()
 89 with open(outFile + "/real_node_table.txt" , "r") as node_file:
 90     informations = node_file.readlines()
 91     for i in range(1,len(informations)):
 92         tmp = informations[i].rstrip().split("	")
 93         if tmp[2] == "otu_node":
 94             break
 95 position = i
 96 for i in range(position):
 97     for j in range(position):
 98         H.add_edge(i,j,weight=0)
 99         for k in range(position,len(G)):
100             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
101                 H.edge[i][j]['weight'] += min(G.edge[i][k]['weight']+G.edge[j][k]['weight'])
102         if H.edge[i][j]['weight'] == 0:
103             H.remove_edge(i,j)
104 
105 minx = 32767
106 for i in range(position):
107     for j in range(position):
108         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
109             minx = min(minx, H.edge[i][j]['weight'])
110 
111 for i in range(position):
112     for j in range(position):
113         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
114             H.edge[i][j]['weight'] -= minx
115             if H.edge[i][j]['weight'] <=0:
116                 H.remove_edge(i,j)
117 print H.edges()
118 
119 H = networkx.Graph()
120 with open(outFile + "/real_node_table.txt" , "r") as node_file:
121     informations = node_file.readlines()
122     for i in range(1,len(informations)):
123         tmp = informations[i].rstrip().split("	")
124         if tmp[2] == "otu_node":
125             break
126 position = i
127 for i in range(position,len(G)):
128     for j in range(position,len(G)):
129         H.add_edge(i,j,weight=0)
130         for k in range(position):
131             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
132                 H.edge[i][j]['weight'] += 1
133         if H.edge[i][j]['weight'] == 0:
134             H.remove_edge(i,j)
135 print len(H)
136 print len(H.edges())
137 print H.edges()
138 
139 minx = 32767
140 for i in range(position,len(G)):
141     for j in range(position,len(G)):
142         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
143             minx = min(minx, H.edge[i][j]['weight'])
144 
145 for i in range(position):
146     for j in range(position):
147         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
148             H.edge[i][j]['weight'] -= minx
149             if H.edge[i][j]['weight'] <=0:
150                 H.remove_edge(i,j)
151 """
152 
153 """3计算属性表,分本3"""
154 
155 #节点度中心系数,表示节点在图中的重要性
156 Degree_Centrality = networkx.degree_centrality(G)
157 #节点距离中心系数,值越大表示到其他节点距离越近,中心性越高
158 Closeness_Centrality = networkx.closeness_centrality(G)
159 #节点介数中心系数,值越大表示通过该节点的最短路径越多,中心性越高
160 Betweenness_Centrality = networkx.betweenness_centrality(G)
161 with open(os.path.join(args["output"], "network_centrality.txt"), "w") as tmp_file:
162     tmp_file.write("Node_ID	Node_Name	Degree_Centrality	")
163     tmp_file.write("Closeness_Centrality	Betweenness_Centrality
")
164     for i in range(1, len(G)+1):
165         tmp_file.write(str(i)+"	"+node_name[i]+"	")
166         tmp_file.write(str(Degree_Centrality[i])+"	")
167         tmp_file.write(str(Closeness_Centrality[i])+"	")
168         tmp_file.write(str(Betweenness_Centrality[i])+"
")
169 
170 
171 #网络传递性,二分图中应该为0,否则有问题
172 Transitivity = networkx.transitivity(G)
173 #网络直径
174 Diameter = networkx.diameter(G)
175 #网络平均最短路长度
176 Average_shortest_path = networkx.average_shortest_path_length(G)
177 with open(os.path.join(args["output"], "network_attributes.txt"), "w") as tmp_file:
178     tmp_file.write("Transitivity:"+str(Transitivity)+"
")
179     tmp_file.write("Diameter:"+str(Diameter)+"
")
180     tmp_file.write("Average_shortest_path_length:"+str(Average_shortest_path)+"
")
View Code
原文地址:https://www.cnblogs.com/neverforget/p/5784957.html