DAVID Web Service

开始是打算用 protein GI number 获得它们的 GO term。本来想用 UniProt 来解决,不过那样需要先转换 ID,这其中会导致有的 ID 对应不正常:一是缺失;二是一对多,或者多对一。数据至少还要重新整理一次。

后来想到了 DAVID,先手动提交数据。而且要是包含多个物种,它还不让整体分析,不能一次拿下所有结果,拿下后还需要重新解析一遍。发信问他们,他们也是自己先转 ID 再处理。

后来看到了 DAVID Web Service,用这个可以直接搞定,唯一的缺点是直接生成的结果存为文本后不好解析。另外,关于 GOTERM_XX_FAT,在小木虫上查到的说法是:它是 DAVID 自己做的 GO slim。官方的帮助文档链接已经失效了。

于是用json重新保存一遍。

import sys
sys.path.append('../')

import logging
import traceback as tb
import suds.metrics as metrics
from tests import *
from suds import *
from suds.client import Client
from datetime import datetime
import json

# Query the DAVID Web Service for a list of protein GI numbers and save the
# GO-term annotations of every returned gene as JSON.
#
# Outputs (written to the current working directory):
#   mytable.txt     - raw text dump of the table report returned by the service
#   test_json1.txt  - list of per-gene records: 'gi', 'id', 'name' plus one
#                     key per GOTERM* annotation category
#
# NOTE(review): the print calls use the single-argument parenthesised form so
# the script behaves the same on Python 2 and also parses on Python 3.

# Counter carried over from the sample client; nothing in this script updates it.
errors = 0

# setup_logging is presumably supplied by the `tests` star import - TODO confirm.
setup_logging()
logging.getLogger('suds.client').setLevel(logging.DEBUG)

# NOTE(review): confirm this WSDL endpoint is still served before running.
url = 'http://david.abcc.ncifcrf.gov/webservice/services/DAVIDWebService?wsdl'
print('url=%s' % url)

# Create a service client using the WSDL.
client = Client(url)

# Print the service (introspection).
print(client)

# Authenticate with the user's e-mail address (placeholder value here).
print(client.service.authenticate('XXX@XXX.XXX'))

# Add a list.
# Alternative, longer input kept for reference:
# inputIds = '16077069,16077070,16077074,16077075,16077077,16077081,255767015,255767017,16077099,16077106,16077114,16077118,16077119,16077121'
inputIds = '16127995,16127996,16127997,16127998'
idType = 'PROTEIN_GI_ACCESSION'
listName = 'make_up'
# NOTE(review): 0 presumably selects a gene list rather than a background
# list - confirm against the DAVID Web Service documentation.
listType = 0
print(client.service.addList(inputIds, idType, listName, listType))

# Optional: print(client.service.getDefaultCategoryNames())

# Parameters for the optional chart report:
#   print(client.service.getChartReport(thd, count))
thd = 0.1
count = 2

# Parameters for the optional term-cluster report:
#   myresult = client.service.getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
overlap = 3
initialSeed = 3
finalSeed = 3
linkage = 0.5
kappa = 20

mytable = client.service.getTableReport()
# Optional: mylist = client.service.getListReport()

# Keep the raw report as text for reference; it is hard to parse in this form,
# which is why the records are re-saved as JSON below.
with open('mytable.txt', 'w') as f:
    f.write(str(mytable) + '\n')

mylist1 = []
for item in mytable:
    tpdict = {}
    tpdict['gi'] = item['values'][0]['array'][0]
    tpdict['id'] = item['geneObject']['id']
    tpdict['name'] = item['name']
    # Only the GO-term annotation categories (GOTERM_*) are kept.
    tplist = [rec for rec in item['annotationRecords']
              if rec['category'].startswith('GOTERM')]
    for i in tplist:
        tpdict[i['category']] = i['terms']
    mylist1.append(tpdict)

# The context manager closes the file even if serialisation fails.
with open('test_json1.txt', 'w') as f:
    json.dump(mylist1, f, sort_keys=True, indent=1)
原文地址:https://www.cnblogs.com/hluo/p/4107543.html