DAVID Web Service

开始是打算用protein gi number来获得它们的GO term。本来想用UniProt来解决，不过那样需要先转换ID（convert ID），这其中会导致有的ID对应不正常：一是缺失；二是一对多，或者多对一。数据至少还要重新整理一次。

后来想到了DAVID：先手动提交数据，但要是数据来自多个物种，它还不让整体分析，不能一次拿下所有结果，拿下后还需要重新解析一遍。发信问他们，他们也是自己先转ID再处理。

后来看到了DAVID Web Service，用这个可以直接搞定，唯一的缺点是直接生成的结果保存为文本后不好解析。另外，关于GOTERM_XX_FAT，在小木虫上查到的说法是：这是DAVID自己弄的GO slim。官方的帮助文档链接已经失效了。

于是用json重新保存一遍。

import sys
sys.path.append('../')

import logging
import traceback as tb
import suds.metrics as metrics
from tests import *
from suds import *
from suds.client import Client
from datetime import datetime
import json

# Module-level counter; it is only initialised in this section.
errors = 0

# Configure logging (setup_logging is presumably supplied by one of the star
# imports above -- verify), then switch the suds client logger to DEBUG.
setup_logging()
suds_client_log = logging.getLogger('suds.client')
suds_client_log.setLevel(logging.DEBUG)

# WSDL location of the DAVID web service.
url = 'http://david.abcc.ncifcrf.gov/webservice/services/DAVIDWebService?wsdl'
print('url=%s' % (url,))

# Build the service client from the WSDL and print it (introspection).
client = Client(url)
print(client)

# Authenticate with the user e-mail and print whatever the service returns.
auth_reply = client.service.authenticate('XXX@XXX.XXX')
print(auth_reply)

# Submit an ID list to the service and print whatever it returns.
# A longer alternative input list:
#inputIds = '16077069,16077070,16077074,16077075,16077077,16077081,255767015,255767017,16077099,16077106,16077114,16077118,16077119,16077121'
inputIds = '16127995,16127996,16127997,16127998'
idType = 'PROTEIN_GI_ACCESSION'
listName = 'make_up'
listType = 0
add_list_reply = client.service.addList(inputIds, idType, listName, listType)
print(add_list_reply)

#print client.service.getDefaultCategoryNames()

# Arguments for getChartReport (the call itself is currently disabled).
thd = 0.1
count = 2
#print client.service.getChartReport(thd, count)

# Arguments for getTermClusterReport (the call itself is currently disabled).
overlap = 3
initialSeed = 3
finalSeed = 3
linkage = 0.5
kappa = 20
#myresult = client.service.getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)

# Fetch the table report for the list submitted earlier.
mytable = client.service.getTableReport()
#mylist = client.service.getListReport()


def _goterm_record(item):
    """Flatten one table-report entry into a plain dict.

    The result holds the keys 'gi', 'id' and 'name', plus one key per
    annotation record whose category starts with 'GOTERM', mapped to that
    record's 'terms'.
    """
    record = {
        'gi': item['values'][0]['array'][0],
        'id': item['geneObject']['id'],
        'name': item['name'],
    }
    for annotation in item['annotationRecords']:
        category = annotation['category']
        if category.startswith('GOTERM'):
            record[category] = annotation['terms']
    return record


# Keep the raw textual form of the report for reference.
with open('mytable.txt', 'w') as f:
    print >> f, mytable

# Re-save the GOTERM annotations as JSON, which is easier to parse than the
# raw text dump.
mylist1 = [_goterm_record(item) for item in mytable]

# Use a context manager so the file is closed even if json.dump raises.
with open('test_json1.txt', 'w') as f:
    json.dump(mylist1, f, sort_keys=True, indent=1)