cookbook_数据结构和算法

1.1将数据分解为单独的变量

# Unpack a sequence into exactly as many names as it has elements.
list_a = [1, 2, 3, 4, 5, 6, 7, 8, 9]
(a, b, c, d, e, f, g, h, i) = list_a
print(a, b, c, d, e, f, g, h, i)


# Elements that are not needed are bound to the throwaway name `_`.
(_, b, c, d, e, f, g, h, _) = list_a
print(b, c, d, e, f, g, h)

View Code

1.2从任意长度的可迭代对象中分解元素

使用星号表达式 *name 实现

# A starred name absorbs any number of elements (possibly none) and is
# always bound to a list.
list_a = range(20)
(first, *middle, last) = list_a
print(first, middle, last)

# Star unpacking works well for tuples whose first field is a tag and whose
# remaining fields vary in number.
records = [
    ("foo", 1, 2),
    ("bar", "hello"),
    ("foo", 3, 4),
]

def do_foo(x, y):
    """Print the tag ``foo`` followed by *x* and *y*."""
    print("foo", x, y)

def do_bar(s):
    """Print the tag ``bar`` followed by *s*."""
    print("bar", s)

# Split each record into its leading tag and the remaining fields, then pass
# those fields as positional arguments to the handler for that tag.
# Records with any other tag are ignored.
for tags, *args in records:
    if tags == "bar":
        do_bar(*args)
    elif tags == "foo":
        do_foo(*args)

View Code

1.3保存最后N个元素

collections.deque()

import collections

# collections.deque(maxlen=N) is a bounded queue: once it holds N items,
# appending a new one silently discards the oldest.
def search(lines, pattern, history=5):
    """Yield each line containing *pattern* with the lines that preceded it.

    Args:
        lines: Iterable of strings (for example an open text file).
        pattern: Substring to look for with the ``in`` operator.
        history: Maximum number of preceding lines to keep as context.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        ``collections.deque`` (``maxlen=history``) holding the lines seen
        immediately before ``line``, oldest first.
    """
    previous_lines = collections.deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Yield a snapshot rather than the live deque: the live one keeps
            # changing as the scan continues, so callers that collect results
            # (e.g. list(search(...))) would otherwise see every match share
            # the same, final history.
            yield line, collections.deque(previous_lines, maxlen=history)
        previous_lines.append(line)

if __name__ == "__main__":
    # Print every line of test.txt that contains "python", preceded by up to
    # five lines of context and followed by a separator line.
    with open("test.txt", "r", encoding="utf8") as f:
        for line, previous in search(f, "python", 5):
            print(*previous, sep="", end="")
            print(line, end="")
            print("-" * 20)

# Quick tour of collections.deque: a list-like container that also supports
# appending and popping at the left end.
queue = collections.deque(("jiao", "li", "hao", "yu"))

queue.appendleft("wu")  # insert at the left end
print(queue)

queue.append("haha")  # insert at the right end
print(queue)

queue.popleft()  # drop the leftmost element ("wu")
print(queue)

print(queue[4])  # indexing works as on a list

View Code

1.4找到最大或最小的N个元素

heapq.nlargest(),heapq.nsmallest()

import heapq

nums = [5, 56, 7, 6, 34, 2, 5, 7, 6, 89, 80, -90, 0, 9, -67, 5, 45]

# The single smallest and largest values.
print(min(nums))
print(max(nums))

# The three largest and the three smallest values.
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

# nlargest() and max() accept a key function, so they also work on richer
# records such as dictionaries.
portfolio = [
    {"name": "jiao", "age": 24},
    {"name": "jsdfo", "age": 2},
    {"name": "jisd", "age": 12},
    {"name": "jdo", "age": 36},
    {"name": "li", "age": 25},
    {"name": "jgd", "age": 50},
]

# The three oldest entries, then the single oldest entry.
print(heapq.nlargest(3, portfolio, key=lambda s: s["age"]))
print(max(portfolio, key=lambda s: s["age"]))

View Code

1.5实现优先级队列

heapq.heappush(),heapq.heappop()

import heapq


# Each heap entry is a tuple: (-priority, insertion index, item).
class PriorityQueue:
    """Queue that pops the item with the highest priority first.

    The priority is negated because ``heapq`` keeps the smallest entry at
    the front. The running insertion index breaks ties, so items with equal
    priority come out in the order they were pushed.
    """

    def __init__(self):
        self._index = 0
        self._queue = []

    def push(self, item, priority):
        """Add *item* to the queue with the given *priority*."""
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        entry = heapq.heappop(self._queue)
        return entry[-1]

    def get(self):
        """Return the underlying heap list of entry tuples (not a copy)."""
        return self._queue

# Push four items with different priorities, pop the highest-priority one,
# then show the entries that remain in the queue.
q = PriorityQueue()
for name, priority in (("foo", 2), ("sdf", 3), ("sfasc", 5), ("fdsg", 4)):
    q.push(name, priority)
print(q.pop())
print(q.get())

View Code

1.6在字典中将键映射到多个值上

collections.defaultdict(list),collections.defaultdict(set)

import collections

# defaultdict(list) creates the empty list on first access to a missing key,
# so there is no need to check whether the key exists before appending.
d = collections.defaultdict(list)
d["a"].extend([1, 1, 1, 1])
print(d["a"])

View Code

1.7让字典保持有序

collections.OrderedDict()

import collections

# An OrderedDict iterates over its keys in insertion order.
# NOTE (from the original notes): it is about twice the size of a plain dict,
# so it should not be used for large data sets.
d = collections.OrderedDict(
    [
        ("foo", 1),
        ("bar", 2),
        ("spam", 3),
        ("gork", 4),
    ]
)
for i in d:
    print(i)

View Code

1.8与字典有关的计算问题

zip(),min(),sorted(),max()

# min(), max() and sorted() applied to a dict compare its keys. Usually we
# want to rank by value instead, and still learn which key the value
# belongs to.
prices = {
    "ACME": 23,
    "AAPL": 345,
    "IBM": 34,
    "FB": 24,
}

# Pair each value with its key using zip(). zip() returns an iterator that
# can be consumed only once, so a fresh one is built for every computation.

# Cheapest entry as a (value, key) tuple.
min_price = min(zip(prices.values(), prices.keys()))
print(min_price)

# All entries as (value, key) tuples, sorted in ascending order.
price_sorted = sorted(zip(prices.values(), prices.keys()))
print(price_sorted)

View Code

1.9在两个字典中寻找相同点

a = {"x": 2, "y": 5, "z": 7}

b = {"x": 2, "y": 8, "w": 4}

# Key and item views support set operations directly.
print(a.keys() & b.keys())  # keys present in both dicts
print(a.keys() - b.keys())  # keys in a that are not in b
print(a.items() & b.items())  # (key, value) pairs present in both dicts

View Code

1.10从序列中移除重复项且保持元素间顺序不变

def dedupe(items, key=None):
    """Yield the items of *items* in order, skipping duplicates.

    Args:
        items: Iterable to de-duplicate.
        key: Optional function mapping an item to the hashable value used to
            decide whether it has been seen before. When omitted, the item
            itself is used and must therefore be hashable.

    Yields:
        The first occurrence of each distinct item, in original order.
    """
    seen = set()
    for item in items:
        if key is None:
            marker = item
        else:
            marker = key(item)
        if marker in seen:
            continue
        yield item
        seen.add(marker)

View Code