collections模块

defaultdict

创建一个多值字典（一个键对应多个值）时，如果我们自己实现，需要先判断键是否存在并手动初始化；使用 defaultdict 则可以省去这一步：

# Hand-rolled version: each key's list has to be created explicitly
# the first time that key is seen.
d = {}
for key, value in pairs:
    if key in d:
        d[key].append(value)
    else:
        d[key] = [value]

# Using defaultdict: the `list` factory supplies an empty list the first
# time a key is accessed, so no membership check is needed before appending.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)

defaultdict 的一个特征是它会自动初始化每个 key 刚开始对应的值，所以你只需要关注添加元素操作了

from collections import defaultdict

# A list-backed defaultdict keeps values in the order they were inserted.
d = defaultdict(list)
d['a'].extend([1, 2, 3])  # first access to 'a' creates the empty list
print(d)
print(d['a'])

# A set-backed defaultdict removes duplicate values automatically.
d = defaultdict(set)
d['a'].update((1, 1, 2))  # the repeated 1 is stored only once
print(d)
print(d['a'])

defaultdict(<class 'list'>, {'a': [1, 2, 3]})
[1, 2, 3]
defaultdict(<class 'set'>, {'a': {1, 2}})
{1, 2}

OrderedDict

为了能控制一个字典中元素的顺序，你可以使用 collections 模块中的 OrderedDict 类。在迭代操作的时候它会保持元素被插入时的顺序。（注意：从 Python 3.7 起，内置 dict 也保证保持插入顺序；OrderedDict 仍然提供 move_to_end()、对顺序敏感的相等比较等额外功能。）

OrderedDict 内部维护着一个根据键插入顺序排序的双向链表。每次当一个新的元素插入进来的时候，它会被放到链表的尾部。对于一个已经存在的键的重复赋值不会改变键的顺序。

需要注意的是，一个 OrderedDict 的大小是一个普通字典的两倍，因为它内部维护着另外一个链表。所以如果你要构建一个需要大量 OrderedDict 实例的数据结构的时候（比如读取 100,000 行 CSV 数据到一个 OrderedDict 列表中去），那么你就得仔细权衡一下是否使用 OrderedDict 带来的好处要大过额外内存消耗的影响。

from collections import OrderedDict

# Pairs are inserted left to right, so iteration yields name, age, sex.
d = OrderedDict([('name', 'wangys'), ('age', 18), ('sex', 'male')])
print(d)
# Walk the keys in insertion order and look each value up.
for k in d:
    v = d[k]
    print(k, v)

Counter

Counter 对象在几乎所有需要制表或者计数数据的场合是非常有用的工具。在解决这类问题的时候你应该优先选择它，而不是手动地利用字典去实现。

from collections import Counter

# Sample text, split on whitespace into the list of words to count.
words = (
    "look into my eyes look into my eyes "
    "the eyes the eyes the eyes not around the "
    "eyes don't look around the eyes look into "
    "my eyes you're under"
).split()

# Start from an empty counter and feed it the words.
word_counts = Counter()
word_counts.update(words)
top_three = word_counts.most_common(3)  # the three most frequent words
# Emit the full counter, its items view and the top three, one per line.
print(word_counts, word_counts.items(), top_three, sep='\n')
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
dict_items([('look', 4), ('into', 3), ('my', 3), ('eyes', 8), ('the', 5), ('not', 1), ('around', 2), ("don't", 1), ("you're", 1), ('under', 1)])
[('eyes', 8), ('the', 5), ('look', 4)]