Python高级编程-collections模块（番外篇）

Python高级编程-collections模块（番外篇）

x.1 collections模块介绍

from collections import *
from collections.abc import *
"""
提供了更加高级的数据结构
"""

x.2 tuple的功能

不可变，iterable
拆包 # name, *others = ("coder", 20, 175)
tuple不可变性不是绝对的 # ("name", [20, 175])
tuple比list好的地方

immutable的重要性：

性能优化：元素全部为immutable的tuple会在编译时被确定为常量，因此创建tuple的速度明显快于创建同样内容的list

线程安全

可以作为dict的key

拆包特性

如果要拿C语言来类比，Tuple对应的是struct，而List对应的是array

x.3 namedtuple 详解

from collections import namedtuple


# namedtuple builds a tuple subclass whose fields can be read by name as well
# as by position.
User = namedtuple("User", ["name", "age", "height"])
user = User(name="booby", age=29, height=175)
print(user.age, user.name, user.height)


# Why namedtuple instead of a hand-written class?
# Instances carry no per-instance __dict__, so they need no more memory than a
# regular tuple while still offering attribute access.


user_tuple = ("bobby", 29, 175)
# Unpack an existing tuple into the fields; User._make(user_tuple) is equivalent.
user = User(*user_tuple)
# _asdict() maps each field name to its value.
print(user._asdict())
# A namedtuple is still a tuple, so ordinary unpacking works.
name, age, height = user

x.4 defaultdict 功能详解

from collections import defaultdict


# Approach 1: count with a plain dict, branching on whether the name has
# already been seen.
user_dict = {}
users = ["bobby1", "bobby2", "bobby3", "bobby1", "bobby2", "bobby2"]
for user in users:
    if user in user_dict:
        user_dict[user] += 1
    else:
        user_dict[user] = 1

print(user_dict)


# Approach 2: dict.setdefault stores the default only when the key is missing
# and returns the stored value, so no explicit membership test is needed.
user_dict = {}
users = ["bobby1", "bobby2", "bobby3", "bobby1", "bobby2", "bobby2"]
for user in users:
    user_dict[user] = user_dict.setdefault(user, 0) + 1

print(user_dict)


# Approach 3: defaultdict takes a callable (int() yields 0) and calls it to
# create the value for a key that is missing on first access.
default_dict = defaultdict(int)
users = ["bobby1", "bobby2", "bobby3", "bobby1", "bobby2", "bobby2"]
for user in users:
    default_dict[user] += 1

print(default_dict)


"""
defaultdict如何实现？
使用__missing__魔法函数
"""

x.5 deque 功能详解

"""
双端队列
线程安全，list不是线程安全
"""

from collections import deque


# A list's pop() with no argument removes and returns the last element.
user_list = ["bobby", "bobby1"]
user_name = user_list.pop()
print(user_name, user_list)


# A deque accepts any iterable. Iterating a dict yields its keys, so
# user_list_dict ends up holding only "bobby1" and "bobby2".
user_list = deque(["bobby1", "bobb2"])
user_list_dict = deque({"bobby1": 1, "bobby2": 2})
print(user_list)
print(user_list_dict)

# appendleft() pushes onto the front of the deque.
user_list.appendleft("bobby8")

# copy() is shallow: the result is a distinct deque object (hence different
# ids below), but the elements themselves are shared references.
user_list_copy = user_list.copy()
print(id(user_list), id(user_list_copy))


# Signature-only outline of deque's interface: every method body is elided
# with `...`, so this listing documents call signatures and defines no behavior.
# NOTE(review): Generic, Optional, _T and sys are not imported anywhere in the
# visible text — presumably this was copied from a .pyi type stub; confirm.
class deque(MutableSequence[_T], Generic[_T]):
    # Read-only property; the Optional[int] return type means it may be None.
    @property
    def maxlen(self) -> Optional[int]: ...
    # Constructor: an optional iterable of initial items and an optional maxlen.
    def __init__(self, iterable: Iterable[_T] = ...,
                 maxlen: int = ...) -> None: ...
    # Single-item insertion; the "left" variant targets the opposite end.
    def append(self, x: _T) -> None: ...
    def appendleft(self, x: _T) -> None: ...
    def clear(self) -> None: ...
    # copy() is only declared when running on Python 3.5 or later.
    if sys.version_info >= (3, 5):
        def copy(self) -> deque[_T]: ...
    def count(self, x: _T) -> int: ...
    # Bulk insertion from an iterable, at either end.
    def extend(self, iterable: Iterable[_T]) -> None: ...
    def extendleft(self, iterable: Iterable[_T]) -> None: ...
    def insert(self, i: int, x: _T) -> None: ...
    def index(self, x: _T, start: int = ..., stop: int = ...) -> int: ...
    # NOTE(review): this signature accepts an index `i`; the runtime
    # collections.deque.pop() appears to take no arguments — confirm against
    # the official documentation.
    def pop(self, i: int = ...) -> _T: ...
    def popleft(self) -> _T: ...
    def remove(self, value: _T) -> None: ...
    def reverse(self) -> None: ...
    def rotate(self, n: int) -> None: ...

x.6 Counter功能详解

"""
用来做统计
"""

from collections import Counter


# Counter tallies how often each element of an iterable occurs; a str is
# counted character by character.
counter_str = Counter("dffdfdfd")
print(counter_str)

users = ["bobby1", "bobby2", "bobby3", "bobby1", "bobby2", "bobby2"]
user_counter = Counter(users)
counter_str2 = Counter("dfdfdfdf")

# update() accepts any iterable (or another Counter) and adds its counts to
# the existing ones instead of replacing them.
user_counter.update("gwdesd")
user_counter.update(counter_str2)
print(user_counter)


# Top-n problem (heapq, a heap data structure): most_common(2) returns the two
# entries with the highest counts as (element, count) pairs.
print(user_counter.most_common(2))

x.7 OrderedDict 功能详解

"""
有序字典
"""

from collections import OrderedDict


# Since Python 3.7 a plain dict preserves insertion order as a language
# guarantee; Python 2 dicts are unordered.
user_dict = dict()
user_dict["b"] = "bobby2"
user_dict["a"] = "bobby1"
user_dict["c"] = "bobby3"
print(user_dict)


# OrderedDict also keeps insertion order and adds order-aware helpers such as
# popitem() and move_to_end().
user_dict = OrderedDict()
user_dict["b"] = "bobby2"
user_dict["a"] = "bobby1"
user_dict["c"] = "bobby3"
print(user_dict)
print(user_dict.pop("a"))  # pop() requires a key and returns its value
print(user_dict.popitem())  # removes and returns the last (key, value) pair
# move_to_end() reorders in place and returns None, so call it first and then
# print the dict rather than printing the call's return value.
user_dict.move_to_end("b")
print(user_dict)

x.8 ChainMap 功能

"""
用于连接dict
"""


from collections import ChainMap


user_dict1 = {"a": "bobby1", "b": "bobby2"}
user_dict2 = {"c": "bobby2", "d": "bobby3"}
# Without ChainMap, each dict needs its own loop.
for key, value in user_dict1.items():
    print(key, value)
for key, value in user_dict2.items():
    print(key, value)


# ChainMap presents several dicts as a single mapping without copying them.
new_dict = ChainMap(user_dict1, user_dict2)
# new_child() does not modify new_dict: it returns a NEW ChainMap with the
# given mapping placed in front, so the result must be kept to be of any use.
child_dict = new_dict.new_child({"aa": "aa", "bb": "bb"})
print(child_dict["aa"])
print(new_dict["c"])
print(new_dict.maps)
# maps holds references to the original dicts, not copies, so this assignment
# also changes user_dict1.
new_dict.maps[0]["a"] = "bobby"
for key, value in new_dict.items():
    print(key, value)


# What about duplicate keys? Lookup returns the value from the first mapping
# that contains the key, and iteration yields each key only once.
user_dict1 = {"a": "bobby1", "b": "bobby2"}
user_dict2 = {"b": "bobby2", "d": "bobby3"}
new_dict = ChainMap(user_dict1, user_dict2)
for key, value in new_dict.items():
    print(key, value)