一些模塊的用法

包

包的作用：
当模块内部函数过多，为了方便管理模块，把一个模块划分成多个模块，但是又不能改变导入方式
把多个模块放入一个包中，以后导入这个包，实际导入的就是包内的 __init__.py

什麽是包

包就是模塊

爲什麽要有包

1.包 == 模块，包拿来导入用的

2.包是含有__init__.py的文件夹；导包就是导入__init__

3.包一定是被当作模块文件导入，模块文件 m1.py/m2.py 的搜索路径以执行文件（包的介绍.py）的路径为准

time模塊

時間相關，提供了三種不同類型的時間（時間戳、格式化時間、結構化時間），三種不同類型的時間可以相互轉換

import time

print(time.time())  # timestamp form: seconds since the epoch, as a float

# Formatted time: a human-readable string built from a format spec
print(time.strftime('%Y-%m-%d %X'))

# Structured time: a struct_time for the current local time
print(time.localtime())


# Structured time --> formatted time
# 3600*24*365 is the timestamp one (365-day) year after the epoch
struct_time = time.localtime(3600*24*365)
print(time.strftime('%Y-%m-%d %X',struct_time))


# Formatted time --> structured time (parse with the same format spec)
format_time = time.strftime('%Y-%m-%d %X')
print(time.strptime(format_time,'%Y-%m-%d %X'))


# Structured time --> timestamp
struct_time = time.localtime(3600*24*365)
print(time.mktime(struct_time))

# Timestamp --> structured time
time_stamp = time.time()
print(time.localtime(time_stamp))



# (*******) presumably the notes' marker for the two calls worth memorising
time.time()
time.sleep(1)

datetime模塊

# datetime模块：时间的加减
import datetime

# Take one reference instant and derive every other value from it.
now = datetime.datetime.now()
print(now)

# A bare positional argument to timedelta is a number of days.
three_days = datetime.timedelta(3)
print(now + three_days)

# Shift forward by three weeks.
three_weeks = datetime.timedelta(weeks=3)
print(now + three_weeks)

# Shift forward by three hours.
three_hours = datetime.timedelta(hours=3)
print(now + three_hours)

# Shift back by three hours, written two equivalent ways:
# subtracting a positive delta and adding a negative one.
print(now - three_hours)
print(now + datetime.timedelta(hours=-3))

# replace() returns a copy with the named fields overwritten.
fixed_moment = now.replace(
    year=1949,
    month=10,
    day=1,
    hour=10,
    minute=1,
    second=0,
    microsecond=0,
)
print(fixed_moment)

random模塊

random模块：随机数

import random
# Must know
# random() returns a float in the half-open range [0.0, 1.0)
print(random.random())
# randint(1, 3) returns an integer from 1 to 3, both ends included
print(random.randint(1, 3))
# shuffle() reorders the list in place
lt = [1, 2, 3]
random.shuffle(lt)
print(lt)
# choice() picks one element at random
print(random.choice(lt))
# Seeding fixes the sequence the generator (Mersenne Twister) produces;
# both seed() calls below are left disabled, so the next value is not reproducible
import time
# random.seed(time.time())
# random.seed(111111111111)
print(random.random())
# Good to know
# sample() picks 2 elements from the list without replacement
print(random.sample([1, 'a', 'c', 2, 3, 4], 2))

hashlib與hmac模塊

hashlib模塊：對字節串（bytes）計算哈希摘要（如md5），常用於密碼的加密存儲；摘要不可逆

hmac模塊：同樣對字節串計算哈希摘要，但計算時會混入密鑰（相當於加鹽）

hashlib有叠加性

import hashlib

# update() is cumulative: feeding the data in pieces gives the same digest
# as feeding the concatenation in a single call.
m = hashlib.md5()
m.update(b'say')
m.update(b'hello')
print(m.hexdigest())

# A separate hash object is required for the comparison; calling
# update(b'sayhello') on `m` again would hash b'sayhellosayhello' instead.
m_once = hashlib.md5()
m_once.update(b'sayhello')
print(m_once.hexdigest())
# Both print() calls above output the same digest.

手机号/生日/性别/qq账号/以前的密码/ --》挖矿（算法）

import hashlib

# MD5 hex digest whose plaintext the loop below tries to recover.
hash_pwd = '0562b36c3c5a3925dbe3c4d32a4f2ba2'

# Candidate plaintexts to try (a dictionary attack).
pwd_list = [
    'hash3714',
    'hash1313',
    'hash94139413',
    'hash123456',
    '123456hash',
    'h123ash',
]

# Hash every candidate the same way the target was hashed and compare digests;
# a match means the candidate is the original password.
for pwd in pwd_list:
    res = hashlib.md5(pwd.encode('utf8')).hexdigest()
    if res == hash_pwd:
        print(f'获取密码成功:{pwd}')

hmac模塊可以用於防止密碼被撞庫破解：摘要中混入了密鑰，不知道密鑰就無法像上面那樣逐個比對

import hmac

# hmac.new() needs an explicit digest algorithm: since Python 3.8 omitting
# digestmod raises TypeError. The 32-hex-character digests recorded in the
# original notes indicate MD5, so that is what is requested here.
m = hmac.new(b'maerzi', digestmod='md5')
m.update(b'hash123456')  # the notes record f82317e44545b0ab087109454814b5c4 (not re-verified)
print(m.hexdigest())

# Same message, different key --> a different digest.
m = hmac.new(b'sdfjhjk2394879ul%$$Y#($&', digestmod='md5')
m.update(b'hash123456')  # the notes record 2a70fd0f13cb49357f40d326a4e071a2 (not re-verified)
print(m.hexdigest())

# Same candidate list as in the hashlib example; it is not used by the code in this section.
pwd_list = [
    'hash3714',
    'hash1313',
    'hash94139413',
    'hash123456',
    '123456hash',
    'h123ash',
]

requests模塊

requests模塊用於發送HTTP請求，常用來寫爬蟲：從網頁爬取數據

url-->一個特定網站-->永不重複

一般與re模塊一起使用

import requests
import re

# Download the page and show the HTML that came back.
res=requests.get("http://www.baidu.com")
print(res.text)

# In scraping patterns '.*?' is the usual non-greedy "match anything" piece.
# NOTE(review): the pattern below is still the empty placeholder from the
# notes; replace it with a real expression. findall() needs the text to
# search as its second argument -- without it the call raises TypeError.
resb = re.findall('', res.text)

爬取段子網
 import re
 import requests

 # Download the page and keep its HTML as text.
 response = requests.get('https://ishuo.cn')
 data = response.text

 # Two alternatives with one capture group each: group 1 is the body of a
 # content <div>, group 2 is the link text of a /subject/ link.
 res = re.findall('<div class="content">(.*?)</div>|</span><a href="/subject/.*?">(.*?)</a>', data)

 with open('duanzi_new.txt', 'w', encoding='utf8') as fw:
     for i in res:  # i is a 2-tuple of str; the group that did not take part in the match is ''
         print(i)
         if i[1]:
             # Link text, followed by a colon and a blank line.
             fw.write(i[1] + ':' + '\n\n')
         if i[0]:
             # Skip content blocks that begin with a <ul> list.
             if i[0].startswith('<ul>'):
                 continue
             fw.write(i[0] + '\n')

typing模塊

typing模塊：與函數聯用，用來標註（提示）函數參數和返回值的數據類型，提供了基礎數據類型之外的數據類型；注意類型標註只是提示，運行時不會強制檢查

lt=[1,2,3,4]

print(type(lt) is list)

from typing import Iterable,Iterator,Generator



#print(lt == Iterable)#False

def func(x: int, lt: Iterable) -> list:
    """Return the list [1, 2, 3], ignoring both arguments.

    The parameters exist only to carry type annotations; Python does not
    check annotations when the function is called.
    """
    return list(range(1, 4))

func(10,'123123')

re模塊

re模塊：去字符串找符合某種特點的字符串

import re

# s = '去字符串找符合某种特点的字符串'
#
# res = re.findall('', s)
# print(res)


# 元字符

s = 'abcdabc'
#    abc
#        abc
#     bc  bc

# ^: the match must start at the beginning of the string
res = re.findall('^ab', s)
print(res)  # ['ab']
res = re.findall('^bc', s)
print(res)  # []
# $: the match must end at the end of the string
s = 'abcdabc'
res = re.findall('bc$', s)
print(res)  # ['bc']

# .: any single character (except a newline)
s = 'abc红abc'
res = re.findall('abc.', s)
print(res)  # ['abc红']

# \d: a digit
s = 'skld2342ljk'
res = re.findall(r'\d', s)
print(res)  # ['2', '3', '4', '2']

# \w: a "word" character -- letter, digit or underscore
s = 'skld_23 42ljk'
res = re.findall(r'\w', s)
print(res)

# \s: whitespace -- space, \t, \n

s = 'skld_23 42ljk'
res = re.findall(r'\s', s)
print(res)  # [' ']

# \D: anything that is not a digit
s = 'skld2342ljk'
res = re.findall(r'\D', s)
print(res)  # ['s', 'k', 'l', 'd', 'l', 'j', 'k']

# \W: anything that is not a word character (here: the space)
s = 'skld_23 42ljk'
res = re.findall(r'\W', s)
print(res)  # [' ']

# \S: anything that is not whitespace
s = 'skld_23 42ljk'
res = re.findall(r'\S', s)
print(res)

# +: one or more of the preceding character
s = 'abcddddd abcd abc'
print(re.findall('abcd+', s))  # ['abcddddd', 'abcd']

# ?: zero or one of the preceding character
s = 'abcddddd abcd abc'
print(re.findall('abcd?', s))  # ['abcd', 'abcd', 'abc']

# *: zero or more of the preceding character
s = 'abcdddddddddddddddddd abcd abc'
print(re.findall('abcd*', s))

# []: any one of the characters inside the brackets
s = 'abc bbc cbc dbc'
print(re.findall('[abc]bc', s))  # ['abc', 'bbc', 'cbc']

# [^]: any one character NOT listed inside the brackets
s = 'abc bbc cbc dbc'
print(re.findall('[^abc]bc', s))  # ['dbc']

# |: alternation (either side)
s = 'abc bbc dbc'
print(re.findall('abc|bbc', s))  # ['abc', 'bbc']

# {2}: exactly 2 of the preceding character

s = 'abccabc abccc'
print(re.findall('abc{2}', s))  # ['abcc', 'abcc']

# {1,2}: between 1 and 2 of the preceding character

s = 'abccabc abccc'
print(re.findall('abc{1,2}', s))  # ['abcc', 'abc', 'abcc']

# Greedy mode

# . (any character) * (zero or more) -- takes as much as it can

s = 'abcdefgbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbg'
print(re.findall('a.*g', s))  # one match: the whole string, up to the last 'g'

# Non-greedy mode (*******)

# . (any character) * (zero or more) ? (switches the * to non-greedy)
s = 'abcdefgbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbg'
print(re.findall('a.*?g', s))  # ['abcdefg'] -- stops at the first 'g'

# Pitfall
# '.*?' with nothing after it is satisfied by zero characters,
# so every match it finds is an empty string
s = 'abcdefg'
print(re.findall('.*?', s))

# 了解：特殊构造


# a(?=\d): an 'a' that is followed by a digit; the lookahead only checks,
# so the digit is neither returned nor consumed
s = 'a123 aaaa a234 abc'
#    a1    aa
#           aa
#            aa a2    ab
digit_ahead = re.findall(r'a(?=\d)', s)
print(digit_ahead)  # ['a', 'a']
# a(?=\w): an 'a' that is followed by any word character
word_ahead = re.findall(r'a(?=\w)', s)
print(word_ahead)  # six 'a's, one per pair sketched above

# Matching e-mail addresses:
s = '#@#@#@nickchen121@163.com$$$$////nick@qq.com$$#$#$[]]2287273393@162.com@$2423423lksdlfj#'
# \w+ (one or more letters/digits/underscores) @ \w+ then a literal ".com";
# the dot is escaped so it cannot stand for an arbitrary character
emails = re.findall(r'\w+@\w+\.com', s)
print(emails)

# 函数


## compile
s = 'abcd abcddd abc'
# res = re.compile('abcd*')
# compile() builds a reusable pattern object.
email_pattern = re.compile(r'\w+@\w+\.com')
phone_patter = re.compile(r'\d{13}')  # a run of exactly 13 digits
print(email_pattern.findall(s))  # [] -- s contains no e-mail address

print(re.findall('abcd*', s))  # ['abcd', 'abcddd', 'abc']

# ## match: looks only at the start of the string and returns the first hit.
# ## With no hit it returns None, so the .group() call below would raise
# ## AttributeError -- which is what happens for this input.
# s = 'ab abcddd abc'
# res = re.match('abcd*', s)
# print(res.group())

## search: scans the whole string and stops at the first hit
s = 'ab abcddd abc'
res = re.search('abcd*', s)
print(res.group())  # 'abcddd'

## split: cut the string at every run of digits
s = 'ab23423abcddd234234abcasdfjlasjdk234l23lk4j2kl34kl25k3j2kl3j5lkj'
split_parts = re.split(r'\d+', s)
print(split_parts)

## sub: like str.replace, but the thing replaced is a pattern
s = 'ab23423abcddd234234abcasdfjlasjdk234l23lk4j2kl34kl25k3j2kl3j5lkj'
sub_result = re.sub(r'\d+', ' ', s)
print(sub_result)

## subn: same as sub, but also reports how many replacements were made
s = 'ab23423abcddd234234abcasdfjlasjdk234l23lk4j2kl34kl25k3j2kl3j5lkj'
subn_result = re.subn(r'\d+', ' ', s)
print(subn_result)

# 补充（非常有用）

## 修饰符 --> re.S会让.匹配换行符（*****）
s = '''abc
abcabc*abc
'''

# By default '.' does not match a newline
dot_default = re.findall('abc.abc', s)
print(dot_default)  # ['abc*abc']
# With re.S '.' matches a newline as well
dot_all = re.findall('abc.abc', s, re.S)
print(dot_all)  # ['abc\nabc', 'abc*abc']

## Groups --> findall returns only what the parentheses captured (*****)
# Here 'd' is the literal letter d, not a digit class.
s = 'abc abcd abcdd'
print(re.findall('a(.)c(d)', s))  # [('b', 'd'), ('b', 'd')]

## Named groups (good to know)
s = 'abc abcd abcdd'
print(re.search('a(?P<name>.)c(?P<name2>d)', s).groupdict())  # {'name': 'b', 'name2': 'd'}

# Advanced usage
s = 'abc123abc123'  # the part that matches is c123a
sub_plain = re.sub(r'c(\d+)a', ' ', s)
print(sub_plain)  # 'ab bc123'
# \g<name1> in the replacement inserts what the named group captured;
# that syntax is fixed and must be written exactly like this
sub_named = re.sub(r'c(?P<name1>\d+)a', r' \g<name1> ', s)
print(sub_named)  # 'ab 123 bc123'

# 以下必须得记住

# .*?
# 贪婪和非贪婪
# findall
# re.S
# match和search的区别
# 分组
# 有名分组：给分组加名字


# 哪些做了解

# 杂七杂八的元字符
# 特殊构造元字符
# 特殊修饰符