No.008-Python-学习之路-Day5-random|os|sys|shutil|shelve|xml|PyYAML|ConfigParser|hashlib|RE

Random模块

模块主要函数

>>> import random
>>> random.random()  # 0-1内的随机浮点数
0.06052892434757606
>>> random.uniform(1,3) # 1-3范围内的随机浮点数
>>> random.uniform(3,1) # 3-1也是可以的
1.0158647412645498
>>> random.randint(1,3) # 1-3范围内的随机整数
1
>>> random.randrange(0,100,2) # 类似于从rang()取出一个值
32
>>> random.choice('学习Python') # 从一个序列中获取一个随机元素
'习'
>>> random.choice(['every','cloud','has', 'a', 'silver','lining'])
'a'
>>> s =[1,2,5,123123,132,1123] #对一个序列进行随机排序并返回
>>> random.shuffle(s)
>>> s
[1, 123123, 132, 5, 2, 1123]
>>> l = [1,2,3,4,5,6,7]  # 对一个序列进行随机的切片
>>> random.sample(l, 4)
[1, 4, 6, 3]

综合举例-验证码生成

def check_code(num):
    '''
    create a [num] long check code by random()
    :param num:how long check code will be created
    :return:random check code in <range 0-9,a-z,A-Z>
    '''
    import random
    code = ''
    if num and str(num).isdigit():
        # 生成字符串的长度
        for i in range(int(num)):
            # 随机选择类型
            choice = random.randrange(3)
            if choice == 0:
                # 随机的数字
                code += str(random.randint(0,9))
            elif choice == 1:
                # 随机的大写字母
                code += chr(random.randint(65, 90))
            elif choice == 2:
                # 随机的小写字母
                code += chr(random.randint(97, 122))
        return code
    else:
        return False


print(check_code("aa"))
print(check_code(6))

os模块

os 模块提供了非常丰富的方法用来处理文件和目录,常用的方法如下：

import os 

#目录相关操作
os.getcwd() # 获取当前目录
os.chdir(dirname) # 切换工作目录到指定目录
os.mkdir(dirname) # 非递归创建目录
os.rmdir(dirname) # 非递归删除目录
os.makedirs(dirname) # 递归创建目录
os.removedirs(dirname) # 递归删除目录
os.listdir(dirname) # 列出目录下所有内容，list对象
os.curdir # 返回当前目录
os.pardir # 返回父级目录
os.remove(path) # 删除文件
os.rename('oldname', 'newname') # 重命名文件
os.stat(path) # 获取文件信息
# 系统相关信息获取
os.sep # 获取路径分隔符，win的"\",linux的"/"
os.linesep # 获取换行符
os.pathsep # 获取path的分隔符，win的";",linux的":"
os.environ # 获取环境变量
os.name # 获取系统的类型，win是ns，linux是posix
# 针对系统执行命令
os.system("command") # 执行命令，返回0,1成功或失败
os.popen("command") # 执行命令，将命令获取的值存为一个文件对象，可使用read
(status, output) = commands.getstatusoutput("command") # 如果想同时获得以上两种结果，可以使用该条命令；
# 路径的相关信息的获取
os.path.abspath(path)  # 根据路径生成绝对路径
os.path.split(path)  # 将路径分割为一个tuple(dir, file)，不考虑路径是否存在
os.path.dirname(path) # 返回dir，不考虑路径是否存在
os.path.basename(path) # 返回文件名，不考虑路径是否存在
os.path.exists(path) # 判断是否存在
os.path.isabs(path) # 判断是不是绝对路径
os.path.isfile(path) # 判断是不是文件
os.path.isdir(path) # 判断是不是目录
os.path.join(path1, path2) # 将两个目录合并成一个
os.path.getatime(path) # 获取文件的存取时间
os.path.getmtime(path) # 获取文件的修改时间

sys模块

Python中的sys模块提供访问<由解释器使用或维护的变量>的接口，并提供一些函数用来和解释器进行交互，操控Python的运行时的系统环境；

sys.argv

# test.py
import sys
print(sys.argv) # 命令行传参数使用，生成一个含有脚本名及相应参数的list
# 运行
>python test.py 1 2 3 4 5
['test.py', '1', '2', '3', '4', '5']

sys.exit([arg])

# 程序执行的返回值，返回并结束程序
argv = sys.argv
ip = os.popen("dig +time=3 +short {} @{}".format(argv[1],argv[2])).read().strip()
now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
if argv[1] == "xxx.xx.com":
    if ip == "172.20.xx.xx":
        print("1")
       # with open("/tmp/uad_dns_wrong.log", "a") as f:
       #     f.write("[{}] {}
".format(now, ip))
        sys.exit(1)
    else:
        print("0")
        with open("/tmp/uad_dns_wrong.log", 'a') as f:
            f.write("[{}] {}
".format(now, ip))
        sys.exit(0)
else:
    print("2")
    sys.exit(2)

sys.stdin() sys.stdout(), stderr()

# stdin
str1 = sys.stdin.readline() # 类似于input，会读最后的换行符；
str1 = sys.stdin.readline().strip() # 去除方法1
str1 = sys.stdin.readline()[:-1] # 去除方法2

# stdout
在python中调用print()时，事实上调用了sys.stdout.write(obj+'
')

# stderr
# 错误输出内容，待续

其他一些方法属性

sys.version # 获取Python解释器程序的版本信息
sys.maxint  # 最大的int值
sys.path      # 返回模块的搜索路径，初始化时使用PYTHONPATH环境变量值
sys.platform # 返回操作系统平台名称

进度条程序举例

import sys
import math
import time

def bar(portion, total): # 参数，当前，总共
    part = total / 50
    count = math.ceil(portion/part)
    # sys.stdout.write('
') # 从行开头从写
    sys.stdout.write(('[%-50s]%.2f%%' % (('#' * count), portion / total * 100))) # %-*s 左对齐，宽度50，%.2f浮点数2位
    sys.stdout.flush() # 缓存刷新

    if portion >= total:
        sys.stdout.write('
')
        return True


portion = 0
total = 10000
while True:
    portion += 100
    time.sleep(0.1)
    end = bar(portion, total)
    if end:break
print("OK")

View Code

shutil模块

高级的文件、文件夹、压缩包处理模块,-->减少调用shell命令的方式

shutil.copyfileobj(fsrc, fdst, [,length])

# 将文件内容拷贝到另外一个文件，可以部分
# 参数需为文件对象
import shutil
f1 = open("本节课笔记.txt", 'r', encoding="utf-8")
f2 = open("本节课笔记2.txt", 'w', encoding="utf-8")
shutil.copyfileobj(f1, f2)

shutil.copyfile(fsrc, fdst, [,length])

# 将文件内容拷贝到另外一个文件，可以部分
# 参数需为文件名
import shutil
shutil.copyfile("本节课笔记.txt", "本节课笔记3.txt")

shutil.copymode(fsrc, fdst)

# 将文件的权限copy到另外一个文件，仅权限至目标文件，用户，组及内容不copy
# 参数为文件名
import shutil
shutil.copymode("本节课笔记.txt", "本节课笔记3.txt")

shutil.copystat(fsrc, fdst)

# 将文件关联的权限和日期copy到另外一个文件
# 参数为文件名
import shutil
shutil.copymode("本节课笔记.txt", "本节课笔记3.txt")

shutil.copy(fsrc, fdst)

# 拷贝文件和权限
# 相当于
copyfile()
copymode()

shutil.copy(fsrc, fdst)

# 拷贝文件和权限和一些属性
# 相当于
copyfile()
copystat()

shutil.copytree(fsrc, fdst)

# 递归copy文件
# 即可以copy目录

shutil.rmtree(fsrc, fdst)

# 删除目录

shutil.move(fsrc, fdst)

# 移动文件

shutil.make_archive(base_name, format,...)

# 创建压缩包并返回文件路径，例如:zip、tar
# base_name：压缩包的文件名，也可以是压缩包的路径；
# format：压缩包类型，zip, tar, bztar, gztar
# root_dir：要压缩的文件夹路径（默认当前目录）
# owner：用户， 默认当前用户
# group：组，默认当前组
# logger：用于记录日志，通常是logging.Logger对象
# 其实是使用

import shutil
shutil.make_archive(r"D:my_python", "zip", r"D:资料存放PythonObjects")

# make_archive调用了zipfile及tarfile完成压缩
# make_archive主要针对目录，如果有部分文件压缩可以直接使用压缩模块
import zipfile
import tarfile # tar类似于zipfile，其中write改为add之类的
# 压缩
z = zipfile.ZipFile("ziptest.zip", "w")
z.write("本节课笔记.txt")
z.write("本节课笔记2.txt")
z.write("本节课笔记3.txt")
z.close()
# 解压
z = zipfile.ZipFile("ziptest.zip", "r")
z.extractall()
z.close()

shelve

简述

Shelve是对象持久化保存方法，将对象保存到文件里面，缺省（即默认）的数据存储文件是二进制的；
shelve将对象保存到文件里面，缺省的数据存储文件是二进制的，‘dict-like’对象的调用方式；
anydbm类似于shelve，区别相同于json与pickle，anydbm的key与value必须是字符串，而shelve可以为认证python数据类型；

方法

# 创建或打开一个shelve对象。shelve默认打开方式支持同时读写操作
shelve.open(filename, flag=’c’, protocol=None, writeback=False)
#可选参数flag，默认为‘c’，如果数据文件不存在，就创建，允许读写；可以是: ‘r’: 只读；’w’: 可读写; ‘n’: 每次调用open()都重新创建一个空的文件，可读写

# 同步并关闭shelve对象
shelve.close()

# 可以使用with语句：
with shelve.open('spam') as db:
    db['eggs'] = 'eggs'

实例-序列化数据

import shelve
import time
d = shelve.open("shelve_test") # 打开一个文件
class Test(object):
    def __init__(self, n):
        self.n = n
    def show(self):
        print(self.n)
t1 = Test(123) # 类实例化
time1 = time.ctime(time.time()) # 时间格式数据
name = ['Bruce', "Amadeus", "Lee"] # 列表格式数据
d["test"] = name # 格式化列表
d["t1"] = t1 # 格式化类
d["time"] = time1 # 格式化时间
d.close()

View Code

实例-反序列数据

import shelve
class Test(object):
    def __init__(self, n):
        self.n = n
    def show(self):
        print(self.n)

with shelve.open("shelve_test", flag='r') as d: # 可以使用with方法
    t1 = d.get("t1") # 类似于字典的取值方式
    time = d["time"]
    name = d.get("test")
t1.show()
print(time)
print(name)

View Code

xml

XML 是各种应用程序之间进行数据传输的常用的工具，目前渐渐为JSON取代，下面是一段XML文本：

<Bruce>
    <country name="Bang">
        <rank updated="yes">5</rank>
        <year updated="yes">2018</year>
        <gdppc>5990</gdppc>
        <neighbor direction="E" name="Austria" />
            <old ddd = "aaa">111</old>
    </country>
</Bruce>

View Code

XML的获取

import xml.etree.ElementTree as ET

tree = ET.parse('xmltest.xml')
root = tree.getroot()
print(root) # 根的Element对象
print(root.tag) # 根的标签
# 遍历xml文档
for child in root:
    print(child.tag, child.attrib)
    print("fengefu".center(50,"-"))
    for i in child:
        print(i.tag, i.text, i.attrib) # 标签，内容，属性
# 遍历节点
for node in root.iter('neighbor'):
    print(node.tag, node.text,node.attrib) # 标签，内容，属性

View Code

XML的修改

import xml.etree.ElementTree as ET

tree = ET.parse("xmltest.xml")
root = tree.getroot()
# 修改
for node in root.iter('year'):
    new_year = int(node.text) +1 # 更新值
    node.text = str(new_year)
    node.set("updated", "yes") # 添加属性
tree.write("xmltest.xml")  # 更新原文件
# 删除
for country in root.findall("country"):
    rank = int(country.find('rank').text)
    if rank > 50:
        root.remove(country)
tree.write("xmltest.xml") # 更新原文件

View Code

XML的新建

import xml.etree.ElementTree as ET

new_xml = ET.Element("namelist")
name = ET.SubElement(new_xml, "name", attrib={"enrolled":"yes", "name": "Bruce"})
age = ET.SubElement(name, "age", attrib={"checked":"no"})
sex = ET.SubElement(name, "sex")
age.text = '33'
name2 = ET.SubElement(new_xml, "name", attrib={"enrolled":"no"})
name2.text = "Amadeus"
age = ET.SubElement(name2, "age")
age.text = "19"

et = ET.ElementTree(new_xml)
et.write("test.xml", encoding="utf-8", xml_declaration=True)

ET.dump(new_xml) # 打印生成的格式

tree = ET.parse("test.xml")
print(tree)
root = tree.getroot()
print(root.tag)

for node in root.iter("name"):
    print(node.tag, node.text, node.attrib)

View Code

PyYAML

是一个三方库，用于生成及读取配置文件，官方文档:https://pyyaml.org/wiki/PyYAMLDocumentation；

pyyaml模块在python中用于处理yaml格式数据，主要使用yaml.safe_dump()、yaml.safe_load()函数将python值和yaml格式数据相互转换；

如果想对一个yaml文件中的多块yaml数据进行转换操作，则可以使用yaml.safe_dump_all()、yaml.safe_load_all()函数；

ConfigParser

用于生成和修改常见配置文档，当前模块的名称在python3.x版本中变更为configparser；

生成配置文件

import configparser

config = configparser.ConfigParser() # 生成对象
config["DEFAULT"] = { # 类似于字典赋值
    'ServerAliveInterval': '45',
    'Compression':'yes',
    'CompressionLevel':'9'
}

config['bitbucket.org'] = {}
config['bitbucket.org']['User'] = 'hg'
config['topsecret.server.com'] = {}
topsecret = config['topsecret.server.com']
topsecret["Host Port"] = '50022'
topsecret["ForwardX11"] = 'no'
config['DEFAULT']['ForwardX11'] = 'yes' # 增加字典元素

with open("example.ini", 'w') as configfile:
    config.write(configfile)

代码

[DEFAULT]
serveraliveinterval = 45
compression = yes
compressionlevel = 9
forwardx11 = yes

[bitbucket.org]
user = hg

[topsecret.server.com]
host port = 50022
forwardx11 = no

文件

读取配置文件

>>> import configparser # 导入模块
>>> config = configparser.ConfigParser() # 初始化对象
>>> config.read('example.ini') # 读取文件
>>> config.sections() # 获取group <DEFAULT的默认不打印>
['bitbucket.org', 'topsecret.server.com']

>>> config.options('topsecret.server.com') # 获取option
['host port', 'forwardx11', 'serveraliveinterval', 'compression', 'compressionlevel']

>>> config.items('topsecret.server.com') # 获取items<key与value的元祖>
[('serveraliveinterval', '45'), ('compression', 'yes'), ('compressionlevel', '9'), ('forwardx11', 'no'), ('host port', '50022')]

>>> config.defaults()['compression'] # 获取值，DEFAULT下的值，要这样获取
'yes'
>>> config['bitbucket.org']['user'] # 获取值，类似于字典
'hg'
>>> config.get('topsecret.server.com', 'host port') # 获取值的方式二
'50022'
>>> config.getint('topsecret.server.com', 'host port') # 获取值的方式二
50022

View Code

修改配置文件

import configparser

config = configparser.ConfigParser()
print(config.sections())
config.read('example.ini')

# 删除section
sec = config.remove_section('bitbucket.org') # 返回成功<TRUE>or失败<FALSE>
config.write(open('example.ini', 'w')) # 写入文件
# 删除option
config.remove_option('DEFAULT', 'compressionlevel')
config.write(open('example.ini', 'w'))

# 添加group
sec = config.has_section('bitbucket1.org')
if not sec:
    sec = config.add_section('bitbucket1.org')
config.write(open('example.ini', 'w'))

# 设置option
sec = config.has_option('bitbucket1.org', 'user')
if not sec:
    sec = config.set('bitbucket1.org', 'user', 'Amadeus')
config.write(open('example.ini', 'w'))

View Code

hashlib

用于加密相关的操作，3.x内代替了MD5模块和sha模块，主要提供SHA1,SHA224,SHA256,SHA384,SHA512,MD5算法<这些算法都是基于hash的>；

hash在字典中建立映射关系，目的是为了提高查询速度，具体点这里

相同的内容hash的值是相同的；

正常的加密操作

m = hashlib.md5() # 生成一个对象
m.update("天Hello".encode()) # 需要加密的内容
print(m.hexdigest()) # 直接打印16进制字符串
print(m.digest()) # 打印二进制
m.update(b"It's me") # 叠加的
print(m.hexdigest())

使用hmac进行二次加密

import hmac
h = hmac.new("你么".encode("utf-8"), "你好".encode('utf-8'))
print(h.digest())
print(h.hexdigest())

re模块

常用正则表达式符号：

'.'     默认匹配除
之外的任意一个字符，若指定flag DOTALL,则匹配任意字符，包括换行
'^'     匹配字符开头，若指定flags MULTILINE,这种也可以匹配上(r"^a","
abc
eee",flags=re.MULTILINE)
'$'     匹配字符结尾，或e.search("foo$","bfoo
sdfsf",flags=re.MULTILINE).group()也可以
'*'     匹配*号前的字符0次或多次，re.findall("ab*","cabb3abcbbac")  结果为['abb', 'ab', 'a']
'+'     匹配前一个字符1次或多次，re.findall("ab+","ab+cd+abb+bba") 结果['ab', 'abb']
'?'     匹配前一个字符1次或0次
'{m}'   匹配前一个字符m次
'{n,m}' 匹配前一个字符n到m次，re.findall("ab{1,3}","abb abc abbcbbb") 结果'abb', 'ab', 'abb']
'|'     匹配|左或|右的字符，re.search("abc|ABC","ABCBabcCD").group() 结果'ABC'
'(...)' 分组匹配，re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c
 
 
'A'    只从字符开头匹配，re.search("Aabc","alexabc") 是匹配不到的
''    匹配字符结尾，同$
'd'    匹配数字0-9
'D'    匹配非数字
'w'    匹配[A-Za-z0-9]
'W'    匹配非[A-Za-z0-9]
's'     匹配空白字符、	、
、
 , re.search("s+","ab	c1
3").group() 结果 '	'
 
'(?P<name>...)' 分组匹配 re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})","371481199306143242").groupdict("city") 
结果{'province': '3714', 'city': '81', 'birthday': '1993'}

贪婪匹配与最小匹配

re模块默认为贪婪匹配，即匹配最长的，可以使用如下方式匹配最短的；

>>> re.search("a.*?c", "abcabc") # 使用最小匹配
<re.Match object; span=(0, 3), match='abc'>
>>> re.search("a.*c", "abcabc") # 默认贪婪匹配
<re.Match object; span=(0, 6), match='abcabc'>

re模块匹配方法

# re.match意为从头开始匹配，所以自带‘^’字符，A同^  同$，调用group方法取得值
>>> re.match("r.*e$","Bruce Amadeus Lee") # r不是在字符串头，所以未匹配到
>>> re.match("B.*e$","Bruce Amadeus Lee") # B在开头匹配到
<re.Match object; span=(0, 17), match='Bruce Amadeus Lee'>
>>> re.match("^B.*e$","Bruce Amadeus Lee") # 等价于上面表达式,
<re.Match object; span=(0, 17), match='Bruce Amadeus Lee'>

# re.search在字符串内取一个匹配到的值<仅取回一个值>，调用group方法取得之
>>> re.search("r.*e","Bruce Amadeus Lee")
<re.Match object; span=(1, 17), match='ruce Amadeus Lee'>
>>> re.search("(a.*?c){2}", "abcabc") # 分组
<re.Match object; span=(0, 6), match='abcabc'>

# re.findall返回所有的匹配到的值以list形式返回，re.finditer返回的是迭代器
>>> re.findall("u.*?e","Bruce Amadeus Lee")
['uce', 'us Le']
>>> re.findall("u.*?e|r.*?u","Bruce Amadeus Lee")
['ru', 'us Le']

# re.split以正则表达式匹配字符为分隔符
>>> re.split("d+", "ab123cd131ef1124hk1123aa")
['ab', 'cd', 'ef', 'hk', 'aa']

# re.sub匹配到的值替换
>>> re.sub("d+","|", "ab123cd131ef1124hk1123aa")
'ab|cd|ef|hk|aa'
>>> re.sub("d+","|", "ab123cd131ef1124hk1123aa", count=2)
'ab|cd|ef1124hk1123aa'

 #注1: 若匹配成功，match()/search()返回的是Match对象，finditer()返回的也是Match对象的迭代器，获取匹配结果需要调用Match对象的group()、groups或group(index)方法。
 #注2: 匹配中的flag的设置:
　　　　flags=re.I<OGMPRECASE> 忽略大小写
　　　　flags=re.M<MULTLINE> 多行模式，使用本标志后，‘^’和‘$’匹配行首和行尾时，会增加换行符之前和之后的位置；
　　　　flags=re.S<DOTALL> 使 “.” 特殊字符完全匹配任何字符，包括换行；没有这个标志， “.” 匹配除了换行符外的任何字符。

re分组匹配

>>> re.search("(?P<tel>d+)(?P<name>[a-bA-Z]+)", '17752113231Bruce').groupdict()
{'tel': '17752113231', 'name': 'B'}
>>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})","371481199306143242").groupdict("city")
{'province': '3714', 'city': '81', 'birthday': '1993'}

end，参考：

http://blog.sina.com.cn/s/blog_15ab3a6e00102yhdp.html

https://www.cnblogs.com/nyist-xsk/p/7978488.html