python

一、 python基础

1. 字符串

### 斜杠转义 和 r 不转义

# 转义字符 \ 可以转义很多字符，
# 比如 \n 表示换行，\t 表示制表符，\\ 表示字符 \ 本身

# \	\
print('\\\t\\')
# \\\t\\
print(r'\\\t\\')

# hello,\n
# world
print(r'''hello,\n
world''')

### 字符串的修改
a.replace('A', 'a')

### 计算分数增长百分比
s1 = 72
s2 = 85
r = (s2 - s1) / s1 * 100
print('%.1f%%' % r)
>>> 18.1%

### 字符串连接
'Jim' + 'Green' = 'JimGreen'

'%s, %s' % ('Jim', 'Green') = 'Jim, Green'

var_list = ['tom', 'david', 'john']
a = '###'
a.join(var_list) = 'tom###david###john'

2. 变量（字符串、列表）

#### 字符串变量
a = 'ABC'
b = a
b = 'XYZ'
print (a)
# >>> ABC
# 在内存中创建了一个'ABC'的字符串；
# 在内存中创建了一个名为a的变量，并把它指向'ABC'。

#### 列表变量
a = [1, 2, 3]
b = a   # 和java数组一样
b[0] = 9
print (a)
# >>> [9, 2, 3]
# 将b指向了与a的同一个列表。b与a共同指向一个列表“实例”

3. list增删改

#### 列表的增加删除
## 增加 append insert
classmates = ['Michael', 'Bob', 'Tracy']
classmates.append('Adam')
classmates.insert(1, 'Jack')

## 删除 pop
# 删除list末尾的元素，用pop()方法
# 删除指定位置的元素，用pop(i)方法
classmates.pop()
classmates.pop(1)

# 清空列表
classmates.clear()

## 修改
#某个元素替换成别的元素，可以直接赋值给对应的索引位置
classmates[1] = 'Sarah'

#### 下标循环 enumerate
# >>> 0 A
# >>> 1 B
# >>> 2 C
for i, value in enumerate(['A', 'B', 'C']):
    print(i, value)


## Slicing past the end of a list returns an empty list instead of raising
# Named `nums` rather than `list` so the builtin list() stays usable.
nums = [1, 2, 3]
print(nums[4:])
# >>> []

4. tuple

#### 元组

# 定义一个空的tuple
t = ()

# 定义一个只有1个元素的tuple
t = (1)   # 定义的不是tuple，是1这个数
t = (1,)

# 元组的修改
# 变的不是tuple的元素，而是list的元素。
# tuple一开始指向的list并没有改成别的list
# 指向'a'，就不能改成指向'b'，
# 指向一个list，就不能改成指向其他对象，但指向的这个list本身是可变的
t = ('a', 'b', ['A', 'B'])
t[2][0] = 'X'
t[2][1] = 'Y'
print (t)    # >>> ('a', 'b', ['X', 'Y'])

5. 字典dict增删改查

dic_t = {'Michael': 95, 'Bob': 75, 'Tracy': 85}

#### 增加
dic_t ['blithe'] = 100
print (dic_t ['blithe'])
# >>> 100
dic_t ['blithe'] = 90
print (dic_t ['blithe'])
# >>> 90  会直接修改之前key值的value

#### 条件判断  查找 in 和 get()
# 判断key值是否在字典中
>>> 'Thomas' in dic_t 
>>> False
# 通过dict提供的get()方法，
# 如果key不存在，可以返回None，或者自己指定的value
dic_t .get('Thomas')         # >>>None
dic_t .get('Thomas', -1)     # >>> -1

#### 删除 pop(), del, clear()
# pop 删除字典给定键 key 所对应的值，
# 返回值为被删除的值
dic_t .pop('Bob')
# del 能删除单一的元素，也能删除整个字典变量；
# 注意 del dic_t 删除的是变量本身（之后再访问会抛出 NameError），并不是清空字典
del dic_t ['Bob']
del dic_t 
# clear 删除字典内所有元素
dic_t .clear()    # dic_t = {}
# popitem 删除并返回字典中的一对 (key, value)：Python 3.7+ 为最后插入的一对（LIFO），更早版本为任意一对；字典为空时抛出 KeyError
dic_t .popitem()

#### Looking up a key that is not in the dict
# Named `d` rather than `dict` so the builtin dict() stays usable.
d = {'a': 1, 'b': 2}
try:
    print(d['c'])
except KeyError as err:
    # Subscripting with a missing key raises KeyError; show it without
    # aborting the rest of the script.
    print('KeyError:', err)
# >>> KeyError: 'c'

#### 生成空字典的方法
d = {}
d = dict()
d.update([('svk', 'Bratislava'), ('deu', 'Berlin'), ('dnk', 'Copenhagen')])

#### 字典遍历
d = {'a': 1, 'b':2, 'c': 3}
for key in d:
    print (key)

for value in d.values():
    print (value)

for k, v in d.items():
    print (k, v)

#### zip字典生成
def create_dict(lista, listb):
    """Pair up two equal-length lists into a dict.

    Example: create_dict(['a', 'b', 'c'], [2, 3, 4]) returns
    {'a': 2, 'b': 3, 'c': 4}.

    Returns an error-message string when either argument is not a list,
    and an empty dict when the two lists differ in length.
    """
    # Guard clauses: reject non-list arguments up front.
    if not isinstance(lista, list):
        return "lista is not list"
    if not isinstance(listb, list):
        return "listb is not list"

    # Lists of different lengths are not zipped at all.
    if len(lista) != len(listb):
        return {}
    return dict(zip(lista, listb))

#### fromkeys字典生成
# >>>{0: 'x', 1: 'x', 2: 'x'}
dic = dict.fromkeys(range(3), 'x')

#### Building a dict from a list of 2-tuples
# >>> {'spam': 1, 'egg': 2, 'bar': 3}
# Named `pairs` rather than `list` so the builtin list() stays usable.
pairs = [('spam', 1), ('egg', 2), ('bar', 3)]
dic = dict(pairs)

6. 集合（set）

#### set的增删
# set和dict类似，也是一组key的集合，但不存储value。
# 由于key不能重复，所以，在set中，没有重复的key

### 创建
# 创建一个set，需要提供一个list作为输入集合
# 重复元素在set中自动被过滤
s = set([1, 1, 2, 2, 3, 3])
# >>> {1, 2, 3}

### 增加 add
# 可以重复添加，但不会有效果
s.add(4)
s.add(4)
# >>> {1, 2, 3, 4}

### 删除 remove
s.remove(4)
# >>> {1, 2, 3}

### 两个set可以做数学意义上的交集、并集
s1 = set([1, 2, 3])
s2 = set([2, 3, 4])
print (s1 & s2)
# >>>{2, 3}
print (s1 | s2)
# >>> {1, 2, 3, 4}

二、函数

1. 函数参数

　　参数定义的顺序必须是：必选参数、默认参数、可变参数、命名关键字参数和关键字参数

#### 默认参数
def enroll(name, gender, age=6, city='Beijing'):
    """Print a student's registration details, one labelled field per line.

    age and city have defaults, so callers only pass them when they
    differ from 6 / 'Beijing'. Nothing is returned.
    """
    fields = (
        ('name:', name),
        ('gender:', gender),
        ('age:', age),
        ('city:', city),
    )
    for label, value in fields:
        print(label, value)

# enroll() prints the fields itself and returns None, so call it directly
# instead of wrapping it in print() (which would also print "None").
enroll('Sarah', 'F')
## Only arguments that differ from the defaults need to be supplied
enroll('Adam', 'M', city='Tianjin')

## 默认参数必须指向不变对象
def add_end(L=[]):
    """Append 'END' to L and return it (demonstrates the mutable-default pitfall).

    The default list is created once, when the def statement runs, and the
    same list object is reused by every call that omits L.
    """
    # add_end()
    # add_end()
    # add_end()
    # >>> ['END', 'END', 'END']
    L.append('END')
    return L
#修改后 借用None这个不变对象
def add_end(L=None):
    """Append 'END' to L and return it; a fresh list is used when L is omitted.

    None serves as the immutable placeholder default, so separate calls
    never share one list.
    """
    target = [] if L is None else L
    target.append('END')
    return target


#### 可变参数
# 允许传入0个或任意个参数，
# 这些可变参数在函数调用时自动组装为一个tuple
def calc(*numbers):
    """Return the sum of the squares of all positional arguments (0 if none)."""
    total = 0
    for value in numbers:
        square = value * value
        total = total + square
    return total


#### 关键字参数 & 命名关键字参数
# 允许你传入0个或任意个含参数名的参数，
# 这些关键字参数在函数内部自动组装为一个dict
# 存在可变参数和不存在可变参数
"""
>>> f1(1, 2)
a = 1 b = 2 c = 0 args = () kw = {}
>>> f1(1, 2, c=3)
a = 1 b = 2 c = 3 args = () kw = {}

>>> f1(1, 2, 3, 'a', 'b')
a = 1 b = 2 c = 3 args = ('a', 'b') kw = {}

>>> f1(1, 2, 3, 'a', 'b', x=99)
a = 1 b = 2 c = 3 args = ('a', 'b') kw = {'x': 99}

>>> f2(1, 2, d=99, ext=None)
a = 1 b = 2 c = 0 d = 99 kw = {'ext': None}
"""
def f1(a, b, c=0, *args, **kw):
    """Print a, b, c, the extra positional arguments and the extra keywords.

    Extra positional arguments are collected into the tuple args and extra
    keyword arguments into the dict kw.
    """
    labelled = ('a =', a, 'b =', b, 'c =', c, 'args =', args, 'kw =', kw)
    print(*labelled)

def f2(a, b, c=0, *, d, e=None, **kw):
    """Print a, b, c, the keyword-only argument d, and any extra keywords.

    The bare * ends the positional parameters: d and e can only be passed
    by name. d is required. e is accepted but not printed; it defaults to
    None so that a call such as f2(1, 2, d=99, ext=None) works without it.
    Any other keyword arguments are collected into the dict kw.
    """
    print('a =', a, 'b =', b, 'c =', c, 'd =', d, 'kw =', kw)

def person(name, age, *args, city='city', job, **kw):
    """Print name, age, extra positionals, city, job and extra keywords.

    Because *args is already present, city and job are keyword-only
    without needing a separate bare * separator.

    Example: person('a', 11, 33, job='job', ot='ot', ot2='ot2') prints
    a 11 (33,) city job {'ot': 'ot', 'ot2': 'ot2'}
    """
    values = [name, age, args, city, job, kw]
    print(*values)

2. 函数返回值

import math

def move(x, y, step, angle=0):
    """Return the point reached from (x, y) after moving step units at angle.

    angle is passed straight to math.cos / math.sin, i.e. it is in radians.
    The horizontal offset is added to x and the vertical offset is
    subtracted from y. Both coordinates come back as one tuple (nx, ny).
    """
    dx = step * math.cos(angle)
    dy = step * math.sin(angle)
    return x + dx, y - dy

# 函数可以同时返回多个值，但其实就是一个tuple。
x, y = move(100, 100, 60, math.pi / 6)
r = move(100, 100, 60, math.pi / 6)
print(x, y)    # >>> 151.96152422706632 70.0
print(r)        # >>> (151.96152422706632, 70.0)

三、高级特性

1. 切片（Slice）

　　数列、元组、字符串均可切片

## 先创建一个0-99的数列
L = list(range(100))

## 取前10个数
# >>> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
L[:10]

## 取后10个数
# >>> [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
L[-10:]
# >>> [90, 91, 92, 93, 94, 95, 96, 97, 98]
L[-10:-1]

## 前10个数，每两个取一个
# >>> [0, 2, 4, 6, 8]
L[:10:2]

## 所有数，每5个取一个
# >>> [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95]
L[::5]

## 只写[:]就可以原样复制一个list
# >>> [0, 1, 2, 3, ..., 99]
L[:]

2. 迭代

　　使用for循环时，只要作用于一个可迭代对象，for循环就可以正常运行

　　判断一个对象是可迭代对象，方法是通过collections.abc模块的Iterable类型判断

>>> from collections.abc import Iterable
>>> isinstance('abc', Iterable) # str是否可迭代
True
>>> isinstance([1,2,3], Iterable) # list是否可迭代
True
>>> isinstance(123, Iterable) # 整数是否可迭代
False

　　判断对象类型：

#### 判断对象类型
# 基本类型都可以用type()判断
"""
>>> import types
>>> def fn():
...     pass
...
>>> type(fn)==types.FunctionType
True
>>> type(abs)==types.BuiltinFunctionType
True
>>> type(lambda x: x)==types.LambdaType
True
>>> type((x for x in range(10)))==types.GeneratorType
True
"""
# 判断一个对象是否可迭代对象
"""
from collections.abc import Iterable
print (isinstance('abc', Iterable))
print (isinstance([1,2,3], Iterable))
"""
# 要获得一个对象的所有属性和方法，可以使用dir()函数
"""
>>> dir('ABC')
['__add__', '__class__',..., '__subclasshook__', 'capitalize', 'casefold',..., 'zfill']
"""

3. 列表生成式

　　可以用来创建list的生成式

# 生成list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
list(range(1, 11))

# 生成[4, 16, 36, 64, 100]
L = [x * x for x in range(1, 11) if x % 2 == 0]

# 两层循环，可以生成全排列
# >>> ['AX', 'AY', 'AZ', 'BX', 'BY', 'BZ', 'CX', 'CY', 'CZ']
L = [m + n for m in 'ABC' for n in 'XYZ']

### 列出当前目录下的所有文件和目录名
# >>> ['.emacs.d', '.ssh', '.Trash'......'Workspace', 'XCode']
import os
[d for d in os.listdir('.')]

### 使用两个变量来生成list
# >>> ['x=A', 'y=B', 'z=C']
d = {'x': 'A', 'y': 'B', 'z': 'C' }
L = [k + '=' + v for k, v in d.items()]

### if...else 和列表生成式 
# 在一个列表生成式中，for前面的if ... else是表达式，
# 而for后面的if是过滤条件，不能带else，否则报错
# >>> [-1, 2, -3, 4, -5, 6, -7, 8, -9, 10]
L = [x if x % 2 == 0 else -x for x in range(1, 11)]

4. 生成器

在循环的过程中不断推算出后续的元素，就不必创建完整的list，从而节省大量的空间。这种一边循环一边计算的机制，称为生成器：generator。

#### 创建
# 把一个列表生成式的[]改成()，就创建了一个generator
L = [x * x for x in range(10)]
# >>> [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
g = (x * x for x in range(10))
# >>> <generator object <genexpr> at 0x1022ef630>

# generator保存的是算法，
# 每次调用next(g)，就计算出g的下一个元素的值，
# 直到计算到最后一个元素，没有更多的元素，抛出错误。
'''
>>> next(g)
0
>>> next(g)
1
……
>>> next(g)
81
>>> next(g)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
StopIteration
'''
# 正确的方法是使用for循环，因为generator也是可迭代对象
g = (x * x for x in range(10))
for n in g:
    print(n)

#### 定义generator的另一种方法 yield
## 斐波拉契数列
def fib(max):
    """Generate the first max Fibonacci numbers: 1, 1, 2, 3, 5, 8, 13, ...

    When the generator is exhausted its return value is the string 'done'
    (carried by StopIteration).
    """
    produced = 0
    previous, current = 0, 1
    while produced < max:
        yield current
        previous, current = current, previous + current
        produced += 1
    return 'done'

　　杨辉三角形：

# 杨辉三角形 生成器 generator
def triangles():
    """Yield the rows of Pascal's triangle forever, starting with [1].

    Every row is a new list; a row that has been yielded is never modified
    afterwards, so callers can safely keep the rows they receive.
    """
    row = [1]

    while True:
        yield row
        # Pad a copy with a trailing 0 (never the yielded list itself).
        # padded[i] + padded[i - 1] is then entry i of the next row, with
        # padded[-1] (the 0) acting as the left neighbour of entry 0.
        padded = row + [0]
        row = [padded[i] + padded[i - 1] for i in range(len(padded))]

def triangles_yh():
    """Collect the first ten rows from triangles(), print them, and report
    whether they match the expected rows of Pascal's triangle.

    Prints each collected row, then '测试通过!' when the rows equal the
    expected table below and '测试失败!' otherwise.
    """
    expected = [
        [1],
        [1, 1],
        [1, 2, 1],
        [1, 3, 3, 1],
        [1, 4, 6, 4, 1],
        [1, 5, 10, 10, 5, 1],
        [1, 6, 15, 20, 15, 6, 1],
        [1, 7, 21, 35, 35, 21, 7, 1],
        [1, 8, 28, 56, 70, 56, 28, 8, 1],
        [1, 9, 36, 84, 126, 126, 84, 36, 9, 1]
    ]

    # Take exactly ten rows; stop before asking the generator for an eleventh.
    rows = []
    for count, row in enumerate(triangles(), start=1):
        rows.append(row)
        if count == 10:
            break

    for row in rows:
        print(row)

    print('测试通过!' if rows == expected else '测试失败!')

5. 迭代器

　　可以直接作用于for循环的数据类型有以下几种：

　　　　一类是集合数据类型，如list、tuple、dict、set、str等；

　　　　一类是generator，包括生成器和带yield的generator function。

　　这些可以直接作用于for循环的对象统称为可迭代对象：Iterable。

　　可以被next()函数调用并不断返回下一个值的对象称为迭代器：Iterator。

　　可以使用isinstance()判断一个对象是否是Iterator对象

>>> from collections.abc import Iterator
>>> isinstance((x for x in range(10)), Iterator)
True
>>> isinstance([], Iterator)
False
>>> isinstance({}, Iterator)
False
>>> isinstance('abc', Iterator)
False

　　生成器都是Iterator对象，但list、dict、str虽然是Iterable，却不是Iterator。

　　把list、dict、str等Iterable变成Iterator可以使用iter()函数：

>>> isinstance(iter([]), Iterator)
True
>>> isinstance(iter('abc'), Iterator)
True

四、函数式编程

1. 高阶函数： map / reduce

#### map()函数接收两个参数，一个是函数，一个是Iterable，
# map将传入的函数依次作用到序列的每个元素，
# 并把结果作为新的Iterator返回。

def f(x):
    """Return x multiplied by itself."""
    return x * x

r = map(f, [1, 2, 3, 4, 5, 6, 7, 8, 9])
list(r)
# >>> [1, 4, 9, 16, 25, 36, 49, 64, 81]

# 把这个list所有数字转为字符串
list(map(str, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
# >>> ['1', '2', '3', '4', '5', '6', '7', '8', '9']

#### reduce()函数 
# reduce把一个函数作用在一个序列[x1, x2, x3, ...]上，
# 这个函数必须接收两个参数，
# reduce把结果继续和序列的下一个元素做累积计算，其效果就是：
# reduce(f, [x1, x2, x3, x4]) = f(f(f(x1, x2), x3), x4)

# 把序列[1, 3, 5, 7, 9]变换成整数13579
from functools import reduce
def fn(x, y):
    """Return x * 10 + y, i.e. append the digit y to the number x."""
    return x * 10 + y

reduce(fn, [1, 3, 5, 7, 9])

# 把str转换为int的函数
def char2num(s):
    """Return the integer value of the single decimal-digit character s.

    Raises KeyError when s is not one of '0' through '9'.
    """
    lookup = {str(value): value for value in range(10)}
    return lookup[s]

reduce(fn, map(char2num, '13579'))

2. 高阶函数：filter

#### filter()
# filter()也接收一个函数和一个序列。
#和map()不同的是，filter()把传入的函数依次作用于每个元素，然后根据返回值是True还是False决定保留还是丢弃该元素。

## 把一个序列中的空字符串删掉
def not_empty(s):
    """Return a truthy value only when s contains non-whitespace text.

    A falsy s (such as '' or None) is returned unchanged; otherwise the
    stripped string is returned, which is '' for whitespace-only input.
    """
    if not s:
        return s
    return s.strip()

list(filter(not_empty, ['A', '', 'B', None, 'C', '  ']))

3. 高阶函数：sorted

#### sorted()
# sorted()函数可以对list进行排序
sorted([36, 5, -12, 9, -21])                   # >>> [-21, -12, 5, 9, 36]
sorted([36, 5, -12, 9, -21], key=abs)    # >>> [5, 9, -12, -21, 36]

# 反向排序，不必改动key函数，可以传入第三个参数reverse=True
sorted(['bob', 'about', 'Zoo', 'Credit'], key=str.lower, reverse=True)

4. 返回函数(闭包)

高阶函数除了可以接受函数作为参数外，还可以把函数作为结果值返回。

返回一个函数时，牢记该函数并未执行，返回函数中不要引用任何可能会变化的变量。

def lazy_sum(*args):
    """Return a function that adds up args when it is called, not before.

    Every call to lazy_sum() creates a new inner function that remembers
    the args it was given.
    """
    def sum():
        # Runs only when the returned function is invoked.
        total = 0
        for value in args:
            total = total + value
        return total

    return sum

# Calling lazy_sum() returns the summing function, not the sum itself.
# Every call returns a new function object, even with identical arguments.
f1 = lazy_sum(1, 3, 5, 7, 9)    # >>> <function lazy_sum.<locals>.sum at 0x101c6ed90>
f2 = lazy_sum(1, 3, 5, 7, 9)
f1 != f2                        # >>> True (two distinct function objects)
f1() == f2() == 25              # >>> True (both compute 25)

'''
在函数lazy_sum中又定义了函数sum，并且，内部函数sum可以引用外部函数lazy_sum的参数和局部变量，
当lazy_sum返回函数sum时，相关参数和变量都保存在返回的函数中，这称为“闭包（Closure）”
'''

## 闭包数据异常
===============
def count():
    """Return three functions built in a loop (demonstrates late binding).

    Each f looks up i when it is called, not when it is defined; by then
    the loop has finished with i == 3, so all three functions return 9.
    """
    fs = []
    for i in range(1, 4):
        def f():
             return i*i
        fs.append(f)
    return fs

# 返回闭包时牢记一点：返回函数不要引用任何循环变量，或者后续会发生变化的变量。
f1, f2, f3 = count()    # >>> f1() = f2() = f3() = 9
===============
def count():
    """Return three functions that return 1, 4 and 9 respectively."""
    def f(j):
        # j is bound at the moment f(i) is called, so each g keeps the
        # value i had in its own iteration.
        def g():
            return j*j
        return g

    # f(i) runs immediately for each i, passing the current value in.
    return [f(i) for i in range(1, 4)]

f1, f2, f3 = count()    # >>> f1() f2() f3() = 1, 4, 9

5. 匿名函数

lambda x: 表达式

关键字lambda表示匿名函数，冒号前面的x表示函数参数。

匿名函数有个限制，就是只能有一个表达式，不用写return，返回值就是该表达式的结果。

用匿名函数有个好处，因为函数没有名字，不必担心函数名冲突。此外，匿名函数也是一个函数对象，也可以把匿名函数赋值给一个变量，再利用变量来调用该函数：f = lambda x: x * x

也可以把匿名函数作为返回值返回：

def build(x, y):
    """Return a zero-argument anonymous function that computes x * x + y * y."""
    return lambda: x * x + y * y

6. 装饰器

def now():
    """Print the fixed date string '2015-3-25'."""
    print('2015-3-25')

假设我们要增强now()函数的功能，比如，在函数调用前后自动打印日志，但又不希望修改now()函数的定义，这种在代码运行期间动态增加功能的方式，称之为“装饰器”（Decorator）。

本质上，decorator就是一个返回函数的高阶函数。所以，我们要定义一个能打印日志的decorator，可以定义如下：

def log(func):
    """Decorator that prints 'call <name>():' before each call to func.

    The wrapper accepts any positional and keyword arguments, forwards them
    to func unchanged, and returns func's result.
    """
    # Imported locally so this block does not rely on a file-level import.
    import functools

    # functools.wraps copies func's __name__, __doc__, etc. onto wrapper, so
    # the decorated function does not present itself as 'wrapper'.
    @functools.wraps(func)
    def wrapper(*args, **kw):
        print('call %s():' % func.__name__)
        return func(*args, **kw)
    return wrapper

wrapper()函数的参数定义是(*args, **kw)，因此，wrapper()函数可以接受任意参数的调用。

7. 偏函数

functools.partial的作用就是，把一个函数的某些参数给固定住（也就是设置默认值），返回一个新的函数。

当函数的参数个数太多，需要简化时，使用functools.partial可以创建一个新的函数，这个新函数可以固定住原函数的部分参数，从而在调用时更简单。

import functools
# int2 is int() with base preset to 2; the preset can still be overridden
# by passing base explicitly.
int2 = functools.partial(int, base=2)
print(int2('1000000'))             # 64
print(int2('1000000', base=10))    # 1000000

五、模块

1. 包和模块

一个abc.py的文件就是一个名字叫abc的模块，一个xyz.py的文件就是一个名字叫xyz的模块。

现在，假设我们的abc和xyz这两个模块名字与其他模块冲突了，于是我们可以通过包来组织模块，避免冲突。方法是选择一个顶层包名，比如mycompany，按照如下目录存放：

mycompany
├─ __init__.py
├─ abc.py
└─ xyz.py

引入了包以后，只要顶层的包名不与别人冲突，那所有模块都不会与别人冲突。现在，abc.py模块的名字就变成了mycompany.abc，类似的，xyz.py的模块名变成了mycompany.xyz。

请注意，每一个包目录下面都会有一个__init__.py的文件，这个文件是必须存在的，否则，Python就把这个目录当成普通目录，而不是一个包。__init__.py可以是空文件，也可以有Python代码，因为__init__.py本身就是一个模块，而它的模块名就是mycompany。

2. 使用模块

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

' a test module '

__author__ = 'Michael Liao'

import sys

def test():
    """Print a greeting chosen by the number of command-line arguments.

    With only the script name in sys.argv it greets the world, with one
    extra argument it greets that argument, and otherwise it complains
    about too many arguments.
    """
    argv = sys.argv
    argc = len(argv)
    if argc == 1:
        message = 'Hello, world!'
    elif argc == 2:
        message = 'Hello, %s!' % argv[1]
    else:
        message = 'Too many arguments!'
    print(message)

if __name__=='__main__':
    test()

第1行和第2行是标准注释，第1行注释可以让这个hello.py文件直接在Unix/Linux/Mac上运行，第2行注释表示.py文件本身使用标准UTF-8编码；

第4行是一个字符串，表示模块的文档注释，任何模块代码的第一个字符串都被视为模块的文档注释；

第6行使用__author__变量把作者写进去，这样当你公开源代码后别人就可以瞻仰你的大名；

3. 作用域

正常的函数和变量名是公开的（public），可以被直接引用，比如：abc，x123，PI等；

类似__xxx__这样的变量是特殊变量，可以被直接引用，但是有特殊用途，比如上面的__author__，__name__就是特殊变量，hello模块定义的文档注释也可以用特殊变量__doc__访问，我们自己的变量一般不要用这种变量名；

类似_xxx和__xxx这样的函数或变量就是非公开的（private），不应该被直接引用，比如_abc，__abc等；