数据库学习

　　今天学习了数据库的索引和面向对象中的元类

　　一、索引

　　　　什么是索引

　　　　　　在关系数据库中，索引是一种单独的、物理的存储结构，它对数据库表中一列或多列的值进行排序；在MySQL中也称之为key

　　　　　　索引的作用相当于图书的目录，可以根据目录中的页码快速找到所需的内容。

　　　　为什么需要索引

　　　　　　思考：一个项目正常运行后，对数据库的操作中，哪些操作是最频繁的？

　　　　　　对数据库的写操作（增加、删除、修改）频繁吗？

　　　　　　对数据库的读操作（查询）频繁吗？

　　　　　　相比较下，对数据的读操作会更加频繁，比例在10：1左右，也就是说对数据库的查询操作是非常频繁的

　　　　　　随着时间的推移，表中的记录会越来越多，此时如果查询速度太慢的话对用户体验是非常不利的

　　　　　　索引是提升查询效率最有效的手段

　　　　　　在数据库中插入数据会引发索引的更新（维护），所以索引会降低写入效率，并不是越多越好

　　　　　　简单的说索引就是用来帮我们加快查询速度的

　　　　索引的实现原理

　　　　　　如何能实现加快查询的效果

　　　　　　　　索引最终的目的是要尽可能降低io次数，减少查找的次数，以最少的io找到需要的数据，此时B+树闪亮登场

　　　　　　　　光有数据结构还不行，还需要有对应的算法做支持，就是二分查找法

　　　　　　　　有了B+树这种数据结构后，查找数据的方式就不再是逐个的对比了，而是通过二分查找法来查找

　　　　　　　　另外，其实大多数文件系统也是使用B+树来实现的！

　　　　　　应该尽可能的将数据量小的字段作为索引，这样一个叶子节点能存储的数据就更多，从而降低树的高度;

　　　　　　例如：name和id,应当将id设置为索引而不是name

　　　　聚集索引

　　　　　　叶子节点保存的就是完整的一行记录，如果设置了主键，主键就作为聚集索引

　　　　　　如果没有主键，则找第一个NOT NULL 且 UNIQUE的列作为聚集索引

　　　　　　如果也没有这样的列，InnoDB会在表内自动产生一个隐藏的聚集索引，它是自增的

　　　　辅助索引

　　　　　　除了聚集索引之外的索引都称之为辅助索引或第二索引，包括foreign key 与 unique

　　　　　　辅助索引的特点：

　　　　　　其叶子节点保存的是索引数据与所在行的主键值，InnoDB用这个主键值来从聚集索引中查找数据

　　　　覆盖索引

　　　　　　覆盖索引指的是需要的数据仅在辅助索引中就能找到：

# Assume the name column of stu carries a secondary index.
# Only name is selected, so the secondary index alone can answer the query.
# The literal uses single quotes: double-quoted text is parsed as an identifier
# when the ANSI_QUOTES SQL mode is enabled.
select name
from stu
where name = 'jack';

　　　　　　这样的话数据在辅助索引中就已经找到，不需要再查找聚集索引

　　　　回表

　　　　　　如果要查找的数据在辅助索引中不存在，则需要回到聚集索引中查找，这种现象称之为回表

# name carries a secondary index; sex is not indexed.
# sex is not stored in the name index, so it has to be fetched from the
# clustered index after the name lookup.
# The literal uses single quotes: double-quoted text is parsed as an identifier
# when the ANSI_QUOTES SQL mode is enabled.
select sex
from stu
where name = 'jack';

　　　　　　需要从辅助索引中获取主键的值，再拿着主键值到聚集索引中找到sex的值

　　　　　　查询速度对比：

　　　　　　聚集索引 > 覆盖索引 > 非覆盖索引

　　　　正确使用索引

　　　　　　案例：

　　　　　　首先准备一张表数据量在百万级别

# Demo table. It is created without a primary key or any index, so the first
# query below has no index to use.
create table usr(
    id int,
    name char(10),
    gender char(3),
    email char(30)
);

# Stored procedure that inserts num rows with ids 0 .. num-1.
# Every row gets the same name and gender; only id and email differ.
delimiter //
create procedure addData(in num int)
begin
    declare i int default 0;
    while i < num do
        # Explicit column list so the insert keeps working if columns are added or reordered
        insert into usr (id, name, gender, email)
        values (i, 'jack', 'm', concat('xxxx', i, '@qq.com'));
        set i = i + 1;
    end while;
end //
delimiter ;

# Load about one million rows. Without this call the table stays empty and the
# timings recorded below cannot be reproduced.
call addData(1000000);

# Run a lookup and note the elapsed time
select count(*) from usr where id = 1;
# 1 row in set (3.85 sec)
# Takes seconds: fairly slow


1.
# Make id the primary key
alter table usr
    add primary key (id);
# Repeat the same lookup
select count(*)
from usr
where id = 1;
# 1 row in set(0.00 sec)
# Recorded result: now finishes within milliseconds, a very large improvement

2.
# Range condition on the indexed column
select count(*)
from usr
where id > 1;
# Still slow. Nothing can be optimised here: the query really does need that many rows.
# Narrowing the range makes it fast again.
select count(*)
from usr
where id > 1
  and id < 10;

# Timing when the column in the condition has no index
select count(*)
from usr
where name = 'jack';
# 1 row in set (2.85 sec)
# Slow



3.
# Put an index on name
create index name_index
    on usr (name);
# Repeat the query
select count(*)
from usr
where name = 'jack';
# 1 row in set (3.89 sec)
# Slower than before. Why?
# name has no selectivity at all: every row holds the same value, so the index
# cannot narrow anything down and every entry still has to be visited.
# Low-selectivity columns should not be indexed: reads do not get faster and
# writes get slower.
# For the same reason gender should not be indexed; email is a far better candidate.

# Change the query to a value that does not exist
select count(*)
from usr
where name = 'aaaaaa';
# 1 row in set (0.00 sec) Very fast: the index shows straight away that the value is absent, so nothing else is read.
# Pattern matching
select count(*) from usr where name like 'xxx'; # fast
select count(*) from usr where name like 'XXX%'; # fast
select count(*) from usr where name like '%xxx'; # slow
# An index compares values starting from the left. A leading % matches any
# characters, so there is no prefix to search by and every entry gets checked.

4.索引字段不能参加运算
select count(*)
from usr
where id * 12 = 120;
# Very slow: MySQL has to read the id of every row and evaluate the expression
# before it can decide whether the row matches.
# Fix: move the arithmetic to the constant side
select count(*)
from usr
where id = 120/12;
# Faster, because the condition is a fixed value before any row is read. It is equivalent to
select count(*)
from usr
where id = 10;
# which is naturally fast

5.有多个匹配条件时 索引的执行顺序 and和or
# Start with AND.
# Begin from a clean slate: remove every index created so far.
alter table usr drop primary key;
drop index name_index on usr;

# Baseline with no index at all
select count(*) from usr where name = 'jack' and gender = 'm' and id = 1 and email = 'xxxx2@qq.com';
# 1 row in set (1.34 sec) Takes seconds

# Put an index on name
create index name_index on usr(name);
# Same query as the baseline. The email condition needs the = operator, and the
# literal must be identical to the baseline's for the timings to be comparable.
select count(*) from usr where name = 'jack' and gender = 'm' and id = 1 and email = 'xxxx2@qq.com';
# 1 row in set (17.82 sec) Even slower than without the index

# Put an index on gender
create index gender_index
    on usr (gender);
# Test
select count(*)
from usr
where name = 'jack'
  and gender = 'm'
  and id = 1
  and email = 'xxxx2@qq.com';
# 1 row in set (16.83 sec) gender has no selectivity either

# Index id by making it the primary key
alter table usr
    add primary key (id);
# Test
select count(*)
from usr
where name = 'jack'
  and gender = 'm'
  and id = 1
  and email = 'xxxx1@qq.com';
# 1row in set (0.00 sec) id is highly selective, so the query speeds up
# All three columns are indexed, but MySQL does not simply evaluate the
# conditions from left to right: it starts with a highly selective column.
# Switch the id condition to a range
select count(*)
from usr
where name = 'jack'
  and gender = 'm'
  and id > 1
  and email = 'xxxx1@qq.com';
# Slower again

# Drop the index on id and index email instead
alter table usr
    drop primary key;
create index email_index
    on usr (email);
# Test
select count(*)
from usr
where name = 'jack'
  and gender = 'm'
  and id  =1
  and email = 'xxxx2@qq.com';
# 1 row in set(0.00 sec) Very fast
# OR conditions are matched from left to right
select count(*)
from usr
where name = 'jackxxxx'
   or email = 'xxxx0@qq.com';
# Note: an index is used only when the columns on both sides of OR are indexed

6.多字段联合索引
为什么需要联合索引
案例：
select count(*) from  usr where name = 'jack' and gender = 'm' and id >3 and email = 'xxxx2@qq.com';
假设所有字段都是区分度非常高的字段，那么除了id（范围条件）之外，为任何一个字段添加索引都能够提升速度。但是如果某条sql语句的条件中没有出现带索引的字段，那就无法加速查询。最简单的办法是为每个字段都加上索引，但是索引也是一种数据，会占用存储空间，并且降低写入效率
此处就可以使用联合索引，
联合索引最重要的是顺序 按照最左匹配原则 应该将区分度高的放在左边 区分度低的放到右边
# Remove the single-column indexes. gender_index is dropped as well, so that
# mul_index is the only index left when the query below runs.
drop index name_index on usr;
drop index email_index on usr;
drop index gender_index on usr;
# Composite index. Column order matters (leftmost-prefix matching), so the
# column treated as most selective, email, comes first.
create index mul_index on usr(email,name,gender,id);
# Test query
select count(*) from usr where name = 'xx' and id = 1 and email = 'xx';
# As long as the leftmost indexed column (email) appears in the conditions,
# wherever it is written in the where clause, the index speeds up the query.
drop index mul_index on usr;

　　二、元类

　　　　　　1.什么是元类

　　　　　　　　一切源自于一句话：python中一切皆为对象。既然如此类是不是也是对象呢？

class Teacher(object):
    """A teacher with a name and an age, used to show how instances are created."""

    school = 'tsinghua'

    def __init__(self, name, age):
        self.name = name
        self.age = age

    def say(self):
        """Print a welcome message that includes this teacher's name."""
        print('%s says welcome to the Beijing' % self.name)


# Instantiate the class defined above; the name oldboyTeacher does not exist
# and would raise NameError.
t1 = Teacher('egon', 18)
print(type(t1))    # the class of t1 is <class '__main__.Teacher'>

　　　　所有的对象都是实例化或者说调用类而得到的(调用类的过程称之为类的实例化)，比如对象t1是调用类Teacher得到的

　　　　一切皆对象的话类也必然是一个对象，验证一下

def func(cls):
    """Print the object that was passed in; below it receives a class."""
    print(cls)


# A class can be stored in a container, bound to another name and passed as an
# argument, exactly like any other object.
li = [Teacher]
tcls = Teacher
func(Teacher)

　　　　t1是通过Teacher实例化得到的，那Teacher对象是哪个类实例化的呢？

# Look up the class that Teacher itself is an instance of, then show it.
teacher_meta = type(Teacher)
print(teacher_meta)
# <class 'type'>

　　　　可以推导出===>产生Teacher的过程一定发生了：Teacher = type(...)

　　　　用于实例化产生类的类称之为元类就是此时的type类;

　　　　Teacher是通过type实例化得到的，既然如此，是不是可以自己调用type来实例化一个class呢？

　　　　2.创建类的流程分析

　　　　　　class关键字在帮我们创建类时，必然帮我们调用了元类Teacher = type(...),那调用type时传入的参数是什么呢？必然是类的关键组成部分，一个类有三大组成部分，分别是

　　　　　　1.类名class_name = 'Teacher'

　　　　　　2.基类们class_bases = (object,)

　　　　　　3.类的名称空间class_dic,类的名称空间是执行类体代码而得到的

　　　　　　调用type时会依次传入以上三个参数

　　　　　　自己来实例化一个类

# 1. The class name
class_name = 'Teacher'

# Source of the class body. It is written at column 0 because exec() compiles
# it as module-level code; a uniformly indented body raises IndentationError.
class_body = '''
def __init__(self,name,age):
    self.name = name
    self.age = age
def say(self):
    print('%s says welcome to the Beijing' %self.name)
'''

# 3. The class namespace: exec() returns None, so the names defined by the body
# are collected in the dict passed as its locals argument.
class_dic = {}
exec(class_body, {}, class_dic)

# 2. The base classes
bases = (object,)

# 4. Instantiate the metaclass: type(name, bases, namespace_dict) returns the new class.
Teacher = type(class_name, bases, class_dic)

　　　　　　综上，class关键字帮我们创建一个类应该细分为以下四个过程

　　　　　　1.获取类名

　　　　　　2.获取基类

　　　　　　3.获取名称空间

　　　　　　4.实例化元类得到类

　　　　补充__call__函数的执行时机

　　　　该方法会在调用对象时自动触发执行(对象加括号)

class Foo:
    """Instances of this class are callable."""

    def __call__(self, *args, **kwargs):
        # Runs automatically whenever an instance is called with parentheses.
        print('run')


f = Foo()
f()

　　　　自定义元类控制类的创建

　　　　　　一个类没有声明自己的元类，默认他的元类就是type，除了使用内置元类type，我们也可以通过继承type来自定义元类，然后使用metaclass关键字参数为一个类指定元类

class Mymeta(type):
    """Custom metaclass. A class is a metaclass only if it inherits from type;
    otherwise it is an ordinary user-defined class."""


class Teacher(object, metaclass=Mymeta):    # Teacher = Mymeta('Teacher', (object,), {...})
    """Teacher class created through the custom metaclass Mymeta."""

    def say(self):
        """Print a welcome message that includes this teacher's name."""
        print('%s says welcome to the Beijing'%self.name)

    def __init__(self, name, age):
        self.age = age
        self.name = name

    school = 'tsinghua'

　　　　需求

　　　　1.规范类名必须大写

　　　　2.类中必须包含文档注释

class MyMate(type):
    """Metaclass that validates each class it creates.

    A class must have a non-empty docstring and a name that starts with an
    upper-case letter.

    Raises:
        TypeError: if either rule is violated.
    """

    def __init__(self, name, bases, dic):
        print('run')
        if not dic.get('__doc__'):
            raise TypeError('类必须有文档注释!')
        # Only the first character is checked. str.istitle() would also reject
        # valid CamelCase names such as 'MyClass'.
        if not name[:1].isupper():
            raise TypeError('类名必须大写开头!')
        super().__init__(name, bases, dic)


class Foo(object, metaclass=MyMate):
    """Example class that satisfies both rules. Without this docstring the
    class statement itself raises TypeError."""

　　　　自定义元类控制类的调用

　　　　　　控制类的调用过程关键在于__call__函数，类也是对象，调用类必然也会执行其元类中的__call__函数

class MyMate(type):
    """Metaclass that validates class creation and controls instantiation.

    Creation rules: the class must have a non-empty docstring and a name that
    starts with an upper-case letter; otherwise TypeError is raised.
    """

    def __init__(self, name, bases, dic):
        print('run')
        if not dic.get('__doc__'):
            raise TypeError('类必须有文档注释!')
        # Only the first character is checked. str.istitle() would also reject
        # valid CamelCase names such as 'MyClass'.
        if not name[:1].isupper():
            raise TypeError('类名必须大写开头!')
        super().__init__(name, bases, dic)

    def __call__(self, *args, **kwargs):
        """Create and return an instance of the class being called.

        Here self is the class itself (for example Foo), not an instance of it.
        """
        # 1. create an empty object
        obj = object.__new__(self)
        # 2. run the class's __init__ on it
        self.__init__(obj, *args, **kwargs)
        # 3. return the initialised object
        return obj


class Foo(object, metaclass=MyMate):
    """Example class; the docstring is required by MyMate for the class to be created."""

    def __init__(self):
        print('初始化对象')


f = Foo()
print(f)

　　元类实现单例

　　　　　　什么是单例，

　　　　　　单例指的是单个实例，指一个类只能有一个实例对象

　　　　　　为什么要用单例

　　　　　　当一个类的实例中的数据不会变化时使用单例，数据是不变的

　　　　　　例如开发一个音乐播放器程序，音乐播放器可以封装为一个对象，那你考虑一下，当你切歌的时候，是重新创建一个播放器，还是使用已有的播放器？

　　　　　　因为播放器中的数据和业务逻辑都是相同的没有必要创建新的，所以最好使用单例模式，以节省资源，

# Singleton implemented with a classmethod
class Player():
    """Music player whose shared instance is obtained through get_player()."""

    __play = None    # cached shared instance; stays None until get_player() first runs

    def __init__(self):
        print('创建播放器了')

    @classmethod
    def get_player(cls):
        """Return the cached Player, creating it on the first call."""
        if cls.__play is not None:
            return cls.__play
        cls.__play = Player()
        return cls.__play


# Ask for the player four times; only the first request constructs one.
for _ in range(4):
    p1 = Player.get_player()

　　　　该方法无法避免使用者直接调用类来实例化，这样就不是单例了

　　　　使用元类实现单例模式

class MyMetaClass(type):
    """Metaclass that makes every class using it a singleton."""

    # Maps each class to its single instance. One entry per class keeps two
    # classes that share this metaclass from returning each other's instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the single instance of cls, creating it on the first call.

        The arguments are used only by the first call; later calls ignore them
        and return the stored instance unchanged.
        """
        if cls not in MyMetaClass._instances:
            print('创建新的播放器对象')
            # type.__call__ runs cls.__new__ and then cls.__init__
            MyMetaClass._instances[cls] = super().__call__(*args, **kwargs)
        return MyMetaClass._instances[cls]


class CDPlayer(metaclass=MyMetaClass):
    """CD player; at most one instance exists because of MyMetaClass."""

    def play(self, music):
        """Print a message saying playback switched to music."""
        print('切换音乐', music)

    def __init__(self, music_name):
        self.music_name = music_name


p1 = CDPlayer('你发如雪!')
p1.play('菊花台')
p1.play('时光机')