预处理算法_6_缺失值处理

#!/usr/bin/env python
# -*- coding:utf-8 -*-


def execute():
    """Load the configured columns from MySQL and handle their missing values.

    The columns named in ``params['columns']`` are selected from the table
    named in ``inputs['table']``; the missing values are then processed
    according to ``params['method']`` and the resulting frame is printed.

    Supported ``params['method']`` values:

    * ``'drop'`` -- drop every row that contains a missing value.
    * ``'Median_interpolation'`` -- fill with each numeric column's median.
    * ``'Mode_interpolation'`` -- fill with each column's mode (the first
      one when several values tie).
    * ``'slinear'`` / ``'quadratic'`` -- interpolate with that pandas method.
    * ``'polynomial'`` -- fills with each numeric column's mean (see the
      review note in the body).
    * anything else -- fill with ``params['value']``, converted to a float
      when it represents a number.

    Returns:
        None. The processed frame is written to standard output.

    Errors raised by SQLAlchemy or pandas (connection failure, invalid SQL,
    unsupported interpolation) are not caught here.
    """
    params = {"method": '', "columns": "score", "value": 20}
    inputs = {"table": 'test'}
    # <editable>
    # Load modules.
    import unicodedata

    import pandas as pd
    from sqlalchemy import create_engine

    # Select the target data.
    # NOTE(review): the connection URL embeds credentials in source; consider
    # moving it to configuration or an environment variable.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')
    # NOTE(review): the statement is built by string concatenation. Column and
    # table names cannot be bound as query parameters, so they must be
    # validated upstream if they can ever come from untrusted input.
    sql = 'select ' + params['columns'] + ' from ' + inputs['table']
    try:
        data_in = pd.read_sql_query(sql, engine)
    finally:
        # Release the pooled connections once the data has been read.
        engine.dispose()

    def coerce_fill_value(value):
        """Return ``value`` as a float when it is numeric, else unchanged."""
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
        # float() rejects single numeric characters such as '½' or '五';
        # unicodedata.numeric() converts those to their float value.
        if isinstance(value, str) and len(value) == 1:
            try:
                return unicodedata.numeric(value)
            except ValueError:
                pass
        return value

    # Missing-value handling.
    method = params['method']
    if method == 'drop':
        data_out = data_in.dropna()
    elif method == 'Median_interpolation':
        # numeric_only=True keeps text columns from raising on pandas >= 2.0.
        data_out = data_in.fillna(data_in.median(numeric_only=True))
    elif method == 'Mode_interpolation':
        # mode() returns a DataFrame with one row per tied mode. Passing that
        # frame to fillna() aligns on the row index and would only fill the
        # first row(s), so use its first row as per-column fill values.
        modes = data_in.mode()
        if modes.empty:
            # No non-missing value exists, so there is nothing to fill with.
            data_out = data_in.copy()
        else:
            data_out = data_in.fillna(modes.iloc[0])
    elif method in ('slinear', 'quadratic'):
        data_out = data_in.interpolate(method=method)
    elif method == 'polynomial':
        # NOTE(review): despite its name this branch fills with the column
        # mean rather than performing polynomial interpolation; the behavior
        # is kept as-is -- confirm what the caller expects.
        data_out = data_in.fillna(data_in.mean(numeric_only=True))
    else:
        data_out = data_in.fillna(coerce_fill_value(params['value']))

    # Write out the result.
    print(data_out)
    # </editable>


# Run the missing-value job only when this file is executed as a script.
if __name__ == '__main__':
    execute()
作者:沐禹辰
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
原文地址:https://www.cnblogs.com/renfanzi/p/14476550.html