大作业 数据清洗(缺省值处理)

这次实现了缺省值处理部分内容,主要有将缺省值按中位数,平均值补全。将缺省行或者列去掉。

#缺省值补全
@app.route('/data_clean_supply')
def data_clean_supply():
    supply_type = request.values.get("supply_type")
    flag='1'
    try:
        global data_clean
        data_clean =dataclean.data_clean_supply(data_clean,supply_type)  # 去重后的数据保存,为以后处理缺省值
    except (Exception, BaseException) as e:
        exstr = traceback.format_exc()
        print( exstr)
        flag='0'
    print(supply_type+"flag:"+flag)
    return jsonify({"cog":flag})
    pass
#缺省值去除
@app.route("/data_clean_remove")
def data_clean_remove():
    action_on = str(request.values.get("action_on"))
    type_on = str(request.values.get("type_on"))
    min_num = str(request.values.get("min_num"))
    flag='1'
    try:
        global data_clean
        dataclean.data_clean_remove(data_clean,action_on,type_on,min_num)
    except (Exception, BaseException) as e:
        exstr = traceback.format_exc()
        print(exstr)
        flag='0'
    print( "min_num:"+min_num+",type_on:"+type_on+",action_on:"+action_on+",flag:"+flag)
    return jsonify({"cog":flag})
    pass
#获取要补充的数值
def data_clean_supply_num(temp_col_not_nan,suplly_type):
    if (suplly_type == "median"):
        return temp_col_not_nan.median()
    if(suplly_type == "mean"):
        return temp_col_not_nan.mean()
#补全缺省值
def data_clean_supply(data_clean,suplly_type):
    for i in range(data_clean.shape[1]):#遍历列数
        try:
            temp_col=data_clean.iloc[:,i]#取一列
            temp_col_not_nan=temp_col[temp_col==temp_col].astype('float')#当前一列不为nan,使用布尔索引
            mean=data_clean_supply_num(temp_col_not_nan,suplly_type)
            flag_list=[]
            num=len(temp_col)
            for j in range(num):
                if(temp_col.iloc[j]!=temp_col.iloc[j]):
                    flag_list.append(mean)
                else:
                    flag_list.append(temp_col.iloc[j])
            data_clean.iloc[:,i]=flag_list# 填充均值,必须整列赋值
        except:
            pass
            #print("不是数字类型")
    return data_clean
    pass
#去除缺省行或列
def data_clean_remove(data_clean,action_on,type_on,minnum):
    return data_clean.dropna(axis=action_on, how=type_on, thresh=int(minnum))
    pass

有一点要注意由于从数据库中读出的缺省值是‘’而不是nan所以要转为nan

 #把‘’转换为nan
    for j in range(num_1):
        flag_list=[]
        for i in range(num_0):
            if(df.iloc[i][j]==''):
                flag_list.append(np.nan)
            else:
                flag_list.append(df.iloc[i][j])
        df.iloc[:,j]=flag_list#必须整列赋值,如果单个赋值则会失败

前台部分:

     <blockquote class="layui-elem-quote layui-text">
            <h2>缺省值处理</h2>
        </blockquote>
        <fieldset class="layui-elem-field layui-field-title" style="margin-top: 30px;">
            <legend>缺省值补全</legend>
        </fieldset>
        <form class="layui-form" action="">
            <div class="layui-form-item">
                <label class="layui-form-label">补全方式</label>
                <div class="layui-input-block">
                    <select name="interest" lay-filter="aihao" id="supply_type">
                        <option value=""></option>
                        <option value="median">中位数</option>
                        <option value="mode" selected="">众数</option>
                        <option value="average">平均数</option>
                    </select>
                    <button type="button" class="layui-btn layui-btn-normal" id="supply_submit"  onclick="supply_submit">确定</button>
                </div>
            </div>
        </form>
        <fieldset class="layui-elem-field layui-field-title" style="margin-top: 30px;">
            <legend>缺省值去除</legend>
        </fieldset>
        <form class="layui-form" action="">
            <div class="layui-form-item">
                <label class="layui-form-label">作用于</label>
                <div class="layui-input-block">
                    <input type="radio" name="action_on" value="index" title="行" checked="">
                    <input type="radio" name="action_on" value="columns" title="列">
                </div>
            </div>
            <div class="layui-form-item">
                <label class="layui-form-label">非空值最小数</label>
                <div class="layui-input-block">
                    <input type="text" id="min_num" lay-verify="title" autocomplete="off" placeholder="非空值小于此数目的行或列将被删除" class="layui-input">
                </div>
            </div>
            <div class="layui-form-item">
                <label class="layui-form-label">方式</label>
                <div class="layui-input-block">
                    <input type="radio" name="type_on" value="all" title="all" checked="">
                    <input type="radio" name="type_on" value="any" title="any">
                </div>
                <button type="button" class="layui-btn layui-btn-normal" id="remove_submit" >确定</button>
            </div>
        </form>
<script>
    $("#supply_submit").click(function() {
        supply_type = $("#supply_type").find("option:selected").val()
        $.ajax({
            type: "GET",
            url: "/data_clean_supply?supply_type=" + supply_type,
            dataType: "json",
            success: function (data) {
                if (data.cog == 1) {
                    alert("操作成功")
                } else {
                    alert("操作失败")
                }
            }
        })
    })
    $("#remove_submit").click(function() {
        action_on=$("input[name='action_on']:checked").val();
        type_on=$("input[name='type_on']:checked").val();
        min_num=$("#min_num").val()
        $.ajax({
            type: "GET",
            url: "/data_clean_remove?action_on="+action_on+"&type_on="+type_on+"&min_num="+min_num,
            dataType: "json",
            success: function (data) {
                if (data.cog == 1) {
                    alert("操作成功")
                } else {
                    alert("操作失败")
                }
            }
        })
    })
</script>
原文地址:https://www.cnblogs.com/fengchuiguobanxia/p/15679315.html