预处理算法_4_表堆叠

表堆叠是指将两个 DataFrame 沿行方向(axis=0)或列方向(axis=1)拼接,合并为一个 DataFrame。

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# <editable>


def execute():
    # <editable>
    """Stack two database tables with ``pandas.concat`` and print the result.

    The configured columns of the left and right tables are read from the
    MySQL database named in the connection URL, the two DataFrames are
    concatenated along the axis given by ``params['method']`` and the
    combined DataFrame is printed.

    ``method`` is passed to ``pandas.concat`` as ``axis``:
    0 stacks the rows of the right table below the left one,
    1 places the two tables side by side, aligned on the row index.

    Raises:
        ValueError: if ``params['method']`` is not 0 or 1.

    Sample data (left table, then right table)::

           id  score
        0   1   80.0
        1   2   20.0
        2   3    NaN
        3   4    5.0
        4   5    4.0
           id name
        0   1   张三
        1   2   李四
        2   3   王五
        3   4   赵六
        4   5   冯七
        5   6  朱重八

    Output with ``method = 1`` (side by side)::

            id  score  id name
        0  1.0   80.0   1   张三
        1  2.0   20.0   2   李四
        2  3.0    NaN   3   王五
        3  4.0    5.0   4   赵六
        4  5.0    4.0   5   冯七
        5  NaN    NaN   6  朱重八

    Output with ``method = 0`` (rows stacked; the left table apparently
    held a sixth row, id 6 / score 20.0, when this sample was captured)::

           id  score name
        0   1   80.0  NaN
        1   2   20.0  NaN
        2   3    NaN  NaN
        3   4    5.0  NaN
        4   5    4.0  NaN
        5   6   20.0  NaN
        0   1    NaN   张三
        1   2    NaN   李四
        2   3    NaN   王五
        3   4    NaN   赵六
        4   5    NaN   冯七
        5   6    NaN  朱重八
    """
    # Load modules (kept local so the editable region stays self-contained).
    import pandas as pd
    from sqlalchemy import create_engine

    # Select the target data.
    params = {
        "left_columns": "id, score",
        "right_columns": "id, name",
        # NOTE(review): left_on / right_on are not used by the concat below;
        # presumably kept for a shared parameter schema -- confirm.
        "left_on": "id",
        "right_on": "id",
        "method": 0,      # axis to concatenate along: 0 = rows, 1 = columns
    }
    inputs = {"table_left": 'test', "table_right": "class"}

    # Validate the axis before touching the database so a bad setting fails
    # fast instead of after both queries have already run.
    axis = int(params['method'])
    if axis not in (0, 1):
        raise ValueError(
            "params['method'] must be 0 (rows) or 1 (columns), got %r"
            % (params['method'],)
        )

    # Connect to the database.
    # NOTE(review): credentials are hard-coded in the URL; consider moving
    # them to configuration.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')

    def read_table(columns, table):
        """Return the requested columns of *table* as a DataFrame."""
        # An empty (or whitespace-only) column list means "all columns".
        selected = columns.strip() or '*'
        # NOTE: identifiers cannot be bound as SQL parameters, so the query
        # is built by concatenation; only pass trusted column/table names.
        return pd.read_sql_query('select ' + selected + ' from ' + table, engine)

    try:
        left = read_table(params['left_columns'], inputs['table_left'])
        right = read_table(params['right_columns'], inputs['table_right'])
    finally:
        # Release pooled connections even when a query fails.
        engine.dispose()

    # Merge the data.
    data_out = pd.concat([left, right], axis=axis)

    # Write out the result.
    print(data_out)
# </editable>


# Run the table-stacking demo only when this file is executed as a script,
# not when it is imported as a module.
if "__main__" == __name__:
    execute()
作者:沐禹辰
出处:http://www.cnblogs.com/renfanzi/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
原文地址:https://www.cnblogs.com/renfanzi/p/14476441.html