[Python] Pandas load DataFrames

Create an empty Data frame with date index:

import pandas as pd

def test_run():
    start_date='2017-11-24'
    end_data='2017-11-28'
    dates=pd.date_range(start_date, end_data)
    df1=pd.DataFrame(index=dates)
    print(df1)

"""
Empty DataFrame
Columns: []
Index: [2010-01-22 00:00:00, 2010-01-23 00:00:00, 2010-01-24 00:00:00, 2010-01-25 00:00:00, 2010-01-26 00:00:00]
"""

Now we want to load SPY.csv and get 'Adj Close' column value and copy the range (11-21, 11-28) data to the empty data frame:

import pandas as pd

def test_run():
    start_date='2017-11-24'
    end_data='2017-11-28'
    dates=pd.date_range(start_date, end_data)

    # Create an empty data frame
    df1=pd.DataFrame(index=dates)

    # Load csv file
    dspy=pd.read_csv('data/SPY.csv', 
    index_col="Date", 
    parse_dates=True,
    usecols=['Date', 'Adj Close'],
    na_values=['nan'])
    # print(dspy) 
    """
             Adj Close
    Date
    2017-11-16  258.619995
    2017-11-17  257.859985
    2017-11-20  258.299988
    """

    # join the table
    df1=df1.join(dspy)
    #print(df1)
    """
                 Adj Close
    2017-11-24  260.359985
    2017-11-25         NaN
    2017-11-26         NaN
    2017-11-27  260.230011
    """

    # drop the nan row
    df1=df1.dropna()
    print(df1)
    """
                 Adj Close
    2017-11-24  260.359985
    2017-11-27  260.230011
    2017-11-28  262.869995
    """

if __name__ == '__main__':
    test_run()    

There is a simpy way to drop the data which index is not present in dspy:

df1=df1.join(dspy, how='inner')

We can also rename the 'Adj Close' to prevent conflicts:

    # rename the column
    dspy=dspy.rename(columns={'Adj Close': 'SPY'})

Load more stocks:

import pandas as pd

def test_run():
    start_date='2017-11-24'
    end_data='2017-11-28'
    dates=pd.date_range(start_date, end_data)

    # Create an empty data frame
    df1=pd.DataFrame(index=dates)

    # Load csv file
    dspy=pd.read_csv('data/spy.csv', 
    index_col="Date", 
    parse_dates=True,
    usecols=['Date', 'Adj Close'],
    na_values=['nan'])
    # print(dspy) 
    """
             Adj Close
    Date
    2017-11-16  258.619995
    2017-11-17  257.859985
    2017-11-20  258.299988
    """

    # rename the column
    dspy=dspy.rename(columns={'Adj Close': 'spy'})

    # join the table
    df1=df1.join(dspy, how='inner')
    # print(df1)
    """
                 Adj Close
    2017-11-24  260.359985
    2017-11-27  260.230011
    2017-11-28  262.869995
    """

    symbols=['aapl', 'ibm']
    for symbol in symbols:
        temp=pd.read_csv('data/{0}.csv'.format(symbol), index_col="Date", parse_dates=True, usecols=['Date', 'Adj Close'], na_values=['nan'])
        
        temp=temp.rename(columns={'Adj Close': symbol})
        
        df1=df1.join(temp)

    print(df1)
    """
                       spy        aapl         ibm
    2017-11-24  260.359985  174.970001  151.839996
    2017-11-27  260.230011  174.089996  151.979996
    2017-11-28  262.869995  173.070007  152.470001
    """

if __name__ == '__main__':
    test_run()    
原文地址:https://www.cnblogs.com/Answer1215/p/8050690.html