# pandas: time series and common operations (pandas_时间序列和常用操作)

# 时间序列和常用操作
import pandas as pd

# One timestamp every five days across January 2020 (freq "5D").
pd.date_range(start="20200101", end="20200131", freq="5D")
# Recorded output:
# DatetimeIndex(['2020-01-01', '2020-01-06', '2020-01-11', '2020-01-16',
#                '2020-01-21', '2020-01-26', '2020-01-31'],
#               dtype='datetime64[ns]', freq='5D')

# One timestamp per week across March 2020 (freq "W"); the recorded output
# reports the frequency as W-SUN.
pd.date_range(start="20200301", end="20200331", freq="W")
# Recorded output:
# DatetimeIndex(['2020-03-01', '2020-03-08', '2020-03-15', '2020-03-22',
#                '2020-03-29'],
#               dtype='datetime64[ns]', freq='W-SUN')

# Five timestamps spaced two days apart: `periods` is how many entries to
# generate and `freq` is the spacing between them.
pd.date_range(start="20200301", periods=5, freq="2D")
# Recorded output:
# DatetimeIndex(['2020-03-01', '2020-03-03', '2020-03-05', '2020-03-07',
#                '2020-03-09'],
#               dtype='datetime64[ns]', freq='2D')
# 间隔三小时,八个数据
pd.date_range(start = '20200301',periods = 8,freq = '3H')
'''
DatetimeIndex(['2020-03-01 00:00:00', '2020-03-01 03:00:00',
               '2020-03-01 06:00:00', '2020-03-01 09:00:00',
               '2020-03-01 12:00:00', '2020-03-01 15:00:00',
               '2020-03-01 18:00:00', '2020-03-01 21:00:00'],
              dtype='datetime64[ns]', freq='3H')
'''
# 三点开始,十二个数据,间隔一分钟
pd.date_range(start = '202003010300',periods = 12,freq = 'T')
'''
DatetimeIndex(['2020-03-01 03:00:00', '2020-03-01 03:01:00',
               '2020-03-01 03:02:00', '2020-03-01 03:03:00',
               '2020-03-01 03:04:00', '2020-03-01 03:05:00',
               '2020-03-01 03:06:00', '2020-03-01 03:07:00',
               '2020-03-01 03:08:00', '2020-03-01 03:09:00',
               '2020-03-01 03:10:00', '2020-03-01 03:11:00'],
              dtype='datetime64[ns]', freq='T')
'''
# 每个月的最后一天
pd.date_range(start = '20190101',end = '20191231',freq = 'M')
'''
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')
'''
# 间隔一年,六个数据,年末最后一天
pd.date_range(start = '20190101',periods = 6,freq = 'A')
'''
DatetimeIndex(['2019-12-31', '2020-12-31', '2021-12-31', '2022-12-31',
               '2023-12-31', '2024-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')
'''
# 间隔一年,六个数据,年初最后一天
pd.date_range(start = '20200101',periods = 6,freq = 'AS')
'''
DatetimeIndex(['2020-01-01', '2021-01-01', '2022-01-01', '2023-01-01',
               '2024-01-01', '2025-01-01'],
              dtype='datetime64[ns]', freq='AS-JAN')
'''
# Build a Series indexed by 24 hourly timestamps on 2020-03-21, with the
# values 0..23.  Lowercase "h" is the hour alias; the uppercase "H" spelling
# is deprecated as of pandas 2.2.  Recorded outputs below are from pandas 2.2.
data = pd.Series(
    index=pd.date_range(start="20200321", periods=24, freq="h"),
    data=range(24),
)
# Recorded content of `data`:
# 2020-03-21 00:00:00     0
# 2020-03-21 01:00:00     1
# 2020-03-21 02:00:00     2
# 2020-03-21 03:00:00     3
# 2020-03-21 04:00:00     4
# 2020-03-21 05:00:00     5
# 2020-03-21 06:00:00     6
# 2020-03-21 07:00:00     7
# 2020-03-21 08:00:00     8
# 2020-03-21 09:00:00     9
# 2020-03-21 10:00:00    10
# 2020-03-21 11:00:00    11
# 2020-03-21 12:00:00    12
# 2020-03-21 13:00:00    13
# 2020-03-21 14:00:00    14
# 2020-03-21 15:00:00    15
# 2020-03-21 16:00:00    16
# 2020-03-21 17:00:00    17
# 2020-03-21 18:00:00    18
# 2020-03-21 19:00:00    19
# 2020-03-21 20:00:00    20
# 2020-03-21 21:00:00    21
# 2020-03-21 22:00:00    22
# 2020-03-21 23:00:00    23
# Freq: h, dtype: int64

# First five rows.
data.head(5)
# 2020-03-21 00:00:00    0
# 2020-03-21 01:00:00    1
# 2020-03-21 02:00:00    2
# 2020-03-21 03:00:00    3
# 2020-03-21 04:00:00    4
# Freq: h, dtype: int64

# Resample into three-HOUR bins and take the mean of each bin.  mean()
# yields floats even though the input values are integers.
data.resample("3h").mean()
# 2020-03-21 00:00:00     1.0
# 2020-03-21 03:00:00     4.0
# 2020-03-21 06:00:00     7.0
# 2020-03-21 09:00:00    10.0
# 2020-03-21 12:00:00    13.0
# 2020-03-21 15:00:00    16.0
# 2020-03-21 18:00:00    19.0
# 2020-03-21 21:00:00    22.0
# Freq: 3h, dtype: float64

# Resample into five-HOUR bins and sum each bin.  The last bin only holds
# the four values 20..23, hence 86.
data.resample("5h").sum()
# 2020-03-21 00:00:00    10
# 2020-03-21 05:00:00    35
# 2020-03-21 10:00:00    60
# 2020-03-21 15:00:00    85
# 2020-03-21 20:00:00    86
# Freq: 5h, dtype: int64

# OHLC (open, high, low, close) summary of each five-hour bin.
data.resample("5h").ohlc()
#                      open  high  low  close
# 2020-03-21 00:00:00     0     4    0      4
# 2020-03-21 05:00:00     5     9    5      9
# 2020-03-21 10:00:00    10    14   10     14
# 2020-03-21 15:00:00    15    19   15     19
# 2020-03-21 20:00:00    20    23   20     23

# Shift every index entry forward by one day (values are untouched).
data.index = data.index + pd.Timedelta("1D")

# First five rows again, now dated 2020-03-22.
data.head(5)
# 2020-03-22 00:00:00    0
# 2020-03-22 01:00:00    1
# 2020-03-22 02:00:00    2
# 2020-03-22 03:00:00    3
# 2020-03-22 04:00:00    4
# Freq: h, dtype: int64
# Name of the weekday for a given date.  Timestamp.weekday_name was removed
# in pandas 1.0; day_name() is its replacement.
pd.Timestamp("20200321").day_name()
# 'Saturday'

# Whether the date's year is a leap year.
pd.Timestamp("20200301").is_leap_year
# True

# Timestamp reused by the quarter / month / conversion examples below.
day = pd.Timestamp("20200321")
# Timestamp('2020-03-21 00:00:00')

# Quarter of the year the date falls in.
day.quarter
# 1

# Month of the date.
day.month
# 3

# Convert to a standard-library datetime.datetime object.
day.to_pydatetime()
# datetime.datetime(2020, 3, 21, 0, 0)

# 2020-05-07

# Original article (原文地址): https://www.cnblogs.com/hany-postq473111315/p/12844876.html