#导入需要的包
import pandas as pd
import numpy as np
# Load the data.
# NOTE(review): the path below has no directory separators after "D:"; the
# backslashes appear to have been lost when this notebook was exported —
# confirm the real location of the workbook before running.
mydata = pd.read_excel(r'D:学习dataAOnline Retail.xlsx')
# Preview the first rows of the loaded table.
mydata.head()
---
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country
0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55 17850.0 United Kingdom
1 536365 71053 WHITE METAL LANTERN 6 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 2010-12-01 08:26:00 2.75 17850.0 United Kingdom
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 2010-12-01 08:26:00 3.39 17850.0 United Kingdom
,
# Remove duplicated rows. A row is treated as a duplicate only when every
# column matches another row exactly.
before_delete = len(mydata)
mydata.drop_duplicates(inplace=True)
after_delete = len(mydata)
# Report the row count before and after, and how many rows were dropped.
print(
    "删除前行数", before_delete,
    '删除后行数:', after_delete,
    '重复行数:', before_delete - after_delete,
)
---
删除前行数 541909 删除后行数: 536641 重复行数: 5268
商品的描述特性是没办法进行缺失值处理的
# Handle missing values.
# In the info() summary, Description and CustomerID report fewer non-null
# entries than the other columns, i.e. both contain missing values.
# Free-text product descriptions cannot be imputed sensibly, so Description
# is left as it is.
mydata.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 536641 entries, 0 to 541908
Data columns (total 8 columns):
InvoiceNo 536641 non-null object
StockCode 536641 non-null object
Description 535187 non-null object
Quantity 536641 non-null int64
InvoiceDate 536641 non-null datetime64[ns]
UnitPrice 536641 non-null float64
CustomerID 401604 non-null float64
Country 536641 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 36.8+ MB
n
# Number of distinct customer IDs; missing values are not counted.
mydata['CustomerID'].nunique(dropna=True)
4372
# Typical ways to fill missing values: median, mode, forward fill, backward
# fill, or a constant.
# Forward fill could introduce a large bias here, so a constant (0) is used.
# Before doing that, check that 0 is not already in use as a CustomerID.
# CustomerID is numeric, not a string, so compare against the number 0; an
# empty result (header row only) means no existing row uses 0.
mydata.loc[mydata['CustomerID'].eq(0)]
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country
已无缺失值
# Fill missing customer IDs with the placeholder value 0.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: the in-place form operates on
# an intermediate object, which triggers a chained-assignment warning in
# pandas 2.x and leaves `mydata` unchanged when Copy-on-Write is enabled.
mydata['CustomerID'] = mydata['CustomerID'].fillna(0)
# A result of 0 means CustomerID has no missing values left.
mydata['CustomerID'].isnull().sum()
0
# Count the missing values remaining in each column.
mydata.isna().sum()
InvoiceNo 0
StockCode 0
Description 1454
Quantity 0
InvoiceDate 0
UnitPrice 0
CustomerID 0
Country 0
dtype: int64
# Add two derived columns:
#   date  - the invoice timestamp truncated to midnight of the same day
#   month - the invoice timestamp truncated to the first day of its month
# Both stay datetime64 columns so they can be grouped and compared directly.
# normalize() truncates to midnight without a round trip through Python
# `date` objects.
mydata['date'] = mydata['InvoiceDate'].dt.normalize()
# astype('datetime64[M]') is not accepted by pandas 2.x (only the s/ms/us/ns
# units are supported), so go through a monthly Period and convert back to a
# timestamp, which yields the first day of each month.
mydata['month'] = mydata['InvoiceDate'].dt.to_period('M').dt.to_timestamp()
mydata.head()
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country date month
0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55 17850.0 United Kingdom 2010-12-01 2010-12-01
1 536365 71053 WHITE METAL LANTERN 6 2010-12-01 08:26:00 3.39 17850.0 United Kingdom 2010-12-01 2010-12-01
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 2010-12-01 08:26:00 2.75 17850.0 United Kingdom 2010-12-01 2010-12-01
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-12-01 08:26:00 3.39 17850.0 United Kingdom 2010-12-01 2010-12-01
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 2010-12-01 08:26:00 3.39 17850.0 United Kingdom 2010-12-01 2010-12-01
mydata.dtypes
# Inspect the dtype of every column.
mydata.dtypes
InvoiceNo object
StockCode object
Description object
Quantity int64
InvoiceDate datetime64[ns]
UnitPrice float64
CustomerID float64
Country object
date datetime64[ns]
month datetime64[ns]
dtype: object
int64
# Cast CustomerID from float to a 64-bit integer.
mydata['CustomerID'] = mydata['CustomerID'].astype(np.int64)
# Add the total amount of each line: quantity multiplied by unit price.
mydata['SumCost'] = mydata['Quantity'].mul(mydata['UnitPrice'])
# Descriptive statistics of the numeric columns.
mydata.describe()
Quantity UnitPrice CustomerID SumCost
count 536641.000000 536641.000000 536641.000000 536641.000000
mean 9.620029 4.632656 11435.904653 18.123861
std 219.130156 97.233118 6795.044250 380.656263
min -80995.000000 -11062.060000 0.000000 -168469.600000
25% 1.000000 1.250000 0.000000 3.750000
50% 3.000000 2.080000 14336.000000 9.870000
75% 10.000000 4.130000 16241.000000 17.400000
max 80995.000000 38970.000000 18287.000000 168469.600000
可以发现订单的编码都是以C开头的
# The statistics above contain negative values, which is not normal for sales
# lines; a likely explanation is cancelled or failed orders.
# Select every row whose quantity or unit price is zero or negative.
# In the displayed result, most of these rows carry an invoice number that
# starts with "C".
mydata.loc[mydata['Quantity'].le(0) | mydata['UnitPrice'].le(0)]
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country date month SumCost
141 C536379 D Discount -1 2010-12-01 09:41:00 27.50 14527 United Kingdom 2010-12-01 2010-12-01 -27.50
154 C536383 35004C SET OF 3 COLOURED FLYING DUCKS -1 2010-12-01 09:49:00 4.65 15311 United Kingdom 2010-12-01 2010-12-01 -4.65
235 C536391 22556 PLASTERS IN TIN CIRCUS PARADE -12 2010-12-01 10:24:00 1.65 17548 United Kingdom 2010-12-01 2010-12-01 -19.80
236 C536391 21984 PACK OF 12 PINK PAISLEY TISSUES -24 2010-12-01 10:24:00 0.29 17548 United Kingdom 2010-12-01 2010-12-01 -6.96
237 C536391 21983 PACK OF 12 BLUE PAISLEY TISSUES -24 2010-12-01 10:24:00 0.29 17548 United Kingdom 2010-12-01 2010-12-01 -6.96
238 C536391 21980 PACK OF 12 RED RETROSPOT TISSUES -24 2010-12-01 10:24:00 0.29 17548 United Kingdom 2010-12-01 2010-12-01 -6.96
239 C536391 21484 CHICK GREY HOT WATER BOTTLE -12 2010-12-01 10:24:00 3.45 17548 United Kingdom 2010-12-01 2010-12-01 -41.40
240 C536391 22557 PLASTERS IN TIN VINTAGE PAISLEY -12 2010-12-01 10:24:00 1.65 17548 United Kingdom 2010-12-01 2010-12-01 -19.80
241 C536391 22553 PLASTERS IN TIN SKULLS -24 2010-12-01 10:24:00 1.65 17548 United Kingdom 2010-12-01 2010-12-01 -39.60
622 536414 22139 NaN 56 2010-12-01 11:52:00 0.00 0 United Kingdom 2010-12-01 2010-12-01 0.00
939 C536506 22960 JAM MAKING SET WITH JARS -6 2010-12-01 12:38:00 4.25 17897 United Kingdom 2010-12-01 2010-12-01 -25.50
1441 C536543 22632 HAND WARMER RED RETROSPOT -1 2010-12-01 14:30:00 2.10 17841 United Kingdom 2010-12-01 2010-12-01 -2.10
1442 C536543 22355 CHARLOTTE BAG SUKI DESIGN -2 2010-12-01 14:30:00 0.85 17841 United Kingdom 2010-12-01 2010-12-01 -1.70
1970 536545 21134 NaN 1 2010-12-01 14:32:00 0.00 0 United Kingdom 2010-12-01 2010-12-01 0.00
1971 536546 22145 NaN 1 2010-12-01 14:33:00 0.00 0 United Kingdom 2010-12-01 2010-12-01 0.00
1972 536547 37509 NaN 1 2010-12-01 14:33:00 0.00 0 United Kingdom 2010-12-01 2010-12-01 0.00
1973 C536548 22244 3 HOOK HANGER MAGIC GARDEN -4 2010-12-01 14:33:00 1.95 12472 Germany 2010-12-01 2010-12-01 -7.80
1974 C536548 22242 5 HOOK HANGER MAGIC TOADSTOOL -5 2010-12-01 14:33:00 1.65 12472 Germany 2010-12-01 2010-12-01 -8.25
1975 C536548 20914 SET/5 RED RETROSPOT LID GLASS BOWLS -1 2010-12-01 14:33:00 2.95 12472 Germany 2010-12-01 2010-12-01 -2.95
1976 C536548 22892 SET OF SALT AND PEPPER TOADSTOOLS -7 2010-12-01 14:33:00 1.25 12472 Germany 2010-12-01 2010-12-01 -8.75
1977 C536548 22654 DELUXE SEWING KIT -1 2010-12-01 14:33:00 5.95 12472 Germany 2010-12-01 2010-12-01 -5.95
1978 C536548 22767 TRIPLE PHOTO FRAME CORNICE -2 2010-12-01 14:33:00 9.95 12472 Germany 2010-12-01 2010-12-01 -19.90
1979 C536548 22333 RETROSPOT PARTY BAG + STICKER SET -1 2010-12-01 14:33:00 1.65 12472 Germany 2010-12-01 2010-12-01 -1.65
1980 C536548 22245 HOOK, 1 HANGER ,MAGIC GARDEN -2 2010-12-01 14:33:00 0.85 12472 Germany 2010-12-01 2010-12-01 -1.70
1981 C536548 22077 6 RIBBONS RUSTIC CHARM -6 2010-12-01 14:33:00 1.65 12472 Germany 2010-12-01 2010-12-01 -9.90
1982 C536548 22631 CIRCUS PARADE LUNCH BOX -1 2010-12-01 14:33:00 1.95 12472 Germany 2010-12-01 2010-12-01 -1.95
1983 C536548 22168 ORGANISER WOOD ANTIQUE WHITE -2 2010-12-01 14:33:00 8.50 12472 Germany 2010-12-01 2010-12-01 -17.00
1984 C536548 21218 RED SPOTTY BISCUIT TIN -3 2010-12-01 14:33:00 3.75 12472 Germany 2010-12-01 2010-12-01 -11.25
1985 C536548 20957 PORCELAIN HANGING BELL SMALL -1 2010-12-01 14:33:00 1.45 12472 Germany 2010-12-01 2010-12-01 -1.45
1986 C536548 22580 ADVENT CALENDAR GINGHAM SACK -4 2010-12-01 14:33:00 5.95 12472 Germany 2010-12-01 2010-12-01 -23.80
... ... ... ... ... ... ... ... ... ... ... ...
538565 C581409 22173 METAL 4 HOOK HANGER FRENCH CHATEAU -2 2011-12-08 14:08:00 3.29 12476 Germany 2011-12-08 2011-12-01 -6.58
538566 C581409 85199L LARGE HANGING IVORY & RED WOOD BIRD -1 2011-12-08 14:08:00 0.65 12476 Germany 2011-12-08 2011-12-01 -0.65
538567 C581409 85127 SMALL SQUARE CUT GLASS CANDLESTICK -5 2011-12-08 14:08:00 4.95 12476 Germany 2011-12-08 2011-12-01 -24.75
538919 581422 23169 smashed -235 2011-12-08 15:24:00 0.00 0 United Kingdom 2011-12-08 2011-12-01 -0.00
540072 C581460 22197 POPCORN HOLDER -5 2011-12-08 18:48:00 0.72 13078 United Kingdom 2011-12-08 2011-12-01 -3.60
540073 C581460 22107 PIZZA PLATE IN BOX -1 2011-12-08 18:48:00 1.25 13078 United Kingdom 2011-12-08 2011-12-01 -1.25
540078 C581462 16219 HOUSE SHAPE PENCIL SHARPENER -48 2011-12-08 18:51:00 0.06 12985 United Kingdom 2011-12-08 2011-12-01 -2.88
540079 C581462 21642 ASSORTED TUTTI FRUTTI PEN -72 2011-12-08 18:51:00 0.29 12985 United Kingdom 2011-12-08 2011-12-01 -20.88
540080 C581463 85048 15CM CHRISTMAS GLASS BALL 20 LIGHTS -2 2011-12-08 18:56:00 7.95 17526 United Kingdom 2011-12-08 2011-12-01 -15.90
540081 C581464 23458 DOLLY CABINET 3 DRAWERS -1 2011-12-08 18:57:00 14.95 15951 United Kingdom 2011-12-08 2011-12-01 -14.95
540082 C581464 71477 COLOURED GLASS STAR T-LIGHT HOLDER -6 2011-12-08 18:57:00 3.95 15951 United Kingdom 2011-12-08 2011-12-01 -23.70
540083 C581465 23660 HENRIETTA HEN MUG -2 2011-12-08 18:59:00 1.65 15755 United Kingdom 2011-12-08 2011-12-01 -3.30
540084 C581465 22171 3 HOOK PHOTO SHELF ANTIQUE WHITE -1 2011-12-08 18:59:00 8.50 15755 United Kingdom 2011-12-08 2011-12-01 -8.50
540085 C581465 21876 POTTERING MUG -4 2011-12-08 18:59:00 1.65 15755 United Kingdom 2011-12-08 2011-12-01 -6.60
540086 C581465 20914 SET/5 RED RETROSPOT LID GLASS BOWLS -3 2011-12-08 18:59:00 2.95 15755 United Kingdom 2011-12-08 2011-12-01 -8.85
540087 C581466 22838 3 TIER CAKE TIN RED AND CREAM -1 2011-12-08 19:20:00 14.95 13883 United Kingdom 2011-12-08 2011-12-01 -14.95
540088 C581466 22720 SET OF 3 CAKE TINS PANTRY DESIGN -2 2011-12-08 19:20:00 4.95 13883 United Kingdom 2011-12-08 2011-12-01 -9.90
540089 C581466 21216 SET 3 RETROSPOT TEA,COFFEE,SUGAR -1 2011-12-08 19:20:00 4.95 13883 United Kingdom 2011-12-08 2011-12-01 -4.95
540090 C581466 21535 RED RETROSPOT SMALL MILK JUG -2 2011-12-08 19:20:00 2.55 13883 United Kingdom 2011-12-08 2011-12-01 -5.10
540091 C581466 21232 STRAWBERRY CERAMIC TRINKET POT -1 2011-12-08 19:20:00 1.25 13883 United Kingdom 2011-12-08 2011-12-01 -1.25
540141 C581468 21314 SMALL GLASS HEART TRINKET POT -10 2011-12-08 19:26:00 2.10 13599 United Kingdom 2011-12-08 2011-12-01 -21.00
540142 C581468 22098 BOUDOIR SQUARE TISSUE BOX -12 2011-12-08 19:26:00 0.39 13599 United Kingdom 2011-12-08 2011-12-01 -4.68
540176 C581470 23084 RABBIT NIGHT LIGHT -4 2011-12-08 19:28:00 2.08 17924 United Kingdom 2011-12-08 2011-12-01 -8.32
540422 C581484 23843 PAPER CRAFT , LITTLE BIRDIE -80995 2011-12-09 09:27:00 2.08 16446 United Kingdom 2011-12-09 2011-12-01 -168469.60
540448 C581490 22178 VICTORIAN GLASS HANGING T-LIGHT -12 2011-12-09 09:57:00 1.95 14397 United Kingdom 2011-12-09 2011-12-01 -23.40
540449 C581490 23144 ZINC T-LIGHT HOLDER STARS SMALL -11 2011-12-09 09:57:00 0.83 14397 United Kingdom 2011-12-09 2011-12-01 -9.13
541541 C581499 M Manual -1 2011-12-09 10:28:00 224.69 15498 United Kingdom 2011-12-09 2011-12-01 -224.69
541715 C581568 21258 VICTORIAN SEWING BOX LARGE -5 2011-12-09 11:57:00 10.95 15311 United Kingdom 2011-12-09 2011-12-01 -54.75
541716 C581569 84978 HANGING HEART JAR T-LIGHT HOLDER -1 2011-12-09 11:58:00 1.25 17315 United Kingdom 2011-12-09 2011-12-01 -1.25
541717 C581569 20979 36 PENCILS TUBE RED RETROSPOT -5 2011-12-09 11:58:00 1.25 17315 United Kingdom 2011-12-09 2011-12-01 -6.25
11763 rows × 11 columns
# Flag the rows whose invoice number contains the letter "C".
# InvoiceNo is an object column; casting to str first means entries that are
# not strings (e.g. purely numeric invoice numbers — presumably read as ints
# from the workbook, TODO confirm) produce False instead of NaN, so the mask
# is a clean boolean Series.
query_c = mydata['InvoiceNo'].astype(str).str.contains('C')
# New table holding only the cancelled orders.
mydata_cannel = mydata.loc[query_c, :].copy()
# New table without the cancelled orders. `~` is the boolean NOT for a mask;
# unary minus is not defined for NumPy boolean arrays.
mydata_success = mydata.loc[~query_c, :].copy()
mydata_cannel.head()
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country date month SumCost
141 C536379 D Discount -1 2010-12-01 09:41:00 27.50 14527 United Kingdom 2010-12-01 2010-12-01 -27.50
154 C536383 35004C SET OF 3 COLOURED FLYING DUCKS -1 2010-12-01 09:49:00 4.65 15311 United Kingdom 2010-12-01 2010-12-01 -4.65
235 C536391 22556 PLASTERS IN TIN CIRCUS PARADE -12 2010-12-01 10:24:00 1.65 17548 United Kingdom 2010-12-01 2010-12-01 -19.80
236 C536391 21984 PACK OF 12 PINK PAISLEY TISSUES -24 2010-12-01 10:24:00 0.29 17548 United Kingdom 2010-12-01 2010-12-01 -6.96
237 C536391 21983 PACK OF 12 BLUE PAISLEY TISSUES -24 2010-12-01 10:24:00 0.29 17548 United Kingdom 2010-12-01 2010-12-01 -6.96
# Preview the first five non-cancelled rows.
mydata_success.head(5)
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country date month SumCost
0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55 17850 United Kingdom 2010-12-01 2010-12-01 15.30
1 536365 71053 WHITE METAL LANTERN 6 2010-12-01 08:26:00 3.39 17850 United Kingdom 2010-12-01 2010-12-01 20.34
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 2010-12-01 08:26:00 2.75 17850 United Kingdom 2010-12-01 2010-12-01 22.00
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-12-01 08:26:00 3.39 17850 United Kingdom 2010-12-01 2010-12-01 20.34
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 2010-12-01 08:26:00 3.39 17850 United Kingdom 2010-12-01 2010-12-01 20.34
# With cancelled orders separated out, look at the rows whose unit price is 0.
query_free = mydata_success['UnitPrice'] == 0
# Rows with a zero unit price.
mydata_free = mydata_success.loc[query_free, :].copy()
# Everything else. The mask is inverted with `~`, the boolean NOT operator,
# rather than unary minus, which is not defined for NumPy boolean arrays.
mydata_not_free = mydata_success.loc[~query_free, :].copy()
mydata_not_free.describe()
Quantity UnitPrice CustomerID SumCost
count 524880.000000 524880.000000 524880.000000 524880.000000
mean 10.616564 3.880407 11437.688582 20.233171
std 156.279734 42.063124 6799.537329 272.552926
min 1.000000 -11062.060000 0.000000 -11062.060000
25% 1.000000 1.250000 0.000000 3.900000
50% 4.000000 2.080000 14350.000000 9.920000
75% 11.000000 4.130000 16245.000000 17.700000
max 80995.000000 13541.330000 18287.000000 168469.600000
# The statistics above still show a negative minimum unit price, so pull out
# the rows with a unit price below zero for inspection.
query_mzero = mydata_not_free['UnitPrice'].lt(0)
mydata_mzero = mydata_not_free.loc[query_mzero, :]
mydata_mzero
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country date month SumCost
299983 A563186 B Adjust bad debt 1 2011-08-12 14:51:00 -11062.06 0 United Kingdom 2011-08-12 2011-08-01 -11062.06
299984 A563187 B Adjust bad debt 1 2011-08-12 14:52:00 -11062.06 0 United Kingdom 2011-08-12 2011-08-01 -11062.06
# The negative-price rows have invoice numbers starting with "A" and the
# description "Adjust bad debt"; they are accounting adjustments, not sales,
# so they are removed.
# `~query_mzero` inverts the mask explicitly; the previous
# `-(query_mzero) == True` relied on operator precedence and on unary minus
# acting as NOT on booleans. `.copy()` makes the result an independent frame
# so later column assignments do not hit a view of `mydata_not_free`.
mydata_finall = mydata_not_free.loc[~query_mzero, :].copy()
mydata_finall
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country date month SumCost
0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55 17850 United Kingdom 2010-12-01 2010-12-01 15.30
1 536365 71053 WHITE METAL LANTERN 6 2010-12-01 08:26:00 3.39 17850 United Kingdom 2010-12-01 2010-12-01 20.34
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 2010-12-01 08:26:00 2.75 17850 United Kingdom 2010-12-01 2010-12-01 22.00
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-12-01 08:26:00 3.39 17850 United Kingdom 2010-12-01 2010-12-01 20.34
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 2010-12-01 08:26:00 3.39 17850 United Kingdom 2010-12-01 2010-12-01 20.34
5 536365 22752 SET 7 BABUSHKA NESTING BOXES 2 2010-12-01 08:26:00 7.65 17850 United Kingdom 2010-12-01 2010-12-01 15.30
6 536365 21730 GLASS STAR FROSTED T-LIGHT HOLDER 6 2010-12-01 08:26:00 4.25 17850 United Kingdom 2010-12-01 2010-12-01 25.50
7 536366 22633 HAND WARMER UNION JACK 6 2010-12-01 08:28:00 1.85 17850 United Kingdom 2010-12-01 2010-12-01 11.10
8 536366 22632 HAND WARMER RED POLKA DOT 6 2010-12-01 08:28:00 1.85 17850 United Kingdom 2010-12-01 2010-12-01 11.10
9 536367 84879 ASSORTED COLOUR BIRD ORNAMENT 32 2010-12-01 08:34:00 1.69 13047 United Kingdom 2010-12-01 2010-12-01 54.08
10 536367 22745 POPPY'S PLAYHOUSE BEDROOM 6 2010-12-01 08:34:00 2.10 13047 United Kingdom 2010-12-01 2010-12-01 12.60
11 536367 22748 POPPY'S PLAYHOUSE KITCHEN 6 2010-12-01 08:34:00 2.10 13047 United Kingdom 2010-12-01 2010-12-01 12.60
12 536367 22749 FELTCRAFT PRINCESS CHARLOTTE DOLL 8 2010-12-01 08:34:00 3.75 13047 United Kingdom 2010-12-01 2010-12-01 30.00
13 536367 22310 IVORY KNITTED MUG COSY 6 2010-12-01 08:34:00 1.65 13047 United Kingdom 2010-12-01 2010-12-01 9.90
14 536367 84969 BOX OF 6 ASSORTED COLOUR TEASPOONS 6 2010-12-01 08:34:00 4.25 13047 United Kingdom 2010-12-01 2010-12-01 25.50
15 536367 22623 BOX OF VINTAGE JIGSAW BLOCKS 3 2010-12-01 08:34:00 4.95 13047 United Kingdom 2010-12-01 2010-12-01 14.85
16 536367 22622 BOX OF VINTAGE ALPHABET BLOCKS 2 2010-12-01 08:34:00 9.95 13047 United Kingdom 2010-12-01 2010-12-01 19.90
17 536367 21754 HOME BUILDING BLOCK WORD 3 2010-12-01 08:34:00 5.95 13047 United Kingdom 2010-12-01 2010-12-01 17.85
18 536367 21755 LOVE BUILDING BLOCK WORD 3 2010-12-01 08:34:00 5.95 13047 United Kingdom 2010-12-01 2010-12-01 17.85
19 536367 21777 RECIPE BOX WITH METAL HEART 4 2010-12-01 08:34:00 7.95 13047 United Kingdom 2010-12-01 2010-12-01 31.80
20 536367 48187 DOORMAT NEW ENGLAND 4 2010-12-01 08:34:00 7.95 13047 United Kingdom 2010-12-01 2010-12-01 31.80
21 536368 22960 JAM MAKING SET WITH JARS 6 2010-12-01 08:34:00 4.25 13047 United Kingdom 2010-12-01 2010-12-01 25.50
22 536368 22913 RED COAT RACK PARIS FASHION 3 2010-12-01 08:34:00 4.95 13047 United Kingdom 2010-12-01 2010-12-01 14.85
23 536368 22912 YELLOW COAT RACK PARIS FASHION 3 2010-12-01 08:34:00 4.95 13047 United Kingdom 2010-12-01 2010-12-01 14.85
24 536368 22914 BLUE COAT RACK PARIS FASHION 3 2010-12-01 08:34:00 4.95 13047 United Kingdom 2010-12-01 2010-12-01 14.85
25 536369 21756 BATH BUILDING BLOCK WORD 3 2010-12-01 08:35:00 5.95 13047 United Kingdom 2010-12-01 2010-12-01 17.85
26 536370 22728 ALARM CLOCK BAKELIKE PINK 24 2010-12-01 08:45:00 3.75 12583 France 2010-12-01 2010-12-01 90.00
27 536370 22727 ALARM CLOCK BAKELIKE RED 24 2010-12-01 08:45:00 3.75 12583 France 2010-12-01 2010-12-01 90.00
28 536370 22726 ALARM CLOCK BAKELIKE GREEN 12 2010-12-01 08:45:00 3.75 12583 France 2010-12-01 2010-12-01 45.00
29 536370 21724 PANDA AND BUNNIES STICKER SHEET 12 2010-12-01 08:45:00 0.85 12583 France 2010-12-01 2010-12-01 10.20
... ... ... ... ... ... ... ... ... ... ... ...
541879 581585 22726 ALARM CLOCK BAKELIKE GREEN 8 2011-12-09 12:31:00 3.75 15804 United Kingdom 2011-12-09 2011-12-01 30.00
541880 581585 22727 ALARM CLOCK BAKELIKE RED 4 2011-12-09 12:31:00 3.75 15804 United Kingdom 2011-12-09 2011-12-01 15.00
541881 581585 16016 LARGE CHINESE STYLE SCISSOR 10 2011-12-09 12:31:00 0.85 15804 United Kingdom 2011-12-09 2011-12-01 8.50
541882 581585 21916 SET 12 RETRO WHITE CHALK STICKS 24 2011-12-09 12:31:00 0.42 15804 United Kingdom 2011-12-09 2011-12-01 10.08
541883 581585 84692 BOX OF 24 COCKTAIL PARASOLS 25 2011-12-09 12:31:00 0.42 15804 United Kingdom 2011-12-09 2011-12-01 10.50
541884 581585 84946 ANTIQUE SILVER T-LIGHT GLASS 12 2011-12-09 12:31:00 1.25 15804 United Kingdom 2011-12-09 2011-12-01 15.00
541885 581585 21684 SMALL MEDINA STAMPED METAL BOWL 12 2011-12-09 12:31:00 0.85 15804 United Kingdom 2011-12-09 2011-12-01 10.20
541886 581585 22398 MAGNETS PACK OF 4 SWALLOWS 12 2011-12-09 12:31:00 0.39 15804 United Kingdom 2011-12-09 2011-12-01 4.68
541887 581585 23328 SET 6 SCHOOL MILK BOTTLES IN CRATE 4 2011-12-09 12:31:00 3.75 15804 United Kingdom 2011-12-09 2011-12-01 15.00
541888 581585 23145 ZINC T-LIGHT HOLDER STAR LARGE 12 2011-12-09 12:31:00 0.95 15804 United Kingdom 2011-12-09 2011-12-01 11.40
541889 581585 22466 FAIRY TALE COTTAGE NIGHT LIGHT 12 2011-12-09 12:31:00 1.95 15804 United Kingdom 2011-12-09 2011-12-01 23.40
541890 581586 22061 LARGE CAKE STAND HANGING STRAWBERY 8 2011-12-09 12:49:00 2.95 13113 United Kingdom 2011-12-09 2011-12-01 23.60
541891 581586 23275 SET OF 3 HANGING OWLS OLLIE BEAK 24 2011-12-09 12:49:00 1.25 13113 United Kingdom 2011-12-09 2011-12-01 30.00
541892 581586 21217 RED RETROSPOT ROUND CAKE TINS 24 2011-12-09 12:49:00 8.95 13113 United Kingdom 2011-12-09 2011-12-01 214.80
541893 581586 20685 DOORMAT RED RETROSPOT 10 2011-12-09 12:49:00 7.08 13113 United Kingdom 2011-12-09 2011-12-01 70.80
541894 581587 22631 CIRCUS PARADE LUNCH BOX 12 2011-12-09 12:50:00 1.95 12680 France 2011-12-09 2011-12-01 23.40
541895 581587 22556 PLASTERS IN TIN CIRCUS PARADE 12 2011-12-09 12:50:00 1.65 12680 France 2011-12-09 2011-12-01 19.80
541896 581587 22555 PLASTERS IN TIN STRONGMAN 12 2011-12-09 12:50:00 1.65 12680 France 2011-12-09 2011-12-01 19.80
541897 581587 22728 ALARM CLOCK BAKELIKE PINK 4 2011-12-09 12:50:00 3.75 12680 France 2011-12-09 2011-12-01 15.00
541898 581587 22727 ALARM CLOCK BAKELIKE RED 4 2011-12-09 12:50:00 3.75 12680 France 2011-12-09 2011-12-01 15.00
541899 581587 22726 ALARM CLOCK BAKELIKE GREEN 4 2011-12-09 12:50:00 3.75 12680 France 2011-12-09 2011-12-01 15.00
541900 581587 22730 ALARM CLOCK BAKELIKE IVORY 4 2011-12-09 12:50:00 3.75 12680 France 2011-12-09 2011-12-01 15.00
541901 581587 22367 CHILDRENS APRON SPACEBOY DESIGN 8 2011-12-09 12:50:00 1.95 12680 France 2011-12-09 2011-12-01 15.60
541902 581587 22629 SPACEBOY LUNCH BOX 12 2011-12-09 12:50:00 1.95 12680 France 2011-12-09 2011-12-01 23.40
541903 581587 23256 CHILDRENS CUTLERY SPACEBOY 4 2011-12-09 12:50:00 4.15 12680 France 2011-12-09 2011-12-01 16.60
541904 581587 22613 PACK OF 20 SPACEBOY NAPKINS 12 2011-12-09 12:50:00 0.85 12680 France 2011-12-09 2011-12-01 10.20
541905 581587 22899 CHILDREN'S APRON DOLLY GIRL 6 2011-12-09 12:50:00 2.10 12680 France 2011-12-09 2011-12-01 12.60
541906 581587 23254 CHILDRENS CUTLERY DOLLY GIRL 4 2011-12-09 12:50:00 4.15 12680 France 2011-12-09 2011-12-01 16.60
541907 581587 23255 CHILDRENS CUTLERY CIRCUS PARADE 4 2011-12-09 12:50:00 4.15 12680 France 2011-12-09 2011-12-01 16.60
541908 581587 22138 BAKING SET 9 PIECE RETROSPOT 3 2011-12-09 12:50:00 4.95 12680 France 2011-12-09 2011-12-01 14.85
524878 rows × 11 columns
# At this point the data set should no longer contain negative or zero-price
# rows; the cleaning stage ends here.
# Save the cleaned table as CSV.
# The path literal had been split across two lines because the "\n" in
# "\new_OnlineRetail.csv" was turned into a real line break, which is a
# syntax error; the backslash-n is restored here (harmless in a raw string).
# NOTE(review): the remaining directory separators after "D:" also look to be
# missing — confirm the intended output folder.
mydata_finall.to_csv(r'D:学习dataA\new_OnlineRetail.csv')