Python 使用 Vaex 处理海量数据

Vaex 官方文档示例:https://vaex.io/docs/examples.html

Examples — vaex 4.3.0 documentation

使用中的问题

#coding:utf-8


import python_utils
import vaex

from vaex import groupby,grids,utils,legacy,selections
import numpy as np
import pandas as pa
from pandas import Series,DataFrame

# Exploration script: load a CSV file with vaex, then filter, select, sort
# and group it, printing each intermediate result.

# Raw string on purpose: in a normal literal "\U" is a truncated unicode
# escape on Python 3 (SyntaxError), and "\t" / "\a" would silently become
# TAB / BEL characters, producing a wrong path.
CSV_PATH = r"C:\Users\Anchnet\Desktop\ttt\aa.csv"

df = vaex.read_csv(CSV_PATH)

# Keep only the rows whose column "e" equals the given product name.
df_a = df[df["e"] == "化纤针织内裤"]

# NOTE(review): select() appears to register a selection on df rather than
# return the matching rows, so this likely prints None -- confirm against the
# vaex documentation; use df[...] filtering (as above) if rows are wanted.
print(df.select(df["a"] == "义乌市智洋商品采购有限公司"))

# The trailing "# type:" comment declares the concrete class of df_s; the
# surrounding notes report that this is what makes IDE completion work.
df_s = df.sort('e', ascending=False)  # type: vaex.dataframe.DataFrameLocal
print(df_s.count())

print(df_a)

# Group by column "i" and count the "i" values in each group.
dv_group = df.groupby(df['i'], agg=vaex.agg.count(df['i']))
print(dv_group)

print(type(df))

在赋值语句后加上 `# type: vaex.dataframe.DataFrameLocal` 类型注释之后,IDE 就可以对该变量进行代码补全啦!

原文地址:https://www.cnblogs.com/mrguoguo/p/14927935.html