# python系统学习:第五周之数据分析1

# Location of the input file that the script opens and parses line by line.
# A raw string is required here: in a normal literal the "\b" in
# "\bitly_usagov" is the backspace escape, which silently corrupts the
# Windows path (the other backslash pairs are invalid escapes that only
# produce warnings).
path = r'D:\操作软件\pydata-book-2nd-edition\pydata-book-2nd-edition\datasets\bitly_usagov\example.txt'
# json格式处理
import json

# Parse every line of the input file as one JSON document.  The context
# manager closes the file handle deterministically, and the explicit encoding
# keeps the result independent of the platform default (JSON text is UTF-8).
with open(path, encoding='utf-8') as data_file:
    records = [json.loads(line) for line in data_file]
# Sanity check: print(records[0])
# tz时区计数
from pandas import DataFrame, Series
import pandas as pd
import numpy as np

# Count how often each value of the 'tz' field occurs.
frame = DataFrame(records)
# Records that have no 'tz' value are labelled 'MISS'.
cframe = frame['tz'].fillna('MISS')
# Records whose 'tz' is an empty string are labelled 'KONG'.  This must be an
# assignment: the previous `==` only built a throwaway comparison result, so
# empty strings were never relabelled and stayed '' in the counts.
cframe[cframe == ''] = 'KONG'
tz_count = cframe.value_counts()
# Sanity check: print(tz_count[:20])
# 画图
import matplotlib as mpl
import pylab as pl

# Horizontal bar chart of the first 20 entries of tz_count, with the tick
# labels rotated by 20 degrees.
tz_count.head(20).plot.barh(rot=20)
# Title and axis captions for the current figure.
pl.title(u'tz_count')
pl.ylabel(u'type')
pl.xlabel(u'count')
# To display the figure: pl.show()
# Browser type: take the first whitespace-separated token of every non-null
# value in column 'a' (presumably the user-agent string -- confirm against the
# data) and count how often each token occurs.
# Sanity check of the raw input: print(records[0])
agents = frame['a'].dropna()
result = Series([agent.split(None, 1)[0] for agent in agents])
# Sanity check: print(result[:20])
liu_count = result.value_counts()
# Sanity check: print(liu_count[:20])
# Second grouping: per 'tz' value, split the rows by whether column 'a'
# mentions Windows.
# Sanity check of the raw input: print(records[0])
frame = DataFrame(records)
has_agent = frame['a'].notna()
gframe = frame[has_agent]
# Grouping key: one label per remaining row, 'Windows' when column 'a'
# matches 'Windows' and 'Not Windows' otherwise.
is_windows = gframe['a'].str.contains('Windows')
by_column = np.where(is_windows, 'Windows', 'Not Windows')
# Group the rows by 'tz' and by that label.
by_detail = gframe.groupby(['tz', by_column])
# Rows per (tz, label) pair; the labels become columns and combinations that
# never occur are filled with 0.
group_sizes = by_detail.size()
by_count = group_sizes.unstack().fillna(0)
print(by_count.head(20))
# 原文地址:https://www.cnblogs.com/niushichong/p/10331219.html