# A simple getting-started example, part 2

import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
DEFAULT_DATA_PATH = "usagov_bitly_data2012-03-16-1331923249.txt"


def _load_records(path):
    """Return the JSON objects stored one per line in the file at *path*.

    The file is opened with a context manager so the handle is closed even
    when a line fails to parse. Whitespace-only lines are skipped rather
    than handed to ``json.loads``, which would raise on them.
    """
    # JSON text is UTF-8 by specification, so do not depend on the locale
    # default encoding of the machine running the script.
    with open(path, encoding="utf-8") as fp:
        return [json.loads(line) for line in fp if line.strip()]


def main(path=DEFAULT_DATA_PATH, show_plot=True):
    """Summarise the ``tz`` and ``a`` fields of a line-delimited JSON file.

    The function prints, in order: the number of records, the raw ``tz``
    column, the 20 most frequent ``tz`` values (missing values labelled
    ``'Missing'``, empty strings labelled ``'Unknown'``), the first
    whitespace-separated token of the first five ``a`` values, the first ten
    Windows / not-Windows labels, the first ten rows of the per-``tz`` label
    counts, the first ten sort positions, and the ten ``tz`` rows with the
    largest totals.

    NOTE(review): ``tz`` presumably holds a time-zone name and ``a`` a
    browser user-agent string -- confirm against the data set.

    Args:
        path: Location of the input file; defaults to the data file the
            script was written for.
        show_plot: When true, draw the normalised counts as a stacked
            horizontal bar chart and call ``plt.show()``. Pass ``False`` to
            run the analysis without touching matplotlib.

    Returns:
        DataFrame indexed by ``tz`` with one column per label
        (``'Windows'`` / ``'not Windows'``), restricted to the (at most) ten
        rows with the largest totals, each row scaled to sum to 1.
    """
    records = _load_records(path)
    print(len(records))
    frame = pd.DataFrame(records)
    print(frame['tz'])

    # Give absent values and empty strings explicit labels so both appear in
    # the tally; ``mask`` builds a new Series instead of assigning through a
    # boolean index.
    clean_tz = frame['tz'].fillna('Missing')
    clean_tz = clean_tz.mask(clean_tz == '', 'Unknown')
    tz_counts = clean_tz.value_counts()
    print(tz_counts.head(20))
    # The ten most common values can be charted with
    # tz_counts.head(10).plot(kind='barh', rot=0) followed by plt.show().

    # First whitespace-separated token of every non-null ``a`` value.
    results = pd.Series([x.split()[0] for x in frame['a'].dropna()])
    print(results.head(5))

    cframe = frame[frame['a'].notnull()]
    # na=False keeps the mask strictly boolean; a NaN in the mask would be
    # treated as truthy by np.where and be mislabelled 'Windows'.
    is_windows = cframe['a'].str.contains('Windows', na=False)
    operating_system = np.where(is_windows, 'Windows', 'not Windows')
    print(operating_system[:10])

    # Count rows per (tz, label) pair and pivot the label into columns;
    # combinations that never occur become 0 instead of NaN.
    by_tz_os = cframe.groupby(['tz', operating_system])
    agg_counts = by_tz_os.size().unstack().fillna(0)
    print(agg_counts.head(10))

    # argsort gives the row positions in ascending order of the row total,
    # so the last ten positions are the rows with the largest totals.
    indexer = agg_counts.sum(axis=1).argsort()
    print(indexer.head(10))
    count_subset = agg_counts.take(indexer).iloc[-10:]
    print(count_subset)
    # The absolute counts can be charted with
    # count_subset.plot(kind='barh', stacked=True).

    # Divide every row by its own total so rows are comparable as shares.
    normed_subset = count_subset.div(count_subset.sum(axis=1), axis=0)
    if show_plot:
        normed_subset.plot(kind='barh', stacked=True)
        plt.show()
    return normed_subset


if __name__ == "__main__":
    main()
    
    
    

# Original article: https://www.cnblogs.com/sklww/p/3655246.html