Bar Chart of the Frequency of Modals in Different Sections of the Brown Corpus

Natural Language Processing with Python

Chapter 4.8

 1 colors = 'rgbcmyk' # red, green, blue, cyan, magenta, yellow, black
 2     
 3 def bar_chart(categories, words, counts):
 4     "Plot a bar chart showing counts for each word by category"
 5     import pylab
 6     
 7     ind = pylab.arange(len(words))
 8     width = 0.1*1 / (len(categories) + 1)*10
 9     bar_groups = []
10     for c in range(len(categories)):
11         bars = pylab.bar(ind+c*width, counts[categories[c]], width,
12                         color=colors[c % len(colors)])
13         bar_groups.append(bars)
14     pylab.xticks(ind+width, words)
15     pylab.legend([b[0] for b in bar_groups], categories, loc='upper left')
16     pylab.ylabel('Frequency')
17     pylab.title('Frequency of Six Modal Verbs by Genre')
18     pylab.show()
19     
def test_bar_char():
    """Chart how often six modal verbs occur in five Brown Corpus genres.

    Counts the occurrences of each modal per genre with an NLTK
    ``ConditionalFreqDist`` and hands the result to ``bar_chart``.
    Requires NLTK and its Brown corpus data to be installed.
    """
    # Imported locally, like pylab in bar_chart; the name was previously
    # used without any import in this file.
    import nltk

    genres = ['news', 'religion', 'hobbies', 'government', 'adventure']
    modals = ['can', 'could', 'may', 'might', 'must', 'will']
    # Set lookup for the per-token filter; the list keeps the display order.
    modal_set = frozenset(modals)
    cfdist = nltk.ConditionalFreqDist(
        (genre, word)
        for genre in genres
        for word in nltk.corpus.brown.words(categories=genre)
        if word in modal_set)
    # One list of counts per genre, aligned with the order of ``modals``.
    counts = {genre: [cfdist[genre][word] for word in modals]
              for genre in genres}
    bar_chart(genres, modals, counts)

修改了width,结果为:

原文地址:https://www.cnblogs.com/gui0901/p/4450611.html