INT104-lab7

Python 信息熵-条件熵计算

 1 import csv
 2 import numpy as np
 3 
 4 
 5 def read(path: str) -> tuple:
 6     with open(path, 'r') as f:
 7         text = csv.reader(f)
 8         A = []
 9         for row in text:
10             A.append(row)
11     return A, len(A), len(A[0])
12 
13 
def h(p: float) -> float:
    """Return the single-outcome entropy term ``-p * log2(p)``.

    Args:
        p: Probability of one outcome.

    Returns:
        The term's value in bits; 0.0 when ``p`` is 0.
    """
    # In floating point 0 * log2(0) is 0 * -inf = nan; the entropy
    # convention treats a zero-probability outcome as contributing 0.
    if p == 0:
        return 0.0
    return -p * np.log2(p)
16 
17 
def Hc(A: list, N: int, M: int) -> float:
    """Return the two-outcome entropy of the last column of ``A``.

    The field at index ``M - 1`` of every row is converted with ``int``
    and summed; that sum divided by ``N`` is used as the probability of
    one outcome and its complement as the other.
    NOTE(review): this presumes the column holds only 0/1 values -
    confirm against the data file.

    Args:
        A: Rows (sequences of values accepted by ``int``).
        N: Denominator for the proportion, i.e. the number of rows the
            proportion is taken over.
        M: Number of columns per row; column ``M - 1`` is the one summed.

    Returns:
        The entropy in bits. 0.0 when ``N`` is 0 or when the proportion
        is exactly 0 or 1.
    """
    # No rows to take a proportion over: avoid dividing by zero.
    if N == 0:
        return 0.0
    count = sum(int(a[M - 1]) for a in A)
    p = count / N
    # A pure column has zero entropy; evaluating the formula directly
    # would produce 0 * log2(0) = nan.
    if p == 0 or p == 1:
        return 0.0
    return h(p) + h(1 - p)
24 
25 
def Hcw(A: list, w: int, N: int, M: int) -> float:
    """Return the entropy of the last column conditioned on column ``w``.

    Rows are split into two groups by whether ``int(row[w])`` is 0, and
    the entropy of each group (via ``Hc``) is weighted by the share of
    rows that fall into it.

    Args:
        A: Rows (sequences of values accepted by ``int``).
        w: Index of the column to condition on.
        N: Total number of rows, used as the denominator of the weights.
        M: Number of columns per row, forwarded to ``Hc``.

    Returns:
        The weighted sum of the two group entropies in bits; 0.0 when
        ``N`` is 0.
    """
    if N == 0:
        return 0.0
    subset0 = []
    subset1 = []
    for a in A:
        if int(a[w]) == 0:
            subset0.append(a)
        else:
            subset1.append(a)
    total = 0.0
    for subset in (subset0, subset1):
        # An empty group has weight 0 and would otherwise divide by zero.
        if not subset:
            continue
        # The entropy inside a group must be taken over that group's own
        # size; using the overall N would yield a joint probability
        # rather than the conditional one.
        total += (len(subset) / N) * Hc(subset, len(subset), M)
    return total
37 
38 
if __name__ == '__main__':
    # Report the information gain of each of the first five columns with
    # respect to the last column of the data file.
    path = 'binary_data.csv'
    A, N, M = read(path)
    # The unconditioned entropy does not depend on the column index, so
    # compute it once instead of on every iteration.
    base_entropy = Hc(A, N, M)
    # NOTE(review): the column count is hard-coded to 5 - confirm it
    # matches the number of feature columns in binary_data.csv.
    for i in range(5):
        IG = base_entropy - Hcw(A, i, N, M)
        print("IG(w = %d) = %.5f" % (i, IG))
~~Jason_liu O(∩_∩)O
原文地址:https://www.cnblogs.com/JasonCow/p/14751272.html