实现Apriori算法(python)

  1 # coding: utf-8
  2 
  3 # 利用python实现apriori算法
  4 
  5 # In[1]:
  6 
  7 
  8 #导入需要的库
  9 from numpy import *
 10 
 11 
 12 # In[2]:
 13 
 14 
 15 def loadDataSet():
 16     return [[1,3,4],[2,3,5],[1,2,3,5],[2,5]]
 17 
 18 
 19 # In[3]:
 20 
 21 
 22 def createC1(dataSet):
 23     C1=[]
 24     for transaction in dataSet:
 25         for item in transaction:
 26             if not [item] in C1:
 27                 C1.append([item])
 28     C1.sort()
 29     return map(frozenset,C1)
 30 
 31 
 32 # In[4]:
 33 
 34 
 35 #计算Ck在数据集D中的支持度,并返回支持度大于minSupport的数据集
 36 def scanD(D,Ck,minSupport):
 37     ssCnt={}
 38     for tid in D:
 39         for can in Ck:
 40             if can.issubset(tid):
 41                 if can not in ssCnt.keys():
 42                     ssCnt[can]=1
 43                 else :
 44                     ssCnt[can]+=1
 45     numItems=float(len(D))
 46     retList=[]
 47     supportData={}
 48     for key in ssCnt:
 49         support=ssCnt[key]/numItems
 50         if support>= minSupport:
 51             retList.insert(0,key)
 52         supportData[key]=support
 53     return retList,supportData
 54 
 55 
 56 # In[15]:
 57 
 58 
 59 def aprioriGen(Lk,k):
 60     retList=[]
 61     lenLk=len(Lk)
 62     for i in range(lenLk):
 63         for j in range(i+1,lenLk):
 64             L1=list(Lk[i])[:k-2]
 65             L2=list(Lk[j])[:k-2]
 66             L1.sort()
 67             L2.sort()
 68             if L1==L2:
 69                 retList.append(Lk[i] | Lk[j])
 70     return retList
 71         
 72 
 73 
 74 # In[14]:
 75 
 76 
 77 def apriori(dataSet, minSupport=0.5):
 78     C1=createC1(dataSet)
 79     D=list(map(set,dataSet))
 80     print('D:',D)
 81     L1,supportData= scanD(D,C1,minSupport)
 82     L=[L1]
 83     k=2
 84     while (len(L[k-2])>0):
 85         Ck=aprioriGen(L[k-2], k)
 86         Lk,supK= scanD(D,Ck,minSupport)
 87         supportData.update(supK)
 88         if len(Lk)==0:
 89             break
 90         L.append(Lk)
 91         k+=1
 92     return L,supportData
 93 
 94 
 95 # In[19]:
 96 
 97 
 98 def calConf(freqSet,H,supportData,brl,minConf=0.7):
 99     prunedH=[]
100     for conseq in H:
101         conf=supportData[freqSet]/supportData[freqSet-conseq]
102         if conf >= minConf:
103             print(freqSet-conseq, '-->',conseq,'conf',conf)
104             brl.append((freqSet-conseq,conseq,conf))
105             prunedH.append(conseq)
106     return prunedH
107 
108 
109 # In[21]:
110 
111 
def rulesFromConseq(freqSet,H,supportData,brl,minConf=0.7):
    """Recursively derive rules with larger consequents from freqSet.

    The consequents in H (all of size m) are merged into candidates of
    size m+1 with aprioriGen, scored with calConf, and the survivors are
    fed back into this function. The consequents in H themselves are not
    scored here.

    Args:
        freqSet: frozenset the rules are derived from.
        H: non-empty list of equally sized consequents (frozensets).
        supportData: mapping from itemset to support.
        brl: list that receives the accepted rules (via calConf).
        minConf: minimum confidence for a rule to be accepted.
    """
    m = len(H[0])
    if len(freqSet) <= m + 1:
        # A consequent of size m+1 would leave no antecedent items.
        return
    merged = aprioriGen(H, m + 1)
    merged = calConf(freqSet, merged, supportData, brl, minConf)
    print('Hmpl=',merged)
    print('len(Hmpl)=',len(merged),'len(freqSet)=',len(freqSet))
    # At least two survivors are needed to merge into a larger consequent.
    if len(merged) > 1:
        rulesFromConseq(freqSet, merged, supportData, brl, minConf)
121 
122 
123 # In[9]:
124 
125 
def generateRules(L,supportData,minConf=0.7):
    """Generate association rules from the frequent itemsets in L.

    Args:
        L: list of itemset lists as returned by apriori; ``L[0]`` (the
            1-itemsets) is skipped because a rule needs at least two items.
        supportData: mapping from itemset to support.
        minConf: minimum confidence for a rule to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Always score the single-item consequents. Handing H1
            # straight to rulesFromConseq would skip them for itemsets
            # of three or more items, because that function only scores
            # the merged (larger) consequents.
            H1 = calConf(freqSet, H1, supportData, bigRuleList, minConf)
            # A larger consequent can only reach minConf if every item in
            # it did so on its own, so recursing on the survivors loses
            # no rule. Two or more survivors are needed to merge.
            if i > 1 and len(H1) > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList
136 
137 
138 # In[10]:
139 
140 
def testApriori():
    """Run apriori on the sample data at two support thresholds and print the results."""
    dataSet=loadDataSet()
    print('dataSet:',dataSet)
    L1,supportData1=apriori(dataSet,minSupport=0.7)
    print('L(0.7):',L1)
    print('supportData(0.7):',supportData1)
    print('------------------------------------------')
    L2,supportData2=apriori(dataSet,minSupport=0.5)
    print('L(0.5):',L2)
    # Print the actual mapping; the value used to be trapped inside the
    # string literal, so only the label text was shown.
    print('supportData(0.5):',supportData2)
    print('------------------------------------------')
152 
153 
154 # In[11]:
155 
156 
def testGenerateRules():
    """Mine the sample data at minSupport=0.2 and print the derived rules."""
    transactions = loadDataSet()
    itemsetLevels, supports = apriori(transactions, minSupport=0.2)
    print('L(0.2):',itemsetLevels)
    print('minSupport(0.2):',supports)
    # NOTE(review): confidence is a ratio of supports and should not
    # exceed 1.0, so minConf=1.1 looks unreachable and the printed rule
    # list would be empty -- confirm whether a lower threshold was intended.
    rules = generateRules(itemsetLevels, supports, minConf=1.1)
    print('Rules:',rules)
164 
165 
166 # In[12]:
167 
168 
def main():
    """Run the two demonstration routines in order."""
    for demo in (testApriori, testGenerateRules):
        demo()


# In[22]:


# Only run the demos when executed as a script, not on import.
if __name__ == "__main__":
    main()

参考:“机器学习实战-ApacheCN”

原文地址:https://www.cnblogs.com/share-sjb/p/9977803.html