强化学习-下棋系列

整理强化学习知识，实践出真知。

本篇文章只贴代码，共包含三个部分：GameFivePot（五子棋游戏环境）、GamePlayer（随机落子的玩家）和 WuziBoard（棋盘可视化）。

效果图：


绘制棋盘：

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Hiuhung Wan

import turtle
from enum import Enum



class PotColor(Enum):
    """Stone colours used to tag moves on the board."""
    # Stones of the first colour; drawn as black dots by WuziBoard.drawNow.
    Black = 1
    # Stones of the second colour; WuziBoard.drawNow renders these in red.
    White = 2

import time
class WuziBoard(object):
    """Render a Gomoku (five-in-a-row) board and its stones with ``turtle``.

    The grid has ``RowNum`` lines in each direction, spans ``_BOARD_SIZE``
    turtle units per side and is centred on the turtle origin.
    """

    # Side length of the drawn grid, in turtle units.
    _BOARD_SIZE = 500
    # Coordinate of the left/bottom edge of the grid.
    _BOARD_MIN = -250

    def __init__(self, RowNum):
        """Prepare the turtle and compute the grid spacing.

        Args:
            RowNum: number of grid lines per side; must be at least 2.

        Raises:
            ValueError: if ``RowNum`` is smaller than 2 (the spacing would
                otherwise require a division by zero).
        """
        if RowNum < 2:
            raise ValueError("RowNum must be at least 2, got {}".format(RowNum))
        turtle.speed(9)
        turtle.hideturtle()
        self.RowNum = RowNum
        # Half the distance between two neighbouring grid lines.
        self.halfDim = self._BOARD_SIZE / (RowNum - 1) / 2.0

    def _drawLine(self, x0, y0, x1, y1):
        """Draw one straight segment from (x0, y0) to (x1, y1)."""
        turtle.penup()
        turtle.setpos(x0, y0)
        turtle.pendown()
        turtle.setpos(x1, y1)

    def drawBoard(self, ActionHis=None):
        """Draw the empty grid, then the stones in ``ActionHis`` if given.

        Blocks in ``turtle.done()`` until the window is closed.

        Args:
            ActionHis: optional sequence of ``(x, y, color)`` moves, where
                ``color`` is a ``PotColor`` member or its integer value.
        """
        turtle.screensize(400, 400, "white")
        turtle.title('五子棋')
        turtle.home()
        turtle.speed(0)
        # Pause kept from the original code; presumably it gives the window
        # time to appear before drawing starts -- TODO confirm it is needed.
        time.sleep(5)

        low = self._BOARD_MIN
        high = low + self._BOARD_SIZE
        step = self.halfDim * 2

        # Vertical lines, left to right.
        for i in range(self.RowNum):
            x = low + i * step
            self._drawLine(x, low, x, high)

        # Horizontal lines, bottom to top.
        for i in range(self.RowNum):
            y = low + i * step
            self._drawLine(low, y, high, y)

        if ActionHis is not None:
            self.drawNow(ActionHis)

        turtle.done()

    def action2potxy(self, action):
        """Convert a grid position ``(col, row)`` to turtle coordinates.

        Returns:
            ``(x, y)`` tuple locating the grid intersection on the canvas.
        """
        x = self._BOARD_MIN + action[0] * self.halfDim * 2
        y = self._BOARD_MIN + action[1] * self.halfDim * 2
        return x, y

    def drawNow(self, RunAction):
        """Draw every stone of a move history; the last one is drawn larger.

        Args:
            RunAction: sequence of ``(x, y, color)`` moves. ``color`` may be
                a ``PotColor`` member or its raw integer value; anything that
                is not black is drawn in red.
        """
        lastIndex = len(RunAction) - 1
        for index, potsite in enumerate(RunAction):
            x, y = self.action2potxy((potsite[0], potsite[1]))
            turtle.penup()
            turtle.setpos(x, y)
            turtle.pendown()

            # Accept both the enum member and its plain integer value so
            # histories built from raw ints are coloured correctly.
            color = potsite[2]
            isBlack = color == PotColor.Black or color == PotColor.Black.value
            colorName = "Black" if isBlack else "Red"

            # The most recent move gets a bigger dot so it stands out.
            dotSize = 20 if index == lastIndex else 10
            turtle.dot(dotSize, colorName)

    def drawAction(self):
        """Placeholder; not implemented."""
        pass

def main():
    """Demo: draw a 6-line board with three sample stones."""
    # Colours are given as PotColor members because the board renderer
    # compares against PotColor.Black; raw ints would never match it.
    ActionHis = [
        (0, 1, PotColor.Black),
        (1, 1, PotColor.White),
        (5, 1, PotColor.Black),
    ]
    wuziBoard = WuziBoard(6)
    wuziBoard.drawBoard(ActionHis)


if __name__ == "__main__":
    # Run the board-drawing demo when this file is executed as a script.
    main()

游戏代码：

class GameFivePot(object):
    """Gomoku (five-in-a-row) game environment.

    Board cells are addressed as ``(x, y)`` with ``0 <= x < rowNum`` and
    ``0 <= y < colNum``. ``RunAction[x][y]`` holds ``0`` for an empty cell or
    the colour value of the stone placed there.
    """

    def __init__(self, rowNum=None, colNum=None):
        """Create an empty board.

        Args:
            rowNum: size of the x axis; defaults to the module-level
                ``ROW_NUM`` (expected to be defined elsewhere in the module).
            colNum: size of the y axis; defaults to the module-level
                ``ECO_NUM`` (expected to be defined elsewhere in the module).
        """
        self.rowNum = ROW_NUM if rowNum is None else rowNum
        self.colNum = ECO_NUM if colNum is None else colNum

        self.potCount = 0
        self.ActionHis = []

        # Every cell of the board; never modified after construction.
        self.AllAction = [
            (x, y) for x in range(self.rowNum) for y in range(self.colNum)
        ]
        # Cells still free to play. A separate list, so that consuming moves
        # does not shrink AllAction.
        self.AvailAction = list(self.AllAction)
        # Indexed as RunAction[x][y]: rowNum outer lists of colNum cells.
        self.RunAction = [[0] * self.colNum for _ in range(self.rowNum)]

    def getActions(self):
        """Return the list of cells that are still free."""
        return self.AvailAction

    def getRunAction(self):
        """Return the board matrix, indexed as ``[x][y]``."""
        return self.RunAction

    def getActionHis(self):
        """Return the move history as a list of ``(x, y, potColor)``."""
        return self.ActionHis

    def _countInDirection(self, x, y, dx, dy, potColor):
        """Count consecutive ``potColor`` stones from (x, y) along (dx, dy).

        The starting cell itself is not counted and at most four steps are
        examined, which is enough to detect a run of five.
        """
        count = 0
        for step in range(1, 5):
            x1 = x + dx * step
            y1 = y + dy * step
            if not (0 <= x1 < self.rowNum and 0 <= y1 < self.colNum):
                break
            if self.RunAction[x1][y1] != potColor:
                break
            count += 1
        return count

    def is_over(self, action, potColor):
        """Check whether the move just played at ``action`` ends the game.

        Args:
            action: ``(x, y)`` of the stone that was just placed.
            potColor: colour value of that stone.

        Returns:
            ``(isOver, isWin)``: ``(True, True)`` if the stone completes a
            line of five or more, ``(True, False)`` if no free cell is left,
            ``(False, False)`` otherwise.
        """
        x = action[0]
        y = action[1]

        # Along x, along y, main diagonal, anti-diagonal. Each axis is
        # scanned in both directions from the new stone.
        for dx, dy in ((1, 0), (0, 1), (1, 1), (1, -1)):
            lineLength = (
                1
                + self._countInDirection(x, y, dx, dy, potColor)
                + self._countInDirection(x, y, -dx, -dy, potColor)
            )
            if lineLength >= 5:
                return True, True

        # Board full without a winner: the game is a draw.
        if not self.AvailAction:
            return True, False

        return False, False

    def action(self, action, potColor):
        """Place a stone and report the resulting game state.

        Args:
            action: ``(x, y)`` tuple of a free cell.
            potColor: colour value to store in that cell.

        Returns:
            ``(RunAction, isOver, isWin)``.

        Raises:
            ValueError: if ``action`` is not among the available cells.
        """
        # Validate first so an illegal move leaves counters/history untouched.
        self.AvailAction.remove(action)

        self.potCount += 1
        self.ActionHis.append((action[0], action[1], potColor))
        self.RunAction[action[0]][action[1]] = potColor

        isOver, isWin = self.is_over(action, potColor)
        return self.RunAction, isOver, isWin

    def __repr__(self):
        return "Game step count: {}, AvailAction len: {},  ".format(
            self.potCount, len(self.AvailAction)
        )

玩家代码：

class GamePlayer(object):
    """Player that chooses its moves at random for one stone colour."""

    def __init__(self, potColor):
        """Create a player.

        Args:
            potColor: colour value passed to the game with every move.
        """
        self.actionHis = []
        self.color = potColor

    def getActionHis(self):
        """Return the list of actions this player has chosen so far."""
        return self.actionHis

    def play(self, game):
        """Choose one available action and play it on ``game``.

        Args:
            game: object exposing ``getActions()`` and
                ``action(action, color)``; the latter must return a
                three-item result.

        Returns:
            ``(gameInfo, isOver, isWin)`` as returned by ``game.action``.
        """
        actions = game.getActions()
        action = self.choiceActions(actions)
        self.actionHis.append(action)

        gameInfo, isOver, isWin = game.action(action, self.color)
        return gameInfo, isOver, isWin

    def choiceActions(self, actions):
        """Pick one element of ``actions`` at random.

        Raises:
            IndexError: if ``actions`` is empty (raised by ``random.choice``).
        """
        # Imported locally so the class does not depend on a module-level
        # import being present.
        import random

        return random.choice(actions)

    def __repr__(self):
        return "color: {}, actionHis: {},  ".format(self.color, self.actionHis)

github 代码地址：

https://github.com/rehylas/play_chess

PS：下一篇文章将使用 MCTS 让玩家相互博弈。