算法笔记--极大极小搜索及alpha-beta剪枝

参考1:https://www.zhihu.com/question/27221568

参考2:https://blog.csdn.net/hzk_cpp/article/details/79275772

参考3:https://blog.csdn.net/BIT1120172185/article/details/80963609

极小极大搜索算法即minimax搜索算法

主要应用于零和博弈(非胜即负,如围棋,象棋,井子棋等),完全信息(玩家知道之前所有的步骤。象棋就是完全信息,因为玩家是交替着落子,且之前的步骤都能在棋盘上体现)

这个算法采用搜索算法递归实现,一层为先手,记为a, 一层为后手,记为b, 交替出现

对于最终局面,有一个分数(比如:先手胜分数为1, 平局分数为0,先手输分数为-1)

先手a想要让这个分数越大越好,后手b想要让这个分数越小越好,于是搜索到先手这一层,取最大返回,搜索到后手这一层,取最小返回

如下图:

于是伪代码为:

int MaxMin(position,int d)
{
    int bestvalue,value;
    if(game over)   //检查游戏是否结束 
        return evaluation(p);// 游戏结束,返回估值 
    if(depth<=0)    //检查是否是叶子节点 
        return evaluation(p);//叶子节点,返回估值 
    if(max)         //极大值点 
        bestvalue=-INFINTY;
    else            //极小值点 
        bestvalue=INFINTY;
    for(each possibly move m)
    {
        MakeMove(m);    //走棋 
        value=MaxMin(p,d-1);
        UnMakeMove(m);  //恢复当前局面 
        if(max)
            bestvalue=max(value,bestvalue);//取最大值 
        else
            bestvalue=min(value,bestvalue);//取最小值 
    }
    return bestvalue;
}

 关于alpha-beta剪枝:

如果当前层为取最小,如果取最小后比上一层当前最大值还小,则不需要往下搜索,因为上一层根本不会选择当前节点往下搜,还有更好的选择

同理,如果当前层为取最大,如果取最大后比上一层当前最小值还大,则不需要往下搜索,因为上一层根本不会选择当前节点往下搜

具体推荐看最上面的知乎链接点赞最多的回答。

alpha-beta剪枝后的伪代码:

int AlphaBeta(nPlay,nAlpha,nBeta)
{
    if(game over)
        return Eveluation;   //胜负已分,返回估值
    if(nPly==0)
        return  Eveluation;  //叶子节点返回估值
    if(Is Min Node)          //判断 节点类型 
    {        // 极小值节点
        for(each possible move m)
        {
            make move m;      //生成新节点
            score=AlphaBeta(nPly-1,nAlpha,nBeta)//递归搜索子节点
            unmake move m;//撤销搜索过的节点
            if(score<nBeta)
            {
                nBeta=score;//取极小值
                if(nAlpha>=nBeta)
                    return nAlpha;//alpha剪枝,抛弃后继节点 
             } 
        }
        return nBeta;//返回最小值 
    }
    else
    {//取极大值的节点 
        for(each possible move m)
        {
            make move m;      //生成新节点
            score=AlphaBeta(nPly-1,nAlpha,nBeta)//递归搜索子节点
            unmake move m;//撤销搜索过的节点
            if(score>nAlpha)
            {
                nAlpha=score;//取极小值
                if(nAlpha>=nBeta)
                    return nBeta;//nBeta剪枝,抛弃后继节点 
             } 
        }
        return nAlpha;//返回最小值 
    } 
} 

例题1:POJ - 1568

思路:井子棋下的步数小于等于4必平局

代码:

#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize(4)
#include<cstdio>
#include<iostream>
using namespace std;
#define fi first
#define se second
#define pi acos(-(long double)1.0)
#define LL long long
//#define mp make_pair
#define pb push_back
#define ls rt<<1, l, m
#define rs rt<<1|1, m+1, r
#define ULL unsigned LL
#define pll pair<LL, LL>
#define pli pair<LL, int>
#define pii pair<int, int>
#define piii pair<pii, int>
#define pdd pair<long double, long double>
#define mem(a, b) memset(a, b, sizeof(a))
#define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
#define fopen freopen("in.txt", "r", stdin);freopen("out.txt", "w", stout);
//head

char s[10], mp[10][10]; 
int cnt = 0, ansx, ansy;
int check(char c) {
    for (int i = 0; i < 4; i++) {
        int a = 0;
        for (int j = 0; j < 4; j++) {
            if(mp[i][j] == c) a++;
        }
        if(a == 4) return 1;
        a = 0;
        for (int j = 0; j < 4; j++) {
            if(mp[j][i] == c) a++;
        }
        if(a == 4) return 1;
    }
    int a = 0;
    for (int i = 0; i < 4; i++) if(mp[i][i] == c) a++;
    if(a == 4) return 1;
    a = 0;    
    for (int i = 0; i < 4; i++) if(mp[i][3-i] == c) a++;
    if(a == 4) return 1;
    return 0;
}
int dfs(int step, int a, int b) {
    if((cnt-step)%2 == 0) {
        int tmp = check('o');
        if(tmp || step == 0) return -tmp;
    }
    else {
        int tmp = check('x');
        if(tmp || step == 0) return tmp;
    }
    if((cnt-step)%2 == 0) {
        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < 4; j++) {
                if(mp[i][j] == '.') {
                    mp[i][j] = 'x';
                    int tmp = dfs(step-1, a, b);
                    mp[i][j] = '.';
                    if(tmp >= a) {
                        if(step == cnt) {
                            ansx = i;
                            ansy = j;
                        } 
                        a = tmp;
                        if(b <= a) return b;
                    }
                }        
            }
        }
        return a;
    }
    else {
        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < 4; j++) {
                if(mp[i][j] == '.') {
                    mp[i][j] = 'o';
                    int tmp = dfs(step-1, a, b);
                    mp[i][j] = '.';
                    if(tmp <= b) {
                        b = tmp;
                        if(a >= b) return a;
                    }
                }
            }
        }
        return b;
    }
}
int main() {
    while(~scanf("%s", s)) {
        if(s[0] == '$') return 0;
        for (int i = 0; i < 4; i++) scanf("%s", mp[i]);
        cnt = 0;
        for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) if(mp[i][j] == '.') cnt++; 
        if(cnt >= 12) {
            printf("#####
");
            continue;
        }
        int ans = dfs(cnt, -1, 1);    
        if(ans == 1) printf("(%d,%d)
", ansx, ansy);
        else printf("#####
");
    }
    return 0;
}
View Code

例题2:POJ - 1085

思路1:记忆化搜索dp, dp[s]表示从状态s出发先手所能获得的最大利益。

特点:空间大,时间短

代码:

#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize(4)
#include<cstdio>
#include<iostream>
#include<cstring>
#include<algorithm>
#include<cmath>
#include<climits> 
using namespace std;
#define fi first
#define se second
#define pi acos(-1.0)
#define LL long long
//#define mp make_pair
#define pb push_back
#define ls rt<<1, l, m
#define rs rt<<1|1, m+1, r
#define ULL unsigned LL
#define pll pair<LL, LL>
#define pli pair<LL, int>
#define pii pair<int, int>
#define piii pair<pii, int>
#define pdd pair<long double, long double>
#define mem(a, b) memset(a, b, sizeof(a))
#define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
#define fopen freopen("in.txt", "r", stdin);freopen("out.txt", "w", stout);
//head

int line[18][2] = {{1, 2}, {1, 3}, {2, 3}, {2, 4}, {2, 5}, {4, 5}, {3, 5}, {3, 6}, {5, 6}, {4, 7}, {4, 8}, {7, 8}, {5, 8}, {5, 9}, {8, 9}, {6, 9}, {6, 10}, {9, 10}};
int tri[9][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}, {12, 13, 14}, {15, 16, 17}, {2, 4, 6}, {5, 10, 12}, {8, 13, 15}};
int num[11][11];
int dp[1<<20];
int count(int s) {
    int cnt = 0;
    for (int i = 0; i < 9; i++) if((s&(1<<tri[i][0])) && (s&(1<<tri[i][1])) && (s&(1<<tri[i][2]))) cnt++;
    return cnt;
}
int dfs(int s) {
    if(~dp[s]) return dp[s];
    if(__builtin_popcount(s) == 18) return 0;
    int prenum = count(s);
    int ans = INT_MIN;
    for(int i = 0; i < 18; i++) {
        if(!(s&(1<<i))) {
            int nownum = count(s|(1<<i));
            if(nownum == prenum) {
                ans = max(ans, -dfs(s|1<<i));
            }
            else {
                ans = max(ans, nownum - prenum + dfs(s|1<<i));
            }
        }
    }  
    return dp[s] = ans;
}
int main() {
    int T, m, u, v;
    for (int i = 0; i < 18; i++) num[line[i][0]][line[i][1]] = num[line[i][1]][line[i][0]] = i;
    mem(dp, -1);
    scanf("%d", &T);
    for(int cs = 1; cs <= T; cs++) {
        int st = 0;
        scanf("%d", &m);
        int prenum = 0, nownum = 0;
        int cnt = 0, step = 0;
        for (int i = 1; i <= m; i++) {
            scanf("%d %d", &u, &v);
            st |= 1<<num[u][v];
            nownum = count(st);
            if(nownum > prenum) {
                if(step%2 == 0) cnt += nownum - prenum;
                else cnt -= nownum - prenum;
            } 
            else step++; 
            prenum = nownum;
        }    
        if(step%2 == 0) cnt += dfs(st);
        else cnt -= dfs(st);
        if(cnt > 0) printf("Game %d: A wins.
", cs);
        else printf("Game %d: B wins.
", cs);
    }
    return 0;
}
View Code

思路2:极大极小搜索+alpha-beta剪枝

特点:空间小,时间长

代码:

#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize(4)
#include<cstdio>
#include<iostream>
#include<cstring>
#include<algorithm>
#include<cmath>
#include<climits> 
using namespace std;
#define fi first
#define se second
#define pi acos(-1.0)
#define LL long long
//#define mp make_pair
#define pb push_back
#define ls rt<<1, l, m
#define rs rt<<1|1, m+1, r
#define ULL unsigned LL
#define pll pair<LL, LL>
#define pli pair<LL, int>
#define pii pair<int, int>
#define piii pair<pii, int>
#define pdd pair<long double, long double>
#define mem(a, b) memset(a, b, sizeof(a))
#define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
#define fopen freopen("in.txt", "r", stdin);freopen("out.txt", "w", stout);
//head

int line[18][2] = {{1, 2}, {1, 3}, {2, 3}, {2, 4}, {2, 5}, {4, 5}, {3, 5}, {3, 6}, {5, 6}, {4, 7}, {4, 8}, {7, 8}, {5, 8}, {5, 9}, {8, 9}, {6, 9}, {6, 10}, {9, 10}};
int tri[9][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, 10, 11}, {12, 13, 14}, {15, 16, 17}, {2, 4, 6}, {5, 10, 12}, {8, 13, 15}};
int num[11][11];
int count(int s) {
    int cnt = 0;
    for (int i = 0; i < 9; i++) if((s&(1<<tri[i][0])) && (s&(1<<tri[i][1])) && (s&(1<<tri[i][2]))) cnt++;
    return cnt;
}
int dfs(int s, int a, int b, int alpha, int beta, int step) {
    if(__builtin_popcount(s) == 18) {
        if(a > b) return 1;
        else return -1;
    }
    int prenum = count(s);
    for(int i = 0; i < 18; i++) {
        if(!(s&(1<<i))) {
            int nownum = count(s|(1<<i));
            if(nownum == prenum) {
                if(step%2 == 0) {
                    int tmp = dfs(s|1<<i, a, b, alpha, beta, step+1);    
                    if(tmp >= alpha) {
                        alpha = tmp;
                        if(alpha >= beta) return alpha;
                    }
                }
                else {
                    int tmp = dfs(s|1<<i, a, b, alpha, beta, step+1);    
                    if(tmp <= beta) {
                        beta = tmp;
                        if(alpha >= beta) return beta;
                    }
                }
            }
            else {
                if(step%2 == 0) {
                    int tmp = dfs(s|1<<i, a+nownum-prenum, b, alpha, beta, step);    
                    if(tmp >= alpha) {
                        alpha = tmp;
                        if(alpha >= beta) return alpha;
                    }
                }
                else {
                    int tmp = dfs(s|1<<i, a, b+nownum-prenum, alpha, beta, step);    
                    if(tmp <= beta) {
                        beta = tmp;
                        if(alpha >= beta) return beta;
                    }
                }
            }
        }
    }  
    if(step%2 == 0) return alpha;
    else return beta;
}
int main() {
    int T, m, u, v;
    for (int i = 0; i < 18; i++) num[line[i][0]][line[i][1]] = num[line[i][1]][line[i][0]] = i;
    scanf("%d", &T);
    for(int cs = 1; cs <= T; cs++) {
        int st = 0;
        scanf("%d", &m);
        int prenum = 0, nownum = 0;
        int step = 0, a = 0, b = 0;
        for (int i = 1; i <= m; i++) {
            scanf("%d %d", &u, &v);
            st |= 1<<num[u][v];
            nownum = count(st);
            if(nownum > prenum) {
                if(step%2 == 0) a += nownum - prenum;
                else b += nownum - prenum;
            } 
            else step++; 
            prenum = nownum;
        }    
        int cnt = dfs(st, a, b, -1, 1, step);
        if(cnt > 0) printf("Game %d: A wins.
", cs);
        else printf("Game %d: B wins.
", cs);
    }
    return 0;
}
View Code
原文地址:https://www.cnblogs.com/widsom/p/10153159.html