bzoj4720

期望dp

n久以前做过,再做一遍

你只能决定决策,不能决定结果,这是这道题的关键。因为我们申请换教室不一定成功,所以状态里记录的应该是“是否申请”,而不是“实际在哪个教室”。设dp[i][j][k]表示到第i个时间段、一共申请了j次、第i个时间段是否申请(k=0不申请,k=1申请)时的最小期望路程,转移:

dp[i][j][0] = min(dp[i-1][j][0] + dis[c[i-1]][c[i]], dp[i-1][j][1] + dis[c[i-1]][c[i]]*(1.0-k[i-1]) + dis[d[i-1]][c[i]] * k[i-1]) 第i个时间段不申请,所以申请次数j不变:如果上一个时间段也没申请,那么只可能从c[i-1]转移过来;如果上一个时间段申请了,那么有可能成功也可能没成功,于是就是上次的期望值+dis换*k[i-1]+dis不换*(1.0-k[i-1]),就是概率乘上值的和,这就是期望。dp[i][j][1]同理,只是要从j-1转移,并且再乘上第i个时间段申请成功/失败的概率k[i]和(1.0-k[i]),然后下面具体看代码。

我0和1写错了还能在uoj上有80

感觉加深了对期望dp的理解:你只能决定决策,不能决定结果,所以我们要从之前所有可能的状态转移过来。设状态也不能设错,比如不能把dp[i][j][k]设成“前i天申请j次、第i天实际在c[i]还是d[i]”,因为换教室是有概率的,我们不能决定结果,只能把“是否申请”作为状态。

#include<cstdio>
#include<cstring>
#include<iostream>
#include<algorithm>
using namespace std;
// Upper bound used to size the per-time-slot arrays and both DP dimensions.
const int N = 2010;
// c[i] / d[i]: two classroom ids read for time slot i (1-based). In the DP below
// c[i] is used when no application is made or it fails, d[i] when it succeeds.
int c[N], d[N];
// n: number of time slots, m: upper bound on the number of applications,
// v: number of graph vertices, e: number of edges read from input.
int n, m, v, e;
// dis[a][b]: shortest distance between vertices a and b (filled by main).
// dp[i][j][t]: minimum expected distance through slot i with j applications used,
//              t == 1 iff an application is made for slot i.
// k[i]: probability that an application for slot i succeeds.
double dis[310][310], dp[N][N][2], k[N];
int main()
{
    cin >> n >> m >> v >> e;
    for(int i = 1; i <= n; ++i) scanf("%d", &c[i]);
    for(int i = 1; i <= n; ++i) scanf("%d", &d[i]);
    for(int i = 1; i <= n; ++i) scanf("%lf", &k[i]);
    for(int i = 1; i <= n; ++i)
        for(int j = 0; j <= m; ++j) dp[i][j][0] = dp[i][j][1] = 1e9;
    for(int i = 1; i <= v; ++i)
        for(int j = 1; j <= v; ++j) if(i != j) dis[i][j] = 1e9;
    for(int i = 1; i <= e; ++i) 
    {
        int u, v;
        double w;
        scanf("%d%d%lf", &u, &v, &w);
        dis[u][v] = min(dis[u][v], w);
        dis[v][u] = min(dis[v][u], w);
    }
    for(int x = 1; x <= v; ++x)
        for(int i = 1; i <= v; ++i) if(i != x)
            for(int j = 1; j <= v; ++j) if(i != j && j != x) dis[i][j] = min(dis[i][j], dis[i][x] + dis[x][j]);
    dp[1][0][0] = dp[1][1][1] = 0.0;
    for(int i = 2; i <= n; ++i)
        for(int j = 0; j <= min(m, i); ++j)
        {
            dp[i][j][0] = min(dp[i - 1][j][0] + dis[c[i]][c[i - 1]], (dp[i - 1][j][1] + dis[d[i - 1]][c[i]]) * k[i - 1] + (dp[i - 1][j][1] + dis[c[i - 1]][c[i]]) * (1.0 - k[i - 1]));
            if(j > 0) 
            {
                dp[i][j][1] = min((dp[i - 1][j - 1][0] + dis[c[i - 1]][c[i]]) * (1.0 - k[i]) + (dp[i - 1][j - 1][0] + dis[c[i - 1]][d[i]]) * k[i], 
            (dp[i - 1][j - 1][1] + dis[d[i - 1]][d[i]]) * k[i - 1] * k[i] + (dp[i - 1][j - 1][1] + dis[c[i - 1]][d[i]]) * (1.0 - k[i - 1]) * k[i] + (dp[i - 1][j - 1][1] + dis[d[i - 1]][c[i]]) * k[i - 1] * (1.0 - k[i]) + (dp[i - 1][j - 1][1] + dis[c[i - 1]][c[i]]) * (1.0 - k[i - 1]) * (1.0 - k[i]));
            }   
        }   
    double ans = 1e100;
    for(int i = 0; i <= m; ++i) ans = min(ans, min(dp[n][i][0], dp[n][i][1]));
    printf("%.2f
", ans);
    return 0;
}
View Code
原文地址:https://www.cnblogs.com/19992147orz/p/7526516.html