POJ #1789 Truck History 最小生成树(MST) prim 稠密图链式向前星

Description

Advanced Cargo Movement, Ltd. uses trucks of different types. Some trucks are used for vegetable delivery, other for furniture, or for bricks. The company has its own code describing each type of a truck. The code is simply a string of exactly seven lowercase letters (each letter on each position has a very special meaning but that is unimportant for this task). At the beginning of company's history, just a single truck type was used but later other types were derived from it, then from the new types another types were derived, and so on.

Today, ACM is rich enough to pay historians to study its history. One thing historians tried to find out is so called derivation plan -- i.e. how the truck types were derived. They defined the distance of truck types as the number of positions with different letters in truck type codes. They also assumed that each truck type was derived from exactly one other truck type (except for the first truck type which was not derived from any other type). The quality of a derivation plan was then defined as
1/Σ_{(t_o,t_d)}d(t_o,t_d)
where the sum goes over all pairs of types in the derivation plan such that t _o is the original type and t _dthe type derived from it and d(t _o,t _d) is the distance of the types.
Since historians failed, you are to write a program to help them. Given the codes of truck types, your program should find the highest possible quality of a derivation plan.

Input

The input consists of several test cases. Each test case begins with a line containing the number of truck types, N, 2 <= N <= 2 000. Each of the following N lines of input contains one truck type code (a string of seven lowercase letters). You may assume that the codes uniquely describe the trucks, i.e., no two of these N lines are the same. The input is terminated with zero at the place of number of truck types.

Output

For each test case, your program should output the text "The highest possible quality is 1/Q.", where 1/Q is the quality of the best derivation plan.

Sample Input
4
aaaaaaa
baaaaaa
abaaaaa
aabaaaa
0
Sample Output
The highest possible quality is 1/3.　

这道题的数据集网上比较少，提供一组自己手写的数据：

INPUT:
3
aaaaaaa
baaaaaa
abaaaaa

OUTPUT:
The highest possible quality is 1/2.

思路

　　题意比较不好理解，简而言之就是有 n 个字符串，设两个字符串之间的差异为 dis，dis 由两个字符串对应位置上不同字母的数量决定。比如串A“aaaaaaa” 、串B"baaaaaa" 和串C“abaaaaa”的 dis 分别为 dis(A, B) = 1, dis(A, C) = 1, dis(B, C) = 2 ...etc ，规定除其中一个串外其他串都是由该串派生的，那么产生三个计划：

　　1、A派生B、C

　　2、B派生A、C

　　3、C派生A、B

　　问所有计划中 dis 总和最小的计划是哪一个，输出 dis 总和。

　　我们若以每个串为顶点，本题实质上就是求最小生成树的所有边权之和。

　　很明显，建出的图是完全图，为稠密图，适合 prim 求解最小生成树。

　　TLE N 次... 最后利用 char[] 代替 string 、堆优化prim、链式向前星（静态邻接表，数组模拟链表）代替vector G[] ，最终1500+ms 险过。

　　但是发现，关键问题是出在更新d数组时重复入队过多，我（WTF？？？）

　　当然解决 TLE 过程中也收获了很多，比如知道了堆优化的 prim 算法在处理连通图问题（E = V-1）时比 kruskal 更快，而在处理稀疏图问题上还是比 kruskal 慢；已知数据上界的情况下最好选用 char[] 当作string 和链式向前星建图，因为vector动态申请非常耗时，push_back() 是成倍申请的，而 string 可能封装得过多了？？？（我猜的

　　关于大牛对链式向前星的理解在这：链接

　　测试证明：char[] 代替 string 快了 200+ms ，可怕...

　　优化前版本

#include<iostream>
#include<algorithm>
#include<queue>
#include<cstring>
#include<vector>
using namespace std;
#define INF 0x3f3f3f3f
const int MAXN = 2000 + 10;
int n;
string str[MAXN];

int getDis (int u, int v) {
    int cnt = 0;
    for (int i = 0; i <= 6; i++) {
        if (str[u][i] != str[v][i])
            cnt++;
    }
    return cnt;
}

struct Edge {
    int to;
    int dis;
    Edge(int tt, int dd) : to(tt), dis(dd) {}
};
vector<Edge> G[MAXN];

void addEdge (int u, int v) {
    G[u].push_back (Edge(v, getDis(u,v)));
}

void initG() {
    for (int i = 1; i <= n; i++) {
        G[i].clear();
        G[i].reserve(MAXN);
    }
    //建立完全图
    for (int i = 1; i <= n; i++) {
        for (int j = i+1; j <= n; j++) {
            addEdge(i, j);
            addEdge(j, i);
        }
    }
}

struct Node {
    int id;
    int key;
    Node (int v, int kk) : id(v), key(kk) {}
    friend bool operator < (const Node& a, const Node& b) {
        return a.key > b.key;
    }
};
int d[MAXN]; //连接v和树中结点的最小边的权值
bool vis[MAXN]; //v加入树
void prim (const int& s, int& sum ) {
    for (int i = 1; i <= n; i++) d[i] = INF, vis[i] = false;
    d[s] = 0;
    priority_queue<Node> pQueue;
    pQueue.push (Node(s, d[s]));
    while (!pQueue.empty()) {
        int u = pQueue.top().id; pQueue.pop();
        if (vis[u]) continue;
        vis[u] = true;
        sum += d[u]; //MST所有边的权重之和
        for (int j = 0; j < G[u].size(); j++) {
            int v = G[u][j].to;
            if (!vis[v] && d[v] > G[u][j].dis) {
                d[v] = G[u][j].dis;
                pQueue.push(Node(v, d[v]));
            }
        }
    }
}

int main(void) {
    while (cin >> n && n) {
        for (int i = 1; i <= n; i++) cin >> str[i];
        initG();
        int ans = 0;
        prim (1, ans);
        cout << "The highest possible quality is 1/" << ans << "." << endl;
    }
    return 0;
}

View Code

　　优化后版本

#include<iostream>
#include<algorithm>
#include<queue>
#include<cstring>
#include<cstdio>
using namespace std;
#define INF 0x3f3f3f3f
const int MAXN = 2010;
const int EDGE_MAXN = 2010*2010;
int n;
char str[MAXN][7];

//求边权
int getDis (int u, int v) {
    int count = 0;
    for (int i = 0; i <= 6; i++) {
        if (str[u][i] != str[v][i]) count++;
    }
    return count;
}

//链式前向星(静态邻接表)
struct Edge {
    int to, dis, next;
}e[EDGE_MAXN];
int head[MAXN], cnt; //head存储以i为起点的最后一条边在e中的位置

void addEdge (int u, int v, int w) {
    cnt++; //从e[1]开始存边
    e[cnt].to = v;
    e[cnt].dis = w;
    e[cnt].next = head[u]; //next存储以u为起点的上一条边在e中的位置
    head[u] = cnt;
}

void initG() {
    memset(head, -1, sizeof(head));
    cnt = 0; //统计边数
    //建立完全图
    for (int i = 1; i <= n; i++) {
        for (int j = i+1; j <= n; j++) {
            int w = getDis(i, j);
            addEdge (i, j, w);
            addEdge (j, i, w);
        }
    }
}

//堆优化prim
struct Node {
    int id;
    int key;
    Node (int v, int kk) : id(v), key(kk) {}
};
struct cmp {
    bool operator() (const Node& a, const Node& b) const
    {
        return a.key > b.key;
    }
};
int d[MAXN]; //连接v和树中结点的最小边的权值
bool vis[MAXN]; //判断v是否加入树
void prim (const int& s, int& sum ) {
    for (int i = 1; i <= n; i++) d[i] = INF, vis[i] = false;
    d[s] = 0;
    priority_queue<Node, vector<Node>, cmp > pQueue;
    pQueue.push (Node(s, d[s]));
    while (!pQueue.empty()) {
        int u = pQueue.top().id; pQueue.pop();
        if (vis[u]) continue; //已加入树，不必再入树，避免TLE
        vis[u] = true;
        sum += d[u]; //MST所有边的权重之和
        //访问u的所有邻接点
        for (int i = head[u]; i != -1; i = e[i].next) {
            int v = e[i].to;
            if (!vis[v] && d[v] > e[i].dis) {
                d[v] = e[i].dis;
                pQueue.push(Node(v, d[v])); //和dijkstra一样存在重复入队的情况
            }
        }
    }
}

int main(void) {
    while (cin >> n && n) {
        for (int i = 1; i <= n; i++) scanf("%s", str[i]);
        initG();
        int ans = 0;
        prim (1, ans);
        cout << "The highest possible quality is 1/" << ans << "." << endl;
    }
    return 0;
}

View Code

　　prim 和 dijkstra 还真的很像，就是d数组代表的意义与松弛操作不一样而已。

————全心全意投入，拒绝画地为牢

POJ #1789 Truck History 最小生成树(MST) prim 稠密图 链式向前星

POJ #1789 Truck History 最小生成树(MST) prim 稠密图链式向前星