Megcup2017 Dogfood

问题描述

小强每天会在小区的某些位置摆一些狗盆，并在狗盆里倒入不同口味的狗粮。而所有的流浪狗都会跑到离自己第k近的狗盆那里吃狗粮，一定的跑动可以帮助狗保持身材。

已知小强牌狗粮目前只有10种口味，我们用1,2,3,…,10来表示这些口味。(草莓味是1,西瓜味是2,香橙味是3......)

为了估算每种口味的狗粮每日的需求量，他想知道对于一个在[0,10000]x[0,10000]随机均匀生成的实数坐标(x,y)来说，离它第k近的狗盆里放的狗粮是口味z(z=1...10)的概率是多少。

由于小强最近忙着训练神经网络，他把这个任务交给了你，你能帮他解决吗？

为了简化题目，小区的每个位置可以用坐标(x,y)来表示,坐标范围是[0,10000]x[0,10000]

输入格式

第一行是两个整数n和k，分别表示狗盆的数量和题目描述中的k。

接下来n行，每行有三个整数 X Y Z，分别表示狗盆的坐标(X,Y)和这个狗盆中所放狗粮的口味Z。

输出格式

按顺序输出题中描述的属于口味1~10的概率。

输出的值与正确答案的差的绝对值小于1e-5即可。

样例输入

4 2
0 0 1
0 10000 2
10000 0 3
10000 10000 4

样例输出

0.2500000000
0.2500000000
0.2500000000
0.2500000000

Input文件 dogfood_input.txt

80 23
13 5702 9
2143 9228 3
9904 559 7
5632 858 10
2629 6635 1
9533 9321 5
3417 2051 4
5860 2582 8
8901 8855 5
646 5445 10
3723 3993 3
9460 8418 3
5342 6370 4
5857 3892 2
4453 3001 7
7779 7617 1
6333 6299 10
9508 5811 4
1502 1029 5
1259 6572 2
849 8905 6
5142 8063 4
8736 9776 1
2724 5386 3
5676 7238 10
6521 8374 6
4533 2614 2
5535 6041 8
741 5192 7
1368 6361 3
1967 428 5
4338 374 8
28 4433 9
4793 1407 7
2238 4627 2
4397 3235 2
9715 1681 8
4829 7205 1
7440 874 3
7393 3689 4
8909 9344 3
4794 6390 7
9269 230 6
7271 8835 10
1595 2184 10
7764 9876 5
108 5086 2
3392 7318 7
2157 9471 9
5847 4619 9
1001 417 10
5737 236 5
6374 1349 10
4615 7113 4
8476 8619 7
6977 9068 3
8089 5058 9
1957 9904 10
7791 7207 5
3792 8631 10
6077 2780 3
6101 2758 4
7481 8120 2
6679 2954 1
8001 7276 4
3953 7587 8
9520 3272 6
5848 6982 9
2902 8808 5
9067 8584 5
5674 3275 1
2557 6123 9
4790 2973 2
9429 2741 2
9090 21 8
666 364 4
9372 9724 3
9617 1009 7
950 1963 3
9254 2668 1

解法

有时候一心想着精确解就忘记了近似法，多次模拟求近似值的思想很重要。

megcup不需要提交代码去服务器上运行，只需要提交程序运行结果。

这个问题暴力也是可以解决的，但是需要运行十几分钟。

直接枚举10000x10000个点

#include <set>
#include <map>
#include <ctime>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <algorithm>

using namespace std;

struct Bowl {
	int x, y, c;
	double dist;
}b[1000];

int n, k, cnt[10];

bool cmp(const Bowl& a, const Bowl& b) {
	return a.dist < b.dist;
}

double sqr(double x) {
	return x * x;
}

int main() {
	freopen("dogfood_input.txt","r",stdin);
	scanf("%d%d", &n, &k);
	for (int i = 0; i < n; ++i) {
		scanf("%d%d%d", &b[i].x, &b[i].y, &b[i].c); 
	}
	for (int i = 0; i < 10000; ++i) {
		printf("%d
",i);
		for (int j = 0; j < 10000; ++j) {
			for (int kk = 0; kk < n; ++kk) {
				b[kk].dist = sqr(i + 0.5 - b[kk].x) + sqr(j + 0.5 - b[kk].y);
			}
			sort(b, b + n, cmp);
			cnt[b[k - 1].c - 1] ++;
		}
	}
	for (int i = 0; i < 10; ++i) {
		printf("%.8lf
", cnt[i] * 1.0 / 100000000);
	}
	puts("");
}

如果对精度要求高一些，可以使用随机插点法
使用pair<double,int>可以省掉排序函数cmp。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <cmath>
#include <thread>
#include <algorithm>

using namespace std;

const int delta = 4;

int n, kkk;

int x[100], y[100], z[100];
pair<double, int> dis[100];
int num[100];

void sample() {
	double xx, yy;
	for (int i = 0; i < 10000; ++i) {
		for (int j = 0; j < 10000; ++j) {
			for (int d = 0; d < delta; ++d) {//随机插入delta个点
				xx = i + 1.0 * rand() / RAND_MAX;
				yy = j + 1.0 * rand() / RAND_MAX;
				for (int k = 1; k <= n; ++k) {
					dis[k].first = (xx - x[k]) * (xx - x[k]) + (yy - y[k]) * (yy - y[k]);
					dis[k].second = z[k];
				}
				sort(dis + 1, dis + n + 1);
				num[dis[kkk].second] += 1;
			}
		}
	}
}

int main(int argc, char *argv[]) {
	//使用一个环境变量判断是否是本地环境（这个变量在本地定义）
	#ifdef LOCAL_JUDGE
	freopen("dogfood_input.txt", "r", stdin);
	freopen("test.out", "w", stdout);
	#endif // LOCAL_JUDGE

	scanf("%d %d", &n, &kkk);
	for (int i = 1; i <= n; ++i) {
		scanf("%d %d %d", &x[i], &y[i], &z[i]);
	}	
	sample();

	int tot = 100000000 * 4;
	for (int i = 1; i <= 10; ++i)
		printf("%.8f
", 1.0 * num[i] / tot);
	return 0;
}

其实也不一定要均匀取点，有人随机撒点20亿次

#include <iostream>
#include <string>
#include <map>
#include <ctime>
#include <cmath>
#include <vector>
#include <cstdio>

using namespace std;

int n, k;

const int MAXN = 100000;
int a[MAXN][3];
vector<pair<double, int> >v;
double ans[100];

double real()
{
	return 10000.0 * rand() / RAND_MAX - 5000;
}

int get_ans(double x, double y)
{
	v.clear();
	for (int i = 0; i < n; i++)
	{
		double d = pow(a[i][0] - x, 2) + pow(a[i][1] - y, 2);
		v.push_back(make_pair(d, a[i][2]));
	}
	sort(v.begin(), v.end());
	return v[k - 1].second;
}

void work()
{
	double x = real(), y = real();
	ans[get_ans(x, y)]++;
	ans[get_ans(-x, y)]++;
	ans[get_ans(x, -y)]++;
	ans[get_ans(-x, -y)]++;
}

void print()
{
	int tot = 0;
	for (int i = 1; i <= 10; i++) tot += ans[i];
	for (int i = 1; i <= 10; i++) printf("%.8lf
", ans[i] / tot);
}

int main()
{
	srand(time(0));
	freopen("dogfood_input.txt", "r", stdin);
	cin>>n>>k;
	for (int i = 0; i < n; i++)
	{
		for (int j = 0; j < 3; j++) cin >> a[i][j];
		a[i][0] -= 5000;
		a[i][1] -= 5000;
	}
	int T = 2000000000;
	for (int i = 1; i <= T; i++)
	{
		work();
		if (i % 100000 == 0)
		{
			cout << "============== " << i << endl;
			print();
		}
	}

	return 0;
}

既然是K近邻，那么KD树应该能够加速，但是很容易写挫导致还不如不用KD树。
很多时候，我都没有意识到Python有多慢，用C++重写Python之后运行效率的提升可以说是翻天覆地。
算法比赛中，用C++已经成为一种标准，出题人往往不写其它语言的标程，对其它语言的运行时间会高估很多，导致用其他语言很难通过（需要进行很多优化），而用C++暴力有时也能通过。

import numpy as np
from sklearn.neighbors import KDTree

a = [int(x) for x in open("dogfood_input.txt").read().split()]
N = a[0]
K = a[1]
points = np.empty((N, 2), dtype=np.int)
flavor = np.empty(N, dtype=np.int)
for i in range(N):
    points[i][0] = a[2 + i * 3]
    points[i][1] = a[3 + i * 3]
    flavor[i] = a[4 + i * 3]
kdt = KDTree(points, metric="euclidean")
cnt = [0] * 11
for i in range(10001):
    if i % 10 == 0: print(i)
    for j in range(10001):
        p = kdt.query([(i, j)], k=K, return_distance=False)
        ind = p[0][-1]
        x, y = points[ind][0], points[ind][1]
        f = flavor[ind]
        cnt[f] += 1
s = 1e4 ** 2
for i in range(11):
    cnt[i] /= s
print(cnt)

答案

0.08715860
0.15624080
0.11137702
0.16169284
0.07123529
0.02621666
0.09187029
0.08934817
0.09907106
0.10578927

总结

megcup的题目暴力比较多，如果掌握并行化编程，就可以少考虑许多技巧。