// OpenMP performance optimization scheme (OpenMP性能优化方案)

#include <iostream>
#include <windows.h>
using namespace std;

struct Count
{
    long count1;
    char space[64];  // 此处增加64字节的“无用”空间,目的就是使得count1和count2位于不同的Cache行,从而避免Cache行同步引起的“乒乓效应”,可以尝试去掉这64字节的“无用”空间,将会得到令人诧异的运行结果,要探明深层原因,可以搜索一下“乒乓效应 CPU cache”

    long count2;
};
DWORD WINAPI Calculate1(void *p)
{
    double x = 0,y = 0;
    long* c1 = (long*)p;
    for(; x <= 0.5;x += 0.0001) 
        for(y = 0;y <= 1;y += 0.0001) 
        { 
            if (x*x + y*y <= 1) (*c1)++; 
        } 
    return 0;
}
DWORD WINAPI Calculate2(void *p)
{
    double i = 0,j = 0;//16
    long* c2 = (long*)p;
    for(i=0.5001;i <=1;i+=0.0001) 
        for(j=0;j <=1;j+=0.0001) 
        { 
            if(i*i+j*j <=1) (*c2)++; 
        } 
    return 0;
}

int main()
{
    DWORD pThreadID;
    Count cnt;

    DWORD startTime = 0,endTime = 0;
    startTime= GetTickCount(); 
    Calculate1(&cnt.count1);
    Calculate2(&cnt.count2);
    cout << "Serial calculate cost " << GetTickCount() - startTime << "ms." << endl;

    cnt.count1 = 0;
    cnt.count2 = 0;
    startTime= GetTickCount();

    HANDLE hThread1 = CreateThread (NULL,
                                    0,
                                    Calculate1,
                                    &cnt.count1,
                                    0,
                                    &pThreadID);

    HANDLE hThread2 = CreateThread (NULL,
                                    0,
                                    Calculate2,
                                    &cnt.count2,
                                    0,
                                    &pThreadID);


    WaitForSingleObject(hThread1, INFINITE); 
    WaitForSingleObject(hThread2, INFINITE); 
    endTime=GetTickCount();

    cout << "Parallel calculate cost " << endTime-startTime << "ms." << endl; 
}

// Original article (原文地址): https://www.cnblogs.com/jast/p/4516471.html