SSE sqrt还是比C math库的sqrtf快了不少

#include <stdio.h>
#include <stdlib.h>
#include <xmmintrin.h>
#define NOMINMAX
#include <windows.h>
#include <math.h>
#include <time.h>

__forceinline float fast_sqrt(float x)
{
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(x)));
}

int main(int argc, char *argv[])
{
    const int N = 100000000;
    float *buf = new float[N];
    for (int i = 0; i < N; ++i)
    {
        buf[i] = 1000.0f * (float)rand() / (float)RAND_MAX;
    }

    float sum;
    int start_time;

    sum = 0.0f;
    start_time = clock();
    for (int i = 0; i < N; ++i)
    {
        sum += sqrtf(buf[i]);
    }
    printf("sum = %f   in clock %d
", sum, clock() - start_time);



    sum = 0.0f;
    start_time = clock();
    for (int i = 0; i < N; ++i)
    {
        sum += fast_sqrt(buf[i]);
    }
    printf("sum (fast) = %f   in clock %d
", sum, clock() - start_time);



    delete[]buf;
    return 0;
}

测试结果:

sum = 536870912.000000 in clock 391
sum (fast) = 536870912.000000 in clock 281

原文地址:https://www.cnblogs.com/len3d/p/7912062.html