sign3 neon

This is a NEON intrinsics implementation of sign3, which can be used for sample adaptive offset (SAO) in HEVC.

/*
 * sign3_neon - element-wise three-way comparison of two 8-bit planes.
 *
 * For each of the `height` rows and each of the `width` columns, stores
 *     -1  when src1 < src2
 *      0  when src1 == src2
 *     +1  when src1 > src2
 * into dst. Samples are compared as unsigned bytes.
 *
 * src1, src2   input planes
 * src_stride1  distance in bytes between consecutive rows of src1
 * src_stride2  distance in bytes between consecutive rows of src2
 * dst          output plane of signed bytes
 * dst_stride   distance in bytes between consecutive rows of dst
 * width        number of columns processed per row
 * height       number of rows processed
 *
 * Columns are handled 16 at a time with NEON while at least 16 remain in
 * the row; the remaining (width % 16) columns use a scalar loop, so no
 * bytes past column `width` are loaded or stored.
 */
void sign3_neon(const uint8_t *src1, const int src_stride1, const uint8_t *src2, const int src_stride2, int8_t* dst, const int dst_stride, const int width, const int height) {
        for (int row = 0; row < height; row++) {
                const uint8_t *a = src1 + row * src_stride1;
                const uint8_t *b = src2 + row * src_stride2;
                int8_t *out = dst + row * dst_stride;
                int col = 0;

                /* Vector part: one full 128-bit register (16 bytes) per step. */
                while (width - col >= 16) {
                        const uint8x16_t va = vld1q_u8(a + col);
                        const uint8x16_t vb = vld1q_u8(b + col);

                        /* Lanes with a < b become 0xFF, i.e. -1 once viewed as int8. */
                        const uint8x16_t neg = vcltq_u8(va, vb);
                        /* Lanes with a > b become 0xFF; shifting right by 7 leaves 0x01. */
                        const uint8x16_t pos = vshrq_n_u8(vcgtq_u8(va, vb), 7);

                        /* At most one of neg/pos is non-zero per lane, so OR merges them. */
                        vst1q_s8(out + col, vreinterpretq_s8_u8(vorrq_u8(pos, neg)));
                        col += 16;
                }

                /* Scalar part: leftover columns that do not fill a vector. */
                for (; col < width; col++) {
                        const uint8_t x = a[col];
                        const uint8_t y = b[col];
                        out[col] = (int8_t)((x > y) - (x < y));
                }
        }
}

Enjoy it!

原文地址:https://www.cnblogs.com/mathlover/p/3335702.html