// Implements a multithreaded version of accumulate by wrapping accumulate in a per-block functor.

#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <thread>
#include <vector>

using namespace std;

/// Function object that sums one block of a range.
///
/// Designed to be handed to a std::thread: the partial sum is delivered through
/// the `result` reference rather than a return value.
///
/// @tparam Iterator  iterator type of the block being summed
/// @tparam T         type of the running total
template <typename Iterator, typename T>
struct accumulate_block
{
  /// Folds [first, last) onto the current value of `result` and writes the
  /// total back into `result`. An empty range leaves `result` unchanged.
  void operator ()(Iterator first, Iterator last, T &result)
  {
    // Explicitly qualified so the call does not depend on a using-directive or
    // on argument-dependent lookup (which finds nothing for raw pointers).
    result = std::accumulate(first, last, result);
  }
};

/// Sums the range [first, last) onto `init`, spreading the work over threads.
///
/// The range is cut into `num_threads` contiguous blocks. All blocks but the
/// last are summed by accumulate_block on newly started std::thread workers;
/// the last block, which also absorbs the remainder of the division, is summed
/// on the calling thread. Each partial sum starts from a value-initialized T,
/// so T must be default-constructible and T() must be neutral for operator+.
///
/// @param first  beginning of the range to sum
/// @param last   one past the end of the range to sum
/// @param init   value the partial sums are finally added onto
/// @return `init` plus the sum of all elements; `init` itself for an empty range
template <typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init)
{
    // Joins every still-joinable worker when it goes out of scope. Without it,
    // an exception thrown after some workers were started would destroy
    // joinable std::thread objects, which calls std::terminate.
    struct join_guard
    {
        explicit join_guard(std::vector<std::thread> &workers)
            : workers_(workers) {}
        join_guard(join_guard const &) = delete;
        join_guard &operator=(join_guard const &) = delete;
        ~join_guard()
        {
            for (std::thread &worker : workers_) {
                if (worker.joinable()) {
                    worker.join();
                }
            }
        }
        std::vector<std::thread> &workers_;
    };

    unsigned long const length =
        static_cast<unsigned long>(std::distance(first, last));

    // Nothing to sum: the answer is the initial value.
    if (length == 0) {
        return init;
    }

    // Do not start a thread for fewer than this many elements.
    unsigned long const min_per_thread = 25;

    // Upper bound on useful threads: ceil(length / min_per_thread).
    unsigned long const max_threads =
        (length + min_per_thread - 1) / min_per_thread;

    unsigned long const hardware_threads =
        std::thread::hardware_concurrency();

    // Use no more threads than the hardware reports; when it reports 0
    // (value not available) fall back to 2. Never exceed max_threads.
    unsigned long const num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2UL,
                 max_threads);

    // Elements per block; the last block additionally takes the remainder.
    unsigned long const block_size = length / num_threads;

    // `results` is declared before `threads` so it outlives every worker
    // that writes into it.
    std::vector<T>           results(num_threads);
    std::vector<std::thread> threads(num_threads - 1);

    {
        join_guard const guard(threads);

        auto block_start = first;
        for (unsigned long i = 0; i < (num_threads - 1); ++i) {
            // Move block_end to the end of the current block.
            auto block_end = block_start;
            std::advance(block_end, block_size);

            threads[i] = std::thread(
                accumulate_block<Iterator, T>(),
                block_start, block_end, std::ref(results[i]));
            block_start = block_end;
        }

        // The calling thread handles the final block itself.
        accumulate_block<Iterator, T>()(block_start,
                                        last,
                                        results[num_threads - 1]);
    }   // guard joins all workers here, before the partial sums are read

    return std::accumulate(results.begin(), results.end(), init);
}

// Demo entry point: sums a small fixed list of integers with
// parallel_accumulate (starting from 0) and prints the total.
int main()
{
    const std::vector<int> numbers{1, 2, 3, 4, 5, 5, 6, 7, 8, 9};

    const int total =
        parallel_accumulate(numbers.cbegin(), numbers.cend(), 0);

    std::cout << total << std::endl;
    return 0;
}