softmax_loss.cu 和 softmax_loss.cpp源码

 1 #include <algorithm>
 2 #include <cfloat>
 3 #include <vector>
 4 
 5 #include "caffe/layers/softmax_loss_layer.hpp"
 6 #include "caffe/util/math_functions.hpp"
 7 
 8 namespace caffe {
 9 
 10 template <typename Dtype>
 11 __global__ void SoftmaxLossForwardGPU(const int nthreads,
 12           const Dtype* prob_data, const Dtype* label, Dtype* loss,
 13           const int num, const int dim, const int spatial_dim,
 14           const bool has_ignore_label_, const int ignore_label_,
 15           Dtype* counts) {
 16   CUDA_KERNEL_LOOP(index, nthreads) {
 17     const int n = index / spatial_dim;
 18     const int s = index % spatial_dim;
 19     const int label_value = static_cast<int>(label[n * spatial_dim + s]);
 20     if (has_ignore_label_ && label_value == ignore_label_) {
 21       loss[index] = 0;
 22       counts[index] = 0;
 23     } else {
 24       loss[index] = -log(max(prob_data[n * dim + label_value * spatial_dim + s],
 25                       Dtype(FLT_MIN)));
 26       counts[index] = 1;
 27     }
 28   }
 29 }
 30 
 31 template <typename Dtype>
 32 void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
 33     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
 34   softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
 35   const Dtype* prob_data = prob_.gpu_data();
 36   const Dtype* label = bottom[1]->gpu_data();
 37   const int dim = prob_.count() / outer_num_;
 38   const int nthreads = outer_num_ * inner_num_;
 39   // Since this memory is not used for anything until it is overwritten
 40   // on the backward pass, we use it here to avoid having to allocate new GPU
 41   // memory to accumulate intermediate results in the kernel.
 42   Dtype* loss_data = bottom[0]->mutable_gpu_diff();
 43   // Similarly, this memory is never used elsewhere, and thus we can use it
 44   // to avoid having to allocate additional GPU memory.
 45   Dtype* counts = prob_.mutable_gpu_diff();
 46   // NOLINT_NEXT_LINE(whitespace/operators)
 47   SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
 48       CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
 49       outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
 50   Dtype loss;
 51   caffe_gpu_asum(nthreads, loss_data, &loss);
 52   Dtype valid_count = -1;
 53   // Only launch another CUDA kernel if we actually need the count of valid
 54   // outputs.
 55   if (normalization_ == LossParameter_NormalizationMode_VALID &&
 56       has_ignore_label_) {
 57     caffe_gpu_asum(nthreads, counts, &valid_count);
 58   }
 59   top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_,
 60                                                         valid_count);
 61   if (top.size() == 2) {
 62     top[1]->ShareData(prob_);
 63   }
 64 }
 65 
 66 template <typename Dtype>
 67 __global__ void SoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
 68           const Dtype* label, Dtype* bottom_diff, const int num, const int dim,
 69           const int spatial_dim, const bool has_ignore_label_,
 70           const int ignore_label_, Dtype* counts) {
 71   const int channels = dim / spatial_dim;
 72 
 73   CUDA_KERNEL_LOOP(index, nthreads) {
 74     const int n = index / spatial_dim;
 75     const int s = index % spatial_dim;
 76     const int label_value = static_cast<int>(label[n * spatial_dim + s]);
 77 
 78     if (has_ignore_label_ && label_value == ignore_label_) {
 79       for (int c = 0; c < channels; ++c) {
 80         bottom_diff[n * dim + c * spatial_dim + s] = 0;
 81       }
 82       counts[index] = 0;
 83     } else {
 84       bottom_diff[n * dim + label_value * spatial_dim + s] -= 1;
 85       counts[index] = 1;
 86     }
 87   }
 88 }
 89 
 90 template <typename Dtype>
 91 void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
 92     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
 93   if (propagate_down[1]) {
 94     LOG(FATAL) << this->type()
 95                << " Layer cannot backpropagate to label inputs.";
 96   }
 97   if (propagate_down[0]) {
 98     Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
 99     const Dtype* prob_data = prob_.gpu_data();
100     const Dtype* top_data = top[0]->gpu_data();
101     caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);
102     const Dtype* label = bottom[1]->gpu_data();
103     const int dim = prob_.count() / outer_num_;
104     const int nthreads = outer_num_ * inner_num_;
105     // Since this memory is never used for anything else,
106     // we use to to avoid allocating new GPU memory.
107     Dtype* counts = prob_.mutable_gpu_diff();
108     // NOLINT_NEXT_LINE(whitespace/operators)
109     SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
110         CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,
111         outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
112 
113     Dtype valid_count = -1;
114     // Only launch another CUDA kernel if we actually need the count of valid
115     // outputs.
116     if (normalization_ == LossParameter_NormalizationMode_VALID &&
117         has_ignore_label_) {
118       caffe_gpu_asum(nthreads, counts, &valid_count);
119     }
120     const Dtype loss_weight = top[0]->cpu_diff()[0] /
121         (get_normalizer(normalization_, valid_count) * Caffe::getThreadNum());
122     caffe_gpu_scal(prob_.count(), loss_weight , bottom_diff);
123   }
124 }
125 
126 INSTANTIATE_LAYER_GPU_FUNCS_DISABLE_FP16(SoftmaxWithLossLayer);
127 
128 }  // namespace caffe

outer_num_: softmax 轴之前所有维度的乘积;在最常见的 softmax_axis_ == 1 的情况下就相当于 batch_size

dim: c*w*h

spatial_dim(inner_num_):w*h

softmax_loss.cpp的代码:

outer_num_ = bottom[0]->count(0, softmax_axis_);
inner_num_ = bottom[0]->count(softmax_axis_ + 1);

其实可以看出来 count(start, end) 统计的是半开区间 [start, end) 内各维大小的乘积——取前不取后。所以 count(0, softmax_axis_) 在 softmax_axis_ == 1 时只包含第 0 轴(batch 维),而 count(softmax_axis_ + 1) 则是 softmax 轴之后所有维度的乘积。

原文地址:https://www.cnblogs.com/ymjyqsx/p/8479104.html