多标签caffe重新编译

说明:

Caffe自带的图像转LMDB接口只支持单label，对于多label的任务，可以使用HDF5的格式，也可以通过修改caffe代码来实现.本篇文章介绍怎么通过修改DataLayer来实现带Multilabel的lmdb格式数据输入的分类任务.

--- 本文参考自 :

Caffe实现多标签分类支持Multi-Label的lmdb数据格式输入

caffe实现多标签分类模型

1. 总体介绍

共修改5个文件：

$CAFFE_ROOT/src/caffe/proto/caffe.proto

$CAFFE_ROOT/src/caffe/layers/data_layer.cpp

$CAFFE_ROOT/src/caffe/util/io.cpp

$CAFFE_ROOT/include/caffe/util/io.hpp

$CAFFE_ROOT/tools/convert_imageset.cpp

其中$CAFFE_ROOT为caffe根目录,即git clone 目录, 如图:

修改完成后,执行:

make clean
make all -j8

-------______
具体的文件修改内容如下:

2.1 caffe.proto

vim $CAFFE_ROOT/src/caffe/proto/caffe.proto
在 message Datum { } 中添加一行代码，即添加一个 repeated 类型的 labels 字段，以便接收多标签数据：
  repeated float labels = 8;

// Datum message extended with a repeated `labels` field (tag 8) so that a
// single record can carry several label values.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  // Original single-label field (left unchanged).
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
  // Added for multi-label support: one float entry per label value.
  repeated float labels = 8;
  // End of multi-label addition.
}

2.2 data_layer.cpp (两处)

// label
  /*
  Original single-label code, kept for reference (one label per item,
  label blob shaped as a vector of length batch_size):
  if (this->output_labels_) {
    vector<int> label_shape(1, batch_size);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->prefetch_.size(); ++i) {
      this->prefetch_[i]->label_.Reshape(label_shape);
    }
  }
  */
  // Multi-label replacement: top[1] and the label blob of every prefetch
  // buffer are reshaped to (batch_size, 4, 1, 1), i.e. 4 label values per
  // item. The 4 is hard-coded here and again in the label-copy loop; the two
  // places must be changed together.
  if (this->output_labels_){
      top[1]->Reshape(batch_size, 4, 1, 1);
      for (int i = 0; i < this->prefetch_.size(); ++i) {
          this->prefetch_[i]->label_.Reshape(batch_size, 4, 1, 1);
      }
  }
  // End of multi-label replacement.

// Copy label.
    /*
    Original single-label code, kept for reference:
    if (this->output_labels_) {
      Dtype* top_label = batch->label_.mutable_cpu_data();
      top_label[item_id] = datum.label();
    }
    */
    // Multi-label replacement: copy the labels of this datum into the row of
    // the label blob that belongs to item_id.
    if (this->output_labels_) {
      // Number of label values per item; must equal the second dimension used
      // when the label blob is reshaped, i.e. (batch_size, 4, 1, 1).
      const int num_labels = 4;
      // Fail loudly instead of indexing past the end of the repeated field
      // when a record was written with fewer labels than expected.
      CHECK_GE(datum.labels_size(), num_labels)
          << "Datum carries fewer labels than the label blob expects";
      Dtype* top_label = batch->label_.mutable_cpu_data();
      for (int i = 0; i < num_labels; ++i) {
        top_label[item_id * num_labels + i] = datum.labels(i);
      }
    }
    // End of multi-label replacement.

2.3 io.cpp(两处)

修改两个函数,替换成下面修改后的代码即可,
vim $CAFFE_ROOT/src/caffe/util/io.cpp

ReadImageToDatum()

bool ReadImageToDatum(const string& filename, const vector<float> label,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum) {
  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
  if (cv_img.data) {
    if (encoding.size()) {
      if ( (cv_img.channels() == 3) == is_color && !height && !width &&
          matchExt(filename, encoding) )
        return ReadFileToDatum(filename, label, datum);
      std::vector<uchar> buf;
      cv::imencode("."+encoding, cv_img, buf);
      datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
                      buf.size()));
      
      datum->clear_labels();
      for (int i = 0; i < label.size(); i++){
         datum->add_labels(label[i]);
      }     
      datum->set_encoded(true);
      return true;
    }
    CVMatToDatum(cv_img, datum);
    
    datum->clear_labels();
    for (int i = 0; i < label.size(); i++){
       datum->add_labels(label[i]);
    }     
    return true;
  } else {
    return false;
  }
}

ReadFileToDatum()

//////////////////////////////////////////////////////////////////////
// Multi-label overload: copies the bytes of `filename` into `datum` as
// encoded data and stores every element of `label` in the repeated `labels`
// field. Returns false when the file cannot be opened, true otherwise.
bool ReadFileToDatum(const string& filename, const vector<float> label,
    Datum* datum) {
  // Opened with ios::ate so the initial position is the end of the file.
  fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
  if (!file.is_open()) {
    return false;
  }

  // The end position gives the number of bytes to read.
  std::streampos size = file.tellg();
  std::string buffer(size, ' ');
  file.seekg(0, ios::beg);
  file.read(&buffer[0], size);
  file.close();
  datum->set_data(buffer);

  datum->clear_labels();
  for (size_t k = 0; k < label.size(); ++k) {
    datum->add_labels(label[k]);
  }
  datum->set_encoded(true);
  return true;
}

2.4 io.hpp

在其中新加入 /////// ..... /////// 之间的两个函数声明，不删除原来的任何代码。下面的前两个函数声明是原文件中就有的，可以看到，原来代码中的 label 参数是 int 类型，只能处理单标签；新增的两个函数声明参考了上面两个函数，将 const int label 参数改成了 std::vector<float> labels，以接收多标签。

// Original single-label declarations (unchanged).
bool ReadImageToDatum(const string& filename, const int label,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum);
bool ReadFileToDatum(const string& filename, const int label, Datum* datum);
//////////////////////////////////////////
// Multi-label overloads: `labels` holds every label value of the sample and
// is stored in the repeated `labels` field of the Datum.
bool ReadImageToDatum(const string& filename, std::vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum);
// Declared as an overload of ReadFileToDatum so that it matches the
// definition in io.cpp and the call made from the multi-label
// ReadImageToDatum; a differently named declaration would leave that call
// without a matching function.
bool ReadFileToDatum(const string& filename, std::vector<float> labels,
    Datum* datum);
///////////////////////////////////

2.5 convert_imageset.cpp

/*
  Original single-label parsing, kept for reference: everything after the
  last space of a line is taken as the integer label, the rest as the file
  name.
  std::ifstream infile(argv[2]);
  std::vector<std::pair<std::string, int> > lines;
  std::string line;
  size_t pos;
  int label;
  while (std::getline(infile, line)) {
    pos = line.find_last_of(' ');
    label = atoi(line.substr(pos + 1).c_str());
    lines.push_back(std::make_pair(line.substr(0, pos), label));
  }
  */
  // Multi-label replacement: the list file named by argv[2] is read as
  // whitespace-separated entries of the form
  //   <filename> <label0> <label1> <label2> <label3>
  // and each entry is appended to `lines` as (filename, 4-element vector).
  // Notes:
  //  - The label count is fixed at 4 by the size of `labels` and the
  //    extraction chain below.
  //  - Extraction with >> splits on whitespace, so a file name containing
  //    spaces cannot be represented in this format.
  //  - The loop ends at the first entry that fails to parse (or at end of
  //    file); anything after a malformed entry is not read.
  std::ifstream infile(argv[2]);
  std::vector<std::pair<std::string, vector<float> > > lines;
  std::string filename;
  vector<float> labels(4);
  while (infile >> filename >> labels[0] >> labels[1] >> labels[2] >> labels[3]){
      lines.push_back(std::make_pair(filename, labels));
  }
  // End of multi-label replacement.