Weighted Softmax Loss Layer for Caffe
Usage:
(1) Modify the following part of caffe.proto, adding two parameters: `pos_mult` (the weight multiplier for the specified class) and `pos_cid` (the class id of the specified class):
```protobuf
// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
message SoftmaxParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];

  // The axis along which to perform the softmax -- may be negative to index
  // from the end (e.g., -1 for the last axis).
  // Any other axes will be evaluated as independent softmaxes.
  optional int32 axis = 2 [default = 1];

  optional float pos_mult = 3 [default = 1];
  optional int32 pos_cid = 4 [default = 1];
}
```
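The header below leaves the formulation as a TODO. As a sketch of what these two parameters express (an assumption read off their names and defaults, not a formula stated in this repo), the per-sample log-loss is scaled by a class-dependent weight:

$$
\ell = -\frac{1}{N} \sum_{i=1}^{N} w_{y_i} \log p_{i, y_i},
\qquad
w_y = \begin{cases} \texttt{pos\_mult} & \text{if } y = \texttt{pos\_cid} \\ 1 & \text{otherwise} \end{cases}
$$

where $p_{i, y_i}$ is the softmax probability assigned to the true label $y_i$ of sample $i$, and $N$ is the number of (non-ignored) labels when `normalize` is true.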
(2) In include\caffe\loss_layers.hpp, add the following:

```cpp
/**
 * @brief A weighted version of SoftmaxWithLossLayer.
 *
 * TODO: Add description. Add the formulation in math.
 */
template <typename Dtype>
class WeightedSoftmaxWithLossLayer : public LossLayer<Dtype> {
 public:
  /**
   * @param param provides LossParameter loss_param, with options:
   *  - ignore_label (optional)
   *    Specify a label value that should be ignored when computing the loss.
   *  - normalize (optional, default true)
   *    If true, the loss is normalized by the number of (nonignored) labels
   *    present; otherwise the loss is simply summed over spatial locations.
   */
  explicit WeightedSoftmaxWithLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "WeightedSoftmaxWithLoss"; }
  virtual inline int ExactNumBottomBlobs() const { return -1; }
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return -1; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 2; }

 protected:
  /// @copydoc WeightedSoftmaxWithLossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /**
   * @brief Computes the softmax loss error gradient w.r.t. the predictions.
   *
   * Gradients cannot be computed with respect to the label inputs (bottom[1]),
   * so this method ignores bottom[1] and requires !propagate_down[1], crashing
   * if propagate_down[1] is set.
   *
   * @param top output Blob vector (length 1), providing the error gradient
   *      with respect to the outputs
   *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
   *      This Blob's diff will simply contain the loss_weight * @f$ \lambda @f$,
   *      as @f$ \lambda @f$ is the coefficient of this layer's output
   *      @f$ \ell_i @f$ in the overall Net loss
   *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms} @f$; hence
   *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
   *      (*Assuming that this top Blob is not used as a bottom (input) by any
   *      other layer of the Net.)
   * @param propagate_down see Layer::Backward.
   *      propagate_down[1] must be false as we can't compute gradients with
   *      respect to the labels.
   * @param bottom input Blob vector (length 2)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the predictions @f$ x @f$; Backward computes diff
   *      @f$ \frac{\partial E}{\partial x} @f$
   *   -# @f$ (N \times 1 \times 1 \times 1) @f$
   *      the labels -- ignored as we can't compute their error gradients
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// The internal SoftmaxLayer used to map predictions to a distribution.
  shared_ptr<Layer<Dtype> > softmax_layer_;
  /// prob stores the output probability predictions from the SoftmaxLayer.
  Blob<Dtype> prob_;
  /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward
  vector<Blob<Dtype>*> softmax_bottom_vec_;
  /// top vector holder used in call to the underlying SoftmaxLayer::Forward
  vector<Blob<Dtype>*> softmax_top_vec_;
  /// Whether to ignore instances with a certain label.
  bool has_ignore_label_;
  /// The label indicating that an instance should be ignored.
  int ignore_label_;
  /// Whether to normalize the loss by the total number of values present
  /// (otherwise just by the batch size).
  bool normalize_;

  int softmax_axis_, outer_num_, inner_num_;

  float pos_mult_;
  int pos_cid_;
};
```

(3) Add two files, weighted_softmax_loss_layer.cpp and weighted_softmax_loss_layer.cu, to the src\caffe\layers folder.
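Those two files ship with the repository and are not reproduced in this README. For orientation, here is a minimal sketch of what the CPU forward/backward pass of such a layer looks like, consistent with the header above; it elides the `ignore_label` and `normalize` options and is an illustration, not the shipped implementation:

```cpp
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

#include "caffe/loss_layers.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Assumes LayerSetUp has read the two new proto fields, e.g.:
//   pos_mult_ = this->layer_param_.softmax_param().pos_mult();
//   pos_cid_  = this->layer_param_.softmax_param().pos_cid();

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Run the internal SoftmaxLayer to fill prob_ with class probabilities.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  const int dim = prob_.count() / outer_num_;
  Dtype loss = 0;
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; ++j) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      // The only change vs. SoftmaxWithLossLayer: up-weight one class.
      const Dtype w = (label_value == pos_cid_) ? Dtype(pos_mult_) : Dtype(1);
      loss -= w * std::log(std::max(
          prob_data[i * dim + label_value * inner_num_ + j],
          Dtype(FLT_MIN)));
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / (outer_num_ * inner_num_);
}

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  // The softmax + log-loss gradient is (prob - one_hot(label)); the
  // per-sample weight scales that whole gradient.
  caffe_copy(prob_.count(), prob_.cpu_data(), bottom_diff);
  const int dim = prob_.count() / outer_num_;
  const int channels = bottom[0]->shape(softmax_axis_);
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; ++j) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      const Dtype w = (label_value == pos_cid_) ? Dtype(pos_mult_) : Dtype(1);
      bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
      for (int c = 0; c < channels; ++c) {
        bottom_diff[i * dim + c * inner_num_ + j] *= w;
      }
    }
  }
  // Scale by the loss weight and normalization, as SoftmaxWithLossLayer does.
  const Dtype loss_weight = top[0]->cpu_diff()[0];
  caffe_scal(prob_.count(), loss_weight / Dtype(outer_num_ * inner_num_),
             bottom_diff);
}

INSTANTIATE_CLASS(WeightedSoftmaxWithLossLayer);
REGISTER_LAYER_CLASS(WeightedSoftmaxWithLoss);

}  // namespace caffe
```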
(4) Use the layer in your training prototxt as follows (this example up-weights class 1, counting from 0, with a multiplier of 2.0):
```
layer {
  name: "loss"
  type: "WeightedSoftmaxWithLoss"
  bottom: "fc_end"
  bottom: "label"
  top: "loss"
  softmax_param {
    pos_cid: 1
    pos_mult: 2.0
  }
}
```
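With this setting, the loss term and the backpropagated gradient of every sample whose label equals 1 are scaled by 2.0, while all other classes keep a weight of 1; this is a simple way to counter class imbalance. (This reading follows the sketch in step (3); consult the shipped .cpp/.cu files for the exact normalization behavior.)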