cvCreateStumpClassifier


/*
 * Trains a single decision stump (one component, one threshold, two leaf
 * values) on weighted samples and returns it as a generic CvClassifier.
 *
 * Parameters:
 *   trainData              CV_32FC1 matrix of component values. Layout is
 *                          chosen by CV_IS_ROW_SAMPLE( flags ): one sample per
 *                          row when set, one sample per column otherwise.
 *   flags                  Layout selector for trainData (see above).
 *   trainClasses           CV_32FC1 vector with one response per sample,
 *                          either 1 x m or m rows.
 *   (typeMask)             Unnamed and unused.
 *   missedMeasurementsMask Must be NULL.
 *   compIdx                Must be NULL.
 *   sampleIdx              Optional CV_32FC1 vector of sample indices to train
 *                          on; values are truncated to int and must lie in
 *                          [0, m). NULL means all m samples are used.
 *   weights                CV_32FC1 vector with one weight per sample,
 *                          either 1 x m or m rows.
 *   trainParams            Read as CvStumpTrainParams: `error` selects the
 *                          entry of findStumpThreshold_32s to use, `type`
 *                          decides whether the leaf values are converted to
 *                          -1/+1 labels at the end.
 *
 * Returns a stump allocated with cvAlloc; its `release` member is set to
 * cvReleaseStumpClassifier.
 *
 * All argument checks use CV_Assert so that they stay active in release
 * builds: the strides and counts validated here are used for raw pointer
 * arithmetic below.
 */
CV_BOOST_IMPL
CvClassifier* cvCreateStumpClassifier( CvMat* trainData,
                      int flags,
                      CvMat* trainClasses,
                      CvMat* /*typeMask*/,
                      CvMat* missedMeasurementsMask,
                      CvMat* compIdx,
                      CvMat* sampleIdx,
                      CvMat* weights,
                      CvClassifierTrainParams* trainParams
                    )
{
    CvStumpClassifier* stump = NULL;
    int m = 0; /* number of samples */
    int n = 0; /* number of components */
    uchar* data = NULL;
    int cstep   = 0;   /* byte stride between consecutive components of one sample */
    int sstep   = 0;   /* byte stride between consecutive samples of one component */
    uchar* ydata = NULL;
    int ystep    = 0;  /* byte stride between consecutive responses */
    uchar* idxdata = NULL;
    int idxstep    = 0; /* byte stride between consecutive entries of sampleIdx */
    int l = 0; /* number of indices */
    uchar* wdata = NULL;
    int wstep    = 0;  /* byte stride between consecutive weights */

    int* idx = NULL;
    int i = 0;

    /* Passed by address to every threshold search below.
       NOTE(review): presumably lets the search routine cache the weighted sums
       across components, with FLT_MAX meaning "not computed yet" - confirm
       against findStumpThreshold_32s. */
    float sumw   = FLT_MAX;
    float sumwy  = FLT_MAX;
    float sumwyy = FLT_MAX;

    CV_Assert( trainData != NULL );
    CV_Assert( CV_MAT_TYPE( trainData->type ) == CV_32FC1 );
    CV_Assert( trainClasses != NULL );
    CV_Assert( CV_MAT_TYPE( trainClasses->type ) == CV_32FC1 );
    CV_Assert( missedMeasurementsMask == NULL );
    CV_Assert( compIdx == NULL );
    CV_Assert( weights != NULL );
    CV_Assert( CV_MAT_TYPE( weights->type ) == CV_32FC1 );
    CV_Assert( trainParams != NULL );

    data = trainData->data.ptr;
    if( CV_IS_ROW_SAMPLE( flags ) )
    {
        /* One sample per row: components are adjacent elements, samples are rows. */
        cstep = CV_ELEM_SIZE( trainData->type );
        sstep = trainData->step;
        m = trainData->rows;
        n = trainData->cols;
    }
    else
    {
        /* One sample per column: samples are adjacent elements, components are rows. */
        sstep = CV_ELEM_SIZE( trainData->type );
        cstep = trainData->step;
        m = trainData->cols;
        n = trainData->rows;
    }

    /* Responses: accept a single row of m elements or a matrix with m rows. */
    ydata = trainClasses->data.ptr;
    if( trainClasses->rows == 1 )
    {
        CV_Assert( trainClasses->cols == m );
        ystep = CV_ELEM_SIZE( trainClasses->type );
    }
    else
    {
        CV_Assert( trainClasses->rows == m );
        ystep = trainClasses->step;
    }

    /* Weights: same two layouts as the responses. */
    wdata = weights->data.ptr;
    if( weights->rows == 1 )
    {
        CV_Assert( weights->cols == m );
        wstep = CV_ELEM_SIZE( weights->type );
    }
    else
    {
        CV_Assert( weights->rows == m );
        wstep = weights->step;
    }

    /* Without sampleIdx every sample takes part in training. */
    l = m;
    if( sampleIdx != NULL )
    {
        CV_Assert( CV_MAT_TYPE( sampleIdx->type ) == CV_32FC1 );

        idxdata = sampleIdx->data.ptr;
        if( sampleIdx->rows == 1 )
        {
            l = sampleIdx->cols;
            idxstep = CV_ELEM_SIZE( sampleIdx->type );
        }
        else
        {
            l = sampleIdx->rows;
            idxstep = sampleIdx->step;
        }
        CV_Assert( l <= m );

        /* Each index later addresses the data, response and weight buffers,
           so reject out-of-range entries here, before anything is allocated
           (a failed check then leaks nothing). */
        for( i = 0; i < l; i++ )
        {
            int sample = (int) *((float*) (idxdata + i*idxstep));
            CV_Assert( sample >= 0 && sample < m );
        }
    }

    idx = (int*) cvAlloc( l * sizeof( int ) );
    stump = (CvStumpClassifier*) cvAlloc( sizeof( CvStumpClassifier) );

    /* START */
    memset( (void*) stump, 0, sizeof( CvStumpClassifier ) );

    stump->eval = cvEvalStumpClassifier;
    stump->tune = NULL;
    stump->save = NULL;
    stump->release = cvReleaseStumpClassifier;

    /* Start from the worst possible errors; the threshold search receives
       these by address together with the threshold and leaf values. */
    stump->lerror = FLT_MAX;
    stump->rerror = FLT_MAX;
    stump->left  = 0.0F;
    stump->right = 0.0F;

    /* copy indices */
    if( sampleIdx != NULL )
    {
        /* sampleIdx stores indices as floats; truncate them to int. */
        for( i = 0; i < l; i++ )
        {
            idx[i] = (int) *((float*) (idxdata + i*idxstep));
        }
    }
    else
    {
        for( i = 0; i < l; i++ )
        {
            idx[i] = i;
        }
    }

    for( i = 0; i < n; i++ )
    {
        CvValArray va;

        /* Reorder idx by the values of component i (idx is re-sorted in place
           for every component). */
        va.data = data + i * ((size_t) cstep);
        va.step = sstep;
        icvSortIndexedValArray_32s( idx, l, &va );

        /* trainParams->error picks the search routine from the
           findStumpThreshold_32s table. A non-zero result records component i
           as the stump's component.
           NOTE(review): presumably non-zero means this component improved on
           the best split found so far - confirm against the table entries. */
        if( findStumpThreshold_32s[(int) ((CvStumpTrainParams*) trainParams)->error]
              ( data + i * ((size_t) cstep), sstep,
                wdata, wstep, ydata, ystep, (uchar*) idx, sizeof( int ), l,
                &(stump->lerror), &(stump->rerror),
                &(stump->threshold), &(stump->left), &(stump->right),
                &sumw, &sumwy, &sumwyy ) )
        {
            stump->compidx = i;
        }
    } /* for each component */

    /* END */

    cvFree( &idx );

    if( ((CvStumpTrainParams*) trainParams)->type == CV_CLASSIFICATION_CLASS )
    {
        /* Turn each leaf value into a hard label: +1 if it is >= 0.5, else -1. */
        stump->left = 2.0F * (stump->left >= 0.5F) - 1.0F;
        stump->right = 2.0F * (stump->right >= 0.5F) - 1.0F;
    }

    return (CvClassifier*) stump;
}


原文地址:https://www.cnblogs.com/lcchuguo/p/5395598.html