最小二乘参数估计---梯度下降法求解参数的sas代码实现

理论和公式请看网易公开课中Andrew Ng的机器学习,或者coursera中Andrew Ng的机器学习

对于多元线性回归要拟合最好的直线,要使得误差平方和最小,课本上的方法都是求偏导,并使其为0,然后求解线性方程组。

但是还有很多其他方法可以达到上述效果,Andrew在大样本和小样本的情况下给出了两种梯度下降的方法。我这里实现了他的第一种

步长参数选取为0.03,初始值选取为 (0, 0)。数据集使用的是sas内置的sashelp.class数据集。

因为变量单位不同,所以都在scaling feature步进行了处理

验证后的结果与sas reg过程得到的结果一致。

options fullstimer;

/* Batch gradient-descent least-squares fit of weight on height using
   SASHELP.CLASS.  Both variables are mean-centered and range-scaled
   before the descent (units differ); the fitted coefficients are
   transformed back to the original scale at the end so they agree
   with PROC REG. */
proc iml;
    reset deflib=sashelp;
    use class;
    read all var {weight height} into me;   /* col 1 = weight (y), col 2 = height (x) */
    m = nrow(me);                           /* number of observations */

    /* Feature scaling: center by the mean, divide by the range */
    s1 = max(me[,1]) - min(me[,1]);
    s2 = max(me[,2]) - min(me[,2]);
    mean_s1 = mean(me[,1]);
    mean_s2 = mean(me[,2]);
    me[,1] = (me[,1] - mean_s1) / s1;
    me[,2] = (me[,2] - mean_s2) / s2;

    y = me[,1];                             /* scaled response */
    x = me[,2];                             /* scaled predictor */

    theta_0 = 0;                            /* intercept (scaled space), start at 0 */
    theta_1 = 0;                            /* slope (scaled space), start at 0 */
    alpha = 0.03;                           /* learning rate */
    tol = 0.000000001;                      /* stop when the cost change is below this */
    rec = 0;                                /* iteration counter */

    /* cost J(theta) = SSE / (2m) at the starting point; carried across
       iterations so the old cost is never recomputed */
    resid = theta_0 + theta_1*x - y;
    Jtheta_new = ssq(resid) / 2 / m;
    ov = 10;                                /* cost change; seeded > tol to enter loop */

    do while (ov > tol);
        Jtheta = Jtheta_new;                /* cost before this update */
        rec = rec + 1;
        xy = xy // (rec || Jtheta);         /* record (iteration, cost) for the plot;
                                               concatenating onto an unset matrix is
                                               the standard IML accumulation idiom */

        /* batch gradient step: grad_j = (1/m) * sum(resid # x_j) */
        resid = theta_0 + theta_1*x - y;
        theta_0 = theta_0 - alpha * sum(resid) / m;
        theta_1 = theta_1 - alpha * sum(resid # x) / m;

        /* cost after the update; converged when it stops moving */
        resid = theta_0 + theta_1*x - y;
        Jtheta_new = ssq(resid) / 2 / m;
        ov = abs(Jtheta_new - Jtheta);
    end;

    print ov;
    call pgraf(xy,'*','x','y','mmmmm');     /* line-printer plot of cost vs. iteration */

    /* Undo the feature scaling: in original units
       y = (mean_s1 + s1*theta_0 - mean_s2*s1*theta_1/s2) + (s1*theta_1/s2)*x */
    theta_0_last = theta_0*s1+mean_s1-mean_s2*s1*theta_1/s2;
    theta_1_last = theta_1*s1/s2;
    print theta_0_last theta_1_last;
run;
quit;
原文地址:https://www.cnblogs.com/yican/p/4206743.html