x=np.sort(4*(np.random.rand(25,1)-0.5),axis=0)# Random data from [-2,2]b=0.9*x+0.1*np.random.randn(len(x),1)# Line y = 0.9x with noiseatrue=np.linalg.lstsq(x,b,rcond=None)[0]# Least-squares slope (no outliers)atrue=atrue.item(0)b[-1]=-5.5# Introduce outlieracorrupt=np.linalg.lstsq(x,b,rcond=None)[0]# New slopeacorrupt=acorrupt.item(0)
In [76]:
## L1 optimization to reject outlierdefL1_norm(a):returnnp.linalg.norm(a*x-b,ord=1)a0=acorrupt# initialize to L2 solutionres=minimize(L1_norm,a0)aL1=res.x[0]# aL1 is robust
In [77]:
plt.plot(x[:-1],b[:-1],'o',color='b',ms=8)# Dataplt.plot(x[-1],b[-1],'o',color='r',ms=8)# Outlierxgrid=np.arange(-2,2,0.01)plt.plot(xgrid,atrue*xgrid,'--',color='k')# L2 fit (no outlier)plt.plot(xgrid,acorrupt*xgrid,'--',color='r')# L2 fit (outlier)plt.plot(xgrid,aL1*xgrid,'--',color='b')# L1 fitplt.show()