Notes on Nonparametric and Semiparametric Regression in R
With a fixed window width (bandwidth) $h$, the kernel density estimate is

$\hat{f}(x) = \frac{1}{nh}\sum_{i=1}^{n} K\!\left(\frac{x - x_i}{h}\right)$

2. Kernel Smoothing

The kernel smoother is the locally weighted mean (Nadaraya-Watson estimator)

$\hat{m}(x) = \frac{\sum_{i=1}^{n} K\!\left(\frac{x_i - x}{h}\right) y_i}{\sum_{i=1}^{n} K\!\left(\frac{x_i - x}{h}\right)}$

where $K(\cdot)$ is the kernel function and $h$ is the window width.
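A minimal sketch of the estimator above, computed directly from the formula; the Gaussian kernel, the simulated data, and the bandwidth value are my own illustrative choices, not from the original notes.

# Nadaraya-Watson kernel smoother computed by hand (illustrative data).
set.seed(1)
x <- runif(200, 0, 10)
y <- sin(x) + rnorm(200, sd = 0.3)
h <- 0.8                                 # window width (assumed value)
x.grid <- seq(0, 10, length.out = 100)
m.hat <- sapply(x.grid, function(x0) {
  w <- dnorm((x - x0) / h)               # kernel weights K((x_i - x)/h)
  sum(w * y) / sum(w)                    # locally weighted mean
})
plot(x, y, pch = ".")
lines(x.grid, m.hat, lwd = 2)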
II. Local Polynomial Regression

1. Main structure
Local polynomial regression extends kernel smoothing; it, too, is built from locally weighted means. The main variants:
- local constant regression
- local linear regression
- lowess (Cleveland, 1979)
- loess (Cleveland, 1988)

[References for this part: Takezawa (2006). Introduction to Nonparametric Regression. (p. 185, §3.7 and p. 195, §3.9); Chambers and Hastie (1993). Statistical Models in S. (p. 312, ch. 8)]

2. Method outline
(1) For each $x_i$, construct an interval of predetermined width (the span) centered at that point;
(2) Within each such neighborhood, estimate the parameters by weighted least squares (WLS), and use the fitted model to predict $y$ at $x_i$ only (one fitted value per focal point), taken as the estimate of $\hat{y}|x_i$;
(3) Move on to the next point $x_j$ and repeat;
(4) Connect the estimates $\hat{y}|x_i$ into a curve. (A step-by-step sketch follows the package list below.)

[R functions:
library(KernSmooth)   #locpoly()
library(locpol)       #locpol(); locCteSmootherC()
library(locfit)       #locfit()
#weight function: kernel="tcub" (tricube, the default); alternatives: "rect", "trwt", "tria", "epan", "bisq", "gauss" ]
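The sketch below implements the four steps above literally in base R, using a tricube weight and a fixed nearest-neighbor span; the simulated data and the span value are illustrative assumptions, not from the original notes.

# Step-by-step local linear regression (steps (1)-(4) above).
set.seed(2)
x <- sort(runif(150, 0, 10))
y <- cos(x) + rnorm(150, sd = 0.3)
k <- 60                                   # span: number of nearest neighbors (assumed)
tricube <- function(z) ifelse(abs(z) < 1, (1 - abs(z)^3)^3, 0)
y.hat <- sapply(x, function(x0) {
  d <- abs(x - x0)
  h <- sort(d)[k]                         # (1) window reaching the k-th neighbor
  w <- tricube(d / h)                     # tricube weights on the neighborhood
  fit <- lm(y ~ x, weights = w)           # (2) weighted least squares
  predict(fit, data.frame(x = x0))        # keep only the fitted value at x0
})                                        # (3) repeated for every focal point
plot(x, y)
lines(x, y.hat, lwd = 2)                  # (4) connect the fitted values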
3. Estimation criterion for each method
(1) Polynomial degree p = 0, local constant regression (kernel smoothing):

$\min_{\beta_0} \sum_{i=1}^{n} w_i(x_0)\,(y_i - \beta_0)^2$, where $w_i(x_0) = K\!\left(\frac{x_i - x_0}{h}\right)$
(2) Polynomial degree p = 1, local linear regression:

$\min_{\beta_0,\beta_1} \sum_{i=1}^{n} w_i(x_0)\,\bigl(y_i - \beta_0 - \beta_1(x_i - x_0)\bigr)^2$
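Both criteria can be fitted with KernSmooth::locpoly(); the simulated data and the bandwidth value below are my own illustrative assumptions.

# Local constant (degree = 0) vs. local linear (degree = 1) fits.
library(KernSmooth)
set.seed(3)
x <- runif(200, 0, 10)
y <- sin(x) + rnorm(200, sd = 0.3)
fit0 <- locpoly(x, y, degree = 0, bandwidth = 0.5)   # kernel smoothing
fit1 <- locpoly(x, y, degree = 1, bandwidth = 0.5)   # local linear
plot(x, y, pch = ".")
lines(fit0, lty = 2)    # locpoly returns a list with $x and $y
lines(fit1, lty = 1)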
(3) Lowess (locally weighted scatterplot smoothing), p = 1:

$\min_{\beta_0,\beta_1} \sum_{i \in N(x_0)} w_i(x_0)\,\bigl(y_i - \beta_0 - \beta_1(x_i - x_0)\bigr)^2$, with tricube weights over the nearest-neighbor window $N(x_0)$
[There is also a robustness reweighting step, omitted here; see the original book or the slides.]

(4) Loess (local regression), p = 1, 2:

$\min_{\beta_0,\dots,\beta_p} \sum_{i \in N(x_0)} w_i(x_0)\,\Bigl(y_i - \sum_{j=0}^{p}\beta_j (x_i - x_0)^j\Bigr)^2$
[There is also a robustness reweighting step, omitted here; see the original book or the slides.]

(5) Friedman's supersmoother:
- symmetric k-NN, using a local linear fit;
- varying span, determined by local cross-validation;
- not robust to outliers; fast to compute;
- supsmu() in R (minimal sketch below).
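A minimal supsmu() call; the simulated data are my own illustrative choice.

# Friedman's supersmoother; the span is chosen by local cross-validation
# by default. supsmu() is in the stats package.
set.seed(4)
x <- runif(300, 0, 10)
y <- sin(2 * x) + rnorm(300, sd = 0.4)
fit <- supsmu(x, y)          # returns a list with $x and $y
plot(x, y, pch = ".")
lines(fit, lwd = 2)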
III. Model Selection

Choices that must be made: (1) the span (window width); (2) the degree of the polynomial for the local regression models; (3) the weight functions. [Other details omitted.] A rough span-comparison sketch follows.
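As a quick illustration of how much the span choice matters, the sketch below overlays loess fits with three spans; the data and the span values are my own assumptions, not from the original notes.

# Effect of the span on a loess fit; spans are arbitrary illustrative values.
spans <- c(0.2, 0.5, 0.9)
set.seed(5)
x <- runif(200, 0, 10)
y <- sin(x) + rnorm(200, sd = 0.4)
d <- data.frame(x, y)
grid <- data.frame(x = seq(0, 10, length.out = 200))
plot(x, y, pch = ".")
for (i in seq_along(spans)) {
  fit <- loess(y ~ x, data = d, span = spans[i])
  lines(grid$x, predict(fit, grid), lty = i)   # small span = wiggly, large = smooth
}
legend("topright", legend = paste("span =", spans), lty = seq_along(spans))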
IV. R Code

library(foreign)
library(SemiPar)
library(mgcv)
# The file argument was garbled in the source; "jacob.txt" is a hypothetical
# placeholder. The data frame contains the variables perotvote and chal.vote.
jacob <- read.table("jacob.txt", header = TRUE)

###############################################################################
# Part 1: simple smoothing estimates
# 1. Kernel Density Estimation
# Illustration of kernel concepts

# Defining the window width
attach(jacob)
x0 <- sort(perotvote)[75]
diffs <- abs(perotvote - x0)
which.diff <- sort(diffs)[120]

# Applying the tricube weight
tricube <- function(z) {
  ifelse(abs(z) < 1, (1 - (abs(z))^3)^3, 0)
}
a <- seq(0, 1, by = .1)
tricube(a)

# Figure 2.5 (the axis labels were garbled in the source; placeholders used)
plot(range(perotvote), c(0, 1), type = "n", bty = "l",
     xlab = "Perot Vote (%)", ylab = "Tricube Weight")
abline(v = c(x0 - which.diff, x0 + which.diff), lty = 2)
abline(v = x0)
xwts <- seq(x0 - which.diff, x0 + which.diff, len = 250)
lines(xwts, tricube((xwts - x0)/which.diff), lty = 1, lwd = 1)
x.n <- perotvote[diffs <= which.diff]   # defined here; the source used x.n before defining it
points(x.n, tricube((x.n - x0)/which.diff), cex = 1)

###############################################################################
# 2. Kernel Smoothing
###############################################################################
# Figure 2.6: three kernel smooths. The kernel/bandwidth arguments were
# garbled in the source, so the values below are placeholders.
par(mfrow = c(3, 1))
plot(perotvote, chal.vote, pch = ".", xlab = "Vote for Perot (%)",
     ylab = "Challenger's Vote Share (%)", main = "Bandwidth = 2")
lines(ksmooth(perotvote, chal.vote, bandwidth = 2))
plot(perotvote, chal.vote, pch = ".", xlab = "Vote for Perot (%)",
     ylab = "Challenger's Vote Share (%)", main = "Normal kernel")
lines(ksmooth(perotvote, chal.vote, kernel = "normal", bandwidth = 2))
plot(perotvote, chal.vote, pch = ".", xlab = "Vote for Perot (%)",
     ylab = "Challenger's Vote Share (%)", main = "Bandwidth = 6")
lines(ksmooth(perotvote, chal.vote, bandwidth = 6))

#******* Comparison of the box and normal kernels in ksmooth(), equal bandwidth
plot(perotvote, chal.vote, pch = ".", xlab = "Vote for Perot (%)",
     ylab = "Challenger's Vote Share (%)")
lines(ksmooth(perotvote, chal.vote, kernel = "box", bandwidth = 4), lty = 1)
lines(ksmooth(perotvote, chal.vote, kernel = "normal", bandwidth = 4), lty = 2)

###############################################################################
# Part 2: the LPR model
# 1. Data prep for local average regression, step by step
cong <- as.data.frame(jacob[, 2:3])
cong <- cong[order(cong$perotvote), 1:2]
y <- as.matrix(cong$chal.vote)
x <- as.matrix(cong$perotvote)
n <- length(y)

tricube <- function(z) {
  ifelse(abs(z) < 1, (1 - (abs(z))^3)^3, 0)
}

x0 <- x[75]
diffs <- abs(x - x0)
which.diff <- sort(diffs)[120]
x.n <- x[diffs <= which.diff]
y.n <- y[diffs <= which.diff]
weigh <- tricube((x.n - x0)/which.diff)
mod <- lm(y.n ~ x.n, weights = weigh)

# Figure 2.7 (labels garbled in the source; placeholders used)
plot(x, y, type = "n", bty = "l", xlab = "Vote for Perot (%)",
     ylab = "Challenger's Vote Share (%)")
abline(v = c(x0 - which.diff, x0 + which.diff), lty = 2)
abline(v = x0)
points(x[diffs > which.diff], y[diffs > which.diff], pch = 16, cex = 1, col = gray(.80))
points(x[diffs <= which.diff], y[diffs <= which.diff], cex = .85)
abline(mod, lwd = 2, col = 1)
# The use of expression() here is interesting; the label text was garbled
# in the source, so "Fit at x[0]" is a placeholder.
text(27.5, 50, expression(paste("Fit at ", x[0])))
arrows(25, 47, 15, 37, code = 2, length = .10)

###############################################################################
# 2. Now putting it together for a local regression demonstration.
# OLS fit for comparison
ols <- lm(chal.vote ~ perotvote, data = jacob)

# The loess fit (defaults: degree = 2, family = "gaussian", tricube weights)
model.loess <- loess(chal.vote ~ perotvote, data = jacob, span = 0.5)
n <- length(chal.vote)
x.loess <- seq(min(perotvote), max(perotvote), length = n)
y.loess <- predict(model.loess, data.frame(perotvote = x.loess))  # predictions for comparison

# The lowess fit (robust local linear fit with tricube weights by default).
# Note: lowess() takes x and y vectors, not a formula, and returns the
# smoothed curve directly in $x and $y, so no predict() step is needed.
model.lowess <- lowess(jacob$perotvote, jacob$chal.vote, f = 0.5)
x.lowess <- model.lowess$x
y.lowess <- model.lowess$y   # fitted values for comparison
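The loess and lowess predictions above are computed "for comparison", but the comparison plot itself does not survive in the source; a minimal version might look like this (the line types and legend are my own choices):

# Overlay the OLS, loess, and lowess fits computed above.
plot(jacob$perotvote, jacob$chal.vote, pch = ".",
     xlab = "Vote for Perot (%)", ylab = "Challenger's Vote Share (%)")
lines(x.loess, y.loess, lty = 1)          # loess, span = 0.5
lines(x.lowess, y.lowess, lty = 2)        # lowess, f = 0.5
abline(ols, lty = 3)                      # OLS fit for reference
legend("topright", legend = c("loess", "lowess", "OLS"), lty = 1:3)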