source('SMSVM_1.2.1.R')

# Example 1:
# A toy example (p=2, k=3) illustrated in OSU TR No. 743.
# x2 is not relevant to y.

######################
#       MSVM         #
######################

# generate training data
set.seed(326)
temp <- twod.data(60)
x <- as.matrix(temp$x)
y <- temp$y
# > table(y)
# y
#  1  2  3
# 16 25 19

# MSVM with the additive spline kernel;
# tune lambda using 10-fold CV (w.r.t. the hinge loss)
msvm.out <- msvm(x, y, lambda = seq(-16, -8, 1), kernel.type = 'spline-t',
                 fold = 10, cv = T)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.6183682
# The optimal lambda on log2 scale: -12

# plot the MSVM classification boundary
kernel <- list(type = 'spline-t', par = 1)
msvm.plot(x, y, kernel, msvm.out$model)

# generate test data
set.seed(327)
temp1 <- twod.data(100)
x.test <- as.matrix(temp1$x)
y.test <- temp1$y

# prediction
fit.test <- predict.msvm(x, x.test, kernel, msvm.out$model)
# test error rate:
error.rate(y.test, fit.test)
# Output:
# > 0.48
# predicted class:
# fit.class <- apply(fit.test, 1, which.max)

######################
#       SMSVM        #
######################

#-----------------#
# One-step update #
#-----------------#
# The initial c-step is, in fact, the same as MSVM.
lambda <- seq(-16, -8, 1)
cstep.out <- cstep(x, y, lambda, kernel.type = 'spline-t', cv = T, fold = 10)
# Output:
# The minimum of 10 fold cross-validated hinge loss:
# 0.6183682
# The optimal lambda on log2 scale: -12

# c-step tuning plot
plot(lambda, cstep.out$hinge, type = "b",
     xlab = expression(log[2](lambda)), ylab = "cv.hinge")

# First update
# theta-step: feature selection --------------------------
# sequential tuning
thetastep.out <- thetastep(x, y, opt.lambda = -12, lambda.theta = seq(-10, 0, 1),
                           kernel.type = 'spline-t', fold = 10,
                           isCombined = F, cv = T)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.6095216
# The optimal lambda.theta on log2 scale: -5
# The number of selected features out of 2 : 1
# The average shrinkage factor: 0.5

# theta-step tuning plot
lambda.theta <- seq(-10, 0, 1)
plot(lambda.theta, thetastep.out$hinge, type = "b",
     xlab = expression(log[2](lambda[theta])), ylab = "cv.hinge")

# optimal rescaling parameters
thetastep.out$opt.theta
# Output:
# [1] 1 0

# visualize the theta-path as a function of lambda.theta
theta.plot(lambda.theta, thetastep.out$theta.seq)
abline(v = thetastep.out$opt.lambda.theta, lty = 2)

# The first c-step based on the fitted theta -----------------
cstep1.out <- cstep(x, y, lambda, kernel.type = 'spline-t',
                    theta = thetastep.out$opt.theta, cv = T, fold = 10)
# Output:
# The minimum of 10 fold cross-validated hinge loss:
# 0.6007785
# The optimal lambda on log2 scale: -11

# visualize the classification boundaries
msvm.plot(x, y, kernel, cstep1.out$model, thetastep.out$opt.theta)

# prediction
fit.test <- predict.smsvm(x, x.test, kernel, cstep1.out$model,
                          thetastep.out$opt.theta)
# test error rate:
error.rate(y.test, fit.test)
# Output:
# > 0.47
#---------------------------------------------- end of one-step update
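# Beyond the overall error rate, a confusion matrix shows which classes the
# one-step SMSVM confuses. A minimal sketch using only base R: it recovers
# the predicted labels from the fitted values (as in the commented-out line
# in the MSVM section above) and cross-tabulates them against y.test.
fit.class <- apply(fit.test, 1, which.max)   # predicted class = column with the largest fitted value
table(truth = y.test, predicted = fit.class) # rows: true class; columns: predicted class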
# The SMSVM one-step update can be done easily with the 'smsvm' function.
smsvm.out <- smsvm(x, y, kernel.type = "spline-t", lambda = seq(-16, -8, 1),
                   lambda.theta = seq(-10, 0, 1), cv = T, fold = 10,
                   isCombined = F)

# visualize the classification boundaries
msvm.plot(x, y, kernel, smsvm.out$model, smsvm.out$opt.theta)

# prediction
fit.test <- predict.smsvm(x, x.test, kernel, smsvm.out$model, smsvm.out$opt.theta)
# test error rate:
error.rate(y.test, fit.test)

#--------------------------------------------------------------
# Compare the previous results with the theta-step under combined tuning.
thetastep.out <- thetastep(x, y, opt.lambda = -12, lambda.theta = seq(-10, 0, 1),
                           kernel.type = 'spline-t', fold = 10, cv = T)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.6080333
# The optimal lambda.theta on log2 scale: -3
# The number of selected features out of 2 : 1
# The average shrinkage factor: 0.4947156

thetastep.out$opt.theta
# [1] 0.9894312 0.0000000

# visualize the classification boundaries
msvm.plot(x, y, kernel, thetastep.out$model, thetastep.out$opt.theta)

# prediction
fit.test <- predict.smsvm(x, x.test, kernel, thetastep.out$model,
                          thetastep.out$opt.theta)
# test error rate:
error.rate(y.test, fit.test)
# Output:
# > 0.48

# Again, this SMSVM one-step update with combined tuning can be done by 'smsvm'.
smsvm.out <- smsvm(x, y, kernel.type = "spline-t", lambda = seq(-16, -8, 1),
                   lambda.theta = seq(-10, 0, 1), cv = T, fold = 10)

#-----------------#
# More iterations #
#-----------------#
# the second iteration
thetastep2.out <- thetastep(x, y, opt.lambda = -11, lambda.theta = seq(-10, 0, 1),
                            kernel.type = 'spline-t', fold = 10, isCombined = F,
                            cv = T, pretheta = thetastep.out$opt.theta)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.6007739
# The optimal lambda.theta on log2 scale: -6
# The number of selected features out of 2 : 2
# The average shrinkage factor: 0.5000019

thetastep2.out$opt.theta
# [1] 1.000000e+00 3.737154e-06

cstep2.out <- cstep(x, y, lambda = seq(-16, -8, 1), kernel.type = 'spline-t',
                    theta = thetastep2.out$opt.theta, cv = T, fold = 10)
# Output:
# The minimum of 10 fold cross-validated hinge loss:
# 0.6007746
# The optimal lambda is -11 on log2 scale.

# Little change is made in the second iteration.
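# A minimal sketch of automating further iterations: keep alternating the
# theta-step and the c-step until the cross-validated hinge loss stops
# improving. NOTE: this assumes the list returned by cstep exposes the tuned
# lambda as 'opt.lambda', analogous to thetastep's 'opt.lambda.theta';
# check names(cstep.out) in your version before relying on it.
theta.cur <- thetastep2.out$opt.theta
cstep.cur <- cstep2.out
for (iter in 3:10) {
  ts <- thetastep(x, y, opt.lambda = cstep.cur$opt.lambda,  # assumed component
                  lambda.theta = seq(-10, 0, 1), kernel.type = 'spline-t',
                  fold = 10, isCombined = F, cv = T, pretheta = theta.cur)
  cs <- cstep(x, y, lambda = seq(-16, -8, 1), kernel.type = 'spline-t',
              theta = ts$opt.theta, cv = T, fold = 10)
  if (min(cs$hinge) >= min(cstep.cur$hinge) - 1e-6) break  # no real improvement; stop
  theta.cur <- ts$opt.theta
  cstep.cur <- cs
}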
#-----------------------------------------------------------------------
# Example 2: both x1 and x2 are relevant to y.

######################
#       MSVM         #
######################

# generate training data
set.seed(123)
temp <- twod.k4(40)
x <- as.matrix(temp$x)
y <- temp$y

# scatter plot
plot(x, type = "n", xlab = expression(x[1]), ylab = expression(x[2]),
     frame.plot = T)
for (j in 1:max(y)) {
  points(x[(y == j), ], pch = 1, col = j + 1)
}
# > table(y)
# y
#  1  2  3  4
# 10 11  9 10

# MSVM with the radial basis kernel;
# tune lambda using 10-fold CV (w.r.t. the hinge loss)
msvm.out <- msvm(x, y, lambda = seq(-10, -4, 1), kernel.type = 'rbf',
                 fold = 10, cv = T)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.2184695
# The optimal lambda on log2 scale: -7

# plot the MSVM classification boundary
kernel <- list(type = 'rbf', par = 1)
msvm.plot(x, y, kernel, msvm.out$model)

# MSVM with the two-way interaction spline kernel;
# need to transform x so that each coordinate lies between 0 and 1.
trans.x <- unit.scale(x)
msvm.out <- msvm(trans.x, y, lambda = seq(-18, -12, 1),
                 kernel.type = 'spline-t2', fold = 10, cv = T)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.1747517
# The optimal lambda on log2 scale: -16

# plot the classification boundary
kernel <- list(type = 'spline-t2', par = 1)
msvm.plot(trans.x, y, kernel, msvm.out$model)

# c-step tuning plot
lambda <- seq(-18, -12, 1)
plot(lambda, msvm.out$hinge, type = "b",
     xlab = expression(log[2](lambda)), ylab = "cv.hinge")

# theta-step: feature selection --------------------------
# sequential tuning
# (with 'spline-t2' there are 3 functional components to rescale:
#  the two main effects and their two-way interaction)
thetastep.out <- thetastep(trans.x, y, opt.lambda = -16,
                           lambda.theta = seq(-10, 0, 1),
                           kernel.type = 'spline-t2', fold = 10,
                           isCombined = F, cv = T)
# Output:
# The minimum of average 10 fold cross-validated hinge loss:
# 0.1747808
# The optimal lambda.theta on log2 scale: -6
# The number of selected features out of 3 : 3
# The average shrinkage factor: 1

# optimal rescaling parameters
thetastep.out$opt.theta
# Output:
# [1] 1 1 1
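# For completeness, the spline-t2 MSVM above can be evaluated on fresh data
# from the same distribution, mirroring Example 1. A minimal sketch: the seed
# and the test-set size are arbitrary choices here, and it assumes unit.scale
# rescales each column of the training x to [0,1] by its own min/max. The
# test inputs must be mapped with the TRAINING ranges, not rescaled on their
# own, so the rescaling is redone by hand instead of calling unit.scale on
# the test set. (Test values outside the training range would fall outside
# [0,1]; clip with pmin/pmax if needed.)
set.seed(124)
temp2 <- twod.k4(200)
rng <- apply(x, 2, range)                          # column ranges of the training x
x.test <- sweep(sweep(as.matrix(temp2$x), 2, rng[1, ], "-"), 2,
                rng[2, ] - rng[1, ], "/")          # (x - min) / (max - min)
y.test <- temp2$y
fit.test <- predict.msvm(trans.x, x.test, kernel, msvm.out$model)
# test error rate:
error.rate(y.test, fit.test)

#------------------------------------ last updated by Y. Lee, Sep.10.05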