R 實作


#----------------------------------------------------------------------------------------------
# prepare training data
#----------------------------------------------------------------------------------------------

# Load the raw table and keep the first 1742 rows as the training window.
data <- read.csv("T2330-all2.csv", header = TRUE, sep = ",")
data <- data[1:1742, ]

closeprice <- data$closeprice
lrow <- nrow(data)
row.train <- lrow

# Label each row: class is 1 when, looking forward from row i, the close
# price reaches a return of +10% before it reaches a return below -5%
# (or reaches +10% and never falls below -5%); otherwise it stays 0.
class <- numeric(lrow)

for (i in 1:(lrow - 1)) {

  # forward returns of every later row relative to row i
  base.price <- closeprice[i]
  future.ret <- (closeprice[(i + 1):lrow] - base.price) / base.price

  # position of the first loss / gain hit; NA when the threshold is never hit
  first.loss <- which(future.ret < (-0.05))[1]
  first.gain <- which(future.ret >= 0.1)[1]

  if (!is.na(first.gain) && (is.na(first.loss) || first.gain < first.loss)) {
    class[i] <- 1
  }
}


# Drop the last 250 rows and the first two columns of the table, then put
# the label in front of the remaining columns.
keep.rows <- 1:(lrow - 250)
class <- class[keep.rows]
data <- data[keep.rows, 3:ncol(data)]
data.total <- data.frame(class, data)


# Extract the numeric suffixes of the predictors kept in a fitted model.
#
# stepresult.call: either the `$call` of a fitted model (e.g. `fit$call`,
#                  which is what the script passes) or the fitted model
#                  itself.
# Returns a numeric vector with one entry per term label, obtained by
# stripping every 'v' from the label and converting the rest with
# as.numeric() (so "v12" becomes 12; labels that are not numeric after
# stripping become NA with a warning).
#
# The previous version overwrote its argument and read a global object named
# `stepresult`; the result is now derived from the argument only.
get_variables <- function(stepresult.call) {

  # names inside the call (formula, data) are resolved where we were called
  caller.env <- parent.frame()

  if (is.call(stepresult.call)) {
    model.formula <- eval(stepresult.call[["formula"]], caller.env)

    if ("." %in% all.vars(model.formula)) {
      # `y ~ .` can only be expanded with the data the model was fitted on
      model.data <- eval(stepresult.call[["data"]], caller.env)
      model.terms <- terms(model.formula, data = model.data)
    } else {
      model.terms <- terms(model.formula)
    }
  } else {
    model.terms <- terms(stepresult.call)
  }

  term.labels <- attr(model.terms, "term.labels")

  as.numeric(gsub("v", "", term.labels))
}

# do stepwise regression
library(MASS)

# Regress the 0/1 label on every remaining column, then let stepAIC search
# in both directions for the model with the lowest AIC.
fit1 <- lm(class ~ ., data = data.total)
stepresult <- stepAIC(fit1, direction = "both")

# Numeric suffixes of the variables kept by stepAIC; they are used below as
# column positions in `data`.
vcol <- get_variables(stepresult$call)

# drop = FALSE keeps a data frame (and the original column name) even when
# only one variable is selected; without it the single column would be
# collapsed to a vector and renamed by data.frame().
needdata.tsmc <- data.frame(class, data[, vcol, drop = FALSE])
needdata <- needdata.tsmc
#head(needdata)


#----------------------------------------------------------------------------------------------
# prepare testing data
#----------------------------------------------------------------------------------------------

# Reload the complete table; the rows after the training window are the
# ones evaluated later.
data1 <- read.csv("T2330-all2.csv", header = TRUE, sep = ",")

closeprice <- data1$closeprice

lrow <- nrow(data1)
row.all <- lrow

# Label each row: class is 1 when, looking forward from row i, the close
# price reaches a return of +10% before it reaches a return below -5%
# (or reaches +10% and never falls below -5%); otherwise it stays 0.
class <- rep(0, lrow)

for (i in seq_len(max(lrow - 1, 0))) {

  # forward returns of every later row relative to row i
  base.price <- closeprice[i]
  future.ret <- (closeprice[(i + 1):lrow] - base.price) / base.price

  # position of the first loss / gain hit; NA when the threshold is never hit
  first.loss <- which(future.ret < (-0.05))[1]
  first.gain <- which(future.ret >= 0.1)[1]

  if (!is.na(first.gain) && (is.na(first.loss) || first.gain < first.loss)) {
    class[i] <- 1
  }
}


# First and last row of the evaluation range: it starts right after the
# training rows and leaves out the final 120 rows of the file.
a <- row.train + 1
b <- row.all - 120

# `a:b` would silently count backwards if the file were too short.
if (b < a) {
  stop("no test rows: the file must have more than ", row.train + 120,
       " rows, but has ", row.all, call. = FALSE)
}

# giving real class
realmatrix <- class[a:b]

# class balance inside the evaluation range
sum(class[a:b] == 1)
sum(class[a:b] == 0)

# target(10%,-5%)
data1 <- data1[, 3:ncol(data1)]

# drop = FALSE keeps the original column name when only one variable was
# selected, so the test columns match the training columns.
testdata.tsmc <- data.frame(class, data1[, vcol, drop = FALSE])
testdata <- testdata.tsmc

needdata$class <- as.factor(needdata$class)
testdata$class <- as.factor(testdata$class)

#write.csv(testdata, file = "rr.csv",row.names=FALSE)
#----------------------------------------------------------------------------------------------
# predict testing data
#----------------------------------------------------------------------------------------------

# SVM(e1071)
library(e1071)

# Rows evaluated: everything after the training window except the final
# 120 rows of the file.
test.rows <- (row.train + 1):(row.all - 120)

data.resultsvm <- numeric(length(test.rows))
prematrixsvm <- character(length(test.rows))

# training data by svm -- create model
# The training rows are the same for every test row, so the model is fitted
# once instead of once per iteration. `kernel = "radial"` replaces the
# misspelt `kernal = 'radial basis'`, which svm() silently ignored (the
# radial kernel was only used because it is the default).
tsvm <- svm(class ~ ., data = needdata[1:(row.train - 250), ],
            type = "C-classification", cost = 2, kernel = "radial",
            gamma = 0.1, scale = TRUE)

for (i in test.rows) {

  # predict result by model; drop = FALSE keeps a one-row data frame even
  # when there is a single predictor column
  pretsvm <- predict(tsvm, testdata[i, -1, drop = FALSE])

  # 1 when the prediction matches the real class of row i, 0 otherwise
  data.resultsvm[i - row.train] <-
    sum(as.character(pretsvm) == as.character(testdata$class[i])) /
    length(pretsvm)
  prematrixsvm[i - row.train] <- as.character(pretsvm)
}

# hit rate per test row, then the confusion table and overall accuracy
summary(data.resultsvm)

# Fixing the levels keeps the table 2 x 2 even when only one class is
# predicted; otherwise diag() would pick the wrong cells.
t <- table(prematrixsvm = factor(prematrixsvm, levels = c("0", "1")),
           realmatrix = factor(realmatrix, levels = c(0, 1)))
t
sum(diag(t)) / sum(t)
#data.frame(prematrixsvm,realmatrix)

# Decision Tree(C50)
library(C50)

# Rows evaluated: everything after the training window except the final
# 120 rows of the file.
test.rows <- (row.train + 1):(row.all - 120)

data.resultc50 <- numeric(length(test.rows))
prematrixc50 <- character(length(test.rows))

needdata$class <- as.factor(needdata$class)
testdata$class <- as.factor(testdata$class)

# training data by c50 -- create model
# The training rows are the same for every test row, so the boosted tree is
# fitted once instead of once per iteration. `trials` is spelt out in full
# rather than relying on partial matching of `trial`.
trainc50 <- C5.0(class ~ ., data = needdata[1:(row.train - 250), ],
                 trials = 5,
                 control = C5.0Control(subset = FALSE, noGlobalPruning = TRUE,
                                       CF = 0.25))

for (i in test.rows) {

  # predict result by model; drop = FALSE keeps a one-row data frame even
  # when there is a single predictor column
  predc50 <- predict(trainc50, testdata[i, -1, drop = FALSE], trials = 5,
                     type = "class")

  # 1 when the prediction matches the real class of row i, 0 otherwise
  data.resultc50[i - row.train] <-
    sum(as.character(predc50) == as.character(testdata$class[i])) /
    length(predc50)
  prematrixc50[i - row.train] <- as.character(predc50)
}

# hit rate per test row, then the confusion table and overall accuracy
summary(data.resultc50)

# Fixing the levels keeps the table 2 x 2 even when only one class is
# predicted; otherwise diag() would pick the wrong cells.
t <- table(prematrixc50 = factor(prematrixc50, levels = c("0", "1")),
           realmatrix = factor(realmatrix, levels = c(0, 1)))
t
sum(diag(t)) / sum(t)

原始碼:https://goo.gl/0sOR5k

results matching ""

    No results matching ""