-
Notifications
You must be signed in to change notification settings - Fork 0
/
Kaggle_mike.R
69 lines (46 loc) · 1.13 KB
/
Kaggle_mike.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Load the training data and the modelling package.
# NOTE(review): the absolute path below is machine-specific. It is kept so the
# working directory (and the relative file name resolved against it) stays
# exactly as before; consider passing the data directory in instead.
setwd("/home/mike-bowles/Downloads/campaignData/")
data <- read.table(file = "TrainingDataset.csv", header = TRUE, sep = ",")
str(data)
# library() stops right here if gbm is not installed; require() would only
# return FALSE and let the script fail later at the gbm() call.
library(gbm)
# Predictors: columns 13 to 30 of the training data.
X <- data[, 13:30]

# Label: column 1 expressed as a fraction of the row total over columns 1-12
# (the monthly sales columns, per the original notes). Missing months are
# ignored when forming the total.
#
# Two earlier label variants (log of column 1 with NA replaced by 0, and log of
# the 12-column row total) used to be computed here and then overwritten; only
# the fraction below ever reached the model, so they are no longer evaluated.
#
# rowSums() replaces the row-by-row loop: same totals, but vectorised and
# correct for a data frame with zero rows.
total_sales <- rowSums(data[, 1:12], na.rm = TRUE)

# NOTE(review): rows whose total is 0 give NaN/Inf and rows with a missing
# column 1 give NA, exactly as before -- confirm the data has no such rows.
Y <- data[, 1] / total_sales
# Prepare the predictors:
#   1. convert the categorical columns to factors,
#   2. mean-impute missing values in the remaining columns, and
#   3. append one TRUE/FALSE factor per imputed column marking which rows
#      were filled in (named V1, V2, ... in column order).
nc1 <- ncol(X)
d1 <- dimnames(X)[[2]]

# Positions (within X) of the categorical predictors.
idxCat <- c(1, 18)
X[idxCat] <- lapply(X[idxCat], as.factor)

# Every non-categorical column is a candidate for imputation (2:17 for the
# 18-column predictor frame).
numIdx <- setdiff(seq_len(nc1), idxCat)

# is.na() is TRUE for both NA and NaN; testing only is.nan() would leave plain
# NA values in place and turn the column mean into NA.
imputeIdx <- numIdx[vapply(X[numIdx], anyNA, logical(1))]

# Capture the missingness indicators before the values are overwritten.
missingFlags <- lapply(X[imputeIdx], function(col) as.factor(is.na(col)))

X[imputeIdx] <- lapply(X[imputeIdx], function(col) {
  col[is.na(col)] <- mean(col, na.rm = TRUE)
  col
})

# seq_along() yields no names when nothing was imputed; 1:0 would have produced
# two bogus names and made the column-name assignment fail.
newCols <- paste0("V", seq_along(imputeIdx))
names(missingFlags) <- newCols
if (length(imputeIdx) > 0) {
  # Bind all indicator columns in one step instead of growing X in a loop.
  X <- cbind(X, as.data.frame(missingFlags))
}
dimnames(X)[[2]] <- c(d1, newCols)
# Modelling frame: response Y in the first column, predictors after it.
gdata <- cbind(Y, X)

# Boosting hyperparameters.
ntrees <- 4000   # number of boosting iterations
depth <- 5       # interaction depth of each tree
minObs <- 10     # minimum observations per terminal node
shrink <- 0.001  # shrinkage (learning rate)
folds <- 10      # cross-validation folds

# Fit the boosted regression model with cross-validation.
# depth and minObs were previously defined but never handed to gbm(), so the
# model was silently fit with gbm's default tree depth; they are passed
# explicitly now.
mo1gbm <- gbm(
  Y ~ .,
  data = gdata,
  distribution = "gaussian",
  n.trees = ntrees,
  interaction.depth = depth,
  n.minobsinnode = minObs,
  shrinkage = shrink,
  cv.folds = folds
)
# Summarise the cross-validated fit.
# Performance summary from gbm.perf() using the cross-validation method.
gbm.perf(mo1gbm, method = "cv")
# Square root of the smallest cross-validation error (presumably an RMSE,
# given the gaussian fit -- confirm against the gbm documentation).
sqrt(min(mo1gbm[["cv.error"]]))
# Iteration (tree count) at which that smallest error occurs.
which.min(mo1gbm[["cv.error"]])