-
Notifications
You must be signed in to change notification settings - Fork 341
/
arima.R
966 lines (907 loc) · 32.9 KB
/
arima.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
search.arima <- function(x, d=NA, D=NA, max.p=5, max.q=5,
max.P=2, max.Q=2, max.order=5, stationary=FALSE, ic=c("aic", "aicc", "bic"),
trace=FALSE, approximation=FALSE, xreg=NULL, offset=offset, allowdrift=TRUE,
allowmean=TRUE, parallel=FALSE, num.cores=2, ...) {
# dataname <- substitute(x)
ic <- match.arg(ic)
m <- frequency(x)
allowdrift <- allowdrift & (d + D) == 1
allowmean <- allowmean & (d + D) == 0
maxK <- (allowdrift | allowmean)
# Choose model orders
# Serial - technically could be combined with the code below
if (parallel == FALSE) {
best.ic <- Inf
for (i in 0:max.p) {
for (j in 0:max.q) {
for (I in 0:max.P) {
for (J in 0:max.Q) {
if (i + j + I + J <= max.order) {
for (K in 0:maxK) {
fit <- myarima(
x, order = c(i, d, j), seasonal = c(I, D, J),
constant = (K == 1), trace = trace, ic = ic, approximation = approximation,
offset = offset, xreg = xreg, ...
)
if (fit$ic < best.ic) {
best.ic <- fit$ic
bestfit <- fit
constant <- (K == 1)
}
}
}
}
}
}
}
} else if (parallel == TRUE) {
to.check <- WhichModels(max.p, max.q, max.P, max.Q, maxK)
par.all.arima <- function(l, max.order) {
.tmp <- UndoWhichModels(l)
i <- .tmp[1]
j <- .tmp[2]
I <- .tmp[3]
J <- .tmp[4]
K <- .tmp[5] == 1
if (i + j + I + J <= max.order) {
fit <- myarima(
x, order = c(i, d, j), seasonal = c(I, D, J), constant = (K == 1),
trace = trace, ic = ic, approximation = approximation, offset = offset, xreg = xreg,
...
)
}
if (exists("fit")) {
return(cbind(fit, K))
} else {
return(NULL)
}
}
if (is.null(num.cores)) {
num.cores <- detectCores()
}
all.models <- mclapply(X = to.check, FUN = par.all.arima, max.order=max.order, mc.cores = num.cores)
# Removing null elements
all.models <- all.models[!sapply(all.models, is.null)]
# Choosing best model
best.ic <- Inf
for (i in 1:length(all.models)) {
if (!is.null(all.models[[i]][, 1]$ic) && all.models[[i]][, 1]$ic < best.ic) {
bestfit <- all.models[[i]][, 1]
best.ic <- bestfit$ic
constant <- unlist(all.models[[i]][1, 2])
}
}
class(bestfit) <- c("forecast_ARIMA", "ARIMA", "Arima")
}
if (exists("bestfit")) {
# Refit using ML if approximation used for IC
if (approximation) {
if (trace) {
cat("\n\n Now re-fitting the best model(s) without approximations...\n")
}
# constant <- length(bestfit$coef) - ncol(xreg) > sum(bestfit$arma[1:4])
newbestfit <- myarima(
x, order = bestfit$arma[c(1, 6, 2)],
seasonal = bestfit$arma[c(3, 7, 4)], constant = constant, ic,
trace = FALSE, approximation = FALSE, xreg = xreg, ...
)
if (newbestfit$ic == Inf) {
# Final model is lousy. Better try again without approximation
# warning("Unable to fit final model using maximum likelihood. AIC value approximated")
bestfit <- search.arima(
x, d = d, D = D, max.p = max.p, max.q = max.q,
max.P = max.P, max.Q = max.Q, max.order = max.order, stationary = stationary,
ic = ic, trace = trace, approximation = FALSE, xreg = xreg, offset = offset,
allowdrift = allowdrift, allowmean = allowmean,
parallel = parallel, num.cores = num.cores, ...
)
bestfit$ic <- switch(ic, bic = bestfit$bic, aic = bestfit$aic, aicc = bestfit$aicc)
}
else {
bestfit <- newbestfit
}
}
}
else {
stop("No ARIMA model able to be estimated")
}
bestfit$x <- x
bestfit$series <- deparse(substitute(x))
bestfit$ic <- NULL
bestfit$call <- match.call()
if (trace) {
cat("\n\n")
}
return(bestfit)
}
# Set up seasonal dummies using Fourier series
SeasDummy <- function(x) {
n <- length(x)
m <- frequency(x)
if (m == 1) {
stop("Non-seasonal data")
}
tt <- 1:n
fmat <- matrix(NA, nrow = n, ncol = 2 * m)
for (i in 1:m) {
fmat[, 2 * i] <- sin(2 * pi * i * tt / m)
fmat[, 2 * (i - 1) + 1] <- cos(2 * pi * i * tt / m)
}
return(fmat[, 1:(m - 1)])
}
# CANOVA-HANSEN TEST
# Largely based on uroot package code for CH.test()
SD.test <- function(wts, s=frequency(wts)) {
if (any(is.na(wts))) {
stop("Series contains missing values. Please choose order of seasonal differencing manually.")
}
if (s == 1) {
stop("Not seasonal data")
}
t0 <- start(wts)
N <- length(wts)
if (N <= s) {
stop("Insufficient data")
}
frec <- rep(1, as.integer((s + 1) / 2))
ltrunc <- round(s * (N / 100) ^ 0.25)
R1 <- as.matrix(SeasDummy(wts))
lmch <- lm(wts ~ R1, na.action = na.exclude) # run the regression : y(i)=mu+f(i)'gamma(i)+e(i)
Fhat <- Fhataux <- matrix(nrow = N, ncol = s - 1)
for (i in 1:(s - 1))
Fhataux[, i] <- R1[, i] * residuals(lmch)
for (i in 1:N) {
for (n in 1:(s - 1))
Fhat[i, n] <- sum(Fhataux[1:i, n])
}
wnw <- 1 - seq(1, ltrunc, 1) / (ltrunc + 1)
Ne <- nrow(Fhataux)
Omnw <- 0
for (k in 1:ltrunc)
Omnw <- Omnw + (t(Fhataux)[, (k + 1):Ne] %*% Fhataux[1:(Ne - k), ]) * wnw[k]
Omfhat <- (crossprod(Fhataux) + Omnw + t(Omnw)) / Ne
sq <- seq(1, s - 1, 2)
frecob <- rep(0, s - 1)
for (i in 1:length(frec)) {
if (frec[i] == 1 && i == as.integer(s / 2)) {
frecob[sq[i]] <- 1
}
if (frec[i] == 1 && i < as.integer(s / 2)) {
frecob[sq[i]] <- frecob[sq[i] + 1] <- 1
}
}
a <- length(which(frecob == 1))
A <- matrix(0, nrow = s - 1, ncol = a)
j <- 1
for (i in 1:(s - 1)) {
if (frecob[i] == 1) {
A[i, j] <- 1
ifelse(frecob[i] == 1, j <- j + 1, j <- j)
}
}
tmp <- t(A) %*% Omfhat %*% A
problems <- (min(svd(tmp)$d) < .Machine$double.eps)
if (problems) {
stL <- 0
} else {
stL <- (1 / N ^ 2) * sum(diag(solve(tmp, tol = 1e-25) %*% t(A) %*% t(Fhat) %*% Fhat %*% A))
}
return(stL)
}
#' Forecasting using ARIMA or ARFIMA models
#'
#' Returns forecasts and other information for univariate ARIMA models.
#'
#' For \code{Arima} or \code{ar} objects, the function calls
#' \code{\link[stats]{predict.Arima}} or \code{\link[stats]{predict.ar}} and
#' constructs an object of class "\code{forecast}" from the results. For
#' \code{fracdiff} objects, the calculations are all done within
#' \code{\link{forecast.fracdiff}} using the equations given by Peiris and
#' Perera (1988).
#'
#' @param object An object of class "\code{Arima}", "\code{ar}" or
#' "\code{fracdiff}". Usually the result of a call to
#' \code{\link[stats]{arima}}, \code{\link{auto.arima}},
#' \code{\link[stats]{ar}}, \code{\link{arfima}} or
#' \code{\link[fracdiff]{fracdiff}}.
#' @param h Number of periods for forecasting. If \code{xreg} is used, \code{h}
#' is ignored and the number of forecast periods is set to the number of rows
#' of \code{xreg}.
#' @param level Confidence level for prediction intervals.
#' @param fan If \code{TRUE}, level is set to \code{seq(51,99,by=3)}. This is
#' suitable for fan plots.
#' @param xreg Future values of an regression variables (for class \code{Arima}
#' objects only). A numerical vector or matrix of external regressors; it should not be a data frame.
#' @param bootstrap If \code{TRUE}, then prediction intervals computed using
#' simulation with resampled errors.
#' @param npaths Number of sample paths used in computing simulated prediction
#' intervals when \code{bootstrap=TRUE}.
#' @param ... Other arguments.
#' @inheritParams forecast.ts
#'
#' @return An object of class "\code{forecast}".
#'
#' The function \code{summary} is used to obtain and print a summary of the
#' results, while the function \code{plot} produces a plot of the forecasts and
#' prediction intervals.
#'
#' The generic accessor functions \code{fitted.values} and \code{residuals}
#' extract useful features of the value returned by \code{forecast.Arima}.
#'
#' An object of class "\code{forecast}" is a list containing at least the
#' following elements: \item{model}{A list containing information about the
#' fitted model} \item{method}{The name of the forecasting method as a
#' character string} \item{mean}{Point forecasts as a time series}
#' \item{lower}{Lower limits for prediction intervals} \item{upper}{Upper
#' limits for prediction intervals} \item{level}{The confidence values
#' associated with the prediction intervals} \item{x}{The original time series
#' (either \code{object} itself or the time series used to create the model
#' stored as \code{object}).} \item{residuals}{Residuals from the fitted model.
#' That is x minus fitted values.} \item{fitted}{Fitted values (one-step
#' forecasts)}
#' @author Rob J Hyndman
#' @seealso \code{\link[stats]{predict.Arima}},
#' \code{\link[stats]{predict.ar}}, \code{\link{auto.arima}},
#' \code{\link{Arima}}, \code{\link[stats]{arima}}, \code{\link[stats]{ar}},
#' \code{\link{arfima}}.
#' @references Peiris, M. & Perera, B. (1988), On prediction with fractionally
#' differenced ARIMA models, \emph{Journal of Time Series Analysis},
#' \bold{9}(3), 215-220.
#' @keywords ts
#' @aliases forecast.forecast_ARIMA
#' @examples
#' fit <- Arima(WWWusage,c(3,1,0))
#' plot(forecast(fit))
#'
#' library(fracdiff)
#' x <- fracdiff.sim( 100, ma=-.4, d=.3)$series
#' fit <- arfima(x)
#' plot(forecast(fit,h=30))
#'
#' @export
forecast.Arima <- function(object, h=ifelse(object$arma[5] > 1, 2 * object$arma[5], 10),
level=c(80, 95), fan=FALSE, xreg=NULL, lambda=object$lambda, bootstrap=FALSE, npaths=5000, biasadj=NULL, ...) {
# Check whether there are non-existent arguments
all.args <- names(formals())
user.args <- names(match.call())[-1L] # including arguments passed to 3 dots
check <- user.args %in% all.args
if (!all(check)) {
error.args <- user.args[!check]
warning(sprintf("The non-existent %s arguments will be ignored.", error.args))
}
use.drift <- is.element("drift", names(object$coef))
x <- object$x <- getResponse(object)
usexreg <- (use.drift | is.element("xreg", names(object))) # | use.constant)
if (!is.null(xreg) && usexreg) {
if(!is.numeric(xreg))
stop("xreg should be a numeric matrix or a numeric vector")
xreg <- as.matrix(xreg)
if (is.null(colnames(xreg))) {
colnames(xreg) <- if (ncol(xreg) == 1) "xreg" else paste("xreg", 1:ncol(xreg), sep = "")
}
origxreg <- xreg <- as.matrix(xreg)
h <- nrow(xreg)
}
else {
if(!is.null(xreg)){
warning("xreg not required by this model, ignoring the provided regressors")
xreg <- NULL
}
origxreg <- NULL
}
if (fan) {
level <- seq(51, 99, by = 3)
} else {
if (min(level) > 0 & max(level) < 1) {
level <- 100 * level
} else if (min(level) < 0 | max(level) > 99.99) {
stop("Confidence limit out of range")
}
}
level <- sort(level)
if (use.drift) {
n <- length(x)
#missing <- is.na(x)
#firstnonmiss <- head(which(!missing),1)
#n <- length(x) - firstnonmiss + 1
if (!is.null(xreg)) {
xreg <- `colnames<-`(cbind(drift = (1:h) + n, xreg),
make.unique(c("drift", if(is.null(colnames(xreg)) && !is.null(xreg)) rep("", NCOL(xreg)) else colnames(xreg))))
} else {
xreg <- `colnames<-`(as.matrix((1:h) + n), "drift")
}
}
# Check if data is constant
if (!is.null(object$constant)) {
if (object$constant) {
pred <- list(pred = rep(x[1], h), se = rep(0, h))
} else {
stop("Strange value of object$constant")
}
}
else if (usexreg) {
if (is.null(xreg)) {
stop("No regressors provided")
}
object$call$xreg <- getxreg(object)
if (NCOL(xreg) != NCOL(object$call$xreg)) {
stop("Number of regressors does not match fitted model")
}
if(!identical(colnames(xreg), colnames(object$call$xreg))){
warning("xreg contains different column names from the xreg used in training. Please check that the regressors are in the same order.")
}
pred <- predict(object, n.ahead = h, newxreg = xreg)
}
else {
pred <- predict(object, n.ahead = h)
}
# Fix time series characteristics if there are missing values at end of series, or if tsp is missing from pred
if (!is.null(x)) {
tspx <- tsp(x)
nx <- max(which(!is.na(x)))
if (nx != length(x) | is.null(tsp(pred$pred)) | is.null(tsp(pred$se))) {
tspx[2] <- time(x)[nx]
start.f <- tspx[2] + 1 / tspx[3]
pred$pred <- ts(pred$pred, frequency = tspx[3], start = start.f)
pred$se <- ts(pred$se, frequency = tspx[3], start = start.f)
}
}
# Compute prediction intervals
nint <- length(level)
if (bootstrap) # Compute prediction intervals using simulations
{
sim <- matrix(NA, nrow = npaths, ncol = h)
for (i in 1:npaths)
sim[i, ] <- simulate(object, nsim = h, bootstrap = TRUE, xreg = origxreg, lambda = lambda)
lower <- apply(sim, 2, quantile, 0.5 - level / 200, type = 8)
upper <- apply(sim, 2, quantile, 0.5 + level / 200, type = 8)
if (nint > 1L) {
lower <- t(lower)
upper <- t(upper)
}
else {
lower <- matrix(lower, ncol = 1)
upper <- matrix(upper, ncol = 1)
}
}
else { # Compute prediction intervals via the normal distribution
lower <- matrix(NA, ncol = nint, nrow = length(pred$pred))
upper <- lower
for (i in 1:nint) {
qq <- qnorm(0.5 * (1 + level[i] / 100))
lower[, i] <- pred$pred - qq * pred$se
upper[, i] <- pred$pred + qq * pred$se
}
if (!is.finite(max(upper))) {
warning("Upper prediction intervals are not finite.")
}
}
colnames(lower) <- colnames(upper) <- paste(level, "%", sep = "")
lower <- ts(lower)
upper <- ts(upper)
tsp(lower) <- tsp(upper) <- tsp(pred$pred)
method <- arima.string(object, padding = FALSE)
seriesname <- if (!is.null(object$series)) {
object$series
}
else if (!is.null(object$call$x)) {
object$call$x
}
else {
object$call$y
}
fits <- fitted.Arima(object)
if (!is.null(lambda) & is.null(object$constant)) { # Back-transform point forecasts and prediction intervals
pred$pred <- InvBoxCox(pred$pred, lambda, biasadj, pred$se^2)
if (!bootstrap) { # Bootstrapped intervals already back-transformed
lower <- InvBoxCox(lower, lambda)
upper <- InvBoxCox(upper, lambda)
}
}
return(structure(
list(
method = method, model = object, level = level,
mean = future_msts(x, pred$pred), lower = future_msts(x, lower),
upper = future_msts(x, upper), x = x, series = seriesname,
fitted = copy_msts(x, fits), residuals = copy_msts(x, residuals.Arima(object))
),
class = "forecast"
))
}
#' @export
forecast.forecast_ARIMA <- forecast.Arima
#' @rdname forecast.Arima
#' @export
forecast.ar <- function(object, h=10, level=c(80, 95), fan=FALSE, lambda=NULL,
bootstrap=FALSE, npaths=5000, biasadj=FALSE, ...) {
x <- getResponse(object)
pred <- predict(object, newdata = x, n.ahead = h)
if (bootstrap) # Recompute se using simulations
{
sim <- matrix(NA, nrow = npaths, ncol = h)
for (i in 1:npaths)
sim[i, ] <- simulate(object, nsim = h, bootstrap = TRUE)
pred$se <- apply(sim, 2, sd)
}
if (fan) {
level <- seq(51, 99, by = 3)
} else {
if (min(level) > 0 & max(level) < 1) {
level <- 100 * level
} else if (min(level) < 0 | max(level) > 99.99) {
stop("Confidence limit out of range")
}
}
nint <- length(level)
lower <- matrix(NA, ncol = nint, nrow = length(pred$pred))
upper <- lower
for (i in 1:nint) {
qq <- qnorm(0.5 * (1 + level[i] / 100))
lower[, i] <- pred$pred - qq * pred$se
upper[, i] <- pred$pred + qq * pred$se
}
colnames(lower) <- colnames(upper) <- paste(level, "%", sep = "")
method <- paste("AR(", object$order, ")", sep = "")
f <- frequency(x)
res <- residuals.ar(object)
fits <- fitted.ar(object)
if (!is.null(lambda)) {
pred$pred <- InvBoxCox(pred$pred, lambda, biasadj, list(level = level, upper = upper, lower = lower))
lower <- InvBoxCox(lower, lambda)
upper <- InvBoxCox(upper, lambda)
fits <- InvBoxCox(fits, lambda)
x <- InvBoxCox(x, lambda)
}
return(structure(
list(
method = method, model = object, level = level,
mean = future_msts(x, pred$pred),
lower = future_msts(x, lower),
upper = future_msts(x, upper), x = x, series = deparse(object$call$x),
fitted = copy_msts(x, fits), residuals = copy_msts(x, res)
)
, class = "forecast"
))
}
# Find xreg matrix in an Arima object
getxreg <- function(z) {
# Look in the obvious place first
if (is.element("xreg", names(z))) {
return(z$xreg)
}
# Next most obvious place
else if (is.element("xreg", names(z$coef))) {
return(eval.parent(z$coef$xreg))
}
# Now check under call
else if (is.element("xreg", names(z$call))) {
return(eval.parent(z$call$xreg))
}
# Otherwise check if it exists
else {
armapar <- sum(z$arma[1:4]) + is.element("intercept", names(z$coef))
npar <- length(z$coef)
if (npar > armapar) {
stop("It looks like you have an xreg component but I don't know what it is.\n Please use Arima() or auto.arima() rather than arima().")
} else { # No xreg used
return(NULL)
}
}
}
#' Errors from a regression model with ARIMA errors
#'
#' Returns time series of the regression residuals from a fitted ARIMA model.
#'
#' This is a deprecated function
#' which is identical to \code{\link{residuals.Arima}(object, type="regression")}
#' Regression residuals are equal to the original data
#' minus the effect of any regression variables. If there are no regression
#' variables, the errors will be identical to the original series (possibly
#' adjusted to have zero mean).
#'
#' @param object An object containing a time series model of class \code{Arima}.
#' @return A \code{ts} object
#' @author Rob J Hyndman
#' @seealso \code{\link{residuals.Arima}}.
#' @keywords ts
#'
#' @export
arima.errors <- function(object) {
message("Deprecated, use residuals.Arima(object, type='regression') instead")
residuals.Arima(object, type = "regression")
}
# Return one-step fits
#' h-step in-sample forecasts for time series models.
#'
#' Returns h-step forecasts for the data used in fitting the model.
#'
#' @param object An object of class "\code{Arima}", "\code{bats}",
#' "\code{tbats}", "\code{ets}" or "\code{nnetar}".
#' @param h The number of steps to forecast ahead.
#' @param ... Other arguments.
#' @return A time series of the h-step forecasts.
#' @author Rob J Hyndman & Mitchell O'Hara-Wild
#' @seealso \code{\link{forecast.Arima}}, \code{\link{forecast.bats}},
#' \code{\link{forecast.tbats}}, \code{\link{forecast.ets}},
#' \code{\link{forecast.nnetar}}, \code{\link{residuals.Arima}},
#' \code{\link{residuals.bats}}, \code{\link{residuals.tbats}},
#' \code{\link{residuals.ets}}, \code{\link{residuals.nnetar}}.
#' @keywords ts
#' @aliases fitted.forecast_ARIMA
#' @examples
#' fit <- ets(WWWusage)
#' plot(WWWusage)
#' lines(fitted(fit), col='red')
#' lines(fitted(fit, h=2), col='green')
#' lines(fitted(fit, h=3), col='blue')
#' legend("topleft", legend=paste("h =",1:3), col=2:4, lty=1)
#'
#' @export
fitted.Arima <- function(object, h = 1, ...) {
if (h == 1) {
x <- getResponse(object)
if (!is.null(object$fitted)) {
return(object$fitted)
}
else if (is.null(x)) {
# warning("Fitted values are unavailable due to missing historical data")
return(NULL)
}
else if (is.null(object$lambda)) {
return(x - object$residuals)
}
else {
fits <- InvBoxCox(BoxCox(x, object$lambda) - object$residuals, object$lambda, NULL, object$sigma2)
return(fits)
}
}
else {
return(hfitted(object = object, h = h, FUN = "Arima", ...))
}
}
#' @export
fitted.forecast_ARIMA <- fitted.Arima
# Calls arima from stats package and adds data to the returned object
# Also allows refitting to new data
# and drift terms to be included.
#' Fit ARIMA model to univariate time series
#'
#' Largely a wrapper for the \code{\link[stats]{arima}} function in the stats
#' package. The main difference is that this function allows a drift term. It
#' is also possible to take an ARIMA model from a previous call to \code{Arima}
#' and re-apply it to the data \code{y}.
#'
#' See the \code{\link[stats]{arima}} function in the stats package.
#'
#' @aliases print.ARIMA summary.Arima as.character.Arima
#'
#' @param y a univariate time series of class \code{ts}.
#' @param order A specification of the non-seasonal part of the ARIMA model:
#' the three components (p, d, q) are the AR order, the degree of differencing,
#' and the MA order.
#' @param seasonal A specification of the seasonal part of the ARIMA model,
#' plus the period (which defaults to frequency(y)). This should be a list with
#' components order and period, but a specification of just a numeric vector of
#' length 3 will be turned into a suitable list with the specification as the
#' order.
#' @param xreg Optionally, a numerical vector or matrix of external regressors, which
#' must have the same number of rows as y. It should not be a data frame.
#' @param include.mean Should the ARIMA model include a mean term? The default
#' is \code{TRUE} for undifferenced series, \code{FALSE} for differenced ones
#' (where a mean would not affect the fit nor predictions).
#' @param include.drift Should the ARIMA model include a linear drift term?
#' (i.e., a linear regression with ARIMA errors is fitted.) The default is
#' \code{FALSE}.
#' @param include.constant If \code{TRUE}, then \code{include.mean} is set to
#' be \code{TRUE} for undifferenced series and \code{include.drift} is set to
#' be \code{TRUE} for differenced series. Note that if there is more than one
#' difference taken, no constant is included regardless of the value of this
#' argument. This is deliberate as otherwise quadratic and higher order
#' polynomial trends would be induced.
#' @param method Fitting method: maximum likelihood or minimize conditional
#' sum-of-squares. The default (unless there are missing values) is to use
#' conditional-sum-of-squares to find starting values, then maximum likelihood.
#' @param model Output from a previous call to \code{Arima}. If model is
#' passed, this same model is fitted to \code{y} without re-estimating any
#' parameters.
#' @param x Deprecated. Included for backwards compatibility.
#' @param ... Additional arguments to be passed to \code{\link[stats]{arima}}.
#' @inheritParams forecast.ts
#' @return See the \code{\link[stats]{arima}} function in the stats package.
#' The additional objects returned are \item{x}{The time series data}
#' \item{xreg}{The regressors used in fitting (when relevant).}
#' \item{sigma2}{The bias adjusted MLE of the innovations variance.}
#'
#' @export
#'
#' @author Rob J Hyndman
#' @seealso \code{\link{auto.arima}}, \code{\link{forecast.Arima}}.
#' @keywords ts
#' @examples
#' library(ggplot2)
#' WWWusage %>%
#' Arima(order=c(3,1,0)) %>%
#' forecast(h=20) %>%
#' autoplot
#'
#' # Fit model to first few years of AirPassengers data
#' air.model <- Arima(window(AirPassengers,end=1956+11/12),order=c(0,1,1),
#' seasonal=list(order=c(0,1,1),period=12),lambda=0)
#' plot(forecast(air.model,h=48))
#' lines(AirPassengers)
#'
#' # Apply fitted model to later data
#' air.model2 <- Arima(window(AirPassengers,start=1957),model=air.model)
#'
#' # Forecast accuracy measures on the log scale.
#' # in-sample one-step forecasts.
#' accuracy(air.model)
#' # out-of-sample one-step forecasts.
#' accuracy(air.model2)
#' # out-of-sample multi-step forecasts
#' accuracy(forecast(air.model,h=48,lambda=NULL),
#' log(window(AirPassengers,start=1957)))
#'
Arima <- function(y, order=c(0, 0, 0), seasonal=c(0, 0, 0), xreg=NULL, include.mean=TRUE,
include.drift=FALSE, include.constant, lambda=model$lambda, biasadj=FALSE,
method=c("CSS-ML", "ML", "CSS"), model=NULL, x=y, ...) {
# Remove outliers near ends
# j <- time(x)
# x <- na.contiguous(x)
# if(length(j) != length(x))
# warning("Missing values encountered. Using longest contiguous portion of time series")
series <- deparse(substitute(y))
origx <- y
if (!is.null(lambda)) {
x <- BoxCox(x, lambda)
lambda <- attr(x, "lambda")
if (is.null(attr(lambda, "biasadj"))) {
attr(lambda, "biasadj") <- biasadj
}
}
if (!is.null(xreg)) {
if(!is.numeric(xreg))
stop("xreg should be a numeric matrix or a numeric vector")
xreg <- as.matrix(xreg)
if (is.null(colnames(xreg))) {
colnames(xreg) <- if (ncol(xreg) == 1) "xreg" else paste("xreg", 1:ncol(xreg), sep = "")
}
}
if (!is.list(seasonal)) {
if (frequency(x) <= 1) {
seasonal <- list(order = c(0, 0, 0), period = NA)
if(length(x) <= order[2L])
stop("Not enough data to fit the model")
} else {
seasonal <- list(order = seasonal, period = frequency(x))
if(length(x) <= order[2L] + seasonal$order[2L] * seasonal$period)
stop("Not enough data to fit the model")
}
}
if (!missing(include.constant)) {
if (include.constant) {
include.mean <- TRUE
if ((order[2] + seasonal$order[2]) == 1) {
include.drift <- TRUE
}
}
else {
include.mean <- include.drift <- FALSE
}
}
if ((order[2] + seasonal$order[2]) > 1 & include.drift) {
warning("No drift term fitted as the order of difference is 2 or more.")
include.drift <- FALSE
}
if (!is.null(model)) {
tmp <- arima2(x, model, xreg = xreg, method = method)
xreg <- tmp$xreg
tmp$fitted <- NULL
tmp$lambda <- model$lambda
}
else {
if (include.drift) {
xreg <- `colnames<-`(cbind(drift = 1:length(x), xreg),
make.unique(c("drift", if(is.null(colnames(xreg)) && !is.null(xreg)) rep("", NCOL(xreg)) else colnames(xreg))))
}
if (is.null(xreg)) {
suppressWarnings(tmp <- stats::arima(x = x, order = order, seasonal = seasonal, include.mean = include.mean, method = method, ...))
} else {
suppressWarnings(tmp <- stats::arima(x = x, order = order, seasonal = seasonal, xreg = xreg, include.mean = include.mean, method = method, ...))
}
}
# Calculate aicc & bic based on tmp$aic
npar <- length(tmp$coef[tmp$mask]) + 1
missing <- is.na(tmp$residuals)
firstnonmiss <- head(which(!missing),1)
lastnonmiss <- tail(which(!missing),1)
n <- sum(!missing[firstnonmiss:lastnonmiss])
nstar <- n - tmp$arma[6] - tmp$arma[7] * tmp$arma[5]
tmp$aicc <- tmp$aic + 2 * npar * (nstar / (nstar - npar - 1) - 1)
tmp$bic <- tmp$aic + npar * (log(nstar) - 2)
tmp$series <- series
tmp$xreg <- xreg
tmp$call <- match.call()
tmp$lambda <- lambda
tmp$x <- origx
# Adjust residual variance to be unbiased
if (is.null(model)) {
tmp$sigma2 <- sum(tmp$residuals ^ 2, na.rm = TRUE) / (nstar - npar + 1)
}
out <- structure(tmp, class = c("forecast_ARIMA", "ARIMA", "Arima"))
out$fitted <- fitted.Arima(out)
out$series <- series
return(out)
}
# Refits the model to new data x
arima2 <- function(x, model, xreg, method) {
use.drift <- is.element("drift", names(model$coef))
use.intercept <- is.element("intercept", names(model$coef))
use.xreg <- is.element("xreg", names(model$call))
sigma2 <- model$sigma2
if (use.drift) {
driftmod <- lm(model$xreg[, "drift"] ~ I(time(as.ts(model$x))))
newxreg <- driftmod$coefficients[1] + driftmod$coefficients[2] * time(as.ts(x))
if (!is.null(xreg)) {
origColNames <- colnames(xreg)
xreg <- cbind(newxreg, xreg)
colnames(xreg) <- c("drift", origColNames)
} else {
xreg <- as.matrix(data.frame(drift = newxreg))
}
use.xreg <- TRUE
}
if (!is.null(model$xreg)) {
if (is.null(xreg)) {
stop("No regressors provided")
}
if (ncol(xreg) != ncol(model$xreg)) {
stop("Number of regressors does not match fitted model")
}
}
if (model$arma[5] > 1 & sum(abs(model$arma[c(3, 4, 7)])) > 0) # Seasonal model
{
if (use.xreg) {
refit <- Arima(
x, order = model$arma[c(1, 6, 2)], seasonal = list(order = model$arma[c(3, 7, 4)], period = model$arma[5]),
include.mean = use.intercept, xreg = xreg, method = method, fixed = model$coef
)
} else {
refit <- Arima(
x, order = model$arma[c(1, 6, 2)], seasonal = list(order = model$arma[c(3, 7, 4)], period = model$arma[5]),
include.mean = use.intercept, method = method, fixed = model$coef
)
}
}
else if (length(model$coef) > 0) # Nonseasonal model with some parameters
{
if (use.xreg) {
refit <- Arima(x, order = model$arma[c(1, 6, 2)], xreg = xreg, include.mean = use.intercept, method = method, fixed = model$coef)
} else {
refit <- Arima(x, order = model$arma[c(1, 6, 2)], include.mean = use.intercept, method = method, fixed = model$coef)
}
}
else { # No parameters
refit <- Arima(x, order = model$arma[c(1, 6, 2)], include.mean = FALSE, method = method)
}
refit$var.coef <- matrix(0, length(refit$coef), length(refit$coef))
if (use.xreg) { # Why is this needed?
refit$xreg <- xreg
}
refit$sigma2 <- sigma2
return(refit)
}
# Modified version of function print.Arima from stats package
#' @export
print.forecast_ARIMA <- function(x, digits=max(3, getOption("digits") - 3), se=TRUE, ...) {
cat("Series:", x$series, "\n")
cat(arima.string(x, padding = FALSE), "\n")
if (!is.null(x$lambda)) {
cat("Box Cox transformation: lambda=", x$lambda, "\n")
}
# cat("\nCall:", deparse(x$call, width.cutoff=75), "\n", sep=" ")
# if(!is.null(x$xreg))
# {
# cat("\nRegression variables fitted:\n")
# xreg <- as.matrix(x$xreg)
# for(i in 1:3)
# cat(" ",xreg[i,],"\n")
# cat(" . . .\n")
# for(i in 1:3)
# cat(" ",xreg[nrow(xreg)-3+i,],"\n")
# }
if (length(x$coef) > 0) {
cat("\nCoefficients:\n")
coef <- round(x$coef, digits = digits)
if (se && NROW(x$var.coef)) {
ses <- rep.int(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
coef <- matrix(coef, 1L, dimnames = list(NULL, names(coef)))
coef <- rbind(coef, s.e. = ses)
}
# Change intercept to mean if no regression variables
j <- match("intercept", colnames(coef))
if (is.null(x$xreg) & !is.na(j)) {
colnames(coef)[j] <- "mean"
}
print.default(coef, print.gap = 2)
}
cm <- x$call$method
cat("\nsigma^2 = ", format(x$sigma2, digits = digits), sep="")
if(!is.na(x$loglik))
cat(": log likelihood = ", format(round(x$loglik, 2L)), sep = "")
cat("\n")
if (is.null(cm) || cm != "CSS") {
if(!is.na(x$aic)) {
npar <- length(x$coef[x$mask]) + 1
missing <- is.na(x$residuals)
firstnonmiss <- head(which(!missing),1)
lastnonmiss <- tail(which(!missing),1)
n <- lastnonmiss - firstnonmiss + 1
nstar <- n - x$arma[6] - x$arma[7] * x$arma[5]
bic <- x$aic + npar * (log(nstar) - 2)
aicc <- x$aic + 2 * npar * (nstar / (nstar - npar - 1) - 1)
cat("AIC=", format(round(x$aic, 2L)), sep = "")
cat(" AICc=", format(round(aicc, 2L)), sep = "")
cat(" BIC=", format(round(bic, 2L)), "\n", sep = "")
}
}
invisible(x)
}
#' Return the order of an ARIMA or ARFIMA model
#'
#' Returns the order of a univariate ARIMA or ARFIMA model.
#'
#'
#' @param object An object of class \dQuote{\code{Arima}}, dQuote\code{ar} or
#' \dQuote{\code{fracdiff}}. Usually the result of a call to
#' \code{\link[stats]{arima}}, \code{\link{Arima}}, \code{\link{auto.arima}},
#' \code{\link[stats]{ar}}, \code{\link{arfima}} or
#' \code{\link[fracdiff]{fracdiff}}.
#' @return A numerical vector giving the values \eqn{p}, \eqn{d} and \eqn{q} of
#' the ARIMA or ARFIMA model. For a seasonal ARIMA model, the returned vector
#' contains the values \eqn{p}, \eqn{d}, \eqn{q}, \eqn{P}, \eqn{D}, \eqn{Q} and
#' \eqn{m}, where \eqn{m} is the period of seasonality.
#' @author Rob J Hyndman
#' @seealso \code{\link[stats]{ar}}, \code{\link{auto.arima}},
#' \code{\link{Arima}}, \code{\link[stats]{arima}}, \code{\link{arfima}}.
#' @keywords ts
#' @examples
#' WWWusage %>% auto.arima %>% arimaorder
#'
#' @export
arimaorder <- function(object) {
if (is.element("Arima", class(object))) {
order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
names(order) <- c("p", "d", "q", "P", "D", "Q", "Frequency")
seasonal <- (order[7] > 1 & sum(order[4:6]) > 0)
if (seasonal) {
return(order)
} else {
return(order[1:3])
}
}
else if (is.element("ar", class(object))) {
return(c("p" = object$order, "d" = 0, "q" = 0))
}
else if (is.element("fracdiff", class(object))) {
return(c("p" = length(object$ar), "d" = object$d, "q" = length(object$ma)))
}
else {
stop("object not of class Arima, ar or fracdiff")
}
}
#' @export
as.character.Arima <- function(x, ...) {
arima.string(x, padding = FALSE)
}
#' @rdname is.ets
#' @export
is.Arima <- function(x) {
inherits(x, "Arima")
}
#' @rdname fitted.Arima
#' @export
fitted.ar <- function(object, ...) {
getResponse(object) - residuals(object)
}