R包 - R包_lubridate和时间 - 《C020101_R》

时间
Lubridate包
补全日期
计算时间差 difftime更好用
时间加法
时间序列
- ts（）生成时间序列
实例
xts包
lag 是指将前一期的值移到当前位置（整体后移一位），diff 是指当前值减去前一期的值
- 实例:co2数据集
- Arima建模，forecast预测

时间

as.Date('2017-02-16', format = ) 中的 format 用于指定输入字符串的日期格式

%d:两位数日期
%m:数字月份
%b:月份缩写（如 Jan）
%B:月份全称（如 July）
%Y:四位数年份
%y:两位数年份

%a:星期缩写（如 Mon）

%A:星期全称（如 Monday）

> as.Date('2/16/2017',format = '%m/%d/%Y')
[1] "2017-02-16"
> as.Date('March 10, 1993', format = '%B %d, %Y')
[1] NA
> as.Date('10Mar93','%d%b%y')  # 该日期本身存在，返回 NA 很可能是因为中文 locale 下 %b 无法匹配英文月份缩写 Mar
[1] NA
> as.Date(100, origin = '2017-02-16') #指定开始时间，后面加天数
[1] "2017-05-27"

ISOdate

> ISOdate(1993,3,10,16,14,20)
[1] "1993-03-10 16:14:20 GMT"

　POSIX

> dts <- c('2005-10-21 18:24:24', "2017-02-16 19:20:20")
> as.POSIXlt(dts)
[1] "2005-10-21 18:24:24 CST" "2017-02-16 19:20:20 CST"

strptime() and strftime()

> time <- strptime('04/3/2016:16:18:34', format = '%d/%m/%Y:%H:%M:%S');time
[1] "2016-03-04 16:18:34 CST"
> strftime(time, 'Now is %H:%M on %Y %m %d')
[1] "Now is 16:18 on 2016 03 04"

julian 计算天数

> julian(as.Date('2017-02-16'), origin = as.Date('2016-02-19'))
[1] 363
attr(,"origin")
[1] "2016-02-19"
> julian(as.Date('2017-02-16'))  #默认是1970-01-01
[1] 17213
attr(,"origin")
[1] "1970-01-01"

difftime 计算时间差，mean 计算两个日期的中间日期，seq 生成等间隔的日期序列（下例保存在变量 dateline 中）

> difftime(as.Date('2017-02-16'),as.Date('2016-02-19'),units = 'weeks')
Time difference of 51.85714 weeks
> difftime(as.Date(now()),as.Date("1987-07-13"),units = "day")
Time difference of 12241 days
> difftime(as.Date("2067-07-13"),as.Date("1987-07-13"),units = "day")
Time difference of 29220 days
> mean(c(as.Date('2017-02-16'),as.Date('2016-02-19')))
[1] "2016-08-18"
> dateline <- seq(as.Date('2016-02-10'), by = '2 weeks', length.out = 10);dateline
 [1] "2016-02-10" "2016-02-24" "2016-03-09" "2016-03-23" "2016-04-06"
 [6] "2016-04-20" "2016-05-04" "2016-05-18" "2016-06-01" "2016-06-15"
> dateline <- seq(as.Date('2016-02-10'), by = '2 days', length.out = 10);dateline
 [1] "2016-02-10" "2016-02-12" "2016-02-14" "2016-02-16" "2016-02-18"
 [6] "2016-02-20" "2016-02-22" "2016-02-24" "2016-02-26" "2016-02-28"

用stringi处理时间

> library(stringi)
> (my_newtime <- stri_datetime_add(as.Date('2017-02-16'), value = 10, units = 'days'))
[1] "2017-02-26 08:00:00 CST"
> stri_datetime_create(2014,4,20)
[1] "2014-04-20 12:00:00 CST"
> stri_datetime_parse(c('2015-02-27','2015-02-29'), 'yyyy-MM-dd')
[1] "2015-02-27 20:01:51 CST" NA
> stri_datetime_parse(c('2015-02-27','2015-02-29'), 'yyyy-MM-dd', lenient = TRUE) #lenient = TRUE 时，不存在的日期会自动顺延为有效日期
[1] "2015-02-27 20:00:17 CST" "2015-03-01 20:00:17 CST"
> stri_datetime_parse('DEC-29','LLL-dd')  #当前环境下该函数无法识别 MMM 和 LLL 对应的英文月份缩写（可能与中文 locale 有关）
[1] NA
> stri_datetime_parse('02-20','MM-dd')
[1] "2021-02-20 20:00:18 CST"
> stri_datetime_parse('DEC-05','MMM-dd')
[1] NA
> stri_datetime_format(stri_datetime_now(), 'datetime_relative_medium')
[1] "今天 下午8:00:20"

Lubridate包

ymd（a）将按“年月日”顺序书写的 a 直接解析为日期

myd（a）将按“月年日”顺序书写的 a 直接解析为日期

ymd_hms 在年月日之后还会解析时分秒

month，day，year 可以直接提取其中的月、日、年；label = TRUE 时返回月份名称而不是数字，abbr 控制是否使用简称

mday 返回该日期是当月的第几天，wday 返回该日期是一周中的第几天（默认周日为 1）

now()返回现在的时间

make_datetime() 不带参数时返回起始时间 1970-01-01（UTC）

make_date（year，month，day）生成时间

round_date（）按指定单位（unit）生成近似时间：不是对数字四舍五入，而是看离哪个时间点比较近就取哪个

> library(lubridate)
> ymd('020217')
[1] "2002-02-17"
> mdy('06182016')
[1] "2016-06-18"
> x <- c('2009s01s01','2009-01-02','2009 01 03','2009,1,4','09.1.1','leopard 09 12 09', '!!09 ## 12 $$ 12')
> x_time <- ymd(x);x_time
[1] "2009-01-01" "2009-01-02" "2009-01-03" "2009-01-04" "2009-01-01"
[6] "2009-12-09" "2009-12-12"
> ymd_hms('2017 02 19 14 23 23')
[1] "2017-02-19 14:23:23 UTC"
> month(x_time, label = T, abbr = F)
[1] 一月   一月   一月   一月   一月   十二月 十二月
12 Levels: 一月 < 二月 < 三月 < 四月 < 五月 < 六月 < 七月 < ... < 十二月
> month(x_time, label = T, abbr = T)
[1]  1  1  1  1  1 12 12
Levels:  1 <  2 <  3 <  4 <  5 <  6 <  7 <  8 <  9 < 10 < 11 < 12
> month(x_time, label = F)
[1]  1  1  1  1  1 12 12
> day(x_time)
[1]  1  2  3  4  1  9 12
> mday(x_time)
[1]  1  2  3  4  1  9 12
> wday(x_time)
[1] 5 6 7 1 5 4 7
> new_date <- now();new_date
[1] "2021-01-16 20:32:19 CST"
> month(new_date) <- 12
> new_date
[1] "2021-12-16 20:32:19 CST"
> dates <- make_date(year = 2010:2016,month = 1:3,day = 1:5)
> dates
[1] "2010-01-01" "2011-02-02" "2012-03-03" "2013-01-04" "2014-02-05"
[6] "2015-03-01" "2016-01-02"
> make_datetime()
[1] "1970-01-01 UTC"
> x_time <- as.POSIXlt('2009-08-03 12:01:59');x_time
> round_date(x_time, unit = 'minutes')
[1] "2009-08-03 12:02:00 CST"
> round_date(x_time, unit = 'day')
[1] "2009-08-04 CST"
> round_date(x_time, unit = 'halfyear')
[1] "2009-07-01 CST"
> time_t <- c('2017-02','201609','2017/5')

补全日期

> time_t <- c('2017-02','201609','2017/5')
> ymd(time_t)
[1] NA NA NA
Warning message:
All formats failed to parse. No formats found. 
> ymd(time_t,truncated = 1) # 补全日期，1和2没有区别
[1] "2017-02-01" "2016-09-01" "2017-05-01"
> ymd(time_t,truncated = 2)
[1] "2017-02-01" "2016-09-01" "2017-05-01"

计算时间差 difftime更好用

> time_tt <- ymd('1900,01,01','1999,12,31')
> int <- interval(start = ymd('1900,01,01'),end = ymd('1999,12,31'));int
[1] 1900-01-01 UTC--1999-12-31 UTC
> time_length(int,unit = 'day')
[1] 36523
> time_length(int,unit = 'year')
[1] 99.99726

时间加法

> x <- as.POSIXlt('2017-02-03');x
[1] "2017-02-03 CST"
> x + days(10) + hours(12) + minutes(30)
[1] "2017-02-13 12:30:00 CST"

时间序列

ts（）生成时间序列

> #ts（）生成时间序列
> ts(1:10, frequency = 4, start = c(1999, 2),end = c(2001,3)) #frequency = 4 表示季度数据（每年 4 个观测值）
     Qtr1 Qtr2 Qtr3 Qtr4
1999         1    2    3
2000    4    5    6    7
2001    8    9   10     
> ts(1:10, frequency = 4, start = c(1999, 5),end = c(2001,3)) #季度数据，注意 start 中的 5 超过了 4，会顺延到 2000 年第 1 季度
     Qtr1 Qtr2 Qtr3 Qtr4
2000    1    2    3    4
2001    5    6    7     
> ts(1:10, frequency = 12, start = c(1999, 4),end = c(2001,3))#frequency = 12 表示月度数据（每年 12 个观测值）
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
1999               1   2   3   4   5   6   7   8   9
2000  10   1   2   3   4   5   6   7   8   9  10   1
2001   2   3   4                                    
> ts(1:10, frequency = 12, start = c(1999, 13),end = c(2001,3))#月度数据，注意 start 中的 13 超过了 12，会顺延到 2000 年 1 月
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2000   1   2   3   4   5   6   7   8   9  10   1   2
2001   3   4   5 

> ts(1:10, frequency = 12, start = c(1999, 13))
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct
2000   1   2   3   4   5   6   7   8   9  10

实例

value：从 start 指定的日期到 end 指定的日期、每天一个观测值的时间序列，数值由 sample 随机生成
date：生成与之对应的日期序列
df：将日期和数值组成数据框

> value <- ts(data = sample(0:300,366,replace = TRUE),start = as.Date('2016-01-01'), frequency = 1,end = as.Date('2016-12-31'))
> date <- seq(from = as.Date('2016-01-01'),by = 1, length.out = 366)
> df <- data.frame(value = value, time = date)
> plot(ts(cumsum(1+round(rnorm(100),2)), start = c(1954,7), frequency = 12)) #cumsum 计算累积和，第 i 个值等于前 i 个数之和
> plot(value)

xts包

> library(xts)
> value <- sample(0:100, 365, replace = T)
> times <- seq(from = as.Date('2017-01-01'), by = 1, length = 365)
> myts <- xts(value, times)
> head(myts)
           [,1]
2017-01-01   64
2017-01-02   45
2017-01-03   22
2017-01-04   65
2017-01-05    6
2017-01-06    9
> window(myts, start = as.Date('2017-01-10'), end = as.Date('2017-01-15')) #取值
           [,1]
2017-01-10   42
2017-01-11   14
2017-01-12   90
2017-01-13   12
2017-01-14   18
2017-01-15   21
> window(myts, start = as.Date('2017-01-1'), end = as.Date('2017-01-6')) <- 1:6 #赋值
> head(myts)
           [,1]
2017-01-01    1
2017-01-02    2
2017-01-03    3
2017-01-04    4
2017-01-05    5
2017-01-06    6

lag 是指将前一期的值移到当前位置（整体后移一位），diff 是指当前值减去前一期的值

> myts <- xts(value, times)
> head(myts)
           [,1]
2017-01-01   64
2017-01-02   45
2017-01-03   22
2017-01-04   65
2017-01-05    6
2017-01-06    9
> head(lag(myts))
           [,1]
2017-01-01   NA
2017-01-02   64
2017-01-03   45
2017-01-04   22
2017-01-05   65
2017-01-06    6
> head(diff(myts))
           [,1]
2017-01-01   NA
2017-01-02  -19
2017-01-03  -23
2017-01-04   43
2017-01-05  -59
2017-01-06    3

实例:co2数据集

>   data("co2") #co2数据集
> class(co2)
[1] "ts"
> head(co2)
[1] 315.42 316.31 316.50 317.56 318.13 318.00
> length(co2)
[1] 468
> training <- co2[1:400]
> ts_training <- ts(training, start = start(co2), frequency = frequency(co2))
> plot(ts_training)
> de_co2 <- decompose(ts_training)
> plot(de_co2)

> library(tseries)
> training <- co2[1:400]
> ts_training <- ts(training, start = start(co2), frequency = frequency(co2))

> kpss.test(ts_training)

    KPSS Test for Level Stationarity

data:  ts_training
KPSS Level = 6.6342, Truncation lag parameter = 5, p-value = 0.01

Warning message:
In kpss.test(ts_training) : p-value smaller than printed p-value

> ts_training_diff <- diff(ts_training)


> kpss.test(ts_training_diff)

    KPSS Test for Level Stationarity

data:  ts_training_diff
KPSS Level = 0.023192, Truncation lag parameter = 5, p-value = 0.1

Warning message:
In kpss.test(ts_training_diff) : p-value greater than printed p-value

> plot(ts_training_diff)

acf(ts_training)
pacf(ts_training)

R包_lubridate和时间 - 图5 R包_lubridate和时间 - 图6

Arima建模，forecast预测

> library(forecast)
# ARIMA(P,D,Q)
> co2_fit <- Arima(ts_training, order = c(1,1,1), 
+                  seasonal = list(order = c(1,1,1), period = 12))
> co2_fore <- forecast(co2_fit, 68)
> plot(co2, col = 'red')
> par(new = TRUE)

> par(new = TRUE)
> plot(co2_fore)