取列 只认列名,不能用下标

```r
library(data.table)
# 各列长度不一时,较短的列会被循环补齐
dt = data.table(v1 = c(1,2),v2 = LETTERS[1:3],v3 = round(rnorm(12,2,2)),
                v4 = sample(1:20,12));dt
v1 v2 v3 v4
1: 1 A 3 20
2: 2 B 0 10
3: 1 C 1 11
4: 2 A 2 9
5: 1 B 2 16
6: 2 C 2 7
7: 1 A -1 17
8: 2 B 2 12
9: 1 C 1 3
10: 2 A 1 18
11: 1 B 2 4
12: 2 C 4 19
dt[3:6] #取行
v1 v2 v3 v4
1: 1 C 1 11
2: 2 A 2 9
3: 1 B 2 16
4: 2 C 2 7
dt[v2 == 'B']
v1 v2 v3 v4
1: 2 B 0 10
2: 1 B 2 16
3: 2 B 2 12
4: 1 B 2 4
dt[v2 == 'B',] #取行,加不加逗号,结果一样
v1 v2 v3 v4
1: 2 B 0 10
2: 1 B 2 16
3: 2 B 2 12
4: 1 B 2 4
dt[v2 %in% c("A","B")] #返回 v2 为 A 或 B 的行
v1 v2 v3 v4
1: 1 A 3 20
2: 2 B 0 10
3: 2 A 2 9
4: 1 B 2 16
5: 1 A -1 17
6: 2 B 2 12
7: 2 A 1 18
8: 1 B 2 4
```

<a name="d41d8cd9"></a>
# 取列 只认列名,不能用下标
```r
# 取列 只认列名,不能用下标
dt[,list(v1,v2)]
v1 v2
1: 1 A
2: 2 B
3: 1 C
4: 2 A
5: 1 B
6: 2 C
7: 1 A
8: 2 B
9: 1 C
10: 2 A
11: 1 B
12: 2 C
dt[,v1]
[1] 1 2 1 2 1 2 1 2 1 2 1 2
dt[,sum(v4)] #取列的同时对其进行运算,如 sum、mean
[1] 146
dt[,list(sum_v3 = sum(v3),mean_v4 = mean(v4))]
sum_v3 mean_v4
1: 19 12.16667
dt[,.(sum_v3 = sum(v3),mean_v4 = mean(v4))] #另一种方法
sum_v3 mean_v4
1: 19 12.16667
> dt1 = dt[,list(v5 = v4 + 1,v6 = v3 +1)];dt1
v5 v6
1: 21 4
2: 11 1
3: 12 2
4: 10 3
5: 17 3
6: 8 3
7: 18 0
8: 13 3
9: 4 2
10: 19 2
11: 5 3
12: 20 5
> dt[,list(print(v2),plot(1:12,v3,col = 'red'))]
[1] "A" "B" "C" "A" "B" "C" "A" "B" "C" "A" "B" "C"
V1
1: A
2: B
3: C
4: A
5: B
6: C
7: A
8: B
9: C
10: A
11: B
12: C
> dt[,{print(v2);plot(1:12,v3,col = 'red')}]
[1] "A" "B" "C" "A" "B" "C" "A" "B" "C" "A" "B" "C"
NULL
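# 两种写法作图结果一样,但后一种多输出了一个 NULL。
# 原因:花括号 {} 返回其中最后一个表达式的值,而 plot() 的返回值是 NULL,
# 所以 j 的结果就是 NULL 并被打印出来;
# 前一种用 list() 包裹,其中值为 NULL 的元素被丢弃,只剩 print(v2) 返回的 v2(即 V1 列)。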
> dt[,list(sum_v3 = sum(v3),mean_v4 = mean(v4)),by = v2] # 根据V2计算
v2 sum_v3 mean_v4
1: A 5 16.0
2: B 6 10.5
3: C 8 10.0
> dt[,list(sum_v3 = sum(v3),mean_v4 = mean(v4)),by = list(v2,v1)]
v2 v1 sum_v3 mean_v4
1: A 1 2 18.5
2: B 2 2 11.0
3: C 1 2 7.0
4: A 2 3 13.5
5: B 1 4 10.0
6: C 2 6 13.0
> dt[1:6,list(sum_v3 = sum(v3),mean_v4 = mean(v4)),by = v2]
v2 sum_v3 mean_v4
1: A 5 14.5
2: B 2 13.0
3: C 3 9.0
> dt[,.N,by =list(v1,v2)] # .N 表示每组的行数,可用来计算频数
v1 v2 N
1: 1 A 2
2: 2 B 2
3: 1 C 2
4: 2 A 2
5: 1 B 2
6: 2 C 2
> # 增加列 :=特殊符号
> dt[,v5 := v4+1];head(dt) # 注:下面输出中已有的 v6 列应是此前运行留下的,单独执行本行只会新增 v5
v1 v2 v3 v4 v5 v6
1: 1 A 3 20 21 21
2: 2 B 0 10 11 11
3: 1 C 1 11 12 12
4: 2 A 2 9 10 10
5: 1 B 2 16 17 17
6: 2 C 2 7 8 8
> #增加两列
> dt[,c("v5","v6") := list(v3 +1,v4+1)] ;head(dt)
v1 v2 v3 v4 v5 v6
1: 1 A 3 20 4 21
2: 2 B 0 10 1 11
3: 1 C 1 11 2 12
4: 2 A 2 9 3 10
5: 1 B 2 16 3 17
6: 2 C 2 7 3 8
> setkey(dt,v2) # 将 v2 设为键(key):按引用把 dt 按 v2 重新排序并标记为 key,会直接改变 dt 的行顺序,慎用
> dt[c("A","B")] # 设置 key 之后,i 中的字符向量会直接与 key 列 v2 匹配,返回 v2 为 A 或 B 的行
v1 v2 v3 v4 v5 v6
1: 1 A 3 20 4 21
2: 2 A 2 9 3 10
3: 1 A -1 17 0 18
4: 2 A 1 18 2 19
5: 2 B 0 10 1 11
6: 1 B 2 16 3 17
7: 2 B 2 12 3 13
8: 1 B 2 4 3 5
> # nomatch
> dt[c("A","D"),nomatch = 0] #未匹配到的不会显示为NA
v1 v2 v3 v4 v5 v6
1: 1 A 3 20 4 21
2: 2 A 2 9 3 10
3: 1 A -1 17 0 18
4: 2 A 1 18 2 19
> dt[c("A","D")]
v1 v2 v3 v4 v5 v6
1: 1 A 3 20 4 21
2: 2 A 2 9 3 10
3: 1 A -1 17 0 18
4: 2 A 1 18 2 19
5: NA D NA NA NA NA
# 仍需要研究
> dt[,list(sum_v4 = sum(v4)),by = v2][sum_v4 >20]
v2 sum_v4
1: A 47
2: B 37