filter 筛选:只对行操作,不对列操作;subset() 可以同时对行和列操作
> head(tips,3) total_bill tip sex smoker day time size1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 3> fil1 = filter(tips,tips$smoker =='No',tips$day == 'Sun');head(fil1) total_bill tip sex smoker day time size1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 34 23.68 3.31 Male No Sun Dinner 25 24.59 3.61 Female No Sun Dinner 46 25.29 4.71 Male No Sun Dinner 4
slice 切片,行
> s1 = slice(tips,1:5);s1 total_bill tip sex smoker day time size1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 34 23.68 3.31 Male No Sun Dinner 25 24.59 3.61 Female No Sun Dinner 4
select ,选择列
> s2 = select(tips,tip,sex,day);head(s2,3) tip sex day1 1.01 Female Sun2 1.66 Male Sun3 3.50 Male Sun> s3 = select(tips,tip:time);head(s3,3) tip sex smoker day time1 1.01 Female No Sun Dinner2 1.66 Male No Sun Dinner3 3.50 Male No Sun Dinner> s4 = select(tips,2:5);head(s4,3) tip sex smoker day1 1.01 Female No Sun2 1.66 Male No Sun3 3.50 Male No Sun
arrange()
> new_tips = arrange(tips,total_bill,tip);head(new_tips) #默认升序 total_bill tip sex smoker day time size68 3.07 1.00 Female Yes Sat Dinner 193 5.75 1.00 Female Yes Fri Dinner 2112 7.25 1.00 Female No Sat Dinner 1173 7.25 5.15 Male Yes Sun Dinner 2150 7.51 2.00 Male No Thur Lunch 2196 7.56 1.44 Male No Thur Lunch 2> new_tips = arrange(tips,desc(total_bill),tip);head(new_tips) #降序的操作 total_bill tip sex smoker day time size171 50.81 10.00 Male Yes Sat Dinner 3213 48.33 9.00 Male No Sat Dinner 460 48.27 6.73 Male No Sat Dinner 4157 48.17 5.00 Male No Sun Dinner 6183 45.35 3.50 Male Yes Sun Dinner 3103 44.30 2.50 Female Yes Sat Dinner 3
rename 重新命名 列
> new_tips = rename(tips,bill = total_bill);head(new_tips) bill tip sex smoker day time size1 16.99 1.01 Female No Sun Dinner 22 10.34 1.66 Male No Sun Dinner 33 21.01 3.50 Male No Sun Dinner 34 23.68 3.31 Male No Sun Dinner 25 24.59 3.61 Female No Sun Dinner 46 25.29 4.71 Male No Sun Dinner 4
distinct(data,variable) 去重:返回数据集中指定列的唯一值(结果仍是数据框,并不会生成因子)
> distinct(tips,day) day1 Sun20 Sat78 Thur91 Fri> distinct(tips,sex) sex1 Female2 Male
mutate add new column
> m1 = mutate(tips,rate = tip/total_bill);head(m1) total_bill tip sex smoker day time size rate1 16.99 1.01 Female No Sun Dinner 2 0.059446732 10.34 1.66 Male No Sun Dinner 3 0.160541593 21.01 3.50 Male No Sun Dinner 3 0.166587344 23.68 3.31 Male No Sun Dinner 2 0.139780415 24.59 3.61 Female No Sun Dinner 4 0.146807656 25.29 4.71 Male No Sun Dinner 4 0.18623962> > m1 = mutate(tips,rate = tip/total_bill,new_rate = rate*100);head(m1)#可以同时生成rate和new_rate total_bill tip sex smoker day time size rate1 16.99 1.01 Female No Sun Dinner 2 0.059446732 10.34 1.66 Male No Sun Dinner 3 0.160541593 21.01 3.50 Male No Sun Dinner 3 0.166587344 23.68 3.31 Male No Sun Dinner 2 0.139780415 24.59 3.61 Female No Sun Dinner 4 0.146807656 25.29 4.71 Male No Sun Dinner 4 0.18623962 new_rate1 5.9446732 16.0541593 16.6587344 13.9780415 14.6807656 18.623962
transform cannot use a variable created in the same call the way mutate can; for example:
> transform(tips,rate = tip/total_bill,new_rate = rate*100) # error: cannot find rate
Error in eval(substitute(list(...)), `_data`, parent.frame()) : 
  找不到对象'rate'
summarise 可以在同一次调用中引用刚生成的变量(与 mutate 相同),例如:
head(summarize(tips,rate = tip/total_bill) ,3)# head(summarize(tips,rate = tip/total_bill,new_rate = rate*100) ,3)
sample: randomly select rows (sample_n by count, sample_frac by fraction)
> sample_n(iris,size = 3) #随机抽出行 Sepal.Length Sepal.Width Petal.Length Petal.Width Species1 6.5 2.8 4.6 1.5 versicolor2 5.1 3.8 1.6 0.2 setosa3 6.0 2.9 4.5 1.5 versicolor> sample_frac(iris,size = 0.01) #按百分比随机抽出行 Sepal.Length Sepal.Width Petal.Length Petal.Width Species1 6.2 2.9 4.3 1.3 versicolor2 6.8 3.2 5.9 2.3 virginica
%>% pipe
> result = tips %>% group_by(smoker,sex) %>% summarise(count = n(),mean_tips = mean(tip),+ sd_bill = sd(total_bill));result`summarise()` regrouping output by 'smoker' (override with `.groups` argument)# A tibble: 4 x 5# Groups: smoker [2] smoker sex count mean_tips sd_bill <fct> <fct> <int> <dbl> <dbl>1 No Female 54 2.77 7.292 No Male 97 3.11 8.733 Yes Female 33 2.93 9.194 Yes Male 60 3.05 9.91
join: join two data frames
inner_join
semi_join
anti_join
left_join
right_join
> df1 = data.frame(x = letters[1:8],y = 1:8);df1
  x y
1 a 1
2 b 2
3 c 3
4 d 4
5 e 5
6 f 6
7 g 7
8 h 8
> df2 = data.frame(x = c("a","b","c"),z = 10:12);df2
  x  z
1 a 10
2 b 11
3 c 12
> inner_join(df1,df2,by = 'x') #交集
  x y  z
1 a 1 10
2 b 2 11
3 c 3 12
> semi_join(df1,df2,by = "x") #只取df1中在df2中出现的
  x y
1 a 1
2 b 2
3 c 3
> anti_join(df1,df2,by = "x") #只取df1不在df2出现的
  x y
1 d 4
2 e 5
3 f 6
4 g 7
5 h 8
> left_join(df1,df2,by = "x") #按照左侧数据框结合,没有的就自动填充NA
  x y  z
1 a 1 10
2 b 2 11
3 c 3 12
4 d 4 NA
5 e 5 NA
6 f 6 NA
7 g 7 NA
8 h 8 NA
> right_join(df1,df2,by = "x")
  x y  z
1 a 1 10
2 b 2 11
3 c 3 12