# R: dplyr package

# From RHS Wiki
# Revision as of 23:35, 22 April 2015 by Rafahsolis (talk | contribs) (Protected "R: dplyr package" ([Edit=Allow only administrators] (indefinite) [Move=Allow only administrators] (indefinite)))
# Jump to navigation Jump to search
# Attach dplyr and report which version is installed; every verb used below
# comes from this package.
library(dplyr)
packageVersion("dplyr")

# Read the CSV into a plain data frame, keeping text columns as character
# vectors instead of converting them to factors.
# NOTE(review): `path2csv` is not defined in this file; it must already hold
# the path to the CSV when this line runs -- confirm where it is set.
mydf <- read.csv(path2csv, stringsAsFactors = FALSE)

# Wrap the data frame as a tibble. as_tibble() (re-exported by dplyr) is used
# instead of tbl_df(), which is deprecated as of dplyr 1.0.0.
cran <- as_tibble(mydf)

# fundamental data manipulation tasks: select(), filter(), arrange(), mutate(), and summarize()

# select(): keep only the named columns, in the order they are listed
cran %>% select(ip_id, package, country)
# A `from:to` range keeps every column between the two endpoints ...
cran %>% select(r_arch:country)
# ... and swapping the endpoints returns the same columns in reverse order
cran %>% select(country:r_arch)

# Omit columns: a leading minus drops one column or a whole range of columns
cran %>% select(-time)
cran %>% select(-(X:size))

# filter(): keep only the rows for which the condition(s) hold
cran %>% filter(package == "swirl")
# Comma-separated conditions must all be true (logical AND)
cran %>% filter(r_version == "3.1.1", country == "US")
# `|` keeps rows matching either condition (logical OR)
cran %>% filter(country == "US" | country == "IN")
# Numeric and string comparisons can be mixed in one call
cran %>% filter(size > 100500, r_os == "linux-gnu")
# Keep only rows where r_version is not missing
cran %>% filter(!is.na(r_version))

# arrange(): sort rows by one or more columns
# Work on a narrower table holding the columns from size through ip_id
cran2 <- cran %>% select(size:ip_id)
# Ascending order is the default
cran2 %>% arrange(ip_id)
# desc() reverses the order for that column
cran2 %>% arrange(desc(ip_id))
# Later columns break ties in the earlier ones
cran2 %>% arrange(package, ip_id)
# Ascending and descending keys can be combined
cran2 %>% arrange(country, desc(r_version), ip_id)

# mutate(): add new columns computed from existing ones
# cran3 was used below without ever being created, so both calls failed with
# "object 'cran3' not found". Build it from cran with a small set of columns
# that includes `size`, the only column the calls below depend on.
cran3 <- select(cran, ip_id, package, size)

# size_mb = size / 2^20 (bytes to megabytes, assuming size is in bytes)
mutate(cran3, size_mb = size / 2^20)

# A column created earlier in the same mutate() call can be used right away:
# size_gb is computed from the size_mb defined just before it
mutate(cran3, size_mb = size / 2^20, size_gb = size_mb / 2^10)

# summarize(): collapse the table into a single row; here that row holds the
# mean of the size column under the name avg_bytes
cran %>% summarize(avg_bytes = mean(size))