| Line 37: |
Line 37: |
| | summarize(cran, avg_bytes = mean(size)) | | summarize(cran, avg_bytes = mean(size)) |
| | | | |
| | + | pack_sum <- summarize(by_package, |
| | + | count = n(), |
| | + | unique = n_distinct(ip_id), |
| | + | countries = n_distinct(country), |
| | + | avg_bytes = mean(size)) |
| | | | |
| | + | quantile(pack_sum$count, probs = 0.99) |
| | + | top_counts<-filter(pack_sum, count > 679) |
| | + | View(top_counts) |
| | + | top_counts_sorted<-arrange(top_counts, desc(count)) |
| | + | View(top_counts_sorted) |
| | + | |
| | + | # group_by() |
| | + | by_package <- group_by(cran, package) |
| | + | summarize(by_package, mean(size)) |
| | + | </source> |
| | + | |
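| | + | Rank packages by their number of distinct countries (descending, ties broken by ascending avg_bytes), keeping only packages with more than 60 countries. |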
| | + | <source lang="rsplus"> |
| | + | result2 <- |
| | + | arrange( |
| | + | filter( |
| | + | summarize(group_by(cran, package), |
| | + | count = n(), |
| | + | unique = n_distinct(ip_id), |
| | + | countries = n_distinct(country), |
| | + | avg_bytes = mean(size) |
| | + | ), |
| | + | countries > 60 |
| | + | ), desc(countries), avg_bytes |
| | + | ) |
| | + | |
| | + | print(result2) |
| | + | </source> |
| | + | |
| | + | The same ranking written as a pipeline with the %>% operator: |
| | + | <source lang="rsplus"> |
| | + | result3 <- |
| | + | cran %>% |
| | + | group_by(package) %>% |
| | + | summarize(count = n(), |
| | + | unique = n_distinct(ip_id), |
| | + | countries = n_distinct(country), |
| | + | avg_bytes = mean(size) |
| | + | ) %>% |
| | + | filter(countries > 60) %>% |
| | + | arrange(desc(countries), avg_bytes) |
| | + | |
| | + | # Print result to console |
| | + | print(result3) |
| | </source> | | </source> |