| Line 51: |
Line 51: |
| | mutate(class= extract_numeric(class)) %>% | | mutate(class= extract_numeric(class)) %>% |
| | print | | print |
| | + | </source> |
| | + | Tidy multiple observational units in one table (id, name, sex repeated...): |
| | + | <nowiki> |
| | + | > students4 |
| | + | id name sex class midterm final |
| | + | 1 168 Brian F 1 B B |
| | + | 2 168 Brian F 5 A C |
| | + | 3 588 Sally M 1 A C |
| | + | 4 588 Sally M 3 B C |
| | + | 5 710 Jeff M 2 D E |
| | + | 6 710 Jeff M 4 A C |
| | + | 7 731 Roger F 2 C A |
| | + | 8 731 Roger F 5 B A |
| | + | 9 908 Karen M 3 C C |
| | + | 10 908 Karen M 4 A A</nowiki> |
| | + | To solve this will breake the dataset in two:<br /> |
| | + | (id, name, and sex)<br /> |
| | + | (id, class, midterm, final)<br /> |
| | + | <source lang="rsplus"> |
| | + | student_info <- students4 %>% |
| | + | select(id, name, sex) %>% |
| | + | unique() %>% |
| | + | print |
| | + | gradebook <- students4 %>% |
| | + | select(id, class, midterm, final) %>% |
| | + | print |
| | + | </source> |
| | + | If a single observation is stored in multiple tables: |
| | + | <nowiki> |
| | + | > passed |
| | + | name class final |
| | + | 1 Brian 1 B |
| | + | 2 Roger 2 A |
| | + | 3 Roger 5 A |
| | + | 4 Karen 4 A |
| | + | |
| | + | > failed |
| | + | name class final |
| | + | 1 Brian 5 C |
| | + | 2 Sally 1 C |
| | + | 3 Sally 3 C |
| | + | 4 Jeff 2 E |
| | + | 5 Jeff 4 C |
| | + | 6 Karen 3 C</nowiki> |
| | + | |
| | + | <source lang="rsplus"> |
| | + | # first we need to add to each table the variable passed or failed accordingly. |
| | + | passed <- mutate(passed, status="passed") |
| | + | failed <- mutate(failed, status="failed") |
| | + | </source> |
| | + | |
| | + | Example: dataset -> http://research.collegeboard.org/programs/sat/data/cb-seniors-2013 |
| | + | <source lang="rsplus"> |
| | + | # Load dataset in variable sat |
| | + | sat %>% |
| | + | select(-contains("total")) %>% |
| | + | gather(part_sex, count, -score_range) %>% |
| | + | separate(part_sex, c("part", "sex")) %>% |
| | + | ### <Your call to group_by()> %>% |
| | + | mutate(###, |
| | + | ### |
| | + | ) %>% print |
| | + | |
| | + | # The same adding total and prop |
| | + | sat %>% |
| | + | select(-contains("total")) %>% |
| | + | gather(part_sex, count, -score_range) %>% |
| | + | separate(part_sex, c("part", "sex")) %>% |
| | + | group_by(part, sex) %>% |
| | + | mutate(total = sum(count), |
| | + | prop = count/total |
| | + | ) %>% print |
| | </source> | | </source> |