| Line 7: |
Line 7: |
| | <syntaxhighlight lang="python3"> | | <syntaxhighlight lang="python3"> |
| | df = pd.read_csv('news_2019.05.10.csv') | | df = pd.read_csv('news_2019.05.10.csv') |
| | + | </syntaxhighlight> |
| | + | |
| | + | ==Series== |
| | + | <syntaxhighlight lang="python3"> |
| | + | s = pd.Series(['banana', 42]) |
| | + | s = pd.Series(['banana', 42], index=['Fruit', 'Calories']) |
| | + | s.values |
| | + | s.keys() |
| | + | s.values[0] |
| | + | s.keys()[0] |
| | + | s.min() |
| | + | s.max() |
| | + | s.std() |
| | </syntaxhighlight> | | </syntaxhighlight> |
| | | | |
| | ==Dataframe== | | ==Dataframe== |
| | + | |
| | + | ===Create=== |
| | + | <syntaxhighlight lang="python3"> |
| | + | scientists = pd.DataFrame({ |
| | + | 'Name': ['Rosaline Franklin', 'William Gosset'], |
| | + | 'Occupation': ['Chemist', 'Statistician'], |
| | + | 'Born': ['1920-07-25', '1876-06-13'], |
| | + | 'Died': ['1958-04-16', '1937-10-16'], |
| | + | }) |
| | + | </syntaxhighlight>To set the column order explicitly and use the names as the row index:<syntaxhighlight lang="python3"> |
| | + | scientists = pd.DataFrame({ |
| | + | 'Occupation': ['Chemist', 'Statistician'], |
| | + | 'Born': ['1920-07-25', '1876-06-13'], |
| | + | 'Died': ['1958-04-16', '1937-10-16'], |
| | + | }, index=['Rosaline Franklin', 'William Gosset'], columns=['Occupation', 'Born', 'Died']) |
| | + | </syntaxhighlight> |
| | + | |
| | + | ===From CSV=== |
| | + | <syntaxhighlight lang="python3"> |
| | + | news = pd.read_csv('news_2019.05.10.csv') |
| | + | </syntaxhighlight> |
| | | | |
| | ===Info=== | | ===Info=== |
| Line 21: |
Line 55: |
| | df.tail() | | df.tail() |
| | df.info() | | df.info() |
| | + | df.describe() |
| | </syntaxhighlight> | | </syntaxhighlight> |
| | | | |
| | ===Select 1 row=== | | ===Select 1 row=== |
| | <syntaxhighlight lang="python3"> | | <syntaxhighlight lang="python3"> |
| − | df.iloc[[1]] | + | df.iloc[[1]] # for positional indexing |
| − | df.loc[0] | + | df.loc[0] # for label-based indexing |
| | df.iloc[-1] == df.loc[df.shape[0]-1] | | df.iloc[-1] == df.loc[df.shape[0]-1] |
| | + | </syntaxhighlight> |
| | + | |
| | + | ===Select specific rows=== |
| | + | <syntaxhighlight lang="python3"> |
| | + | df.loc[[9, 99, 999]] |
| | </syntaxhighlight> | | </syntaxhighlight> |
| | | | |
| Line 46: |
Line 86: |
| | <syntaxhighlight lang="python3"> | | <syntaxhighlight lang="python3"> |
| | df.iloc[1][1] | | df.iloc[1][1] |
| − | # Or
| |
| | df.iloc[1]['summary'] | | df.iloc[1]['summary'] |
| | + | df.iloc[1, 3] |
| | + | |
| | + | df.loc[1, 'summary'] |
| | + | </syntaxhighlight> |
| | + | |
| | + | ===Subset multiple rows and multiple columns=== |
| | + | <syntaxhighlight lang="python3"> |
| | + | df.iloc[[1,34,56],[2,4,5]] |
| | + | df.loc[[1,34,56],['modification_date', 'content']] |
| | </syntaxhighlight> | | </syntaxhighlight> |
| | [[Category:Python]] | | [[Category:Python]] |
| | + | [[Category:DataScience]] |