Difference between revisions of "Pandas"
Jump to navigation
Jump to search
Rafahsolis (talk | contribs) m (→Info) Tag: visualeditor |
Rafahsolis (talk | contribs) m Tag: visualeditor |
||
| (11 intermediate revisions by the same user not shown) | |||
| Line 7: | Line 7: | ||
<syntaxhighlight lang="python3"> | <syntaxhighlight lang="python3"> | ||
df = pd.read_csv('news_2019.05.10.csv') | df = pd.read_csv('news_2019.05.10.csv') | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ==DataSeries== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | s = pd.Series(['banana', 42]) | ||
| + | s = pd.Series(['banana', 42], index=['Fruit', 'Calories']) | ||
| + | s.values | ||
| + | s.keys() | ||
| + | s.values[0] | ||
| + | s.keys()[0] | ||
| + | s.min() | ||
| + | s.max() | ||
| + | s.std() | ||
</syntaxhighlight> | </syntaxhighlight> | ||
==Dataframe== | ==Dataframe== | ||
| + | |||
| + | ===Create=== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | scientists = pd.DataFrame({ | ||
| + | 'Name': ['Rosaline Franklin', 'William Gosset'], | ||
| + | 'Occupation': ['Chemist', 'Statistician'], | ||
| + | 'Born': ['1920-07-25', '1876-06-13'], | ||
| + | 'Died': ['1958-04-16', '1937-10-16'], | ||
| + | }) | ||
| + | </syntaxhighlight>If you want to keep the order of columns:<syntaxhighlight lang="python3"> | ||
| + | scientists = pd.DataFrame({ | ||
| + | 'Occupation': ['Chemist', 'Statistician'], | ||
| + | 'Born': ['1920-07-25', '1876-06-13'], | ||
| + | 'Died': ['1958-04-16', '1937-10-16'], | ||
| + | }, index=['Rosaline Franklin', 'William Gosset'], columns=['Occupation', 'Born', 'Died']) | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===From CSV=== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | news = pd.read_csv('news_2019.05.10.csv') | ||
| + | </syntaxhighlight> | ||
===Info=== | ===Info=== | ||
| Line 19: | Line 53: | ||
df.dtypes | df.dtypes | ||
df.head() | df.head() | ||
| + | df.tail() | ||
df.info() | df.info() | ||
| + | df.describe() | ||
</syntaxhighlight> | </syntaxhighlight> | ||
===Select 1 row=== | ===Select 1 row=== | ||
<syntaxhighlight lang="python3"> | <syntaxhighlight lang="python3"> | ||
| − | df.iloc[[1]] | + | df.iloc[[1]] # for positional indexing |
| + | df.loc[0] # for label based | ||
| + | df.iloc[-1] == df.loc[df.shape[0]-1] | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===Select specific rows=== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | df.loc[[9, 99, 999]] | ||
</syntaxhighlight> | </syntaxhighlight> | ||
| Line 33: | Line 76: | ||
list(df['one']) | list(df['one']) | ||
dfToList = df['one'].tolist() | dfToList = df['one'].tolist() | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===Select multiple columns=== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | df[['column1', 'column2', 'column3']] | ||
</syntaxhighlight> | </syntaxhighlight> | ||
| Line 38: | Line 86: | ||
<syntaxhighlight lang="python3"> | <syntaxhighlight lang="python3"> | ||
df.iloc[1][1] | df.iloc[1][1] | ||
| − | |||
df.iloc[1]['summary'] | df.iloc[1]['summary'] | ||
| + | df.iloc[1, 3] | ||
| + | |||
| + | df.loc[1, 'summary'] | ||
| + | </syntaxhighlight> | ||
| + | |||
| + | ===Subset multiple rows and multiple columns=== | ||
| + | <syntaxhighlight lang="python3"> | ||
| + | df.iloc[[1,34,56],[2,4,5]] | ||
| + | df.loc[[1,34,56],['modification_date', 'content']] | ||
</syntaxhighlight> | </syntaxhighlight> | ||
[[Category:Python]] | [[Category:Python]] | ||
| + | [[Category:DataScience]] | ||
Latest revision as of 21:28, 12 February 2022
Install[edit]
pip install pandas
Read CSV[edit]
df = pd.read_csv('news_2019.05.10.csv')
DataSeries[edit]
s = pd.Series(['banana', 42])
s = pd.Series(['banana', 42], index=['Fruit', 'Calories'])
s.values
s.keys()
s.values[0]
s.keys()[0]
s.min()
s.max()
s.std()
Dataframe[edit]
Create[edit]
scientists = pd.DataFrame({
'Name': ['Rosaline Franklin', 'William Gosset'],
'Occupation': ['Chemist', 'Statistician'],
'Born': ['1920-07-25', '1876-06-13'],
'Died': ['1958-04-16', '1937-10-16'],
})
If you want to fix the order of the columns explicitly (and use the names as the row index), pass the columns and index arguments:
scientists = pd.DataFrame({
'Occupation': ['Chemist', 'Statistician'],
'Born': ['1920-07-25', '1876-06-13'],
'Died': ['1958-04-16', '1937-10-16'],
}, index=['Rosaline Franklin', 'William Gosset'], columns=['Occupation', 'Born', 'Died'])
From CSV[edit]
news = pd.read_csv('news_2019.05.10.csv')
Info[edit]
df.index
df.columns
df.values
df.shape
df.dtypes
df.head()
df.tail()
df.info()
df.describe()
Select 1 row[edit]
df.iloc[[1]] # for positional indexing
df.loc[0] # for label-based indexing
df.iloc[-1] == df.loc[df.shape[0]-1] # last row; the two match only with the default 0..n-1 integer index
Select specific rows[edit]
df.loc[[9, 99, 999]]
Select 1 column[edit]
sumarys = df[['summary']]
# Or, to get the column values as a plain Python list:
list(df['one'])
dfToList = df['one'].tolist()
Select multiple columns[edit]
df[['column1', 'column2', 'column3']]
Select 1 cell[edit]
df.iloc[1][1]
df.iloc[1]['summary']
df.iloc[1, 3]
df.loc[1, 'summary']
Subset multiple rows and multiple columns[edit]
df.iloc[[1,34,56],[2,4,5]]
df.loc[[1,34,56],['modification_date', 'content']]