Changes

Jump to navigation Jump to search
990 bytes added, 21:28, 12 February 2022
m
no edit summary
Line 7: Line 7:  
<syntaxhighlight lang="python3">
 
<syntaxhighlight lang="python3">
 
df = pd.read_csv('news_2019.05.10.csv')
 
df = pd.read_csv('news_2019.05.10.csv')
 +
</syntaxhighlight>
 +
 +
==DataSeries==
 +
<syntaxhighlight lang="python3">
 +
s = pd.Series(['banana', 42])
 +
s = pd.Series(['banana', 42], index=['Fruit', 'Calories'])
 +
s.values
 +
s.keys()
 +
s.values[0]
 +
s.keys()[0]
 +
s.min()
 +
s.max()
 +
s.std()
 
</syntaxhighlight>
 
</syntaxhighlight>
    
==Dataframe==
 
==Dataframe==
 +
 +
===Create===
 +
<syntaxhighlight lang="python3">
 +
scientists = pd.DataFrame({
 +
    'Name': ['Rosaline Franklin', 'William Gosset'],
 +
    'Occupation': ['Chemist', 'Statistician'],
 +
    'Born': ['1920-07-25', '1876-06-13'],
 +
    'Died': ['1958-04-16', '1937-10-16'],
 +
})
 +
</syntaxhighlight>If you want to use the names as the row index and set the order of the columns explicitly:<syntaxhighlight lang="python3">
 +
scientists = pd.DataFrame({
 +
    'Occupation': ['Chemist', 'Statistician'],
 +
    'Born': ['1920-07-25', '1876-06-13'],
 +
    'Died': ['1958-04-16', '1937-10-16'],
 +
}, index=['Rosaline Franklin', 'William Gosset'], columns=['Occupation', 'Born', 'Died'])
 +
</syntaxhighlight>
 +
 +
===From CSV===
 +
<syntaxhighlight lang="python3">
 +
news = pd.read_csv('news_2019.05.10.csv')
 +
</syntaxhighlight>
    
===Info===
 
===Info===
Line 21: Line 55:  
df.tail()
 
df.tail()
 
df.info()
 
df.info()
 +
df.describe()
 
</syntaxhighlight>
 
</syntaxhighlight>
   Line 30: Line 65:  
</syntaxhighlight>
 
</syntaxhighlight>
   −
=== Select specific rows ===
+
===Select specific rows===
 
<syntaxhighlight lang="python3">
 
<syntaxhighlight lang="python3">
 
df.loc[[9, 99, 999]]
 
df.loc[[9, 99, 999]]
Line 57: Line 92:  
</syntaxhighlight>
 
</syntaxhighlight>
   −
=== Subset multiple rows and multiple columns ===
+
===Subset multiple rows and multiple columns===
 
<syntaxhighlight lang="python3">
 
<syntaxhighlight lang="python3">
 
df.iloc[[1,34,56],[2,4,5]]
 
df.iloc[[1,34,56],[2,4,5]]
Line 63: Line 98:  
</syntaxhighlight>
 
</syntaxhighlight>
 
[[Category:Python]]
 
[[Category:Python]]
 +
[[Category:DataScience]]

Navigation menu