Pandas¶
Tutorials¶
Parse Excel or CSV into DataFrame¶
import pandas as pd
# data = pd.read_excel('example.xlsx', 'sheet1')
data = pd.read_csv("example.csv", sep=';', header=None, names=['host', 'speed', 'time'])
print data[data.speed <= 950]
Get data from SQL¶
from pandas.io import sql
import sqlite3
conn = sqlite3.connect('example.sqlite3')
query = "SELECT * FROM mytable WHERE mycol = 'MUH';"
data = sql.read_frame(query, con=conn)
print data.describe()
Read data from the web¶
from urllib2 import urlopen
from StringIO import StringIO
url = urlopen('http://www.codekid.net/data_file.txt').read()
data = pd.read_table(StringIO(url), sep='\t')
Converter¶
replace all $ in salary column with nothing
data = pd.read_csv('example.csv',
converters={'salary': lambda x: float(x.replace('$', ''))})
Selecting stuff¶
where
print data[(data.speed <= 950) | (data.time > 0.2)]
print data[(data.speed > 950) & (data.time < 0.2)]
join
pd.merge(left_frame, right_frame, on='key', how='inner')
pd.merge(left_frame, right_frame, on='key', how='left')
pd.merge(left_frame, right_frame, on='key', how='right')
Merging¶
data_full = pd.concat([data, data_set2])
data_full = pd.merge(data, data_set2)
Aggregation¶
Group data by column title
Run np.size and np.mean functions on column rating
data.groupby('title').agg({'rating': [np.size, np.mean]})
Iteration¶
for entry in data[['date', 'value']].itertuples():
print entry
Graphing¶
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_excel('example.xlsx', 'sheet1')
data.set_index("name", inplace=True)
plt.figure()
plt.title("Just a simple test")
data.plot(kind="barh")
plt.show()