# Series and DataFrame operations (運算)
# Series
# Names used throughout these notes; the snippets assume they are in scope.
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# A Series holding 10 uniform random floats drawn from [0, 1).
s = Series(np.random.rand(10))

# Sorting returns a new Series; `s` itself is not modified.
s.sort_values()                 # by value, ascending
s.sort_values(ascending=False)  # by value, descending
s.sort_index()                  # by index label, ascending
s.sort_index(ascending=False)   # by index label, descending

# Scalar reductions over the values.
s.max()
s.min()
s.mean()

# Arithmetic between two Series aligns on index labels, not on position.
# With the default 0..n-1 index the labels coincide, so this is element-wise.
s1 = Series([1, 2, 3])
s2 = Series([4, 5, 6])
s1 + s2  # 5, 7, 9

# Identical explicit labels: every label is matched.
s1 = Series([1, 2, 3], index=['A', 'B', 'C'])
s2 = Series([4, 5, 6], index=['A', 'B', 'C'])
s1 + s2  # A=5, B=7, C=9

# Partially overlapping labels: the result index is the union of both.
s1 = Series([1, 2, 3], index=['A', 'B', 'C'])  # s1 has no 'D' or 'E'
s2 = Series([4, 5, 6], index=['A', 'D', 'E'])  # s2 has no 'B' or 'C'
s1 + s2  # A=5; B, C, D, E are NaN because number + missing = NaN
# DataFrame
# A 4-row by 5-column frame of uniform random floats in [0, 1).
# Row and column labels default to 0..3 and 0..4.
df = DataFrame(np.random.rand(4, 5))

# Each sort returns a new frame; `df` is left untouched.
df.sort_values(by=4)          # reorder rows by the values in column 4
df.sort_values(by=1, axis=1)  # reorder columns by the values in row 1
df.sort_index()               # reorder rows by index label

# Reductions.
df.max()        # maximum of each column
df.max(axis=1)  # maximum of each row

# Summary statistics per column (count, mean, std, min, quartiles, max).
df.describe()
# Binning (分級): grouping values into graded intervals
# Standard-library helpers for generating random student names.
# (pandas.util.testing.rands is no longer available in pandas 2.x.)
import random
import string

# 20 random integer scores in [30, 100). randint is required here:
# np.random.rand(30, 100, 20) would build a 3-D array of floats in [0, 1),
# which pd.cut cannot bin.
score_list = np.random.randint(30, 100, 20)

# Bin edges. Intervals are right-closed, so without include_lowest=True the
# first bin would be (30, 59] and a score of exactly 30 would become NaN.
bins = [30, 59, 79, 100]

# `a` is a Categorical giving the interval each score falls into.
a = pd.cut(score_list, bins, include_lowest=True)
a.value_counts()  # how many scores landed in each interval

# Build a frame of 20 students, each with a random 5-character name.
df = DataFrame()
alphabet = string.ascii_letters + string.digits
df['student'] = [''.join(random.choices(alphabet, k=5)) for _ in range(20)]
df['score'] = score_list

# Bin a column: first showing the raw intervals, then with readable labels.
pd.cut(df['score'], bins=bins, include_lowest=True)
pd.cut(df['score'], bins=bins, labels=['bad', 'ok', 'great'],
       include_lowest=True)

# Store the labelled grade as a new column.
df['bins'] = pd.cut(df['score'], bins=bins, labels=['bad', 'ok', 'great'],
                    include_lowest=True)