import numpy as np
import pandas as pd
np.random.seed(1)  # fix the seed so the sampled values are reproducible
# Columns X1..X6 are generated in order, each holding 5 draws from normal(0, 1).
dic = {f'X{col}': np.random.normal(0, 1, 5) for col in range(1, 7)}
df1 = pd.DataFrame(dic)  # a dictionary can also be turned into a DataFrame
df1
X1 X2 X3 X4 X5 X6
0 1.624345 -2.301539 1.462108 -1.099891 -1.100619 -0.683728
1 -0.611756 1.744812 -2.060141 -0.172428 1.144724 -0.122890
2 -0.528172 -0.761207 -0.322417 -0.877858 0.901591 -0.935769
3 -1.072969 0.319039 -0.384054 0.042214 0.502494 -0.267888
4 0.865408 -0.249370 1.133769 0.582815 0.900856 0.530355
  • 행 추출
# df1.iloc[[0]]
# df1.iloc[[0],:]
# df1.iloc[0,:]
# df1.loc[[0]]
# df1.loc[[0],:]
# df1.loc[0,:]
  • 대괄호 두개 처리
# df1.loc[[True,False,False,False,False]]
# df1.iloc[[True,False,False,False,False],:]
# df1.iloc[[True,False,False,False,False]]
  • 대괄호 두개 처리!
# df1.loc[[0,2],:] # 1,3행 선택

loc vs iloc

# Small example frame whose row labels are the letters a-d.
_df = pd.DataFrame(
    data={'A': [1, 2, 3, 4], 'B': [4, 5, 6, 7]},
    index=list('abcd'),  # the index is supplied as a list
)
_df
A B
a 1 4
b 2 5
c 3 6
d 4 7
  • 연속 추출은 대괄호 생략 가능
# _df.loc['a':'c',:]
# _df.iloc[0:3,:]   =>  0,1,2행
# _df.iloc[[0,2],:]  =>  0,2행
# Draw a 20x4 array from np.random.normal (default loc and scale); the result is only displayed, not stored.
np.random.normal(size=(20,4))
array([[-1.37311732,  0.31515939,  0.84616065, -0.85951594],
       [ 0.35054598, -1.31228341, -0.03869551, -1.61577235],
       [ 1.12141771,  0.40890054, -0.02461696, -0.77516162],
       [ 1.27375593,  1.96710175, -1.85798186,  1.23616403],
       [ 1.62765075,  0.3380117 , -1.19926803,  0.86334532],
       [-0.1809203 , -0.60392063, -1.23005814,  0.5505375 ],
       [ 0.79280687, -0.62353073,  0.52057634, -1.14434139],
       [ 0.80186103,  0.0465673 , -0.18656977, -0.10174587],
       [ 0.86888616,  0.75041164,  0.52946532,  0.13770121],
       [ 0.07782113,  0.61838026,  0.23249456,  0.68255141],
       [-0.31011677, -2.43483776,  1.0388246 ,  2.18697965],
       [ 0.44136444, -0.10015523, -0.13644474, -0.11905419],
       [ 0.01740941, -1.12201873, -0.51709446, -0.99702683],
       [ 0.24879916, -0.29664115,  0.49521132, -0.17470316],
       [ 0.98633519,  0.2135339 ,  2.19069973, -1.89636092],
       [-0.64691669,  0.90148689,  2.52832571, -0.24863478],
       [ 0.04366899, -0.22631424,  1.33145711, -0.28730786],
       [ 0.68006984, -0.3198016 , -1.27255876,  0.31354772],
       [ 0.50318481,  1.29322588, -0.11044703, -0.61736206],
       [ 0.5627611 ,  0.24073709,  0.28066508, -0.0731127 ]])
np.random.seed(1)  # fix the seed so the table is reproducible
# 20 rows x 4 columns of normal draws, labelled by 20 consecutive dates starting 2020-12-25.
_df = pd.DataFrame(
    data=np.random.normal(size=(20, 4)),
    index=pd.date_range('20201225', periods=20),
    columns=list('ABCD'),
)
_df
A B C D
2020-12-25 1.624345 -0.611756 -0.528172 -1.072969
2020-12-26 0.865408 -2.301539 1.744812 -0.761207
2020-12-27 0.319039 -0.249370 1.462108 -2.060141
2020-12-28 -0.322417 -0.384054 1.133769 -1.099891
2020-12-29 -0.172428 -0.877858 0.042214 0.582815
2020-12-30 -1.100619 1.144724 0.901591 0.502494
2020-12-31 0.900856 -0.683728 -0.122890 -0.935769
2021-01-01 -0.267888 0.530355 -0.691661 -0.396754
2021-01-02 -0.687173 -0.845206 -0.671246 -0.012665
2021-01-03 -1.117310 0.234416 1.659802 0.742044
2021-01-04 -0.191836 -0.887629 -0.747158 1.692455
2021-01-05 0.050808 -0.636996 0.190915 2.100255
2021-01-06 0.120159 0.617203 0.300170 -0.352250
2021-01-07 -1.142518 -0.349343 -0.208894 0.586623
2021-01-08 0.838983 0.931102 0.285587 0.885141
2021-01-09 -0.754398 1.252868 0.512930 -0.298093
2021-01-10 0.488518 -0.075572 1.131629 1.519817
2021-01-11 2.185575 -1.396496 -1.444114 -0.504466
2021-01-12 0.160037 0.876169 0.315635 -2.022201
2021-01-13 -0.306204 0.827975 0.230095 0.762011
  • 연속 추출시 대괄호 생략 가능
 
# Wrap the index in a Series to see the positional number of each row label.
pd.Series(_df.index) # pass _df.columns instead of _df.index to get the positional number of each column
0    2020-12-25
1    2020-12-26
2    2020-12-27
3    2020-12-28
4    2020-12-29
5    2020-12-30
6    2020-12-31
7    2021-01-01
8    2021-01-02
9    2021-01-03
10   2021-01-04
11   2021-01-05
12   2021-01-06
13   2021-01-07
14   2021-01-08
15   2021-01-09
16   2021-01-10
17   2021-01-11
18   2021-01-12
19   2021-01-13
dtype: datetime64[ns]
# Positional slice: rows at positions 11 through 14 (the end position 15 is excluded).
_df.iloc[11:15]
A B C D
2021-01-05 0.050808 -0.636996 0.190915 2.100255
2021-01-06 0.120159 0.617203 0.300170 -0.352250
2021-01-07 -1.142518 -0.349343 -0.208894 0.586623
2021-01-08 0.838983 0.931102 0.285587 0.885141

특정 이름의 열 추출

# Attribute-style access: returns column 'A'.
_df.A
2020-12-25    1.624345
2020-12-26    0.865408
2020-12-27    0.319039
2020-12-28   -0.322417
2020-12-29   -0.172428
2020-12-30   -1.100619
2020-12-31    0.900856
2021-01-01   -0.267888
2021-01-02   -0.687173
2021-01-03   -1.117310
2021-01-04   -0.191836
2021-01-05    0.050808
2021-01-06    0.120159
2021-01-07   -1.142518
2021-01-08    0.838983
2021-01-09   -0.754398
2021-01-10    0.488518
2021-01-11    2.185575
2021-01-12    0.160037
2021-01-13   -0.306204
Freq: D, Name: A, dtype: float64
# Bracket access by name: returns column 'A', same result as _df.A.
_df['A'] # works for columns, not for rows
2020-12-25    1.624345
2020-12-26    0.865408
2020-12-27    0.319039
2020-12-28   -0.322417
2020-12-29   -0.172428
2020-12-30   -1.100619
2020-12-31    0.900856
2021-01-01   -0.267888
2021-01-02   -0.687173
2021-01-03   -1.117310
2021-01-04   -0.191836
2021-01-05    0.050808
2021-01-06    0.120159
2021-01-07   -1.142518
2021-01-08    0.838983
2021-01-09   -0.754398
2021-01-10    0.488518
2021-01-11    2.185575
2021-01-12    0.160037
2021-01-13   -0.306204
Freq: D, Name: A, dtype: float64
_df.iloc[::5] # take every 5th row (positions 0, 5, 10, 15)
A B C D
2020-12-25 1.624345 -0.611756 -0.528172 -1.072969
2020-12-30 -1.100619 1.144724 0.901591 0.502494
2021-01-04 -0.191836 -0.887629 -0.747158 1.692455
2021-01-09 -0.754398 1.252868 0.512930 -0.298093
_df.iloc[:,::2] # keep all rows, take every 2nd column (A and C)
A C
2020-12-25 1.624345 -0.528172
2020-12-26 0.865408 1.744812
2020-12-27 0.319039 1.462108
2020-12-28 -0.322417 1.133769
2020-12-29 -0.172428 0.042214
2020-12-30 -1.100619 0.901591
2020-12-31 0.900856 -0.122890
2021-01-01 -0.267888 -0.691661
2021-01-02 -0.687173 -0.671246
2021-01-03 -1.117310 1.659802
2021-01-04 -0.191836 -0.747158
2021-01-05 0.050808 0.190915
2021-01-06 0.120159 0.300170
2021-01-07 -1.142518 -0.208894
2021-01-08 0.838983 0.285587
2021-01-09 -0.754398 0.512930
2021-01-10 0.488518 1.131629
2021-01-11 2.185575 -1.444114
2021-01-12 0.160037 0.315635
2021-01-13 -0.306204 0.230095

lambda + map

np.random.seed(1)  # fix the seed so the filtering examples are reproducible
# 10 rows x 4 columns (A-D) of normal draws with the default integer row index.
df2 = pd.DataFrame(
    data=np.random.normal(size=(10, 4)),
    columns=list('ABCD'),
)

A>0 : A열만 뽑는 것이 아니라, A>0인 행을 모든 열과 함께 뽑음

 
 
 
 

A>0,C<0 : A>0,C<0인 모든 행 뽑음

 
 
  • 괄호 처리 주의(&): &는 비교 연산자보다 우선순위가 높으므로 각 조건을 괄호로 묶어야 함. 예) df2[(df2.A>0) & (df2.C<0)]