import pandas as pd 
df2016=pd.read_csv("https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/stocks_2016.csv")
df2017=pd.read_csv("https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/stocks_2017.csv")
df2018=pd.read_csv("https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/stocks_2018.csv")
pd.concat([df2016,df2017,df2018]) # default는 아래로 쌓음
Symbol Shares Low High
0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
0 AAPL 40 135 170
1 AMZN 8 900 1125
2 TSLA 50 220 400
pd.concat([df2016,df2017,df2018]).reset_index() # 구 인덱스는 아직 존재함
index Symbol Shares Low High
0 0 AAPL 80 95 110
1 1 TSLA 50 80 130
2 2 WMT 40 55 70
3 0 AAPL 50 120 140
4 1 GE 100 30 40
5 2 IBM 87 75 95
6 3 SLB 20 55 85
7 4 TXN 500 15 23
8 5 TSLA 100 100 300
9 0 AAPL 40 135 170
10 1 AMZN 8 900 1125
11 2 TSLA 50 220 400
pd.concat([df2016,df2017,df2018]).reset_index(drop=True)
Symbol Shares Low High
0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
3 AAPL 50 120 140
4 GE 100 30 40
5 IBM 87 75 95
6 SLB 20 55 85
7 TXN 500 15 23
8 TSLA 100 100 300
9 AAPL 40 135 170
10 AMZN 8 900 1125
11 TSLA 50 220 400

특정 구간만 뽑아서 concat하기

pd.concat([df2016,df2017.iloc[:,1:]]) # df2017에서 선택되지 못한 자료인 0열은 자동으로 NaN처리
Symbol Shares Low High
0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
0 NaN 50 120 140
1 NaN 100 30 40
2 NaN 87 75 95
3 NaN 20 55 85
4 NaN 500 15 23
5 NaN 100 100 300
pd.concat([df2016,df2017,df2018],axis='columns') 
## axis='columns'  axis=1로 가능/ 1대신에 0쓰면 rbind, 또는 그냥 aixs=0 안 써도 자동으로 rbind 처리
Symbol Shares Low High Symbol Shares Low High Symbol Shares Low High
0 AAPL 80.0 95.0 110.0 AAPL 50 120 140 AAPL 40.0 135.0 170.0
1 TSLA 50.0 80.0 130.0 GE 100 30 40 AMZN 8.0 900.0 1125.0
2 WMT 40.0 55.0 70.0 IBM 87 75 95 TSLA 50.0 220.0 400.0
3 NaN NaN NaN NaN SLB 20 55 85 NaN NaN NaN NaN
4 NaN NaN NaN NaN TXN 500 15 23 NaN NaN NaN NaN
5 NaN NaN NaN NaN TSLA 100 100 300 NaN NaN NaN NaN
df=pd.concat([df2016,df2017,df2018],keys=[2016,2017,2018])
df
Symbol Shares Low High
2016 0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
2017 0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
2018 0 AAPL 40 135 170
1 AMZN 8 900 1125
2 TSLA 50 220 400
df.index
MultiIndex([(2016, 0),
            (2016, 1),
            (2016, 2),
            (2017, 0),
            (2017, 1),
            (2017, 2),
            (2017, 3),
            (2017, 4),
            (2017, 5),
            (2018, 0),
            (2018, 1),
            (2018, 2)],
           )
  • $MuiliIndex$가 됐음을 알 수 있다.
pd.concat({2016:df2016,2017:df2017,2018:df2018})
Symbol Shares Low High
2016 0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
2017 0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
2018 0 AAPL 40 135 170
1 AMZN 8 900 1125
2 TSLA 50 220 400
pd.concat(dict(zip([2016,2017,2018],[df2016,df2017,df2018])))
Symbol Shares Low High
2016 0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
2017 0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
2018 0 AAPL 40 135 170
1 AMZN 8 900 1125
2 TSLA 50 220 400
pd.concat([df2016,df2017,df2018],keys=[2016,2017])
Symbol Shares Low High
2016 0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
2017 0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
pd.concat([df2016,df2017,df2018],keys=[2016,2018]) #이렇게 해도 이름만 2018이지 데이터는 df2017로 불러오게 된다
Symbol Shares Low High
2016 0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
2018 0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
pd.concat([df2016,df2017,df2018],keys=['a','S','C',2019]) #2019는 그냥 아무처리도 못함
Symbol Shares Low High
a 0 AAPL 80 95 110
1 TSLA 50 80 130
2 WMT 40 55 70
S 0 AAPL 50 120 140
1 GE 100 30 40
2 IBM 87 75 95
3 SLB 20 55 85
4 TXN 500 15 23
5 TSLA 100 100 300
C 0 AAPL 40 135 170
1 AMZN 8 900 1125
2 TSLA 50 220 400

pd.concat([...],keys=[...],axis='columns')

df=pd.concat([df2016,df2017,df2018],axis=1,keys=[2016,2017,2018])
df
2016 2017 2018
Symbol Shares Low High Symbol Shares Low High Symbol Shares Low High
0 AAPL 80.0 95.0 110.0 AAPL 50 120 140 AAPL 40.0 135.0 170.0
1 TSLA 50.0 80.0 130.0 GE 100 30 40 AMZN 8.0 900.0 1125.0
2 WMT 40.0 55.0 70.0 IBM 87 75 95 TSLA 50.0 220.0 400.0
3 NaN NaN NaN NaN SLB 20 55 85 NaN NaN NaN NaN
4 NaN NaN NaN NaN TXN 500 15 23 NaN NaN NaN NaN
5 NaN NaN NaN NaN TSLA 100 100 300 NaN NaN NaN NaN
df.columns
MultiIndex([(2016, 'Symbol'),
            (2016, 'Shares'),
            (2016,    'Low'),
            (2016,   'High'),
            (2017, 'Symbol'),
            (2017, 'Shares'),
            (2017,    'Low'),
            (2017,   'High'),
            (2018, 'Symbol'),
            (2018, 'Shares'),
            (2018,    'Low'),
            (2018,   'High')],
           )
  • column도 MultiIndex로 들어갔음
pd.concat({2016:df2016,2017:df2017,2018:df2018},axis=1)
2016 2017 2018
Symbol Shares Low High Symbol Shares Low High Symbol Shares Low High
0 AAPL 80.0 95.0 110.0 AAPL 50 120 140 AAPL 40.0 135.0 170.0
1 TSLA 50.0 80.0 130.0 GE 100 30 40 AMZN 8.0 900.0 1125.0
2 WMT 40.0 55.0 70.0 IBM 87 75 95 TSLA 50.0 220.0 400.0
3 NaN NaN NaN NaN SLB 20 55 85 NaN NaN NaN NaN
4 NaN NaN NaN NaN TXN 500 15 23 NaN NaN NaN NaN
5 NaN NaN NaN NaN TSLA 100 100 300 NaN NaN NaN NaN
pd.concat(dict(zip([2016,2017,2018],[df2016,df2017,df2018])),axis=1)
2016 2017 2018
Symbol Shares Low High Symbol Shares Low High Symbol Shares Low High
0 AAPL 80.0 95.0 110.0 AAPL 50 120 140 AAPL 40.0 135.0 170.0
1 TSLA 50.0 80.0 130.0 GE 100 30 40 AMZN 8.0 900.0 1125.0
2 WMT 40.0 55.0 70.0 IBM 87 75 95 TSLA 50.0 220.0 400.0
3 NaN NaN NaN NaN SLB 20 55 85 NaN NaN NaN NaN
4 NaN NaN NaN NaN TXN 500 15 23 NaN NaN NaN NaN
5 NaN NaN NaN NaN TSLA 100 100 300 NaN NaN NaN NaN

타이디 데이터

df=pd.concat(dict(zip([2016,2017,2018],[df2016,df2017,df2018])))
df.stack().reset_index().drop(['level_1'],axis=1).rename(columns={'level_0':'year','level_2':'category',0:'value'})
year category value
0 2016 Symbol AAPL
1 2016 Shares 80
2 2016 Low 95
3 2016 High 110
4 2016 Symbol TSLA
5 2016 Shares 50
6 2016 Low 80
7 2016 High 130
8 2016 Symbol WMT
9 2016 Shares 40
10 2016 Low 55
11 2016 High 70
12 2017 Symbol AAPL
13 2017 Shares 50
14 2017 Low 120
15 2017 High 140
16 2017 Symbol GE
17 2017 Shares 100
18 2017 Low 30
19 2017 High 40
20 2017 Symbol IBM
21 2017 Shares 87
22 2017 Low 75
23 2017 High 95
24 2017 Symbol SLB
25 2017 Shares 20
26 2017 Low 55
27 2017 High 85
28 2017 Symbol TXN
29 2017 Shares 500
30 2017 Low 15
31 2017 High 23
32 2017 Symbol TSLA
33 2017 Shares 100
34 2017 Low 100
35 2017 High 300
36 2018 Symbol AAPL
37 2018 Shares 40
38 2018 Low 135
39 2018 High 170
40 2018 Symbol AMZN
41 2018 Shares 8
42 2018 Low 900
43 2018 High 1125
44 2018 Symbol TSLA
45 2018 Shares 50
46 2018 Low 220
47 2018 High 400