from pandas_datareader import data as pdr 
import numpy as np 
import pandas as pd 
import warnings
from IPython.display import HTML
def show(fig):
    """Wrap a figure's HTML export in an IPython ``HTML`` display object.

    ``fig`` must provide a ``to_html`` method (presumably a plotly figure --
    confirm against callers). The export loads plotly.js from the CDN,
    leaves MathJax out, and disables scroll-wheel zoom.
    """
    plot_config = {'scrollZoom': False}
    markup = fig.to_html(
        include_plotlyjs='cdn',
        include_mathjax=False,
        config=plot_config,
    )
    return HTML(markup)

line - (matplotlib)

# Tickers to download and the requested date window.
symbols = ['AMZN','AAPL','GOOG','MSFT','NFLX','NVDA','TSLA']
start = '2020-01-01'
end = '2020-11-28'
# Fetch the price data from Yahoo and keep only the 'Adj Close' table:
# one row per date, one column per ticker.
# NOTE(review): the displayed index starts at 2019-12-31 although start is
# 2020-01-01 -- presumably a timezone offset on the data source; verify.
df = pdr.get_data_yahoo(symbols,start,end)['Adj Close']
df.index
DatetimeIndex(['2019-12-31', '2020-01-02', '2020-01-03', '2020-01-06',
               '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-13', '2020-01-14',
               ...
               '2020-11-13', '2020-11-16', '2020-11-17', '2020-11-18',
               '2020-11-19', '2020-11-20', '2020-11-23', '2020-11-24',
               '2020-11-25', '2020-11-27'],
              dtype='datetime64[ns]', name='Date', length=231, freq=None)
df.columns
Index(['AMZN', 'AAPL', 'GOOG', 'MSFT', 'NFLX', 'NVDA', 'TSLA'], dtype='object', name='Symbols')
df # Date is currently acting as the index
Symbols AMZN AAPL GOOG MSFT NFLX NVDA TSLA
Date
2019-12-31 1847.839966 72.337982 1337.020020 154.749741 323.570007 58.676846 83.666000
2020-01-02 1898.010010 73.988480 1367.369995 157.615112 329.809998 59.826443 86.052002
2020-01-03 1874.969971 73.269150 1360.660034 155.652542 325.899994 58.868862 88.601997
2020-01-06 1902.880005 73.852982 1394.209961 156.054855 335.829987 59.115734 90.307999
2020-01-07 1906.859985 73.505646 1393.339966 154.632004 330.750000 59.831432 93.811996
... ... ... ... ... ... ... ...
2020-11-20 3099.399902 116.621048 1742.189941 208.641113 488.239990 130.724670 489.609985
2020-11-23 3098.389893 113.152443 1734.859985 208.363434 476.619995 131.246567 521.849976
2020-11-24 3118.060059 114.464348 1768.880005 212.082260 482.880005 129.426178 555.380005
2020-11-25 3185.070068 115.319084 1771.430054 212.092194 485.000000 132.192963 574.000000
2020-11-27 3195.340088 115.875648 1793.189941 213.440872 491.359985 132.457642 585.760010

231 rows × 7 columns

  • Symbols는 컬럼 인덱스(df.columns)의 이름(name)으로, 각 컬럼이 어떤 종목(회사)인지를 나타내는 레이블이다
df.reset_index()
# Date has moved from the index into a regular column
Symbols Date AMZN AAPL GOOG MSFT NFLX NVDA TSLA
0 2019-12-31 1847.839966 72.337982 1337.020020 154.749741 323.570007 58.676846 83.666000
1 2020-01-02 1898.010010 73.988480 1367.369995 157.615112 329.809998 59.826443 86.052002
2 2020-01-03 1874.969971 73.269150 1360.660034 155.652542 325.899994 58.868862 88.601997
3 2020-01-06 1902.880005 73.852982 1394.209961 156.054855 335.829987 59.115734 90.307999
4 2020-01-07 1906.859985 73.505646 1393.339966 154.632004 330.750000 59.831432 93.811996
... ... ... ... ... ... ... ... ...
226 2020-11-20 3099.399902 116.621048 1742.189941 208.641113 488.239990 130.724670 489.609985
227 2020-11-23 3098.389893 113.152443 1734.859985 208.363434 476.619995 131.246567 521.849976
228 2020-11-24 3118.060059 114.464348 1768.880005 212.082260 482.880005 129.426178 555.380005
229 2020-11-25 3185.070068 115.319084 1771.430054 212.092194 485.000000 132.192963 574.000000
230 2020-11-27 3195.340088 115.875648 1793.189941 213.440872 491.359985 132.457642 585.760010

231 rows × 8 columns

# Line plot of a single ticker (AMZN) against Date.
df.reset_index().plot.line(x='Date',y='AMZN')
<AxesSubplot:xlabel='Date'>
# Passing a list to y draws several tickers on the same axes.
df.reset_index().plot.line(x='Date',y=['AMZN','GOOG'])
<AxesSubplot:xlabel='Date'>
# With y omitted, every remaining column is drawn on one set of axes.
df.reset_index().plot.line(x='Date')
<AxesSubplot:xlabel='Date'>
# Same plot with an explicit figure size passed via figsize.
df.reset_index().plot.line(x='Date',figsize=(10,10))
<AxesSubplot:xlabel='Date'>

서브플랏

# subplots=True gives each ticker its own axes (returns an array of 7 axes).
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
       <AxesSubplot:xlabel='Date'>], dtype=object)
# layout=(4,2) arranges the subplots on a 4-row by 2-column grid.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2))
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

- 폰트조정

# fontsize adjusts the font size of the axis tick labels.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2),fontsize=20)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

- 투명도 조정

# alpha controls line transparency (0.3 = mostly transparent).
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2),alpha=0.3)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

- 레전드 삭제

# legend=False removes the legend from every subplot.
df.reset_index().plot.line(x='Date',figsize=(10,10),subplots=True,layout=(4,2),legend=False)
array([[<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
       [<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>]],
      dtype=object)

bar, barh

예제1 (matplotlib)

# Load the mobile-phone market-share CSV: per the displayed output, one row
# per month (Date) and one column per manufacturer (wide form).
df = pd.read_csv('https://raw.githubusercontent.com/kalilurrahman/datasets/main/mobilephonemktshare2020.csv')
df
Date Samsung Apple Huawei Xiaomi Oppo Mobicel Motorola LG Others Realme Google Nokia Lenovo OnePlus Sony Asus
0 2019-10 31.49 22.09 10.02 7.79 4.10 3.15 2.41 2.40 9.51 0.54 2.35 0.95 0.96 0.70 0.84 0.74
1 2019-11 31.36 22.90 10.18 8.16 4.42 3.41 2.40 2.40 9.10 0.78 0.66 0.97 0.97 0.73 0.83 0.75
2 2019-12 31.37 24.79 9.95 7.73 4.23 3.19 2.50 2.54 8.13 0.84 0.75 0.90 0.87 0.74 0.77 0.70
3 2020-01 31.29 24.76 10.61 8.10 4.25 3.02 2.42 2.40 7.55 0.88 0.69 0.88 0.86 0.79 0.80 0.69
4 2020-02 30.91 25.89 10.98 7.80 4.31 2.89 2.36 2.34 7.06 0.89 0.70 0.81 0.77 0.78 0.80 0.69
5 2020-03 30.80 27.03 10.70 7.70 4.30 2.87 2.35 2.28 6.63 0.93 0.73 0.72 0.74 0.78 0.76 0.66
6 2020-04 30.41 28.79 10.28 7.60 4.20 2.75 2.51 2.28 5.84 0.90 0.75 0.69 0.71 0.80 0.76 0.70
7 2020-05 30.18 26.72 10.39 8.36 4.70 3.12 2.46 2.19 6.31 1.04 0.70 0.73 0.77 0.81 0.78 0.76
8 2020-06 31.06 25.26 10.69 8.55 4.65 3.18 2.57 2.11 6.39 1.04 0.68 0.74 0.75 0.77 0.78 0.75
9 2020-07 30.95 24.82 10.75 8.94 4.69 3.46 2.45 2.03 6.41 1.13 0.65 0.76 0.74 0.76 0.75 0.72
10 2020-08 31.04 25.15 10.73 8.90 4.69 3.38 2.39 1.96 6.31 1.18 0.63 0.74 0.72 0.75 0.73 0.70
11 2020-09 30.57 24.98 10.58 9.49 4.94 3.50 2.27 1.88 6.12 1.45 0.63 0.74 0.67 0.81 0.69 0.67
12 2020-10 30.25 26.53 10.44 9.67 4.83 2.54 2.21 1.79 6.04 1.55 0.63 0.69 0.65 0.85 0.67 0.64
# Grouped vertical bars: Samsung vs. Apple share for each Date.
df.plot.bar(x='Date',y=['Samsung','Apple'],figsize=(10,5))
<AxesSubplot:xlabel='Date'>
# Same chart with the bar width set explicitly to 0.8.
df.plot.bar(x='Date',y=['Samsung','Apple'],figsize=(10,5),width=0.8)
<AxesSubplot:xlabel='Date'>
# Horizontal version: Date runs along the y-axis, so the figure is made tall.
df.plot.barh(x='Date',y=['Samsung','Apple'],figsize=(5,10))
<AxesSubplot:ylabel='Date'>
# One bar chart per manufacturer on a 4x4 grid; each subplot is titled with
# the column name, so the legend is switched off.
df.plot.bar(x='Date',figsize=(15,10),subplots=True,layout=(4,4),legend=False)
array([[<AxesSubplot:title={'center':'Samsung'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Apple'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Huawei'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Xiaomi'}, xlabel='Date'>],
       [<AxesSubplot:title={'center':'Oppo'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Mobicel'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Motorola'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'LG'}, xlabel='Date'>],
       [<AxesSubplot:title={'center':'Others'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Realme'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Google'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Nokia'}, xlabel='Date'>],
       [<AxesSubplot:title={'center':'Lenovo'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'OnePlus'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Sony'}, xlabel='Date'>,
        <AxesSubplot:title={'center':'Asus'}, xlabel='Date'>]],
      dtype=object)

df.melt(id_vars='Date') $\to$ wideform이었던 dataframe을 longform으로 바꿔서 타이디(tidy)한 데이터로 만든다

 이제 plotly를 백엔드로 활용해보자

df
Date Samsung Apple Huawei Xiaomi Oppo Mobicel Motorola LG Others Realme Google Nokia Lenovo OnePlus Sony Asus
0 2019-10 31.49 22.09 10.02 7.79 4.10 3.15 2.41 2.40 9.51 0.54 2.35 0.95 0.96 0.70 0.84 0.74
1 2019-11 31.36 22.90 10.18 8.16 4.42 3.41 2.40 2.40 9.10 0.78 0.66 0.97 0.97 0.73 0.83 0.75
2 2019-12 31.37 24.79 9.95 7.73 4.23 3.19 2.50 2.54 8.13 0.84 0.75 0.90 0.87 0.74 0.77 0.70
3 2020-01 31.29 24.76 10.61 8.10 4.25 3.02 2.42 2.40 7.55 0.88 0.69 0.88 0.86 0.79 0.80 0.69
4 2020-02 30.91 25.89 10.98 7.80 4.31 2.89 2.36 2.34 7.06 0.89 0.70 0.81 0.77 0.78 0.80 0.69
5 2020-03 30.80 27.03 10.70 7.70 4.30 2.87 2.35 2.28 6.63 0.93 0.73 0.72 0.74 0.78 0.76 0.66
6 2020-04 30.41 28.79 10.28 7.60 4.20 2.75 2.51 2.28 5.84 0.90 0.75 0.69 0.71 0.80 0.76 0.70
7 2020-05 30.18 26.72 10.39 8.36 4.70 3.12 2.46 2.19 6.31 1.04 0.70 0.73 0.77 0.81 0.78 0.76
8 2020-06 31.06 25.26 10.69 8.55 4.65 3.18 2.57 2.11 6.39 1.04 0.68 0.74 0.75 0.77 0.78 0.75
9 2020-07 30.95 24.82 10.75 8.94 4.69 3.46 2.45 2.03 6.41 1.13 0.65 0.76 0.74 0.76 0.75 0.72
10 2020-08 31.04 25.15 10.73 8.90 4.69 3.38 2.39 1.96 6.31 1.18 0.63 0.74 0.72 0.75 0.73 0.70
11 2020-09 30.57 24.98 10.58 9.49 4.94 3.50 2.27 1.88 6.12 1.45 0.63 0.74 0.67 0.81 0.69 0.67
12 2020-10 30.25 26.53 10.44 9.67 4.83 2.54 2.21 1.79 6.04 1.55 0.63 0.69 0.65 0.85 0.67 0.64
# Reshape wide -> long: Date is kept as the identifier, and every other
# column is unpivoted into (variable, value) rows.
df.melt(id_vars='Date')
Date variable value
0 2019-10 Samsung 31.49
1 2019-11 Samsung 31.36
2 2019-12 Samsung 31.37
3 2020-01 Samsung 31.29
4 2020-02 Samsung 30.91
... ... ... ...
203 2020-06 Asus 0.75
204 2020-07 Asus 0.72
205 2020-08 Asus 0.70
206 2020-09 Asus 0.67
207 2020-10 Asus 0.64

208 rows × 3 columns

# Mean market share per manufacturer, returned as a one-column ('value') frame.
# 'value' is selected explicitly because the melted frame still carries the
# string 'Date' column: pandas >= 2.0 no longer drops such columns silently
# and raises TypeError when asked to average them. Calling .mean() directly
# also avoids the pandas 2.1+ FutureWarning for passing np.mean to agg().
df.melt(id_vars='Date').groupby('variable')[['value']].mean()
value
variable
Apple 25.362308
Asus 0.705385
Google 0.811538
Huawei 10.484615
LG 2.200000
Lenovo 0.783077
Mobicel 3.112308
Motorola 2.407692
Nokia 0.793846
OnePlus 0.774615
Oppo 4.485385
Others 7.030769
Realme 1.011538
Samsung 30.898462
Sony 0.766154
Xiaomi 8.368462
# Bar chart of the mean market share per manufacturer.
# 'value' is selected explicitly so the string 'Date' column is never
# averaged (pandas >= 2.0 raises TypeError instead of dropping it), and
# .mean() replaces agg(np.mean), which warns on pandas 2.1+.
# Parentheses replace the fragile backslash line continuation.
(
    df.melt(id_vars='Date')
    .groupby('variable')[['value']]
    .mean()
    .plot.bar(legend=False)
)
<AxesSubplot:xlabel='variable'>
# Bar chart of the mean market share per manufacturer, largest first.
# 'value' is selected explicitly so the string 'Date' column is never
# averaged (pandas >= 2.0 raises TypeError instead of dropping it), and
# .mean() replaces agg(np.mean), which warns on pandas 2.1+.
# Parentheses replace the fragile backslash line continuation.
(
    df.melt(id_vars='Date')
    .groupby('variable')[['value']]
    .mean()
    .sort_values('value', ascending=False)
    .plot.bar(legend=False)
)
<AxesSubplot:xlabel='variable'>

예제1 (plotly)

# NOTE(review): the accompanying notes report that this call raises an error
# when the wide-form frame is handed to the plotly backend as-is; it appears
# to be kept here to demonstrate that failure -- confirm before relying on it.
df.plot.bar(backend='plotly')

error 발생 $\to$ wideform 데이터프레임은 그대로는 plotly를 백엔드로 활용할 수 없음

  • plotly는 이 블로그에서 지원하지 않으므로 주석처리 하겠음
# plot.bar(backend='plotly')

위의 그림은 제조사별 평균 점유율을 그린 것이고, 아래 그림은 날짜별로 각 제조사의 점유율을 그린 것이다. 따라서 각 날짜 안에서 제조사를 구별하려면 색깔로 제조사를 구분해 줄 필요가 있다

# plot.bar(x='Date',y='value',color='variable',backend='plotly',width=500,height=600)
# plot.bar(x='Date',y='value',color='variable',backend='plotly',barmode='group')
# plot.bar(x='Date',y='value',color='variable',backend='plotly',barmode='group',text='value')
# plot.bar(x='Date',y='value',color='variable',backend='plotly',facet_col='variable')
# plot.bar(y='Date',x='value',color='variable',backend='plotly',facet_row='variable',height=700)