import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df=pd.read_csv('WEOApr2020all.csv')
df.describe()
WEO Country Code Estimates Start After
count 1552.000000 1466.000000
mean 553.670103 2018.291951
std 261.437803 1.075652
min 111.000000 2009.000000
25% 314.000000 2018.000000
50% 571.000000 2018.000000
75% 734.000000 2019.000000
max 968.000000 2020.000000
df.index
RangeIndex(start=0, stop=1552, step=1)
pd.Series(df.index)
0          0
1          1
2          2
3          3
4          4
        ... 
1547    1547
1548    1548
1549    1549
1550    1550
1551    1551
Length: 1552, dtype: int64
df.columns
Index(['WEO Country Code', 'ISO', 'WEO Subject Code', 'Country',
       'Subject Descriptor', 'Subject Notes', 'Units', 'Scale',
       'Country/Series-specific Notes', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002',
       '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', 'Estimates Start After'],
      dtype='object')
pd.Series(df.columns)
0                  WEO Country Code
1                               ISO
2                  WEO Subject Code
3                           Country
4                Subject Descriptor
5                     Subject Notes
6                             Units
7                             Scale
8     Country/Series-specific Notes
9                              1980
10                             1981
11                             1982
12                             1983
13                             1984
14                             1985
15                             1986
16                             1987
17                             1988
18                             1989
19                             1990
20                             1991
21                             1992
22                             1993
23                             1994
24                             1995
25                             1996
26                             1997
27                             1998
28                             1999
29                             2000
30                             2001
31                             2002
32                             2003
33                             2004
34                             2005
35                             2006
36                             2007
37                             2008
38                             2009
39                             2010
40                             2011
41                             2012
42                             2013
43                             2014
44                             2015
45                             2016
46                             2017
47                             2018
48                             2019
49                             2020
50                             2021
51            Estimates Start After
dtype: object
len(df['Country'].unique())
194
df['Country'].nunique()
# The data covers 194 distinct countries.
194
df.iloc[:,3]
0       Afghanistan
1       Afghanistan
2       Afghanistan
3       Afghanistan
4       Afghanistan
           ...     
1547       Zimbabwe
1548       Zimbabwe
1549       Zimbabwe
1550       Zimbabwe
1551       Zimbabwe
Name: Country, Length: 1552, dtype: object
df.Country
0       Afghanistan
1       Afghanistan
2       Afghanistan
3       Afghanistan
4       Afghanistan
           ...     
1547       Zimbabwe
1548       Zimbabwe
1549       Zimbabwe
1550       Zimbabwe
1551       Zimbabwe
Name: Country, Length: 1552, dtype: object
df[['Country']]
Country
0 Afghanistan
1 Afghanistan
2 Afghanistan
3 Afghanistan
4 Afghanistan
... ...
1547 Zimbabwe
1548 Zimbabwe
1549 Zimbabwe
1550 Zimbabwe
1551 Zimbabwe

1552 rows × 1 columns

df[['Subject Descriptor']].head(7)
Subject Descriptor
0 Gross domestic product, constant prices
1 Gross domestic product, current prices
2 Gross domestic product per capita, constant pr...
3 Inflation, average consumer prices
4 Inflation, end of period consumer prices
5 Unemployment rate
6 General government net lending/borrowing
df[['Subject Descriptor','2020']]
Subject Descriptor 2020
0 Gross domestic product, constant prices -3.007
1 Gross domestic product, current prices 74.792
2 Gross domestic product per capita, constant pr... -4.291
3 Inflation, average consumer prices 4.711
4 Inflation, end of period consumer prices 4.5
... ... ...
1547 Inflation, average consumer prices 319.036
1548 Inflation, end of period consumer prices 154.297
1549 Unemployment rate NaN
1550 General government net lending/borrowing -4.931
1551 Current account balance -1.914

1552 rows × 2 columns

df[['Subject Descriptor','2020','Country']]
Subject Descriptor 2020 Country
0 Gross domestic product, constant prices -3.007 Afghanistan
1 Gross domestic product, current prices 74.792 Afghanistan
2 Gross domestic product per capita, constant pr... -4.291 Afghanistan
3 Inflation, average consumer prices 4.711 Afghanistan
4 Inflation, end of period consumer prices 4.5 Afghanistan
... ... ... ...
1547 Inflation, average consumer prices 319.036 Zimbabwe
1548 Inflation, end of period consumer prices 154.297 Zimbabwe
1549 Unemployment rate NaN Zimbabwe
1550 General government net lending/borrowing -4.931 Zimbabwe
1551 Current account balance -1.914 Zimbabwe

1552 rows × 3 columns

Inflation


df['Subject Descriptor'].str
  • Series의 각 원소에 문자열 메서드를 한꺼번에(벡터화하여) 적용할 수 있게 해 주는 문자열 접근자(accessor)를 반환한다.
  • 판다스에서 문자열 관련 함수를 사용하거나 전처리를 하기 위해서는 함수 및 명령어 앞에 str을 붙여주어야 한다.

df['Subject Descriptor'].str.contains()

지정한 문자열이 포함되어 있는지 알 수 있다.


idx_inf=df['Subject Descriptor'].str.contains('Inflation, end of')
idx_inf
0       False
1       False
2       False
3       False
4        True
        ...  
1547    False
1548     True
1549    False
1550    False
1551    False
Name: Subject Descriptor, Length: 1552, dtype: bool
sum(idx_inf)
194
df.loc[idx_inf]
WEO Country Code ISO WEO Subject Code Country Subject Descriptor Subject Notes Units Scale Country/Series-specific Notes 1980 ... 2013 2014 2015 2016 2017 2018 2019 2020 2021 Estimates Start After
4 512 AFG PCPIEPCH Afghanistan Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 7.241 1.487 1.131 4.588 3.041 0.755 2.773 4.5 5 2018.0
12 914 ALB PCPIEPCH Albania Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 1.776 0.719 1.937 2.176 1.798 1.799 1.15 2.6 2.9 2019.0
20 612 DZA PCPIEPCH Algeria Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... 14.143 ... 1.148 5.253 4.362 6.957 4.933 2.702 2.434 3.3 4 2019.0
28 614 AGO PCPIEPCH Angola Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 7.687 7.498 12.091 41.125 23.667 18.604 16.893 20.979 23.981 2019.0
36 311 ATG PCPIEPCH Antigua and Barbuda Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: Central Bank Latest actual data: 2018 ... 17.71 ... 1.059 1.327 0.9 -1.121 2.356 1.741 1.574 1.348 2.008 2018.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1516 299 VEN PCPIEPCH Venezuela Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: Central Bank Latest actual data: 2019 ... NaN ... 56.193 68.54 180.87 274.354 862.629 130,060.24 9,585.49 15,000.00 15,000.00 2019.0
1524 582 VNM PCPIEPCH Vietnam Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 6.036 1.839 0.597 4.737 2.597 2.983 5.237 2 4.3 2018.0
1532 474 YEM PCPIEPCH Yemen Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office. Central Ba... NaN ... 8.14 10.005 34 11.9 47 14.3 6.2 46 5 2017.0
1540 754 ZMB PCPIEPCH Zambia Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 7.14 7.862 21.112 7.469 6.085 7.9 11.7 12.7 11.4 2019.0
1548 698 ZWE PCPIEPCH Zimbabwe Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 0.331 -0.796 -2.473 -0.898 3.43 42.074 521.15 154.297 3 2019.0

194 rows × 52 columns

# Keep only the rows flagged by the boolean mask idx_inf.
df_inf = df.loc[idx_inf]
# Take an explicit copy of the two columns we need, so the later in-place
# clean-up steps work on an independent frame rather than a slice of df_inf.
df_inf_2021 = df_inf[['Country', '2021']].copy()
df_inf_2021
Country 2021
4 Afghanistan 5
12 Albania 2.9
20 Algeria 4
28 Angola 23.981
36 Antigua and Barbuda 2.008
... ... ...
1516 Venezuela 15,000.00
1524 Vietnam 4.3
1532 Yemen 5
1540 Zambia 11.4
1548 Zimbabwe 3

194 rows × 2 columns

  • 기존(구) 인덱스는 버리고(drop=True) 0부터 새 인덱스를 매긴다
df_inf_2021.reset_index(drop=True,inplace=True)
df_inf_2021
Country 2021
0 Afghanistan 5
1 Albania 2.9
2 Algeria 4
3 Angola 23.981
4 Antigua and Barbuda 2.008
... ... ...
189 Venezuela 15,000.00
190 Vietnam 4.3
191 Yemen 5
192 Zambia 11.4
193 Zimbabwe 3

194 rows × 2 columns

df_inf_2021.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194 entries, 0 to 193
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Country  194 non-null    object
 1   2021     190 non-null    object
dtypes: object(2)
memory usage: 3.2+ KB
  • 2021 null data가 4개임을 알 수 있다

  • notnull() = null 아닌 것만
df_inf_2021=df_inf_2021.loc[df_inf_2021['2021'].notnull()]
df_inf_2021
Country 2021
0 Afghanistan 5
1 Albania 2.9
2 Algeria 4
3 Angola 23.981
4 Antigua and Barbuda 2.008
... ... ...
189 Venezuela 15,000.00
190 Vietnam 4.3
191 Yemen 5
192 Zambia 11.4
193 Zimbabwe 3

190 rows × 2 columns

  • null data가 빠졌음
df_inf_2021.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 190 entries, 0 to 193
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Country  190 non-null    object
 1   2021     190 non-null    object
dtypes: object(2)
memory usage: 4.5+ KB
  • data에 큰 숫자마다 comma가 들어가 있음 $\to$ 없애주자
  • regex=True: 정규표현식으로 매칭하도록 한다.
  • regex=True가 없으면 셀 값 전체가 ','인 경우만 바뀌므로, '15,000.00'처럼 값 안에 들어 있는 comma를 없애려면 반드시 필요하다.
# Strip thousands separators (e.g. '15,000.00') from the 2021 values.
# regex=True is required: without it DataFrame.replace only matches cells whose
# entire value equals ',', so a comma embedded in a number is left in place and
# the later pd.to_numeric call fails. The replacement is limited to the '2021'
# column so that commas inside country names are not touched.
df_inf_2021 = df_inf_2021.replace({'2021': {',': ''}}, regex=True)
df_inf_2021
Country 2021
0 Afghanistan 5
1 Albania 2.9
2 Algeria 4
3 Angola 23.981
4 Antigua and Barbuda 2.008
... ... ...
189 Venezuela 15000.00
190 Vietnam 4.3
191 Yemen 5
192 Zambia 11.4
193 Zimbabwe 3

190 rows × 2 columns

  • 이제 numeric data로 바꿔보자
df_inf_2021['2021']=pd.to_numeric(df_inf_2021['2021'])
  • 확인해보자
df_inf_2021.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 190 entries, 0 to 193
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  190 non-null    object 
 1   2021     190 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.5+ KB
  • float64로 잘 바뀌었음
df_inf_2021.sort_values('2021').plot.bar(x='Country')
<AxesSubplot:xlabel='Country'>
  • 무용지물의 그래프가 나왔다
df_inf_2021.sort_values('2021').iloc[0:20,:].plot.bar(x='Country')
<AxesSubplot:xlabel='Country'>

실업률

df_ur=df[df['Subject Descriptor'].str.contains('Unemployment')]
df_ur
WEO Country Code ISO WEO Subject Code Country Subject Descriptor Subject Notes Units Scale Country/Series-specific Notes 1980 ... 2013 2014 2015 2016 2017 2018 2019 2020 2021 Estimates Start After
5 512 AFG LUR Afghanistan Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
13 914 ALB LUR Albania Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: National Statistics Office Latest actu... 5.028 ... 15.9 17.5 17.1 15.2 13.7 12.3 12 11.8 11.5 2019.0
21 612 DZA LUR Algeria Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: National Statistics Office Latest actu... 15.789 ... 9.829 10.6 11.214 10.498 11.709 11.731 11.383 15.091 13.909 2019.0
29 614 AGO LUR Angola Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
37 311 ATG LUR Antigua and Barbuda Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1517 299 VEN LUR Venezuela Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: National Statistics Office Latest actu... NaN ... 7.47 6.7 7.4 20.863 27.886 35.543 NaN NaN NaN 2011.0
1525 582 VNM LUR Vietnam Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: Other Latest actual data: 2019 Employm... NaN ... 2.75 2.1 2.33 2.33 2.21 2.21 2.21 NaN NaN 2019.0
1533 474 YEM LUR Yemen Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1541 754 ZMB LUR Zambia Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1549 698 ZWE LUR Zimbabwe Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

194 rows × 52 columns

df_ur=df_ur[['Country','2021']] 
df_ur.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 194 entries, 5 to 1549
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Country  194 non-null    object
 1   2021     100 non-null    object
dtypes: object(2)
memory usage: 4.5+ KB
  • 94개의 null data 발견
# Drop the countries that have no 2021 figure and renumber the rows from 0.
# reset_index returns a new frame here (rather than inplace=True on a .loc
# slice), so the later column assignment does not raise SettingWithCopyWarning.
df_ur = df_ur.loc[df_ur['2021'].notnull()].reset_index(drop=True)
df_ur
Country 2021
0 Albania 11.5
1 Algeria 13.909
2 Argentina 10.084
3 Armenia 18.389
4 Aruba 7.458
... ... ...
95 Turkey 15.567
96 Ukraine 9.318
97 United Kingdom 4.375
98 United States 9.135
99 Uruguay 8.098

100 rows × 2 columns

  • null data가 잘 빠졌음
  • 이제 numeric으로 변형해주자
df_ur['2021']=pd.to_numeric(df_ur['2021'])
df_ur.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  100 non-null    object 
 1   2021     100 non-null    float64
dtypes: float64(1), object(1)
memory usage: 1.7+ KB
df_ur.plot.bar(x='Country')
<AxesSubplot:xlabel='Country'>
df_ur.sort_values('2021',ascending=False).plot.bar(x='Country',
                                                  title='Unemployment Rate',figsize=(15,5))
<AxesSubplot:title={'center':'Unemployment Rate'}, xlabel='Country'>
  • 관심있는 나라만 따로 표시해보자
df_ur_np=df_ur.sort_values('2021',ascending=False).to_numpy()
  • plt.figure()나 DataFrame.plot()의 figsize 인자로 그림 크기를 지정할 수도 있지만, 매번 그릴 때마다 크기를 지정해야 하는 불편함이 있다. 특히 시계열 차트를 많이 그리는 경우 시간에 따른 변화를 보기 위해 가로로 긴 차트를 그리는 경우가 더 많다. 따라서 rcParams를 이용하여 차트 그림(figure)의 기본 설정을 지정할 수 있다.
# Default figure size for this and every later plot: wide and short.
plt.rcParams['figure.figsize'] = (20, 3)
plt.xticks(rotation='vertical')  # draw the country names vertically
# Column 0 holds the country names (x axis), column 1 the 2021 rates (y axis).
countries, rates = df_ur_np[:, 0], df_ur_np[:, 1]
plt.bar(countries, rates)
plt.title('2021 Unemployment Rate - IMF World Economics Outlook Database, April 2020')
# Redraw the bars of the countries of interest on top of the full chart so each
# one gets its own colour and legend entry.
for name in ('Korea', 'United States', 'China'):
    rows = np.where(countries == name)[0]
    plt.bar(countries[rows], rates[rows], label=name)
plt.legend()
<matplotlib.legend.Legend at 0x1a08f3edeb0>

TIP

df.head(3)
WEO Country Code ISO WEO Subject Code Country Subject Descriptor Subject Notes Units Scale Country/Series-specific Notes 1980 ... 2013 2014 2015 2016 2017 2018 2019 2020 2021 Estimates Start After
0 512 AFG NGDP_RPCH Afghanistan Gross domestic product, constant prices Annual percentages of constant price GDP are y... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 5.683 2.697 0.988 2.164 2.889 2.664 3.037 -3.007 4.495 2018.0
1 512 AFG PPPGDP Afghanistan Gross domestic product, current prices These data form the basis for the country weig... Purchasing power parity; international dollars Billions Source: National Statistics Office Latest actu... NaN ... 60.181 62.948 64.231 66.301 69.501 73.091 76.624 74.792 79.678 2018.0
2 512 AFG NGDPRPPPPCPCH Afghanistan Gross domestic product per capita, constant pr... GDP is expressed in constant international dol... Purchasing power parity; percent change NaN Source: National Statistics Office Latest actu... NaN ... 2.236 -0.521 -1.941 -0.547 0.358 1.268 1.654 -4.291 3.063 2018.0

3 rows × 52 columns

len(df.columns)
52
# Positional order for the 52 columns, with positions 0 and 3 exchanged.
idx = [*range(52)]
idx[0], idx[3] = idx[3], idx[0]
print(idx)
[3, 1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51]
df.iloc[:,idx]
Country ISO WEO Subject Code WEO Country Code Subject Descriptor Subject Notes Units Scale Country/Series-specific Notes 1980 ... 2013 2014 2015 2016 2017 2018 2019 2020 2021 Estimates Start After
0 Afghanistan AFG NGDP_RPCH 512 Gross domestic product, constant prices Annual percentages of constant price GDP are y... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 5.683 2.697 0.988 2.164 2.889 2.664 3.037 -3.007 4.495 2018.0
1 Afghanistan AFG PPPGDP 512 Gross domestic product, current prices These data form the basis for the country weig... Purchasing power parity; international dollars Billions Source: National Statistics Office Latest actu... NaN ... 60.181 62.948 64.231 66.301 69.501 73.091 76.624 74.792 79.678 2018.0
2 Afghanistan AFG NGDPRPPPPCPCH 512 Gross domestic product per capita, constant pr... GDP is expressed in constant international dol... Purchasing power parity; percent change NaN Source: National Statistics Office Latest actu... NaN ... 2.236 -0.521 -1.941 -0.547 0.358 1.268 1.654 -4.291 3.063 2018.0
3 Afghanistan AFG PCPIPCH 512 Inflation, average consumer prices Annual percentages of average consumer prices ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 7.386 4.674 -0.662 4.384 4.976 0.626 2.302 4.711 4.451 2018.0
4 Afghanistan AFG PCPIEPCH 512 Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 7.241 1.487 1.131 4.588 3.041 0.755 2.773 4.5 5 2018.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1547 Zimbabwe ZWE PCPIPCH 698 Inflation, average consumer prices Annual percentages of average consumer prices ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 1.632 -0.213 -2.41 -1.558 0.907 10.607 255.292 319.036 3.7 2019.0
1548 Zimbabwe ZWE PCPIEPCH 698 Inflation, end of period consumer prices Annual percentages of end of period consumer ... Percent change NaN Source: National Statistics Office Latest actu... NaN ... 0.331 -0.796 -2.473 -0.898 3.43 42.074 521.15 154.297 3 2019.0
1549 Zimbabwe ZWE LUR 698 Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1550 Zimbabwe ZWE GGXCNL_NGDP 698 General government net lending/borrowing Net lending (+)/ borrowing (?) is calculated a... Percent of GDP NaN Source: Ministry of Finance or Treasury Latest... NaN ... -0.621 -0.421 -1.418 -6.242 -8.113 -4.499 -2.575 -4.931 -1.518 2018.0
1551 Zimbabwe ZWE BCA_NGDPD 698 Current account balance Current account is all transactions other than... Percent of GDP NaN Source: Reserve Bank of Zimbabwe and Ministry ... NaN ... -13.23 -11.563 -7.617 -3.58 -1.299 -5.896 1.133 -1.914 -1.894 2018.0

1552 rows × 52 columns

  • 이렇게 원하는 순서로 열을 배열해보았음

Grouping

# Bucket the 2021 unemployment rate into three bands:
#   Low: rate < 5, Medium: 5 <= rate < 10, High: rate >= 10.
# pd.cut assigns every label in one pass. This avoids seeding the column with
# the integer 0 and then overwriting it with strings, which pandas deprecates
# as an incompatible-dtype assignment.
df_ur['Criteria'] = pd.cut(
    df_ur['2021'],
    bins=[-float('inf'), 5, 10, float('inf')],
    right=False,  # intervals are closed on the left, e.g. [5, 10)
    labels=['Low', 'Medium', 'High'],
).astype(object)  # plain strings, so the groups sort alphabetically
# numeric_only=True: 'Country' is text and cannot be averaged (recent pandas
# raises TypeError instead of silently dropping it).
df_ur.groupby(['Criteria']).mean(numeric_only=True)
2021
Criteria
High 14.767680
Low 3.547000
Medium 7.100353
# Mean 2021 rate per band, ordered from lowest to highest mean.
# numeric_only=True skips the text 'Country' column, which cannot be averaged.
df_ur.groupby(['Criteria']).mean(numeric_only=True).sort_values('2021')
2021
Criteria
Low 3.547000
Medium 7.100353
High 14.767680
df_ur.groupby(['Criteria']).count()
Country 2021
Criteria
High 25 25
Low 24 24
Medium 51 51

  • 용량이 큰 csv 파일을 읽고 처리할 수 있는 방법
df_new=pd.DataFrame(columns=df.columns)
df_new
WEO Country Code ISO WEO Subject Code Country Subject Descriptor Subject Notes Units Scale Country/Series-specific Notes 1980 ... 2013 2014 2015 2016 2017 2018 2019 2020 2021 Estimates Start After

0 rows × 52 columns

# Read the CSV a few rows at a time (chunksize=5) instead of loading it whole,
# keeping only the 'Unemployment rate' rows of each chunk.
matches = [
    chunk.loc[chunk['Subject Descriptor'] == 'Unemployment rate']
    for chunk in pd.read_csv('WEOApr2020all.csv', chunksize=5)
]
# Concatenate once at the end: growing df_new inside the loop re-copies every
# previously collected row on each iteration.
if matches:
    df_new = pd.concat(matches)
df_new
WEO Country Code ISO WEO Subject Code Country Subject Descriptor Subject Notes Units Scale Country/Series-specific Notes 1980 ... 2013 2014 2015 2016 2017 2018 2019 2020 2021 Estimates Start After
5 512 AFG LUR Afghanistan Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
13 914 ALB LUR Albania Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: National Statistics Office Latest actu... 5.028 ... 15.9 17.5 17.1 15.2 13.7 12.3 12.0 11.8 11.5 2019
21 612 DZA LUR Algeria Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: National Statistics Office Latest actu... 15.789 ... 9.829 10.6 11.214 10.498 11.709 11.731 11.383 15.091 13.909 2019
29 614 AGO LUR Angola Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
37 311 ATG LUR Antigua and Barbuda Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1517 299 VEN LUR Venezuela Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: National Statistics Office Latest actu... NaN ... 7.47 6.7 7.4 20.863 27.886 35.543 NaN NaN NaN 2011
1525 582 VNM LUR Vietnam Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN Source: Other Latest actual data: 2019 Employm... NaN ... 2.75 2.1 2.33 2.33 2.21 2.21 2.21 NaN NaN 2019
1533 474 YEM LUR Yemen Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1541 754 ZMB LUR Zambia Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1549 698 ZWE LUR Zimbabwe Unemployment rate Unemployment rate can be defined by either the... Percent of total labor force NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

194 rows × 52 columns


얼굴 인식

import cv2
img = cv2.imread('spurs_pic.jpg')
# cv2.imread returns None instead of raising when the file cannot be read, so
# fail here with a clear message rather than later inside imshow/cvtColor.
if img is None:
    raise FileNotFoundError('spurs_pic.jpg')
plt.imshow(img)
# OpenCV loads the image as BGR, so it must be converted to RGB for display
<matplotlib.image.AxesImage at 0x1a08fb6b4c0>
rgb=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(rgb)
<matplotlib.image.AxesImage at 0x1a08fbd5640>
gray=cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
plt.imshow(gray,cmap='gray')
<matplotlib.image.AxesImage at 0x1a0905e15b0>
# Forward slashes point at the same relative location on Windows and also work
# on Linux/macOS, unlike the original backslash-separated path.
classifier = cv2.CascadeClassifier('./haarcascades/haarcascade_frontalface_default.xml')
# CascadeClassifier does not raise on a missing or unreadable file; it just stays empty.
if classifier.empty():
    raise FileNotFoundError('./haarcascades/haarcascade_frontalface_default.xml')

얼굴 검출에 사용할 분류기(Haar cascade)가 준비되었음

# Run the cascade on the grayscale image; each hit is an (x, y, w, h) box.
rects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
print(f'Face found:{len(rects)} ')
Face found:11 
# Outline every detected face on the RGB image with a green, 2-px-thick rectangle.
for x,y,w,h in rects:
    cv2.rectangle(rgb,(x,y),(x+w,y+h),(0,255,0),2)
plt.imshow(rgb)
<matplotlib.image.AxesImage at 0x1a090634430>
#plt.imshow(rgb)
# Convert back from RGB to BGR before writing the file with OpenCV.
bgr=cv2.cvtColor(rgb,cv2.COLOR_RGB2BGR)
cv2.imwrite('spurs_pic_faces.jpg',bgr)
# The annotated image has been saved.
True