파이썬 활용/전처리 모음집
인구통계분석_학교 수업 때 배운 부분
as_형준
2022. 10. 25. 17:22
In [ ]:
!sudo apt-get install -y font-nanum
Reading package lists... Done
Building dependency tree
Reading state information... Done
E: Unable to locate package font-nanum
In [ ]:
!sudo fc-cache -fv
/usr/share/fonts: caching, new cache contents: 0 fonts, 1 dirs
/usr/share/fonts/truetype: caching, new cache contents: 0 fonts, 2 dirs
/usr/share/fonts/truetype/humor-sans: caching, new cache contents: 1 fonts, 0 dirs
/usr/share/fonts/truetype/liberation: caching, new cache contents: 16 fonts, 0 dirs
/usr/local/share/fonts: caching, new cache contents: 0 fonts, 0 dirs
/root/.local/share/fonts: skipping, no such directory
/root/.fonts: skipping, no such directory
/var/cache/fontconfig: cleaning cache directory
/root/.cache/fontconfig: not cleaning non-existent cache directory
/root/.fontconfig: not cleaning non-existent cache directory
fc-cache: succeeded
In [ ]:
!rm ~/.cache/matplotlib -rf
In [ ]:
from google.colab import drive
drive.mount('/drive')
Mounted at /drive
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [ ]:
df = pd.read_excel('ingu.xlsx',thousands=',')
df.dtypes
Out[ ]:
Unnamed: 0 object
2012 float64
2013 float64
2014 float64
2015 float64
2016 float64
2017 float64
2018 float64
2019 float64
2020 float64
2021 object
dtype: object
In [ ]:
df= df.set_index('Unnamed: 0')
df
Out[ ]:
2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | |
---|---|---|---|---|---|---|---|---|---|---|
Unnamed: 0 | ||||||||||
출생아수(명) | 484550.000 | 436455.000 | 435435.000 | 438420.000 | 406243.000 | 357771.000 | 326822.000 | 302676.000 | 272337.000 | 260562 |
조출생률(인구1천명당 명) | 9.600 | 8.600 | 8.600 | 8.600 | 7.900 | 7.000 | 6.400 | 5.900 | 5.300 | 5.1 |
합계출산율(가임여성1명당 명) | 1.297 | 1.187 | 1.205 | 1.239 | 1.172 | 1.052 | 0.977 | 0.918 | 0.837 | 0.808 |
사망자수(명) | 267221.000 | 266257.000 | 267692.000 | 275895.000 | 280827.000 | 285534.000 | 298820.000 | 295110.000 | 304948.000 | - |
조사망률(인구1천명당 명) | 5.300 | 5.300 | 5.300 | 5.400 | 5.500 | 5.600 | 5.800 | 5.700 | 5.900 | - |
전체(년) | 80.900 | 81.400 | 81.800 | 82.100 | 82.400 | 82.700 | 82.700 | 83.300 | 83.500 | - |
남자(년) | 77.600 | 78.100 | 78.600 | 79.000 | 79.300 | 79.700 | 79.700 | 80.300 | 80.500 | - |
여자(년) | 84.200 | 84.600 | 85.000 | 85.200 | 85.400 | 85.700 | 85.700 | 86.300 | 86.500 | - |
In [ ]:
df.index.name= 'object'
df
Out[ ]:
2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | |
---|---|---|---|---|---|---|---|---|---|---|
object | ||||||||||
출생아수(명) | 484550.000 | 436455.000 | 435435.000 | 438420.000 | 406243.000 | 357771.000 | 326822.000 | 302676.000 | 272337.000 | 260562 |
조출생률(인구1천명당 명) | 9.600 | 8.600 | 8.600 | 8.600 | 7.900 | 7.000 | 6.400 | 5.900 | 5.300 | 5.1 |
합계출산율(가임여성1명당 명) | 1.297 | 1.187 | 1.205 | 1.239 | 1.172 | 1.052 | 0.977 | 0.918 | 0.837 | 0.808 |
사망자수(명) | 267221.000 | 266257.000 | 267692.000 | 275895.000 | 280827.000 | 285534.000 | 298820.000 | 295110.000 | 304948.000 | - |
조사망률(인구1천명당 명) | 5.300 | 5.300 | 5.300 | 5.400 | 5.500 | 5.600 | 5.800 | 5.700 | 5.900 | - |
전체(년) | 80.900 | 81.400 | 81.800 | 82.100 | 82.400 | 82.700 | 82.700 | 83.300 | 83.500 | - |
남자(년) | 77.600 | 78.100 | 78.600 | 79.000 | 79.300 | 79.700 | 79.700 | 80.300 | 80.500 | - |
여자(년) | 84.200 | 84.600 | 85.000 | 85.200 | 85.400 | 85.700 | 85.700 | 86.300 | 86.500 | - |
In [ ]:
df.index=['출생아수','조출생률','합계출산율','사망자수','조사망률','전체','남자','여자']
df
Out[ ]:
2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | |
---|---|---|---|---|---|---|---|---|---|---|
출생아수 | 484550.000 | 436455.000 | 435435.000 | 438420.000 | 406243.000 | 357771.000 | 326822.000 | 302676.000 | 272337.000 | 260562 |
조출생률 | 9.600 | 8.600 | 8.600 | 8.600 | 7.900 | 7.000 | 6.400 | 5.900 | 5.300 | 5.1 |
합계출산율 | 1.297 | 1.187 | 1.205 | 1.239 | 1.172 | 1.052 | 0.977 | 0.918 | 0.837 | 0.808 |
사망자수 | 267221.000 | 266257.000 | 267692.000 | 275895.000 | 280827.000 | 285534.000 | 298820.000 | 295110.000 | 304948.000 | - |
조사망률 | 5.300 | 5.300 | 5.300 | 5.400 | 5.500 | 5.600 | 5.800 | 5.700 | 5.900 | - |
전체 | 80.900 | 81.400 | 81.800 | 82.100 | 82.400 | 82.700 | 82.700 | 83.300 | 83.500 | - |
남자 | 77.600 | 78.100 | 78.600 | 79.000 | 79.300 | 79.700 | 79.700 | 80.300 | 80.500 | - |
여자 | 84.200 | 84.600 | 85.000 | 85.200 | 85.400 | 85.700 | 85.700 | 86.300 | 86.500 | - |
In [ ]:
df.loc['출생아수'].values
Out[ ]:
array(['484,550', '436,455', '435,435', '438,420', '406,243', '357,771',
'326,822', '302,676', '272,337', '260,562'], dtype=object)
In [ ]:
year = df.columns
y1 = list(map(int, df.loc['출생아수'].values))
plt.bar(year,y1)
plt.show()
In [ ]:
year = df.columns
y1 = list(map(int, df.loc['출생아수'].values))
y2 = df.loc['조출생률'].values
fig, baseAxis = plt.subplots()
baseAxis.bar(year, y1)
subAxis = baseAxis.twinx()
subAxis.plot(year, y2, 'y')
plt.show()
In [93]:
year = df.columns
y1 = list(map(int, df.loc['출생아수'].values))
y2 = df.loc['조출생률'].values
fig, baseAxis = plt.subplots()
plt.figure(figsize=(6,8))
plt.rcParams['figsize'](6,8)
baseAxis.bar(year, y1, color= 'red')
baseAxis.set_xlabel('year')
subAxis = baseAxis.twinx()
subAxis.plot(year, y2, 'y-o')
plt.show()
<Figure size 432x576 with 0 Axes>
In [156]:
df.columns[:-1]
y3 = df.iloc[6,:-1].values
y4 = df.iloc[7,:-1].values
print(len(y3))
print(len(y5))
for i in range(2012,2020):
print(i)
9
9
2012
2013
2014
2015
2016
2017
2018
2019
In [166]:
from pandas.core.internals.managers import BaseBlockManager
import matplotlib.pyplot as plt
#plt.rcParams['font.family']= 'NanumGothic'
plt.rc('font',family='NanumGothic')
year = np.array([y for y in range(2012,2021)])
y3 = df.iloc[6,:-1].values
y4 = df.iloc[7,:-1].values
y5 = df.iloc[4,:-1].values
fig, baseAxis = plt.subplots()
baseAxis.bar(year-0.2, y3, width=0.4, label='men')
baseAxis.bar(year+0.2, y4, width=0.4,label='women')
baseAxis.set_xlabel('가나')
subAx = baseAxis.twinx()
baseAxis.legend()
subAx.plot(year, y5,'-o', color='darkorange')
plt.show()
/usr/local/lib/python3.7/dist-packages/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 44032 missing from current font.
font.set_text(s, 0.0, flags=flags)
/usr/local/lib/python3.7/dist-packages/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 45208 missing from current font.
font.set_text(s, 0.0, flags=flags)
/usr/local/lib/python3.7/dist-packages/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 44032 missing from current font.
font.set_text(s, 0, flags=flags)
/usr/local/lib/python3.7/dist-packages/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 45208 missing from current font.
font.set_text(s, 0, flags=flags)
In [168]:
import matplotlib.pyplot as plt
import matplotlib
matplotlib.matplotlib_fname()
Out[168]:
'/usr/local/lib/python3.7/dist-packages/matplotlib/mpl-data/matplotlibrc'