NUMPY를 이용한 지역별 인구분포도

as_형준 2022. 10. 25. 15:52

In [2]:

import numpy as np
import csv
import pandas as pd

넘파이를 이용한 지역인구¶

In [3]:

f = open('age.csv')
data = csv.reader(f)
next(data)

Out[3]:

['행정구역',
 '2019년02월_계_총인구수',
 '2019년02월_계_연령구간인구수',
 '2019년02월_계_0세',
 '2019년02월_계_1세',
 '2019년02월_계_2세',
 '2019년02월_계_3세',
 '2019년02월_계_4세',
 '2019년02월_계_5세',
 '2019년02월_계_6세',
 '2019년02월_계_7세',
 '2019년02월_계_8세',
 '2019년02월_계_9세',
 '2019년02월_계_10세',
 '2019년02월_계_11세',
 '2019년02월_계_12세',
 '2019년02월_계_13세',
 '2019년02월_계_14세',
 '2019년02월_계_15세',
 '2019년02월_계_16세',
 '2019년02월_계_17세',
 '2019년02월_계_18세',
 '2019년02월_계_19세',
 '2019년02월_계_20세',
 '2019년02월_계_21세',
 '2019년02월_계_22세',
 '2019년02월_계_23세',
 '2019년02월_계_24세',
 '2019년02월_계_25세',
 '2019년02월_계_26세',
 '2019년02월_계_27세',
 '2019년02월_계_28세',
 '2019년02월_계_29세',
 '2019년02월_계_30세',
 '2019년02월_계_31세',
 '2019년02월_계_32세',
 '2019년02월_계_33세',
 '2019년02월_계_34세',
 '2019년02월_계_35세',
 '2019년02월_계_36세',
 '2019년02월_계_37세',
 '2019년02월_계_38세',
 '2019년02월_계_39세',
 '2019년02월_계_40세',
 '2019년02월_계_41세',
 '2019년02월_계_42세',
 '2019년02월_계_43세',
 '2019년02월_계_44세',
 '2019년02월_계_45세',
 '2019년02월_계_46세',
 '2019년02월_계_47세',
 '2019년02월_계_48세',
 '2019년02월_계_49세',
 '2019년02월_계_50세',
 '2019년02월_계_51세',
 '2019년02월_계_52세',
 '2019년02월_계_53세',
 '2019년02월_계_54세',
 '2019년02월_계_55세',
 '2019년02월_계_56세',
 '2019년02월_계_57세',
 '2019년02월_계_58세',
 '2019년02월_계_59세',
 '2019년02월_계_60세',
 '2019년02월_계_61세',
 '2019년02월_계_62세',
 '2019년02월_계_63세',
 '2019년02월_계_64세',
 '2019년02월_계_65세',
 '2019년02월_계_66세',
 '2019년02월_계_67세',
 '2019년02월_계_68세',
 '2019년02월_계_69세',
 '2019년02월_계_70세',
 '2019년02월_계_71세',
 '2019년02월_계_72세',
 '2019년02월_계_73세',
 '2019년02월_계_74세',
 '2019년02월_계_75세',
 '2019년02월_계_76세',
 '2019년02월_계_77세',
 '2019년02월_계_78세',
 '2019년02월_계_79세',
 '2019년02월_계_80세',
 '2019년02월_계_81세',
 '2019년02월_계_82세',
 '2019년02월_계_83세',
 '2019년02월_계_84세',
 '2019년02월_계_85세',
 '2019년02월_계_86세',
 '2019년02월_계_87세',
 '2019년02월_계_88세',
 '2019년02월_계_89세',
 '2019년02월_계_90세',
 '2019년02월_계_91세',
 '2019년02월_계_92세',
 '2019년02월_계_93세',
 '2019년02월_계_94세',
 '2019년02월_계_95세',
 '2019년02월_계_96세',
 '2019년02월_계_97세',
 '2019년02월_계_98세',
 '2019년02월_계_99세',
 '2019년02월_계_100세 이상']

In [ ]:

지역 나이대별 인구수 검색

In [9]:

f = open('age.csv')
data = csv.reader(f)
next(data)
home=[]
name=input('지역이나 읍면동을 검색하시오>>>')
for row in data:
    if name in row[0]:
        for i in row[3:]:
            home.append(int(i))
print(home)

지역이나 읍면동을 검색하시오>>>송하동
[128, 136, 159, 171, 150, 159, 183, 179, 170, 169, 165, 196, 155, 155, 155, 163, 152, 213, 198, 213, 197, 230, 204, 205, 204, 193, 186, 175, 167, 197, 163, 195, 203, 169, 185, 229, 251, 238, 248, 216, 243, 236, 246, 305, 256, 294, 255, 267, 271, 288, 283, 254, 278, 240, 253, 258, 294, 269, 339, 284, 219, 214, 231, 172, 183, 137, 133, 142, 118, 102, 134, 85, 75, 49, 103, 73, 104, 80, 65, 71, 59, 60, 67, 56, 56, 34, 37, 27, 25, 15, 20, 7, 6, 11, 7, 5, 4, 3, 3, 0, 5]

In [ ]:

지역 나이대별 인구수 그래프화

In [11]:

import numpy as np
import csv
f =open('age.csv')
data = csv.reader(f)
next(data)
name = input('인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : ')
for row in data :
    if name in row[0] :
        home = np.array(row[3:], dtype = int)
        
        
import matplotlib.pyplot as plt
plt.style.use('ggplot')
plt.figure(figsize = (10,5), dpi=300)
plt.rc('font', family ='Malgun Gothic')
plt.title(name +' 지역의 인구 구조')
plt.plot(home)

인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : 송하동

Out[11]:

[<matplotlib.lines.Line2D at 0x17cc73fdd60>]

In [ ]:

In [33]:

import numpy as np
import csv
f= open('age.csv', 'r', encoding='cp949')
data = csv.reader(f,delimiter=',')
next(data)
data = list(data)
name = input('인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : ')
for row in data :
    if name in row[0] :     
        home = np.array(row[3:], dtype =int) / int(row[2].replace(',', ''))
        
        
import matplotlib.pyplot as plt
plt.style.use('ggplot')
plt.figure(figsize = (10,5), dpi=300)
plt.rc('font', family ='Malgun Gothic')
plt.title(name +' 지역의 인구 구조')
plt.plot(home)
plt.show()

인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : 송하동

In [ ]:

In [35]:

import numpy as np
import csv
f =open('age.csv')
data = csv.reader(f) # ➊
next(data)
name = input('인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : ')
for row in data : # ➋for row in data : # ➍

    if name in row[0] :
        home = np.array(row[3:], dtype =int) / int(row[2].replace(',', '')) # ➌
    print(row) # ➎

인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : 송하동

In [138]:

f =open('age.csv',thousands = ',')
data = csv.reader(f)
next(data)
data = list(data) #추가
name = input('인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : ')
for row in data :
    row[]
    if name in row[0] :
        home = np.array(row[3:], dtype =int) / int(row[2])
for row in data :
    away = np.array(row[3:], dtype = int) / int(row[2])
    print(home - away)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [138], in <cell line: 1>()
----> 1 f =open('age.csv',thousands = ',')
      2 data = csv.reader(f)
      3 next(data)

TypeError: 'thousands' is an invalid keyword argument for open()

In [ ]:

In [40]:

f =open('age.csv')
data = csv.reader(f)
next(data)
data = list(data)
#2. 궁금한 지역의 이름을 입력받는다.
name = input('인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : ')
mn =1
result_name =''
result =0
#3. 궁금한 지역의 인구 구조를 저장한다.
for row in data :
    if name in row[0] :
        home = np.array(row[3:], dtype =int) /int(row[2].replace(',',''))
#4. 궁금한 지역의 인구 구조와 가장 비슷한 인구 구조를 가진 지역을 찾는다.
for row in data :
    away = np.array(row[3:], dtype =int) /int(row[2].replace(',',''))
    s = np.sum((home - away) **2)
    if s < mn and name not in row[0] :
        mn = s
        result_name = row[0]
        result = away
#5. 궁금한 지역의 인구 구조와 가장 비슷한 곳의 인구 구조를 시각화한다.
import matplotlib.pyplot as plt
plt.style.use('ggplot')
plt.figure(figsize = (10,5), dpi=300)
plt.rc('font', family ='Malgun Gothic')
plt.title(name +' 지역과 가장 비슷한 인구 구조를 가진 지역')
plt.plot(home, label = name)
plt.plot(result, label = result_name)
plt.legend()
plt.show()

인구 구조가 알고 싶은 지역의 이름(읍면동 단위)을 입력해주세요 : 송하동

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [40], in <cell line: 15>()
     14 #4. 궁금한 지역의 인구 구조와 가장 비슷한 인구 구조를 가진 지역을 찾는다.
     15 for row in data :
---> 16     away = np.array(row[3:], dtype =int) /int(row[2].replace(',',''))
     17     s = np.sum((home - away) **2)
     18     if s < mn and name not in row[0] :

ValueError: invalid literal for int() with base 10: '53,738'

판다스로 분석하기¶

In [17]:

import pandas as pd
df= pd.read_csv('age.csv', encoding='cp949', thousands = ',')
df.head()

Out[17]:

	행정구역	2019년02월_계_총인구수	2019년02월_계_연령구간인구수	2019년02월_계_0세	2019년02월_계_1세	2019년02월_계_2세	2019년02월_계_3세	2019년02월_계_4세	2019년02월_계_5세	2019년02월_계_6세	...	2019년02월_계_91세	2019년02월_계_92세	2019년02월_계_93세	2019년02월_계_94세	2019년02월_계_95세	2019년02월_계_96세	2019년02월_계_97세	2019년02월_계_98세	2019년02월_계_99세	2019년02월_계_100세 이상
0	서울특별시 (1100000000)	9770638	9770638	53738	59599	65815	70726	70037	69000	76377	...	7199	5374	3972	2927	2813	2260	1565	1180	880	5932
1	서울특별시 종로구 (1111000000)	152880	152880	593	683	775	874	852	863	1023	...	163	132	112	79	69	81	53	40	39	236
2	서울특별시 종로구 청운효자동(1111051500)	13010	13010	66	79	72	98	98	94	108	...	9	8	14	6	7	9	4	2	2	16
3	서울특별시 종로구 사직동(1111053000)	9584	9584	43	50	64	67	67	70	94	...	14	5	12	6	5	8	2	5	4	17
4	서울특별시 종로구 삼청동(1111054000)	2820	2820	9	15	3	21	11	13	9	...	6	6	1	3	0	3	1	0	2	7

5 rows × 104 columns

In [18]:

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3841 entries, 0 to 3840
Columns: 104 entries, 행정구역 to 2019년02월_계_100세 이상
dtypes: int64(103), object(1)
memory usage: 3.0+ MB

In [19]:

ity_data = df.set_index('행정구역')
ity_data.head()

Out[19]:

	2019년02월_계_총인구수	2019년02월_계_연령구간인구수	2019년02월_계_0세	2019년02월_계_1세	2019년02월_계_2세	2019년02월_계_3세	2019년02월_계_4세	2019년02월_계_5세	2019년02월_계_6세	2019년02월_계_7세	...	2019년02월_계_91세	2019년02월_계_92세	2019년02월_계_93세	2019년02월_계_94세	2019년02월_계_95세	2019년02월_계_96세	2019년02월_계_97세	2019년02월_계_98세	2019년02월_계_99세	2019년02월_계_100세 이상
행정구역
서울특별시 (1100000000)	9770638	9770638	53738	59599	65815	70726	70037	69000	76377	73745	...	7199	5374	3972	2927	2813	2260	1565	1180	880	5932
서울특별시 종로구 (1111000000)	152880	152880	593	683	775	874	852	863	1023	904	...	163	132	112	79	69	81	53	40	39	236
서울특별시 종로구 청운효자동(1111051500)	13010	13010	66	79	72	98	98	94	108	109	...	9	8	14	6	7	9	4	2	2	16
서울특별시 종로구 사직동(1111053000)	9584	9584	43	50	64	67	67	70	94	65	...	14	5	12	6	5	8	2	5	4	17
서울특별시 종로구 삼청동(1111054000)	2820	2820	9	15	3	21	11	13	9	16	...	6	6	1	3	0	3	1	0	2	7

5 rows × 103 columns

In [20]:

r = ity_data.div(ity_data['2019년02월_계_총인구수'],axis=0)
r

Out[20]:

	2019년02월_계_총인구수	2019년02월_계_연령구간인구수	2019년02월_계_0세	2019년02월_계_1세	2019년02월_계_2세	2019년02월_계_3세	2019년02월_계_4세	2019년02월_계_5세	2019년02월_계_6세	2019년02월_계_7세	...	2019년02월_계_91세	2019년02월_계_92세	2019년02월_계_93세	2019년02월_계_94세	2019년02월_계_95세	2019년02월_계_96세	2019년02월_계_97세	2019년02월_계_98세	2019년02월_계_99세	2019년02월_계_100세 이상
행정구역
서울특별시 (1100000000)	1.0	1.0	0.005500	0.006100	0.006736	0.007239	0.007168	0.007062	0.007817	0.007548	...	0.000737	0.000550	0.000407	0.000300	0.000288	0.000231	0.000160	0.000121	0.000090	0.000607
서울특별시 종로구 (1111000000)	1.0	1.0	0.003879	0.004468	0.005069	0.005717	0.005573	0.005645	0.006692	0.005913	...	0.001066	0.000863	0.000733	0.000517	0.000451	0.000530	0.000347	0.000262	0.000255	0.001544
서울특별시 종로구 청운효자동(1111051500)	1.0	1.0	0.005073	0.006072	0.005534	0.007533	0.007533	0.007225	0.008301	0.008378	...	0.000692	0.000615	0.001076	0.000461	0.000538	0.000692	0.000307	0.000154	0.000154	0.001230
서울특별시 종로구 사직동(1111053000)	1.0	1.0	0.004487	0.005217	0.006678	0.006991	0.006991	0.007304	0.009808	0.006782	...	0.001461	0.000522	0.001252	0.000626	0.000522	0.000835	0.000209	0.000522	0.000417	0.001774
서울특별시 종로구 삼청동(1111054000)	1.0	1.0	0.003191	0.005319	0.001064	0.007447	0.003901	0.004610	0.003191	0.005674	...	0.002128	0.002128	0.000355	0.001064	0.000000	0.001064	0.000355	0.000000	0.000709	0.002482
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
제주특별자치도 서귀포시 서홍동(5013058000)	1.0	1.0	0.007429	0.008381	0.010952	0.011048	0.010095	0.013429	0.014667	0.015333	...	0.000381	0.000095	0.000286	0.000095	0.000000	0.000476	0.000381	0.000000	0.000000	0.000190
제주특별자치도 서귀포시 대륜동(5013059000)	1.0	1.0	0.009253	0.010444	0.012617	0.012127	0.012057	0.011496	0.009393	0.010515	...	0.001192	0.000421	0.000280	0.000070	0.000771	0.000491	0.000140	0.000210	0.000210	0.000421
제주특별자치도 서귀포시 대천동(5013060000)	1.0	1.0	0.008595	0.013297	0.015354	0.014693	0.012930	0.013003	0.012122	0.012122	...	0.001102	0.000367	0.000294	0.000367	0.000441	0.000220	0.000147	0.000147	0.000073	0.000147
제주특별자치도 서귀포시 중문동(5013061000)	1.0	1.0	0.006381	0.004943	0.007010	0.009437	0.008538	0.009077	0.011144	0.010425	...	0.000899	0.000359	0.000449	0.000449	0.000539	0.000539	0.000180	0.000000	0.000270	0.000539
제주특별자치도 서귀포시 예래동(5013062000)	1.0	1.0	0.008483	0.004847	0.006302	0.007513	0.006544	0.004363	0.009210	0.007513	...	0.001697	0.001454	0.000969	0.000727	0.001697	0.000242	0.000485	0.000242	0.000242	0.001212

3841 rows × 103 columns

In [ ]:

In [21]:

import pandas as pd
df= pd.read_csv('age.csv', encoding='cp949')
df.head()

Out[21]:

	행정구역	2019년02월_계_총인구수	2019년02월_계_연령구간인구수	2019년02월_계_0세	2019년02월_계_1세	2019년02월_계_2세	2019년02월_계_3세	2019년02월_계_4세	2019년02월_계_5세	2019년02월_계_6세	...	2019년02월_계_91세	2019년02월_계_92세	2019년02월_계_93세	2019년02월_계_94세	2019년02월_계_95세	2019년02월_계_96세	2019년02월_계_97세	2019년02월_계_98세	2019년02월_계_99세	2019년02월_계_100세 이상
0	서울특별시 (1100000000)	9,770,638	9,770,638	53,738	59,599	65,815	70,726	70,037	69,000	76,377	...	7,199	5,374	3,972	2,927	2,813	2,260	1,565	1,180	880	5,932
1	서울특별시 종로구 (1111000000)	152,880	152,880	593	683	775	874	852	863	1,023	...	163	132	112	79	69	81	53	40	39	236
2	서울특별시 종로구 청운효자동(1111051500)	13,010	13,010	66	79	72	98	98	94	108	...	9	8	14	6	7	9	4	2	2	16
3	서울특별시 종로구 사직동(1111053000)	9,584	9,584	43	50	64	67	67	70	94	...	14	5	12	6	5	8	2	5	4	17
4	서울특별시 종로구 삼청동(1111054000)	2,820	2,820	9	15	3	21	11	13	9	...	6	6	1	3	0	3	1	0	2	7

5 rows × 104 columns

In [22]:

plt.rc('font', family ='Malgun Gothic')
df = pd.read_csv('age.csv', encoding='cp949', index_col = 0, thousands = ',') 
df = df.div(df['2019년02월_계_총인구수'], axis = 0)     # ①-1. 전체 데이터를 총인구수로 나눠서 비율로 변환
del df['2019년02월_계_총인구수'], df['2019년02월_계_연령구간인구수']  # ①-2. 총인구수, 연령구간인구수 열 삭제
name = input('원하는 지역의 이름을 입력해주세요 : ')  # ② 지역 이름 입력
a = df.index.str.contains(name)  # ③ 해당 행을 찾아서 해당 지역의 인구 구조를 저장
df2 = df[a]
df.loc[np.power(df.sub(df2.iloc[0], axis = 1), 2).sum(axis = 1).sort_values().index[:5]].T.plot()
plt.show()

원하는 지역의 이름을 입력해주세요 : 산본2동