여러 날의 데이터를 합쳐서 보는 코드입니다.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the ten daily GREEND files dataset_2014-04-10.csv .. dataset_2014-04-19.csv
# and stack them into a single frame.
# The frames are collected first and concatenated once: DataFrame.append was
# removed in pandas 2.0, and calling it in a loop re-copied all accumulated
# rows on every iteration.
daily_frames = [
    pd.read_csv('data/GREEND/dataset_2014-04-1' + str(i) + '.csv', sep=',')
    for i in range(10)
]
month = pd.concat(daily_frames)

# Preview the first rows of the combined frame.
month.head()

	timestamp	000D6F00036BB04C	000D6F000353AC8C	000D6F0003562E10	000D6F0003562C48	000D6F00029C2984	000D6F0003562C0F	000D6F0003562BF6
0	1.397080e+09	2.274737	0.000000	NaN	NaN	0.0	0.0	NaN
1	1.397080e+09	0.000000	0.000000	NaN	NaN	0.0	0.0	NaN
2	1.397080e+09	2.274737	0.000000	NaN	NaN	NaN	NaN	NaN
3	1.397080e+09	0.000000	2.171969	NaN	NaN	NaN	NaN	NaN
4	1.397080e+09	2.274737	0.000000	NaN	NaN	NaN	NaN	NaN

# Summary statistics (count, mean, std, min, quartiles, max) for every column.
month.describe()

	timestamp	000D6F00036BB04C	000D6F00029C2BD7	000D6F000353AC8C	000D6F0003562E10	000D6F0003562C48	000D6F00029C2984	000D6F000353AE51	000D6F0003562C0F	000D6F0003562BF6
count	7.939110e+05	793015.000000	758923.000000	793623.000000	534961.000000	628410.000000	512901.000000	584154.000000	643603.000000	508756.000000
mean	1.397516e+09	20.885077	14.205696	0.822452	0.064507	9.766527	1.294510	5.995056	0.054579	1.440926
std	2.478460e+05	39.967829	142.580053	16.644136	7.348868	125.418035	4.604258	96.307246	2.725365	9.280212
min	1.397080e+09	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	1.397305e+09	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
50%	1.397518e+09	2.274737	2.352666	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
75%	1.397730e+09	2.274737	2.352666	2.171969	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
max	1.397944e+09	1187.154249	1835.589011	1312.731137	844.095295	2174.858523	42.260206	1649.333400	255.266878	63.474440

# Number of non-missing timestamps divided by the seconds in one day.
# This equals the covered time span in days only if the meter logs one sample
# per second (assumption - confirm against the dataset description).
# .count() returns the same value as the 'count' row of describe() without
# computing every other statistic and without positional [0] lookup on a
# label-indexed Series.
month['timestamp'].count() / (3600 * 24)

9.188784722222222

# Work on a copy: plain assignment would make `building` and `month` the same
# object, so the column rename below would also rename the columns of `month`.
building = month.copy()

# Human-readable appliance labels. The rename below is positional, so this list
# must stay in the same order as the sensor-ID columns of `month`.
apps = ['Coffee machine',
        'washing machine',
        'radio',
        'water kettle',
        'fridge',
        'dishwasher',
        'kitchen lamp',
        'TV',
        'vacuum cleaner']

# First column becomes 'time'; the remaining columns take the labels above.
building.columns = ['time'] + apps

# Preview the first rows after the columns were renamed.
building.head()

	time	Coffee machine	radio	water kettle	fridge	dishwasher	TV	vacuum cleaner
0	1.397080e+09	2.274737	0.000000	NaN	NaN	0.0	0.0	NaN
1	1.397080e+09	0.000000	0.000000	NaN	NaN	0.0	0.0	NaN
2	1.397080e+09	2.274737	0.000000	NaN	NaN	NaN	NaN	NaN
3	1.397080e+09	0.000000	2.171969	NaN	NaN	NaN	NaN	NaN
4	1.397080e+09	2.274737	0.000000	NaN	NaN	NaN	NaN	NaN

# Convert the 'time' column (epoch seconds, stored as floats) into pandas
# timestamps. pd.to_datetime with unit='s' is the documented conversion for
# numeric epoch values; it maps missing values to NaT and keeps any fractional
# seconds instead of relying on a float -> datetime64[s] cast.
building['datetime'] = pd.to_datetime(building['time'], unit='s')

# Preview the first rows after adding the 'datetime' column.
building.head()

	time	Coffee machine	washing machine	radio	water kettle	fridge	dishwasher	kitchen lamp	TV	vacuum cleaner	Power	datetime
datetime
2014-04-09 21:00:00	1.397081e+09	1.170731	0.000000	0.550232	0.0	0.0	0.0	0.00000	0.00000	0.0	1.720963	2014-04-09 21:56:28
2014-04-09 22:00:00	1.397083e+09	17.312213	0.013444	0.566091	0.0	0.0	0.0	0.00000	0.00000	0.0	17.891748	2014-04-09 22:29:59
2014-04-09 23:00:00	1.397086e+09	25.322587	0.003758	0.561381	0.0	0.0	0.0	0.00000	0.00000	0.0	25.887726	2014-04-09 23:29:56
2014-04-10 00:00:00	1.397090e+09	14.767736	0.000000	0.562385	0.0	0.0	0.0	0.00074	0.00206	0.0	15.332921	2014-04-10 00:29:56
2014-04-10 01:00:00	1.397093e+09	28.574747	0.000000	0.565088	0.0	0.0	0.0	0.00000	0.00000	0.0	29.139835	2014-04-10 01:30:02

# Replace every missing value in the frame with 0.
# NOTE(review): this counts a missing reading as zero consumption, which pulls
# down the hourly means computed further below - confirm this is intended.
building = building.fillna(0)
# Dimensions of the frame as (rows, columns).
building.shape

(241, 12)

# Total consumption per row: add up all appliance columns across each row.
appliance_readings = building[apps]
building['Power'] = appliance_readings.sum(axis='columns')
building.head()

	time	Coffee machine	washing machine	radio	water kettle	fridge	dishwasher	kitchen lamp	TV	vacuum cleaner	Power	datetime
datetime
2014-04-09 21:00:00	1.397081e+09	1.170731	0.000000	0.550232	0.0	0.0	0.0	0.00000	0.00000	0.0	1.720963	2014-04-09 21:56:28
2014-04-09 22:00:00	1.397083e+09	17.312213	0.013444	0.566091	0.0	0.0	0.0	0.00000	0.00000	0.0	17.891748	2014-04-09 22:29:59
2014-04-09 23:00:00	1.397086e+09	25.322587	0.003758	0.561381	0.0	0.0	0.0	0.00000	0.00000	0.0	25.887726	2014-04-09 23:29:56
2014-04-10 00:00:00	1.397090e+09	14.767736	0.000000	0.562385	0.0	0.0	0.0	0.00074	0.00206	0.0	15.332921	2014-04-10 00:29:56
2014-04-10 01:00:00	1.397093e+09	28.574747	0.000000	0.565088	0.0	0.0	0.0	0.00000	0.00000	0.0	29.139835	2014-04-10 01:30:02

# Aggregate the readings into hourly means.
# set_index moves 'datetime' out of the columns and into the index, so the
# label no longer exists both as index name and as a column; whether a leftover
# datetime column survives .mean() depends on the pandas version.
# The lowercase 'h' alias is used because 'H' is deprecated in recent pandas.
building = building.set_index('datetime')
building = building.groupby(pd.Grouper(freq='h')).mean()

# Preview the first rows of the hourly-aggregated frame.
building.head()

	time	Coffee machine	washing machine	radio	water kettle	fridge	dishwasher	kitchen lamp	TV	vacuum cleaner	Power
datetime
2014-04-09 21:00:00	1.397081e+09	1.170731	0.000000	0.550232	0.0	0.0	0.0	0.00000	0.00000	0.0	1.720963
2014-04-09 22:00:00	1.397083e+09	17.312213	0.013444	0.566091	0.0	0.0	0.0	0.00000	0.00000	0.0	17.891748
2014-04-09 23:00:00	1.397086e+09	25.322587	0.003758	0.561381	0.0	0.0	0.0	0.00000	0.00000	0.0	25.887726
2014-04-10 00:00:00	1.397090e+09	14.767736	0.000000	0.562385	0.0	0.0	0.0	0.00074	0.00206	0.0	15.332921
2014-04-10 01:00:00	1.397093e+09	28.574747	0.000000	0.565088	0.0	0.0	0.0	0.00000	0.00000	0.0	29.139835

# Derive calendar features from the frame's DatetimeIndex and store each one in
# a column named "datetime-<part>": month, day, hour and day of week.
# dayofweek is encoded as 0 (Monday) through 6 (Sunday).
for part in ("month", "day", "hour", "dayofweek"):
    building["datetime-" + part] = getattr(building.index, part)

# Print the dimensions of the frame as (rows, columns).
print(building.shape)

# Show the first five rows, restricted to the four derived columns.
derived_columns = ["datetime-month", "datetime-day",
                   "datetime-hour", "datetime-dayofweek"]
building[derived_columns].head()

(241, 15)

	datetime-month	datetime-day	datetime-hour	datetime-dayofweek
datetime
2014-04-09 21:00:00	4	9	21	2
2014-04-09 22:00:00	4	9	22	2
2014-04-09 23:00:00	4	9	23	2
2014-04-10 00:00:00	4	10	0	3
2014-04-10 01:00:00	4	10	1	3

# Mean total power for each (day of week, hour of day) pair; pivot_table
# aggregates with the mean by default.
pivoted = building.pivot_table(
    values='Power',
    index='datetime-dayofweek',
    columns='datetime-hour',
)
sns.heatmap(pivoted)

# Widen the figure that the heatmap was drawn on, then render it.
current_figure = plt.gcf()
current_figure.set_size_inches(20, 5)
plt.show()

# Correlation of each appliance column with the total power column.
# In the run shown with this notebook, the washing machine and the fridge
# correlate most strongly with the total, while the water kettle shows almost
# no correlation.
corr = building[apps].corrwith(building['Power'])

# Rank from highest to lowest. The values are sorted numerically and formatted
# afterwards; sorting the formatted strings would mis-order negative values
# ('-0.9' sorts above '-0.1') and values printed in scientific notation.
[('{:.4f}'.format(value), name)
 for name, value in corr.sort_values(ascending=False).items()]

[('0.7186', 'washing machine'),
 ('0.5881', 'fridge'),
 ('0.2820', 'kitchen lamp'),
 ('0.1347', 'TV'),
 ('0.1174', 'dishwasher'),
 ('0.0909', 'Coffee machine'),
 ('0.0454', 'radio'),
 ('0.0216', 'vacuum cleaner'),
 ('-0.011', 'water kettle')]

# Use matplotlib's subplots to place several plots in one figure:
# a 2x2 grid, four heatmaps in total.
figure, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)

# Overall figure size: 18 x 8 inches.
figure.set_size_inches(18, 8)

# One (day of week) x (hour of day) table of mean consumption per quantity.
pwer = building.pivot_table(index='datetime-dayofweek', columns='datetime-hour', values='Power')
wash = building.pivot_table(index='datetime-dayofweek', columns='datetime-hour', values='washing machine')
frid = building.pivot_table(index='datetime-dayofweek', columns='datetime-hour', values='fridge')
# Named after the column it actually holds (this table was previously called `lamp`).
kettle = building.pivot_table(index='datetime-dayofweek', columns='datetime-hour', values='water kettle')

# Draw each table on its own axes with seaborn's heatmap and title every panel,
# so the four plots can be told apart.
sns.heatmap(pwer, ax=ax1).set_title('Power')
sns.heatmap(wash, ax=ax2).set_title('washing machine')
sns.heatmap(frid, ax=ax3).set_title('fridge')
sns.heatmap(kettle, ax=ax4).set_title('water kettle')

# Keep the titles of the lower row from overlapping the upper row's tick labels.
figure.tight_layout()

<matplotlib.axes._subplots.AxesSubplot at 0x1cb0c6d80b8>

'[중급] 가볍게 이것저것' 카테고리의 다른 글

알고리즘 문제풀이_1 (0)	2019.10.15
[자연어 처리]와인 추천 시스템 간단하게 구현하기 (0)	2019.09.24
[스마트 빌딩] 가정집 전력 소비량 알아보기_1 (0)	2019.08.21
[스마트 빌딩] 건물의 전력 소비량 알아보기_2 (0)	2019.08.21
[스마트 빌딩] 건물의 전력 소비량 알아보기_1 (0)	2019.08.21

PassionPython

[스마트 빌딩] 가정집 전력 소비량 알아보기_2

여러 날의 데이터를 합쳐서 보는 코드입니다.

'[중급] 가볍게 이것저것' 카테고리의 다른 글

티스토리툴바

[스마트 빌딩] 가정집 전력 소비량 알아보기_2

여러 날의 데이터를 합쳐서 보는 코드입니다.

'[중급] 가볍게 이것저것' 카테고리의 다른 글

'[중급] 가볍게 이것저것' Related Articles

티스토리툴바