import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error
# Membaca dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"
df = pd.read_excel(url)
# Mengganti nama kolom pada dataset
df = df.rename(columns={'X1': 'relative compactness',
'X2': 'surface area',
'X3': 'wall area',
'X4': 'roof area',
'X5': 'overall height',
'X6': 'orientation',
'X7': 'glazing area',
'X8': 'glazing area distribution',
'Y1': 'heating load',
'Y2': 'cooling load'})
# Mengambil variabel yang akan digunakan dalam analisis
X = df[['roof area', 'overall height']]
y1 = df['heating load']
y2 = df['cooling load']
# Membagi dataset menjadi data latih dan data uji
from sklearn.model_selection import train_test_split
X_train, X_test, y1_train, y1_test, y2_train, y2_test = train_test_split(X, y1, y2, test_size=0.2, random_state=0)
# Melakukan regresi polinomial pada data latih
poly = PolynomialFeatures(degree=2)
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)
regressor1 = LinearRegression()
regressor1.fit(X_poly_train, y1_train)
regressor2 = LinearRegression()
regressor2.fit(X_poly_train, y2_train)
# Memprediksi nilai y berdasarkan data uji
y1_pred = regressor1.predict(X_poly_test)
y2_pred = regressor2.predict(X_poly_test)
# Menampilkan persamaan regresi
print("Persamaan Regresi Heating Load: Y = {} + {}*X1 + {}*X2 + {}*X1^2 + {}*X2^2 + {}*X1*X2".format(
regressor1.intercept_, regressor1.coef_[0], regressor1.coef_[1], regressor1.coef_[2], regressor1.coef_[3], regressor1.coef_[4]))
print("Persamaan Regresi Cooling Load: Y = {} + {}*X1 + {}*X2 + {}*X1^2 + {}*X2^2 + {}*X1*X2".format(
regressor2.intercept_, regressor2.coef_[0], regressor2.coef_[1], regressor2.coef_[2], regressor2.coef_[3], regressor2.coef_[4]))
# Menampilkan plot kurva regresi
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))
ax1.scatter(X_test['roof area'], y1_test, color='red')
ax1.plot(X_test['roof area'], y1_pred, color='blue', linewidth=2)
ax1.set_xlabel('roof area')
ax1.set_ylabel('Heating Load')
ax2.scatter(X_test['roof area'], y2_test, color='blue')
ax2.plot(X_test['roof area'], y2_pred, color='red', linewidth=2)
ax2.set_xlabel('roof area')
ax2.set_ylabel('Cooling Load')
# Menampilkan plot kurva regresi
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))
ax1.scatter(X_test['overall height'], y1_test, color='red')
ax1.plot(X_test['overall height'], y1_pred, color='blue', linewidth=2)
ax1.set_xlabel('overall height')
ax1.set_ylabel('Heating Load')
ax2.scatter(X_test['overall height'], y2_test, color='blue')
ax2.plot(X_test['overall height'], y2_pred, color='red', linewidth=2)
ax2.set_xlabel('overall height')
ax2.set_ylabel('Cooling Load')
# Menampilkan kolom aktual dan hasil prediksi Y1
result1 = pd.DataFrame({'Actual Y1': y1_test, 'Predicted Y1': y1_pred})
print(result1)
# Menampilkan kolom aktual dan hasil prediksi Y2
result2 = pd.DataFrame({'Actual Y2': y2_test, 'Predicted Y2': y2_pred})
print(result2)
# Menyimpan kolom aktual dan hasil prediksi Y1 dan Y2 ke file excel
with pd.ExcelWriter('predicted_results.xlsx') as writer:
result1.to_excel(writer, sheet_name='Y1')
result2.to_excel(writer, sheet_name='Y2')
# Menghitung MAE
from sklearn.metrics import mean_absolute_error
mae1 = mean_absolute_error(y1_test, y1_pred)
mae2 = mean_absolute_error(y2_test, y2_pred)
# Menampilkan nilai R-squared, MSE, dan MAE
print("Heating Load:")
print("R-squared:", r2_score(y1_test, y1_pred))
print("MSE:", mean_squared_error(y1_test, y1_pred))
print("MAE:", mae1)
print("Cooling Load:")
print("R-squared:", r2_score(y2_test, y2_pred))
print("MSE:", mean_squared_error(y2_test, y2_pred))
print("MAE:", mae2)
# Menyimpan nilai R-squared, MSE, dan MAE ke file excel
data = {'R-squared': [r2_score(y1_test, y1_pred), r2_score(y2_test, y2_pred)],
'MSE': [mean_squared_error(y1_test, y1_pred), mean_squared_error(y2_test, y2_pred)],
'MAE': [mae1, mae2]}
df = pd.DataFrame(data, index=['Heating Load', 'Cooling Load'])
df.to_excel(writer, sheet_name='Metrics')