# The assignment was to solve this using a single decision tree.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Wine-quality regression with a single decision tree.
# Steps: load the CSV, inspect it, fit a depth-limited DecisionTreeRegressor
# on a hand-picked feature subset, then report MAE / RMSE / R2 on a hold-out.

# The CSV is fetched over HTTP on every run.
ds1 = 'https://raw.githubusercontent.com/aniruddhachoudhury/Red-Wine-Quality/master/winequality-red.csv'
ds = pd.read_csv(ds1)

# Bare expressions are only echoed by an interactive cell (and only the last
# one in the cell), so the inspection results are printed explicitly.
print(ds.head())
print(ds.shape)
print(ds.describe())

# Scatter of two columns against each other.
plt.scatter(ds['fixed acidity'], ds['citric acid'], color='g')
plt.xlabel('fixed acidity')
plt.ylabel('citric acid')
# plt.show has to be *called*; a bare `plt.show` is a no-op and would leave
# this figure as the current one for the tree plot further down.
plt.show()

# Correlation matrix of the raw frame (computed before duplicates are dropped).
mtr = ds.corr()
print(mtr)

regressor = DecisionTreeRegressor(max_depth=4, random_state=42)
ds = ds.drop_duplicates()

# Feature-selection experiments tried earlier (the percentages are the
# author's recorded scores, presumably R2 -- TODO confirm):
#   - all columns except the last one (no score recorded)
#   - alcohol, volatile acidity, sulphates, citric acid,
#     total sulfur dioxide, density                       -> 41%
#   - alcohol, volatile acidity, sulphates, citric acid,
#     density                                             -> 44%
feature_cols = ['alcohol', 'volatile acidity', 'sulphates', 'density']
X = ds[feature_cols].values
# The target is taken positionally: the last column of the frame.
y = ds.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
regressor.fit(X_train, y_train)

# Draw the fitted tree in its own figure. The model was trained on a bare
# array, so the column names are passed in to label the split nodes.
plt.figure(figsize=(20, 10))
tree.plot_tree(regressor, feature_names=feature_cols)
plt.show()

y_pred = regressor.predict(X_test)
df = pd.DataFrame({'actual': y_test, 'pred': y_pred})
print(df)

mse = metrics.mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
print('mae', mae)
print('rmse', mse**0.5)
r2 = metrics.r2_score(y_test, y_pred)
print(f"точность R2: {r2 * 100:.2f}%")
# The assignment was to solve this using a single decision tree.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn import metrics

# NOTE(review): this section appears to repeat the analysis earlier in the
# file statement for statement -- probably an accidental duplicate paste;
# consider keeping only one copy.
#
# Wine-quality regression with a single decision tree.
# Steps: load the CSV, inspect it, fit a depth-limited DecisionTreeRegressor
# on a hand-picked feature subset, then report MAE / RMSE / R2 on a hold-out.

# The CSV is fetched over HTTP on every run.
ds1 = 'https://raw.githubusercontent.com/aniruddhachoudhury/Red-Wine-Quality/master/winequality-red.csv'
ds = pd.read_csv(ds1)

# Bare expressions are only echoed by an interactive cell (and only the last
# one in the cell), so the inspection results are printed explicitly.
print(ds.head())
print(ds.shape)
print(ds.describe())

# Scatter of two columns against each other.
plt.scatter(ds['fixed acidity'], ds['citric acid'], color='g')
plt.xlabel('fixed acidity')
plt.ylabel('citric acid')
# plt.show has to be *called*; a bare `plt.show` is a no-op and would leave
# this figure as the current one for the tree plot further down.
plt.show()

# Correlation matrix of the raw frame (computed before duplicates are dropped).
mtr = ds.corr()
print(mtr)

regressor = DecisionTreeRegressor(max_depth=4, random_state=42)
ds = ds.drop_duplicates()

# Feature-selection experiments tried earlier (the percentages are the
# author's recorded scores, presumably R2 -- TODO confirm):
#   - all columns except the last one (no score recorded)
#   - alcohol, volatile acidity, sulphates, citric acid,
#     total sulfur dioxide, density                       -> 41%
#   - alcohol, volatile acidity, sulphates, citric acid,
#     density                                             -> 44%
feature_cols = ['alcohol', 'volatile acidity', 'sulphates', 'density']
X = ds[feature_cols].values
# The target is taken positionally: the last column of the frame.
y = ds.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
regressor.fit(X_train, y_train)

# Draw the fitted tree in its own figure. The model was trained on a bare
# array, so the column names are passed in to label the split nodes.
plt.figure(figsize=(20, 10))
tree.plot_tree(regressor, feature_names=feature_cols)
plt.show()

y_pred = regressor.predict(X_test)
df = pd.DataFrame({'actual': y_test, 'pred': y_pred})
print(df)

mse = metrics.mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
print('mae', mae)
print('rmse', mse**0.5)
r2 = metrics.r2_score(y_test, y_pred)
print(f"точность R2: {r2 * 100:.2f}%")