import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_circles
from sklearn.datasets import make_moons
from sklearn.datasets import make_classification
from sklearn.datasets import load_digits
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score, precision_score, recall_score, precision_recall_fscore_support, f1_score
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from IPython.display import Image


## Function to compute specificity (recall of the negative class)
def specificity_score(y_true, y_pred):
    # precision_recall_fscore_support returns per-class arrays; index 0 is the negative class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred)
    return r[0]


## Function for automated scoring of a model
def score_model(X, y, kf):
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = LogisticRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))
    print("accuracy:", np.mean(accuracy_scores))
    print("precision:", np.mean(precision_scores))
    print("recall:", np.mean(recall_scores))
    print("f1 score:", np.mean(f1_scores))

###
# Basics
###
# Exercise 1: numpy averages (mean/median/percentiles) practice
#data = [15, 16, 18, 19, 22, 24, 29, 30, 34]

#print("mean:", np.mean(data))
#print("median:", np.median(data))
#print("50th percentile (median):", np.percentile(data, 50))
#print("25th percentile:", np.percentile(data, 25))
#print("75th percentile:", np.percentile(data, 75))
#print("standard deviation:", np.std(data))
#print("variance:", np.var(data))

# Exercise 2: pandas reading practice
#pd.options.display.max_columns = 6
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#print(df.describe())

# Exercise 3: pandas data manipulation practice (add a new column named 'male')
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#print(df.head())

# Exercise 4: DataFrame shape practice (how the DataFrame is shaped: [number of rows, number of columns])
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#arr = df[['Pclass', 'Fare', 'Age']].values
#print(arr.shape)

# Exercise 5: summing DataFrame contents
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#arr = df[['Pclass', 'Fare', 'Age']].values
#mask = arr[:, 2] < 18

#print(mask.sum())
#print((arr[:, 2] < 18).sum())

# Exercise 6: plotting practice
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['Gender'] = df['Sex'] == 'male'
#plt.scatter(df['Age'], df['Fare'], c=df['Pclass'])

#plt.xlabel('Age')
#plt.ylabel('Fare')

#cbar = plt.colorbar()
#plt.plot([0, 80], [85, 5])
#plt.show()

# 2nd example
#plt.style.use('fivethirtyeight')

#fig, ax = plt.subplots()
#x = df['Age']
#y1 = df['Fare']
#color = df['Gender']
#size = df['Pclass']
#ax.scatter(x, y1, c=color, s=30*size, alpha=0.3)
#for size in [1, 2, 3]:
# plt.scatter([], [], c='r', s=30*size, label=str(size)+'class')
# plt.legend(scatterpoints=1, frameon=False, labelspacing=1, title='Titanic')
#ax.set_xlabel('Age')
#ax.set_ylabel('Fare')
#ax.set_xlim(0, 90)
#ax.set_ylim(0, 555)
#cbar = plt.colorbar()
#plt.show()

#################################################
###
# Machine learning algorithms with sklearn
###

# Exercise 1: preparing pandas data for a model
# Result: X = 2D numpy array (matrix) of all features, y = 1D numpy array of the target
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values
#print(X)
#print(y)

# Exercise 2: fitting data with sklearn
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#X = df[['Fare', 'Age']].values
#y = df['Survived'].values

#model = LogisticRegression()
#model.fit(X, y)

#print(model.coef_, model.intercept_)
# Output should be: [[ 0.01615949 -0.01549065]] [-0.51037152]

# Exercise 3: predicting target values with sklearn and pandas
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values

#model = LogisticRegression()
#model.fit(X, y)

#print(model.predict([[3, True, 22.0, 1, 0, 7.25]]))
#print(model.predict(X[:10]))
#print(y[:10])

#y_pred = model.predict(X)
#print((y == y_pred).sum()/y.shape[0])
# equivalent shortcut, often used:
#print(model.score(X, y))
# Output, accuracy of the predictions: 0.8049605411499436

#print((y == y_pred).sum())
#print((y == y_pred).sum() / y.shape[0])
#print(model.score(X, y))

# Exercise 4: model with the built-in breast cancer dataset
#cancer_data = load_breast_cancer()
#print(cancer_data.keys())
# DESCR (the description is part of the dataset)
#print(cancer_data['DESCR'])

#print(cancer_data['target'])
#print(cancer_data['target'].shape)
#print(cancer_data['target_names'])
#df = pd.DataFrame(cancer_data['data'], columns=cancer_data['feature_names'])
#df['target'] = cancer_data['target']
#print(df.head())

## feature matrix / target array
#X = df[cancer_data.feature_names].values
#y = df['target'].values

## prepare the model
#model = LogisticRegression(solver='liblinear')
#model.fit(X, y)

#model.predict([X[0]])
#print("prediction for datapoint 0:", model.predict([X[0]]))
#print(model.score(X, y))

# Exercise 5: Bob the Builder
## Goal: input made up of <numberOfDatapoints>, <matrixOfValues>, <targetValueArray>, <datapointToTest>
## Output: 1 or 0

## input-reading routine
#n = int(input())
#X = []
#for i in range(n):
# X.append([float(x) for x in input().split()])
#y = [int(x) for x in input().split()]
#testing_datapoint = [float(x) for x in input().split()]

## model construction, fitting, and output
#model = LogisticRegression()
#model.fit(X, y)
#result = model.predict([testing_datapoint])
#print(result[0])

# Exercise 6: computing metrics with sklearn
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values
#model = LogisticRegression()
#model.fit(X, y)
#y_pred = model.predict(X)

## Confusion matrix (shows TN, FP, FN, TP)
#print(confusion_matrix(y, y_pred))

## Accuracy -> how often the prediction was correct: (TP+TN)/(TP+FP+FN+TN)
#print("accuracy:", accuracy_score(y, y_pred))
## Precision -> fraction of predicted positives that are actually positive: (TP)/(TP+FP)
## *Note: a precision close to 1 means few false positives (more relevant when FPs are the more dangerous/undesirable error)
#print("precision:", precision_score(y, y_pred))
## Recall/sensitivity -> fraction of actual positives that are found: (TP)/(TP+FN)
## *Note: a recall close to 1 means few false negatives (more relevant when FNs are the more dangerous/undesirable error)
#print("recall:", recall_score(y, y_pred))
## F1 score -> harmonic mean of precision and recall
## *Note: an F1 score close to 1 means few FPs and FNs -> generally good predictions
#print("f1 score:", f1_score(y, y_pred))

# Exercise 7: training data & test data (preventing overfitting)
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values

# train_size -> fraction of the data that goes into the training set; random_state = randomizer seed
#X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=80613)

#print("whole dataset:", X.shape, y.shape)
#print("training set:", X_train.shape, y_train.shape)
#print("test set:", X_test.shape, y_test.shape)

# building the model
#model = LogisticRegression()
#model.fit(X_train, y_train)

# evaluating the model
#y_pred = model.predict(X_test)
#print("accuracy:", accuracy_score(y_test, y_pred))
#print("precision:", precision_score(y_test, y_pred))
#print("recall:", recall_score(y_test, y_pred))
#print("f1 score:", f1_score(y_test, y_pred))

#sensitivity_score = recall_score
#print("sensitivity:", sensitivity_score(y_test, y_pred))
#print("specificity:", specificity_score(y_test, y_pred))

# Exercise 8: working with the receiver operating characteristic (ROC)
# *Note: the ROC curve is a graph that shows all possible models (decision thresholds)
#  and their performance
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values

#X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=80613)
#model = LogisticRegression()
#model.fit(X_train, y_train)
#y_pred_proba = model.predict_proba(X_test)
#fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba[:, 1])

#plt.plot(fpr, tpr)
#plt.plot([0, 1], [0, 1], linestyle='--')
#plt.xlim([0.0, 1.0])
#plt.ylim([0.0, 1.0])
#plt.xlabel('1 - specificity')
#plt.ylabel('sensitivity')
#plt.show()

## Strategy for choosing a point on the curve:
## In general, the further to the top left (high sensitivity + low 1-specificity),
## the better the model overall.
## The further left,
## the more of the results we predict as positive are actually correct.
## The further up,
## the more of the truly positive results are found (minimizing false negatives).
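
## A small sketch (not from the course): one common heuristic for picking a concrete
## threshold from the arrays returned by roc_curve above is to take the point closest
## to the top-left corner (fpr = 0, tpr = 1). Assumes fpr, tpr, thresholds from the
## commented-out code above; uncomment together with it to run.
#distances = np.sqrt(fpr**2 + (1 - tpr)**2)
#best_index = np.argmin(distances)
#print("best threshold:", thresholds[best_index])
#print("sensitivity at that threshold:", tpr[best_index])
#print("1 - specificity at that threshold:", fpr[best_index])
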
## Comparing two model variants against each other
#model1 = LogisticRegression()
#model1.fit(X_train, y_train)
#y_pred_proba1 = model1.predict_proba(X_test)
#print("model 1 AUC score:", roc_auc_score(y_test, y_pred_proba1[:, 1]))

#model2 = LogisticRegression()
#model2.fit(X_train[:, 0:2], y_train)
#y_pred_proba2 = model2.predict_proba(X_test[:, 0:2])
#print("model 2 AUC score:", roc_auc_score(y_test, y_pred_proba2[:, 1]))

## Comparing different train/test splits against each other
#y_pred = model.predict(X_test)
#print(" accuracy: {0:.5f}".format(accuracy_score(y_test, y_pred)))
#print("precision: {0:.5f}".format(precision_score(y_test, y_pred)))
#print(" recall: {0:.5f}".format(recall_score(y_test, y_pred)))
#print(" f1 score: {0:.5f}".format(f1_score(y_test, y_pred)))

# Exercise 9: building k-fold cross-validated models
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values

#scores = []
#kf = KFold(n_splits=5, shuffle=True)

#for train_index, test_index in kf.split(X):
# X_train, X_test = X[train_index], X[test_index]
# y_train, y_test = y[train_index], y[test_index]
# model = LogisticRegression()
# model.fit(X_train, y_train)
# scores.append(model.score(X_test, y_test))

# Final score (accuracy) averaged over all folds
#print(scores)
#print(np.mean(scores))

# Exercise 10: comparing different models
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'

# Use the same KFold cross-validation object for every model,
# otherwise the comparison is unfair (same folds/chunks for all of them)
#kf = KFold(n_splits=5, shuffle=True)

# Different models with different feature sets
#X1 = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#X2 = df[['Pclass', 'male', 'Age']].values
#X3 = df[['Fare', 'Age']].values

# The target numpy array is the same for all of them
#y = df['Survived'].values

#print("Logistic Regression with all features (Model1)")
#score_model(X1, y, kf)
#print()
#print("Logistic Regression with Pclass, Sex & Age features (Model2)")
#score_model(X2, y, kf)
#print()
#print("Logistic Regression with Fare & Age features (Model3)")
#score_model(X3, y, kf)

# Model 1 and Model 2 have almost identical scores
# -> Model 2 would be the better choice, since it uses fewer features (and therefore computes faster)
#model = LogisticRegression()
#model.fit(X1, y)

#print(model.predict([[3, False, 25, 0, 1, 2]]))

# Exercise 11: computing accuracy/precision/recall/f1 score by hand
#tp, fp, fn, tn = [int(x) for x in input().split()]
#total = tp+fp+fn+tn
#print(tp)
#print(fp)
#print(fn)
#print(tn)
#print(total)
#accuracy
#accuracy = (tp+tn)/total
#print(round(accuracy,4))
#precision
#precision = (tp)/(tp+fp)
#print(round(precision,4))
#recall
#recall = (tp)/(tp+fn)
#print(round(recall,4))
#f1 score
#f1_score = (2*(precision)*recall)/(precision+recall)  # note: this name shadows sklearn's f1_score function
#print(round(f1_score,4))

## ^^^^^^ up to here: logistic regression, a.k.a. "parametric" machine learning
####
## vvvvvv from here on: decision trees (non-parametric)

# Exercise 1: determining purity
# Gini impurity = 2 * <fraction in class A (e.g. survived)> * (1 - <fraction in class A, i.e. fraction not survived>)
## -> gini = 2*p*(1-p)
# Entropy = -[<fraction in class A>*log2(<fraction in class A>) + (1 - <fraction in class A>)*log2(1 - <fraction in class A>)]
## -> entropy = -[p*log2(p) + (1-p)*log2(1-p)]
## In the formulas below, purity is abbreviated/written as H

## Note: whether gini or entropy is the better choice is not obvious up front,
## but both can be computed and compared in order to pick the better model
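
## A minimal sketch (not part of the course code): computing both purity measures for a
## list of binary labels, using the gini and entropy formulas above. The helper names
## are made up for illustration; uncomment to run.
#def gini_impurity(labels):
#    p = labels.count(1) / len(labels)
#    return 2 * p * (1 - p)

#def entropy(labels):
#    p = labels.count(1) / len(labels)
#    if p == 0 or p == 1:
#        return 0.0  # a pure node has zero entropy
#    return -(p * np.log2(p) + (1 - p) * np.log2(1 - p))

#print(gini_impurity([1, 1, 0, 0, 1]))  # 0.48
#print(entropy([1, 1, 0, 0, 1]))        # ~0.971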

# Exercise 2: computing information gain from purity
# Formula: information gain = H(source node) - (|A|/|source node|)*H(A) - (|B|/|source node|)*H(B)
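
## A small sketch (assumption, not course code): the information gain of a split, reusing the
## hypothetical gini_impurity helper from the sketch above as H. Exercise 8 further down does
## the same computation with inline calculations.
#def information_gain(parent, left, right):
#    h = gini_impurity  # any purity measure H can be plugged in here
#    return (h(parent)
#            - (len(left) / len(parent)) * h(left)
#            - (len(right) / len(parent)) * h(right))

#print(information_gain([1, 1, 1, 0, 0, 0], [1, 1, 1], [0, 0, 0]))  # 0.5 (a perfect split)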

# Exercise 3: building a decision tree model
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)

#model = DecisionTreeClassifier()
#model.fit(X_train, y_train)
#print(model.predict([[3, True, 22, 1, 0, 7.25]]))

# Exercise 4: computing metrics for the decision tree
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'
#X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
#y = df['Survived'].values

#kf = KFold(n_splits=5, shuffle=True, random_state=80613)
#dt_accuracy_scores = []
#dt_precision_scores = []
#dt_recall_scores = []
#lr_accuracy_scores = []
#lr_precision_scores = []
#lr_recall_scores = []
#for train_index, test_index in kf.split(X):
# X_train, X_test = X[train_index], X[test_index]
# y_train, y_test = y[train_index], y[test_index]
# dt = DecisionTreeClassifier(criterion='entropy')
# dt.fit(X_train, y_train)
# dt_accuracy_scores.append(dt.score(X_test, y_test))
# dt_y_pred = dt.predict(X_test)
# dt_precision_scores.append(precision_score(y_test, dt_y_pred))
# dt_recall_scores.append(recall_score(y_test, dt_y_pred))
# lr = LogisticRegression()
# lr.fit(X_train, y_train)
# lr_accuracy_scores.append(lr.score(X_test, y_test))
# lr_y_pred = lr.predict(X_test)
# lr_precision_scores.append(precision_score(y_test, lr_y_pred))
# lr_recall_scores.append(recall_score(y_test, lr_y_pred))
#print("Decision Tree")
#print(" accuracy:", np.mean(dt_accuracy_scores))
#print(" precision:", np.mean(dt_precision_scores))
#print(" recall:", np.mean(dt_recall_scores))
#print("Logistic Regression")
#print(" accuracy:", np.mean(lr_accuracy_scores))
#print(" precision:", np.mean(lr_precision_scores))
#print(" recall:", np.mean(lr_recall_scores))

# Comparing gini vs. entropy
#for criterion in ['gini', 'entropy']:
# print("Decision Tree - {}".format(criterion))
# accuracy = []
# precision = []
# recall = []
# for train_index, test_index in kf.split(X):
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]
#     dt = DecisionTreeClassifier(criterion=criterion)
#     dt.fit(X_train, y_train)
#     y_pred = dt.predict(X_test)
#     accuracy.append(accuracy_score(y_test, y_pred))
#     precision.append(precision_score(y_test, y_pred))
#     recall.append(recall_score(y_test, y_pred))
# print("accuracy:", np.mean(accuracy))
# print("precision:", np.mean(precision))
# print("recall:", np.mean(recall), '\n')

# Exercise 5: plotting the decision tree
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'

#feature_names = ['Pclass', 'male']
#X = df[feature_names].values
#y = df['Survived'].values

#dt = DecisionTreeClassifier()
#dt.fit(X, y)

#fig = plt.figure(figsize=(10,5))
#tree.plot_tree(dt, feature_names=feature_names)
#plt.show()

# Exercise 6: decision tree pruning
# Method 1: limiting the depth of the tree
# Method 2: avoiding leaf nodes with only a few samples
# Method 3: limiting the number of leaf nodes

# Example:
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'

#feature_names = ['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']
#X = df[feature_names].values
#y = df['Survived'].values

#dt = DecisionTreeClassifier(max_depth=3, min_samples_leaf=2, max_leaf_nodes=10)
#dt.fit(X, y)

#fig = plt.figure(figsize=(10,5))
#tree.plot_tree(dt, feature_names=feature_names)
#plt.show()

# Exercise 7: finding the best pruning parameters via grid search
#df = pd.read_csv('https://sololearn.com/uploads/files/titanic.csv')
#df['male'] = df['Sex'] == 'male'

#feature_names = ['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']
#X = df[feature_names].values
#y = df['Survived'].values

#param_grid = {
#    'max_depth': [5, 15, 25],
#    'min_samples_leaf': [1, 3],
#    'max_leaf_nodes': [10, 20, 35, 50]}

#dt = DecisionTreeClassifier()
#gs = GridSearchCV(dt, param_grid, scoring='f1', cv=5)
#dt.fit(X, y)
#gs.fit(X, y)
#print("best params:", gs.best_params_)
#print("best score:", gs.best_score_)

#dt_best = DecisionTreeClassifier(
#    max_depth=gs.best_params_["max_depth"],
#    min_samples_leaf=gs.best_params_["min_samples_leaf"],
#    max_leaf_nodes=gs.best_params_["max_leaf_nodes"])
#dt_best.fit(X,y)

#fig = plt.figure(figsize=(20,10))
#tree.plot_tree(dt_best, feature_names=feature_names)
#plt.show()

# Pros/cons
# A decision tree is:
# -> (+) computationally expensive to build, but cheap to make predictions with
# -> (-) caution: an unfavourable configuration spreads the predictive power badly across many samples,
#        a.k.a. overfitting, because a single sample becomes too influential
# -> (+) easy to understand and to explain

# Exercise 8: computing information gain

#S = [int(x) for x in input().split()]
#A = [int(x) for x in input().split()]
#B = [int(x) for x in input().split()]

#-> gini = 2*p*(1-p)
# Information gain = H(source node) - (|A|/|source node|)*H(A) - (|B|/|source node|)*H(B)
#p_source = S.count(1)/len(S)
#q_source = 1 - p_source
#gini_source = (2 * p_source * q_source)
#print(gini_source)

#p_left = A.count(1)/len(A)
#q_left = 1 - p_left
#gini_left = (2 * p_left * q_left)
#print(gini_left)

#p_right = B.count(1)/len(B)
#q_right = 1 - p_right
#gini_right = (2 * p_right * q_right)
#print(gini_right)

#gain = gini_source - ((len(A)/len(S))*gini_left) - ((len(B)/len(S))*gini_right)
#print(round(gain,5))


## ^^^^^^ up to here: decision trees
####
## vvvvvv from here on: random forests

# Exercise 1: building the random forest
#cancer_data = load_breast_cancer()
#df = pd.DataFrame(cancer_data['data'], columns=cancer_data['feature_names'])
#df['target'] = cancer_data['target']

#X = df[cancer_data.feature_names].values
#y = df['target'].values
#print('data dimensions', X.shape)

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)

#rf = RandomForestClassifier()
#rf.fit(X_train, y_train)

#first_row = X_test[0]
#print("prediction:", rf.predict([first_row]))
#print("true value:", y_test[0])
#print("random forest accuracy:", rf.score(X_test, y_test))

#dt = DecisionTreeClassifier()
#dt.fit(X_train, y_train)
#print("decision tree accuracy:", dt.score(X_test, y_test))

# Exercise 2: random forest tuning
#cancer_data = load_breast_cancer()
#df = pd.DataFrame(cancer_data['data'], columns=cancer_data['feature_names'])
#df['target'] = cancer_data['target']

#X = df[cancer_data.feature_names].values
#y = df['target'].values
#print('data dimensions', X.shape)

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)

#param_grid = {
#    'n_estimators': [10, 25, 50, 75, 100],
#}

# Note: the default for max_features in a random forest is normally "auto", i.e. sqrt(number of features) -> usually a good choice
#rf = RandomForestClassifier(max_features=5, n_estimators=15)
#rf.fit(X_train, y_train)
#rf = RandomForestClassifier(random_state=80613)
## Note 2: scoring='f1' is usually chosen when the dataset is not well balanced, since plain accuracy gives misleading results on imbalanced data
#gs = GridSearchCV(rf, param_grid, scoring='f1', cv=5)

#gs.fit(X, y)
#print("best params:", gs.best_params_)

#first_row = X_test[0]
#print("prediction:", gs.predict([first_row]))
#print("true value:", y_test[0])
#print("random forest accuracy:", gs.score(X_test, y_test))

# Exercise 3: plotting the random forest with an "elbow graph"
#cancer_data = load_breast_cancer()
#df = pd.DataFrame(cancer_data['data'], columns=cancer_data['feature_names'])
#df['target'] = cancer_data['target']

#X = df[cancer_data.feature_names].values
#y = df['target'].values
#print('data dimensions', X.shape)

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)

#n_estimators = list(range(1,101))
## After the run up to 100 trees, the graph starts to flatten at around 10
## -> for better performance, only generate up to that point, since additional trees add too little
#n_estimators = list(range(1,10))
#param_grid = {
#    'n_estimators': n_estimators,
#}

#rf = RandomForestClassifier(random_state=80613)
#gs = GridSearchCV(rf, param_grid, scoring='f1', cv=5)
#gs.fit(X, y)
#print("best params:", gs.best_params_)
#scores = gs.cv_results_['mean_test_score']

#first_row = X_test[0]
##print("prediction:", gs.predict([first_row]))
##print("true value:", y_test[0])
##print("random forest accuracy:", gs.score(X_test, y_test))
#plt.plot(n_estimators, scores)
#plt.xlabel("n_estimators")
#plt.ylabel("accuracy")
##plt.xlim(0, 100)
#plt.xlim(0, 10)
#plt.ylim(0.9, 1)
#plt.show()

# Exercise 4: feature selection (limiting the features used, for performance)
#cancer_data = load_breast_cancer()
#df = pd.DataFrame(cancer_data['data'], columns=cancer_data['feature_names'])
#df['target'] = cancer_data['target']

#X = df[cancer_data.feature_names].values
#y = df['target'].values

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)

#rf = RandomForestClassifier(n_estimators=10, random_state=80613)
#rf.fit(X_train, y_train)

#ft_imp = pd.Series(rf.feature_importances_, index=cancer_data.feature_names).sort_values(ascending=False)
#print(ft_imp.head(10))

## Why feature selection matters:
## - faster model training
## - reduces complexity
## - with the right choice of features -> improved accuracy, because unneeded features (noise) are removed
#print(rf.score(X_test, y_test))
#worst_cols = [col for col in df.columns if 'worst' in col]
#print(worst_cols)

#X_worst = df[worst_cols]
#X_train, X_test, y_train, y_test = train_test_split(X_worst, y, random_state=80613)
#rf.fit(X_train, y_train)
#print(rf.score(X_test, y_test))

# Exercise 5: random forest pros/cons examples

## feature matrix / target array
#X, y = make_circles(noise=0.2, factor=0.5, random_state=1)

# build a df for the plotting display
#df = pd.DataFrame(X, columns=["x", "y"])
#df['target'] = y
#print(df.shape)

#print("x:", X)
#print("y:", y)

#kf = KFold(n_splits=5, shuffle=True, random_state=1)
#lr_scores = []
#rf_scores = []
#for train_index, test_index in kf.split(X):
# X_train, X_test = X[train_index], X[test_index]
# y_train, y_test = y[train_index], y[test_index]
# lr = LogisticRegression(solver='lbfgs')
# lr.fit(X_train, y_train)
# lr_scores.append(lr.score(X_test, y_test))
# rf = RandomForestClassifier(n_estimators=100)
# rf.fit(X_train, y_train)
# rf_scores.append(rf.score(X_test, y_test))
#print("LR accuracy:", np.mean(lr_scores))
#print("RF accuracy:", np.mean(rf_scores))

#plt.scatter(df["x"], df["y"], c=y)
#plt.xlabel('x')
#plt.ylabel('y')
#cbar = plt.colorbar()

#plt.show()

## Best practice (benchmarking):
# For a new classification problem it is common to build both a logistic regression and a random forest model.
# These need little to no tuning at the start to deliver reasonably good results.
# It also becomes immediately apparent which model type is generally the better choice.
# This approach gives first hints about possible/obvious optimizations.

## Exercise 6: practice - building a random forest from input data records
## Param 1: random state for train_test_split & RF
#random_s = int(input())
## Param 2: number of data points
#n_datapoints = int(input())
#rows = []
## Param 3: data for the X array
#for i in range(n_datapoints):
# rows.append([float(a) for a in input().split()])

#X = np.array(rows)
## Param 4: data for the target values
#y = np.array([int(a) for a in input().split()])

#print("randoms: ", random_s)
#print("datapoints: ", n_datapoints)
#print("rows: ", rows)
#print("X: ", X)
#print("y: ", y)

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_s)

#rf = RandomForestClassifier(n_estimators=5, random_state=random_s)
#rf.fit(X_train, y_train)
# Output: prediction on the test set
#print(rf.predict(X_test))
#print("true value:", y_test[0])

## ^^^^^^ up to here: random forests
####
## vvvvvv from here on: neural networks
# Fun fact: an artificial neural network (ANN) is modelled on the biological neural network in the brain
# -> the human brain has 86 billion neurons and roughly 100 trillion synapses, so if a neural network has more
#    than that, it is more densely connected than a human brain

# Basics: neurons
# 3 activation functions (functions that turn a neuron's input into its output)
# A) Sigmoid: 1/(1+e^(-x)) with x = w1*x1 + w2*x2 + b
#    -> output between 0 and 1
# B) Hyperbolic tangent: tanh(x) = sinh(x)/cosh(x) = (e^x - e^(-x))/(e^x + e^(-x))
#    -> output between -1 and 1
# C) Rectified linear unit: ReLU(x) = {0 if x <= 0, x if x > 0}
#    -> output between 0 and x (negative values are clipped to 0)
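
## A minimal sketch (not from the course): the three activation functions above in numpy,
## evaluated on a neuron input x = w1*x1 + w2*x2 + b. The weights and inputs are made-up
## example values; uncomment to experiment.
#def sigmoid(x):
#    return 1 / (1 + np.exp(-x))

#def relu(x):
#    return np.maximum(0, x)

#w1, w2, b = 0.5, -0.3, 0.1   # example weights and bias
#x1, x2 = 2.0, 1.0            # example inputs
#x = w1*x1 + w2*x2 + b
#print("sigmoid:", sigmoid(x))  # in (0, 1)
#print("tanh:   ", np.tanh(x))  # in (-1, 1)
#print("relu:   ", relu(x))     # max(0, x)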

# Neurons are arranged so that their output often serves as the input of further neurons
#  -> multi-layer perceptron (MLP)
#  -> feed-forward (signals are only passed on in one direction)
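
## A tiny sketch (not course code): a feed-forward pass through one hidden layer with two
## neurons and a single output neuron, reusing the hypothetical sigmoid helper from the
## sketch above. Weight values are made up for illustration.
#W1 = np.array([[0.2, -0.4], [0.7, 0.1]])  # input -> hidden weights (2 inputs, 2 hidden neurons)
#b1 = np.array([0.0, 0.1])
#W2 = np.array([0.5, -0.6])                # hidden -> output weights
#b2 = 0.05

#x_in = np.array([1.0, 2.0])
#hidden = sigmoid(x_in @ W1 + b1)    # outputs of the hidden neurons become ...
#output = sigmoid(hidden @ W2 + b2)  # ... the input of the output neuron
#print(output)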

# Training an artificial neural network (ANN)
#  Fundamentally always: optimizing a loss function
#  -> most often cross entropy is used [likelihood p if y = 1, 1-p if y = 0]
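
## A small sketch (assumption, not course code): binary cross entropy for one prediction,
## i.e. the negative log of the likelihood described above (p if y = 1, 1-p if y = 0).
#def cross_entropy(y_true, p_pred):
#    # clip to avoid log(0)
#    p_pred = np.clip(p_pred, 1e-12, 1 - 1e-12)
#    return -(y_true * np.log(p_pred) + (1 - y_true) * np.log(1 - p_pred))

#print(cross_entropy(1, 0.9))  # small loss: confident and correct
#print(cross_entropy(1, 0.1))  # large loss: confident but wrong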

## Exercise 1: generating random datasets (for testing) + plotting
#X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=80613)
#print(X)
#print(y)
#plt.scatter(X[y==0][:, 0], X[y==0][:, 1], s=100, edgecolors='k')
#plt.scatter(X[y==1][:, 0], X[y==1][:, 1], s=100, edgecolors='k', marker='^')
#plt.show()

## Exercise 2: building a neural network
#X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=80613)
#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)
#mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes=(100, 50), alpha=0.0001, solver='adam', random_state=80613)
#mlp.fit(X_train, y_train)
#print("accuracy:", mlp.score(X_test, y_test))

## Exercise 3: using real datasets (MNIST-style handwritten digits)
## Note: the digits dataset stores the digit images as grayscale values from 0 (black) to 16 (brightest white)
#X, y = load_digits(return_X_y=True)
#print(X.shape, y.shape)
#print(X[0].reshape(8,8))
#print(y[0])

## matshow draws the data of x as an 8x8 matrix using the gray colormap
## -> only useful if the data are image data and the image resolution is known
#plt.matshow(X[1].reshape(8,8), cmap=plt.cm.gray)
## the xticks and yticks calls remove the centered coordinate rulers at the edges
#plt.xticks(())
#plt.yticks(())
#plt.show()

#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80613)
#mlp = MLPClassifier(random_state=80613)
#mlp.fit(X_train, y_train)
#x = X_test[1]

#print(mlp.predict([x]))
#print(mlp.score(X_test, y_test))

## check which predictions were incorrect:
## 1) predict the whole test split
## 2) filter the prediction array for wrong predictions and store them
## 3) store the predicted value & true value for display
#y_pred = mlp.predict(X_test)
#incorrect = X_test[y_pred != y_test]
#incorrect_true = y_test[y_pred != y_test]
#incorrect_pred = y_pred[y_pred != y_test]

## display the first one that was incorrect
#j = 0
#plt.matshow(incorrect[j].reshape(8,8), cmap=plt.cm.gray)
#plt.xticks(())
#plt.yticks(())
#plt.show()
#print("True value: ", incorrect_true[j])
#print("predicted value: ", incorrect_pred[j])

## Exercise 4: visualizing MLP weights
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
#print(X.shape, y.shape)
## show the maximum and minimum values
#print(np.min(X), np.max(X))
#print(y[0:5])

## restrict the value range (keep only the digits '0' to '3')
X5 = X[y <= '3']
y5 = y[y <= '3']

mlp = MLPClassifier(
    hidden_layer_sizes=(6,),
    max_iter=200, alpha=1e-4,
    solver='sgd', random_state=80613)

mlp.fit(X5, y5)

## Display of the coefficients
## -> one entry per layer; each entry is an array holding the weights of that layer's nodes
##    (here, for example, the weights of the 6 hidden nodes)
print(mlp.coefs_)
print(mlp.coefs_[0].shape)

fig, axes = plt.subplots(2, 3, figsize=(5, 4))

for i, ax in enumerate(axes.ravel()):
    # each hidden node has one weight per input pixel -> reshape its weight vector to 28x28
    coef = mlp.coefs_[0][:, i]
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray)
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(i + 1)
plt.show()