-
Notifications
You must be signed in to change notification settings - Fork 45
Expand file tree
/
Copy pathlearning_curve.py
More file actions
54 lines (42 loc) · 1.7 KB
/
learning_curve.py
File metadata and controls
54 lines (42 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""Explore learning curves for classification of handwritten digits"""
import matplotlib.pyplot as plt
import numpy
from sklearn.datasets import *
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
def display_digits():
    """Print the digits dataset description and show its first ten images.

    The dataset is obtained from ``load_digits``. Its ``DESCR`` text is
    printed, then the first ten samples are each reshaped to 8x8 and drawn
    in grayscale on a 5-row by 2-column grid of subplots.
    """
    dataset = load_digits()
    print(dataset.DESCR)

    figure = plt.figure()
    # Subplot positions are 1-based, while the sample index is 0-based.
    for slot in range(1, 11):
        image = numpy.reshape(dataset.data[slot - 1], (8, 8))
        axes = figure.add_subplot(5, 2, slot)
        axes.matshow(image, cmap='gray')

    plt.show()
def train_model():
    """Plot test accuracy against the percentage of data used for training.

    Loads the digits dataset via ``load_digits`` and, for every training
    percentage in ``range(5, 95, 5)`` (5% through 90%), runs ``num_trials``
    independent random train/test splits. For each split a
    ``LogisticRegression(C=10**-10)`` model is fit on the training portion
    and scored on the held-out portion. The mean score over the trials is
    stored per percentage and the resulting curve is plotted.

    Returns:
        None. The result is shown in a matplotlib window.
    """
    data = load_digits()
    num_trials = 10
    train_percentages = range(5, 95, 5)
    test_accuracies = numpy.zeros(len(train_percentages))

    for index, percent in enumerate(train_percentages):
        trial_scores = numpy.zeros(num_trials)
        for trial in range(num_trials):
            # A float train_size is interpreted as a fraction of the data;
            # dividing by 100.0 keeps this a true division everywhere.
            x_train, x_test, y_train, y_test = train_test_split(
                data.data, data.target, train_size=percent / 100.0)
            # A fresh model per trial so no state carries over between fits.
            model = LogisticRegression(C=10**-10)
            model.fit(x_train, y_train)
            trial_scores[trial] = model.score(x_test, y_test)
        # Averaging over several random splits smooths out variability.
        test_accuracies[index] = trial_scores.mean()

    plt.figure()
    plt.plot(train_percentages, test_accuracies)
    plt.xlabel('Percentage of Data Used for Training')
    plt.ylabel('Accuracy on Test Set')
    plt.show()
if __name__ == "__main__":
    # Script entry point: swap which call below is commented out to choose
    # between viewing sample digits and plotting the learning curve.
    display_digits()
    # train_model()