汽车状态分类器

  • 这里使用的是UCI的汽车评估数据集
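  • 数据集的属性及其取值(前六列为特征,最后一列 class 为分类标签;每一列独立列出该属性的全部取值):

| buying | maint | doors | persons | lug_boot | safety | class |
|--------|-------|-------|---------|----------|--------|-------|
| vhigh  | vhigh | 2     | 2       | small    | low    | unacc |
| high   | high  | 3     | 4       | med      | med    | acc   |
| med    | med   | 4     | more    | big      | high   | good  |
| low    | low   | 5more |         |          |        | vgood |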
  • 代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding:utf-8 -*-


import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
from urllib.request import urlretrieve


# 获取数据
class Preprocessing:
    """Helpers for fetching and one-hot encoding the car evaluation data.

    Both helpers are static methods, so they can be called either on the
    class (``Preprocessing.load_data()``) or on an instance.
    """

    @staticmethod
    def load_data(download=True):
        """Load the car evaluation table into a DataFrame.

        Args:
            download: When true, fetch the raw file from the UCI archive and
                save it as ``car.csv`` in the working directory before
                reading it. When false, ``car.csv`` must already be there.

        Returns:
            A ``pandas.DataFrame`` with the columns ``buying``, ``maint``,
            ``doors``, ``persons``, ``lug_boot``, ``safety`` and ``class``.
        """
        csv_path = "car.csv"
        if download:
            urlretrieve(
                "http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data",
                csv_path,
            )
            print("Downloaded to car.csv")

        # Column names are supplied explicitly, so pandas treats the first
        # line of the file as data rather than as a header.
        col_names = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
        return pd.read_csv(csv_path, names=col_names)

    @staticmethod
    def covert_2_onehot(data):
        """Expand the categorical columns of *data* into 0/1 indicator columns.

        Each indicator column is named ``<column>_<value>``.

        Args:
            data: DataFrame whose columns are all categorical. One prefix is
                passed per column of *data*, so a frame that also contains
                non-categorical columns is expected to be rejected by pandas
                (prefix-length mismatch) -- TODO confirm if that case matters.

        Returns:
            A new DataFrame holding the indicator columns.
        """
        return pd.get_dummies(data, prefix=data.columns)


# Fetch the raw table, then one-hot encode its categorical columns.
data = Preprocessing.load_data()
new_data = Preprocessing.covert_2_onehot(data)


# Assemble the training material: the first 70% of the shuffled rows are used
# for training, the remaining 30% for testing.
new_data = new_data.values.astype(np.float32)  # plain float32 matrix for the network
np.random.shuffle(new_data)  # reorders the rows in place
sep = int(len(new_data) * 0.7)
train_data, test_data = new_data[:sep], new_data[sep:]


# Build the network (TensorFlow 1.x graph-mode API).
# Every input row carries the 21 one-hot feature columns followed by the
# 4 one-hot class columns, so a single placeholder feeds both x and y.
tf_input = tf.placeholder(tf.float32, [None, 25], "input")
tfx = tf_input[:, :21]
tfy = tf_input[:, 21:]

l1 = tf.layers.dense(tfx, 128, tf.nn.relu, name="l1")  # hidden layer 1, ReLU
l2 = tf.layers.dense(l1, 128, tf.nn.relu, name="l2")  # hidden layer 2, ReLU
out = tf.layers.dense(l2, 4, name="l3")  # output layer: raw logits, no activation
prediction = tf.nn.softmax(out, name="prediction")  # class probabilities

loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)
# Accuracy of the batch that is fed in *this* run. The streaming
# tf.metrics.accuracy update op is deliberately not used: it keeps running
# totals across sess.run calls, so it would report the average over every
# evaluation so far instead of the current accuracy.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(tfy, axis=1), tf.argmax(out, axis=1)), tf.float32)
)
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)  # fixed learning rate
train_op = opt.minimize(loss)

sess = tf.Session()
# Initialise global and local variables (the latter are kept so that any
# metric op added to the graph later is initialised as well).
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))


# Train the network; every 50 steps evaluate on the test split and refresh
# the plots.

# Tick labels for the 4 class columns. Assumes the one-hot class columns are
# in the alphabetical order acc / good / unacc / vgood that pd.get_dummies
# produces for the car data -- verify if the encoding step changes.
class_names = ["accepted", "good", "unaccepted", "very good"]
class_pos = np.arange(4)
# The target class counts never change during training, so count them once.
target_counts = np.bincount(np.argmax(test_data[:, 21:], axis=1), minlength=4)

# Create ONE interactive figure up front and redraw its axes on every
# refresh, instead of opening a new figure at each evaluation.
plt.ion()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))

accuracies, steps = [], []
for t in range(10000):
    # Training step on a random mini-batch of 32 rows (sampled with replacement).
    batch_index = np.random.randint(len(train_data), size=32)
    sess.run(train_op, {tf_input: train_data[batch_index]})

    if t % 50 == 0:
        # Evaluation on the whole test split.
        acc_, pred_, loss_ = sess.run([accuracy, prediction, loss], {tf_input: test_data})
        accuracies.append(acc_)
        steps.append(t)
        print("Step: ", t, "| Accurate: ", acc_, "| Loss: ", loss_)

        # Left plot: predicted vs. target number of samples per class.
        pred_counts = np.bincount(np.argmax(pred_, axis=1), minlength=4)
        ax1.cla()
        bp = ax1.bar(class_pos + 0.1, pred_counts, width=0.2, color='red')
        bt = ax1.bar(class_pos - 0.1, target_counts, width=0.2, color='blue')
        # Ticks and labels are set separately: older Matplotlib releases
        # interpret a second positional argument of set_xticks as `minor`.
        ax1.set_xticks(class_pos)
        ax1.set_xticklabels(class_names)
        ax1.legend(handles=[bp, bt], labels=["prediction", "target"])
        ax1.set_ylim((0, 400))

        # Right plot: accuracy history.
        ax2.cla()
        ax2.plot(steps, accuracies, label="accuracy")
        ax2.set_ylim(ymax=1)
        ax2.set_ylabel("accuracy")
        plt.pause(0.01)  # let the GUI event loop redraw

# Leave interactive mode and keep the final figure open until it is closed.
plt.ioff()
plt.show()
plt.close()
  • 结果:

结果

Donate comment here