main.py 3.74 KB
Newer Older
Kiryuu Sakuya's avatar
Kiryuu Sakuya committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# -*- coding: utf-8 -*-

# %matplotlib notebook

import tensorflow as tf
import matplotlib.pyplot as pyplot
import numpy
import pandas as panda
from sklearn.utils import shuffle

with tf.compat.v1.Session() as sess:

    # Load the Boston housing data set; the first CSV row is the header.
    read_data = panda.read_csv("03. predicting-boston-house-price/data/boston.csv", header = 0)

    # Uncomment to print summary statistics of the raw data.
    # print(read_data.describe())
    # Convert the DataFrame into a float NumPy array so that the in-place
    # normalization below cannot be truncated by an integer dtype.
    # (Per the original notes the table has 506 rows and 13 columns.)
    read_data = numpy.array(read_data.values, dtype = numpy.float64)

    # Min-max normalize the 12 feature columns (0 to 11) into [0, 1]:
    #     x' = (x - min) / (max - min)
    # The previous code divided by the range without subtracting the minimum,
    # so the scaled values were not actually confined to [0, 1].
    feature_min = read_data[:, :12].min(axis = 0)
    feature_range = read_data[:, :12].max(axis = 0) - feature_min
    # A constant column has zero range; divide by 1 instead so that it maps
    # to 0 rather than producing NaN/inf.
    feature_range[feature_range == 0] = 1.0
    read_data[:, :12] = (read_data[:, :12] - feature_min) / feature_range

    # x_data: every row, feature columns 0-11 (2-D, one row per sample).
    x_data = read_data[:, :12]
    # y_data: every row, column 12, i.e. the label (1-D, one value per sample).
    y_data = read_data[:, 12]
    # print(x_data, "\n shape = ", x_data.shape)
    # print(y_data, "\n shape = ", y_data.shape)

    # Define the feedable graph inputs for the feature and label data.
    # The shapes must match what is fed at training time: the batch
    # dimension is left open (None) and the second dimension is fixed.
    #
    # tf.compat.v1.placeholder is the direct TF2 spelling of the TF1
    # tf.placeholder call. tf.keras.Input is meant for building Keras
    # functional models, and its `shape = (12)` argument was a plain int
    # rather than a tuple, so it is not a reliable target for feed_dict.
    # 12 feature columns per sample.
    x = tf.compat.v1.placeholder(tf.float32, [None, 12], name = "X")
    # 1 label column per sample.
    y = tf.compat.v1.placeholder(tf.float32, [None, 1], name = "Y")

    # Group the model's variables and ops under a common name scope.
    with tf.name_scope("Model"):
        # w is a (12, 1) column vector of weights w1..w12, initialized from a
        # normal distribution with standard deviation 0.01.
        # The name belongs on the Variable; previously it was passed to
        # tf.random.normal, which only named the initializer op and left
        # the variable itself with a default name.
        w = tf.Variable(tf.random.normal([12, 1], stddev = 0.01), name = "w")
        # b is the scalar bias term, initialized to 1.0.
        b = tf.Variable(1.0, name = "b")

        def model(x, w, b):
            """Return the linear prediction ``x @ w + b``.

            x is multiplied with w using matrix multiplication (tf.matmul),
            not element-wise multiply or ``*``; for one sample this computes
            y = x1 * w1 + ... + x12 * w12 + b.
            """
            return tf.matmul(x, w) + b

        # Forward-pass node that produces the predicted label.
        predict = model(x, w, b)

    # Training setup: hyperparameters first.
    # Number of full passes over the data set.
    train_epochs = 50
    # Step size used by gradient descent.
    learning_rate = 0.0251
    # Mean squared error between the labels and the predictions.
    with tf.name_scope("LossFunction"):
        residual = y - predict
        loss_function = tf.reduce_mean(tf.pow(residual, 2))
    # Build the training op with the TF1-style gradient descent optimizer.
    # NOTE: the original author recorded that the v2 form,
    #   tf.keras.optimizers.SGD(learning_rate).minimize(loss_function, var_list=[w, b])
    # failed with "TypeError: 'Tensor' object is not callable", which is why
    # the compat.v1 optimizer is used here.
    gradient_descent = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    optimizer = gradient_descent.minimize(loss_function)
    
    # Train the model.
    # Graph-mode variables hold no value until their initializers are run;
    # without this step the first training op fails with an
    # "uninitialized variable" error.
    sess.run(tf.compat.v1.global_variables_initializer())

    for epoch in range(train_epochs):
        loss_sum = 0.0
        # Stochastic gradient descent: one optimizer step per sample.
        for xs, ys in zip(x_data, y_data):
            # Fed data must match the placeholder shapes:
            # (1, 12) for the features and (1, 1) for the label.
            xs = xs.reshape(1, 12)
            ys = ys.reshape(1, 1)

            _, loss = sess.run([optimizer, loss_function], feed_dict = {x: xs, y: ys})

            loss_sum = loss_sum + loss

        # Shuffle features and labels together so that the next epoch
        # visits the samples in a different order.
        x_data, y_data = shuffle(x_data, y_data)

        # Fetch the current parameters in a single session call.
        b0temp, w0temp = sess.run([b, w])
        # Average per-sample loss over this epoch.
        loss_average = loss_sum / len(y_data)

        print("epoch = ", epoch + 1, "loss = ", loss_average, "b = ", b0temp, " w = ", w0temp)