PyTorch 实现线性回归

我们通过手动实现线性回归的假设函数、平方损失、SGD优化方法、以及训练函数来实现对 sklearn make_regression 函数产生的数据集进行拟合,最后通过拟合直线、训练损失变化进行可视化。

import torch
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import random


# 模型参数
w = torch.tensor(0.1, requires_grad=True, dtype=torch.float64)
b = torch.tensor(0.0, requires_grad=True, dtype=torch.float64)


# 1. 数据集函数
def create_dataset():

    x, y, coef = make_regression(n_samples=100,
                                 n_features=1,
                                 noise=10,
                                 coef=True,
                                 bias=14.5,
                                 random_state=0)

    # 转换为张量
    x = torch.tensor(x)
    y = torch.tensor(y)

    return x, y, coef


# 2. 构建数据加载器
def data_loader(x, y, batch_size):

    data_len = len(y)
    data_index = list(range(data_len))
    random.shuffle(data_index)
    batch_number = data_len // batch_size

    for idx in range(batch_number):

        start = idx * batch_size
        end = start + batch_size

        batch_train_x = x[start: end]
        batch_train_y = y[start: end]

        yield batch_train_x, batch_train_y


# 3. 假设函数
def linear_regression(x):
    return w * x + b


# 4. 损失函数
def square_loss(y_pred, y_true):
    return (y_pred - y_true) ** 2


# 5. 优化方法
def sgd(lr=0.01):
    # 使用批量样本的平均梯度
    w.data = w.data - lr * w.grad.data / 16
    b.data = b.data - lr * b.grad.data / 16


# 6. 训练函数
def train():

    # 加载数据集
    x, y, coef = create_dataset()
    # 定义训练参数
    epochs = 100
    learning_rate = 0.01
    # 存储损失
    epoch_loss = []
    total_loss = 0.0
    train_sample = 0

    for _ in range(epochs):

        for train_x, train_y in data_loader(x, y, 16):

            # 训练数据送入模型
            y_pred = linear_regression(train_x)

            # 计算损失值
            loss = square_loss(y_pred, train_y.reshape(-1, 1)).sum()
            total_loss += loss.item()
            train_sample += len(train_y)

            # 梯度清零
            if w.grad is not None:
                w.grad.data.zero_()

            if b.grad is not None:
                b.grad.data.zero_()

            # 反向传播
            loss.backward()

            # 更新参数
            sgd(learning_rate)

            print('loss: %.10f' % (total_loss / train_sample))

        epoch_loss.append(total_loss / train_sample)


    # 绘制拟合直线
    print(coef, w.data.item())
    plt.scatter(x, y)

    x = torch.linspace(x.min(), x.max(), 1000)
    y1 = torch.tensor([v * w + 14.5 for v in x])
    y2 = torch.tensor([v * coef + 14.5 for v in x])

    plt.plot(x, y1, label='训练')
    plt.plot(x, y2, label='真实')
    plt.grid()
    plt.legend()
    plt.show()

    # 打印损失变化曲线
    plt.plot(range(epochs), epoch_loss)
    plt.title('损失变化曲线')
    plt.grid()
    plt.show()


if __name__ == '__main__':
    train()

输出结果:

loss: 1296.1368728541
loss: 2436.9092349261
loss: 2223.9904977827
loss: 2226.8678712042
loss: 2346.9798105877
loss: 2059.6439491472
loss: 1913.1633767652
loss: 2027.7041992174
loss: 1958.8407893180
loss: 1936.7456442893
loss: 1958.9884082751
loss: 1836.6773229252
...
loss: 200.2883544967
loss: 200.1779015367
loss: 200.0914139110
loss: 199.9625017734
loss: 199.8119659842
loss: 199.5681193811
loss: 199.4116960121
loss: 199.3038260199
loss: 199.2196718821
loss: 199.0935106288
loss: 198.9459334305
loss: 198.7059688806
42.38550485581797 42.887112362029214

Process finished with exit code 0

未经允许不得转载:一亩三分地 » PyTorch 实现线性回归
评论 (0)

1 + 1 =