PyTorch 提供了两种损失函数的使用方法：函数形式和模块形式。函数形式的损失函数定义在 torch.nn.functional 库中，使用时传入神经网络的预测值和目标值来计算损失；模块形式则是先构建一个损失模块的实例对象，再通过调用该对象（即其 forward 方法）来计算损失，损失函数模块定义在 torch.nn 库中。

在分类问题中，常用的损失函数有: BCELoss、BCEWithLogitsLoss、CrossEntropyLoss。

下面为损失函数使用代码示例：

import torch.nn as nn
import torch.nn.functional as F
import torch


# 1. 函数形式
def test01():
    """Compute a mean-squared-error loss with the functional API.

    Prints the two generated tensors followed by the loss formatted to two
    decimal places. Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    shape = [10, 1]
    # Integer draws converted to float; the prediction is drawn first,
    # the target second.
    prediction = torch.randint(0, 10, size=shape).float()
    expected = torch.randint(0, 10, size=shape).float()

    for tensor in (prediction, expected):
        print(tensor)

    # F.mse_loss takes the network prediction first and the target second.
    mse = F.mse_loss(prediction, expected)
    print('loss: %.2f' % mse.item())


# 2. 模块形式:
def test02():
    """Compute a mean-squared-error loss with the module API.

    Builds an ``nn.MSELoss`` instance, applies it to two seeded tensors and
    prints only the loss, formatted to two decimal places. Returns nothing.
    """
    # Module form: create the loss object once, then call it like a function.
    mse_module = nn.MSELoss()

    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    prediction = torch.randint(0, 10, size=[10, 1]).float()
    expected = torch.randint(0, 10, size=[10, 1]).float()

    loss_value = mse_module(prediction, expected).item()
    print('loss: %.2f' % loss_value)


if __name__ == '__main__':
    # Run both demos in order when the file is executed as a script.
    for example in (test01, test02):
        example()

程序输出结果：

tensor([[4.],
        [9.],
        [3.],
        [0.],
        [3.],
        [9.],
        [7.],
        [3.],
        [7.],
        [3.]])
tensor([[1.],
        [6.],
        [6.],
        [9.],
        [8.],
        [6.],
        [6.],
        [8.],
        [4.],
        [3.]])
loss: 17.70
loss: 17.70

2. BCELoss

BCELoss 用于单标签、多标签的二分类问题，输出和目标值的维度为：(B, C)，B 表示样本数量，C 表示类别数量，每一个 C 值表示属于某个标签的概率。

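单标签二分类的损失计算公式为：

$$L = -\frac{1}{N}\sum_{i=1}^{N}\left[y_i \log x_i + (1 - y_i)\log(1 - x_i)\right]$$

N 表示样本数量;
y 表示样本的真实标签，0 或者 1;
x 表示样本被预测为正类（标签为 1）的概率;
如果样本真实标签为 1，则希望 x 的值越大越好。如果样本的真实标签为 0，则希望 x 的值越小越好。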

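多标签二分类的损失计算公式为：

$$L = -\frac{1}{N}\sum_{i=1}^{N}\frac{1}{M}\sum_{j=1}^{M}\left[y_{ij} \log x_{ij} + (1 - y_{ij})\log(1 - x_{ij})\right]$$

N 表示样本数量;
M 表示每个样本的标签数量;
y 表示某个样本是否属于某个标签，标签值为 0 或者 1;
x 表示某个样本属于某个标签的概率;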

下面为单标签和多标签计算损失的示例代码：

import torch
import torch.nn as nn
import torch.nn.functional as F


# 1. 单标签
def test01():
    """Evaluate ``nn.BCELoss`` on a batch with one binary label per sample.

    Prints the targets, the predictions and then the loss formatted to two
    decimal places. Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    n_samples, n_labels = 10, 1

    # Targets are 0/1 values stored as floats.
    y_true = torch.randint(0, 2, size=[n_samples, n_labels]).float()
    # Random scores are passed through sigmoid, so the predictions lie in
    # (0, 1) as BCELoss requires.
    scores = torch.randn(n_samples, n_labels)
    y_pred = torch.sigmoid(scores).float()

    print(y_true)
    print(y_pred)

    # Module-style loss: the prediction goes first, the target second.
    bce = nn.BCELoss()
    loss = bce(y_pred, y_true)
    print('loss: %.2f' % loss.item())


# 2. 多标签
def test02():
    """Evaluate ``nn.BCELoss`` on a batch with two binary labels per sample.

    Prints the targets, the predictions and then the loss formatted to two
    decimal places. Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    n_samples, n_labels = 10, 2

    # One independent 0/1 target per (sample, label) pair.
    y_true = torch.randint(0, 2, size=[n_samples, n_labels]).float()
    # Sigmoid turns each random score into a value in (0, 1).
    scores = torch.randn(n_samples, n_labels)
    y_pred = torch.sigmoid(scores).float()

    print(y_true)
    print(y_pred)

    # Module-style loss: the prediction goes first, the target second.
    bce = nn.BCELoss()
    loss = bce(y_pred, y_true)
    print('loss: %.2f' % loss.item())


if __name__ == '__main__':
    # Run both demos, separated by a 30-dash rule.
    divider = '-' * 30
    test01()
    print(divider)
    test02()

程序输出结果：

tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])
tensor([[0.5400],
        [0.3529],
        [0.3137],
        [0.7431],
        [0.4350],
        [0.7440],
        [0.6025],
        [0.6984],
        [0.3044],
        [0.3111]])
loss: 0.87
------------------------------
tensor([[0., 1.],
        [1., 0.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 1.]])
tensor([[0.6454, 0.1744],
        [0.4155, 0.8645],
        [0.4462, 0.3224],
        [0.6371, 0.5645],
        [0.4566, 0.3365],
        [0.7187, 0.6198],
        [0.7691, 0.5211],
        [0.2315, 0.4988],
        [0.3733, 0.4239],
        [0.1707, 0.8464]])
loss: 0.81

2. BCEWithLogitsLoss

BCEWithLogitsLoss 和 BCELoss 一样用于单标签或者多标签的二分类问题，它相当于 Sigmoid 和 BCELoss 的结合：先对网络的输出结果进行 Sigmoid 计算，将预测值映射到 (0, 1) 之间，再对其使用 BCELoss 计算损失。

由此可见，当我们的神经网络最后一层使用的是 Sigmoid 时，则直接使用 BCELoss 计算损失，否则使用 BCEWithLogitsLoss 损失函数。

BCEWithLogitsLoss 的计算公式只是比 BCELoss 多了一个 sigmoid 函数计算，即把 BCELoss 公式中的 x 替换为 sigmoid(x)。

下面的示例代码与 BCELoss 的示例相比，仅去掉了对预测值的 sigmoid 计算，并将损失函数换成了 BCEWithLogitsLoss，其他并没有改变：

import torch
import torch.nn as nn
import torch.nn.functional as F


# 1. 单标签
def test01():
    """Evaluate ``nn.BCEWithLogitsLoss`` with one binary label per sample.

    Prints the targets, the raw scores and then the loss formatted to two
    decimal places. Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    n_samples, n_labels = 10, 1

    # Targets are 0/1 values stored as floats.
    y_true = torch.randint(0, 2, size=[n_samples, n_labels]).float()
    # Raw, unbounded scores (logits): no sigmoid is applied here because
    # the loss applies it internally.
    y_pred = torch.randn(n_samples, n_labels).float()

    print(y_true)
    print(y_pred)

    # Module-style loss: the logits go first, the target second.
    bce_logits = nn.BCEWithLogitsLoss()
    loss = bce_logits(y_pred, y_true)
    print('loss: %.2f' % loss.item())


# 2. 多标签
def test02():
    """Evaluate ``nn.BCEWithLogitsLoss`` with two binary labels per sample.

    Prints the targets, the raw scores and then the loss formatted to two
    decimal places. Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    n_samples, n_labels = 10, 2

    # One independent 0/1 target per (sample, label) pair.
    y_true = torch.randint(0, 2, size=[n_samples, n_labels]).float()
    # Raw, unbounded scores (logits); the loss applies sigmoid internally.
    y_pred = torch.randn(n_samples, n_labels)

    print(y_true)
    print(y_pred)

    # Module-style loss: the logits go first, the target second.
    bce_logits = nn.BCEWithLogitsLoss()
    loss = bce_logits(y_pred, y_true)
    print('loss: %.2f' % loss.item())


if __name__ == '__main__':
    # Run both demos, separated by a 30-dash rule.
    divider = '-' * 30
    test01()
    print(divider)
    test02()

程序输出结果：

tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])
tensor([[ 0.1604],
        [-0.6065],
        [-0.7831],
        [ 1.0622],
        [-0.2613],
        [ 1.0667],
        [ 0.4159],
        [ 0.8396],
        [-0.8265],
        [-0.7949]])
loss: 0.87
------------------------------
tensor([[0., 1.],
        [1., 0.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 1.]])
tensor([[ 0.5988, -1.5551],
        [-0.3414,  1.8530],
        [-0.2159, -0.7425],
        [ 0.5627,  0.2596],
        [-0.1740, -0.6787],
        [ 0.9383,  0.4889],
        [ 1.2032,  0.0845],
        [-1.2001, -0.0048],
        [-0.5181, -0.3067],
        [-1.5810,  1.7066]])
loss: 0.81

3. CrossEntropyLoss

CrossEntropyLoss 主要用于多分类问题，输出（预测值）的维度是 (batch, C)，目标通常为形状 (batch,) 的类别索引，batch 表示样本数量，C 表示类别数量。假设：某个样本预测的类别有 10 个，神经网络对 10 个类别都得到了 logits，CrossEntropyLoss 会对这 10 个 logits 计算 log-softmax，得到属于每个类别的对数概率，然后取真实类别对应的对数概率的相反数作为该样本的损失，默认情况下再对所有样本的损失取平均。

下列为使用示例代码：

import torch
import torch.nn as nn
import torch.nn.functional as F


# 1. 单标签
def test01():
    """Evaluate ``nn.CrossEntropyLoss`` with one class index per sample.

    Prints the class-index targets, the logits and then the loss formatted
    to two decimal places. Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    n_samples, n_classes = 10, 4

    # One integer class index per sample, drawn from 0..3.
    y_true = torch.randint(0, n_classes, size=[n_samples,])
    # One logit per (sample, class) pair: 10 samples, 4 classes.
    y_pred = torch.randn(n_samples, n_classes).float()

    print(y_true)
    print(y_pred)

    # Module-style loss: the logits go first, the class indices second.
    cross_entropy = nn.CrossEntropyLoss()
    loss = cross_entropy(y_pred, y_true)
    print('loss: %.2f' % loss.item())


# 2. Multiple targets per sample (K-dimensional cross entropy)
def test02():
    """Evaluate ``nn.CrossEntropyLoss`` with two class targets per sample.

    The logits carry one extra trailing dimension, so each sample has two
    positions that are each classified independently. Prints the targets,
    the logits and then the loss formatted to two decimal places.
    Returns nothing.
    """
    # Seed the global RNG so every run generates the same tensors.
    torch.manual_seed(0)

    n_samples, n_classes, n_targets = 5, 4, 2

    # 5 samples, each with 2 integer class indices drawn from 0..3.
    y_true = torch.randint(0, n_classes, size=[n_samples, n_targets])
    # Logits laid out as (samples, classes, targets) = (5, 4, 2): the class
    # dimension sits at index 1.
    y_pred = torch.randn(n_samples, n_classes, n_targets).float()

    print(y_true)
    print(y_pred)

    # Module-style loss: the logits go first, the class indices second.
    cross_entropy = nn.CrossEntropyLoss()
    loss = cross_entropy(y_pred, y_true)
    print('loss: %.2f' % loss.item())

if __name__ == '__main__':
    # Run both demos, separated by a 30-dash rule.
    divider = '-' * 30
    test01()
    print(divider)
    test02()

程序的输出结果：

tensor([0, 3, 1, 0, 3, 3, 3, 3, 1, 3])
tensor([[ 0.4913, -0.2041, -0.0885,  0.5239],
        [-0.6659,  0.8504, -1.3527, -1.6959],
        [ 0.7854,  0.9928, -0.1932, -0.3090],
        [ 0.5026, -0.8594,  0.7502, -0.5855],
        [ 1.4437,  0.2660,  0.1665,  0.8744],
        [-0.1435, -0.1116, -0.6136,  0.0316],
        [ 1.0554,  0.1778, -0.2303, -0.3918],
        [ 0.5433, -0.3952,  0.2055, -0.4503],
        [ 1.5210,  3.4105, -1.5312, -1.2341],
        [ 1.8197, -0.5515, -1.3253,  0.1886]])
loss: 1.45
------------------------------
tensor([[0, 3],
        [1, 0],
        [3, 3],
        [3, 3],
        [1, 3]])
tensor([[[ 0.4913, -0.2041],
         [-0.0885,  0.5239],
         [-0.6659,  0.8504],
         [-1.3527, -1.6959]],

        [[ 0.7854,  0.9928],
         [-0.1932, -0.3090],
         [ 0.5026, -0.8594],
         [ 0.7502, -0.5855]],

        [[ 1.4437,  0.2660],
         [ 0.1665,  0.8744],
         [-0.1435, -0.1116],
         [-0.6136,  0.0316]],

        [[ 1.0554,  0.1778],
         [-0.2303, -0.3918],
         [ 0.5433, -0.3952],
         [ 0.2055, -0.4503]],

        [[ 1.5210,  3.4105],
         [-1.5312, -1.2341],
         [ 1.8197, -0.5515],
         [-1.3253,  0.1886]]])
loss: 2.14

CrossEntropyLoss 等价于使用 LogSoftmax 和 NLLLoss 的组合。

BCELoss、CrossEntropyLoss

2. BCELoss

2. BCEWithLogitsLoss

3. CrossEntropyLoss

文章目录