pytorch Dropout過擬合的操作
發(fā)布日期:2022-04-04 15:56 | 文章來源:CSDN
如下所示:
import torch from torch.autograd import Variable import matplotlib.pyplot as plt torch.manual_seed(1) N_SAMPLES = 20 N_HIDDEN = 300 # training data x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1) y = x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1)) x, y = Variable(x), Variable(y) # test data test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1) test_y = test_x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1)) test_x = Variable(test_x, volatile=True) test_y = Variable(test_y, volatile=True) # show data # plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train') # plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test') # plt.legend(loc='upper left') # plt.ylim((-2.5, 2.5)) # plt.show() net_overfitting = torch.nn.Sequential( torch.nn.Linear(1, N_HIDDEN), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, N_HIDDEN), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, 1), ) net_dropped = torch.nn.Sequential( torch.nn.Linear(1, N_HIDDEN), torch.nn.Dropout(0.5), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, N_HIDDEN), torch.nn.Dropout(0.5), torch.nn.ReLU(), torch.nn.Linear(N_HIDDEN, 1), ) print(net_overfitting) print(net_dropped) optimizer_ofit = torch.optim.Adam( net_overfitting.parameters(), lr = 0.01, ) optimizer_drop = torch.optim.Adam( net_dropped.parameters(), lr = 0.01, ) loss_func = torch.nn.MSELoss() plt.ion() for t in range(500): pred_ofit = net_overfitting(x) pred_drop = net_dropped(x) loss_ofit = loss_func(pred_ofit, y) loss_drop = loss_func(pred_drop, y) optimizer_ofit.zero_grad() optimizer_drop.zero_grad() loss_ofit.backward() loss_drop.backward() optimizer_ofit.step() optimizer_drop.step() if t % 10 == 0: net_overfitting.eval() net_dropped.eval() plt.cla() test_pred_ofit = net_overfitting(test_x) test_pred_drop = net_dropped(test_x) plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.3, label='train') plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.3, label='test') plt.plot(test_x.data.numpy(), test_pred_ofit.data.numpy(), 'r-', lw=3, label='overfitting') plt.plot(test_x.data.numpy(), test_pred_drop.data.numpy(), 'b--', lw=3, label='dropout(50%)') plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).data[0], fontdict={'size': 20, 'color': 'red'}) plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).data[0], fontdict={'size': 20, 'color': 'blue'}) plt.legend(loc='upper left'); plt.ylim((-2.5, 2.5));plt.pause(0.1) net_overfitting.train() net_dropped.train() plt.ioff() plt.show()
補充:pytorch避免過擬合-dropout丟棄法的實現(xiàn)
對于一個單隱藏層的多層感知機,其中輸入個數(shù)為4,隱藏單元個數(shù)為5,且隱藏單元的計算表達式為:
開始實現(xiàn)drop丟棄法避免過擬合
定義dropout函數(shù):
%matplotlib inline import torch import torch.nn as nn import numpy as np def dropout(X, drop_prob): X = X.float() assert 0 <= drop_prob <= 1 keep_prob = 1 - drop_prob # 這種情況下把全部元素都丟棄 if keep_prob == 0: return torch.zeros_like(X) mask = (torch.rand(X.shape) < keep_prob).float() return mask * X / keep_prob
定義模型參數(shù):
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256 W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True) b1 = torch.zeros(num_hiddens1, requires_grad=True) W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True) b2 = torch.zeros(num_hiddens2, requires_grad=True) W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True) b3 = torch.zeros(num_outputs, requires_grad=True) params = [W1, b1, W2, b2, W3, b3]
定義模型將全連接層和激活函數(shù)ReLU串起來,并對每個激活函數(shù)的輸出使用丟棄法。
分別設置各個層的丟棄概率。通常的建議是把靠近輸入層的丟棄概率設得小一點。
在這個實驗中,我們把第一個隱藏層的丟棄概率設為0.2,把第二個隱藏層的丟棄概率設為0.5。
我們可以通過參數(shù)is_training來判斷運行模式為訓練還是測試,并只在訓練模式下使用丟棄法。
drop_prob1, drop_prob2 = 0.2, 0.5 def net(X, is_training=True): X = X.view(-1, num_inputs) H1 = (torch.matmul(X, W1) + b1).relu() if is_training: # 只在訓練模型時使用丟棄法 H1 = dropout(H1, drop_prob1) # 在第一層全連接后添加丟棄層 H2 = (torch.matmul(H1, W2) + b2).relu() if is_training: H2 = dropout(H2, drop_prob2) # 在第二層全連接后添加丟棄層 return torch.matmul(H2, W3) + b3 def evaluate_accuracy(data_iter, net): acc_sum, n = 0.0, 0 for X, y in data_iter: if isinstance(net, torch.nn.Module): net.eval() # 評估模式, 這會關閉dropout acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() net.train() # 改回訓練模式 else: # 自定義的模型 if('is_training' in net.__code__.co_varnames): # 如果有is_training這個參數(shù) # 將is_training設置成False acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() else: acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() n += y.shape[0] return acc_sum / n
訓練和測試模型:
num_epochs, lr, batch_size = 5, 100.0, 256 loss = torch.nn.CrossEntropyLoss() def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'): """Download the fashion mnist dataset and then load into memory.""" trans = [] if resize: trans.append(torchvision.transforms.Resize(size=resize)) trans.append(torchvision.transforms.ToTensor()) transform = torchvision.transforms.Compose(trans) mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform) mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform) if sys.platform.startswith('win'): num_workers = 0 # 0表示不用額外的進程來加速讀取數(shù)據(jù) else: num_workers = 4 train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers) test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers) return train_iter, test_iter def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None): for epoch in range(num_epochs): train_l_sum, train_acc_sum, n = 0.0, 0.0, 0 for X, y in train_iter: y_hat = net(X) l = loss(y_hat, y).sum() # 梯度清零 if optimizer is not None: optimizer.zero_grad() elif params is not None and params[0].grad is not None: for param in params: param.grad.data.zero_() l.backward() if optimizer is None: sgd(params, lr, batch_size) else: optimizer.step() # “softmax回歸的簡潔實現(xiàn)”一節(jié)將用到 train_l_sum += l.item() train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item() n += y.shape[0] test_acc = evaluate_accuracy(test_iter, net) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc)) train_iter, test_iter = load_data_fashion_mnist(batch_size) train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
以上為個人經(jīng)驗,希望能給大家一個參考,也希望大家多多支持本站。
版權聲明:本站文章來源標注為YINGSOO的內(nèi)容版權均為本站所有,歡迎引用、轉(zhuǎn)載,請保持原文完整并注明來源及原文鏈接。禁止復制或仿造本網(wǎng)站,禁止在非www.sddonglingsh.com所屬的服務器上建立鏡像,否則將依法追究法律責任。本站部分內(nèi)容來源于網(wǎng)友推薦、互聯(lián)網(wǎng)收集整理而來,僅供學習參考,不代表本站立場,如有內(nèi)容涉嫌侵權,請聯(lián)系alex-e#qq.com處理。
相關文章
下一篇: