6.1. Layers and Modules

narcissuskid
发布于 2023-08-23 / 198 阅读 / 0 评论 / 0 点赞

6.1. Layers and Modules

github:
https://github.com/pandalabme/d2l/tree/main/exercises

1. What kinds of problems will occur if you change MySequential to store modules in a Python list?

If you change MySequential to store its modules in a plain Python list instead of registering them (the original MySequential calls add_module, as nn.Sequential does internally), training fails with ValueError: optimizer got an empty parameter list. PyTorch only tracks the parameters of submodules that are registered on the parent module; modules kept inside an ordinary Python list are invisible to it, so parameters() returns nothing for the optimizer to update. If you use a Python list, you’ll need to manually manage parameter registration (or use nn.ModuleList instead), which can lead to errors if not done correctly.

import torch.nn as nn
import torch
import sys
sys.path.append('/home/jovyan/work/d2l_solutions/notebooks/exercises/d2l_utils/')
import d2l
import warnings
warnings.filterwarnings("ignore")

class MySequential(d2l.Module):
    """Sequential container that keeps its layers in a plain Python list.

    This is the deliberately modified variant asked for by the exercise:
    the layers live in an ordinary list attribute rather than being
    registered as submodules, which is what leads to the
    "optimizer got an empty parameter list" error when the model is trained.
    """

    def __init__(self, *args):
        super().__init__()
        # Plain list on purpose -- this is the behaviour under study.
        self.modules = list(args)

    def forward(self, X):
        """Feed X through every stored layer in order and return the result."""
        out = X
        for layer in self.modules:
            out = layer(out)
        return out
    
class MySequentialMLP(d2l.Classifier):
    """MLP classifier whose layer stack is wrapped in MySequential.

    Args:
        num_outputs: width of the final LazyLinear layer.
        num_hiddens: iterable of hidden widths; each entry adds a
            LazyLinear layer followed by a ReLU.
        lr: learning rate.
    """

    def __init__(self, num_outputs, num_hiddens, lr):
        super().__init__()
        # Called before any extra locals exist, exactly as in the original.
        self.save_hyperparameters()
        stack = [nn.Flatten()]
        for width in num_hiddens:
            stack += [nn.LazyLinear(width), nn.ReLU()]
        stack.append(nn.LazyLinear(num_outputs))
        self.net = MySequential(*stack)
# Baseline run: train the stock d2l.MulMLP on FashionMNIST so there is a
# working reference to compare the list-backed MySequentialMLP against.
hparams = {
    'num_hiddens': [256],
    'num_outputs': 10,
    'lr': 0.1,
}
model = d2l.MulMLP(**hparams)
data = d2l.FashionMNIST(batch_size=256)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(model, data)
(86.71588633954525, 16.023116797208786)

svg

# Same hyperparameters and data, but with the list-backed MySequentialMLP.
# trainer.fit() fails here with
# "ValueError: optimizer got an empty parameter list" (traceback below).
model = MySequentialMLP(**hparams)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(model, data)
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

Cell In[24], line 3
      1 model = MySequentialMLP(**hparams)
      2 trainer = d2l.Trainer(max_epochs=10)
----> 3 trainer.fit(model, data)


File ~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:201, in Trainer.fit(self, model, data)
    199 self.prepare_data(data)
    200 self.prepare_model(model)
--> 201 self.optim = model.configure_optimizers()
    202 self.epoch = 0
    203 self.train_batch_idx = 0


File ~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:346, in Classifier.configure_optimizers(self)
    345 def configure_optimizers(self):
--> 346     return torch.optim.SGD(self.parameters(), lr=self.lr)


File ~/.local/lib/python3.11/site-packages/torch/optim/sgd.py:27, in SGD.__init__(self, params, lr, momentum, dampening, weight_decay, nesterov, maximize, foreach, differentiable)
     25 if nesterov and (momentum <= 0 or dampening != 0):
     26     raise ValueError("Nesterov momentum requires a momentum and zero dampening")
---> 27 super().__init__(params, defaults)


File ~/.local/lib/python3.11/site-packages/torch/optim/optimizer.py:187, in Optimizer.__init__(self, params, defaults)
    185 param_groups = list(params)
    186 if len(param_groups) == 0:
--> 187     raise ValueError("optimizer got an empty parameter list")
    188 if not isinstance(param_groups[0], dict):
    189     param_groups = [{'params': param_groups}]


ValueError: optimizer got an empty parameter list

2. Implement a module that takes two modules as an argument, say net1 and net2 and returns the concatenated output of both networks in the forward propagation. This is also called a parallel module.

class ConcatLayer(d2l.Classifier, d2l.HyperParameters):
    """Parallel module: run two networks on the same input and concatenate.

    Args:
        net1: first network applied to the input.
        net2: second network applied to the same input.
        lr: learning rate.
    """

    def __init__(self, net1, net2, lr):
        super().__init__()
        # forward() reads self.net1 / self.net2; this call is relied on to
        # set them from the constructor arguments.
        self.save_hyperparameters()

    def forward(self, X):
        """Return the outputs of net1 and net2 joined along the last dim."""
        branch_outputs = (self.net1(X), self.net2(X))
        return torch.cat(branch_outputs, dim=-1)

# Two identically configured MLP branches with 5 outputs each; the parallel
# module concatenates them (presumably 10 logits in total -- confirm against
# d2l.MulMLP).
hparams1 = {
    'num_hiddens': [256],
    'num_outputs': 5,
    'lr': 0.1,
}
hparams2 = {
    'num_hiddens': [256],
    'num_outputs': 5,
    'lr': 0.1,
}
model = ConcatLayer(
    d2l.MulMLP(**hparams1),
    d2l.MulMLP(**hparams2),
    lr=0.1,
)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(model, data)

svg

3. Assume that you want to concatenate multiple instances of the same network. Implement a factory function that generates multiple instances of the same module and build a larger network from it.

class ConcatMulMLP(d2l.MulMLP):
    """MulMLP variant whose output concatenates several prefixes of self.net.

    Args:
        num_outputs: forwarded to d2l.MulMLP.
        num_hiddens: forwarded to d2l.MulMLP.
        lr: forwarded to d2l.MulMLP.
        concats: iterable of indices i; for each one the prefix
            self.net[:i+1] is applied to the input.
    """

    def __init__(self, num_outputs, num_hiddens, lr, concats):
        super().__init__(num_outputs, num_hiddens, lr)
        self.save_hyperparameters()

    def forward(self, X):
        """Run each selected prefix of self.net on X and join the results."""
        # NOTE(review): layers of self.net beyond the largest index in
        # `concats` are never executed here -- confirm that is intended.
        partial_outputs = []
        for last in self.concats:
            partial_outputs.append(self.net[:last + 1](X))
        return torch.cat(partial_outputs, dim=-1)
    
# concats=[1, 2] makes forward() concatenate the outputs of net[:2] and
# net[:3].
hparams = {
    'num_hiddens': [16, 8, 2],
    'num_outputs': 5,
    'lr': 0.1,
    'concats': [1, 2],
}
model = ConcatMulMLP(**hparams)
trainer = d2l.Trainer(max_epochs=3)
trainer.fit(model, data)

Reference

  1. https://d2l.ai/chapter_builders-guide/model-construction.html

评论