Architectures¶
Common¶
class NeuralNet(nn.Module):
    """Shared skeleton: forward pass plus prediction helpers.

    ``forward`` relies on ``self.network`` and ``self.reshape``, which this
    skeleton does not define; the concrete architecture has to provide them.
    """

    def __init__(self, init_data, hidden_layers):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        # init network architecture

    def forward(self, x):
        """Run ``x`` through the network and drop all size-1 dimensions."""
        return self.network(self.reshape(x)).squeeze()

    # Helper Functions
    def predict_logits(self, X):
        """Return the raw network output for ``X`` (same as ``forward``)."""
        return self.forward(X)

    def predict_proba(self, X):
        """Return softmax probabilities for ``X``.

        Assumes the classes lie on the last dimension of the logits.
        """
        # dim=-1 rather than dim=0: for batched logits dim 0 is the batch
        # axis, and -1 also works for the 1-D output that squeeze() yields
        # for a single sample.
        return torch.softmax(self.predict_logits(X), dim=-1)

    def predict_from_proba(self, proba):
        """Return the index of the most probable class for each sample."""
        # Same axis as predict_proba, so both helpers agree for 1-D and
        # batched inputs alike.
        return proba.argmax(dim=-1)

    def predict(self, X):
        """Return predicted class indices for ``X``."""
        return self.predict_from_proba(self.predict_proba(X))
One-vs-Rest Classifier¶
class NeuralNet(nn.Module):
    """One-vs-rest classifier: hidden layers followed by one linear output per class.

    Args:
        init_data: Dataset accepted by ``DataLoader`` that yields ``(x, y)``
            pairs. Only the first batch is read, to size the output layer
            and to materialise the lazy layers.
        hidden_layers: Iterable of modules placed before the output layer.

    Raises:
        ValueError: If ``init_data`` yields no batch.
    """

    def __init__(self, init_data, hidden_layers):
        super().__init__()

        # Only the first batch is needed; fail loudly on an empty dataset
        # instead of hitting an unbound variable further down.
        first_batch = next(iter(DataLoader(init_data)), None)
        if first_batch is None:
            raise ValueError("init_data must contain at least one sample")
        x, y = first_batch

        self.input_size = x.shape[-1]
        self.output_size = y.shape[-1]

        # LazyLinear infers its input width on the first forward pass.
        output_layer = nn.LazyLinear(self.output_size)  # output layer
        self.network = nn.Sequential(*hidden_layers, output_layer)

        # init lazy layers
        self.forward(x)

    def reshape(self, x):
        """Insert a single channel dimension after the batch dimension."""
        # batch_size, no_of_channels, width, height
        return x.view(x.shape[0], 1, x.shape[1], x.shape[2])

    def forward(self, x):
        """Return the network output for ``x`` with size-1 dimensions removed."""
        return self.network(self.reshape(x)).squeeze()
One vs Rest with \(k-1\) Classifiers¶
- Advantage: Saves compute when the pre-output layer has many neurons, because the output layer needs only \(k-1\) sets of connections to them instead of \(k\)
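- Disadvantage: Harder to read, because the logit of the last class is not produced by the network but derived from the other \(k-1\) logits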
class NeuralNet(nn.Module):
    """Classifier whose network emits ``k - 1`` logits; the last one is derived.

    Args:
        init_data: Dataset accepted by ``DataLoader`` that yields ``(x, y)``
            pairs. Only the first batch is read, to size the output layer
            and to materialise the lazy layers.
        hidden_layers: Iterable of modules placed before the output layer.

    Raises:
        ValueError: If ``init_data`` yields no batch.
    """

    def __init__(self, init_data, hidden_layers):
        super().__init__()

        # Only the first batch is needed; fail loudly on an empty dataset
        # instead of hitting an unbound variable further down.
        first_batch = next(iter(DataLoader(init_data)), None)
        if first_batch is None:
            raise ValueError("init_data must contain at least one sample")
        x, y = first_batch

        self.input_size = x.shape[-1]
        self.output_size = y.shape[-1]

        # One output fewer than the number of classes: the missing logit is
        # reconstructed in forward().
        output_layer = nn.LazyLinear(self.output_size - 1)  # output layer
        self.network = nn.Sequential(*hidden_layers, output_layer)

        # init lazy layers
        self.forward(x)

    def reshape(self, x):
        """Insert a single channel dimension after the batch dimension."""
        # batch_size, no_of_channels, width, height
        return x.view(x.shape[0], 1, x.shape[1], x.shape[2])

    def forward(self, x):
        """Return all ``k`` logits for ``x`` as one tensor, size-1 dims removed.

        The first ``k - 1`` logits come from the network; the last one is
        ``log(1 - sum(exp(other logits)))``, computed separately per sample.
        """
        logits_except_last = self.network(self.reshape(x))

        # Reduce over the class dimension only, so every sample gets its own
        # last logit; keepdim keeps the shape compatible with torch.cat.
        # NOTE(review): the result is NaN whenever the exponentiated network
        # outputs of a sample sum to more than 1, and -inf when they sum to
        # exactly 1 - confirm the outputs are constrained accordingly.
        mass_except_last = torch.exp(logits_except_last).sum(dim=-1, keepdim=True)
        logit_last = torch.log(1 - mass_except_last)

        # Concatenate into a single tensor (not a tuple) so that softmax and
        # argmax can be applied directly to the result.
        logits = torch.cat((logits_except_last, logit_last), dim=-1)

        # Squeeze only after concatenating, so a single network output
        # (k = 2) is not mistaken for a batch axis.
        return logits.squeeze()
Testing logic
# Start from known probabilities for the first two classes and take their logs
probs_except_last = torch.tensor([0.1, 0.2])
logits_except_last = torch.log(probs_except_last)

# Last-class logit: log of the probability mass the other classes leave over
logit_last = torch.log(1 - logits_except_last.exp().sum())

# Append the derived logit to obtain the full logit vector
logits = torch.cat((logits_except_last, logit_last.reshape(-1)))

# Softmax over the full vector should give back the probabilities
probabilities = logits.softmax(dim=0)

# Print inputs and results for inspection (the probabilities should sum to 1).
# The `=` f-string specifier echoes the expression text, so the variable
# names are part of the printed output.
print(f"{probs_except_last = }")
print(f"{logits_except_last = }")
print()
print(f"{probabilities = }")
print(f"{logits = }")
probs_except_last = tensor([0.1000, 0.2000])
logits_except_last = tensor([-2.3026, -1.6094])
probabilities = tensor([0.1000, 0.2000, 0.7000])
logits = tensor([-2.3026, -1.6094, -0.3567])