好的,我写了一些代码来复现你的问题。我遇到了同样的现象,于是把问题简化了一下。当我把标签函数改成始终选择半径最大的圆(不再使用那些人为设定的规则)时,网络仍然学不会,而是收敛到对 5 个圆中的每一个都预测 0.2 的概率。看来,如果不在输入端对圆进行排序,网络就无法区分它们。考虑到数据在全连接网络中的流动方式,这是说得通的。如果在输入网络之前先对圆进行排序,也许会有一些效果。
```python
import numpy as np
from tqdm import tqdm
# Number of circles described by every sample.
N_CIRCLES = 5

# Centre coordinates are drawn uniformly from [-CENTRE_RANGE, CENTRE_RANGE).
CENTRE_RANGE = 1

# Radii are drawn uniformly from [RMIN, RMAX).
RMIN = 0.1
RMAX = 0.5

# Radius above which the labelling rule switches to "largest circle wins".
THRESHOLD = 0.45
def label(x, threshold=None):
    """Return the index of the circle a sample should be classified as.

    The sample is a flat vector laid out as
    ``[r0, ..., r(n-1), x0, y0, ..., x(n-1), y(n-1)]``: all radii first,
    followed by the interleaved centre coordinates. The number of circles
    ``n`` is derived from the vector length, so it does not need to be
    passed in.

    Rule:
        * if any radius exceeds ``threshold``, pick the largest circle;
        * otherwise pick the circle whose centre is nearest to ``(0, 0)``.

    Args:
        x: 1-D array-like of length ``3 * n``.
        threshold: Radius threshold. ``None`` (the default) uses the
            module-level ``THRESHOLD``.

    Returns:
        Integer index (NumPy integer) of the selected circle.
    """
    x = np.asarray(x)
    n_circles = x.shape[0] // 3
    if threshold is None:
        threshold = THRESHOLD

    radii = x[:n_circles]
    if np.any(radii > threshold):
        return np.argmax(radii)

    # One (x, y) row per circle. The *smallest* distance wins: the previous
    # implementation used argmax here and therefore picked the farthest
    # circle, contradicting the documented rule.
    centres = x[n_circles:3 * n_circles].reshape(n_circles, 2)
    return np.argmin(np.linalg.norm(centres, axis=1))
def generate_sample():
    """Draw one random sample and compute its label.

    Returns:
        A ``(features, target)`` pair. ``features`` is a 1-D array laid out
        as ``[r0..r4, x0, y0, x1, y1, ..., x4, y4]`` (radii first, then the
        centre coordinates); ``target`` is the value ``label`` returns for it.
    """
    # Radii are drawn before the centres, matching the feature layout.
    radii = np.random.uniform(RMIN, RMAX, N_CIRCLES)
    centres = np.random.uniform(-CENTRE_RANGE, CENTRE_RANGE, 2 * N_CIRCLES)
    features = np.concatenate((radii, centres))
    return features, label(features)
def generate_samples(n):
    """Build a batch of ``n`` independently generated samples.

    Args:
        n: Number of samples to generate.

    Returns:
        A ``(features, targets)`` pair: ``features`` has shape
        ``(n, 3 * N_CIRCLES)`` and ``targets`` has shape ``(n,)``. Both are
        float arrays, so the labels are stored as floats.
    """
    features = np.zeros((n, N_CIRCLES * 3))
    targets = np.zeros(n)
    for row in range(n):
        sample, target = generate_sample()
        features[row] = sample
        targets[row] = target
    return features, targets
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small fully connected classifier over circle descriptions.

    Maps a batch of flat feature vectors (``3 * n_circles`` values each) to a
    probability distribution over the ``n_circles`` circles.

    Args:
        n_circles: Number of circles per sample. ``None`` (the default) uses
            the module-level ``N_CIRCLES``.
    """

    def __init__(self, n_circles=None):
        super().__init__()
        if n_circles is None:
            n_circles = N_CIRCLES
        self.fc1 = nn.Linear(3 * n_circles, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, n_circles)

    def forward(self, x):
        """Return per-circle probabilities for a ``(batch, 3 * n_circles)`` input."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No ReLU on the output layer: clamping the logits at zero means that
        # once every output unit goes negative the softmax is exactly uniform
        # and no gradient flows back, so the network gets stuck predicting
        # 1 / n_circles for every class.
        return F.softmax(self.fc3(x), dim=1)
import torch.optim as optim

# Model being trained.
net = Net()

# Adam over every parameter of the network.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

# Mean squared error, averaged over all elements.
loss_function = nn.MSELoss(reduction="mean")
BATCH_SIZE = 100
# Number of optimisation steps; each step trains on one freshly generated batch.
EPOCHS = 1_000

# Loss value recorded after every optimisation step.
losses = []
for epoch in tqdm(range(EPOCHS)):
    X, y = generate_samples(BATCH_SIZE)
    y = np.array(y, dtype=int)

    # One-hot encode with a fixed width of N_CIRCLES. Sizing the matrix from
    # y.max() + 1 would produce too few columns whenever the highest class is
    # absent from the batch, and the target would no longer match the
    # network output shape.
    ohe = np.zeros((y.size, N_CIRCLES))
    ohe[np.arange(y.size), y] = 1

    X = torch.Tensor(X).view(-1, 3 * N_CIRCLES)
    y = torch.Tensor(ohe)

    net.zero_grad()
    yhat = net(X)
    loss = loss_function(yhat, y)
    loss.backward()
    optimizer.step()

    # .item() extracts the scalar loss as a Python float.
    losses.append(loss.item())
import matplotlib.pyplot as plt
import seaborn as sns

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain
# Python file. Request the same backend programmatically when running under
# IPython/Jupyter, and do nothing otherwise.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass  # get_ipython is only defined inside IPython sessions

# Plot the recorded loss values in the order they were appended.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(losses)
ax.set_xlabel("Training step")
ax.set_ylabel("Loss")
plt.show()
```