计算规则(均为行优先展平与输出,四舍五入保留 2 位小数):
输出共 4 行:
1) y_pred(K 个数)
2) MSE(1 个数)
3) 更新后的 W_mlp(D×D 个数,行优先)
4) 更新后的 W_cls(D×K 个数,行优先)
样例输入(共 5 行,依次为 "L,D,K,η"、y、X、W_mlp、W_cls,矩阵均按行优先展平):
1,2,3,0.3
0.5,1.5,2.0
1.0,2.0
1.0,0.0,0.0,1.0
1.0,0.0,0.0,0.0,1.0,1.0
样例输出(共 4 行):
1.00,2.00,2.00
0.17
0.90,-0.10,-0.20,0.80
0.90,-0.10,0.00,-0.20,0.80,1.00
h_mean = [1,2];y_pred = [1,2,2];MSE = 0.17。
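g = (2/K)·(y_pred − y) = (2/3)·[0.5,0.5,0] = [0.33,0.33,0.00];两矩阵的梯度分别为 ∇W_cls = h_meanᵀ·g(D×K)与 ∇W_mlp = x_meanᵀ·(g·W_clsᵀ)(D×D),其中 x_mean 为 X 各行的平均;再以 η=0.3 按 W ← W − η·∇W 更新,即得到上述权重。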
本题由牛友@Charles 整理上传
import sys
import numpy as np
def _read_floats():
    """Read one comma-separated line from stdin and parse every field as float."""
    return [float(tok) for tok in sys.stdin.readline().strip().split(',')]


def _fmt(values):
    """Join the values with commas, each formatted with two decimal places."""
    return ','.join(f"{val:.2f}" for val in values)


def main():
    """Run one forward/backward pass of a two-matrix linear model and print it.

    Reads five comma-separated lines from stdin:
      1. ``L,D,K,eta`` -- sequence length, feature dim, output dim, learning rate
      2. ``y``         -- K target values
      3. ``X``         -- L*D values, reshaped row-major to (L, D)
      4. ``W_mlp``     -- D*D values, reshaped row-major to (D, D)
      5. ``W_cls``     -- D*K values, reshaped row-major to (D, K)

    Prints four lines, every number formatted with two decimals:
      1. ``y_pred`` (K values)
      2. the mean squared error between ``y_pred`` and ``y``
      3. the updated ``W_mlp`` flattened row-major
      4. the updated ``W_cls`` flattened row-major
    """
    # Header line: three integer sizes followed by the float learning rate.
    header = sys.stdin.readline().strip().split(',')
    L, D, K, eta = int(header[0]), int(header[1]), int(header[2]), float(header[3])

    # Targets, input sequence, and the two weight matrices.
    y = np.array(_read_floats())
    X = np.array(_read_floats()).reshape(L, D)
    W_mlp = np.array(_read_floats()).reshape(D, D)
    W_cls = np.array(_read_floats()).reshape(D, K)

    # Forward pass: project every row, average over the sequence, then classify.
    H = X @ W_mlp                  # (L, D)
    h_mean = np.mean(H, axis=0)    # (D,)
    y_pred = h_mean @ W_cls        # (K,)

    # Loss.
    MSE = np.mean((y_pred - y) ** 2)

    # Backward pass: gradient of the MSE with respect to y_pred.
    g = (2.0 / K) * (y_pred - y)   # (K,)

    # Weight gradients. Averaging over rows is linear, so the gradient w.r.t.
    # W_mlp only needs the row-mean of X and the gradient flowing into h_mean.
    grad_W_cls = np.outer(h_mean, g)    # (D, K)
    u = g @ W_cls.T                     # (D,) gradient w.r.t. h_mean
    x_mean = np.mean(X, axis=0)         # (D,)
    grad_W_mlp = np.outer(x_mean, u)    # (D, D)

    # Plain gradient-descent step.
    W_mlp_new = W_mlp - eta * grad_W_mlp
    W_cls_new = W_cls - eta * grad_W_cls

    # Output, two decimals each; matrices are flattened row-major.
    print(_fmt(y_pred))
    print(f"{MSE:.2f}")
    print(_fmt(W_mlp_new.flatten()))
    print(_fmt(W_cls_new.flatten()))


if __name__ == "__main__":
    main()