第一行:N(目标簇数,整数)
第二行:M(站点数,整数)
接下来 M 行:每行两个整数 x y(0≤x,y≤1000)
共输出 N−1 行。第 k 行为完成第 k 次二分后的各簇规模(降序),以空格分隔
3 5 0 0 1 0 10 0 11 0 12 0
3 2 2 2 1
首次把点大致按中间位置分成 {0,1} 与 {10,11,12},规模为 2 与 3;第二次优先二分规模更大且 SSE 更大的簇 {10,11,12} 为 {10} 与 {11,12},此时三个簇规模降序为 2 2 1。本题由牛友@Charles 整理上传
import math import copy def get_dis(coord1, coord2): return math.sqrt((coord1[0] - coord2[0])**2 + (coord1[1] - coord2[1])**2) def SSE(center, cluster): dis = 0.0 for p in cluster: dis += get_dis(center, p) ** 2 return dis def center_coord(cluster): n = len(cluster) x = sum(p[0] for p in cluster) y = sum(p[1] for p in cluster) return [x / n, y / n] def Kmeans_split(cluster): centers = [] temp_coords = copy.deepcopy(cluster) temp_coords.sort(key=lambda x: (x[0], x[1], x[2])) centers.append(temp_coords[0][:2]) temp_coords.sort(key=lambda x: (-x[0], -x[1], x[2])) centers.append(temp_coords[0][:2]) new_clusters = [[], []] for _ in range(1000): new_clusters = [[], []] for p in cluster: best_center = -1 best_dis = 1e9 for idx, c in enumerate(centers): dis = get_dis(c, p) if dis < best_dis: best_center = idx best_dis = dis new_clusters[best_center].append(p) move = 0.0 for idx, new_cluster in enumerate(new_clusters): old_center = centers[idx] centers[idx] = center_coord(new_cluster) move += get_dis(old_center, centers[idx]) if move < 1e-6: break return centers[0], new_clusters[0], centers[1], new_clusters[1] N = int(input().strip()) M = int(input().strip()) coords = [] for i in range(M): coords.append(list(map(int, input().split())) + [i]) # init centers = [center_coord(coords)] clusters = [coords] steps = N - 1 while steps > 0: split_cluster_idx = 0 idx = 0 split_sse = 0.0 for center, cluster in zip(centers, clusters): sse = SSE(center, cluster) if sse > split_sse: split_cluster_idx = idx split_sse = sse idx += 1 new_centers = [] new_clusters = [] idx = 0 for center, cluster in zip(centers, clusters): if idx == split_cluster_idx: idx += 1 continue new_centers.append(center) new_clusters.append(cluster) idx += 1 center1, cluster1, center2, cluster2 = Kmeans_split(clusters[split_cluster_idx]) new_centers.append(center1) new_centers.append(center2) new_clusters.append(cluster1) new_clusters.append(cluster2) centers = new_centers clusters = new_clusters sizes = [len(cluster) for cluster in clusters] sizes.sort(reverse=True) line = "" for idx, number in enumerate(sizes): line += str(number) if idx != len(sizes) - 1: line += " " print(line) steps -= 1
#include <iostream>
#include <queue>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;
struct group {
double SSE;
vector<int>members;//保证升序
group(double SSE, const vector<int>& members): SSE(SSE), members(members) {}
bool operator<(const group& other)const {
if (members.size() == other.members.size()) {
return SSE < other.SSE;
}
return members.size() < other.members.size();
}
};
vector<pair<int, int>>a;
bool cmp(const int p1, const int p2) {
if (a[p1].first == a[p2].first) {
return a[p1].second < a[p2].second;
}
return a[p1].first < a[p2].first;
}
long long dist2(int i, int j) {
return (a[i].first - a[j].first) * (a[i].first - a[j].first) +
(a[i].second - a[j].second) * (a[i].second - a[j].second);
}
double getSSE(vector<int>& mem) {
double x = 0;
double y = 0;
for (int i : mem) {
x += a[i].first;
y += a[i].second;
}
x /= mem.size();
y /= mem.size();
double SSE = 0;
for (int i : mem) {
SSE += (a[i].first - x) * (a[i].first - x) + (a[i].second - y) *
(a[i].second - y);
}
return SSE;
}
int main() {
int n, m;
cin >> n >> m;
a.resize(m);
for (int i = 0; i < m; i++) {
cin >> a[i].first >> a[i].second;
}
//交换顺序不影响结果
sort(a.begin(), a.end());
vector<int>tmp;
for (int i = 0; i < m; i++) {
tmp.push_back(i);
}
priority_queue<group>pq;
map<int, int> flag;
flag[m]++;
pq.push(group(0, tmp));
vector<vector<int>>ans;
while (pq.size() < n) {
group now = pq.top();
pq.pop();
int a1 = now.members[0];
int a2 = now.members[now.members.size() - 1];
flag[now.members.size()]--;
if (flag[now.members.size()] == 0) {
flag.erase(now.members.size());
}
vector<int>mem1, mem2;
mem1.push_back(a1);
for (int i = 1; i < now.members.size() - 1; i++) {
long long d1 = dist2(i, a1);
long long d2 = dist2(i, a2);
if (d1 < d2) {
mem1.push_back(i);
} else {
mem2.push_back(i);
}
}
mem2.push_back(a2);
pq.push(group(getSSE(mem1), mem1));
pq.push(group(getSSE(mem2), mem2));
flag[mem1.size()]++;
flag[mem2.size()]++;
vector<int>tmp;//从小到大
for (auto& pair : flag) {
for (int i = 0; i < pair.second; i++)
tmp.push_back(pair.first);
}
ans.push_back(tmp);
}
for (int i = 0; i < ans.size(); i++) {
//倒序输出
for (int j = ans[i].size() - 1; j >= 0; j--) {
cout << ans[i][j] << " ";
}
cout << "\n";
}
} #include <bits/stdc++.h>
using namespace std;
const int MAXP = 100010; // 最大点数
const int MAXC = 50; // 最大簇数(N ≤ 50假设)
double px[MAXP], py[MAXP]; // 点坐标
int cluster_members[MAXC][MAXP]; // 每簇的成员点下标
int cluster_size[MAXC]; // 每簇成员数
double center_x[MAXC], center_y[MAXC], sse[MAXC]; // 簇心 & SSE
int N, M; // 簇目标数量 & 点数量
void compute_center_sse(int cid) {
double sumx = 0, sumy = 0;
for (int i = 0; i < cluster_size[cid]; ++i) {
int pid = cluster_members[cid][i];
sumx += px[pid];
sumy += py[pid];
}
center_x[cid] = sumx / cluster_size[cid];
center_y[cid] = sumy / cluster_size[cid];
double total_sse = 0;
for (int i = 0; i < cluster_size[cid]; ++i) {
int pid = cluster_members[cid][i];
double dx = px[pid] - center_x[cid];
double dy = py[pid] - center_y[cid];
total_sse += dx*dx + dy*dy;
}
sse[cid] = total_sse;
}
// 二分 K-means 分裂簇 cid -> 新簇 cid1, cid2
bool split_cluster(int cid, int cid1, int cid2) {
if (cluster_size[cid] <= 1) return false;
// 找 x最小和最大点
double minx = px[cluster_members[cid][0]], maxx = minx;
int min_id = cluster_members[cid][0], max_id = min_id;
for (int i = 1; i < cluster_size[cid]; ++i) {
int pid = cluster_members[cid][i];
if (px[pid] < minx) { minx = px[pid]; min_id = pid; }
if (px[pid] > maxx) { maxx = px[pid]; max_id = pid; }
}
double cx1 = px[min_id], cy1 = py[min_id];
double cx2 = px[max_id], cy2 = py[max_id];
for (int it = 0; it < 1000; ++it) {
cluster_size[cid1] = 0;
cluster_size[cid2] = 0;
// 分配
for (int i = 0; i < cluster_size[cid]; ++i) {
int pid = cluster_members[cid][i];
double d1 = (px[pid]-cx1)*(px[pid]-cx1)+(py[pid]-cy1)*(py[pid]-cy1);
double d2 = (px[pid]-cx2)*(px[pid]-cx2)+(py[pid]-cy2)*(py[pid]-cy2);
if (d1 <= d2) cluster_members[cid1][cluster_size[cid1]++] = pid;
else cluster_members[cid2][cluster_size[cid2]++] = pid;
}
if (cluster_size[cid1] == 0 || cluster_size[cid2] == 0) return false;
// 更新簇心
double sumx1 = 0, sumy1 = 0;
for (int i = 0; i < cluster_size[cid1]; ++i) {
int pid = cluster_members[cid1][i];
sumx1 += px[pid]; sumy1 += py[pid];
}
double new_cx1 = sumx1 / cluster_size[cid1];
double new_cy1 = sumy1 / cluster_size[cid1];
double sumx2 = 0, sumy2 = 0;
for (int i = 0; i < cluster_size[cid2]; ++i) {
int pid = cluster_members[cid2][i];
sumx2 += px[pid]; sumy2 += py[pid];
}
double new_cx2 = sumx2 / cluster_size[cid2];
double new_cy2 = sumy2 / cluster_size[cid2];
double move = sqrt((new_cx1-cx1)*(new_cx1-cx1)+(new_cy1-cy1)*(new_cy1-cy1))
+ sqrt((new_cx2-cx2)*(new_cx2-cx2)+(new_cy2-cy2)*(new_cy2-cy2));
cx1 = new_cx1; cy1 = new_cy1;
cx2 = new_cx2; cy2 = new_cy2;
if (move <= 1e-6) break;
}
compute_center_sse(cid1);
compute_center_sse(cid2);
return true;
}
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
cin >> N >> M;
for (int i = 0; i < M; ++i) {
cin >> px[i] >> py[i];
}
int cluster_count =1;
cluster_size[0] = M;
for (int i = 0; i < M; ++i) cluster_members[0][i] = i;
compute_center_sse(0);
while (cluster_count < N) {
// 找 SSE 最大的簇
int best_idx = 0;
for (int i = 1; i < cluster_count; ++i) {
if (sse[i] > sse[best_idx]) best_idx = i;
}
int new_cid1 = cluster_count++;
int new_cid2 = cluster_count++;
if (!split_cluster(best_idx, new_cid1, new_cid2)) {
cluster_count -= 2; // 分裂失败
break;
}
// 删除 old cluster
cluster_size[best_idx] = cluster_size[new_cid1];
for (int i = 0; i < cluster_size[new_cid1]; ++i) {
cluster_members[best_idx][i] = cluster_members[new_cid1][i];
}
center_x[best_idx] = center_x[new_cid1];
center_y[best_idx] = center_y[new_cid1];
sse[best_idx] = sse[new_cid1];
// 最后一个簇改成 new_cid2
cluster_size[new_cid1] = cluster_size[new_cid2];
for (int i = 0; i < cluster_size[new_cid2]; ++i) {
cluster_members[new_cid1][i] = cluster_members[new_cid2][i];
}
center_x[new_cid1] = center_x[new_cid2];
center_y[new_cid1] = center_y[new_cid2];
sse[new_cid1] = sse[new_cid2];
cluster_count--; // 实际增加了1簇
// 输出簇大小降序
int sizes[MAXC];
for (int i = 0; i < cluster_count; ++i) sizes[i] = cluster_size[i];
sort(sizes, sizes+cluster_count, greater<int>());
for (int i = 0; i < cluster_count; ++i) {
cout << sizes[i] << (i+1 == cluster_count ? '\n' : ' ');
}
}
return 0;
}