// 从1亿个数中找到最大的1000个数
void find_max_data(int *source_data, int length, int k)
{
int count = 0;
multiset<int> set;
multiset<int>::iterator it;
for(int i = 0; i < length; i++)
{
if(count < k)
{
set.insert(source_data[i]);
}
else
{
it = set.begin();
if(*it < source_data[i])
{
set.erase(it);
set.insert(source_data[i]);
}
}
count++;
}
it = set.begin();
// 打印
for (; it != set.end(); it++)
{
cout<<*it<<endl;
}
}
这种情况当然是使用堆排序了,一次构造堆+999次重建堆,在一亿个元素的情况下时间复杂度接近o(n)
/*
一亿个数找最大的1000个数,要求效率高占用内存少。
*/
#include <array>
(2816)#include <algorithm>
using namespace std;
const int NUM_SET_SIZE = 100000000;
const int MAX_AMOUNT = 1000;
array<int, MAX_AMOUNT> find_max_data(array<int, NUM_SET_SIZE> num_set)
{
array<int, MAX_AMOUNT> result;
make_heap(num_set.begin(), num_set.end()); // 构造堆
for (int i = 0; i < MAX_AMOUNT; i++)
// 将last - 1处的值与first处进行交换,并使[first, last - 2]处成为一个有效堆
pop_heap(num_set.begin(), num_set.end() - i);
auto riter = num_set.rbegin();
for (int i = 0; i < MAX_AMOUNT; i++)
result[i] = *riter++;
return result;
}
/* 基数排序 */ #include <iostream> #include <stdlib.h> #include <ctime> #include <algorithm> #include <cassert> #include "windows.h" using namespace std; #define MAXN 100000000 #define MAXM 1000 void find_max_data0( int dest[], int m, int src[], int n ) { fill( dest, dest + n, -INT_MAX ); for ( int i = 0; i < n; i++ ) { for ( int j = 0; j < m; j++ ) { if ( src[i] > dest[j] ) { dest[j] = src[i]; break; } } } } void find_max_data1( int dest[], int m, int src[], int n ) { make_heap( src, src + n ); for( int i = 0; i < m; i++ ) { dest[i] = src[0]; pop_heap( src, src + n-- ); } } void radix_sort( int *src, int n ) { int *tmp = new int[MAXN]; int cnt[256]; for( int i = 0; i < 4; i++ ) { memset( cnt, 0, sizeof(cnt) ); for( int j = 0; j < MAXN; j++ ) { cnt[ ( src[j] >> (8 * i) ) & 0xff ]++; } for ( int j = 1; j < 256; j++ ) { cnt[j] = cnt[j - 1] + cnt[j]; } for( int j = MAXN - 1; j >= 0; j-- ) { tmp[ -- cnt[ ( src[j] >> (8 * i) ) & 0xff ] ] = src[j]; swap( src, tmp ); } } delete [] tmp; } void find_max_data2( int dest[], int m, int src[], int n ) { radix_sort( src, n ); int j = 0; for( int i = n - 1; i >= n - m; i-- ) { dest[j++] = src[i]; } } int src[MAXN]; int main() { ( (unsigned int) time( NULL ) ); src[i] = abs( i * 6516187 ); DWORD t1, t2; t1 = GetTickCount(); find_max_data2( dest2, MAXM, src, MAXN ); t2 = GetTickCount(); printf( "find_max_data2 time cost:%dn", t2 - t1 ); for( int i = 0; i < MAXM; i++ ) { printf( "n" ); return 0; } }