首页 > 试题广场 >

压缩数据

[编程题]压缩数据

热度指数：280 时间限制：C/C++ 1秒，其他语言2秒空间限制：C/C++ 32M，其他语言64M
算法知识视频讲解

霍夫曼编码是一种被广泛应用而且非常有效的数据压缩技术，它使用一张字符出现频度表，根据它来构造一种将每个字符表示成二进制串的最优方式，一般可压缩掉20%~90%。
|---------+-----+-----+-----+-----+------+------|
| letter | a | b | c | d | e | f |
|---------+-----+-----+-----+-----+------+------|
| count | 45 | 13 | 12 | 16 | 9 | 5 |
| code | 000 | 001 | 010 | 011 | 100 | 101 |
| huffman | 0 | 101 | 100 | 111 | 1101 | 1100 |
|---------+-----+-----+-----+-----+------+------|
如上表所示，假设一篇文章包含45个字母a、13个字母b……，如果用长度相同的编码000、001等去存储这些信息，则需要（45＋13＋12＋16＋9＋5）×3＝300位空间。
但如果换成可变长编码，即编码长度不固定的霍夫曼编码，则仅需要45×1＋13×3＋12×3＋16×3＋9×4＋5×4＝224位空间。
现在给你这些原始数据，请你计算改用霍夫曼编码之后需要多少位空间。

输入描述:

输入包含多组数据，每组数据第一行包含一个正整数n（2≤n≤50），表示字符的数量。

第二行包含n个正整数，分别表示这n个字符出现的次数，每个字符出现次数不超过10000。

输出描述:

对应每一组数据，输出使用霍夫曼编码需要多少位空间存储这些信息。

示例1

输入

6
45 13 12 16 9 5

输出

算法知识视频讲解

查看答案及解析

推荐

doodle777

用 priority_queue 来存储 Hoffman 树的中间节点，并手工模拟建立Hoffman树的过程，每次从队列中取出最小的两个节点，建立父节点联系，然后放入到并查集和队列。重复此过程，直到队列为空。这时的并查集即为最终的Hoffman树。用 vector 来存储此Hoffman树。注释已经写的很详细了。

#include <iostream>
#include <queue>
#include <vector>
using namespace std;

struct Letter
{
	int index;			// 在vector中存储的下标值，方便随机查找
	int count;			// 节点计数，用于 Hoffman 过程
	int parent = -1;	// 父节点，初始化为-1表示没有父节点
	bool operator<(const Letter & a) const		// 用于优先队列排序
	{
		return count > a.count;
	}
};

void Hoffman(priority_queue<Letter> & que, vector<Letter> & list)
{
	Letter lt1, lt2, root;
	while (!que.empty())						// 如果队列非空，每次弹出两个（子节点），压入一个（父节点）
	{
		lt1 = que.top(); que.pop();
		lt2 = que.top(); que.pop();

		root.index = list.size();				// root 表示父节点，将其放到vector最后
		root.count = lt1.count + lt2.count;		// 父节点计数等于子节点之和
		list[lt1.index].parent = root.index;	// 建立并查集父子关系
		list[lt2.index].parent = root.index;
		list.push_back(root);					// 将父节点放到vector最后
		if (que.empty()) break;					// 如果队列为空，退出过程。（但是前提是需要将父节点加入到并查集）
		que.push(root);							// 将父节点压入队列进行下一次循环
	}
}

int solve(vector<Letter> & list, int n)
{
	int count = 0;
	for (int i = 0; i < n; ++i)			// 对每个码统计码长
	{
		int l = 0, j = i;
		while (list[j].parent != -1)	// 如果有父节点，高度加一
		{
			++l;
			j = list[j].parent;			// 迭代往上查找
		}
		count += l * list[i].count;		// 计算这个字符的总码数（等于单个码长*字符个数）
	}
	return count;						// 返回总码长
}

int main()
{
	int n;
	while (cin >> n && n)
	{
		priority_queue<Letter> que;		// 优先队列，用于Hoffman过程
		vector<Letter> list;			// 存储最终的Hoffman树对应的并查集
		Letter temp;
		for (int i = 0; i < n; ++i)
		{
			temp.index = i;				// 叶子节点按顺序存储
			cin >> temp.count;			// 此即为叶子节点的频率
			que.push(temp);
			list.push_back(temp);
		}

		Hoffman(que, list);
		cout << solve(list, n) << endl;	// 只计算前n个，因为后面的都是父节点
	}
	return 0;
}

编辑于 2015-12-27 18:40:46 回复(0)

ID84848484

// write your code here
import java.util.*;

public class Main{
    public static void main(String[] args){
        Scanner in=new Scanner(System.in);
        while(in.hasNext()){
        int n=in.nextInt();
        Queue<Integer> pq=new PriorityQueue<>();

        for(int i=0;i<n;i++){
            int tem=in.nextInt();
            pq.add(tem);
        }

        int sum=0;
        while(true){
            int tem=pq.poll()+pq.poll();
            sum+=tem;
            if(pq.isEmpty())
                break;
            pq.add(tem);
        }

        System.out.println(sum);
        }
    }
}

发表于 2019-05-12 15:10:57 回复(0)

麻麻我想要offer

importjava.util.*;

importjava.lang.*;

publicclassMain{

publicstaticintHuffman(int[] A,intn){

intsum=0;

for(inti=0;i<n-1;i++){

Arrays.sort(A);

A[i+1]=A[i]+A[i+1];

A[i]=0;

sum+=A[i+1];

}

returnsum;

}

publicstaticvoidmain(String[] args){

Scanner sc = newScanner(System.in);

while(sc.hasNext()){

intn =sc.nextInt();

int[] A = newint[n];

for(inti=0;i<n;i++)

A[i] = sc.nextInt();

intres = Huffman(A,n);

System.out.println(res);

}

这是什么思路没看懂啊，有没有人能解释一下谢谢各位

发表于 2019-03-07 17:28:08 回复(2)

C/C++

Cavs

#include <iostream>
#include <queue>

using namespace std;

struct TreeNode{
 int _val;
 TreeNode* _left{ nullptr }, *_right{ nullptr };
 TreeNode(int val) :_val(val){}
};

struct cmp{
 bool operator ()(TreeNode* t1, TreeNode* t2)
 {
  return t1->_val > t2->_val;
 }
};

int main()
{
 //freopen("in.txt", "r", stdin);
 int n,val;
 while (cin >> n && n)
 {
  priority_queue<TreeNode*, vector<TreeNode*>, cmp> pq;
  for (int i = 0; i < n; ++i)
  {
   cin >> val;
   pq.emplace(new TreeNode(val));
  }
  while (pq.size() > 1)
  {
   auto p1 = pq.top();
   pq.pop();
   auto p2 = pq.top();
   pq.pop();
   auto p = new TreeNode(p1->_val + p2->_val);
   p->_left = p1;
   p->_right = p2;
   pq.emplace(p);
  }

  int res = 0;
  int bits = 0;
  vector<TreeNode*> cur{ pq.top() };
  while (!cur.empty())
  {
   vector<TreeNode*> next;
   for (auto& node : cur)
   {
    if (!node->_left && !node->_right) res += bits * node->_val;
    if (node->_left) next.emplace_back(node->_left);
    if (node->_right) next.emplace_back(node->_right);
   }
   swap(cur, next);
   ++bits;
  }
  cout << res << endl;
 }
 return 0;
}

发表于 2017-07-28 21:54:45 回复(0)

柳风鍚ф墦鍒

也是用优先队列，不过没有模拟霍夫曼的过程只需要模仿结果就好

#include<iostream>
#include<vector>
#include<queue>
using namespace std;
int main(){
    priority_queue<int,vector<int>,greater<int> >que;
    int n=0;
    while(cin>>n){
        int sum=0;
        int temp;
        while(n--){
            cin>>temp;
            que.push(temp);
        }
        while(true){
            temp=que.top();
            que.pop();
            temp+=que.top();
            que.pop();
            sum+=temp;
            if(que.empty())
                break;
            que.push(temp);
        }
        cout<<sum<<endl;
    }
}

发表于 2016-09-16 17:33:26 回复(0)

1hani1

小顶堆是greater！ 小顶堆是greater！！小顶堆是greater！！！

先选取最小的两个数对其编码，然后将和放回堆中，相当于对这两个数已经编了一位，然后将和再扔进堆里

#include<iostream>
#include<vector>
#include<algorithm>
using namespace std;
int main()
{
    int  n;
    while(cin>>n)
    {
        vector<int> v(n);
        for(auto &x:v)
            cin>>x;
        make_heap(v.begin(),v.end(),greater<int>());
        int res=0;
        vector<int> num(n);
        while(v.size()>=2)
        {

            pop_heap(v.begin(),v.end(),greater<int>());
            int tmp=v.back();
            v.pop_back();
            pop_heap(v.begin(),v.end(),greater<int>());
            tmp += v.back();
            v.pop_back();
            res+=tmp;
            v.push_back(tmp);
            push_heap(v.begin(),v.end(),greater<int>());
        }
        cout<<res<<endl;
    }
    return 0;
}

发表于 2018-09-05 11:41:43 回复(0)

Java

adore_Geek

// write your code here
//博客里有分享思路https://blog.csdn.net/m0_37924505/article/details/79757590
import java.util.*;
public class Main {  public static void main(String[] args)
 {  Scanner sc = new Scanner(System.in);  while(sc.hasNext()) {  int num = sc.nextInt();  int[] figure = new int[num];  for (int i = 0; i  figure[i] = sc.nextInt();  }  LinkedList arr = new LinkedList();  for (int i = 0; i  arr.add(new Node(figure[i]));  }  //数字从小到大排序  Comparator comparator = new Comparator() {  @Override  public int compare(Node o1, Node o2) {  return o1.val-o2.val;  }  };    ArrayList myMaps = new ArrayList();  Node root = getHuffmanDataSpace(arr, comparator);  int dep =0;//节点深度  //遍历树  gainHuffmancode(root,myMaps,dep);  int getres = getres(myMaps);  System.out.println(getres);  }  }    //构造树，返回根节点  public static Node getHuffmanDataSpace(LinkedList arr,Comparator comparator) {  while(arr.size()!=1) {  Collections.sort(arr,comparator);  Node n1 = arr.removeFirst();  Node n2 = arr.removeFirst();  Node n3 = new Node(n1.val+n2.val);  n3.left=n1.val n3.right=n1.val>n2.val?n1:n2;  arr.add(n3);  }  return arr.get(0);  }  /**   *   * @param root 二叉树   * @param hashMap  （字符数量，代表字母数量哈夫码空间大小）   * @param figure 储存字符数量的数组   */  public static void gainHuffmancode(Node root, ArrayList myMaps, int dep) {  if(root.left==null&&root.right==null) {  myMaps.add(new MyMap(root.val, dep));  return;  }  if(root.left!=null) {  dep++;  gainHuffmancode(root.left, myMaps,  dep);  dep--;  }  if(root.right!=null) {  dep++;  gainHuffmancode(root.right, myMaps,  dep);  dep--;  }    }    public static int getres(ArrayList myMaps) {  int totalspace = 0;  //遍历myMaps  for (MyMap myMap : myMaps) {  totalspace +=myMap.key*myMap.value;  }  return totalspace;  }    //构造树节点  static class Node{  int val;  Node left;  Node right;  public Node(int val) {  this.val = val;  }  }    static class MyMap{  int key;  int value;  public MyMap(int key,int value) {  this.key=key;  this.value=value;  }  }   }

编辑于 2018-03-30 17:07:54 回复(0)

Java

詆調壹點

import java.util.*;
public class Main {
	public static void main(String[] args) {
		Scanner sc = new Scanner(System.in);
		while (sc.hasNext()) {
			int n = sc.nextInt();
			int[] arr = new int[n];
			for (int i = 0; i < arr.length; i ++ ) {
				arr[i] = sc.nextInt();
			}
			System.out.println(Hoffman(arr));
		}
	}
	public static int Hoffman(int[] arr) {
		PriorityQueue<Node> queue = new PriorityQueue<>();
		for (int i = 0; i < arr.length; i ++ ) {
			Node node = new Node(arr[i], null, null);
			queue.add(node);
		}
		Node root = null;
		while (queue.size() > 1) {
			Node left = queue.poll();
			Node right = queue.poll();
			root = new Node(left.weight + right.weight, left, right);
			queue.add(root);
		}
		return getLength(root, 0);
	}
	public static int getLength(Node node, int level) {
		if(node.left == null || node.right == null) return node.weight * level;
		return getLength(node.left, level + 1) + getLength(node.right, level + 1);
	}
	static class Node implements Comparable<Node> {
		int weight;
		Node left;
		Node right;
		public Node(int weight, Node left, Node right) {
			this.weight = weight;
			this.left = left;
			this.right = right;
		}
		@Override
		public int compareTo(Node o) {
			return this.weight - o.weight;
		}
	}
}

发表于 2016-10-12 16:41:16 回复(0)

谓所无

#include<stdio.h>
#define N 100
int main()
{

    int n;
    int top,size;
    int pool[N][2];
    while(scanf("%d",&top)!=EOF){

        for(int i=0;i<top;i++)
        {
            scanf("%d",&pool[i][0]);
            pool[i][1] = -1;//标记是否使用过 -1表示未筛选用过
        }
        n = top;
        size = top;
        while(n!=1){//pool里面不只一个数据

            int m1,m2;
            int s = 0;
            for(int i = 0; i < top; i++)
            {
                if(pool[i][1]==-1)
                {
                    s = i;
                    break;
                }

            }
            m1 = s;//未选择数据中 找最小的
            for(int i = s; i < top; i++)
            {
                if(pool[i][0] < pool[m1][0] && pool[i][1]==-1)
                {
                    m1 = i;
                }
            }
            pool[m1][1] = top;//指向父节点


            for(int i = 0; i < top; i++)
            {
                if(pool[i][1]==-1 && i!=m1)
                {
                    s = i;
                    break;
                }

            }
            m2 = s;//未选择数据中 找次小的
            for(int i = s; i < top; i++)
            {
                if(pool[i][0] < pool[m2][0] && pool[i][1]==-1 && i!= m1)
                {
                    m2 = i;
                }
            }
            pool[m2][1] = top;
            pool[top][1] = -1;
            pool[top++][0] = pool[m1][0]+pool[m2][0];
            /*for(int i=0;i<top;i++)
            {
                if(pool[i][1]==-1)
                    printf("%d ",pool[i][0]);
            }
            printf("\n");*/

            n--;//可用数据减1
        }

        for(int i=0;i<size;i++)
        {
            int step = 0;
            int cur = i;
            while(pool[cur][1]!=-1)
            {
                step++;
                cur = pool[cur][1];
            }
            pool[i][1] = step;//确定数据的访问步数(亦 树节点的高度)
        }
        int sum = 0;
        for(int i=0;i<size;i++)
            sum += pool[i][0]*pool[i][1];
         printf("%d\n",sum);
    }
    return 0;
}

没有优化,在一个数组中构建哈弗曼树；循环用得太多....

编辑于 2016-08-06 09:09:06 回复(0)