算法散列4 Hashing - Hard Version

最新推荐文章于 2020-12-18 21:38:10 发布

原创最新推荐文章于 2020-12-18 21:38:10 发布 · 5.3k 阅读

1 ·

CC 4.0 BY-SA版权

算法专栏收录该内容

125 篇文章

订阅专栏

本文探讨了如何从已知的哈希表状态逆向重构输入序列，利用线性探测解决冲突的特点，通过拓扑排序算法，寻找最小入度节点，实现对原始输入序列的正确还原。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

全部每周作业和视频思考题答案和解析见浙江大学数据结构思考题+每周练习答案

题目：Given a hash table of size N, we can define a hash function H(x)=x%N. Suppose that the linear probing is used to solve collisions, we can easily obtain the status of the hash table with a given sequence of input numbers.

However, now you are asked to solve the reversed problem: reconstruct the input sequence from the given status of the hash table. Whenever there are multiple choices, the smallest number is always taken.

Input Specification:

Each input file contains one test case. For each test case, the first line contains a positive integer N (≤1000), which is the size of the hash table. The next line contains N integers, separated by a space. A negative integer represents an empty cell in the hash table. It is guaranteed that all the non-negative integers are distinct in the table.

Output Specification:

For each test case, print a line that contains the input sequence, with the numbers separated by a space. Notice that there must be no extra space at the end of each line.

Sample Input:

11
33 1 13 12 34 38 27 22 32 -1 21

Sample Output:

1 13 12 21 33 34 38 27 22 32

解答：

嗯，有趣，反着来的。

先分析一下有什么特点吧。由题知，线性解决冲突的方法。

首先我们知道，如果表中某个数对表长（这里是11）取余的结果不等于它当前所在的下标，那它一定是因为冲突而被向后位移过的。

题目说每当有多个选择总是取最小的数字，也就是说，比如我们计算一下一定没有冲突的数：

数据：33 1 13 12 34 38 27 22 32 -1 21
位置：0  1 2  3  4  5  6  7  8  9  10
余数：0  1 2  1  1  5  5  0  10    10

33,1,13,38,21

所以第一个进入的数，我们选择的是1。

第二个进入的数可以是多少呢？

可以是任何一个没有冲突的，也可以是算出hash为1、并且在1的基础上只位移了一次的（也就是放在位置2上的数）。但是位置2上的13自身的余数就是2，并不是从位置1位移过来的，所以这样的数并不存在，只能从没有冲突的数里面选个最小的。我们选13。

第三个进入的数可以是多少呢？

可以是没有冲突的，也可以是在1或13的基础上偏移1的。这里有一个，是12，而没有冲突的里面最小的是21，最后选12。

以此类推。

有没有规律呢？

有是有，但是貌似不是很好说，而且这样的表述写程序肯定运行太慢。

总结一下规律，我们似乎可以这么做：

用链表的形式建立哈希表，然后每次从表中找出最小的那个头结点取出来。注意这里建表时的插入顺序就是我们要还原的输入顺序。

因为很显然比如我们按照这种方式建立表：

数据：33 1 13 12 34 38 27 22 32 -1 21
位置：0  1 2  3  4  5  6  7  8  9  10
余数：0  1 2  1  1  5  5  0  10    10
建表：
0 33 22
1 1 12 34
2 13
3 
4 
5 38 27
6
7
8
9
10 32 21

取的时候，我们就把表扫描一遍，然后取出里面最小的那个头结点。

第一次取了1，所以索引为1的那一列的头结点变成了12

第二次取了12，索引为1的那一列头结点变成了34

第三次取了13，索引为2的那一列头结点变成了Null

以此类推。

但是这样是错误的。

因为13取出来之前不能取出12。由此观之，12不能挂在1的后面，而是应该挂在13的后面。

我们重新画图来分析，发现数据的输出问题好像和之前的选修问题很像！也就是说，比如你必须要先修完微积分才能去学习线性代数，然后才能再去学习机器学习，这个问题就是拓扑排序问题。

老规矩，先搞一个一般的拓扑排序问题：

#include <iostream>
#include <queue>
using namespace std;

// Capacity of the fixed-size arrays declared with it (adjacency list, set array).
#define MaxVertexNum 1000
typedef int Vertex;           // a vertex is identified by its integer index

// Adjacency-list graph storage used by the topological sort.

//-------------------- Union-find style aliases --------------------
// NOTE(review): ElementType/SetName/SetType look like leftovers from a
// union-find template; confirm whether anything still needs them.
typedef Vertex ElementType; // elements are represented as non-negative integers
typedef Vertex SetName;     // a set is named by the index of its root
typedef ElementType SetType[MaxVertexNum]; // set elements are indexed from 0 							    
typedef int WeightType;       // edge weights are integers
typedef char DataType;        // per-vertex payload is a single char

// FIFO queue of vertices; TopSort stores vertices whose in-degree reached zero here.
queue<Vertex> myQueue;

// Edge record: one directed, weighted edge, used to pass an edge to InsertEdge.
typedef struct ENode *PtrToENode;
struct ENode {
	Vertex V1, V2;      // directed edge <V1, V2>: V1 is the tail, V2 the head
	WeightType Weight;  // edge weight
};
typedef PtrToENode Edge;
// Adjacency node: one entry in a vertex's singly linked list of outgoing edges.
typedef struct AdjVNode *PtrToAdjVNode;
struct AdjVNode {
	Vertex AdjV;        // index of the adjacent (destination) vertex
	WeightType Weight;  // weight of the edge leading to AdjV
	PtrToAdjVNode Next;    // next adjacency node in the list, or NULL at the end
};
// Head entry of the adjacency list: one per vertex.
typedef struct Vnode {
	PtrToAdjVNode FirstEdge;	// head of this vertex's edge list (NULL when it has no edges)
	DataType Data;				// payload stored with the vertex
								// Note: many graphs carry no vertex data, in which case Data can be dropped
} AdjList[MaxVertexNum];		// AdjList is the adjacency-list type: a fixed array of MaxVertexNum heads
								// The graph node definition follows
// Graph object: vertex/edge counts plus the embedded adjacency list.
typedef struct GNode *PtrToGNode;
struct GNode {
	int Nv;			// number of vertices
	int Ne;			// number of edges
	AdjList G;		// adjacency list, indexed by vertex
};
typedef PtrToGNode LGraph; // graph type stored as an adjacency list (pointer to GNode)

LGraph CreateGraph(int VertexNum)
{
	// Allocate a graph with VertexNum vertices and no edges.
	// Vertices are numbered 0 .. VertexNum - 1; the result is returned
	// as a heap-allocated GNode that the caller owns.
	LGraph fresh = (LGraph)malloc(sizeof(*fresh));

	fresh->Nv = VertexNum;
	fresh->Ne = 0;

	// Every vertex starts with an empty edge list.
	Vertex idx = 0;
	while (idx < fresh->Nv) {
		fresh->G[idx].FirstEdge = NULL;
		++idx;
	}

	return fresh;
}

void InsertEdge(LGraph Graph, Edge E)
{
	// Record the directed edge <V1, V2> by pushing a node for V2 onto the
	// front of V1's adjacency list. Only this one direction is stored,
	// because the topological sort works on a directed graph.
	PtrToAdjVNode *listHead = &Graph->G[E->V1].FirstEdge;

	PtrToAdjVNode link = (PtrToAdjVNode)malloc(sizeof(*link));
	link->Weight = E->Weight;
	link->AdjV = E->V2;

	// Head insertion: the new node becomes the first edge of V1.
	link->Next = *listHead;
	*listHead = link;
}

LGraph BuildGraph()
{
	// Read a directed graph from standard input and return it.
	// Input format: the vertex count, then the edge count, then one
	// "from to weight" triple per edge. Vertex numbers are used exactly as
	// read (0-based).
	// Returns a heap-allocated graph owned by the caller.
	int Nv;

	cin >> Nv;   // number of vertices
	LGraph Graph = CreateGraph(Nv); // Nv vertices, no edges yet

	cin >> Graph->Ne;   // number of edges

	// InsertEdge copies the fields it needs, so a single stack-allocated
	// scratch edge is enough and nothing has to be freed afterwards.
	struct ENode edge;
	for (int i = 0; i < Graph->Ne; i++) {
		// Note: if the weight is not an integer, the read below must change too.
		cin >> edge.V1 >> edge.V2 >> edge.Weight;
		InsertEdge(Graph, &edge);   // insert into the adjacency list
	}

	return Graph;
}

// Adjacency-list storage - topological sort
bool TopSort(LGraph Graph, Vertex TopOrder[])
{
	// Topologically sort Graph, writing vertex indices into TopOrder[] in
	// output order. Returns true when every vertex was output, and false when
	// some vertex never reached in-degree zero (the graph contains a cycle).
	int pending[MaxVertexNum];   // remaining in-degree of each vertex
	PtrToAdjVNode arc;
	Vertex v;

	for (v = 0; v < Graph->Nv; ++v) {
		pending[v] = 0;
	}

	// Count, for every edge <v, arc->AdjV>, one incoming edge at its head.
	for (v = 0; v < Graph->Nv; ++v) {
		arc = Graph->G[v].FirstEdge;
		while (arc != NULL) {
			pending[arc->AdjV] += 1;
			arc = arc->Next;
		}
	}

	// Seed the queue with every vertex that has no predecessor.
	for (v = 0; v < Graph->Nv; ++v) {
		if (pending[v] == 0) {
			myQueue.push(v);
		}
	}

	int emitted = 0;
	while (!myQueue.empty()) {
		v = myQueue.front();
		myQueue.pop();
		TopOrder[emitted] = v;
		++emitted;

		// Removing v releases each successor by one; enqueue those that
		// have just lost their last predecessor.
		for (arc = Graph->G[v].FirstEdge; arc != NULL; arc = arc->Next) {
			pending[arc->AdjV] -= 1;
			if (pending[arc->AdjV] == 0) {
				myQueue.push(arc->AdjV);
			}
		}
	}

	// Fewer outputs than vertices means a cycle kept some in-degree above zero.
	return emitted == Graph->Nv;
}
int main(void) {
	// Read a graph, sort it topologically and print the vertex order
	// (each vertex followed by one space), or a message when the graph has a cycle.
	LGraph myGraph = BuildGraph();
	Vertex TopOrder[MaxVertexNum];

	if (!TopSort(myGraph, TopOrder)) {
		cout << " 有回路 " << endl;
	}
	else {
		int pos = 0;
		while (pos < myGraph->Nv) {
			cout << TopOrder[pos] << " ";
			++pos;
		}
		cout << endl;
	}

	cout << endl;
	system("pause");   // NOTE(review): "pause" is a Windows shell command
	return 0;
}

然后从这个最基本的拓扑排序算法进行拓展。

首先我们需要特别注意，测试中有一项是存在非-1的负数，所以判断某个位置是否有数据的条件要写成 >= 0（0 本身也是合法的数据），而不能写成 != -1。

注意在前面的程序中 typedef char DataType; 因为是用的基础例程修改的，没有注意到竟然这里用的是char类型的！！！调试浪费了好长好长时间。

我们需要一个函数来找到邻接表中，入度为0的最小值，然后让它进队列。

int FindMin(LGraph Graph)
{
	// Find the cell that can be output next: among the cells whose in-degree
	// is currently zero and whose value is non-negative, pick the one holding
	// the smallest value.
	// Returns the index of that cell, or -1 when no cell qualifies.
	int index = -1;
	int min = 0;	// smallest value seen so far; meaningful only once index != -1
	for (int i = 0; i < Graph->Nv; i++)
	{
		if (Indegree[i] != 0 || Graph->G[i].Data < 0)
			continue;	// still blocked, already output, or an empty cell
		// The first candidate is always accepted, so arbitrarily large values
		// are handled instead of being capped by a fixed "infinity" constant.
		if (index == -1 || Graph->G[i].Data < min)
		{
			min = Graph->G[i].Data;
			index = i;
		}
	}
	return index;
}

因为算法非常简单，这里就不再赘述，给出最后答案：

程序代码：

#include <cstdlib>
#include <iostream>
#include <queue>
using namespace std;
// Sentinel in-degree stored in Indegree[] for cells that must never be
// selected (empty cells and cells that were already output).
#define MYINFINITY 65535
// Capacity of the fixed-size arrays below; the hash table may not be larger.
#define MaxVertexNum 1000

// Adjacency-list graph storage used by the topological sort.

//-------------------- Basic type aliases --------------------
// NOTE(review): ElementType and SetName look like leftovers from a
// union-find template; confirm whether anything still needs them.
typedef int ElementType; // elements are represented as non-negative integers
typedef int SetName;     // a set is named by the index of its root
typedef int DataType;        // per-vertex payload: the integer stored in the hash cell

queue<int> myQueue;              // FIFO queue of cell indices
int mySeq[MaxVertexNum];         // hash table contents exactly as read from input
int TopOrder[MaxVertexNum];      // reconstructed input sequence (values, in output order)
int Indegree[MaxVertexNum];      // Indegree[i]: predecessors of cell i not yet output, or MYINFINITY
// Edge record: one directed edge, used to pass an edge to InsertEdge.
typedef struct ENode *PtrToENode;
struct ENode {
	int V1, V2;      // directed edge <V1, V2>: V1 is the tail, V2 the head
};
typedef PtrToENode Edge;
// Adjacency node: one entry in a vertex's singly linked list of outgoing edges.
typedef struct AdjVNode *PtrToAdjVNode;
struct AdjVNode {
	int AdjV;        // index of the adjacent (destination) vertex
	PtrToAdjVNode Next;    // next adjacency node in the list, or NULL at the end
};
// Head entry of the adjacency list: one per hash-table cell.
typedef struct Vnode {
	PtrToAdjVNode FirstEdge;	// head of this vertex's edge list (NULL when it has no edges)
	DataType Data;				// value stored in the corresponding hash cell
								// Note: many graphs carry no vertex data, in which case Data can be dropped
} AdjList[MaxVertexNum];		// AdjList is the adjacency-list type: a fixed array of MaxVertexNum heads
								// The graph node definition follows
// Graph object: vertex/edge counts plus the embedded adjacency list.
typedef struct GNode *PtrToGNode;
struct GNode {
	int Nv;			// number of vertices
	int Ne;			// number of edges
	AdjList G;		// adjacency list, indexed by vertex
};
typedef PtrToGNode LGraph; // graph type stored as an adjacency list (pointer to GNode)

LGraph CreateGraph(int VertexNum)
{
	// Allocate a graph with VertexNum vertices (numbered 0 .. VertexNum - 1)
	// and no edges. Returns a heap-allocated graph owned by the caller.
	LGraph Graph;

	Graph = (LGraph)malloc(sizeof(struct GNode));
	Graph->Nv = VertexNum;
	Graph->Ne = 0;	// no edges yet; keeps the field from being left indeterminate
	// Every vertex starts with an empty edge list.
	for (int V = 0; V<Graph->Nv; V++)
		Graph->G[V].FirstEdge = NULL;
	return Graph;
}

void InsertEdge(LGraph Graph, Edge E)
{
	// Record the directed edge <V1, V2> by pushing a node for V2 onto the
	// front of V1's adjacency list.
	PtrToAdjVNode *slot = &Graph->G[E->V1].FirstEdge;

	PtrToAdjVNode fresh = (PtrToAdjVNode)malloc(sizeof(*fresh));
	fresh->AdjV = E->V2;

	// Head insertion: the new node becomes the first edge of V1.
	fresh->Next = *slot;
	*slot = fresh;
}

int Hash(int Key, int P)
{
	// Division-method hash: the remainder of Key divided by the table size P.
	// The result carries the sign of Key, so a negative Key gives a value <= 0.
	const int remainder = Key % P;
	return remainder;
}

// Read the hash table from standard input and build its dependency graph.
//   N     - receives the table size (the number of cells read).
//   NReal - incremented once per occupied (non-negative) cell; the caller
//           must initialise it, since empty cells are given as negative numbers.
// Cell i holding value x with home slot h = x % N can only have been placed
// after every cell on the probe path h, h+1, ..., i-1 (wrapping around) was
// filled, so each of those cells gets an edge to i.
// Also fills mySeq[] and Indegree[]. Returns a heap-allocated graph owned by
// the caller.
LGraph BuildGraph(int &N,int &NReal)
{
	cin >> N;		// table size
	LGraph Graph = CreateGraph(N); // N vertices, no edges yet
	for (int i = 0;i < N;i++) {
		cin >> mySeq[i];
		Graph->G[i].Data = mySeq[i];
		if (mySeq[i] >= 0)
			NReal++;
	}
	// Reset Indegree[]; kept as its own loop for clarity.
	for (int V = 0; V < Graph->Nv; V++) {
		Indegree[V] = 0;
	}
	for (int i = 0;i < N;i++) {
		if (mySeq[i] < 0) {
			// Empty cell: mark it so that it is never selected.
			Indegree[i] = MYINFINITY;
			continue;
		}
		int home = Hash(mySeq[i], N);
		// InsertEdge copies what it needs, so one stack-allocated scratch
		// edge replaces a heap allocation per edge that was never freed.
		struct ENode edge;
		edge.V2 = i;
		// Walk backwards from i to the home slot; nothing to do when the
		// value already sits in its home slot.
		for (int j = i;j != home;)
		{
			j = (j + N - 1) % N;	// previous cell, wrapping below 0 to N-1
			edge.V1 = j;
			InsertEdge(Graph, &edge);
			Indegree[i]++;
		}
	}
	return Graph;
}

int FindMin(LGraph Graph)
{
	// Find the cell that can be output next: among the cells whose in-degree
	// is currently zero and whose value is non-negative, pick the one holding
	// the smallest value.
	// Returns the index of that cell, or -1 when no cell qualifies.
	int index = -1;
	int min = 0;	// smallest value seen so far; meaningful only once index != -1
	for (int i = 0; i < Graph->Nv; i++)
	{
		if (Indegree[i] != 0 || Graph->G[i].Data < 0)
			continue;	// still blocked, already output, or an empty cell
		// The first candidate is always accepted, so arbitrarily large values
		// are handled instead of being capped by a fixed "infinity" constant.
		if (index == -1 || Graph->G[i].Data < min)
		{
			min = Graph->G[i].Data;
			index = i;
		}
	}
	return index;
}

// Adjacency-list storage - topological sort that always takes the smallest value
int TopSort(LGraph Graph, int TopOrder[])
{
	// Repeatedly output the smallest value among the cells whose predecessors
	// have all been output already. TopOrder[] receives the values (not the
	// cell indices) in output order.
	// Returns the number of values written, which is 0 when no cell is
	// selectable at all (for example when every cell is empty).
	int cnt = 0;

	// FindMin yields -1 once nothing is selectable; checking that before the
	// first use keeps -1 from ever being used as an array index.
	for (int V = FindMin(Graph); V != -1; V = FindMin(Graph)) {
		Indegree[V] = MYINFINITY;	// mark as output so it is never picked again
		TopOrder[cnt++] = Graph->G[V].Data;
		// Outputting V releases every cell that was waiting for it.
		for (PtrToAdjVNode W = Graph->G[V].FirstEdge; W; W = W->Next) {
			--Indegree[W->AdjV];
		}
	}

	return cnt;
}

int main(void) {
	// Read the hash table, reconstruct the input sequence and print it on one
	// line, values separated by single spaces with no trailing space.
	int N=0, NReal=0;
	LGraph myGraph = BuildGraph(N, NReal);

	int cnt = TopSort(myGraph, TopOrder);
	// Write the separator before every value except the first, so that an
	// empty result prints nothing instead of reading TopOrder[-1].
	for (int i = 0;i < cnt;i++) {
		if (i > 0)
			cout << " ";
		cout << TopOrder[i];
	}

	system("pause");	// NOTE(review): "pause" is a Windows shell command
	return 0;
}

测试结果：