【编程珠玑】读书笔记 第二章 算法

2013-07-11 22:00:28

第二章 算法

本章围绕三个问题进行算法讨论,包括元素的查找、字符串的旋转、以及变位词的查找。

下面给出了实现代码、以及测试结果。

问题一 查找不存在的元素

思路一:用位图;

思路二:用二分搜索法,这种方法需要先对数组排序;

若用二分搜索法,有以下限制:

满足以下假设:
1)数组中没有重复元素
2)数组中必有缺失元素

书中给出的问题也正是具有这样的特殊性,因此可以使用二分法进行缺失元素的查找。

下面的代码给出了上述两种方法的实现代码,以及测试代码所需要的数据产生程序、位图排序程序,后面附上了测试结果。

注意:此处的代码没有数据的合法性检查,假设数据满足上面的条件。

完整代码:

  1 #include <iostream>
  2 #include <bitset>
  3 #include <ctime>
  4 using namespace std;
  5 
  6 const int MAX_NUMBER = 10000010;
  7 const int NUMBER_OF_DATA = 10000000;
  8 
  9 //产生在区间[lowerBound,upperBound]内的一个随机数
 10 int RandomInt(const int lowerBound,const int upperBound)
 11 {
 12     return ( lowerBound + ( RAND_MAX * rand() + rand() ) % (upperBound - lowerBound + 1) );
 13 }
 14 
 15 //产生在[0,maxOfRandomInt]范围内的numberOfRandomInt个随机数
 16 //结果存入数组randomInt中
 17 void RandomIntGen(const int numberOfRandomInt,const int maxOfRandomInt,int randomInt[])
 18 {
 19     int i;
 20     int tmp;
 21     int randomTmp;
 22     int *sequenceInt = new int[maxOfRandomInt];
 23 
 24     srand((unsigned) time(NULL));
 25 
 26     for (i = 0; i < maxOfRandomInt; i++)
 27     {
 28         sequenceInt[i] = i;    
 29     }
 30 
 31     for (i = 0; i < numberOfRandomInt; i++) 
 32     {
 33         randomTmp = RandomInt(i, maxOfRandomInt - 1);
 34         tmp = sequenceInt[randomTmp];
 35         sequenceInt[randomTmp] = sequenceInt[i];
 36         sequenceInt[i] = tmp;
 37         randomInt[i] = sequenceInt[i];    //随机数保存在数组中
 38     }
 39 
 40     delete [] sequenceInt;   //释放内存
 41 }
 42 
 43 //注意写初始化函数
 44 void Init(int BitmapArray[],const int n)
 45 {
 46     for (int i = 0;i < n;++i)
 47     {
 48         BitmapArray[i] = 0;
 49     }
 50 }
 51 
 52 //用int型模拟位图,实现的位图排序
 53 const int BitsPerInt = 32;
 54 const int Div32Shift = 5;
 55 const int Mod32Mask = 0x1F;
 56 
 57 //BitmapSort_2:用位运算实现置位、清零、判断是否为1
 58 void SetBit(int BitmapArray[],const int BitToSet)
 59 {
 60     //BitmapArray[ (BitToSet >> Div32Shift) ] | = ( 1 << (BitToSet & Mod32Mask) );  //error C2059: syntax error : '='
 61     BitmapArray[ (BitToSet >> Div32Shift) ] = BitmapArray[ (BitToSet >> Div32Shift) ] 
 62     | ( 1 << (BitToSet & Mod32Mask) ); 
 63 }
 64 
 65 void ClearBit(int BitmapArray[],const int BitToClear)
 66 {
 67     BitmapArray[ (BitToClear >> Div32Shift) ] = BitmapArray[ (BitToClear >> Div32Shift) ] 
 68     & ~( 1 << (BitToClear & Mod32Mask) ); 
 69 }
 70 
 71 bool IsBitSet(const int BitmapArray[],const int BitToCheck)  //定义为const类型,在不小心改变时,会给出报错信息
 72 {
 73     return ( BitmapArray[ (BitToCheck >> Div32Shift) ] & ( 1 << (BitToCheck & Mod32Mask) ) ); 
 74 }
 75 
 76 void BitmapSort_2(int ArrayUnsorted[],const int n)
 77 {
 78     int i ;
 79     int cnt = 0;
 80     int *BitmapArray = new int[1 + MAX_NUMBER/BitsPerInt];   //当MaxRandomInt较大时,必须从对上分配内存,否则内存不够用,导致出错
 81 
 82     if (NULL == BitmapArray)
 83     {
 84         cout<<"memory allocation error !"<<endl;
 85         exit(0);
 86     }
 87 
 88     Init(BitmapArray,1 + MAX_NUMBER/BitsPerInt);   //初始化为0
 89 
 90     for (i = 0;i < n;++i)
 91     {
 92         SetBit(BitmapArray,ArrayUnsorted[i]);
 93     }
 94 
 95     for (i = 0;i < MAX_NUMBER;++i)  //不是for (i = 0;i < n;++i)
 96     {
 97         if ( IsBitSet(BitmapArray,i) )
 98         {
 99             ArrayUnsorted[cnt++] = i;
100         }
101     }
102 
103     delete [] BitmapArray;
104 }
105 
106 //用位图的方法查找丢失元素
107 //满足以下假设:
108 //1)数组中没有重复元素
109 //2)数组中必有缺失元素
110 int FindLostByBitmap(int arrayToFind[],int numberOfData)
111 {
112     int i ;
113     int cnt = 0;
114     int *BitmapArray = new int[1 + MAX_NUMBER/BitsPerInt];   //当MaxRandomInt较大时,必须从对上分配内存,否则内存不够用,导致出错
115 
116     if (NULL == BitmapArray)
117     {
118         cout<<"memory allocation error !"<<endl;
119         exit(0);
120     }
121 
122     Init(BitmapArray,1 + MAX_NUMBER/BitsPerInt);   //初始化为0
123 
124     for (i = 0;i < numberOfData;++i)
125     {
126         SetBit(BitmapArray,arrayToFind[i]);
127     }
128 
129     for (i = 0;i < MAX_NUMBER;++i)  //不是for (i = 0;i < n;++i)
130     {
131         if ( !IsBitSet(BitmapArray,i) )
132         {
133             return i;
134         }
135     }
136 
137     if (MAX_NUMBER == i)
138     {
139         cout<<"no element is lost!"<<endl;
140         return -1;
141     }
142 
143     delete [] BitmapArray;
144 }
145 
146 //二分搜索法
147 //满足以下假设:
148 //1)数组中没有重复元素
149 //2)数组中必有缺失元素
150 int FindLostByBinarySearch(int arrayToFind[],int numberOfData)
151 {
152     int cnt = 0;
153     int begin  = 0;
154     int end = numberOfData - 1;
155     int mid = 0;
156 
157     if (NULL == arrayToFind || numberOfData < 2)
158     {
159         cout<<"the input array is invalid!"<<endl;
160         return -1;
161     }
162 
163     while (begin < end - 1)
164     {
165         mid = (begin + end)/2;
166         if (arrayToFind[mid] - arrayToFind[begin] > mid - begin)
167         {
168             end = mid;
169         }
170         else
171         {
172             begin = mid;
173         }
174     }
175 
176     if (arrayToFind[end] - arrayToFind[begin] > 1)
177     {  
178         return (arrayToFind[begin] + 1);
179     }
180     else 
181     {
182         cout<<"no element is lost!"<<endl;
183         return -1;
184     }
185 }
186 
187 //将数组写入txt中,便于检查结果的正确性
188 void WriteArrayToUnsortedTxt(int array[],int n)
189 {
190     FILE *fout;
191     int i;
192 
193     fout = fopen("array_unsorted.txt","wt");
194 
195     if(fout == NULL)
196     {
197         printf("forward_i.txt open error!
");
198         exit(0);
199     }
200 
201     printf("file open success!
");
202 
203     for (i = 0;i < n;i++) 
204     {
205         fprintf(fout,"%d
",array[i]);
206     }
207 
208     fclose(fout);    
209 }
210 
211 //将数组写入txt中,便于检查结果的正确性
212 void WriteArrayToSortedTxt(int array[],int n)
213 {
214     FILE *fout;
215     int i;
216 
217     fout = fopen("array_sorted.txt","wt");
218 
219     if(fout == NULL)
220     {
221         printf("forward_i.txt open error!
");
222         exit(0);
223     }
224 
225     printf("file open success!
");
226 
227     for (i = 0;i < n;i++) 
228     {
229         fprintf(fout,"%d
",array[i]);
230     }
231 
232     fclose(fout);    
233 }
234 
235 //显示数组元素
236 void DisplayArray(int ArrayUnsorted[],int n)
237 {
238     for (int i = 0;i < n;++i)
239     {
240         cout<<ArrayUnsorted[i]<<"	";
241     }
242     cout<<endl;
243 }
244 
245 //测试代码
246 int main(void)
247 {
248     const int numberOfData = NUMBER_OF_DATA;
249     const int maxOfRandomInt = MAX_NUMBER;
250     int *arrayToFind = new int[NUMBER_OF_DATA];
251     int StartTime = 0;
252     int TimeCost = 0; 
253 
254     cout<<"the max number of integer is : "<<maxOfRandomInt<<endl;
255     cout<<"the number of integer is : "<<numberOfData<<endl;
256 
257     RandomIntGen(numberOfData,maxOfRandomInt,arrayToFind);
258     WriteArrayToUnsortedTxt(arrayToFind,numberOfData);
259     /*cout<<"the randomInt array is :"<<endl;
260     DisplayArray(arrayToFind,numberOfData);*/
261     
262     //Test FindLostByBitmap
263     cout<<"Test FindLostByBitmap..."<<endl;
264     StartTime = clock();
265     cout<<"the lost number is : "<<FindLostByBitmap(arrayToFind,numberOfData)<<endl;
266     TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC;
267     cout<<"the time cost of FindLostByBitmap is : "<<TimeCost<<"ms"<<endl;
268 
269     //BitmapSort_2,为使用基于二分法的搜索进行排序的预处理
270     cout<<"Test BitmapSort_2..."<<endl;
271 
272     StartTime = clock();
273     BitmapSort_2(arrayToFind,numberOfData);
274     TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC;
275     cout<<"the time cost of BitmapSort_2 is : "<<TimeCost<<"ms"<<endl;
276 
277     WriteArrayToSortedTxt(arrayToFind,numberOfData);
278     /*cout<<"the sorted array is : "<<endl;
279     DisplayArray(arrayToFind,numberOfData);  */
280 
281     //Test FindLostByBinarySearch
282     cout<<"Test FindLostByBinarySearch..."<<endl;
283     StartTime = clock();
284     cout<<"the lost number is : "<<FindLostByBinarySearch(arrayToFind,numberOfData)<<endl;
285     TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC;
286     cout<<"the time cost of FindLostByBinarySearch is : "<<TimeCost<<"ms"<<endl;
287 
288     delete [] arrayToFind;   //delete与new要配对出现
289     return 0;
290 }

测试结果,1000,0010个数据中缺失10个时:

(可见,在该规模下,用二分搜索法的速度约为位图法的1000+倍)

the max number of integer is : 10000010
the number of integer is : 10000000
file open success!
Test FindLostByBitmap...
the lost number is : 7952
the time cost of FindLostByBitmap is : 1486000ms
Test BitmapSort_2...
the time cost of BitmapSort_2 is : 2078000ms
file open success!
Test FindLostByBinarySearch...
the lost number is : 7952
the time cost of FindLostByBinarySearch is : 8000ms
请按任意键继续. . .

上面的结果只是程序运行一次的结果,如果要得到比较靠靠的分析,应该多次测试,而且测试的结果可能与数据集有关,若要固定数据集,将随机数产生函数RandomIntGen中的//srand((unsigned) time(NULL));注释掉即可。


问题二 数组的旋转

对于该问题,大多数人都能想到比较直观的解决方法,书中已有说明,此处不再赘述,另外,书中给出了一种比较巧妙的解法,称为杂技法,并给出了代码实现,但对于该方法的原理没有介绍,杂技法不易理解,此处暂时不做讨论。


问题三 变位词的查找

可借助于[标识,单词]对,将变位词进行标识,根据标识划分变位词。

借助于C++的工具vector、以及map,可以方便地实现。

注意几点:

  1. 数据结构的选择,存储单词采用 vector <string> ,因为每个单词是string类型 ;
  2. 存储[标识,单词],采用map < string ,vector <string> >,因为一个标识sign对应多个string类型的单词;
  3. 标识通过对单词的排序得到,此处采用C++的排序函数sort,sort( sign.begin(),sign.end() );

完整代码如下:

 1 //显示vector <string>类型对象的元素
 2 void DisplayVector(vector <string> stringVector)
 3 {
 4     vector <string> ::iterator iterVector;
 5 
 6     for (iterVector = stringVector.begin();iterVector != stringVector.end();++iterVector)
 7     {
 8         cout<<*iterVector<<"	";
 9     }
10     cout<<endl;
11 }
12 
13 //变位词的查找
14 void FindAnagrams()
15 {
16     map < string ,vector <string> > signWordMap;  
17     vector <string> dictVector;
18     string word;
19     string sign;
20     
21     cout<<"please enter a word ,end with ctrl+z: "<<endl;
22     while (cin>>word)        //输入字典中单词
23     {
24         dictVector.push_back(word);
25         cout<<"please enter a word ,end with ctrl+z: "<<endl;
26     }
27     cout<<"the dictionary is : "<<endl;
28     DisplayVector(dictVector);
29 
30     vector <string> :: iterator iterVector;
31 
32     for (iterVector = dictVector.begin();iterVector != dictVector.end();++iterVector)  //制作标识、单词对
33     {
34         sign = *iterVector;
35         sort( sign.begin(),sign.end() );
36         signWordMap[sign].push_back(*iterVector);
37     }
38 
39     map < string ,vector <string> > :: iterator iterMap;
40 
41     for (iterMap = signWordMap.begin();iterMap != signWordMap.end();++iterMap)//输出变位词
42     {
43         cout<<"the anagrams signed by "<<(*iterMap).first<<" is :"<<endl;
44         for (iterVector = (*iterMap).second.begin() ;iterVector != (*iterMap).second.end();++iterVector)
45         {
46             cout<<*iterVector<<"	";
47         }
48         cout<<endl;
49     }
50 }
51 
52 int main(void)
53 {
54     //test FindAnagrams
55     FindAnagrams();
56     return 0;
57 }

运行结果:

please enter a word ,end with ctrl+z:
tops  stop snap pans top ant at
please enter a word ,end with ctrl+z:
please enter a word ,end with ctrl+z:
please enter a word ,end with ctrl+z:
please enter a word ,end with ctrl+z:
please enter a word ,end with ctrl+z:
please enter a word ,end with ctrl+z:
please enter a word ,end with ctrl+z:
^Z
the dictionary is :
tops    stop    snap    pans    top     ant     at
the anagrams signed by anps is :
snap    pans
the anagrams signed by ant is :
ant
the anagrams signed by at is :
at
the anagrams signed by opst is :
tops    stop
the anagrams signed by opt is :
top
请按任意键继续. . .
原文地址:https://www.cnblogs.com/youngforever/p/3185149.html