1. If memory were not scarce, how would you implement a sort in a language with libraries for representing and sorting sets?
#include <set>
#include <iostream>
using namespace std;
int main()
{
set<int> mySet;
set<int>::iterator it;
int record_num;
cout << "Enter record number(not duplicate and CTRL+Z to end):" << endl;
while(cin >> record_num)
mySet.insert(record_num);
cout << "Start output sorted records:" << endl;
for(it = mySet.begin();it != mySet.end();it++)
cout << *it << endl;
return 0;
}
2. How would you implement bit vectors using bitwise logical operations (such as and, or and shift)?
void initBits(char data[],int size)
{
for(int i = 0;i < size;i++)
data[i] = 0x0;
}
void setBit(char data[],int pos)
{
int data_index = pos / (8 * sizeof(char));
int byte_index = pos % (8 * sizeof(char));
char byte = 0x1 << 7;
byte = (byte >> byte_index);
byte = ~(byte ^ ((~byte << 0x1) | 0x1));
data[data_index] |= byte;
}
bool testBit(char data[], int pos)
{
bool ret = false;
int data_index = pos / (8 * sizeof(char));
int byte_index = pos % (8 * sizeof(char));
char byte = 0x1 << 7;
byte = (byte >> byte_index);
byte = ~(byte ^ ((~byte << 0x1) | 0x1));
ret = !!(data[data_index] & byte);
return ret;
}
3. Run-time efficiency was an important part of the design goal, and the resulting program was efficient enough. Implement the bitmap sort on your system and measure its run time; how does it compare to the system sort and to the sorts in Problem 1? Assume that n is 10,000,000, and that the input file contains 1,000,000 integers.
The following program create a reverse one million record numbers
#include <fstream>
#define RECORDS (int)1e6 //assume partial records
using namespace std;
int main()
{
int maxRecords = (int)RECORDS - 1;
ofstream oFile("records.txt");
for(int i = 0;i < maxRecords + 1;i++)
{
oFile << maxRecords - i << endl;//write reverse record number to file
}
oFile.close();
return 0;
}
The following program is version of Problem 1
#include <iostream>
#include <fstream>
#include <sstream>
#include <set>
#include <ctime>
using namespace std;
#define RECORDS (int)1e6 //assume partial records
int main()
{
clock_t start, end, start_io, end_io, io;
start = clock();
io = 0;
ifstream iFile("records.txt");
string record;
int record_num;
set<int> mySet;
set<int>::iterator it_set;
cout << "Starting reading records for Problem 1!" << endl;
for(int i = 0;i < RECORDS;i++)
{
stringstream ss(stringstream::in | stringstream::out);
start_io = clock();
getline(iFile,record); //read a record number a line at a time
end_io = clock();
io += end_io - start_io;
ss << record; //send record to stringstream
ss >> record_num; //output in integer format
mySet.insert(record_num); //read record number for Problem 1
}
ofstream oFile ("sorted.txt");
cout << "Starting writing sorted records for Problem 1!" << endl;
for(it_set = mySet.begin();it_set != mySet.end();it_set++)
{
start_io = clock();
oFile << *it_set << endl;
end_io = clock();
io += end_io - start_io;
}
end = clock();
cout << "Total time: " << (end - start) / CLOCKS_PER_SEC << endl;
cout << "Compute time: " << (end - start - io) / CLOCKS_PER_SEC << endl;
return 0;
}
The following program is version of System sort
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <ctime>
using namespace std;
#define RECORDS (int)1e6 //assume partial records
int main()
{
clock_t start, end, start_io, end_io, io;
start = clock();
io = 0;
ifstream iFile("records.txt");
string record;
int record_num;
vector<int> myVec;
vector<int>::iterator it_vec;
cout << "Starting reading records for System sort!" << endl;
for(int i = 0;i < RECORDS;i++)
{
stringstream ss(stringstream::in | stringstream::out);
start_io = clock();
getline(iFile,record); //read a record number a line at a time
end_io = clock();
io += end_io - start_io;
ss << record; //send record to stringstream
ss >> record_num; //output in integer format
myVec.push_back(record_num); //read record number for System sort
}
sort(myVec.begin(),myVec.end());//starting sorting records
ofstream oFile ("sorted.txt");
cout << "Starting writing sorted records for Problem 1!" << endl;
for(it_vec = myVec.begin();it_vec != myVec.end();it_vec++)
{
start_io = clock();
oFile << *it_vec << endl;
end_io = clock();
io += end_io - start_io;
}
end = clock();
cout << "Total time: " << (end - start) / CLOCKS_PER_SEC << endl;
cout << "Compute time: " << (end - start - io) / CLOCKS_PER_SEC << endl;
return 0;
}
The following program is version of Bitmap sort
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <ctime>
using namespace std;
#define RECORDS (int)1e6 //assume partial records
#define MAX_RECORDS (int)1e7//maximum records
#define BYTE 8 //a byte contains 8 bits
void initBits(char data[],int size);
void setBit(char data[],int pos);
bool testBit(char data[], int pos);
int main()
{
clock_t start, end, start_io, end_io, io;
start = clock();
io = 0;
ifstream iFile("records.txt");
string record;
int record_num;
int size = (int)MAX_RECORDS / (BYTE * sizeof(char));
if((int)MAX_RECORDS % (BYTE * sizeof(char)) != 0)
size++;
//records set contain size bits and initialize bits to all zero
char *myBitmap = new char[size];
initBits(myBitmap,size);
cout << "Starting reading records for Bitmap sort!" << endl;
for(int i = 0;i < RECORDS;i++)
{
stringstream ss(stringstream::in | stringstream::out);
start_io = clock();
getline(iFile,record); //read a record number a line at a time
end_io = clock();
io += end_io - start_io;
ss << record; //send record to stringstream
ss >> record_num; //output in integer format
setBit(myBitmap,record_num); // read record number for Bitmap sort
}
ofstream oFile ("sorted.txt");
cout << "Starting writing sorted records for Bitmap sort!" << endl;
for(int i = 0;i < MAX_RECORDS;i++)
{
if(testBit(myBitmap,i))//if set bits position is set to 1,then output record number
{
start_io = clock();
oFile << i << endl;
end_io = clock();
io += end_io - start_io;
}
}
end = clock();
cout << "Total time: " << (end - start) / CLOCKS_PER_SEC << endl;
cout << fixed << setprecision(2);
cout << "Compute time: " << (double)((end - start - io)) / CLOCKS_PER_SEC << endl;
return 0;
}
void initBits(char data[],int size)
{
for(int i = 0;i < size;i++)
data[i] = 0x0;
}
void setBit(char data[],int pos)
{
int data_index = pos / (BYTE * sizeof(char));
int byte_index = pos % (BYTE * sizeof(char));
char byte = 0x1 << (BYTE - 1);
byte = (byte >> byte_index);
byte = ~(byte ^ ((~byte << 0x1) | 0x1));
data[data_index] |= byte;
}
bool testBit(char data[], int pos)
{
bool ret = false;
int data_index = pos / (BYTE * sizeof(char));
int byte_index = pos % (BYTE * sizeof(char));
char byte = 0x1 << (BYTE - 1);
byte = (byte >> byte_index);
byte = ~(byte ^ ((~byte << 0x1) | 0x1));
ret = !!(data[data_index] & byte);
return ret;
}
The comparing results is as follows:
Problem 1: Total time, 4s; Compute time, 2s; Memory, 664K
System sort: Total time, 21s; Compute time, 7s; Memory, 664K
Bitmap sort: Total time, 2s; Compute time, 0.81s;Memory, 668K
The compute time doesn't contain the I/O operations and memory consist of the whole working process.
4. If you take Problem 3 seriously, you will face the problem of generating k integers less than n without duplicates. The simplest approach uses the first k positive integers. This extreme data set won't alter the run time of the bitmap method by much, but it might skew the run time of a system sort. How could you generate a file of k unique random integers between 0 and n-1 in random order? Strive for a short program that is also efficient.
The problem describes for effecting system sort doesn't exists in here, as I used the sort() not qsort(). The answer now lies to random and uniformly distributed.(The program as follows is altered from http://www.cs.bell-labs.com/cm/cs/pearls/bitsortgen.c)
#include <fstream>
#include <iostream>
#include <cstdlib>
#include <ctime>
#define RECORDS (int)1e6 //assume partial records
#define MAX_RECORDS (int)1e7//maximum records
#define BYTE 8 //a byte contains 8 bits
using namespace std;
int randint(int a, int b);
void swap(int& left, int& right);
int main()
{
//records set contain size bits and initialize bits to all zero
int records[MAX_RECORDS];
for(int i = 0;i < MAX_RECORDS;i++)
{
records[i] = i;
}
ofstream oFile("records.txt");
int rand_num;
cout << "Starting random records!" << endl;
int records_size = RECORDS < MAX_RECORDS ? RECORDS: MAX_RECORDS;
srand(time(NULL));
for(int i = 0;i < records_size;i++)
{
rand_num = randint(i,MAX_RECORDS - 1);
swap(records[i],records[rand_num]);
oFile << records[i] << endl;
}
return 0;
}
int randint(int a, int b)
{
return a + (RAND_MAX * rand() + rand()) % (b + 1 - a);
}
void swap(int& left, int& right)
{
int temp = left;
left = right;
right = temp;
}
5. The programmer said that he about a megabyte of free storage, but the code we sketched uses 1.25 megabytes. He was able to scrounge the extra space without much trouble. If the megabyte had been a hard and fast boundary, what would you have recommended? What is the run time of your algorithm?
A solution uses a two-way pass to read and write the sorted integers, the runtime would be twice as much as the one-way pass version. The program is as follows:(The 1.25 megabytes refer only to used memory for ten million bits excluded the code and whatever temporary storage variables)
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <ctime>
using namespace std;
#define RECORDS (int)1e6 //assume partial records
#define MAX_RECORDS (int)1e7//maximum records
#define BYTE 8 //a byte contains 8 bits
void initBits(char data[],int size);
void setBit(char data[],int pos);
bool testBit(char data[], int pos);
int main()
{
clock_t start, end, start_io, end_io, io;
start = clock();
io = 0;
ofstream oFile ("sorted.txt");
string record;
int record_num;
int size = (int)MAX_RECORDS / (BYTE * sizeof(char));
if((int)MAX_RECORDS % (BYTE * sizeof(char)) != 0)
size++;
if(size % 2 != 0)
size++;
size /= 2;
char *myBitmap = new char[size];
for(int j = 0;j < 2;j++)//two pass
{
ifstream iFile("records.txt");
initBits(myBitmap,size);
cout << "Starting reading records for Bitmap sort!" << endl;
int low_bound = j * (MAX_RECORDS / 2);
int upper_bound = (MAX_RECORDS / 2) + low_bound;
for(int i = 0;i < RECORDS;i++)
{
stringstream ss(stringstream::in | stringstream::out);
start_io = clock();
getline(iFile,record); //read a record number a line at a time
end_io = clock();
io += end_io - start_io;
ss << record; //send record to stringstream
ss >> record_num; //output in integer format
if(record_num < upper_bound && record_num >= low_bound)
setBit(myBitmap,record_num - low_bound); // read record number for Bitmap sort
}
iFile.close();
cout << "Starting writing sorted records for Bitmap sort!" << endl;
for(int i = low_bound;i < upper_bound;i++)
{
if(testBit(myBitmap,i - low_bound))//if set bits position is set to 1,then output record number
{
start_io = clock();
oFile << i << endl;
end_io = clock();
io += end_io - start_io;
}
}
}
oFile.close();
end = clock();
cout << "Total time: " << (end - start) / CLOCKS_PER_SEC << endl;
cout << fixed << setprecision(2);
cout << "Compute time: " << (double)((end - start - io)) / CLOCKS_PER_SEC << endl;
return 0;
}
void initBits(char data[],int size)
{
for(int i = 0;i < size;i++)
data[i] = 0x0;
}
void setBit(char data[],int pos)
{
int data_index = pos / (BYTE * sizeof(char));
int byte_index = pos % (BYTE * sizeof(char));
char byte = 0x1 << (BYTE - 1);
byte = (byte >> byte_index);
byte = ~(byte ^ ((~byte << 0x1) | 0x1));
data[data_index] |= byte;
}
bool testBit(char data[], int pos)
{
bool ret = false;
int data_index = pos / (BYTE * sizeof(char));
int byte_index = pos % (BYTE * sizeof(char));
char byte = 0x1 << (BYTE - 1);
byte = (byte >> byte_index);
byte = ~(byte ^ ((~byte << 0x1) | 0x1));
ret = !!(data[data_index] & byte);
return ret;
}
6.