Re: Mergesort Vs Quicksort



pete wrote:
Robert Maas, http://tinyurl.com/uh3t wrote:
From: pete <pfil...@xxxxxxxxxxxxxx>
I would do it with a linked list.

You're ignoring the ground rules of this contest: Your data is
given as variable-length records packed end-to-end in one array,
and the data can be parsed only from the left end towards the
right, and you're limited in the quantity of extra storage you
allocate temporarily to run your algorithm, which must perform
stable-sort. The final result must be those same records, packed
back into that same array, but now in ascending sequence, and in
case of ties the left-most of any two originals must also be
left-most among final location of those two records, which makes no
difference if you're sorting the whole records, but *does* make a
difference if the keys being sorted are not the whole records.

Also, we'd like to maintain good locality of reference, so that if
the whole array is much larger than physical RAM, and the records
are initially in random sequence, at no point during the process
will virtual memory thrash because of references jumping all over
virtual address space at random.

Building a linked list violates locality of reference, and it is
not guaranteed that it will fit in the half-sized aux array if you
happen to have a very large number of very small records.

If your dataset is large enough that you can't allocate two full
sized arrays to use my fastest algorithm,

I'll race your fastest algorithm.

If you want to sort on the same data,

"The array was 8000000 bytes in size,
and the array contained 195304 records,
each ranging anywhere from 2 to 80 bytes in size."

then you can use rec_sort_test to create an input file,
rec_sort_in.txt.

12/03/2008 08:17 PM 8,195,304 rec_sort_in.txt
12/03/2008 08:17 PM 8,195,304 rec_sort_out.txt
2 File(s) 16,390,608 bytes
0 Dir(s) 18,372,935,680 bytes free

rec_sort_out.txt is a sorted ouput, if that kind of thing interests you.
In both of those text files, the null bytes of the array
are represented by newline characters in the file.

/* BEGIN rec_sort_test.c */

#include "rec_sort.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>

#define ARRAY_SIZE 8000000 /* 50, 8000000 */
#define MAX_REC_SIZE 80 /* 10, 80 */
#define SHOW_TIMES_INSTEAD 1 /* 0, 1 */
#define CREATE_INPUT_FILE 1
#define CREATE_OUTPUT_FILE 1
#define REMOVE_INPUT_FILE 0
#define REMOVE_OUTPUT_FILE 0
#define FNI "rec_sort_in.txt"
#define FNO "rec_sort_out.txt"

#define SORT_FUNCTIONS { \
no_sort, "Original order of the strings:", \
rec_sort, "Stable sorted:" \
}
#define COMP_FUNCTIONS { \
scmp, "alphabetically", \
lcmp, "by string length" \
}
#define NMEMB(A) (sizeof (A) / sizeof *(A))
#define LU_RAND_SEED 123456789LU
#define LU_RAND(S) ((S) * 69069LU + 362437 & 0XFFFFFFFF)

int scmp(const void *a, const void *b);
int lcmp(const void *a, const void *b);
size_t recsize(const void *a);
int no_sort(
void *base,
size_t nrec,
size_t (*rec_size)(const void *),
int (*compar)(const void *, const void *));
size_t n_rec(char *a, size_t nmemb);

int main(void)
{
char *s;
size_t element, sort, comp, index;
struct sf {
int (*function)(
void *base,
size_t nmemb,
size_t (*rec_size)(const void *),
int (*compar)(const void *, const void *));
const char *description;
} s_F[] = SORT_FUNCTIONS;
struct cf {
int (*compar)(const void *, const void *);
const char *description;
} c_F[] = COMP_FUNCTIONS;
const char *const FnO = FNO;
const int create_output_file = CREATE_OUTPUT_FILE;
const int remove_output_file = REMOVE_OUTPUT_FILE;
const int show_times_instead = SHOW_TIMES_INSTEAD;
const long unsigned max_rec_size = MAX_REC_SIZE;
const long unsigned array_size = ARRAY_SIZE;
char *const acpy = malloc(array_size);
char *const array = malloc(array_size);
const long unsigned n = n_rec(array, array_size);

assert(MAX_REC_SIZE > 1);
if (array == NULL) {
puts("array == NULL");
exit(EXIT_FAILURE);
}
if (acpy == NULL) {
free(array);
puts("acpy == NULL");
exit(EXIT_FAILURE);
}
puts("\n/* BEGIN output from rec_sort_test.c */");
for (comp = 0; comp != NMEMB(c_F); ++comp) {
printf("A contiguous block of strings "
"is being sorted %s.\n",
c_F[comp].description);
for (sort = 0; sort != NMEMB(s_F); ++sort) {
int success;
clock_t start, stop;

puts(s_F[sort].description);
memcpy(acpy, array, array_size);
stop = clock();
do {
start = clock();
} while (start == stop);
success = s_F[sort]
.function(acpy, n, recsize, c_F[comp].compar);
stop = clock();
if (success) {
if (!show_times_instead) {
s = acpy;
for (element = 0; element != n; ++element) {
puts(s);
s += recsize(s);
}
} else {
double seconds =
(double)(stop - start)
/ (double)CLOCKS_PER_SEC;

printf("%f seconds.\n", seconds);
}
if (create_output_file) {
FILE *fpo = fopen(FnO, "w");

if (fpo != NULL) {
for (index = 0; index != array_size; ++index) {
putc(acpy[index] ? acpy[index] : '\n', fpo);
}
fclose(fpo);
printf("\n%s has been created.\n", FnO);
} else {
printf("\nfopen(\"%s\", \"wb\") == NULL", FnO);
}
}
} else {
puts("Sort failed.");
}
putchar('\n');
}
}
free(acpy);
free(array);
printf("The array was %lu bytes in size, "
"and the array contained %lu records,\n"
"each ranging anywhere from 2 to %lu bytes in size.\n\n",
array_size,
n,
max_rec_size);
if (remove_output_file) {
if (!remove(FnO)) {
printf("\n%s has been removed.\n", FnO);
} else {
printf("\nRemoval of %s failed.\n", FnO);
}
}
puts("/* END output from rec_sort_test.c */");
return 0;
}

size_t recsize(const void *a)
{
return 1 + strlen(a);
}

int lcmp(const void *a, const void *b)
{
const size_t a_len = strlen(a);
const size_t b_len = strlen(b);

return b_len > a_len ? -1 : b_len != a_len;
}

int scmp(const void *a, const void *b)
{
return strcmp(a, b);
}

int no_sort(
void *base,
size_t nrec,
size_t (*rec_size)(const void *),
int (*compar)(const void *, const void *))
{
base, nrec, rec_size, compar;
/*
** return 1; because this function always
** does what it's supposed to do.
** There's no point in calling attention to its failure to sort.
*/
return 1;
}

size_t n_rec(char *a, size_t nmemb)
{
const int create_input_file = CREATE_INPUT_FILE;
const int remove_input_file = REMOVE_INPUT_FILE;
const char *const FnI = FNI;

long unsigned size, index, seed;
const char letter[] = "abcdefghijklmnopqrstuvwxyz";
size_t n = 0;

if (a != NULL) {
seed = LU_RAND_SEED;
for (index = 0; index != nmemb; ++index) {
a[index] = letter[seed % (sizeof letter - 1)];
seed = LU_RAND(seed);
}
index = 0;
while (nmemb - index > MAX_REC_SIZE) {
seed = LU_RAND(seed);
size = 2 + seed % (MAX_REC_SIZE - 1);
index += size;
a[index] = '\0';
++n;
}
a[nmemb - 1] = '\0';
++n;
}
if (create_input_file) {
FILE *fpi = fopen(FnI, "w");

if (fpi != NULL) {
for (index = 0; index != nmemb; ++index) {
putc(a[index] ? a[index] : '\n', fpi);
}
fclose(fpi);
printf("\n%s has been created.\n", FnI);
} else {
printf("\nfopen(\"%s\", \"wb\") == NULL", FnI);
}
}
if (remove_input_file) {
if (!remove(FnI)) {
printf("\n%s has been removed.\n", FnI);
} else {
printf("\nRemoval of %s failed.\n", FnI);
}
}
return n;
}

/* END rec_sort_test.c */


/* BEGIN rec_sort.h */

#ifndef H_REC_SORT_H
#define H_REC_SORT_H

#include <stddef.h>

int rec_sort(void *base,
size_t nrec,
size_t (*rec_size)(const void *),
int (*compar)(const void *, const void *));

#endif

/* END rec_sort.h */


/* BEGIN rec_sort.c */

#include "rec_sort.h"

#include <stdlib.h>
#include <string.h>
#include <assert.h>

#define SMALL 3

static void merge(char **array,
size_t nmemb,
char **buff,
int (*compar)(const void *, const void *));

int rec_sort(void *base,
size_t nrec,
size_t (*rec_size)(const void *),
int (*compar)(const void *, const void *))
{
size_t size, whole_size, n;
void *buff, *aux;
char *p;
char **ptrs;

assert(SMALL >= 1);
if (2 > nrec) {
return 1;
}
if ((ptrs = malloc(nrec * sizeof *ptrs)) == NULL
|| (buff = malloc(nrec / 2 * sizeof *ptrs)) == NULL
&& nrec > SMALL)
{
free(ptrs);
return 0;
}
p = base;
for (whole_size = n = 0; n != nrec; ++n) {
ptrs[n] = p;
size = rec_size(p);
p += size;
whole_size += size;
}
merge(ptrs, nrec, buff, compar);
free(buff);
if ((p = aux = malloc(whole_size)) == NULL) {
free(ptrs);
return 0;
}
for (n = 0; n != nrec; ++n) {
size = rec_size(ptrs[n]);
memcpy(p, ptrs[n], size);
p += size;
}
free(ptrs);
memcpy(base, aux, whole_size);
free(aux);
return 1;
}

static void merge(char **array,
size_t nmemb,
char **buff,
int (*compar)(const void *, const void *))
{
if (nmemb > SMALL) {
char **mid_ptr;
size_t const half = nmemb / 2;
char **const after_buff = buff + half;
char **const middle = array + half;
char **const after_array = array + nmemb;

merge(middle, nmemb - half, buff, compar);
merge(array, half, buff, compar);
do {
if (compar(*array, *middle) > 0) {
mid_ptr = middle;
buff = after_buff;
do {
*--buff = *--mid_ptr;
} while (array != mid_ptr);
*array = *middle;
mid_ptr = middle + 1;
++array;
while (middle != array) {
*array++ = compar(*buff, *mid_ptr) > 0
? *mid_ptr++ : *buff++;
}
while (after_buff != buff) {
if (after_array != mid_ptr) {
*array++ = compar(*buff, *mid_ptr) > 0
? *mid_ptr++ : *buff++;
} else {
do {
*array++ = *buff++;
} while (after_buff != buff);
}
}
} else {
++array;
}
} while (middle > array);
} else {
char *temp, **base, **low, **high;

if (nmemb > 1) {
--nmemb;
base = array;
do {
low = array++;
if (compar(*low, *array) > 0) {
high = array;
temp = *high;
do {
*high-- = *low;
if (low == base) {
break;
}
--low;
} while (compar(*low, temp) > 0);
*high = temp;
}
} while (--nmemb != 0);
}
}
}

/* END rec_sort.c */


--
pete
.



Relevant Pages

  • Re: Newbie question about class member functions...
    ... mainalways returns int, never void. ... // Open playlist file and read it into an array ...
    (comp.lang.cpp)
  • Jtable repaint - it just doesnt work! tried
    ... public int numRows; ... public void setValueAt{ ... array, then plugs it into a jTable ... public void windowClosing(java.awt.event.WindowEvent evt) { ...
    (comp.lang.java.programmer)
  • Re: question on qsort library function
    ... This is the original order of the test array: ... int lencomp(const void *, const void *); ... int lencomp(const void *a, const void *b) ...
    (comp.lang.c)
  • Re: quick sort
    ... void quicksort; ... int partition; ... and a populateIntArray(int *array, size_t size) function such ... printIntArray, quicksort() (yes, I'd have called it something ...
    (comp.lang.c)
  • Re: Mergesort Vs Quicksort
    ... given as variable-length records packed end-to-end in one array, ... int scmp(const void *a, const void *b) ...
    (comp.programming)