位排序 -- 基于计数排序和基数排序产生的特例

silitex · 发表于 2009-10-30 11:21:18

马上注册，结交更多好友，享用更多功能，让你轻松玩转社区。

您需要登录才可以下载或查看，没有账号？欢迎注册

×

原创地址：http://blog.csdn.net/Silitex/archive/2009/10/30/4746697.aspx

鄙人对很多排序算法虽然懂，但真正动起手来还是错漏百出，于是下定决心，这段时间专门研究排序算法，后来根据计数排序和基数排序，想到了它们的一个特例 -- 位排序。
位排序 ---
时间复杂度：O(n*logm), 最好和最坏的情况下都是这个时间复杂度 (此排序算法总是认为m <= n的)
空间复杂度：O(logm), 如果m小于4294967296, 即最大临时空间为32的倍数
运行时间：经过大量的数据测试，位排序的运行时间仅次于计数排序和快速排序
算法的优点：能够在空间复杂度为O(logm)上面实现O(n*logm)的算法，空间上比快速排序和堆排序的最坏情况（最坏情况其空间复杂度都为O(n)）下要好
算法的缺点：m <= n

源代码：

/*
 * Recursive worker of the bit sort.
 *
 * Partitions pData[nLow..nHigh] (both bounds inclusive) on bit number nBit of
 * the hashed key (bits are numbered from 1, so the mask is 1 << (nBit-1)):
 * records whose bit is clear are moved in front of records whose bit is set.
 * Each half is then processed with the next lower bit until bit 1 is done.
 *
 * pUniqueHashFunc maps a record key to the integer whose bits are examined.
 * Ranges with fewer than two records are left untouched; a range of exactly
 * two records is ordered by one direct comparison of the full hash values.
 */
void BitSortRecursive(ELEM_TYPE *pData, int nLow, int nHigh, UNIQUE_HASH_CALLBACK pUniqueHashFunc, int nBit)
{
    int nLeft = nLow;
    int nRight = nHigh;
    int nMask;

    if (nLow >= nHigh)
        return;

    if (nHigh == nLow + 1)
    {
        /* Only two records remain: compare them directly. */
        if (pUniqueHashFunc(pData[nLow].eKey) > pUniqueHashFunc(pData[nHigh].eKey))
            DsSwapElem(&pData[nLow], &pData[nHigh]);
        return;
    }

    nMask = 1 << (nBit - 1);
    for (;;)
    {
        /* Advance from the left to the first record whose bit is set. */
        while (nLeft <= nHigh && (pUniqueHashFunc(pData[nLeft].eKey) & nMask) == 0)
            nLeft++;
        /* Retreat from the right to the first record whose bit is clear. */
        while (nRight > nLeft && (pUniqueHashFunc(pData[nRight].eKey) & nMask) != 0)
            nRight--;
        if (nRight <= nLeft)
            break;              /* the scans met: nLeft is the split point */

        /* A misplaced pair was found: exchange it and step past both. */
        DsSwapElem(&pData[nLeft], &pData[nRight]);
        nLeft++;
        nRight--;
        if (nRight < nLeft)
            break;
    }

    if (nBit <= 1)
        return;                 /* lowest bit handled: nothing left to refine */

    /* [nLow, nLeft-1] has the bit clear, [nLeft, nHigh] has it set. */
    BitSortRecursive(pData, nLow, nLeft - 1, pUniqueHashFunc, nBit - 1);
    BitSortRecursive(pData, nLeft, nHigh, pUniqueHashFunc, nBit - 1);
}
// Bit sort -- a sort derived from counting sort and radix sort.
// Counting sort works very well when keys range over 0..65535, because the
// table it needs is small. For keys ranging over 0..4294967295 that table
// would be far too large, but the radix-sort idea helps: counting-sort on the
// high word first, then on the low word.
// If a 64K table is still considered too big it can shrink to 2^8 = 256
// entries (8-bit counting sort), then 2^4, 2^2, 2^1, ... -- the limiting
// special case is the one-bit "bit sort".
// Method (keys in 0..65535): scan from the left for a record whose top bit is
// 1 and from the right for a record whose top bit is 0, swap them, and repeat
// until the scans meet; the top bit is then in order. Continue with the next
// lower bit, ..., down to the last bit.
// Author's stated cost: O(log m) space, O(n log m) time (best and worst case).
// According to the author's measurements only counting sort and quicksort ran
// faster.
// E-mail: silitex@yeah.net
//
// pData/nLen         : records to sort in place and their count
// pUniqueHashFunc    : maps a key to the integer that is actually ordered
// nUniqueHashRange   : number of possible hash values; hash values are
//                      expected to lie in 0..nUniqueHashRange-1
void BitSort(ELEM_TYPE *pData, int nLen, UNIQUE_HASH_CALLBACK pUniqueHashFunc, int nUniqueHashRange)
{
    int nMaxHash;
    int nBitCount = 0;

    // Count the bits needed to represent the largest hash value.
    for (nMaxHash = nUniqueHashRange - 1; nMaxHash > 0; nMaxHash >>= 1)
        nBitCount++;

    // With no significant bit there is nothing to order.
    if (nBitCount != 0)
        BitSortRecursive(pData, 0, nLen - 1, pUniqueHashFunc, nBitCount);
}

复制代码

================================================================
下面给出完整的测试代码
================================================================

// 编译器VC6.0
// 需要设置支持MFC
// E-mail: silitex@yeah.net
#include <afxtempl.h>
#include <stdio.h>
/* The macro definition */
#define _DEBUG_OUT
typedef int ELEM_KEY_TYPE; // Type of the key field
typedef int ELEM_REST_TYPE; // Type of the remaining (non-key) fields
typedef unsigned char byte;
typedef unsigned short int word;
typedef unsigned long int dword;
#define DS_BUBBLE_SORT_DATA_MAX 100000
#define DS_BUBBLE_SORT_DATA_RANGE (DS_BUBBLE_SORT_DATA_MAX)
// Layout of a single record
typedef struct {
ELEM_KEY_TYPE eKey; // Primary key field of the record
ELEM_REST_TYPE eRest[7]; // Remaining fields; the key:rest ratio is provisionally 1:7
} ELEM_TYPE;
// Callback mapping a key to the integer value used for ordering
typedef int (*UNIQUE_HASH_CALLBACK)(ELEM_KEY_TYPE eKey);
// Callback taking a gap value and returning a gap value (see ShellGapTest)
typedef int (*SHELL_GAP_CALLBACK)(int nGap);
// Exchange the contents of two records.
void DsSwapElem(ELEM_TYPE *pElem1, ELEM_TYPE *pElem2)
{
    ELEM_TYPE eSaved = *pElem1;

    *pElem1 = *pElem2;
    *pElem2 = eSaved;
}
// Copy one record: overwrites *pElemDst with the contents of *pElemSrc.
void DsCopyElem(ELEM_TYPE *pElemDst, ELEM_TYPE *pElemSrc)
{
*pElemDst = *pElemSrc;
}
// Random-number source used by the tests on Windows.
// Returns the low 32 bits of the current high-resolution performance counter,
// i.e. a timing-dependent value rather than the output of a random generator.
// By Silitex, at 2009/09/17
DWORD WinRand(void)
{
    LARGE_INTEGER liTicks;

    QueryPerformanceCounter(&liTicks);
    return liTicks.LowPart;
}
// One-to-one hash function used for testing: the key is its own hash value.
int UniqueHashTest(ELEM_KEY_TYPE eKey)
{
return eKey;
}
// Gap-selection callback used to test Shell sort.
// The best known gap sequence is 1, 4, 10, 23, 57, 132, 301, 701, 1750, ...;
// per the original note, values after 1750 follow next_step = round(step * 2.3).
//
// Returns the largest table entry that is strictly smaller than nGap, or 0
// when no entry qualifies (nGap <= 1). The scan is bounded by the table size,
// so any nGap above the last entry yields that last entry (259044) instead of
// reading past the end of the table.
int ShellGapTest(int nGap)
{
    static const int GAP_SEQ[] = {0, 1, 4, 10, 23, 57, 132, 301, 701, 1750, 4025, 9257, 21291, 48969, 112628, 259044};
    const int nCount = (int)(sizeof(GAP_SEQ) / sizeof(GAP_SEQ[0]));
    int i;
    int j = 0;

    for (i = 0; i < nCount && GAP_SEQ[i] < nGap; i++)
        j = i;
    return GAP_SEQ[j];
}
// Print the record count followed by the key of every record in pData[0..nLen-1].
void DebugOut(ELEM_TYPE *pData, int nLen)
{
    int nIdx = 0;

    printf("\n长度:%d\n 数据:", nLen);
    while (nIdx < nLen)
    {
        printf("%d ", pData[nIdx].eKey);
        nIdx++;
    }
    printf("\n");
}
// Counting sort
// Counting sort suits inputs that take only m distinct values with m much
// smaller than n (e.g. the marks of 10000 students, all within 0..100).
// In the general case the records carry non-key fields and the m key values
// are not contiguous (e.g. 0, 10, 100, 101, ...); the author then gives the
// cost as O(n log m), and as O(n) when the array has a single field.
// To handle the general case the keys go through a unique-value hash
// (e.g. 0->0, 10->1, 100->2, 101->3); per the author the fastest such hash
// for irregular values is a binary search, i.e. O(log m).
// A standard counting sort therefore takes, besides the array and its length,
// the hash function pointer and the size of the hashed value space
// (hashed values must lie in 0..m-1).
// Counting sort needs O(m) auxiliary space.
// The author also observed that even with m = n, although space is wasted,
// it performs fewer swaps than quicksort.
// E-mail: silitex@yeah.net
void CountSort(ELEM_TYPE *pData, int nLen, UNIQUE_HASH_CALLBACK pUniqueHashFunc, int nUniqueHashRange)
{
    int *pnCount = new int[nUniqueHashRange];   // occurrences of each hash value
    int *pnStart = new int[nUniqueHashRange];   // first slot of each value's region
    int *pnProbed = new int[nUniqueHashRange];  // slots of the region already examined
    int nKey;
    int nIdx;
    int nTarget;
    int nRunning;
    int nOffset;

    // Clear the bookkeeping tables.
    for (nKey = 0; nKey < nUniqueHashRange; nKey++)
    {
        pnCount[nKey] = 0;
        pnStart[nKey] = 0;
        pnProbed[nKey] = 0;
    }

    // Histogram of the hashed keys.
    for (nIdx = 0; nIdx < nLen; nIdx++)
        pnCount[pUniqueHashFunc(pData[nIdx].eKey)]++;

    // Prefix sums give the first slot of every value's region.
    nRunning = 0;
    for (nKey = 1; nKey < nUniqueHashRange; nKey++)
    {
        nRunning += pnCount[nKey - 1];
        pnStart[nKey] = nRunning;
    }

    // Move every record into the region of its value by swapping in place.
    for (nIdx = 0; nIdx < nLen; nIdx++)
    {
        for (;;)
        {
            nKey = pUniqueHashFunc(pData[nIdx].eKey);

            // Stop once the record at nIdx already sits inside its own region.
            nOffset = nIdx - pnStart[nKey];
            if (nOffset >= 0 && nOffset < pnCount[nKey])
                break;

            // Otherwise find the next slot of that region holding a foreign
            // record and exchange; the record swapped in is examined next.
            while (pnProbed[nKey] < pnCount[nKey])
            {
                nTarget = pnStart[nKey] + pnProbed[nKey];
                pnProbed[nKey]++;
                if (pUniqueHashFunc(pData[nTarget].eKey) != nKey)
                {
                    DsSwapElem(&pData[nIdx], &pData[nTarget]);
                    break;
                }
            }
        }
    }

    delete []pnCount;
    delete []pnStart;
    delete []pnProbed;
}
// Improved version of the partition from Yan Weimin's textbook. According to
// the author, the textbook version saves one variable at the cost of run time,
// and this variant needs the fewest comparisons and the fewest moves.
//
// Partitions pData[nLow..nHigh] around the key of pData[nLow] and returns the
// final index of that pivot record. The pivot is lifted out, leaving a "hole"
// that alternates between the left and the right side as records are moved
// into it.
static int QuickSortPartition(ELEM_TYPE *pData, int nLow, int nHigh)
{
    ELEM_TYPE ePivot;
    int nHole;

    DsCopyElem(&ePivot, &pData[nLow]);
    nHole = nLow;
    nLow++;

    while (nLow <= nHigh)
    {
        if (nHole < nLow)
        {
            // Hole is on the left: search from the right for a smaller key.
            while (nLow <= nHigh && pData[nHigh].eKey >= ePivot.eKey)
                nHigh--;
            if (nLow <= nHigh)
            {
                DsCopyElem(&pData[nHole], &pData[nHigh]);
                nHole = nHigh;
                nHigh--;
            }
        }
        else
        {
            // Hole is on the right: search from the left for a larger key.
            while (nLow <= nHigh && pData[nLow].eKey <= ePivot.eKey)
                nLow++;
            if (nLow <= nHigh)
            {
                DsCopyElem(&pData[nHole], &pData[nLow]);
                nHole = nLow;
                nLow++;
            }
        }
    }

    // Drop the pivot into the hole that is left.
    DsCopyElem(&pData[nHole], &ePivot);
    return nHole;
}
// Recursively quicksort pData[nLow..nHigh] (inclusive bounds) using
// QuickSortPartition; ranges with fewer than two records are left alone.
static void QuickSortRecursive(ELEM_TYPE *pData, int nLow, int nHigh)
{
    int nSplit;

    if (nLow >= nHigh)
        return;

    nSplit = QuickSortPartition(pData, nLow, nHigh);
    QuickSortRecursive(pData, nLow, nSplit - 1);
    QuickSortRecursive(pData, nSplit + 1, nHigh);
}
// Quicksort: sorts the nLen records of pData in place by ascending key.
// E-mail: silitex@yeah.net
void QuickSort(ELEM_TYPE *pData, int nLen)
{
    int nLast = nLen - 1;

    QuickSortRecursive(pData, 0, nLast);
}
/*
 * Recursive worker of the bit sort.
 *
 * Partitions pData[nLow..nHigh] (both bounds inclusive) on bit number nBit of
 * the hashed key (bits are numbered from 1, so the mask is 1 << (nBit-1)):
 * records whose bit is clear are moved in front of records whose bit is set.
 * Each half is then processed with the next lower bit until bit 1 is done.
 *
 * pUniqueHashFunc maps a record key to the integer whose bits are examined.
 * Ranges with fewer than two records are left untouched; a range of exactly
 * two records is ordered by one direct comparison of the full hash values.
 */
void BitSortRecursive(ELEM_TYPE *pData, int nLow, int nHigh, UNIQUE_HASH_CALLBACK pUniqueHashFunc, int nBit)
{
    int nLeft = nLow;
    int nRight = nHigh;
    int nMask;

    if (nLow >= nHigh)
        return;

    if (nHigh == nLow + 1)
    {
        /* Only two records remain: compare them directly. */
        if (pUniqueHashFunc(pData[nLow].eKey) > pUniqueHashFunc(pData[nHigh].eKey))
            DsSwapElem(&pData[nLow], &pData[nHigh]);
        return;
    }

    nMask = 1 << (nBit - 1);
    for (;;)
    {
        /* Advance from the left to the first record whose bit is set. */
        while (nLeft <= nHigh && (pUniqueHashFunc(pData[nLeft].eKey) & nMask) == 0)
            nLeft++;
        /* Retreat from the right to the first record whose bit is clear. */
        while (nRight > nLeft && (pUniqueHashFunc(pData[nRight].eKey) & nMask) != 0)
            nRight--;
        if (nRight <= nLeft)
            break;              /* the scans met: nLeft is the split point */

        /* A misplaced pair was found: exchange it and step past both. */
        DsSwapElem(&pData[nLeft], &pData[nRight]);
        nLeft++;
        nRight--;
        if (nRight < nLeft)
            break;
    }

    if (nBit <= 1)
        return;                 /* lowest bit handled: nothing left to refine */

    /* [nLow, nLeft-1] has the bit clear, [nLeft, nHigh] has it set. */
    BitSortRecursive(pData, nLow, nLeft - 1, pUniqueHashFunc, nBit - 1);
    BitSortRecursive(pData, nLeft, nHigh, pUniqueHashFunc, nBit - 1);
}
// Bit sort -- a sort derived from counting sort and radix sort.
// Counting sort works very well when keys range over 0..65535, because the
// table it needs is small. For keys ranging over 0..4294967295 that table
// would be far too large, but the radix-sort idea helps: counting-sort on the
// high word first, then on the low word.
// If a 64K table is still considered too big it can shrink to 2^8 = 256
// entries (8-bit counting sort), then 2^4, 2^2, 2^1, ... -- the limiting
// special case is the one-bit "bit sort".
// Method (keys in 0..65535): scan from the left for a record whose top bit is
// 1 and from the right for a record whose top bit is 0, swap them, and repeat
// until the scans meet; the top bit is then in order. Continue with the next
// lower bit, ..., down to the last bit.
// Author's stated cost: O(log m) space, O(n log m) time (best and worst case).
// According to the author's measurements only counting sort and quicksort ran
// faster.
// E-mail: silitex@yeah.net
//
// pData/nLen         : records to sort in place and their count
// pUniqueHashFunc    : maps a key to the integer that is actually ordered
// nUniqueHashRange   : number of possible hash values; hash values are
//                      expected to lie in 0..nUniqueHashRange-1
void BitSort(ELEM_TYPE *pData, int nLen, UNIQUE_HASH_CALLBACK pUniqueHashFunc, int nUniqueHashRange)
{
    int nMaxHash;
    int nBitCount = 0;

    // Count the bits needed to represent the largest hash value.
    for (nMaxHash = nUniqueHashRange - 1; nMaxHash > 0; nMaxHash >>= 1)
        nBitCount++;

    // With no significant bit there is nothing to order.
    if (nBitCount != 0)
        BitSortRecursive(pData, 0, nLen - 1, pUniqueHashFunc, nBitCount);
}
// Test driver for the in-memory sorts.
// Builds one random data set (aCopy) and, for every enabled algorithm, copies
// it into aTest, prints it, sorts it, and prints the result. Sections under
// "#if 0" call sort routines that are not part of this listing.
void TestEmsSort(void)
{
    CArray<ELEM_TYPE, ELEM_TYPE> aTest;
    CArray<ELEM_TYPE, ELEM_TYPE> aCopy;
    // Zero the whole record: only eKey is assigned below, and the full
    // record (including eRest) is copied into the array and by every swap.
    ELEM_TYPE eTemp = {0};
    UINT i;

    for (i = 0; i < DS_BUBBLE_SORT_DATA_MAX; i++)
    {
        eTemp.eKey = WinRand()%DS_BUBBLE_SORT_DATA_RANGE;
        aCopy.Add(eTemp);
    }
#if 0
    aTest.Copy(aCopy);
    BubbleSort(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    BubbleSortDouble(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    GnomeSort(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    OddEvenSort(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    SelectSort(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    InsertSort(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    BinInsertSort(aTest.GetData(), aTest.GetSize());
#endif
#if 0
    aTest.Copy(aCopy);
    ShellSort(aTest.GetData(), aTest.GetSize(), ShellGapTest);
    aTest.Copy(aCopy);
    MergeSort(aTest.GetData(), aTest.GetSize());
    aTest.Copy(aCopy);
    CombSort(aTest.GetData(), aTest.GetSize());
#endif
    // Counting sort
    aTest.Copy(aCopy);
    DebugOut(aTest.GetData(), aTest.GetSize());
    CountSort(aTest.GetData(), aTest.GetSize(), UniqueHashTest, DS_BUBBLE_SORT_DATA_RANGE);
    DebugOut(aTest.GetData(), aTest.GetSize());
#if 0
    aTest.Copy(aCopy);
    CountSortSimp(aTest.GetData(), aTest.GetSize(), DS_BUBBLE_SORT_DATA_RANGE);
#endif
    // Quicksort
    aTest.Copy(aCopy);
    DebugOut(aTest.GetData(), aTest.GetSize());
    QuickSort(aTest.GetData(), aTest.GetSize());
    DebugOut(aTest.GetData(), aTest.GetSize());
    // Bit sort
    aTest.Copy(aCopy);
    DebugOut(aTest.GetData(), aTest.GetSize());
    BitSort(aTest.GetData(), aTest.GetSize(), UniqueHashTest, DS_BUBBLE_SORT_DATA_RANGE);
    DebugOut(aTest.GetData(), aTest.GetSize());
#if 0
    aTest.Copy(aCopy);
    BitSortSimp(aTest.GetData(), aTest.GetSize(), DS_BUBBLE_SORT_DATA_RANGE);
#endif
}
// Program entry point: runs the sort test driver once and exits with status 0.
int main(void)
{
TestEmsSort();
return 0;
}

复制代码

〇〇 · 发表于 2009-10-30 14:01:04

vc6本身就带qsort

〇〇 · 发表于 2009-10-30 14:15:39

http://blog.csdn.net/drzhouweimi ... /08/23/1109499.aspx
双核CPU上的快速排序效率

〇〇 · 发表于 2009-10-30 14:24:46

还用到了goto

/***
*qsort.c - quicksort algorithm; qsort() library function for sorting arrays
*
* Copyright (c) 1985-1997, Microsoft Corporation. All rights reserved.
*
*Purpose:
* To implement the qsort() routine for sorting arrays.
*
*******************************************************************************/
#include <cruntime.h>
#include <stdlib.h>
#include <search.h>
/* prototypes for local routines */
static void __cdecl shortsort(char *lo, char *hi, unsigned width,
int (__cdecl *comp)(const void *, const void *));
static void __cdecl swap(char *p, char *q, unsigned int width);
/* this parameter defines the cutoff between using quick sort and
insertion sort for arrays; arrays with lengths shorter or equal to the
below value use insertion sort */
#define CUTOFF 8 /* testing shows that this is good value */
/***
*qsort(base, num, wid, comp) - quicksort function for sorting arrays
*
*Purpose:
* quicksort the array of elements
* side effects: sorts in place
*
*Entry:
* char *base = pointer to base of array
* unsigned num = number of elements in the array
* unsigned width = width in bytes of each array element
* int (*comp)() = pointer to function returning analog of strcmp for
* strings, but supplied by user for comparing the array elements.
* it accepts 2 pointers to elements and returns neg if 1<2, 0 if
* 1=2, pos if 1>2.
*
*Exit:
* returns void
*
*Exceptions:
*
*******************************************************************************/
/* sort the array between lo and hi (inclusive) */
/* Non-recursive quicksort: sorts num elements of width bytes starting at base,
   ordering them with comp. Pending sub-arrays are kept on an explicit stack
   (lostk/histk) and "recursion" is performed with goto. */
void __cdecl qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
)
{
char *lo, *hi; /* ends of sub-array currently sorting */
char *mid; /* points to middle of subarray */
char *loguy, *higuy; /* traveling pointers for partition step */
unsigned size; /* size of the sub-array */
char *lostk[30], *histk[30]; /* stack of saved sub-array bounds still to be processed */
int stkptr; /* index of the next free slot in lostk/histk */
/* Note: the number of stack entries required is no more than
1 + log2(size), so 30 is sufficient for any array */
if (num < 2 || width == 0)
return; /* nothing to do */
stkptr = 0; /* initialize stack */
lo = base;
hi = (char *)base + width * (num-1); /* initialize limits */
/* this entry point is for pseudo-recursion calling: setting
lo and hi and jumping to here is like recursion, but stkptr is
preserved, locals aren't, so we preserve stuff on the stack */
recurse:
size = (hi - lo) / width + 1; /* number of el's to sort */
/* below a certain size, it is faster to use a O(n^2) sorting method */
if (size <= CUTOFF) {
shortsort(lo, hi, width, comp);
}
else {
/* First we pick a partitioning element. The efficiency of the
algorithm demands that we find one that is approximately the
median of the values, but also that we select one fast. Using
the first one produces bad performance if the array is already
sorted, so we use the middle one, which would require a very
weirdly arranged array for worst case performance. Testing shows
that a median-of-three algorithm does not, in general, increase
performance. */
mid = lo + (size / 2) * width; /* find middle element */
swap(mid, lo, width); /* swap it to beginning of array */
/* We now wish to partition the array into three pieces, one
consisting of elements <= partition element, one of elements
equal to the partition element, and one of elements >= to it. This
is done below; comments indicate conditions established at every
step. */
loguy = lo;
higuy = hi + width;
/* Note that higuy decreases and loguy increases on every iteration,
so loop must terminate. */
for (;;) {
/* lo <= loguy < hi, lo < higuy <= hi + 1,
A[i] <= A[lo] for lo <= i <= loguy,
A[i] >= A[lo] for higuy <= i <= hi */
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) <= 0);
/* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
either loguy > hi or A[loguy] > A[lo] */
do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) >= 0);
/* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
either higuy <= lo or A[higuy] < A[lo] */
if (higuy < loguy)
break;
/* if loguy > hi or higuy <= lo, then we would have exited, so
A[loguy] > A[lo], A[higuy] < A[lo],
loguy < hi, higuy > lo */
swap(loguy, higuy, width);
/* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
of loop is re-established */
}
/* A[i] >= A[lo] for higuy < i <= hi,
A[i] <= A[lo] for lo <= i < loguy,
higuy < loguy, lo <= higuy <= hi
implying:
A[i] >= A[lo] for loguy <= i <= hi,
A[i] <= A[lo] for lo <= i <= higuy,
A[i] = A[lo] for higuy < i < loguy */
swap(lo, higuy, width); /* put partition element in place */
/* OK, now we have the following:
A[i] >= A[higuy] for loguy <= i <= hi,
A[i] <= A[higuy] for lo <= i < higuy
A[i] = A[lo] for higuy <= i < loguy */
/* We've finished the partition, now we want to sort the subarrays
[lo, higuy-1] and [loguy, hi].
We do the smaller one first to minimize stack usage.
We only sort arrays of length 2 or more.*/
if ( higuy - 1 - lo >= hi - loguy ) {
if (lo + width < higuy) {
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
} /* save big recursion for later */
if (loguy < hi) {
lo = loguy;
goto recurse; /* do small recursion */
}
}
else {
if (loguy < hi) {
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr; /* save big recursion for later */
}
if (lo + width < higuy) {
hi = higuy - width;
goto recurse; /* do small recursion */
}
}
}
/* We have sorted the array, except for any pending sorts on the stack.
Check if there are any, and do them. */
--stkptr;
if (stkptr >= 0) {
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse; /* pop subarray from stack */
}
else
return; /* all subarrays done */
}
/***
*shortsort(lo, hi, width, comp) - simple O(n^2) sort for short arrays
*
*Purpose:
* sorts the sub-array of elements between lo and hi (inclusive)
* side effects: sorts in place
* Works as a selection sort: each pass finds the largest element of
* [lo, hi], swaps it into position hi, and then shrinks the range by one.
*
*Entry:
* char *lo = pointer to low element to sort
* char *hi = pointer to high element to sort
* unsigned width = width in bytes of each array element
* int (*comp)() = pointer to function returning analog of strcmp for
* strings, but supplied by user for comparing the array elements.
* it accepts 2 pointers to elements and returns neg if 1<2, 0 if
* 1=2, pos if 1>2.
*
*Exit:
* returns void
*
*Exceptions:
*
*******************************************************************************/
static void __cdecl shortsort (
    char *lo,
    char *hi,
    unsigned width,
    int (__cdecl *comp)(const void *, const void *)
    )
{
    char *scan;
    char *largest;

    /* Invariant at the top of each pass: everything above hi is already in
       its final, sorted position. */
    for (; hi > lo; hi -= width) {
        largest = lo;
        scan = lo + width;
        while (scan <= hi) {
            /* largest points at the biggest element of [lo, scan) */
            if (comp(scan, largest) > 0)
                largest = scan;
            scan += width;
        }
        /* Move the biggest element of [lo, hi] to the top of the range. */
        swap(largest, hi, width);
    }
}
/***
*swap(a, b, width) - swap two elements
*
*Purpose:
* exchanges the contents of two array elements, each width bytes long;
* does nothing when both pointers refer to the same element
*
*Entry:
* char *a, *b = pointer to two elements to swap
* unsigned width = width in bytes of each array element
*
*Exit:
* returns void
*
*Exceptions:
*
*******************************************************************************/
static void __cdecl swap (
    char *a,
    char *b,
    unsigned width
    )
{
    unsigned k;
    char held;

    if (a == b)
        return;

    /* Exchange byte by byte to avoid potential alignment problems. */
    for (k = 0; k < width; k++) {
        held = a[k];
        a[k] = b[k];
        b[k] = held;
    }
}

复制代码

silitex · 发表于 2009-10-30 15:07:17

微软的这个源代码是在哪个目录下面的，我好像没有找到。谢谢！

silitex · 发表于 2009-10-30 15:18:50

在vc9.0的版本下找到了，感谢！哎，好久忘记了微软crt下面的源代码了！

silitex · 发表于 2009-10-30 16:11:40

本帖最后由 silitex 于 2009-10-30 16:42 编辑

居然出现这样的例子，本来想把维基百科上面所有的排序算法熟悉了以后再来看周建钦的超快速排序算法，但看到计数排序和基数排序的时候突然想到了这个位排序算法。后来再回过头来看周建钦的超快速排序算法的时候，发现原来排序思想是一样的！但显然：周建钦的超快排序算法并没有这个BitSort排序算法快，但经过大量数据的验证(这个时间的验证是以CPU的计数频率进行的，可以精确到纳秒)，当n约等于m的时候，其速度比不上快速排序！当n>=200000的时候，其运行时间几乎相当。如果n远远大于m的时候，还不如用计数排序来得快！并且显然周在表达时间复杂度和空间复杂度的时候是错误的。

silitex · 发表于 2009-10-31 08:46:24

本帖最后由 silitex 于 2009-10-31 10:12 编辑

微软的非递归的快速排序居然在最坏的情况下也只有O(logn)的空间复杂度，确实很让人惊讶，为了研究这样的快速排序，我把微软的非递归的快速排序翻译成了和我写的快速排序一样风格的代码，但发现其运行时间居然还没有递归版本的快（经过了大量的测试），究其原因，应该是微软在进行交换的时候采用了与殷人昆一书类似的方案，但其实严蔚敏一书的方案可以得到最节约的时间。如下是一个n = 100000, m = n的测试时间的结果(其中第一列是一个精确的时间计数)：
  |          398067668| 3966754| 768735|  Comb Sort - 比较次数:3966754  交换次数:768735  空间复杂度:O(1)
  |          139068285| 2106347| 262648|  快速排序 - 比较次数:2106347  准交换次数:262648.1(交换次数+移动次数/3.移动次数%3)  空间复杂度:O(n)(最坏情况), O(logn)(平均情况)
  |          163932502| 2102328| 397655|  快速排序(微软) - 比较次数:2102328  交换次数:397655  空间复杂度:O(logn)
  |          167993880| 2106347| 301311|  快速排序(严蔚敏) - 比较次数:2106347  准交换次数:301311.1(交换次数+移动次数/3.移动次数%3)  空间复杂度:O(n)(最坏情况), O(logn)(平均情况)
  |          380433367| 2120692| 1158540|  快速排序(殷人昆) - 比较次数:2120692  准交换次数:1158540.0(交换次数+移动次数/3.移动次数%3)  空间复杂度:O(n)(最坏情况), O(logn)(平均情况)
  |          188575853| 299982|    99990|  计数排序 - 比较次数:299982  交换次数:99990  空间复杂度:O(m)(m为排序数据的取值可能数目)
  |          195871200| 299982|    99990|  简化的计数排序 - 比较次数:299982  交换次数:99990  空间复杂度:O(m)(m为排序数据的取值可能数目)
  |          320452718| 1341610| 447203|  希尔排序 - 比较次数:1341610  准交换次数:447203.1(交换次数+移动次数/3.移动次数%3)  空间复杂度:O(1)
  |          593682330| 1466030| 1251155|  归并排序 - 比较次数:1466030  交换次数:1251155  空间复杂度:O(n)
  |          601350990| 1466030| 987002|  普通归并排序 - 比较次数:1466030  交换次数:987002  空间复杂度:O(n)
  |          177502995| 1679679| 380504|  位排序 - 比较次数:1679679  交换次数:380504  空间复杂度:O(logn)
  |          154381163| 1679679| 380504|  简化的位排序 - 比较次数:1679679  交换次数:380504  空间复杂度:O(logn)

silitex · 发表于 2009-10-31 09:07:44

附上测试用的严蔚敏、殷人昆、以及修改风格后的微软版本的三个版本的快速排序算法:

// Partition scheme from Yan Weimin's textbook.
// Per the author, it moves fewer records than Yin Renkun's scheme but makes
// more comparisons.
//
// Partitions pData[nLow..nHigh] around the key of pData[nLow] and returns the
// final index of that pivot record.
static int QuickSortPartition2(ELEM_TYPE *pData, int nLow, int nHigh)
{
    ELEM_TYPE ePivot;

    DsCopyElem(&ePivot, &pData[nLow]);
    while (nLow < nHigh)
    {
        // Skip keys on the right that are not smaller than the pivot,
        // then move the record found there into the left slot.
        for (; nLow < nHigh && pData[nHigh].eKey >= ePivot.eKey; nHigh--)
        {
        }
        DsCopyElem(&pData[nLow], &pData[nHigh]);

        // Skip keys on the left that are not larger than the pivot,
        // then move the record found there into the right slot.
        for (; nLow < nHigh && pData[nLow].eKey <= ePivot.eKey; nLow++)
        {
        }
        DsCopyElem(&pData[nHigh], &pData[nLow]);
    }
    DsCopyElem(&pData[nLow], &ePivot);
    return nLow;
}
// Recursively quicksort pData[nLow..nHigh] (inclusive bounds) using
// QuickSortPartition2; ranges with fewer than two records are left alone.
static void QuickSortRecursive2(ELEM_TYPE *pData, int nLow, int nHigh)
{
    int nSplit;

    if (nLow >= nHigh)
        return;

    nSplit = QuickSortPartition2(pData, nLow, nHigh);
    QuickSortRecursive2(pData, nLow, nSplit - 1);
    QuickSortRecursive2(pData, nSplit + 1, nHigh);
}
// Quicksort (Yan Weimin partition): sorts the nLen records of pData in place.
void QuickSort2(ELEM_TYPE *pData, int nLen)
{
    int nLast = nLen - 1;

    QuickSortRecursive2(pData, 0, nLast);
}
// Partition scheme from Yin Renkun's textbook, with small refinements.
// Per the author, it moves more records than Yan Weimin's scheme although it
// makes fewer comparisons.
//
// Partitions pData[nLow..nHigh] around the key of pData[nLow]: records with a
// smaller key are gathered directly behind nLow, then the pivot is swapped to
// the end of that group. Returns the pivot's final index.
static int QuickSortPartition3(ELEM_TYPE *pData, int nLow, int nHigh)
{
    int nBoundary = nLow;   // last index of the "smaller than pivot" group
    int nScan = nLow + 1;

    while (nScan <= nHigh)
    {
        if (pData[nScan].eKey < pData[nLow].eKey)
        {
            // Grow the group by one slot; swap only if the record is not
            // already sitting in that slot.
            if (++nBoundary != nScan)
                DsSwapElem(&pData[nBoundary], &pData[nScan]);
        }
        nScan++;
    }
    DsSwapElem(&pData[nLow], &pData[nBoundary]);
    return nBoundary;
}
// Recursively quicksort pData[nLow..nHigh] (inclusive bounds) using
// QuickSortPartition3; ranges with fewer than two records are left alone.
template <class TYPE>
static void QuickSortRecursive3(TYPE *pData, int nLow, int nHigh)
{
    if (nLow >= nHigh)
        return;

    int nSplit = QuickSortPartition3(pData, nLow, nHigh);
    QuickSortRecursive3(pData, nLow, nSplit - 1);
    QuickSortRecursive3(pData, nSplit + 1, nHigh);
}
// Quicksort (Yin Renkun partition): sorts the nLen records of pData in place.
void QuickSort3(ELEM_TYPE *pData, int nLen)
{
    int nLast = nLen - 1;

    QuickSortRecursive3(pData, 0, nLast);
}
#define CUTOFF 8 /* testing shows that this is good value */
/* Selection sort for short ranges: orders pData[lo..hi] (inclusive) by
   ascending key. Each pass finds the record with the largest key, swaps it
   into position hi if it is not already there, and shrinks the range. */
static void shortsort (ELEM_TYPE *pData, int lo, int hi)
{
    int nScan;
    int nMaxPos;

    for (; hi > lo; hi--) {
        nMaxPos = lo;
        for (nScan = lo + 1; nScan <= hi; nScan++) {
            if (pData[nMaxPos].eKey < pData[nScan].eKey)
                nMaxPos = nScan;
        }
        if (nMaxPos != hi)
            DsSwapElem(&pData[nMaxPos], &pData[hi]);
    }
}
// Index-based translation of Microsoft's quicksort, written to make the
// algorithm easier to debug and to understand.
// Sorts the nLen records of pData in place by ascending eKey. Pending
// sub-arrays are kept on an explicit stack (lostk/histk) and "recursion" is
// performed with goto.
void QuickSortMs(ELEM_TYPE *pData, int nLen)
{
/* Stack depth: 8*sizeof(int)-2, i.e. 30 entries when int is 4 bytes.
   Note that this macro stays defined after the function body. */
#define STKSIZ (8*sizeof(nLen)-2)
int lo, hi; /* ends of sub-array currently sorting */
int mid; /* points to middle of subarray */
int loguy, higuy; /* traveling pointers for partition step */
int size; /* size of the sub-array */
int lostk[STKSIZ], histk[STKSIZ]; /* stack of saved sub-array bounds still to be processed */
int stkptr; /* index of the next free slot in lostk/histk */
if (nLen < 2)
return; /* nothing to do */
stkptr = 0; /* initialize stack */
lo = 0;
hi = nLen-1; /* initialize limits */
/* this entry point is for pseudo-recursion calling: setting
lo and hi and jumping to here is like recursion, but stkptr is
preserved, locals aren't, so we preserve stuff on the stack */
recurse:
size = hi - lo + 1; /* number of el's to sort */
/* below a certain size, it is faster to use a O(n^2) sorting method */
if (size <= CUTOFF) {
shortsort(pData, lo, hi);
}
else {
/* First we pick a partitioning element. The efficiency of the
algorithm demands that we find one that is approximately the
median of the values, but also that we select one fast. Using
the first one produces bad performance if the array is already
sorted, so we use the middle one, which would require a very
weirdly arranged array for worst case performance. Testing shows
that a median-of-three algorithm does not, in general, increase
performance. */
mid = lo + size / 2;
DsSwapElem(&pData[mid], &pData[lo]);
/* We now wish to partition the array into three pieces, one
consisting of elements <= partition element, one of elements
equal to the partition element, and one of elements >= to it. This
is done below; comments indicate conditions established at every
step. */
loguy = lo;
higuy = hi + 1;
/* Note that higuy decreases and loguy increases on every iteration,
so loop must terminate. */
for (;;) {
/* lo <= loguy < hi, lo < higuy <= hi + 1,
A[i] <= A[lo] for lo <= i <= loguy,
A[i] >= A[lo] for higuy <= i <= hi */
do {
loguy++;
} while (loguy <= hi && pData[loguy].eKey <= pData[lo].eKey);
/* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
either loguy > hi or A[loguy] > A[lo] */
do {
higuy--;
} while (higuy > lo && pData[higuy].eKey >= pData[lo].eKey);
/* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
either higuy <= lo or A[higuy] < A[lo] */
if (higuy < loguy)
break;
/* if loguy > hi or higuy <= lo, then we would have exited, so
A[loguy] > A[lo], A[higuy] < A[lo],
loguy < hi, higuy > lo */
DsSwapElem(&pData[loguy], &pData[higuy]);
/* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
of loop is re-established */
}
/* A[i] >= A[lo] for higuy < i <= hi,
A[i] <= A[lo] for lo <= i < loguy,
higuy < loguy, lo <= higuy <= hi
implying:
A[i] >= A[lo] for loguy <= i <= hi,
A[i] <= A[lo] for lo <= i <= higuy,
A[i] = A[lo] for higuy < i < loguy */
DsSwapElem(&pData[lo], &pData[higuy]); /* put partition element in place */
/* OK, now we have the following:
A[i] >= A[higuy] for loguy <= i <= hi,
A[i] <= A[higuy] for lo <= i < higuy
A[i] = A[lo] for higuy <= i < loguy */
/* We've finished the partition, now we want to sort the subarrays
[lo, higuy-1] and [loguy, hi].
We do the smaller one first to minimize stack usage.
We only sort arrays of length 2 or more.*/
if ( higuy - 1 - lo >= hi - loguy ) {
if (lo + 1 < higuy) {
lostk[stkptr] = lo;
histk[stkptr] = higuy - 1;
++stkptr;
} /* save big recursion for later */
if (loguy < hi) {
lo = loguy;
goto recurse; /* do small recursion */
}
}
else {
if (loguy < hi) {
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr; /* save big recursion for later */
}
if (lo + 1 < higuy) {
hi = higuy - 1;
goto recurse; /* do small recursion */
}
}
}
/* We have sorted the array, except for any pending sorts on the stack.
Check if there are any, and do them. */
--stkptr;
if (stkptr >= 0) {
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse; /* pop subarray from stack */
}
else {
return; /* all subarrays done */
}
}

复制代码

liangbch · 发表于 2009-11-3 15:02:15

本帖最后由 liangbch 于 2009-11-3 15:06 编辑

关于位排序，前人已有研究。请看 http://bbs.emath.ac.cn/viewthrea ... &fromuid=25#pid9829。
快速扫了你代码中的注释，感觉你的这个实现不太好，需要分配额外的空间。
《无符号整数按位快速排序算法》基本思想是：
1. 任何一个整数都是m位2进制数，数n的各个比特从高到低依次为$B_m$,$B_{m-1}$, ......$B_1$,$B_0$.
2. 对于一个序列，第一个元素的下标为begin, 最末一个元素的下标为end, 则有如下算法

/*
 * Sort x[begin..end] (inclusive bounds) in ascending order by examining the
 * keys one bit at a time, starting with bit k and working down to bit 1.
 * Bits are numbered from 1 (bit k has the mask 1u << (k-1)), so to sort
 * values that fit in m bits call sqortByBit(x, 0, count-1, m).
 * k must not exceed the number of bits in unsigned int.
 *
 * Each call partitions the range so that every element whose bit k is 0
 * comes before every element whose bit k is 1, then sorts the two groups
 * on bit k-1. Merely swapping mirrored pairs x[i], x[j] while stepping i and
 * j together does not achieve that (bits 0,0,1 would leave a mixed right
 * half), so the two scans advance independently until they cross.
 */
void sqortByBit(unsigned int *x, int begin, int end, int k)
{
    unsigned int mask;
    unsigned int tmp;
    int i;
    int j;

    if (begin >= end || k < 1)
        return;                     /* fewer than two elements, or no bit left */

    mask = 1u << (k - 1);
    i = begin;
    j = end;
    while (i <= j)
    {
        if ((x[i] & mask) == 0)
        {
            i++;                    /* x[i] already belongs to the 0 group */
        }
        else if ((x[j] & mask) != 0)
        {
            j--;                    /* x[j] already belongs to the 1 group */
        }
        else
        {
            /* x[i] has the bit set and x[j] has it clear: exchange them */
            tmp = x[i];
            x[i] = x[j];
            x[j] = tmp;
            i++;
            j--;
        }
    }

    /* Now x[begin..j] have bit k clear, x[i..end] have it set, and i == j+1. */
    if (k > 1)
    {
        sqortByBit(x, begin, j, k - 1);  /* sort the 0 group on bit k-1 */
        sqortByBit(x, i, end, k - 1);    /* sort the 1 group on bit k-1 */
    }
}
这是一个递归的算法，和快速排序非常类似。和快速排序不同的是，
1. 他每次比较时，只比较第k比特。
2. 对内存的访问是顺序访问，而快速排序则是在一定范围内的随机访问。所以对cache比较敏感的机器，这种排序更有优势。

  时间复杂度分析：快速排序的时间复杂度为n*log(n), 而按位排序的时间复杂度为n*log(m),m为最大关键字。所以若m<=n，则这个排序算法快于快速排序。

改进，如果关键字是均匀分布的。可以综合位排序和快速排序，使其速度更快。
改进方法：当m>n, 且序列长度小于某一阈值时,调用快速排序算法。

账号		自动登录	找回密码
密码			欢迎注册

[原创] 位排序 -- 基于计数排序和基数排序产生的特例

马上注册，结交更多好友，享用更多功能，让你轻松玩转社区。