找回密码
 欢迎注册
楼主: gxqcn

[擂台] 求由0和1构成的最小十进制正整数,其是指定正整数的倍数

[复制链接]
 楼主| 发表于 2009-8-16 07:46:30 | 显示全部楼层

优化版本

  1. // http://bbs.emath.ac.cn/viewthread.php?tid=1689&page=2&fromuid=8#pid21088
  2. #include <iostream>
  3. #include <ctime>
  4. #include <vector>
  5. #include <map>
  6. using namespace std;
  7. typedef unsigned int UINT32, *PUINT32;
  8. typedef unsigned __int64 UINT64, *PUINT64;
  9. typedef struct node
  10. {
  11. UINT32 key;
  12. UINT32 value;
  13. }NODE;
  14. typedef vector< NODE > NODE_VECTOR;
  15. typedef map< UINT32, UINT32 > KEY_INDEX_MAP;
  16. typedef KEY_INDEX_MAP::value_type MVT;
  17. #ifndef UInt32x32To64
  18. # define UInt32x32To64(a, b) ((UINT64)(((UINT64)(a)) * ((UINT64)(b))))
  19. #endif
  20. char szResult[128];
  21. // http://bbs.emath.ac.cn/viewthread.php?tid=1207&page=6&fromuid=8#pid16108
  22. UINT32 Sqrt( UINT32 n )
  23. {
  24. UINT32 t;
  25. UINT32 ret = 0;
  26. #define SQRT_CORE(x) \
  27. t = ret + (1UL<<(x)); \
  28. ret >>= 1; \
  29. if ( n >= t ) \
  30. { \
  31. n -= t; \
  32. ret += (1UL<<(x)); \
  33. }
  34. SQRT_CORE(30);
  35. SQRT_CORE(28);
  36. SQRT_CORE(26);
  37. SQRT_CORE(24);
  38. SQRT_CORE(22);
  39. SQRT_CORE(20);
  40. SQRT_CORE(18);
  41. SQRT_CORE(16);
  42. SQRT_CORE(14);
  43. SQRT_CORE(12);
  44. SQRT_CORE(10);
  45. SQRT_CORE(8);
  46. SQRT_CORE(6);
  47. SQRT_CORE(4);
  48. SQRT_CORE(2);
  49. SQRT_CORE(0);
  50. #undef SQRT_CORE
  51. return ret;
  52. }
  53. const char * GetMin01( UINT32 n )
  54. {
  55. char * p = szResult;
  56. UINT32 i=0, j=0, d, ten, bits, index, size0, size1;
  57. UINT32 key, SqrtN;
  58. UINT32 value;
  59. NODE info;
  60. NODE_VECTOR vNode;
  61. KEY_INDEX_MAP mKeyIndex;
  62. KEY_INDEX_MAP::iterator it;
  63. *( p += 127 ) = '\0';
  64. // if ( 0 == n ) return p;
  65. while ( 0 == ( 1 & n ))
  66. {
  67. ++i; //times of prime 2
  68. n >>= 1;
  69. }
  70. while ( 0 == n % 5 )
  71. {
  72. ++j; //times of prime 5
  73. n /= 5;
  74. }
  75. if ( i < j ) i = j; //max times of prime 2 and prime 5
  76. memset( p -= i, '0', i * sizeof(char));
  77. if ( 1 == n )
  78. {
  79. *(--p) = '1';
  80. return p;
  81. }
  82. //special case that n+1 is 10^j
  83. i = n + 1;
  84. j = 0;
  85. while ( 0 == i % 10 )
  86. {
  87. ++j;
  88. i /= 10;
  89. }
  90. if ( 1 == i )
  91. {
  92. j *= 9;
  93. memset( p -= j, '1', j * sizeof(char));
  94. return p;
  95. }
  96. switch( n )
  97. {
  98. case 3:
  99. case 37: n = 111;
  100. break;
  101. case 7:
  102. case 13: n = 1001;
  103. break;
  104. case 337: n = 1011;
  105. break;
  106. case 367: n = 1101;
  107. break;
  108. default:
  109. break;
  110. }
  111. i = n;
  112. j = 0;
  113. while (( d = i % 10 ) <= 1 )
  114. {
  115. *(--p) = '0' + d;
  116. if ( 0 == ( i /= 10 ))
  117. {
  118. return p;
  119. }
  120. ++j;
  121. }
  122. p += j;
  123. const UINT32 INIT4BITS[] = { 0, 1, 10, 11, 100, 101, 110, 111, 1000, 1001, 1010, 1011, 1100, 1101, 1110, 1111, };
  124. for ( i=0; 16!=i; ++i )
  125. {
  126. info.key = INIT4BITS[i] % n;
  127. info.value = i;
  128. vNode.push_back( info );
  129. mKeyIndex.insert( MVT( info.key, i ));
  130. }
  131. index = i - 1;
  132. SqrtN = Sqrt( n );
  133. for ( bits=4, ten=10000%n, size1=size0=vNode.size(); ; )
  134. {
  135. for ( i=1; size0!=i; ++i )
  136. {
  137. d = (UINT32)( UInt32x32To64( vNode[i].key, ten ) % n );
  138. if ( mKeyIndex.end() != ( it = mKeyIndex.find( n - d )))
  139. {
  140. value = vNode[(*it).second].value;
  141. do
  142. {
  143. *(--p) = '0' + ( 1 & value );
  144. value >>= 1;
  145. } while( 0 != --bits );
  146. value = vNode[i].value;
  147. do
  148. {
  149. *(--p) = '0' + ( 1 & value );
  150. } while( 0 != ( value >>= 1 ));
  151. return p;
  152. }
  153. }
  154. for ( i=1; size1!=i; ++i )
  155. {
  156. d = (UINT32)( UInt32x32To64( vNode[i].key, ten ) % n );
  157. value = vNode[i].value << bits;
  158. for ( j=0; size0!=j; ++j )
  159. {
  160. key = vNode[j].key + d;
  161. if ( key < d || key >= n )
  162. {
  163. key -= n;
  164. }
  165. if ( mKeyIndex.end() == mKeyIndex.find( key ))
  166. {
  167. mKeyIndex.insert( MVT( info.key = key, ++index ));
  168. info.value = vNode[j].value | value;
  169. vNode.push_back( info );
  170. }
  171. }
  172. }
  173. if ( size1 == size0 )
  174. {
  175. bits <<= 1;
  176. ten = (UINT32)( UInt32x32To64( ten, ten ) % n );
  177. }
  178. else
  179. {
  180. ++bits;
  181. ten = (UINT32)( UInt32x32To64( ten, 10 ) % n );
  182. }
  183. size0 = vNode.size();
  184. if ( size0 <= SqrtN /*&& bits < 16*/ )
  185. {
  186. size1 = size0;
  187. }
  188. else
  189. {
  190. size1 = 2;
  191. }
  192. }
  193. return p;
  194. }
  195. int main( void )
  196. {
  197. UINT32 n;
  198. const char * p;
  199. while( 1 )
  200. {
  201. cout << "N=";
  202. cin >> n;
  203. if( 0 == n )
  204. {
  205. break;
  206. }
  207. clock_t start=clock();
  208. p = GetMin01( n );
  209. clock_t end=clock();
  210. cout << p << "\n";
  211. cout<< "Total cost " << end-start << "ms" << "\n" << endl;
  212. }
  213. return 0;
  214. }
复制代码
Change list: 1、新增整数开方函数; 2、将 value 由 UINT64 修改成 UINT32
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2009-8-16 17:22:05 | 显示全部楼层
呃 那个函数代码行数有点大哦
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2009-8-17 07:48:20 | 显示全部楼层
运行大量数据的统计结果,11#的程序速度并没有优势. 我这里运行时比如数据2230767,2384613,2389761都比较慢
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
 楼主| 发表于 2009-8-17 08:23:17 | 显示全部楼层
很奇怪,在我这里结论是相反的: 2#的测试结果如下:
  1. N=2230767
  2. 1000001010001010101110111110111110111111101
  3. Total cost 1418ms
  4. N=2384613
  5. 1000001010001010111110111111101110101110111
  6. Total cost 1637ms
  7. N=2389761
  8. 1000011101111111110111111011111111111111111
  9. Total cost 1606ms
  10. N=4294967291
  11. 1011101001001001001010000111001
  12. Total cost 312ms
  13. N=4294967279
  14. 1111011110011001110000101001001001
  15. Total cost 407ms
  16. N=4294967231
  17. 11111001101001000000010011010111
  18. Total cost 437ms
  19. N=899999999
  20. 1110010111111110000001111011010101101010101
  21. Total cost 2296ms
  22. N=
复制代码
我的则为:
  1. N=2230767
  2. 1000001010001010101110111110111110111111101
  3. Total cost 702ms
  4. N=2384613
  5. 1000001010001010111110111111101110101110111
  6. Total cost 779ms
  7. N=2389761
  8. 1000011101111111110111111011111111111111111
  9. Total cost 779ms
  10. N=4294967291
  11. 1011101001001001001010000111001
  12. Total cost 62ms
  13. N=4294967279
  14. 1111011110011001110000101001001001
  15. Total cost 234ms
  16. N=4294967231
  17. 11111001101001000000010011010111
  18. Total cost 93ms
  19. N=899999999
  20. 1110010111111110000001111011010101101010101
  21. Total cost 750ms
  22. N=
复制代码
我现在最新的代码如下(删除了开方函数,消减了初始化数量,新增了快速除5、除10的子函数):
  1. // http://bbs.emath.ac.cn/viewthread.php?tid=1689&page=2&fromuid=8#pid21101
  2. #include <iostream>
  3. #include <ctime>
  4. #include <vector>
  5. #include <map>
  6. using namespace std;
  7. typedef unsigned int UINT32, *PUINT32;
  8. typedef unsigned __int64 UINT64, *PUINT64;
  9. typedef struct node
  10. {
  11. UINT32 key;
  12. UINT32 value;
  13. }NODE;
  14. typedef vector< NODE > NODE_VECTOR;
  15. typedef map< UINT32, UINT32 > KEY_INDEX_MAP;
  16. typedef KEY_INDEX_MAP::value_type MVT;
  17. #ifndef UInt32x32To64
  18. # define UInt32x32To64(a, b) ((UINT64)(((UINT64)(a)) * ((UINT64)(b))))
  19. #endif
  20. char szResult[128];
  21. __declspec(naked)
  22. UINT32 DivMod5( UINT32& n )
  23. {
  24. __asm
  25. {
  26. mov ecx, dword ptr[esp + 0x04];
  27. mov edx, dword ptr[ecx];
  28. mov eax, 0xCCCCCCCD;
  29. push edx;
  30. mul edx;
  31. shr edx, 2;
  32. mov dword ptr[ecx], edx;
  33. lea edx, [edx + 0x04*edx];
  34. pop eax;
  35. sub eax, edx;
  36. ret;
  37. }
  38. }
  39. __declspec(naked)
  40. UINT32 DivMod10( UINT32& n )
  41. {
  42. __asm
  43. {
  44. mov ecx, dword ptr[esp + 0x04];
  45. mov edx, dword ptr[ecx];
  46. mov eax, 0xCCCCCCCD;
  47. push edx;
  48. mul edx;
  49. shr edx, 3;
  50. mov dword ptr[ecx], edx;
  51. imul edx, 10;
  52. pop eax;
  53. sub eax, edx;
  54. ret;
  55. }
  56. }
  57. const char * GetMin01( UINT32 n )
  58. {
  59. char * p = szResult;
  60. UINT32 i=0, j=0, d, ten, bits, index, size0, size1;
  61. UINT32 key;
  62. UINT32 value;
  63. NODE info;
  64. NODE_VECTOR vNode;
  65. KEY_INDEX_MAP mKeyIndex;
  66. KEY_INDEX_MAP::iterator it;
  67. *( p += 127 ) = '\0';
  68. // if ( 0 == n ) return p;
  69. while ( 0 == ( 1 & n ))
  70. {
  71. ++i; //times of prime 2
  72. n >>= 1;
  73. }
  74. while ( 0 == ( d = DivMod5( n )))
  75. {
  76. ++j; //times of prime 5
  77. }
  78. n = n * 5 + d;
  79. if ( i < j ) i = j; //max times of prime 2 and prime 5
  80. memset( p -= i, '0', i * sizeof(char));
  81. if ( 1 == n )
  82. {
  83. *(--p) = '1';
  84. return p;
  85. }
  86. //special case that n+1 is 10^j
  87. i = n + 1;
  88. j = 0;
  89. while ( 0 == ( d = DivMod10( i )))
  90. {
  91. ++j;
  92. }
  93. if ( 0 == i && 1 == d )
  94. {
  95. j *= 9;
  96. memset( p -= j, '1', j * sizeof(char));
  97. return p;
  98. }
  99. i = n;
  100. j = 0;
  101. while (( d = DivMod10( i )) <= 1 )
  102. {
  103. *(--p) = '0' + d;
  104. if ( 0 == i )
  105. {
  106. return p;
  107. }
  108. ++j;
  109. }
  110. p += j;
  111. info.value = info.key = 0;
  112. vNode.push_back( info );
  113. mKeyIndex.insert( MVT( info.key, index = 0 ));
  114. info.value = info.key = 1;
  115. vNode.push_back( info );
  116. mKeyIndex.insert( MVT( info.key, ++index ));
  117. for ( bits=1, ten=10%n, size1=size0=vNode.size(); ; )
  118. {
  119. for ( i=1; size0!=i; ++i )
  120. {
  121. d = (UINT32)( UInt32x32To64( vNode[i].key, ten ) % n );
  122. if ( mKeyIndex.end() != ( it = mKeyIndex.find( n - d )))
  123. {
  124. value = vNode[(*it).second].value;
  125. do
  126. {
  127. *(--p) = '0' + ( 1 & value );
  128. value >>= 1;
  129. } while( 0 != --bits );
  130. value = vNode[i].value;
  131. do
  132. {
  133. *(--p) = '0' + ( 1 & value );
  134. } while( 0 != ( value >>= 1 ));
  135. return p;
  136. }
  137. }
  138. for ( i=1; size1!=i; ++i )
  139. {
  140. d = (UINT32)( UInt32x32To64( vNode[i].key, ten ) % n );
  141. value = vNode[i].value << bits;
  142. for ( j=0; size0!=j; ++j )
  143. {
  144. key = vNode[j].key + d;
  145. if ( key < d || key >= n )
  146. {
  147. key -= n;
  148. }
  149. if ( mKeyIndex.end() == mKeyIndex.find( key ))
  150. {
  151. mKeyIndex.insert( MVT( info.key = key, ++index ));
  152. info.value = vNode[j].value | value;
  153. vNode.push_back( info );
  154. }
  155. }
  156. }
  157. if ( size1 == size0 )
  158. {
  159. bits <<= 1;
  160. ten = (UINT32)( UInt32x32To64( ten, ten ) % n );
  161. }
  162. else
  163. {
  164. ++bits;
  165. ten = (UINT32)( UInt32x32To64( ten, 10 ) % n );
  166. }
  167. size0 = vNode.size();
  168. if ( bits < 16 )
  169. {
  170. size1 = size0;
  171. }
  172. else
  173. {
  174. size1 = 2;
  175. }
  176. }
  177. return p;
  178. }
  179. int main( void )
  180. {
  181. UINT32 n;
  182. const char * p;
  183. while( 1 )
  184. {
  185. cout << "N=";
  186. cin >> n;
  187. if( 0 == n )
  188. {
  189. break;
  190. }
  191. clock_t start=clock();
  192. p = GetMin01( n );
  193. clock_t end=clock();
  194. cout << p << "\n";
  195. cout<< "Total cost " << end-start << "ms" << "\n" << endl;
  196. }
  197. return 0;
  198. }
复制代码
这是用VC2008编译的版本: search_pid21021.exe (155.5 KB, 下载次数: 4) search_pid21101.exe (149 KB, 下载次数: 5)
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2009-8-17 08:57:59 | 显示全部楼层
应该U不同的原因 毕竟小肚子的机器忒好了 而且是正宗的Intel机器 郭的程序受缓存,指令周期的影响太大
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2009-8-17 09:01:09 | 显示全部楼层
另外,哦只是另外 我没仔细考虑过 单纯用堆栈使用迭代的浮点运算 从头部计算是否能避免一些复杂数据结构的冗余带来的性能降低?
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
 楼主| 发表于 2009-8-17 09:10:15 | 显示全部楼层
我在家里的CPU也是intel的,不如mathe的好那是肯定的了。 无心人可以下载 14# 编译好的程序运行对比一下。 另外,这个问题如何用浮点解决?
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
 楼主| 发表于 2009-8-17 09:24:01 | 显示全部楼层
mathe 的程序强大在输入不局限32位内,可以适当超出一些。 我的估计内存耗用会少些(自我感觉,不敢确定),因为我是用32位存储的。
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2009-8-17 09:55:28 | 显示全部楼层
呵呵,我没有说你的程序慢,只是觉得性能基本相当. 还有我没有测试单个数据的速度区别(只是用gxqcn的程序找出几个比较慢的数据,在我的机器上500ms左右) 不过我的测量也不是完全准确,因为我的程序里面很多对最后结果转换部分删除了(我只统计每个结果的长度,而不管结果本身,所以可以删除部分代码).但是我觉得这部分代码对总体运行时间影响不大,所以就直接比较了. 而机器的区别在于我的机器内存缓存会比较好,所以对内存的访问速度要好很多
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2009-8-17 09:57:02 | 显示全部楼层
我那个程序输入如果超过32位,还是需要修改一下,提供一个64比特整数乘64比特整数然后模一个64比特整数的函数,现在的程序结果会错的.但是可以大概用来评估一下计算超过32位时数据的速度.
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
您需要登录后才可以回帖 登录 | 欢迎注册

本版积分规则

小黑屋|手机版|数学研发网 ( 苏ICP备07505100号 )

GMT+8, 2024-11-22 01:21 , Processed in 0.029335 second(s), 17 queries .

Powered by Discuz! X3.5

© 2001-2024 Discuz! Team.

快速回复 返回顶部 返回列表