找回密码
 欢迎注册
楼主: 无心人

[讨论] 10秒内计算出1亿素数表, 不输出

[复制链接]
发表于 2008-3-13 15:56:24 | 显示全部楼层
//jjww 说不清, 这个是没有太多优化的java版本,采用的是分段筛选法 // 缓存大小可以自己调节,暂时不考虑输出, 运行PI(2^31) 耗时 6s // 上面的附件是 C++多线程版本, 也是筛选法(不只是统计个数)。 // 其实不喜欢用java些, 运行命令 // java PrimeNumber (单线程,不输出) // java PrimeNumber 2 (多线程) // java PrimeNumber 2 2 (输出素数, 超级慢) public class PrimeNumber { static final int FACT = 255255; //3 * 5 * 7 * 11 * 13 * 17 = 255255 static final int SP = 7; static final int BLOCKSIZE = FACT << 2; static final int MOVE = 5; static final int MASK = (1 << (MOVE - 1)) - 1; static final int MAXN = 1 << 30; static final int MAXM = ((BLOCKSIZE >> MOVE) + 2); static final int MAX_THREADS = 4; static final byte[] multp = { 6, 4, 2, 4, 2, 4, 6, 2 }; static boolean printout = false; static int[] prime = new int[4794]; static char[] BaseTpl = new char[MAXM]; static byte[] bits = new byte[1 << (MASK + 1)]; static byte[] tablep30 = new byte[32]; static char MASKN(int n) { return (char)(1 << ((n >> 1) & MASK)); } static int sieve( ) { int i, pnums = 1; int QMAXN = (int)Math.sqrt((double)MAXN) + 20; prime[0] = 2; for (i = 3; i < QMAXN; i += 2){ if ( (BaseTpl[i >> MOVE] & MASKN(i)) == 0 ){ prime[pnums++] = i; for (int p = i * i; p < QMAXN && p > 0; p += i << 1) BaseTpl[p >> MOVE] |= MASKN(p); } } for (i = 0; i < MAXM; i++) BaseTpl[i] = (char)0; for (i = 1; i < SP; i++){ for (int p = prime[i]; p < BLOCKSIZE; p += prime[i] << 1) BaseTpl[p >> MOVE] |= MASKN(p); } return pnums; } static int getBlocks(int start, int len) { int next, pnums = 0; int maxp = (int)Math.sqrt((float)start + len) + 1; char[] block = new char[MAXM]; for (int i = 0; i < (len >> MOVE) + 1; ++i) block[i] = BaseTpl[i]; block[len >> MOVE] |= (char)~(MASKN(len) - 1); for (int i = SP, p = prime[SP]; p < maxp; p = prime[++i]){ if (start > 0){ next = (p - (start - 1) % p) - 1; if ((next & 1) == 0) next += p; } else next = p * p; /* int[] buf = {1, 2, 3, 4, 5, 6, 7, 8}; int mrid = ((start + next) / p) % 30; for (int k = 0; k < 8; k++) buf[k] = p * multp[k]; while (tablep30[mrid] == 0){ next += p << 1; mrid = (mrid + 2) % 30; } int st = (tablep30[mrid] + 6) & 7; //for (; next < len; next += buf[st = nexti[st]]) for (; next < len; next += buf[++st & 7]) block[next >> MOVE] |= (char)(1 << ((next >> 1) & MASK)); */ int mrid = (start + next) / p; if ((mrid %= 6) == 3){ next += 2 * p; mrid += 2; } int nex1 = next + ((mrid == 1) ? (p << 2) : (p << 1)); p *= 6; for (; nex1 < len; nex1 += p, next += p){ block[next >> MOVE] |= (char)(1 << ((next >> 1) & 15)); block[nex1 >> MOVE] |= (char)(1 << ((nex1 >> 1) & 15)); } //for (; next < len; next += p) if (next < len) block[next >> MOVE] |= MASKN(next); } pnums += outPrint(block, start, len); //too slow return pnums; } static int outPrint(char mask[], int start, int len) { int pnums = 0; if (printout == false){ len >>= MOVE; for (int k = 0; k <= len; k++) pnums += bits[mask[k]]; } else{ for (int i = 1; i <= len; i += 2){ if ((mask[i >> 5] & MASKN(i)) == 0){ System.out.println(start + i); pnums ++; } } } return pnums; } static int[] table = new int[MAXN / BLOCKSIZE + 1]; static int getPrimes(int beg, int end) { int pnums = 0; int _end = end; if (beg < 2) beg = 2; for (int j = SP - 1; j >= 0 && beg <= prime[j]; j--){ if (end >= prime[j]) pnums++; } // int size = end % BLOCKSIZE; // if (size != 0) // pnums += getBlocks(end - size, size + 1); int size = beg % BLOCKSIZE; if (size != 0) pnums -= getBlocks(beg - size, size); beg /= BLOCKSIZE; end /= BLOCKSIZE; for (int i = beg; i < end; i++){ if (table[i] == 0) table[i] = getBlocks(i * BLOCKSIZE, BLOCKSIZE); pnums += table[i]; } if (_end % BLOCKSIZE != 0) pnums += getBlocks(end * BLOCKSIZE, _end % BLOCKSIZE + 1); return pnums; } private static int multiThread() { int i; int nThread = MAX_THREADS; int totalCount = 0; Worker[] works = new Worker[nThread]; for (i = 0; i < nThread; ++i){ works[i] = new Worker(); works[i].setStart(MAXN / nThread * i); works[i].setEnd(MAXN / nThread * (i + 1)); } works[nThread - 1].setEnd(MAXN); Thread[] threads = new Thread[nThread]; for (i = 0; i < nThread; ++i){ threads[i] = new Thread(works[i]); threads[i].start(); } try { for (i = 0; i < nThread; ++i){ threads[i].join(); totalCount += works[i].getCount(); } } catch (Exception e) { } return totalCount; } static class Worker implements Runnable { int count, start, end; public void run(){ count = getPrimes(start, end); System.out.println("Java : PI[" + start + ", " + (end) + "]: primes = " + count); } public void setStart(int n) { start = n; } public void setEnd(int n) { end = n; } public int getCount() { return count; } }; public static void main(String[] args) { long start = System.currentTimeMillis(); int i, totalCount; sieve( ); byte[] pri30 = { 1, 7, 11, 13, 17, 19, 23, 29 }; for (i = 0; i < 8; i++) tablep30[pri30[i] % 30] = (byte)(i + 1); for (i = 1; i < bits.length; i++) bits[i] = (byte)(bits[i >> 1] + (i & 1)); for (i = 0; i < bits.length; i++) bits[i] = (byte)( (1 << ( MOVE - 1)) - bits[i]); if (args.length == 1) totalCount = multiThread(); else{ if (args.length == 2) printout = true; totalCount = getPrimes(0, MAXN); } System.out.println("PI[0, " + (MAXN) + "]: primes = " + totalCount + ", time use " + (System.currentTimeMillis() - start) + " ms"); //for test for (i = 2; i < 31; i++){ start = System.currentTimeMillis(); totalCount = getPrimes(0, 1 << i); start = System.currentTimeMillis() - start; System.out.println("PI(" + (1 << i) + ") = " + totalCount); System.out.println("It take " + start + " ms"); } } }
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-13 15:59:58 | 显示全部楼层
下面给出一段代码,算法来源于http://www.cnblogs.com/suno/archive/2008/02/04/1064368.html。 该程序的缺点是需要较大的内存,需要一个 名为 notp 的buff, 需要内存为 sizeof(char)*n, 一亿的话需要100M, 若想进一步所想空间,可采用位运算,那样就只需 sizeof(char)*n/8, 即使求 2^32 也只需500M内存。 下面给出完整的代码。
  1. #include "stdafx.h"
  2. #include "memory.h"
  3. #include "math.h"
  4. #include "assert.h"
  5. #include "windows.h"
  6. typedef unsigned long DWORD;
  7. // 线形时间筛素数,代码来源: http://www.cnblogs.com/suno/archive/2008/02/04/1064368.html
  8. // 为了适应本贴需求,略作修改
  9. //估计n以内素数的个数
  10. DWORD pi(DWORD n)
  11. {
  12. double f= (double)n /log(n) * 1.20 + 32.00;
  13. return (DWORD)f;
  14. }
  15. DWORD getprime(DWORD mr )
  16. {
  17. DWORD pn;
  18. DWORD count=pi(mr);
  19. char *notp= new char[mr+1];
  20. DWORD *primeTab= new DWORD[count];
  21. if (primeTab==NULL || notp==NULL)
  22. {
  23. printf("No enogh memory, fail!!!");
  24. return 0;
  25. }
  26. memset(notp,0,mr+1);
  27. pn=0;
  28. for(DWORD i=2;i<mr;i++)
  29. {
  30. if(!notp[i])
  31. {
  32. primeTab[pn++]=i;
  33. assert( pn<count-1);
  34. }
  35. for(DWORD j=0;j<pn && primeTab[j]*i<mr;j++)
  36. {
  37. notp[primeTab[j]*i]=1;
  38. if(i % primeTab[j]==0)
  39. break;
  40. }
  41. }
  42. delete[] notp;
  43. delete[] primeTab;
  44. return pn;
  45. }
  46. int main(int argc, char* argv[])
  47. {
  48. DWORD n;
  49. DWORD c;
  50. DWORD t;
  51. while (true)
  52. {
  53. printf("n=?");
  54. scanf("%u",&n);
  55. if (n<2)
  56. break;
  57. t=GetTickCount();
  58. c= getprime(n);
  59. if (c==0)
  60. break;
  61. t=GetTickCount()-t;
  62. printf("pi(%u)=%u\n",n,c);
  63. printf("It take %u ms\n",t);
  64. }
  65. return 0;
  66. }
复制代码
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-13 16:04:36 | 显示全部楼层
12# 楼运行结果:PIV 2.6G, windows 2K xp, 768M RAM n=?100 pi(100)=25 It take 0 ms n=?1000 pi(1000)=168 It take 0 ms n=?10000 pi(10000)=1229 It take 0 ms n=?100000 pi(100000)=9592 It take 0 ms n=?1000000 pi(1000000)=78498 It take 31 ms n=?10000000 pi(10000000)=664579 It take 390 ms n=?100000000 pi(100000000)=5761455 It take 4171 ms
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-13 16:27:04 | 显示全部楼层
修改为bit数组,内存节省许多,速度也快了些。
  1. DWORD getprime(DWORD mr )
  2. {
  3. DWORD pn;
  4. DWORD count=pi(mr);
  5. unsigned char maskArray[]=
  6. {1,2,4,8,16,32,64,128};
  7. char *notp= new char[mr/8+2];
  8. DWORD *primeTab= new DWORD[count];
  9. if (primeTab==NULL || notp==NULL)
  10. {
  11. printf("No enogh memory, fail!!!");
  12. return 0;
  13. }
  14. memset(notp,0,mr/8+2);
  15. pn=0;
  16. for(DWORD i=2;i<mr;i++)
  17. {
  18. if( ! (notp[ i>>3 ] & maskArray[i&7]) )
  19. {
  20. primeTab[pn++]=i;
  21. //assert( pn<count-1);
  22. }
  23. for(DWORD j=0;j<pn && primeTab[j]*i<mr;j++)
  24. {
  25. DWORD t= primeTab[j]*i;
  26. notp[ t >>3 ] |= maskArray[t&7];
  27. if(i % primeTab[j]==0)
  28. break;
  29. }
  30. }
  31. delete[] notp;
  32. delete[] primeTab;
  33. return pn;
  34. }
复制代码
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-13 17:38:02 | 显示全部楼层
根据11楼程序得到的结果, 程序消耗内存为常量 PI[0, 1073741824]: primes = 54400028, time use 1398 ms PI(4) = 2 It take 0 ms PI(8) = 4 It take 0 ms PI(16) = 6 It take 0 ms PI(32) = 11 It take 0 ms PI(64) = 18 It take 0 ms PI(128) = 31 It take 0 ms PI(256) = 54 It take 0 ms PI(512) = 97 It take 0 ms PI(1024) = 172 It take 0 ms PI(2048) = 309 It take 0 ms PI(4096) = 564 It take 0 ms PI(8192) = 1028 It take 0 ms PI(16384) = 1900 It take 0 ms PI(32768) = 3512 It take 0 ms PI(65536) = 6542 It take 0 ms PI(131072) = 12251 It take 0 ms PI(262144) = 23000 It take 0 ms PI(524288) = 43390 It take 0 ms PI(1048576) = 82025 It take 0 ms PI(2097152) = 155611 It take 0 ms PI(4194304) = 295947 It take 0 ms PI(8388608) = 564163 It take 0 ms PI(16777216) = 1077871 It take 0 ms PI(33554432) = 2063689 It take 0 ms PI(67108864) = 3957809 It take 0 ms PI(134217728) = 7603553 It take 0 ms PI(268435456) = 14630843 It take 0 ms PI(536870912) = 28192750 It take 0 ms PI(1073741824) = 54400028 It take 0 ms
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-13 17:59:03 | 显示全部楼层
在生成素数时,常常需要预先估计 x以内 质数的个数,然后分配内存空间,如果不需要太精确,下面给出一个估计式,通过测试得到. PI(x) :x以内素数的个数。 PI(x) < x /log(x) * 10/9 +32
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-13 18:02:09 | 显示全部楼层
最顶楼并不是想求PI(x), 他想看看筛法效率, PI(10^24) 通过分布式以经被算出 但用筛法目前只算到 10^18
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-16 22:29:11 | 显示全部楼层
把我以前的程序翻出来,没有重新编译过,还是几年前的版本 Input a number [2-4294967295]: 100000000 Number range [2-99999999]: Total have 5761455 prime(s). Used time: 0.12979269 second(s). Input a number [2-4294967295]: 1073741824 Number range [2-1073741823]: Total have 54400028 prime(s). Used time: 1.79648500 second(s). 不只是统计素数的个数,和tprime的一样,有计算出标志位,筛法,P4 3.0G
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-17 08:27:00 | 显示全部楼层
呵呵,楼上的程序不会是当年CSDN上算100亿以内素数用的吧。
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
发表于 2008-3-17 10:35:05 | 显示全部楼层

回复 18# 的帖子

你的算法很快,和我的(PD 3.2G) C++单线程版本1差不多 PI[1073741823] : primes = 54400028, sieve v1 time use 1624 ms C++单线程版本2 PI[1073741823] : primes = 54400028, sieve v2 time use 1171 ms
毋因群疑而阻独见  毋任己意而废人言
毋私小惠而伤大体  毋借公论以快私情
您需要登录后才可以回帖 登录 | 欢迎注册

本版积分规则

小黑屋|手机版|数学研发网 ( 苏ICP备07505100号 )

GMT+8, 2024-12-4 01:33 , Processed in 0.025440 second(s), 15 queries .

Powered by Discuz! X3.5

© 2001-2024 Discuz! Team.

快速回复 返回顶部 返回列表