// 缓存大小可以自己调节,暂时不考虑输出, 运行PI(2^31) 耗时 6s
// 上面的附件是 C++多线程版本, 也是筛选法(不只是统计个数)。
// 其实不喜欢用java些, 运行命令
// java PrimeNumber (单线程,不输出)
// java PrimeNumber 2 (多线程)
// java PrimeNumber 2 2 (输出素数, 超级慢)
public class PrimeNumber
{
static final int FACT = 255255; //3 * 5 * 7 * 11 * 13 * 17 = 255255
static final int SP = 7;
static final int BLOCKSIZE = FACT << 2;
static final int MOVE = 5;
static final int MASK = (1 << (MOVE - 1)) - 1;
static final int MAXN = 1 << 30;
static final int MAXM = ((BLOCKSIZE >> MOVE) + 2);
static final int MAX_THREADS = 4;
static final byte[] multp = { 6, 4, 2, 4, 2, 4, 6, 2 };
static boolean printout = false;
static int[] prime = new int;
static char[] BaseTpl = new char;
static byte[] bits = new byte;
static byte[] tablep30 = new byte;
static char MASKN(int n)
{
return (char)(1 << ((n >> 1) & MASK));
}
static int sieve( )
{
int i, pnums = 1;
int QMAXN = (int)Math.sqrt((double)MAXN) + 20;
prime = 2;
for (i = 3; i < QMAXN; i += 2){
if ( (BaseTpl & MASKN(i)) == 0 ){
prime = i;
for (int p = i * i; p < QMAXN && p > 0; p += i << 1)
BaseTpl |= MASKN(p);
}
}
for (i = 0; i < MAXM; i++)
BaseTpl = (char)0;
for (i = 1; i < SP; i++){
for (int p = prime; p < BLOCKSIZE; p += prime << 1)
BaseTpl |= MASKN(p);
}
return pnums;
}
static int getBlocks(int start, int len)
{
int next, pnums = 0;
int maxp = (int)Math.sqrt((float)start + len) + 1;
char[] block = new char;
for (int i = 0; i < (len >> MOVE) + 1; ++i)
block = BaseTpl;
block |= (char)~(MASKN(len) - 1);
for (int i = SP, p = prime; p < maxp; p = prime[++i]){
if (start > 0){
next = (p - (start - 1) % p) - 1;
if ((next & 1) == 0)
next += p;
}
else
next = p * p;
/*
int[] buf = {1, 2, 3, 4, 5, 6, 7, 8};
int mrid = ((start + next) / p) % 30;
for (int k = 0; k < 8; k++)
buf = p * multp;
while (tablep30 == 0){
next += p << 1;
mrid = (mrid + 2) % 30;
}
int st = (tablep30 + 6) & 7;
//for (; next < len; next += buf])
for (; next < len; next += buf[++st & 7])
block |=(char)(1 << ((next >> 1) & MASK));
*/
int mrid = (start + next) / p;
if ((mrid %= 6) == 3){
next += 2 * p;
mrid += 2;
}
int nex1 = next + ((mrid == 1) ? (p << 2) : (p << 1));
p *= 6;
for (; nex1 < len; nex1 += p, next += p){
block |= (char)(1 << ((next >> 1) & 15));
block |= (char)(1 << ((nex1 >> 1) & 15));
}
//for (; next < len; next += p)
if (next < len)
block |= MASKN(next);
}
pnums += outPrint(block, start, len); //too slow
return pnums;
}
static int outPrint(char mask[], int start, int len)
{
int pnums = 0;
if (printout == false){
len >>= MOVE;
for (int k = 0; k <= len; k++)
pnums += bits];
}
else{
for (int i = 1; i <= len; i += 2){
if ((mask & MASKN(i)) == 0){
System.out.println(start + i);
pnums ++;
}
}
}
return pnums;
}
static int[] table = new int;
static int getPrimes(int beg, int end)
{
int pnums = 0;
int _end = end;
if (beg < 2)
beg = 2;
for (int j = SP - 1; j >= 0 && beg <= prime; j--){
if (end >= prime)
pnums++;
}
// int size = end % BLOCKSIZE;
// if (size != 0)
// pnums += getBlocks(end - size, size + 1);
int size = beg % BLOCKSIZE;
if (size != 0)
pnums -= getBlocks(beg - size, size);
beg /= BLOCKSIZE;
end /= BLOCKSIZE;
for (int i = beg; i < end; i++){
if (table == 0)
table = getBlocks(i * BLOCKSIZE, BLOCKSIZE);
pnums += table;
}
if (_end % BLOCKSIZE != 0)
pnums += getBlocks(end * BLOCKSIZE, _end % BLOCKSIZE + 1);
return pnums;
}
private static int multiThread()
{
int i;
int nThread = MAX_THREADS;
int totalCount = 0;
Worker[] works = new Worker;
for (i = 0; i < nThread; ++i){
works = new Worker();
works.setStart(MAXN / nThread * i);
works.setEnd(MAXN / nThread * (i + 1));
}
works.setEnd(MAXN);
Thread[] threads = new Thread;
for (i = 0; i < nThread; ++i){
threads = new Thread(works);
threads.start();
}
try
{
for (i = 0; i < nThread; ++i){
threads.join();
totalCount += works.getCount();
}
}
catch (Exception e) { }
return totalCount;
}
static class Worker implements Runnable
{
int count, start, end;
public void run(){
count = getPrimes(start, end);
System.out.println("Java : PI[" + start + ", " + (end) + "]:primes = " + count);
}
public void setStart(int n) { start = n; }
public void setEnd(int n) { end = n; }
public int getCount() { return count; }
};
public static void main(String[] args)
{
long start = System.currentTimeMillis();
int i, totalCount;
sieve( );
byte[] pri30 = { 1, 7, 11, 13, 17, 19, 23, 29 };
for (i = 0; i < 8; i++)
tablep30 % 30] = (byte)(i + 1);
for (i = 1; i < bits.length; i++)
bits = (byte)(bits + (i & 1));
for (i = 0; i < bits.length; i++)
bits = (byte)( (1 << ( MOVE -1))- bits);
if (args.length == 1)
totalCount = multiThread();
else{
if (args.length == 2)
printout = true;
totalCount = getPrimes(0, MAXN);
}
System.out.println("PI:primes = " + totalCount + ", time use " + (System.currentTimeMillis() - start) + " ms");
//for test
for (i = 2; i < 31; i++){
start = System.currentTimeMillis();
totalCount = getPrimes(0, 1 << i);
start = System.currentTimeMillis() - start;
System.out.println("PI(" + (1 << i) + ") = " + totalCount);
System.out.println("It take " + start + " ms");
}
}
} 下面给出一段代码,算法来源于http://www.cnblogs.com/suno/archive/2008/02/04/1064368.html。
该程序的缺点是需要较大的内存,需要一个 名为 notp 的buff, 需要内存为 sizeof(char)*n, 一亿的话需要100M, 若想进一步所想空间,可采用位运算,那样就只需 sizeof(char)*n/8, 即使求 2^32 也只需500M内存。
下面给出完整的代码。#include "stdafx.h"
#include "memory.h"
#include "math.h"
#include "assert.h"
#include "windows.h"
typedef unsigned long DWORD;
// 线形时间筛素数,代码来源: http://www.cnblogs.com/suno/archive/2008/02/04/1064368.html
// 为了适应本贴需求,略作修改
//估计n以内素数的个数
DWORD pi(DWORD n)
{
double f= (double)n /log(n) * 1.20 + 32.00;
return (DWORD)f;
}
DWORD getprime(DWORD mr )
{
DWORD pn;
DWORD count=pi(mr);
char *notp= new char;
DWORD *primeTab= new DWORD;
if (primeTab==NULL || notp==NULL)
{
printf("No enogh memory, fail!!!");
return 0;
}
memset(notp,0,mr+1);
pn=0;
for(DWORD i=2;i<mr;i++)
{
if(!notp)
{
primeTab=i;
assert( pn<count-1);
}
for(DWORD j=0;j<pn && primeTab*i<mr;j++)
{
notp*i]=1;
if(i % primeTab==0)
break;
}
}
delete[] notp;
delete[] primeTab;
return pn;
}
int main(int argc, char* argv[])
{
DWORD n;
DWORD c;
DWORD t;
while (true)
{
printf("n=?");
scanf("%u",&n);
if (n<2)
break;
t=GetTickCount();
c= getprime(n);
if (c==0)
break;
t=GetTickCount()-t;
printf("pi(%u)=%u\n",n,c);
printf("It take %u ms\n",t);
}
return 0;
} 12# 楼运行结果:PIV 2.6G,windows 2K xp, 768M RAM
n=?100
pi(100)=25
It take 0 ms
n=?1000
pi(1000)=168
It take 0 ms
n=?10000
pi(10000)=1229
It take 0 ms
n=?100000
pi(100000)=9592
It take 0 ms
n=?1000000
pi(1000000)=78498
It take 31 ms
n=?10000000
pi(10000000)=664579
It take 390 ms
n=?100000000
pi(100000000)=5761455
It take 4171 ms 修改为bit数组,内存节省许多,速度也快了些。DWORD getprime(DWORD mr )
{
DWORD pn;
DWORD count=pi(mr);
unsigned char maskArray[]=
{1,2,4,8,16,32,64,128};
char *notp= new char;
DWORD *primeTab= new DWORD;
if (primeTab==NULL || notp==NULL)
{
printf("No enogh memory, fail!!!");
return 0;
}
memset(notp,0,mr/8+2);
pn=0;
for(DWORD i=2;i<mr;i++)
{
if( ! (notp[ i>>3 ] & maskArray) )
{
primeTab=i;
//assert( pn<count-1);
}
for(DWORD j=0;j<pn && primeTab*i<mr;j++)
{
DWORD t= primeTab*i;
notp[ t >>3 ] |= maskArray;
if(i % primeTab==0)
break;
}
}
delete[] notp;
delete[] primeTab;
return pn;
} 根据11楼程序得到的结果, 程序消耗内存为常量
PI:primes = 54400028, time use 1398 ms
PI(4) = 2
It take 0 ms
PI(8) = 4
It take 0 ms
PI(16) = 6
It take 0 ms
PI(32) = 11
It take 0 ms
PI(64) = 18
It take 0 ms
PI(128) = 31
It take 0 ms
PI(256) = 54
It take 0 ms
PI(512) = 97
It take 0 ms
PI(1024) = 172
It take 0 ms
PI(2048) = 309
It take 0 ms
PI(4096) = 564
It take 0 ms
PI(8192) = 1028
It take 0 ms
PI(16384) = 1900
It take 0 ms
PI(32768) = 3512
It take 0 ms
PI(65536) = 6542
It take 0 ms
PI(131072) = 12251
It take 0 ms
PI(262144) = 23000
It take 0 ms
PI(524288) = 43390
It take 0 ms
PI(1048576) = 82025
It take 0 ms
PI(2097152) = 155611
It take 0 ms
PI(4194304) = 295947
It take 0 ms
PI(8388608) = 564163
It take 0 ms
PI(16777216) = 1077871
It take 0 ms
PI(33554432) = 2063689
It take 0 ms
PI(67108864) = 3957809
It take 0 ms
PI(134217728) = 7603553
It take 0 ms
PI(268435456) = 14630843
It take 0 ms
PI(536870912) = 28192750
It take 0 ms
PI(1073741824) = 54400028
It take 0 ms:) 在生成素数时,常常需要预先估计 x以内 质数的个数,然后分配内存空间,如果不需要太精确,下面给出一个估计式,通过测试得到.
PI(x) :x以内素数的个数。
PI(x) < x /log(x) * 10/9 +32 最顶楼并不是想求PI(x), 他想看看筛法效率, PI(10^24) 通过分布式以经被算出
但用筛法目前只算到 10^18 把我以前的程序翻出来,没有重新编译过,还是几年前的版本
Input a number : 100000000
Number range :
Total have 5761455 prime(s).
Used time: 0.12979269 second(s).
Input a number : 1073741824
Number range :
Total have 54400028 prime(s).
Used time: 1.79648500 second(s).
不只是统计素数的个数,和tprime的一样,有计算出标志位,筛法,P4 3.0G 呵呵,楼上的程序不会是当年CSDN上算100亿以内素数用的吧。
回复 18# 的帖子
你的算法很快,和我的(PD 3.2G)C++单线程版本1差不多
PI : primes = 54400028, sieve v1 time use 1624 ms
C++单线程版本2
PI : primes = 54400028, sieve v2 time use 1171 ms