无心人 发表于 2023-4-28 21:55:58

BPSW和QFT素性检验算法执行时间比较

数据用2^64以内强伪素数,除去 1093^2 和 3511^2 组成

先二次域测试 QFT

测试数据在百度网盘共享
链接:https://pan.baidu.com/s/1H7j_QZDvykp1XyuTMgbb7A?pwd=o1sw
提取码:o1sw
算法原理见: https://bbs.emath.ac.cn/thread-16936-1-1.html 6层

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <flint/ulong_extras.h>

typedef struct
{
uint64_t r;
uint64_t i;
} QF;

QF qfmul(QF l, QF r, int64_t c, uint64_t p, uint64_t pinv)
{
QF dst;
uint64_t t1 = n_mulmod2_preinv(l.r, r.r, p, pinv);
uint64_t t2 = n_mulmod2_preinv(l.i, r.i, p, pinv);
switch (c)
{
    case -1:
      dst.r = n_submod(t1, t2, p);
      break;
    case 2:
      t2 = n_addmod(t2, t2, p);
      dst.r = n_addmod(t1, t2, p);
      break;
    default:
      t2 = n_mulmod2_preinv(t2, c, p, pinv);
      dst.r = n_addmod(t1, t2, p);
}
t1 = n_mulmod2_preinv(l.i, r.r, p, pinv);
t2 = n_mulmod2_preinv(l.r, r.i, p, pinv);
dst.i = n_addmod(t1, t2, p);
return dst;
}

#define spCount 32
uint64_t sPrime =
{
   2,3,5,7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131
};

int64_t selectParam(uint64_t p)
{
if (p % 4 == 3)
    return -1;
if (p % 8 == 5)
    return 2;
for (int i = 1; i < spCount; i ++)
    if (n_jacobi(sPrime, p) == -1)
      return sPrime;
uint64_t t = sPrime;
do
{
    t ++;
    t = n_nextprime(t, 1);
} while (n_jacobi(t, p) != -1);
return t;
}

bool isprime(uint64_t p)
{
//printf("test %lu", p);
if (p == 1093 * 1093 || p == 3511 * 3511)
    return false;
int64_t c = selectParam(p);
//printf(" c=%ld\n", c);
QF a, b;
a.r = 1; a.i = 0;
uint64_t r = (c == -1 || c == 2) ? 2 : 1;
b.r = r;
b.i = 1;
uint64_t t = p;
uint64_t pinv = n_preinvert_limb(p);
while (t > 0)
{
    if (t & 1 == 1)
      a = qfmul(a, b, c, p, pinv);
    b = qfmul(b, b, c, p, pinv);
    t >>= 1;
}
return (a.r == r && a.i == p - 1) ? true : false;
}

void test()
{
printf("自检...\n");
for (int i = 1; i < spCount; i ++)
    if (!isprime(sPrime))
      printf("检测错误: %lu\n", sPrime);
printf("自检结束\n");
}

int main()
{
uint64_t p, cnt = 0, tst = 0;
//test();
FILE* sprp2File = fopen("2-SPRP-2-to-64.txt", "r");
if (!sprp2File)
{
    perror("文件2-SPRP-2-to-64.txt不存在");
    return EXIT_FAILURE;
}
char line;
while (fgets(line, 32, sprp2File) != NULL)
{
    char* end;
    p = strtoull(line, &end, 10);
    if (isprime(p))
      cnt ++;
    //tst ++; if (tst >= 10) break;
}

fclose(sprp2File);
printf("测试失败次数: %lu\n", cnt);
return 0;
}


BPSW版本
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <flint/ulong_extras.h>

typedef struct
{
int64_t D, P, Q;
} BPSWParam;

typedef struct
{
uint64_t U, V, Q;
} lucasItem;

void showParam(BPSWParam param, uint64_t n)
{
printf("%lu的lucas测试参数: D=%ld, P=%ld, Q=%ld\n", n, param.D, param.P, param.Q);
}

void showItem(char* pos, char * s, lucasItem item, uint64_t n)
{
printf("当前循环%s-%s: %lu的lucas测试当前值: U=%lu, V=%lu, Q=%lu\n", pos, s, n, item.U, item.V, item.Q);
}

//U(2k)=U(k)*V(k), V(2k)=V(k)^2-2Q(k), Q(2k)=Q(k)^2
void lucasDouble(char* pos, lucasItem* I, uint64_t n, uint64_t ninv)
{
I->U = n_mulmod2_preinv(I->U, I->V, n, ninv);
I->V = n_mulmod2_preinv(I->V, I->V, n, ninv);
uint64_t t = n_addmod(I->Q, I->Q, n);
I->V = n_submod(I->V, t, n);
I->Q = n_mulmod2_preinv(I->Q, I->Q, n, ninv);
//showItem(pos, "x2", *I, n);
}

//U(k+1)=(PU(k)+V(K))/2, V(K+1)=(DU(K)+PV(K))/2, Q(K+1)=Q(K)Q
void lucasNext(char* pos, lucasItem* I, BPSWParam param, uint64_t n, uint64_t half_n, uint64_t ninv)
{
uint64_t t = I->U;
I->U = n_mulmod2_preinv(param.P, I->U, n, ninv);
I->U = n_addmod(I->U, I->V, n);
I->U = (I->U&1==1)? (n - (n - I->U) >> 1) : (I->U >> 1);
uint64_t t1 = n_mulmod2_preinv(param.P, I->V, n, ninv);
uint64_t t2 = n_mulmod2_preinv(param.D, t, n, ninv);
I->V = n_addmod(t1, t2, n);
I->V = (I->V&1==1)? (n - (n - I->V) >> 1) : (I->V >> 1);
I->Q = n_mulmod2_preinv(I->Q, param.Q, n, ninv);
//showItem(pos, "+1", *I, n);
}

BPSWParam selectParam(uint64_t n)
{
int64_t d = 5, p, q, delta = 2;
while (n_jacobi(d, n) != -1)
{
    delta = - delta;
    d = delta - d;
}
p = 1;
q = (1 - d) >> 2;
if (q == - 1)
    d = p = q = 5;
//printf("%lu的lucas测试参数: D=%ld, P=%ld, Q=%ld\n", n, d, p, q);
if (d < 0)
    d = n - (-d) % n;
if (q < 0)
    q = n - (-q) % n;   
BPSWParam param = {d%n, p, q%n};
//showParam(param, n);
return param;
}

bool strongLucasTest(uint64_t n)
{
if (n == 1093 * 1093 || n == 3511 * 3511)
    return false;
BPSWParam param = selectParam( n );
uint64_t ninv =n_preinvert_limb( n );
uint64_t m = n + 1;
uint64_t half_n = m >> 1;
int64_t k = __builtin_ctzll( m );
m >>= k;
lucasItem item = {0, 2, 1};
int64_t tm = __builtin_clzll(m);
tm = sizeof(m)*8 - tm - 1;
uint64_t t = 1 << tm;
//printf("%lu=%lu*2^%lu, tm=%ld, t=%lu, half_n=%lu\n", n+1, m, k, tm, t, half_n);
for (int64_t i = tm; i >= 0; i --)
{
    lucasDouble("1", &item, n, ninv);
    if (m&t == t)
      lucasNext("1", &item, param, n, half_n, ninv);
    m >>= 1;
    t >>= 1;
}
if (item.U == 0)
    return true;
for (int64_t i = 0; i < k; i ++)
    if (item.V != 0)
    {
      item.V = n_mulmod2_preinv(item.V, item.V, n, ninv);
      uint64_t t = n_addmod(item.Q, item.Q, n);
      item.V = n_submod(item.V, t, n);
      item.Q = n_mulmod2_preinv(item.Q, item.Q, n, ninv);
      //showItem("2", "x2", item, n);
    }
    else
      return true;
return false;
}

#define testCount 16

uint64_t testSet = { 3, 5, 7, 17,
                              33, 79, 101, 111,
                              127, 2047, 5459, 5777,
                              10877, 58519, 65535, 65537};

void test()
{
printf("自检...\n");
printf("U(2k)=U(k)*V(k), V(2k)=V(k)^2-2Q(k), Q(2k)=Q(k)^2\n");
printf("U(k+1)=(PU(k)+V(K))/2, V(K+1)=(DU(K)+PV(K))/2, Q(K+1)=Q(K)Q\n");
for (int i = 0; i < testCount; i ++)
    if (strongLucasTest(testSet))
      printf("素数: %lu\n", testSet);
    else
      printf("合数: %lu\n", testSet);
printf("自检结束\n");
}

int main( void )
{
uint64_t n, cnt = 0, tst = 0;
//test(); return 1;
FILE* sprp2File = fopen("2-SPRP-2-to-64.txt", "r");
if (!sprp2File)
{
    perror("文件2-SPRP-2-to-64.txt不存在");
    return EXIT_FAILURE;
}
char line;
while (fgets(line, 32, sprp2File) != NULL)
{
    char* end;
    n = strtoull(line, &end, 10);
    if (strongLucasTest(n))
      cnt ++;
    //tst ++; if (tst >= 1000000) break;
}

fclose(sprp2File);
printf("测试失败次数: %lu\n", cnt);

return 0;
}

无心人 发表于 2023-5-3 20:00:11

先在WSL2中测试的,两个程序都在7m左右
后来用msys2测试,发现执行时间大大降低
QFT:    1m21s
BPSW: 1m11s
放到实际linux执行发现,msys2的执行时间是正确的

实际代码,BPSW比QFT复杂很多
但是QFT要测试整个的log(n)次
BPSW类似米勒罗宾测试,测试次数小于log(n)
BPSW较少的测试次数和较复杂的测试函数,
综合起来,执行时间略少于QFT

但是,QFT有个优势就是,单独执行,
对非平方数 n ,2^64内,只要通过测试,
必然是素数,不存在伪素数,
BPSW对应的强 lucas 测试,存在伪素数

nyy 发表于 2023-5-4 09:40:10

代码没语法高亮,好不爽呀!

无心人 发表于 2023-5-4 10:22:15

QFT增加一次基3强伪素数测试后

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <flint/ulong_extras.h>

typedef struct
{
uint64_t r;
uint64_t i;
} QF;

QF qfmul(QF l, QF r, int64_t c, uint64_t p, uint64_t pinv)
{
QF dst;
uint64_t t1 = n_mulmod2_preinv(l.r, r.r, p, pinv);
uint64_t t2 = n_mulmod2_preinv(l.i, r.i, p, pinv);
switch (c)
{
    case -1:
      dst.r = n_submod(t1, t2, p);
      break;
    case 2:
      t2 = n_addmod(t2, t2, p);
      dst.r = n_addmod(t1, t2, p);
      break;
    default:
      t2 = n_mulmod2_preinv(t2, c, p, pinv);
      dst.r = n_addmod(t1, t2, p);
}
t1 = n_mulmod2_preinv(l.i, r.r, p, pinv);
t2 = n_mulmod2_preinv(l.r, r.i, p, pinv);
dst.i = n_addmod(t1, t2, p);
return dst;
}

#define spCount 32
uint64_t sPrime =
{
   2,3,5,7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131
};

int64_t selectParam(uint64_t p)
{
if (p % 4 == 3)
    return -1;
if (p % 8 == 5)
    return 2;
for (int i = 1; i < spCount; i ++)
    if (n_jacobi(sPrime, p) == -1)
      return sPrime;
uint64_t t = sPrime;
do
{
    t ++;
    t = n_nextprime(t, 1);
} while (n_jacobi(t, p) != -1);
return t;
}

bool quadraticFrobeniusTest(uint64_t p)
{
uint64_t pinv = n_preinvert_limb(p);
int64_t k = __builtin_ctzll(p-1);
uint64_t d = (p-1) >> k;
if (!n_is_strong_probabprime2_preinv(p, pinv, 3, d))
    return false;
int64_t c = selectParam(p);
uint64_t r = (c == -1 || c == 2) ? 2 : 1;
QF a = {1, 0}, b = {r, 1};
uint64_t t = p;
while (t > 0)
{
    if (t&1==1)
      a = qfmul(a, b, c, p, pinv);
    b = qfmul(b, b, c, p, pinv);
    t>>=1;
}
return (a.r == r && a.i == p - 1) ? true : false;
}

void test()
{
printf("自检...\n");
for (int i = 1; i < spCount; i ++)
    if (!quadraticFrobeniusTest(sPrime))
      printf("检测错误: %lu\n", sPrime);
printf("自检结束\n");
}

int main()
{
uint64_t p, cnt = 0, tst = 0;
//test();
FILE* sprp2File = fopen("2-SPRP-2-to-64.txt", "r");
if (!sprp2File)
{
    perror("文件2-SPRP-2-to-64.txt不存在");
    return EXIT_FAILURE;
}
char line;
while (fgets(line, 32, sprp2File) != NULL)
{
    char* end;
    p = strtoull(line, &end, 10);
    if (quadraticFrobeniusTest(p))
      cnt ++;
    tst ++;
}

fclose(sprp2File);
printf("test: %lu, failed: %lu\n", tst, cnt);
return 0;
}



总测试数字31894014,执行时间23.027s
去掉QFT部分,只测试基3强伪素数,执行时间18.996s,
有1501720个合数通过测试
SPSPTime=18996ms/3189.4014万=5.956ms/万
QFTTime=(23027-18996)ms/150.1720万=26.843ms/万
1次QFT约等于4.5次SPSP

当增加1次SPSP测试后,剩余未通过的合数比例低于1/5时,用QFT测试是合算的
实测再增加一次基5强伪素数测试,执行时间降低到19.768s
再增加基7强伪素数测试,降低的时间有限了
因此理想的基于QFT的测试,先测试基2,3,5的强伪素数测试,再做QFT测试,是最优化的选择

无心人 发表于 2023-5-4 10:47:21

同样增加一次基3强伪素数测试的BPSW

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <flint/ulong_extras.h>

typedef struct
{
int64_t D, P, Q;
} BPSWParam;

typedef struct
{
uint64_t U, V, Q;
} lucasItem;

void showParam(BPSWParam param, uint64_t n)
{
printf("%lu的lucas测试参数: D=%ld, P=%ld, Q=%ld\n", n, param.D, param.P, param.Q);
}

void showItem(char* pos, char * s, lucasItem item, uint64_t n)
{
printf("当前循环%s-%s: %lu的lucas测试当前值: U=%lu, V=%lu, Q=%lu\n", pos, s, n, item.U, item.V, item.Q);
}

//U(2k)=U(k)*V(k), V(2k)=V(k)^2-2Q(k), Q(2k)=Q(k)^2
void lucasDouble(lucasItem* I, uint64_t n, uint64_t ninv)
{
I->U = n_mulmod2_preinv(I->U, I->V, n, ninv);
I->V = n_mulmod2_preinv(I->V, I->V, n, ninv);
uint64_t t = n_addmod(I->Q, I->Q, n);
I->V = n_submod(I->V, t, n);
I->Q = n_mulmod2_preinv(I->Q, I->Q, n, ninv);
}

//U(k+1)=(PU(k)+V(K))/2, V(K+1)=(DU(K)+PV(K))/2, Q(K+1)=Q(K)Q
void lucasNext(lucasItem* I, BPSWParam param, uint64_t n, uint64_t ninv)
{
uint64_t t = I->U;
I->U = n_mulmod2_preinv(param.P, I->U, n, ninv);
I->U = n_addmod(I->U, I->V, n);
I->U = (I->U&1==1)? (n - (n - I->U) >> 1) : (I->U >> 1);
uint64_t t1 = n_mulmod2_preinv(param.P, I->V, n, ninv);
uint64_t t2 = n_mulmod2_preinv(param.D, t, n, ninv);
I->V = n_addmod(t1, t2, n);
I->V = (I->V&1==1)? (n - (n - I->V) >> 1) : (I->V >> 1);
I->Q = n_mulmod2_preinv(I->Q, param.Q, n, ninv);
}

BPSWParam selectParam(uint64_t n)
{
int64_t d = 5, p, q, delta = 2;
while (n_jacobi(d, n) != -1)
{
    delta = - delta;
    d = delta - d;
}
p = 1;
q = (1 - d) >> 2;
if (q == - 1)
    d = p = q = 5;
//printf("%lu的lucas测试参数: D=%ld, P=%ld, Q=%ld\n", n, d, p, q);
if (d < 0)
    d = n - (-d) % n;
if (q < 0)
    q = n - (-q) % n;   
BPSWParam param = {d%n, p, q%n};
//showParam(param, n);
return param;
}

bool strongLucasTest(uint64_t n)
{
uint64_t ninv = n_preinvert_limb(n);
int64_t k = __builtin_ctzll(n-1);
uint64_t d = (n-1) >> k;
if (!n_is_strong_probabprime2_preinv(n, ninv, 3, d))
    return false;
BPSWParam param = selectParam( n );
uint64_t m = n + 1;
k = __builtin_ctzll(m);
m >>= k;
lucasItem item = {0, 2, 1};
int64_t tm = __builtin_clzll(m);
tm = sizeof(m)*8 - tm - 1;
uint64_t t = 1 << tm;
//printf("%lu=%lu*2^%lu, tm=%ld, t=%lu, half_n=%lu\n", n+1, m, k, tm, t, half_n);
for (int64_t i = tm; i >= 0; i --)
{
    lucasDouble(&item, n, ninv);
    if (m&t == t)
      lucasNext(&item, param, n, ninv);
    m >>= 1;
    t >>= 1;
}
if (item.U == 0)
    return true;
for (int64_t i = 0; i < k; i ++)
    if (item.V != 0)
    {
      item.V = n_mulmod2_preinv(item.V, item.V, n, ninv);
      uint64_t t = n_addmod(item.Q, item.Q, n);
      item.V = n_submod(item.V, t, n);
      item.Q = n_mulmod2_preinv(item.Q, item.Q, n, ninv);
    }
    else
      return true;
return false;
}

#define testCount 16

uint64_t testSet = { 3, 5, 7, 17,
                              33, 79, 101, 111,
                              127, 2047, 5459, 5777,
                              10877, 58519, 65535, 65537};

void test()
{
printf("自检...\n");
printf("U(2k)=U(k)*V(k), V(2k)=V(k)^2-2Q(k), Q(2k)=Q(k)^2\n");
printf("U(k+1)=(PU(k)+V(K))/2, V(K+1)=(DU(K)+PV(K))/2, Q(K+1)=Q(K)Q\n");
for (int i = 0; i < testCount; i ++)
    if (strongLucasTest(testSet))
      printf("素数: %lu\n", testSet);
    else
      printf("合数: %lu\n", testSet);
printf("自检结束\n");
}

int main( void )
{
uint64_t n, cnt = 0, tst = 0;
//test(); return 1;
FILE* sprp2File = fopen("2-SPRP-2-to-64.txt", "r");
if (!sprp2File)
{
    perror("文件2-SPRP-2-to-64.txt不存在");
    return EXIT_FAILURE;
}
char line;
while (fgets(line, 32, sprp2File) != NULL)
{
    char* end;
    n = strtoull(line, &end, 10);
    if (strongLucasTest(n))
      cnt ++;
    //tst ++; if (tst >= 1000000) break;
}

fclose(sprp2File);
printf("测试失败次数: %lu\n", cnt);

return 0;
}


总测试数字31894014,执行时间22.397s
去掉SLPSP测试部分,只测试基3 SPSP,执行时间18.973s,
有1501720个合数通过测试
SPSPTime=18973ms/3189.4014万=5.949ms/万
SLPSPTime=(22397-18973)ms/150.1720万=22.801ms/万
1次SLPSP测试约等于3.8次SPSP测试

当增加1次SPSP测试后,剩余未通过的合数比例低于1/4时,用BPSW的SLPSP测试是合算的
按照上面的思路,再增加一次基5的强伪素数测试,共19.807s, 时间也降低明显,
即BPSW,先执行基2,3,5的强伪素数测试,再做强lucas测试,是最优化的选择


nyy 发表于 2023-5-4 11:00:44

无心人 发表于 2023-5-4 10:47
同样增加一次基3强伪素数测试的BPSW




lucas Ulucas V都测试了吗?

nyy 发表于 2023-5-4 11:06:49

无心人 发表于 2023-5-4 10:47
同样增加一次基3强伪素数测试的BPSW




我当初问***他素数判定的实现算法,***把他的算法当成宝贝,还舍不得告诉我。
没想到你也能实现这个算法,要是早知道,我就问你了。
只可惜你在论坛上发出来之前,我已经学会了BPSW的实现算法,
我从GitHub上搜索到相关的代码,然后研究透的!

无心人 发表于 2023-5-4 11:44:50

作为比较,贴上flint自带素性测试的代码,执行时间60.261s
考虑我代码少做一次基2测试,加上基2测试,以QFT为例,是30.948秒,

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <flint/ulong_extras.h>

int main()
{
uint64_t p, cnt = 0, tst = 0;
//test();
FILE* sprp2File = fopen("2-SPRP-2-to-64.txt", "r");
if (!sprp2File)
{
    perror("文件2-SPRP-2-to-64.txt不存在");
    return EXIT_FAILURE;
}
char line;
while (fgets(line, 32, sprp2File) != NULL)
{
    char* end;
    p = strtoull(line, &end, 10);
    if (n_is_prime(p))
      cnt ++;
    tst ++;
}

fclose(sprp2File);
printf("test: %lu, failed: %lu\n", tst, cnt);
return 0;
}

nyy 发表于 2023-5-4 13:30:53

无心人 发表于 2023-5-4 11:44
作为比较,贴上flint自带素性测试的代码,执行时间60.261s
考虑我代码少做一次基2测试,加上基2测试,以QF ...

你的伪素数文件,能上传共享一下不?
要那种以2为底的强伪素数!

nyy 发表于 2023-5-4 18:53:02

无心人 发表于 2023-5-4 11:44
作为比较,贴上flint自带素性测试的代码,执行时间60.261s
考虑我代码少做一次基2测试,加上基2测试,以QF ...

只要以2为底的强伪素数!
页: [1] 2
查看完整版本: BPSW和QFT素性检验算法执行时间比较