- 注册时间
- 2009-7-21
- 最后登录
- 1970-1-1
- 威望
- 星
- 金币
- 枚
- 贡献
- 分
- 经验
- 点
- 鲜花
- 朵
- 魅力
- 点
- 上传
- 次
- 下载
- 次
- 积分
- 4489
- 在线时间
- 小时
|
楼主 |
发表于 2010-10-20 21:03:35
|
显示全部楼层
-
- /*William,
- Here is a simple program that computes PI (using GMP library) to compare the performance differences for "Cilk V2 cilk_spawn", "OpenMP 3.0 task" or serially. The program was modified from the source found at http://gmplib.org/list-archives/gmp-discuss/2010-April/004117.html and any mistakes are my own.
-
-
- /* Pi computation using Chudnovsky's algorithm.
-
- * Copyright 2002, 2005 Hanhong Xue (macroxue at yahoo dot com)
-
- * Slightly modified 2005 by Torbjorn Granlund to allow more than 2G
- digits to be computed.
-
- * Modified 2010 by David Carver (dcarver at tacc dot utexas dot edu) to
- demonstrate a parallel recursive version of the gmp-chudnovsky program
- using Intel's C++ Compiler XE 12.0 with either "Cilk V2" or "OpenMP 3.0 task".
-
- \$ icc -V
- Intel(R) C Compiler XE for applications running on Intel(R) 64, Version 12.0.0 Beta Build 20100512
- Copyright (C) 1985-2010 Intel Corporation. All rights reserved.
-
-
- To compile for serial execution
- \$ icc -O3 -o pgmp-chudnovsky pgmp-chudnovsky.c -lgmp -limf
-
- and run:
-
- \$ ./pgmp-chudnovsky 100000000 0
- #terms=7051366 depth=24 nprocs=12 SERIAL
- bs cputime = 207.030 wallclock = 207.060
- total cputime = 241.660 wallclock = 241.694
- P size=248778666 digits (2.487787)
- Q size=248778659 digits (2.487787)
-
-
- To compile for parallel Cilk V2 execution:
- \$ icc -DOCILK -O3 -o pgmp-chudnovsky pgmp-chudnovsky.c -lgmp -limf
-
- and you may specify the number of workers with the Cilk V2 environment variable CILK_NWORKERS
-
- \$ export CILK_NWORKERS=12
- \$ ./pgmp-chudnovsky 100000000 0
- #terms=7051366 depth=24 nprocs=12 CILK_NWORKERS=12
- bs cputime = 639.680 wallclock = 53.331
- total cputime = 674.790 wallclock = 88.448
- P size=248778666 digits (2.487787)
- Q size=248778659 digits (2.487787)
-
-
- To compile for a simple parallel OpenMP 3.0 Task execution:
- \$ icc -openmp -O3 -o pgmp-chudnovsky pgmp-chudnovsky.c -lgmp -limf
-
- and you may specify the number of threads with the OpenMP environment variable OMP_NUM_THREADS
-
- \$ export OMP_NUM_THREADS=12
- \$ ./pgmp-chudnovsky 100000000 0
- #terms=7051366 depth=24 nprocs=12 OMP_NUM_THREADS=12
- bs cputime = 710.110 wallclock = 59.223
- total cputime = 745.390 wallclock = 94.340
- P size=248778666 digits (2.487787)
- Q size=248778659 digits (2.487787)
-
-
- To get help run the program with no options:
-
- \$ ./pgmp-chudnovsky
-
- Syntax: ./pgmp-chudnovsky <digits> <option>
- <digits> digits of pi to output
- <option> 0 - just run (default)
- 1 - output decimal digits to stdout
-
-
- * Redistribution and use in source and binary forms,with or without
- * modification,are permitted provided that the following conditions are met:
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES,INCLUDING,BUT NOT LIMITED TO,THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- * EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,
- * SPECIAL,EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT,STRICT LIABILITY,OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
- #include <assert.h>
- #include <math.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- //#include <sys/time.h>
- //#include <sys/sysinfo.h>
-
- #ifdef OCILK
- #include <cilk/cilk.h>
- #elif _OPENMP
- #include <omp.h>
- #endif /* OCILK */
- #include <gmp.h>
-
- #define A 13591409
- #define B 545140134
- #define C 640320
- #define D 12
-
- #define BITS_PER_DIGIT 3.32192809488736234787
- #define DIGITS_PER_ITER 14.1816474627254776555
- #define DOUBLE_PREC 53
-
- ////////////////////////////////////////////////////////////////////////////
- /*
- double wall_clock()
- {
- struct timeval timeval;
- (void) gettimeofday (&timeval,NULL);
- return (timeval.tv_sec + (timeval.tv_usec / 1000000.0));
- }
- */
- ////////////////////////////////////////////////////////////////////////////
-
- /* binary splitting */
- void
- bs(unsigned long a,unsigned long b,unsigned gflag,unsigned level,mpz_t pstack1,mpz_t qstack1,mpz_t gstack1)
- {
- unsigned long mid;
- mpz_t pstack2,qstack2,gstack2;
-
- if (b-a==1) {
-
- /*
- g(b-1,b) = (6b-5)(2b-1)(6b-1)
- p(b-1,b) = b^3 * C^3 / 24
- q(b-1,b) = (-1)^b*g(b-1,b)*(A+Bb).
- */
-
- mpz_set_ui(pstack1,b);
- mpz_mul_ui(pstack1,pstack1,b);
- mpz_mul_ui(pstack1,pstack1,b);
- mpz_mul_ui(pstack1,pstack1,(C/24)*(C/24));
- mpz_mul_ui(pstack1,pstack1,C*24);
-
- mpz_set_ui(gstack1,2*b-1);
- mpz_mul_ui(gstack1,gstack1,6*b-1);
- mpz_mul_ui(gstack1,gstack1,6*b-5);
-
- mpz_set_ui(qstack1,b);
- mpz_mul_ui(qstack1,qstack1,B);
- mpz_add_ui(qstack1,qstack1,A);
- mpz_mul (qstack1,qstack1,gstack1);
- if (b%2)
- mpz_neg(qstack1,qstack1);
-
- } else {
-
- /*
- p(a,b) = p(a,m) * p(m,b)
- g(a,b) = g(a,m) * g(m,b)
- q(a,b) = q(a,m) * p(m,b) + q(m,b) * g(a,m)
- */
-
- mid = a+(b-a)*0.5224; /* tuning parameter */
-
- mpz_init(pstack2);
- mpz_init(qstack2);
- mpz_init(gstack2);
-
- #ifdef OCILK
- cilk_spawn bs(mid,b,gflag,level+1,pstack2,qstack2,gstack2);
- bs(a,mid,1,level+1,pstack1,qstack1,gstack1);
- cilk_sync;
- #elif _OPENMP
- #pragma omp task firstprivate(mid,b,gflag,level) shared(pstack2,qstack2,gstack2)
- bs(mid,b,gflag,level+1,pstack2,qstack2,gstack2);
- #pragma omp task firstprivate(mid,a,gflag,level) shared(pstack1,qstack1,gstack1)
- bs(a,mid,1,level+1,pstack1,qstack1,gstack1);
- #pragma omp taskwait
- #else /* SERIAL */
- bs(mid,b,gflag,level+1,pstack2,qstack2,gstack2);
- bs(a,mid,1,level+1,pstack1,qstack1,gstack1);
- #endif /* OCILK */
-
- mpz_mul(pstack1,pstack1,pstack2);
- mpz_mul(qstack1,qstack1,pstack2);
- mpz_mul(qstack2,qstack2,gstack1);
- mpz_add(qstack1,qstack1,qstack2);
-
- if (gflag) {
- mpz_mul(gstack1,gstack1,gstack2);
- }
-
- mpz_clear(pstack2);
- mpz_clear(qstack2);
- mpz_clear(gstack2);
- }
- }
-
- int
- main(int argc,char *argv[])
- {
- mpf_t pi,qi;
- mpz_t pstack,qstack,gstack;
- unsigned long terms,d=100,out=0,depth,psize,qsize;
- clock_t begin,end;
- double wbegin,wend;
- char *prog_name;
-
- prog_name = argv[0];
-
- if (argc==1) {
- fprintf(stderr,"\nSyntax: %s <digits> <option>\n",prog_name);
- fprintf(stderr," <digits> digits of pi to output\n");
- fprintf(stderr," <option> 0 - just run (default)\n");
- fprintf(stderr," 1 - output decimal digits to stdout\n");
- exit(1);
- }
- if (argc>1)
- d = strtoul(argv[1],0,0);
- if (argc>2)
- out = atoi(argv[2]);
-
- begin = clock();
- //wbegin = wall_clock();
-
- terms = d/DIGITS_PER_ITER;
- depth = 0;
- while ((1L<<depth)<terms)
- depth++;
- depth++;
-
- mpz_init(pstack);
- mpz_init(qstack);
- mpz_init(gstack);
-
- /* begin binary splitting process */
- if (terms<=0) {
- mpz_set_ui(pstack,1);
- mpz_set_ui(qstack,0);
- mpz_set_ui(gstack,1);
- } else {
-
- #ifdef OCILK
- fprintf(stderr,"#terms=%ld depth=%ld nprocs=%d CILK_NWORKERS=%d\n",terms,depth,1/*get_nprocs()*/,
- __cilkrts_get_nworkers());
- bs(0,terms,0,0,pstack,qstack,gstack);
- #elif _OPENMP
- #pragma omp parallel
- #pragma omp single nowait
- {
- fprintf(stderr,"#terms=%ld depth=%ld nprocs=%d OMP_NUM_THREADS=%d\n",terms,depth,1/*get_nprocs()*/,
- omp_get_num_threads());
- bs(0,terms,0,0,pstack,qstack,gstack);
- }
- #else /* SERIAL */
- fprintf(stderr,"#terms=%ld depth=%ld nprocs=%d SERIAL\n",terms,depth,1/*get_nprocs()*/);
- bs(0,terms,0,0,pstack,qstack,gstack);
- #endif /* OCILK */
-
- }
-
- end = clock();
- //wend = wall_clock();
- fprintf(stderr,"bs cputime = %6.3f wallclock = %6.3f\n",
- (double)(end-begin)/CLOCKS_PER_SEC,/*(wend-wbegin)*/1);
- fflush(stderr);
-
- mpz_clear(gstack);
-
- /* prepare to convert integers to floats */
-
- mpf_set_default_prec((long int)(d*BITS_PER_DIGIT+16));
-
- /*
- p*(C/D)*sqrt(C)
- pi = -----------------
- (q+A*p)
- */
-
- psize = mpz_sizeinbase(pstack,10);
- qsize = mpz_sizeinbase(qstack,10);
-
- mpz_addmul_ui(qstack,pstack,A);
- mpz_mul_ui(pstack,pstack,C/D);
-
- mpf_init(pi);
- mpf_set_z(pi,pstack);
- mpz_clear(pstack);
-
- mpf_init(qi);
- mpf_set_z(qi,qstack);
- mpz_clear(qstack);
-
- /* final step */
-
- mpf_div(qi,pi,qi);
- mpf_sqrt_ui(pi,C);
- mpf_mul(qi,qi,pi);
- mpf_clear(pi);
-
- end = clock();
- //wend = wall_clock();
-
- /* output Pi and timing statistics */
-
- fprintf(stderr,"total cputime = %6.3f wallclock = %6.3f\n",
- (double)(end-begin)/CLOCKS_PER_SEC,1/*(wend-wbegin)*/);
- fflush(stderr);
-
- printf(" P size=%ld digits (%f)\n"
- " Q size=%ld digits (%f)\n",
- psize,(double)psize/d,qsize,(double)qsize/d);
-
- if (out&1) {
- printf("pi(0,%ld)=\n",terms);
- mpf_out_str(stdout,10,d+2,qi);
- printf("\n");
- }
-
- /* free float resources */
-
- mpf_clear(qi);
-
- exit (0);
- }
-
-
-
复制代码 D:\lt\dl\gmp421>cl chudnovsky.cpp -I .\include /link D:\lt\dl\gmp421\lib\Win32\Release\gmp.lib
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.3077 for 80x86
Copyright (C) Microsoft Corporation 1984-2002. All rights reserved.
chudnovsky.cpp
Microsoft (R) Incremental Linker Version 7.10.3077
Copyright (C) Microsoft Corporation. All rights reserved.
/out:chudnovsky.exe
D:\lt\dl\gmp421\lib\Win32\Release\gmp.lib
chudnovsky.obj
LINK : warning LNK4098: defaultlib 'LIBCMT' conflicts with use of other libs; use /NODEFAULTLIB:library
gmp.lib(get_str.obj) : error LNK2019: unresolved external symbol __ftol2_sse referenced in function ___gmpf_get_str
chudnovsky.exe : fatal error LNK1120: 1 unresolved externals |
|