Installation of VASP 5.3 on an Intel i7-2600K with 8 GB RAM, parallel
Posted: Fri Jan 18, 2013 6:35 pm
Hello,
I successfully compiled VASP 5.3 with the following Makefile :
# Suffixes that take part in the suffix rules further down.
.SUFFIXES: .inc .f .f90 .F

# Extension given to the second file name passed to fpp and later compiled.
SUFFIX = .f90

# Preprocessor call. $* is filled in per target when a recipe runs, so this
# has to remain a recursively expanded (=) variable.
CPP_ = /opt/intel/bin/fpp -f_com=no -free -w0 $*.F $*$(SUFFIX)

# Compiler flag sets used by the compile rules below.
FFLAGS     = -free -names lowercase -assume byterecl
OFLAG      = -O2 -ip
OFLAG_HIGH = $(OFLAG)
DEBUG      = -free -O0
INLINE     = $(OFLAG)

# Objects that get their own optimisation level; both lists are empty here.
OBJ_HIGH  =
OBJ_NOOPT =

# Libraries handed to the link lines.
MKLPATH = $(MKLROOT)/lib/intel64
LIB = -L../vasp.5.lib -ldmy \
      ./linpack_double.o \
      $(MKLPATH)/libmkl_scalapack_lp64.a \
      $(MKLPATH)/libmkl_blacs_openmpi_lp64.a
LINK =
INCS =
INC  =

# FFT objects plus the MKL FFTW3 interface archive.
FFT3D = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o $(MKLROOT)/interfaces/fftw3xf/libfftw3xf_intel.a

# FC is used on the compile (-c) lines, FCL on the link (-o) lines.
FC  = mpif90 -openmp
FCL = mpif90 -mkl

# Full preprocessor command: CPP_ plus the -D definitions for this build.
CPP = $(CPP_) -DMPI -DHOST=\"lepcm\" -DIFC \
      -DCACHE_SIZE=12000 -DPGF90 -Davoidalloc -DNGZhalf \
      -DscaLAPACK -DMPI_BLOCK=8000 -Duse_collective \
      -DRPROMU_DGEMV -DRACCMU_DGEMV -DPROFILING
#-----------------------------------------------------------------------
# general rules and compile lines
#-----------------------------------------------------------------------
# Object lists. The entries are kept in their original order; that is the
# order in which they appear as prerequisites and on the link lines.
BASIC = symmetry.o symlib.o lattlib.o random.o

SOURCE = \
    base.o mpi.o smart_allocate.o xml.o \
    constant.o jacobi.o main_mpi.o scala.o \
    asa.o lattice.o poscar.o ini.o mgrid.o xclib.o vdw_nl.o xclib_grad.o \
    radial.o pseudo.o gridq.o ebs.o \
    mkpoints.o wave.o wave_mpi.o wave_high.o spinsym.o \
    $(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
    mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
    constrmag.o cl_shift.o relativistic.o LDApU.o \
    paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o hyperfine.o paw.o \
    mkpoints_full.o charge.o Lebedev-Laikov.o stockholder.o dipol.o pot.o \
    dos.o elf.o tet.o tetweight.o hamil_rot.o \
    chain.o dyna.o k-proj.o sphpro.o us.o core_rel.o \
    aedens.o wavpre.o wavpre_noio.o broyden.o \
    dynbr.o hamil_high.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
    brent.o stufak.o fileio.o opergrid.o stepver.o \
    chgloc.o fast_aug.o fock_multipole.o fock.o mkpoints_change.o sym_grad.o \
    mymath.o internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
    nmr.o pead.o subrot.o subrot_scf.o \
    force.o pwlhf.o gw_model.o optreal.o steep.o davidson.o david_inner.o \
    electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
    optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
    hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
    lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
    linear_optics.o \
    setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
    mlwf.o ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o \
    local_field.o ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
    rmm-diis_mlr.o linear_response_NMR.o wannier_interpol.o linear_response.o
# Link targets. Each recipe links with $(FCL) and writes the executable
# named by the target ($@).

# Main executable; any previous binary is removed before linking.
vasp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f $@
	$(FCL) -o $@ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)

# main.F is a prerequisite here but is not passed to the linker.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) dgemmtest.o random.o base.o $(LIB)

ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o $@ $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

# main.F is a prerequisite here but is not passed to the linker.
kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
# `clean` names an action, not a file. Declaring it phony keeps a stray file
# called "clean" from making the target look up to date.
.PHONY: clean

# Remove generated files and touch every .F source. The wildcard rm also
# matches files that were copied into this directory by hand (fftw3.f, the
# vasp.5.lib objects such as linpack_double.o used in LIB), so they are
# copied back afterwards. fftw3.f is taken from $(MKLROOT), the same MKL
# tree the library paths in this Makefile use, instead of a hard-coded
# versioned install path.
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F
	cp $(MKLROOT)/include/fftw/fftw3.f .
	cp ../vasp.5.lib/*.o .
	cp ../vasp.5.lib/libdmy.a .
# Objects compiled from already preprocessed $(SUFFIX) files with their own
# flag sets. main and makeparam use $(DEBUG); xcgrad and xcspin use $(INLINE).
#
# $(FFLAGS) and $(DEBUG) must be separated by a space. Written back to back,
# the last word of FFLAGS and the first word of DEBUG fuse into a single
# argument ("byterecl-free" with the values set in this Makefile).
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)

xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)

xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# Prerequisites only; the recipe comes from the .F$(SUFFIX) suffix rule.
makeparam$(SUFFIX): makeparam.F main.F
#
# NOTE: there is no complete dependency list for the include files and
# modules; only the minimal basic dependencies are given here.
# If a structure is changed, touch_dep must be called with the name
# of that structure.
#
# These lines add prerequisites only; the recipes come from other rules.
base.o:     base.inc base.F
mgrid.o:    mgrid.inc mgrid.F
constant.o: constant.inc constant.F
lattice.o:  lattice.inc lattice.F
setex.o:    setexm.inc setex.F
pseudo.o:   pseudo.inc pseudo.F
mkpoints.o: mkpoints.inc mkpoints.F
wave.o:     wave.F
nonl.o:     nonl.inc nonl.F
nonlr.o:    nonlr.inc nonlr.F
# Objects listed in OBJ_HIGH: preprocess, then compile with $(OFLAG_HIGH).
# OBJ_HIGH is empty in this Makefile, so the rule currently names no target.
$(OBJ_HIGH):
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)

# Objects listed in OBJ_NOOPT: preprocess, then compile without any
# optimisation flag variable. OBJ_NOOPT is empty in this Makefile as well.
$(OBJ_NOOPT):
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)

# Compiled with $(F77) and $(FFLAGS_F77) instead of $(FC) and $(FFLAGS).
# NOTE(review): neither variable is assigned in the visible Makefile - confirm.
fft3dlib_f77.o: fft3dlib_f77.F
	$(CPP)
	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)

# Default path from a .F source to an object: preprocess, then compile
# with $(OFLAG).
.F.o:
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# Preprocess only: .F -> $(SUFFIX).
.F$(SUFFIX):
	$(CPP)

# Compile only: $(SUFFIX) -> .o.
$(SUFFIX).o:
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
#-----------------------------------------------------------------------
# Special rules
#
# These per-object rules have only been tested with ifc.11 and ifc.12.
# Each one preprocesses the .F file and compiles it with a fixed flag
# string instead of $(FFLAGS) $(OFLAG).
#-----------------------------------------------------------------------
fft3dlib.o: fft3dlib.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

fft3dfurth.o: fft3dfurth.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

fftw3d.o: fftw3d.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

fftmpi.o: fftmpi.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

# The only rule in this group that also passes $(INCS).
fftmpiw.o: fftmpiw.F
	$(CPP)
	$(FC) -free -names lowercase -O1 $(INCS) -c $*$(SUFFIX)

wave_high.o: wave_high.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)
# The rules below are probably not needed any more (-O3 seems to work).
# Each one preprocesses the .F file and compiles it at a fixed -O2.
wave.o: wave.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

paw.o: paw.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

cl_shift.o: cl_shift.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

us.o: us.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

LDApU.o: LDApU.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)
__________________________________________________________________
I tried the example http://www.vasp.at/vasp-workshop/examples/2_1_fccSi.tgz (parameter 3.9 and k-mesh 41 41 41).
Why is the execution time on one core the same as on four cores?
Did I make a mistake in the compilation, or are there parameters that must be added to the INCAR file for parallel execution?
vasp5
Total CPU time used (sec): 15.281
User time (sec): 15.137
System time (sec): 0.144
Elapsed time (sec): 15.309
Maximum memory used (kb): 116504.
Average memory used (kb): 0.
Minor page faults: 29944
Major page faults: 0
Voluntary context switches: 2
mpirun -np 4 /usr/bin/vasp5
Total CPU time used (sec): 17.393
User time (sec): 17.265
System time (sec): 0.128
Elapsed time (sec): 18.403
Maximum memory used (kb): 70452.
Average memory used (kb): 0.
Minor page faults: 18088
Major page faults: 0
Voluntary context switches: 129
I tried other VASP examples and I get the same result (the times on one, two and four cores are roughly equal).
<span class='smallblacktext'>[ Edited ]</span>
I successfully compiled VASP 5.3 with the following Makefile :
# Suffixes that take part in the suffix rules further down.
.SUFFIXES: .inc .f .f90 .F

# Extension given to the second file name passed to fpp and later compiled.
SUFFIX = .f90

# Preprocessor call. $* is filled in per target when a recipe runs, so this
# has to remain a recursively expanded (=) variable.
CPP_ = /opt/intel/bin/fpp -f_com=no -free -w0 $*.F $*$(SUFFIX)

# Compiler flag sets used by the compile rules below.
FFLAGS     = -free -names lowercase -assume byterecl
OFLAG      = -O2 -ip
OFLAG_HIGH = $(OFLAG)
DEBUG      = -free -O0
INLINE     = $(OFLAG)

# Objects that get their own optimisation level; both lists are empty here.
OBJ_HIGH  =
OBJ_NOOPT =

# Libraries handed to the link lines.
MKLPATH = $(MKLROOT)/lib/intel64
LIB = -L../vasp.5.lib -ldmy \
      ./linpack_double.o \
      $(MKLPATH)/libmkl_scalapack_lp64.a \
      $(MKLPATH)/libmkl_blacs_openmpi_lp64.a
LINK =
INCS =
INC  =

# FFT objects plus the MKL FFTW3 interface archive.
FFT3D = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o $(MKLROOT)/interfaces/fftw3xf/libfftw3xf_intel.a

# FC is used on the compile (-c) lines, FCL on the link (-o) lines.
FC  = mpif90 -openmp
FCL = mpif90 -mkl

# Full preprocessor command: CPP_ plus the -D definitions for this build.
CPP = $(CPP_) -DMPI -DHOST=\"lepcm\" -DIFC \
      -DCACHE_SIZE=12000 -DPGF90 -Davoidalloc -DNGZhalf \
      -DscaLAPACK -DMPI_BLOCK=8000 -Duse_collective \
      -DRPROMU_DGEMV -DRACCMU_DGEMV -DPROFILING
#-----------------------------------------------------------------------
# general rules and compile lines
#-----------------------------------------------------------------------
# Object lists. The entries are kept in their original order; that is the
# order in which they appear as prerequisites and on the link lines.
BASIC = symmetry.o symlib.o lattlib.o random.o

SOURCE = \
    base.o mpi.o smart_allocate.o xml.o \
    constant.o jacobi.o main_mpi.o scala.o \
    asa.o lattice.o poscar.o ini.o mgrid.o xclib.o vdw_nl.o xclib_grad.o \
    radial.o pseudo.o gridq.o ebs.o \
    mkpoints.o wave.o wave_mpi.o wave_high.o spinsym.o \
    $(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
    mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
    constrmag.o cl_shift.o relativistic.o LDApU.o \
    paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o hyperfine.o paw.o \
    mkpoints_full.o charge.o Lebedev-Laikov.o stockholder.o dipol.o pot.o \
    dos.o elf.o tet.o tetweight.o hamil_rot.o \
    chain.o dyna.o k-proj.o sphpro.o us.o core_rel.o \
    aedens.o wavpre.o wavpre_noio.o broyden.o \
    dynbr.o hamil_high.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
    brent.o stufak.o fileio.o opergrid.o stepver.o \
    chgloc.o fast_aug.o fock_multipole.o fock.o mkpoints_change.o sym_grad.o \
    mymath.o internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
    nmr.o pead.o subrot.o subrot_scf.o \
    force.o pwlhf.o gw_model.o optreal.o steep.o davidson.o david_inner.o \
    electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
    optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
    hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
    lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
    linear_optics.o \
    setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
    mlwf.o ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o \
    local_field.o ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
    rmm-diis_mlr.o linear_response_NMR.o wannier_interpol.o linear_response.o
# Link targets. Each recipe links with $(FCL) and writes the executable
# named by the target ($@).

# Main executable; any previous binary is removed before linking.
vasp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f $@
	$(FCL) -o $@ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)

# main.F is a prerequisite here but is not passed to the linker.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) dgemmtest.o random.o base.o $(LIB)

ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o $@ $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

# main.F is a prerequisite here but is not passed to the linker.
kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
# `clean` names an action, not a file. Declaring it phony keeps a stray file
# called "clean" from making the target look up to date.
.PHONY: clean

# Remove generated files and touch every .F source. The wildcard rm also
# matches files that were copied into this directory by hand (fftw3.f, the
# vasp.5.lib objects such as linpack_double.o used in LIB), so they are
# copied back afterwards. fftw3.f is taken from $(MKLROOT), the same MKL
# tree the library paths in this Makefile use, instead of a hard-coded
# versioned install path.
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F
	cp $(MKLROOT)/include/fftw/fftw3.f .
	cp ../vasp.5.lib/*.o .
	cp ../vasp.5.lib/libdmy.a .
# Objects compiled from already preprocessed $(SUFFIX) files with their own
# flag sets. main and makeparam use $(DEBUG); xcgrad and xcspin use $(INLINE).
#
# $(FFLAGS) and $(DEBUG) must be separated by a space. Written back to back,
# the last word of FFLAGS and the first word of DEBUG fuse into a single
# argument ("byterecl-free" with the values set in this Makefile).
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)

xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)

xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# Prerequisites only; the recipe comes from the .F$(SUFFIX) suffix rule.
makeparam$(SUFFIX): makeparam.F main.F
#
# NOTE: there is no complete dependency list for the include files and
# modules; only the minimal basic dependencies are given here.
# If a structure is changed, touch_dep must be called with the name
# of that structure.
#
# These lines add prerequisites only; the recipes come from other rules.
base.o:     base.inc base.F
mgrid.o:    mgrid.inc mgrid.F
constant.o: constant.inc constant.F
lattice.o:  lattice.inc lattice.F
setex.o:    setexm.inc setex.F
pseudo.o:   pseudo.inc pseudo.F
mkpoints.o: mkpoints.inc mkpoints.F
wave.o:     wave.F
nonl.o:     nonl.inc nonl.F
nonlr.o:    nonlr.inc nonlr.F
# Objects listed in OBJ_HIGH: preprocess, then compile with $(OFLAG_HIGH).
# OBJ_HIGH is empty in this Makefile, so the rule currently names no target.
$(OBJ_HIGH):
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)

# Objects listed in OBJ_NOOPT: preprocess, then compile without any
# optimisation flag variable. OBJ_NOOPT is empty in this Makefile as well.
$(OBJ_NOOPT):
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)

# Compiled with $(F77) and $(FFLAGS_F77) instead of $(FC) and $(FFLAGS).
# NOTE(review): neither variable is assigned in the visible Makefile - confirm.
fft3dlib_f77.o: fft3dlib_f77.F
	$(CPP)
	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)

# Default path from a .F source to an object: preprocess, then compile
# with $(OFLAG).
.F.o:
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# Preprocess only: .F -> $(SUFFIX).
.F$(SUFFIX):
	$(CPP)

# Compile only: $(SUFFIX) -> .o.
$(SUFFIX).o:
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
#-----------------------------------------------------------------------
# Special rules
#
# These per-object rules have only been tested with ifc.11 and ifc.12.
# Each one preprocesses the .F file and compiles it with a fixed flag
# string instead of $(FFLAGS) $(OFLAG).
#-----------------------------------------------------------------------
fft3dlib.o: fft3dlib.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

fft3dfurth.o: fft3dfurth.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

fftw3d.o: fftw3d.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

fftmpi.o: fftmpi.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

# The only rule in this group that also passes $(INCS).
fftmpiw.o: fftmpiw.F
	$(CPP)
	$(FC) -free -names lowercase -O1 $(INCS) -c $*$(SUFFIX)

wave_high.o: wave_high.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)
# The rules below are probably not needed any more (-O3 seems to work).
# Each one preprocesses the .F file and compiles it at a fixed -O2.
wave.o: wave.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

paw.o: paw.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

cl_shift.o: cl_shift.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

us.o: us.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

LDApU.o: LDApU.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)
__________________________________________________________________
I tried the example http://www.vasp.at/vasp-workshop/examples/2_1_fccSi.tgz (parameter 3.9 and k-mesh 41 41 41).
Why is the execution time on one core the same as on four cores?
Did I make a mistake in the compilation, or are there parameters that must be added to the INCAR file for parallel execution?
vasp5
Total CPU time used (sec): 15.281
User time (sec): 15.137
System time (sec): 0.144
Elapsed time (sec): 15.309
Maximum memory used (kb): 116504.
Average memory used (kb): 0.
Minor page faults: 29944
Major page faults: 0
Voluntary context switches: 2
mpirun -np 4 /usr/bin/vasp5
Total CPU time used (sec): 17.393
User time (sec): 17.265
System time (sec): 0.128
Elapsed time (sec): 18.403
Maximum memory used (kb): 70452.
Average memory used (kb): 0.
Minor page faults: 18088
Major page faults: 0
Voluntary context switches: 129
I tried other VASP examples and I get the same result (the times on one, two and four cores are roughly equal).
<span class='smallblacktext'>[ Edited ]</span>