Skip to content

fix bug for sys_idx dependent trust level and add option for model_devi_job #786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 11, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -514,10 +514,10 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
| *#Exploration*
| **model_devi_dt** | Float | 0.002 (recommend) | Timestep for MD |
| **model_devi_skip** | Integer | 0 | Number of structures skipped for fp in each MD |
| **model_devi_f_trust_lo** | Float or List of float | 0.05 | Lower bound of forces for the selection. If List, should be set for each index in `sys_configs`, respectively. |
| **model_devi_f_trust_hi** | Float or List of float | 0.15 | Upper bound of forces for the selection. If List, should be set for each index in `sys_configs`, respectively. |
| **model_devi_v_trust_lo** | Float or List of float | 1e10 | Lower bound of virial for the selection. If List, should be set for each index in `sys_configs`, respectively. Should be used with DeePMD-kit v2.x. |
| **model_devi_v_trust_hi** | Float or List of float | 1e10 | Upper bound of virial for the selection. If List, should be set for each index in `sys_configs`, respectively. Should be used with DeePMD-kit v2.x. |
| **model_devi_f_trust_lo** | Float or List of float or Dict[str, float] | 0.05 | Lower bound of forces for the selection. If List or Dict, should be set for each index in `sys_configs`, respectively. Dict keys are the system indices written as strings, e.g. `"0"`. |
| **model_devi_f_trust_hi** | Float or List of float or Dict[str, float] | 0.15 | Upper bound of forces for the selection. If List or Dict, should be set for each index in `sys_configs`, respectively. Dict keys are the system indices written as strings, e.g. `"0"`. |
| **model_devi_v_trust_lo** | Float or List of float or Dict[str, float] | 1e10 | Lower bound of virial for the selection. If List or Dict, should be set for each index in `sys_configs`, respectively. Dict keys are the system indices written as strings, e.g. `"0"`. Should be used with DeePMD-kit v2.x. |
| **model_devi_v_trust_hi** | Float or List of float or Dict[str, float] | 1e10 | Upper bound of virial for the selection. If List or Dict, should be set for each index in `sys_configs`, respectively. Dict keys are the system indices written as strings, e.g. `"0"`. Should be used with DeePMD-kit v2.x. |
| model_devi_adapt_trust_lo | Boolean | False | Adaptively determines the lower trust levels of force and virial. This option should be used together with `model_devi_numb_candi_f`, `model_devi_numb_candi_v` and optionally with `model_devi_perc_candi_f` and `model_devi_perc_candi_v`. `dpgen` will make two sets: 1. From the frames with force model deviation lower than `model_devi_f_trust_hi`, select `max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames)` frames with largest force model deviation. 2. From the frames with virial model deviation lower than `model_devi_v_trust_hi`, select `max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames)` frames with largest virial model deviation. The union of the two sets is made as candidate dataset|
| model_devi_numb_candi_f | Int | 10 | See `model_devi_adapt_trust_lo`.|
| model_devi_numb_candi_v | Int | 0 | See `model_devi_adapt_trust_lo`.|
Expand All @@ -536,7 +536,8 @@ The bold notation of key (such as **type_map**) means that it's a necessary key
| **model_devi_jobs["ensemble"]** | String | "nvt" | Determines which ensemble is used in MD; **options** include “npt” and “nvt”. |
| model_devi_jobs["neidelay"] | Integer | "10" | delay building until this many steps since last build |
| model_devi_jobs["taut"] | Float | "0.1" | Coupling time of thermostat (ps) |
| model_devi_jobs["taup"] | Float | "0.5" | Coupling time of barostat (ps)
| model_devi_jobs["taup"] | Float | "0.5" | Coupling time of barostat (ps) |
| model_devi_jobs["model_devi_f_trust_lo"] <br> model_devi_jobs["model_devi_f_trust_hi"] <br> model_devi_jobs["model_devi_v_trust_lo"] <br> model_devi_jobs["model_devi_v_trust_hi"] | Float or Dict[str, float] | - | Iteration-specific trust levels that take precedence over the global keys of the same name (see **model_devi_f_trust_lo** etc. above). If Dict, a value should be set for each index in `sys_idx`, keyed by the index written as a string. |
| *#Labeling*
| **fp_style** | string | "vasp" | Software for First Principles. **Options** include “vasp”, “pwscf”, “siesta” and “gaussian” up to now. |
| **fp_task_max** | Integer | 20 | Maximum of structures to be calculated in `02.fp` of each iteration. |
Expand Down
30 changes: 21 additions & 9 deletions dpgen/generator/arginfo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Dict, List
from dargs import Argument, Variant

from dpgen.arginfo import general_mdata_arginfo
Expand Down Expand Up @@ -90,6 +90,10 @@ def model_devi_jobs_args() -> List[Argument]:
doc_neidelay = 'delay building until this many steps since last build.'
doc_taut = 'Coupling time of thermostat (ps).'
doc_taup = 'Coupling time of barostat (ps).'
doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If dict, should be set for each index in sys_idx, respectively.'
doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If dict, should be set for each index in sys_idx, respectively.'
doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If dict, should be set for each index in sys_idx, respectively. Should be used with DeePMD-kit v2.x.'
doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If dict, should be set for each index in sys_idx, respectively. Should be used with DeePMD-kit v2.x.'

args = [
Argument("sys_idx", list, optional=False, doc=doc_sys_idx),
Expand All @@ -101,6 +105,14 @@ def model_devi_jobs_args() -> List[Argument]:
Argument("neidelay", int, optional=True, doc=doc_neidelay),
Argument("taut", float, optional=True, doc=doc_taut),
Argument("taup", float, optional=True, doc=doc_taup),
Argument("model_devi_f_trust_lo", [
float, Dict[str, float]], optional=False, doc=doc_model_devi_f_trust_lo),
Argument("model_devi_f_trust_hi", [
float, Dict[str, float]], optional=False, doc=doc_model_devi_f_trust_hi),
Argument("model_devi_v_trust_lo", [
float, Dict[str, float]], optional=False, doc=doc_model_devi_v_trust_lo),
Argument("model_devi_v_trust_hi", [
float, Dict[str, float]], optional=False, doc=doc_model_devi_v_trust_hi),
]

doc_model_devi_jobs = 'Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations'
Expand All @@ -110,10 +122,10 @@ def model_devi_jobs_args() -> List[Argument]:
def model_devi_lmp_args() -> List[Argument]:
doc_model_devi_dt = 'Timestep for MD. 0.002 is recommend.'
doc_model_devi_skip = 'Number of structures skipped for fp in each MD.'
doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.'
doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.'
doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.'
doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.'
doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list or dict, should be set for each index in sys_configs, respectively.'
doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If list or dict, should be set for each index in sys_configs, respectively.'
doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If list or dict, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.'
doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If list or dict, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.'
doc_model_devi_adapt_trust_lo = 'Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets:\n\n\
- 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. \n\n\
- 2. From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. \n\n\
Expand All @@ -134,13 +146,13 @@ def model_devi_lmp_args() -> List[Argument]:
Argument("model_devi_skip", int, optional=False,
doc=doc_model_devi_skip),
Argument("model_devi_f_trust_lo", [
float, list], optional=False, doc=doc_model_devi_f_trust_lo),
float, list, Dict[str, float]], optional=False, doc=doc_model_devi_f_trust_lo),
Argument("model_devi_f_trust_hi", [
float, list], optional=False, doc=doc_model_devi_f_trust_hi),
float, list, Dict[str, float]], optional=False, doc=doc_model_devi_f_trust_hi),
Argument("model_devi_v_trust_lo", [
float, list], optional=False, doc=doc_model_devi_v_trust_lo),
float, list, Dict[str, float]], optional=False, doc=doc_model_devi_v_trust_lo),
Argument("model_devi_v_trust_hi", [
float, list], optional=False, doc=doc_model_devi_v_trust_hi),
float, list, Dict[str, float]], optional=False, doc=doc_model_devi_v_trust_hi),
Argument("model_devi_adapt_trust_lo", bool, optional=True,
doc=doc_model_devi_adapt_trust_lo),
Argument("model_devi_numb_candi_f", int, optional=True,
Expand Down
28 changes: 20 additions & 8 deletions dpgen/generator/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1902,6 +1902,8 @@ def _make_fp_vasp_inner (modd_path,
def _trust_limitation_check(sys_idx, lim):
if isinstance(lim, list):
sys_lim = lim[sys_idx]
elif isinstance(lim, dict):
sys_lim = lim[str(sys_idx)]
else:
sys_lim = lim
return sys_lim
Expand All @@ -1912,10 +1914,10 @@ def _trust_limitation_check(sys_idx, lim):
modd_system_task.sort()
if model_devi_engine in ('lammps', 'gromacs', 'calypso'):
# convert global trust limitations to local ones
f_trust_lo_sys = _trust_limitation_check(ss, f_trust_lo)
f_trust_hi_sys = _trust_limitation_check(ss, f_trust_hi)
v_trust_lo_sys = _trust_limitation_check(ss, v_trust_lo)
v_trust_hi_sys = _trust_limitation_check(ss, v_trust_hi)
f_trust_lo_sys = _trust_limitation_check(int(ss), f_trust_lo)
f_trust_hi_sys = _trust_limitation_check(int(ss), f_trust_hi)
v_trust_lo_sys = _trust_limitation_check(int(ss), v_trust_lo)
v_trust_hi_sys = _trust_limitation_check(int(ss), v_trust_hi)

# assumed e -> v
if not model_devi_adapt_trust_lo:
Expand Down Expand Up @@ -2453,10 +2455,6 @@ def _make_fp_vasp_configs(iter_index,
jdata):
fp_task_max = jdata['fp_task_max']
model_devi_skip = jdata['model_devi_skip']
v_trust_lo = jdata.get('model_devi_v_trust_lo', 1e10)
v_trust_hi = jdata.get('model_devi_v_trust_hi', 1e10)
f_trust_lo = jdata['model_devi_f_trust_lo']
f_trust_hi = jdata['model_devi_f_trust_hi']
type_map = jdata['type_map']
iter_name = make_iter_name(iter_index)
work_path = os.path.join(iter_name, fp_name)
Expand All @@ -2469,6 +2467,20 @@ def _make_fp_vasp_configs(iter_index,
cur_job = json.load(open(os.path.join(modd_path, 'cur_job.json'), 'r'))
if 'task_min' in cur_job :
task_min = cur_job['task_min']
else:
cur_job = {}
# support iteration dependent trust levels
v_trust_lo = cur_job.get('model_devi_v_trust_lo', jdata.get('model_devi_v_trust_lo', 1e10))
v_trust_hi = cur_job.get('model_devi_v_trust_hi', jdata.get('model_devi_v_trust_hi', 1e10))
if cur_job.get('model_devi_f_trust_lo'):
f_trust_lo = cur_job.get('model_devi_f_trust_lo')
else:
f_trust_lo = jdata['model_devi_f_trust_lo']
if cur_job.get('model_devi_f_trust_hi'):
f_trust_hi = cur_job.get('model_devi_f_trust_hi')
else:
f_trust_hi = jdata['model_devi_f_trust_hi']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not write it as

f_trust_lo = cur_job.get('model_devi_f_trust_lo', jdata['model_devi_f_trust_lo'])

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If written like this, then when the trust level is supplied only in each model_devi_job, it would raise a KeyError because the global trust level is missing (the default argument `jdata['model_devi_f_trust_lo']` is evaluated even when the local value exists). It would be confusing for users to have to set a global trust level when local ones are already set.


# make configs
fp_tasks = _make_fp_vasp_inner(modd_path, work_path,
model_devi_skip,
Expand Down
1 change: 1 addition & 0 deletions tests/generator/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
param_abacus_file = 'param-pyridine-abacus.json'
param_abacus_post_file = 'param-methane-abacus.json'
param_amber_file = "param-amber.json"
param_multiple_trust_file = 'param-mg-vasp-multi-trust.json'

def my_file_cmp(test, f0, f1):
with open(f0) as fp0 :
Expand Down
111 changes: 111 additions & 0 deletions tests/generator/param-mg-vasp-multi-trust.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{
"type_map": ["Mg", "Al"],
"mass_map": [24, 27],

"init_data_prefix": "data",
"init_data_sys": ["deepmd"
],
"init_batch_size": [16],
"sys_configs": [
["data/mg.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000000/POSCAR"],
["data/mg.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000001/POSCAR"]
],
"_comment": "0 1 2 3",
"_comment": "4 5 6 7",
"sys_batch_size": [1, 1
],

"_comment": " 00.train ",
"numb_models": 4,
"train_param": "input.json",
"default_training_param" : {
"_comment": " model parameters",
"use_smooth": true,
"sel_a": [90],
"rcut_smth": 2.00,
"rcut": 6.00,
"filter_neuron": [25, 50, 100],
"filter_resnet_dt": false,
"n_axis_neuron": 12,
"n_neuron": [240, 240, 240],
"resnet_dt": true,
"coord_norm": true,
"type_fitting_net": false,

"_comment": " traing controls",
"systems": [],
"set_prefix": "set",
"stop_batch": 400000,
"batch_size": 1,
"start_lr": 0.002,
"decay_steps": 2000,
"decay_rate": 0.95,
"seed": 0,

"start_pref_e": 0.02,
"limit_pref_e": 2,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0.0,
"limit_pref_v": 0.0,

"_comment": " display and restart",
"_comment": " frequencies counted in batch",
"disp_file": "lcurve.out",
"disp_freq": 2000,
"numb_test": 10,
"save_freq": 20000,
"save_ckpt": "model.ckpt",
"load_ckpt": "model.ckpt",
"disp_training": true,
"time_training": true,
"profiling": false,
"profiling_file": "timeline.json",

"_comment": "that's all"
},

"_comment": " 01.model_devi ",
"_comment": "model_devi_skip: the first x of the recorded frames",
"model_devi_dt": 0.002,
"model_devi_skip": 0,
"model_devi_f_trust_lo": [0.050, 0.050],
"model_devi_f_trust_hi": {"1": 0.150, "0": 0.150},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite understand here - why not directly use an integer?

Copy link
Contributor Author

@Cloudac7 Cloudac7 Jul 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure passing the original unit test. What only in need here is just to check whether the list and dict parser work and thus I made two examples. As a matter of fact, it should be refactored as normal usage, setting different values for each system.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it seems that the level does not affect the test result. I would change it to different values.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean why {"1": 0.150, "0": 0.150} is not {1: 0.150, 0: 0.150}... I don't see the reason to use a str.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, just in prevention of misleading, because most keys are string. Seems not to be so necessary.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh you are right. JSON only allows string as the key... (Python does not)

"model_devi_e_trust_lo": 1e10,
"model_devi_e_trust_hi": 1e10,
"model_devi_clean_traj": false,
"model_devi_jobs": [
{"sys_idx": [0, 1], "temps": [50,100], "press": [1.0,2.0], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "00"}
],

"_comment": " 02.fp ",
"fp_style": "vasp",
"shuffle_poscar": false,
"fp_task_max": 100,
"fp_task_min": 10,
"fp_pp_path": ".",
"fp_pp_files": ["vasp/potcars/POTCAR.mg", "vasp/potcars/POTCAR.al"],
"_comment": " user provided vasp script ",
"user_fp_params": {
"PREC": "A",
"ENCUT": 600,
"ISYM": 0,
"ALGO": "fast",
"EDIFF": 1e-05,
"LREAL": "A",
"NPAR": 1,
"KPAR": 1,
"NELMIN": 4,
"ISIF": 2,
"ISMEAR": 1,
"SIGMA": 0.25,
"IBRION": -1,
"NSW": 0,
"LWAVE": false,
"LCHARG": false,
"PSTRESS": 0,
"KSPACING": 0.16,
"KGAMMA": false
},
"_comment": " that's all "
}
48 changes: 48 additions & 0 deletions tests/generator/test_make_fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .context import ref_cp2k_file_exinput
from .context import machine_file
from .context import param_diy_file
from .context import param_multiple_trust_file
from .context import make_kspacing_kpoints
from .context import my_file_cmp
from .context import setUpModule
Expand Down Expand Up @@ -345,6 +346,16 @@ def _check_potcar(testCase, idx, fp_pp_path, fp_pp_files) :


def _check_sel(testCase, idx, fp_task_max, flo, fhi):

def _trust_limitation_check(sys_idx, lim):
if isinstance(lim, list):
sys_lim = lim[sys_idx]
elif isinstance(lim, dict):
sys_lim = lim[str(sys_idx)]
else:
sys_lim = lim
return sys_lim

fp_path = os.path.join('iter.%06d' % idx, '02.fp')
candi_files = glob.glob(os.path.join(fp_path, 'candidate.shuffled.*.out'))
candi_files.sort()
Expand All @@ -358,6 +369,8 @@ def _check_sel(testCase, idx, fp_task_max, flo, fhi):
f_idx.append(ii.split()[1])
md_task = md_task[:fp_task_max]
f_idx = f_idx[:fp_task_max]
flo = _trust_limitation_check(int(sidx), flo)
fhi = _trust_limitation_check(int(sidx), fhi)
for tt,ff in zip(md_task, f_idx):
md_value = np.loadtxt(os.path.join(tt, 'model_devi.out'))
fvalue = md_value[int(ff)][4]
Expand Down Expand Up @@ -795,6 +808,41 @@ def test_make_fp_vasp_ele_temp(self):
# checked elsewhere
# _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files'])
shutil.rmtree('iter.000000')

def test_make_fp_vasp_multiple_trust_level(self):
    """Verify that sys_idx dependent trust levels can be read by make_fp.

    Loads the multi-trust parameter file, fabricates model-deviation
    output for two systems, runs ``make_fp`` for iteration 0 and checks
    the selected candidates and generated VASP inputs.
    """
    setUpModule()
    if os.path.isdir('iter.000000'):
        shutil.rmtree('iter.000000')
    # The `with` blocks close the files; no explicit close() is needed.
    with open(param_multiple_trust_file, 'r') as fp:
        jdata = json.load(fp)
    # mdata is loaded but not passed on: make_fp below receives an empty dict.
    with open(machine_file, 'r') as fp:
        mdata = json.load(fp)
    nsys = 2
    nmd = 3
    n_frame = 10
    md_descript = []
    ele_temp = []
    for _ in range(nsys):
        # One ramp of fake deviations per MD task, stepping by 0.29 / n_frame.
        md_descript.append(
            [np.arange(0, 0.29, 0.29 / n_frame) for _ in range(nmd)]
        )
        # All MD tasks of one system share a single random electronic temperature.
        ele_temp.append([np.random.random() * 100000] * nmd)
    atom_types = [0, 1, 0, 1]
    type_map = jdata['type_map']
    _make_fake_md(0, md_descript, atom_types, type_map, ele_temp=ele_temp)
    make_fp(0, jdata, {})
    _check_sel(self, 0, jdata['fp_task_max'],
               jdata['model_devi_f_trust_lo'], jdata['model_devi_f_trust_hi'])
    _check_poscars(self, 0, jdata['fp_task_max'], jdata['type_map'])
    _check_incar_ele_temp(self, 0, ele_temp)
    _check_kpoints_exists(self, 0)
    _check_kpoints(self, 0)
    # checked elsewhere
    # _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files'])
    shutil.rmtree('iter.000000')


class TestMakeFPGaussian(unittest.TestCase):
Expand Down