Skip to content

Commit aeb1cd7

Browse files
author
Francois Berenger
committed
Molenc_AP.encode_smiles_line is now robust to molecules which fail encoding
- an error is logged - such molecules get an empty fingerprint, which fp_string_output skips, so they are not written out - previously, the program crashed on them (exit 1)
1 parent 726dec7 commit aeb1cd7

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

src/molenc_AP.ml

+8-3
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ type unfold_count_fp = { name: string;
124124
feat_counts: int APM.t }
125125

126126
let fp_string_output writes mode out dict fp =
127+
if APM.is_empty fp.feat_counts then
128+
() (* skip molecules which failed encoding *)
129+
else
130+
begin
127131
fprintf out "%s,0.0,[" fp.name;
128132
let feat_counts = match mode with
129133
| Output -> (* writable dict *)
@@ -159,11 +163,12 @@ let fp_string_output writes mode out dict fp =
159163
) feat_counts;
160164
fprintf out "]\n";
161165
incr writes
166+
end
162167

163168
(* unfolded counted atom pairs fingerprint encoding *)
164169
let encode_smiles_line max_dist simple_types line =
170+
let smi, name = BatString.split ~by:"\t" line in
165171
try
166-
let smi, name = BatString.split ~by:"\t" line in
167172
let mol = Rdkit.__init__ ~smi () in
168173
let n = Rdkit.get_num_atoms mol () in
169174
let typer, type2int =
@@ -191,8 +196,8 @@ let encode_smiles_line max_dist simple_types line =
191196
done;
192197
{ name; feat_counts = !fp }
193198
with Improper_atom ->
194-
let () = Log.fatal "Molenc_AP.encode_smiles_line: cannot encode: %s" line in
195-
exit 1
199+
let () = Log.error "Molenc_AP.encode_smiles_line: cannot encode: %s" line in
200+
{ name; feat_counts = APM.empty }
196201

197202
let verbose = ref false
198203

0 commit comments

Comments
 (0)