source 
fuse_smile_on_atom_mapping 
 fuse_smile_on_atom_mapping (smile:str) 
Attempts to fuse a mapped SMILES string into a single molecule (e.g. [*:1]C.[*:1]N -> CN). Returns None if fusion fails.
 
smile 
str 
input SMILES string 
 
Returns str output fused SMILES string  
 
source 
 
fuse_mol_on_atom_mapping 
 fuse_mol_on_atom_mapping (mol:rdkit.Chem.rdchem.Mol) 
Attempts to fuse mapped molecules into a single molecule (e.g. [*:1]C.[*:1]N -> CN). Returns None if fusion fails.
 
 
mol 
Chem.Mol 
input rdkit Mol 
 
Returns Union[Chem.Mol, None] output fused Mol, returns None if failed  
 
assert  fuse_smile_on_atom_mapping('[*:1]C.[*:1]N' ) ==  'CN'  
source 
 
add_fragment_mapping 
 add_fragment_mapping (smile:str, map_nums:list[int]) 
Given an unmapped fragment SMILES string and a list of mapping ints, adds the mapping to the SMILES string,
e.g. add_fragment_mapping('*C*', [3,4]) -> [*:3]C[*:4].
The number of * dummy atoms should match the length of map_nums.
 
smile 
str 
SMILES string 
 
map_nums 
list[int] 
fragment mapping ints 
 
Returns str mapped SMILES  
 
source 
 
remove_fragment_mapping 
 remove_fragment_mapping (smile:str) 
 
smile 
str 
mapped SMILES string 
 
Returns str unmapped SMILES string  
 
assert add_fragment_mapping('*C', [1]) == 'C[*:1]'
assert remove_fragment_mapping('C[*:1]') == '*C'
source 
 
combine_dummies 
 combine_dummies (dummies:list[rdkit.Chem.rdchem.Mol], fuse:bool=True) 
 
dummies 
list[Chem.Mol] 
list of dummy mols 
 
fuse 
bool 
True 
if mols should be fused 
 
Returns Chem.Mol returns output mol  
 
source 
 
get_dummy_mol 
 get_dummy_mol (name:str, map_nums:list[int], id:Optional[int]=None) 
 
name 
str 
dummy name 
 
map_nums 
list[int] 
dummy mapping nums 
 
id 
Optional[int] 
None 
optional dummy ID 
 
Returns Chem.Mol returns dummy mol  
 
dummies = [get_dummy_mol('R1', [1]), get_dummy_mol('R2', [1])]
assert [to_smile(i) for i in dummies] == ['[Zr][*:1]', '[Zr][*:1]']
fused = combine_dummies(dummies)
assert to_smile(fused) == '[Zr][Zr]'
source 
 
match_mapping 
 match_mapping (molecule:chem_templates.chem.Molecule,
                mapping_idxs:list[int]) 
 
molecule 
Molecule 
input Molecule 
 
mapping_idxs 
list[int] 
mapping ints 
 
Returns bool True if mapping matches, else False  
 
source 
 
is_mapped 
 is_mapped (smile:str) 
Determines mapping status by comparing the number of * dummy atoms with the number of [*:x] mapping IDs.
 
smile 
str 
SMILES string 
 
Returns bool True if mapped, else False  
 
assert not is_mapped('*C')
assert is_mapped('[*:1]C')
assert not is_mapped('[*:1]C*')
assert match_mapping(Molecule('[*:2]C'), [2])
assert not match_mapping(Molecule('[*:2]C'), [1])
source 
 
generate_mapping_permutations 
 generate_mapping_permutations (smile:str, map_nums:list[int],
                                exact:bool=False) 
 
 
smile 
str 
SMILES string 
 
map_nums 
list[int] 
possible mapping ints 
 
exact 
bool 
False 
if True, number of map_nums must match number of * atoms 
 
Returns list[str] list of mapped SMILES  
 
assert  generate_mapping_permutations('*C*' , [2 ,3 ,4 ]) ==  ['C([*:2])[*:3]' ,'C([*:2])[*:4]' ,'C([*:2])[*:3]' ,'C([*:3])[*:4]' ,'C([*:2])[*:4]' ,'C([*:3])[*:4]' ] 
source 
 
match_and_map 
 match_and_map (fragment:str, mapping_idxs:list[int]) 
 
fragment 
str 
fragment SMILES 
 
mapping_idxs 
list[int] 
mapping ints 
 
Returns list[str] list of mapped SMILES  
 
assert match_and_map('*C*', [1, 2]) == ['C([*:1])[*:2]', 'C([*:1])[*:2]']
assert match_and_map('C([*:1])[*:2]', [4, 5]) == []
assert match_and_map('C([*:1])[*:2]', [1, 2]) == ['C([*:1])[*:2]']
source 
 
shred_smiles 
 shred_smiles (smiles:list[str], cuts:list[int], max_fragment_length:int,
               generations:int, keep_long_fragments:bool,
               worker_pool:Optional[multiprocessing.Pool]=None) 
Given a list of SMILES strings smiles, each SMILES string is fragmented with cuts (see fragment_smile). Fragments longer than max_fragment_length are re-fragmented. Repeats for generations iterations. If keep_long_fragments=True, all fragments are returned. Otherwise, only fragments shorter than max_fragment_length are returned.
keep_long_fragments=False is recommended, as molecules tend to generate very large fragments (e.g. from just cleaving off a methyl group).
source 
 
clean_fragments 
 clean_fragments (fragments:list[str], remove_mapping:bool=True) 
cleans fragments, deduplicates them, and splits multi-compound fragments
 
 
fragments 
list[str] 
list of input fragments 
 
remove_mapping 
bool 
True 
if mapping should be removed (ie [*:1]C -> *C) 
 
Returns list[str] list of cleaned fragments  
 
source 
 
fragment_smile 
 fragment_smile (smile:str, cuts:list[int]) 
 
smile 
str 
input SMILES string 
 
cuts 
list[int] 
number of cuts, ie [1,2,3] 
 
Returns list[str] list of fragments  
 
assert fragment_smile('CCC', [1, 2]) == ['', 'CC[*:1].C[*:1]', 'C([*:1])[*:2]', 'C[*:1].C[*:2]']
assert clean_fragments(fragment_smile('CCC', [1, 2])) == ['*C', '*C*', '*CC']