Source code for psico.fitting

'''
(c) 2011 Thomas Holder, MPI for Developmental Biology

License: BSD-2-Clause
'''

if not __name__.endswith('.fitting'):
    raise Exception("Must do 'import psico.fitting' instead of 'run ...'")

from pymol import cmd, CmdException

from .mcsalign import mcsalign  # noqa: F401

ALL_STATES = 0
CURRENT_STATE = -1

_auto_arg0_align = cmd.auto_arg[0]['align']
_auto_arg1_align = cmd.auto_arg[1]['align']


[docs] def alignwithanymethod(mobile, target, methods=None, async_=1, quiet=1, *, _self=cmd, **kwargs): ''' DESCRIPTION Align copies of mobile to target with several alignment methods ARGUMENTS mobile = string: atom selection target = string: atom selection methods = string: space separated list of PyMOL commands which take arguments "mobile" and "target" (in any order) {default: align super cealign tmalign theseus} ''' import threading import time if methods is None: methods = align_methods else: methods = methods.split() async_, quiet = int(kwargs.pop('async', async_)), int(quiet) mobile_obj = _self.get_object_list('first (' + mobile + ')')[0] def myalign(method): newmobile = _self.get_unused_name(mobile_obj + '_' + method) _self.create(newmobile, mobile_obj) start = time.time() _self.do('%s mobile=%s in %s, target=%s' % (method, newmobile, mobile, target)) if not quiet: print('Finished: %s (%.2f sec)' % (method, time.time() - start)) for method in methods: if method not in cmd.keyword: if not quiet: print('No such method:', method) continue if async_: t = threading.Thread(target=myalign, args=(method,)) t.setDaemon(1) t.start() else: myalign(method)
[docs] @cmd.extendaa(_auto_arg0_align, _auto_arg1_align) def usalign(mobile: str, target: str, mobile_state: int = 1, target_state: int = 1, *, mm: int = 1, ter: int = 0, args: str = '', exe='USalign', _self=cmd, **_kwargs): ''' DESCRIPTION USalign wrapper. See `tmalign` for details. Reference: Zhang et al., Nature Methods 2022 19: 1109-1115 https://zhanggroup.org/US-align/ ARGUMENTS mm = int: 1=multi-chain, 3=circularly permutated {default: 1} ter = int: 3=stop at the first TER record {default: 0} EXAMPLE fetch 1y39 2zjr usalign 1y39 & chain A+C, 2zjr & chain F+X, object=aln ''' args += f" -ter {ter} -mm {mm}" ter = int(ter) == 3 return tmalign(mobile, target, mobile_state, target_state, args, exe, ter, **_kwargs, _self=_self)
[docs] def tmalign(mobile, target, mobile_state=1, target_state=1, args='', exe='TMalign', ter=0, transform=1, object=None, quiet=0, *, _self=cmd): ''' DESCRIPTION TMalign wrapper. You may also use this as a TMscore or MMalign wrapper if you privide the corresponding executable with the "exe" argument. Reference: Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9 http://zhanglab.ccmb.med.umich.edu/TM-align/ ARGUMENTS mobile, target = string: atom selections mobile_state, target_state = int: object states {default: 1} args = string: Extra arguments like -d0 5 -L 100 exe = string: Path to TMalign (or TMscore, MMalign, USalign) executable {default: TMalign} ter = 0/1: If ter=0, then ignore chain breaks because TMalign will stop at first TER record {default: 0} ''' import pymol.exporting import subprocess, tempfile, os, re from .exporting import save_pdb_without_ter ter, quiet = int(ter), int(quiet) mobile_filename = tempfile.mktemp('.pdb', 'mobile') target_filename = tempfile.mktemp('.pdb', 'target') matrix_filename = tempfile.mktemp('.txt', 'matrix') mobile_sele = f'({mobile}) and (not hetatm) and alt +A' target_sele = f'({target}) and (not hetatm) and alt +A' mobile_ca_sele = f'({mobile_sele}) and guide' target_ca_sele = f'({target_sele}) and guide' if ter: save = pymol.exporting.save else: save = save_pdb_without_ter save(mobile_filename, mobile_sele, state=mobile_state, _self=_self) save(target_filename, target_sele, state=target_state, _self=_self) exe = cmd.exp_path(exe) args = [exe, mobile_filename, target_filename, '-m', matrix_filename] + args.split() try: process = subprocess.Popen(args, stdout=subprocess.PIPE, universal_newlines=True) lines = process.stdout.readlines() except OSError as ex: raise CmdException( f'Cannot execute "{exe}", please provide full path to TMscore or TMalign executable' ) from ex finally: os.remove(mobile_filename) os.remove(target_filename) # TMalign >= 2012/04/17 if os.path.exists(matrix_filename): lines += open(matrix_filename).readlines() os.remove(matrix_filename) r = None re_score = re.compile(r'TM-score\s*=\s*(\d*\.\d*)') rowcount = 0 matrix = [] line_it = iter(lines) headercheck = False alignment = [""] * 3 for line in line_it: if 4 >= rowcount > 0: if rowcount >= 2: a = list(map(float, line.split())) matrix.extend(a[2:5]) matrix.append(a[1]) rowcount += 1 elif not headercheck and line.startswith(' * '): a = line.split(None, 2) if len(a) == 3: headercheck = a[1] elif line.lower().startswith(' -------- rotation matrix'): rowcount = 1 elif line.startswith('------ The rotation matrix'): rowcount = 1 elif line.startswith('(":" denotes'): alignment = [next(line_it).rstrip() for i in range(3)] else: match = re_score.search(line) if match is not None: r = float(match.group(1)) if not quiet: print(line.rstrip()) if not matrix: raise CmdException("incomplete output") if not quiet: for i in range(0, len(alignment[0]) - 1, 78): for line in alignment: print(line[i:i + 78]) print('') assert len(matrix) == 3 * 4 matrix.extend([0, 0, 0, 1]) if int(transform): for model in _self.get_object_list(mobile): _self.transform_object(model, matrix, state=0, homogenous=1) # alignment object if object is not None: mobile_idx, target_idx = [], [] space = {'mobile_idx': mobile_idx, 'target_idx': target_idx} _self.iterate_state(mobile_state, mobile_ca_sele, 'mobile_idx.append("%s`%d" % (model, index))', space=space) _self.iterate_state(target_state, target_ca_sele, 'target_idx.append("%s`%d" % (model, index))', space=space) for i, aa in enumerate(alignment[0]): if aa in ('-', '*'): mobile_idx.insert(i, None) for i, aa in enumerate(alignment[2]): if aa in ('-', '*'): target_idx.insert(i, None) if (len(mobile_idx) == len(target_idx) == len(alignment[2])): _self.rms_cur( ' '.join(idx for (idx, m) in zip(mobile_idx, alignment[1]) if m in ':.'), ' '.join(idx for (idx, m) in zip(target_idx, alignment[1]) if m in ':.'), cycles=0, matchmaker=4, object=object) else: print('Could not load alignment object') if not quiet: if headercheck: print('Finished Program:', headercheck) if r is not None: print('Found in output TM-score = %.4f' % (r)) return r
[docs] def dyndom_parse_info(filename, selection='(all)', quiet=0, *, _self=cmd): import re fixed = False fixed_name = None dom_nr = 0 color = 'none' bending = list() for line in open(filename): if line.startswith('FIXED DOMAIN'): fixed = True continue if line.startswith('MOVING DOMAIN'): fixed = False continue m = re.match(r'DOMAIN NUMBER: *(\d+) \(coloured (\w+)', line) if m: dom_nr = m.group(1) color = m.group(2) continue m = re.match(r'RESIDUE NUMBERS :(.*)', line) if m: resi = m.group(1) resi = resi.replace(',', '+') resi = resi.replace(' ', '') if not quiet: print('Domain ' + dom_nr + ' (' + color + '): resi ' + resi) name = 'domain_' + dom_nr _self.select(name, '(%s) and (resi %s)' % (selection, resi), 0) _self.color(color, name) if fixed: fixed_name = name continue m = re.match(r'BENDING RESIDUES:(.*)', line) if m: resi = m.group(1) resi = resi.replace(',', '+') resi = resi.replace(' ', '') bending.append(resi) if len(bending) > 0: name = 'bending' _self.select(name, '(%s) and (resi %s)' % (selection, '+'.join(bending)), 0) _self.color('green', name) return fixed_name
[docs] def dyndom(mobile, target, window=5, domain=20, ratio=1.0, exe='', transform=1, quiet=1, mobile_state=1, target_state=1, match='align', preserve=0, *, _self=cmd): ''' DESCRIPTION DynDom wrapper DynDom is a program to determine domains, hinge axes and hinge bending residues in proteins where two conformations are available. http://fizz.cmp.uea.ac.uk/dyndom/ USAGE dyndom mobile, target [, window [, domain [, ratio ]]] ''' import tempfile, subprocess, os, shutil, sys from .exporting import save_pdb_without_ter window, domain, ratio = int(window), int(domain), float(ratio) transform, quiet = int(transform), int(quiet) mobile_state, target_state = int(mobile_state), int(target_state) mm = MatchMaker( '(%s) & polymer & state %d' % (mobile, mobile_state), '(%s) & polymer & state %d' % (target, target_state), match, _self=_self) chains = _self.get_chains(mm.mobile) if len(chains) != 1: raise CmdException('mobile selection must be single chain') chain1id = chains[0] chains = _self.get_chains(mm.target) if len(chains) != 1: raise CmdException('target selection must be single chain') chain2id = chains[0] if not exe: from . import which exe = which('DynDom', 'dyndom') if not exe: raise CmdException('Cannot find DynDom executable') else: exe = cmd.exp_path(exe) tempdir = tempfile.mkdtemp() try: filename1 = os.path.join(tempdir, 'mobile.pdb') filename2 = os.path.join(tempdir, 'target.pdb') commandfile = os.path.join(tempdir, 'command.txt') infofile = os.path.join(tempdir, 'out_info') save_pdb_without_ter(filename1, mm.mobile, state=mobile_state, _self=_self) save_pdb_without_ter(filename2, mm.target, state=target_state, _self=_self) f = open(commandfile, 'w') f.write('title=out\nfilename1=%s\nchain1id=%s\nfilename2=%s\nchain2id=%s\n' 'window=%d\ndomain=%d\nratio=%.4f\n' % (filename1, chain1id, filename2, chain2id, window, domain, ratio)) f.close() process = subprocess.Popen([exe, commandfile], cwd=tempdir, universal_newlines=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) for line in process.stdout: if not quiet: sys.stdout.write(line) if process.poll() != 0: raise CmdException('"%s" failed with status %d' % (exe, process.returncode)) _self.color('gray', mobile) fixed_name = dyndom_parse_info(infofile, mm.mobile, quiet) except OSError as ex: raise CmdException( f'Cannot execute "{exe}", please provide full path to DynDom executable' ) from ex finally: if not int(preserve): shutil.rmtree(tempdir) elif not quiet: print(' Not deleting temporary directory:', tempdir) if transform and fixed_name is not None: _self.align(fixed_name, target)
[docs] def gdt_ts(mobile, target, cutoffs='1 2 4 8', quiet=1, *, _self=cmd): ''' DESCRIPTION Global Distance Test Total Score (GDT_TS) ''' cutoffs = list(map(float, cutoffs.split())) quiet = int(quiet) mobile = '(' + mobile + ') and guide' target = '(' + target + ') and guide' ts = 0 N = min(_self.count_atoms(mobile), _self.count_atoms(target)) for cutoff in cutoffs: x = _self.align(mobile, target, cutoff=cutoff, transform=0) p = float(x[1]) / N if not quiet: print(' GDT_TS: GDT_P%.1f = %.2f' % (cutoff, p)) ts += p ts /= len(cutoffs) if not quiet: print(' GDT_TS: Total Score = %.2f' % (ts)) return ts
[docs] def get_rmsd_func(): ''' DESCRIPTION API only. Returns a function that uses either numpy (fast) or chempy.cpv (slow) to calculate the rmsd fit of two nx3 arrays. ''' try: # this is much faster than cpv.fit from numpy import dot, sqrt, array from numpy.linalg import svd def rmsd(X, Y): X = X - X.mean(0) Y = Y - Y.mean(0) R_x = (X**2).sum() R_y = (Y**2).sum() L = svd(dot(Y.T, X))[1] return sqrt((R_x + R_y - 2 * L.sum()) / len(X)) rmsd.array = array except ImportError: from chempy import cpv def rmsd(X, Y): return cpv.fit(X, Y)[-1] rmsd.array = lambda x: x return rmsd
[docs] class MatchMaker(object): ''' DESCRIPTION API only. Matches two atom selections and provides two matched subselections with equal atom count. May involve temporary objects or named selections which will be automatically deleted. ARGUMENTS mobile = string: first atom selection target = string: second atom selection match = string: method how to match atoms * none: (dummy) * in: match atoms by "in" operator * like: match atoms by "like" operator * align: match atoms by cmd.align (without refinement) * super: match atoms by cmd.super (without refinement) * <name of alignment object>: use given alignment RESULT Properties "mobile" and "target" hold the matched subselections as selection strings. ''' def __init__(self, mobile, target, match, *, autodelete=True, _self=cmd): self._self = _self self.autodelete = autodelete self.temporary = [] if match == 'none': self.mobile = mobile self.target = target elif match in ['in', 'like']: self.mobile = '(%s) %s (%s)' % (mobile, match, target) self.target = '(%s) %s (%s)' % (target, match, mobile) elif match in ['align', 'super']: self.align(mobile, target, match) elif match in _self.get_names('all') and _self.get_type(match) in ('object:', 'object:alignment'): self.from_alignment(mobile, target, match) else: raise CmdException('unkown match method', match)
[docs] def check(self): return self._self.count_atoms(self.mobile) == self._self.count_atoms(self.target)
[docs] def align(self, mobile, target, match): ''' Align mobile to target using the alignment method given by "match" ''' aln_obj = self._self.get_unused_name('_') self.temporary.append(aln_obj) align = cmd.keyword[match][0] align(mobile, target, cycles=0, transform=0, object=aln_obj, _self=self._self) self._self.disable(aln_obj) self.from_alignment(mobile, target, aln_obj)
[docs] def from_alignment(self, mobile, target, aln_obj): ''' Use alignment given by "aln_obj" (name of alignment object) ''' from .selecting import wait_for wait_for(aln_obj, _self=self._self) self.mobile = '(%s) and %s' % (mobile, aln_obj) self.target = '(%s) and %s' % (target, aln_obj) if self.check(): return # difficult: if selections spans only part of the alignment or # if alignment object covers more than the two objects, then we # need to pick those columns that have no gap in any of the two # given selections mobileidx = set(self._self.index(mobile)) targetidx = set(self._self.index(target)) mobileidxsel = [] targetidxsel = [] for column in self._self.get_raw_alignment(aln_obj): mobiles = mobileidx.intersection(column) if len(mobiles) == 1: targets = targetidx.intersection(column) if len(targets) == 1: mobileidxsel.extend(mobiles) targetidxsel.extend(targets) self.mobile = self._self.get_unused_name('_mobile') self.target = self._self.get_unused_name('_target') self.temporary.append(self.mobile) self.temporary.append(self.target) mobile_objects = set(idx[0] for idx in mobileidxsel) target_objects = set(idx[0] for idx in targetidxsel) if len(mobile_objects) == len(target_objects) == 1: mobile_index_list = [idx[1] for idx in mobileidxsel] target_index_list = [idx[1] for idx in targetidxsel] self._self.select_list(self.mobile, mobile_objects.pop(), mobile_index_list, mode='index') self._self.select_list(self.target, target_objects.pop(), target_index_list, mode='index') else: self._self.select(self.mobile, ' '.join('%s`%d' % idx for idx in mobileidxsel)) self._self.select(self.target, ' '.join('%s`%d' % idx for idx in targetidxsel))
def __enter__(self): return self def __exit__(self, type_, value, traceback): self._cleanup() self.autodelete = False def __del__(self): self._cleanup() def _cleanup(self): if not self.autodelete: return for name in self.temporary: self._self.delete(name)
[docs] def local_rms(mobile, target, window=20, mobile_state=1, target_state=1, match='align', load_b=1, visualize=1, quiet=1, *, _self=cmd): ''' DESCRIPTION "local_rms" computes the C-alpha RMS fit within a sliding window along the backbone. The obtained RMS is assigned as a pseudo b-factor to the residue in the middle of the window. This is useful to visualize hinge-regions. The result is very sensitive to window size. USAGE local_rms mobile, target [, window ] ARGUMENTS mobile = string: object to assign b-factors and to visualize as putty cartoon target = string: object to superimpose mobile to window = integer: width of sliding window {default: 20} match = string: in, like, align, none or the name of an alignment object {default: align} * in: match all atom identifiers (segi,chain,resn,resi,name) * like: match residue number (resi) * align: do a sequence alignment * none: assume same number of atoms in both selections * name of alignment object: take sequence alignment from object EXAMPLE fetch 2x19 2xwu, async=0 remove not chain B or not polymer local_rms 2x19, 2xwu, 40 ''' rmsd = get_rmsd_func() array = rmsd.array window = int(window) mobile_state, target_state = int(mobile_state), int(target_state) load_b, visualize, quiet = int(load_b), int(visualize), int(quiet) w2 = window // 2 w4 = window // 4 mm = MatchMaker('(%s) and guide' % (mobile), '(%s) and guide' % (target), match, _self=_self) model_mobile = _self.get_model(mm.mobile, mobile_state) model_target = _self.get_model(mm.target, target_state) if len(model_mobile.atom) != len(model_mobile.atom): raise CmdException('number of atoms differ, please check match method') seq_start = model_mobile.atom[0].resi_number seq_end = model_mobile.atom[-1].resi_number resv2i = dict((a.resi_number, i) for (i, a) in enumerate(model_mobile.atom)) resv2b = dict() X_mobile = array(model_mobile.get_coord_list()) X_target = array(model_target.get_coord_list()) for resv in range(seq_start, seq_end + 1): for resv_from in range(resv - w2, resv + 1): i_from = resv2i.get(resv_from) if i_from is not None: break for resv_to in range(resv + w2, resv - 1, -1): i_to = resv2i.get(resv_to) if i_to is not None: break if i_from is None or i_to is None: continue if i_to - i_from < w4: continue x = X_mobile[i_from:i_to + 1] y = X_target[i_from:i_to + 1] resv2b[resv] = rmsd(x, y) if not quiet: print(' resi %4d: RMS = %6.3f (%4d atoms)' % (resv, resv2b[resv], i_to - i_from + 1)) if load_b: _self.alter(mobile, 'b=resv2b.get(resv, -1.0)', space={'resv2b': resv2b}) if load_b and visualize: _self.color('yellow', '(%s) and b < -0.5' % (mobile)) _self.spectrum('b', 'blue_white_red', '(%s) and b > -0.5' % (mobile)) _self.show_as('cartoon', mobile) _self.hide('cartoon', '(%s) and b < -0.5' % (mobile)) _self.cartoon('putty', mobile) return resv2b
[docs] def extra_fit(selection='(all)', reference=None, method='align', zoom=1, quiet=0, _self=cmd, **kwargs): ''' DESCRIPTION Like "intra_fit", but for multiple objects instead of multiple states. ARGUMENTS selection = string: atom selection of multiple objects {default: all} reference = string: reference object name {default: first object in selection} method = string: alignment method (command that takes "mobile" and "target" arguments, like "align", "super", "cealign" {default: align} ... extra arguments are passed to "method" SEE ALSO alignto, cmd.util.mass_align, align_all.py from Robert Campbell ''' zoom, quiet = int(zoom), int(quiet) sele_name = _self.get_unused_name('_') _self.select(sele_name, selection) # for speed models = _self.get_object_list(sele_name) if reference is None: reference = models[0] models = models[1:] elif reference in models: models.remove(reference) else: _self.select(sele_name, reference, merge=1) if isinstance(method, str): if method in cmd.keyword: method = cmd.keyword[method][0] else: raise CmdException('Unknown method: ' + str(method)) for model in models: x = method(mobile='%s and model %s' % (sele_name, model), target='%s and model %s' % (sele_name, reference), **kwargs) if not quiet: if isinstance(x, (list, tuple)): print('%-20s RMS = %8.3f (%d atoms)' % (model, x[0], x[1])) elif isinstance(x, float): print('%-20s RMS = %8.3f' % (model, x)) elif isinstance(x, dict) and 'RMSD' in x: natoms = x.get('alignment_length', 0) suffix = (' (%s atoms)' % natoms) if natoms else '' print('%-20s RMS = %8.3f' % (model, x['RMSD']) + suffix) else: print('%-20s' % (model,)) if zoom: _self.zoom(sele_name) _self.delete(sele_name)
def _run_theseus(args, tempdir, preserve, quiet): ''' DESCRIPTION Helper function for theseus and intra_theseus ''' import subprocess, os translations = [] rotations = [] t_type = float try: if quiet: subprocess.call(args, cwd=tempdir) else: import re unesc = re.compile('\x1b' + r'\[[\d;]+m').sub process = subprocess.Popen(args, cwd=tempdir, stdout=subprocess.PIPE, universal_newlines=True) for line in process.stdout: print(unesc('', line.rstrip())) filename = os.path.join(tempdir, 'theseus_transf2.txt') if not os.path.exists(filename): # THESEUS 3.x filename = os.path.join(tempdir, 'theseus_transf.txt') if not os.path.exists(filename): raise CmdException('no theseus_transf2.txt or ' 'theseus_transf.txt output file') def t_type(t): return float(t) * -1. handle = open(filename) for line in handle: if line[10:13] == ' t:': translations.append(list(map(t_type, line[13:].split()))) elif line[10:13] == ' R:': rotations.append(list(map(float, line[13:].split()))) handle.close() except OSError as ex: raise CmdException('Cannot execute "%s"' % (args[0])) from ex finally: if not preserve: import shutil shutil.rmtree(tempdir) elif not quiet: print(' Not deleting temporary directory:', tempdir) return translations, rotations
[docs] def theseus(mobile, target, match='align', cov=0, cycles=200, mobile_state=1, target_state=1, exe='theseus', preserve=0, quiet=1, *, _self=cmd): ''' DESCRIPTION Structural superposition of two molecules with maximum likelihood. THESEUS: Maximum likelihood multiple superpositioning http://www.theseus3d.org ARGUMENTS mobile = string: atom selection for mobile atoms target = string: atom selection for target atoms match = string: in, like, align, none or the name of an alignment object (see "local_rms" help for details) {default: align} cov = 0/1: 0 is variance weighting, 1 is covariance weighting (slower) {default: 0} SEE ALSO align, super, cealign ''' import tempfile, os cov, cycles = int(cov), int(cycles) mobile_state, target_state = int(mobile_state), int(target_state) preserve, quiet = int(preserve), int(quiet) tempdir = tempfile.mkdtemp() mobile_filename = os.path.join(tempdir, 'mobile.pdb') target_filename = os.path.join(tempdir, 'target.pdb') mm = MatchMaker(mobile, target, match, _self=_self) _self.save(mobile_filename, mm.mobile, mobile_state) _self.save(target_filename, mm.target, target_state) exe = cmd.exp_path(exe) args = [exe, '-a0', '-c' if cov else '-v', '-i%d' % cycles, mobile_filename, target_filename] translations, rotations = _run_theseus(args, tempdir, preserve, quiet) matrices = [R[0:3] + [i * t[0]] + R[3:6] + [i * t[1]] + R[6:9] + [i * t[2], 0, 0, 0, 1] for (R, t, i) in zip(rotations, translations, [-1, 1])] obj_list = _self.get_object_list(mobile) for obj in obj_list: _self.transform_object(obj, matrices[0], 0, transpose=1) _self.transform_object(obj, matrices[1], 0) if not quiet: print(' theseus: done')
[docs] def intra_theseus(selection, state=1, cov=0, cycles=200, exe='theseus', preserve=0, quiet=1, *, _self=cmd): ''' DESCRIPTION Fits all states of an object to an atom selection with maximum likelihood. THESEUS: Maximum likelihood multiple superpositioning http://www.theseus3d.org ARGUMENTS selection = string: atoms to fit state = integer: keep transformation of this state unchanged {default: 1} cov = 0/1: 0 is variance weighting, 1 is covariance weighting (slower) {default: 0} SEE ALSO intra_fit, intra_rms_cur ''' import tempfile, os state, cov, cycles = int(state), int(cov), int(cycles) preserve, quiet = int(preserve), int(quiet) tempdir = tempfile.mkdtemp() filename = os.path.join(tempdir, 'mobile.pdb') _self.save(filename, selection, 0) exe = cmd.exp_path(exe) args = [exe, '-a0', '-c' if cov else '-v', '-i%d' % cycles, filename] translations = [] rotations = [] translations, rotations = _run_theseus(args, tempdir, preserve, quiet) matrices = [R[0:3] + [-t[0]] + R[3:6] + [-t[1]] + R[6:9] + [-t[2], 0, 0, 0, 1] for (R, t) in zip(rotations, translations)] # intra fit states obj_list = _self.get_object_list(selection) for i, m in enumerate(matrices): for obj in obj_list: _self.transform_object(obj, m, i + 1, transpose=1) # fit back to given state if 0 < state <= len(matrices): m = list(matrices[state - 1]) for i in [3, 7, 11]: m[i] *= -1 for obj in obj_list: _self.transform_object(obj, m, 0) if not quiet: print(' intra_theseus: %d states aligned' % (len(matrices)))
[docs] def prosmart(mobile, target, mobile_state=1, target_state=1, exe='prosmart', transform=1, object=None, quiet=0, *, _self=cmd): ''' DESCRIPTION ProSMART wrapper. http://www2.mrc-lmb.cam.ac.uk/groups/murshudov/ ''' import subprocess, tempfile, os, shutil, glob quiet = int(quiet) tempdir = tempfile.mkdtemp() mobile_filename = os.path.join(tempdir, 'mobile.pdb') target_filename = os.path.join(tempdir, 'target.pdb') _self.save(mobile_filename, mobile, state=mobile_state) _self.save(target_filename, target, state=target_state) exe = cmd.exp_path(exe) args = [exe, '-p1', mobile_filename, '-p2', target_filename, '-a'] def xglob(x): return glob.glob( os.path.join(tempdir, 'ProSMART_Output/Output_Files', x)) try: subprocess.check_call(args, cwd=tempdir) transfiles = xglob('Superposition/Transformations/*/*.txt') with open(transfiles[0]) as f: f = iter(f) for line in f: if line.startswith('ROTATION'): matrix = [list(map(float, next(f).split())) + [0] for _ in range(3)] elif line.startswith('TRANSLATION'): matrix.append([-float(v) for v in next(f).split()] + [1]) break if int(transform): matrix = [v for m in matrix for v in m] assert len(matrix) == 4 * 4 for model in _self.get_object_list(mobile): _self.transform_object(model, matrix, state=0) if object: from .importing import load_aln alnfiles = xglob('Residue_Alignment_Scores/*/*.txt') alnfiles = [x for x in alnfiles if not x.endswith('_clusters.txt')] load_aln(alnfiles[0], object, mobile, target, _self=_self) except OSError as ex: raise CmdException( f'Cannot execute "{exe}", please provide full path to prosmart executable' ) from ex finally: shutil.rmtree(tempdir) if not quiet: print(' prosmart: done')
def _bfit_get_prior(distribution, em=0): from csb.statistics import scalemixture as sm if distribution == 'student': prior = sm.GammaPrior() if em: prior.estimator = sm.GammaPosteriorMAP() elif distribution == 'k': prior = sm.InvGammaPrior() if em: prior.estimator = sm.InvGammaPosteriorMAP() else: raise AttributeError('distribution') return prior
[docs] def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0, cycles=10, match='align', guide=1, seed=0, quiet=1, bfit=0, distribution='student', _self=cmd): ''' DESCRIPTION Weighted superposition of the model in the first selection on to the model in the second selection. The weights are estimated with maximum likelihood. The result should be very similar to "theseus". Requires CSB, https://github.com/csb-toolbox/CSB ARGUMENTS mobile = string: atom selection target = string: atom selection mobile_state = int: object state of mobile selection {default: current} target_state = int: object state of target selection {default: current} load_b = 0 or 1: save -log(weights) into B-factor column {default: 0} SEE ALSO intra_xfit, align, super, fit, cealign, theseus ''' from numpy import asarray, identity, log, dot, zeros from csb.bio.utils import distance_sq, wfit, fit from . import querying cycles, quiet = int(cycles), int(quiet) mobile_state, target_state = int(mobile_state), int(target_state) mobile_obj = querying.get_object_name(mobile, 1, _self=_self) if mobile_state < 1: mobile_state = querying.get_object_state(mobile_obj, _self=_self) if target_state < 1: target_state = querying.get_selection_state(target, _self=_self) if int(guide): mobile = '(%s) and guide' % (mobile) target = '(%s) and guide' % (target) mm = MatchMaker(mobile, target, match, _self=_self) Y = asarray(_self.get_coords(mm.mobile, mobile_state)) X = asarray(_self.get_coords(mm.target, target_state)) if int(seed): R, t = identity(3), zeros(3) else: R, t = fit(X, Y) if int(bfit): # adapted from csb.apps.bfit from csb.bio.utils import distance, probabilistic_fit from csb.statistics.scalemixture import ScaleMixture mixture = ScaleMixture(scales=X.shape[0], prior=_bfit_get_prior(distribution), d=3) for _ in range(cycles): data = distance(Y, dot(X - t, R)) mixture.estimate(data) R, t = probabilistic_fit(X, Y, mixture.scales) scales = mixture.scales else: for _ in range(cycles): data = distance_sq(Y, dot(X - t, R)) scales = 1.0 / data.clip(1e-3) R, t = wfit(X, Y, scales) m = identity(4) m[0:3, 0:3] = R m[0:3, 3] = t _self.transform_object(mobile_obj, list(m.flat)) if int(load_b): b_iter = iter(-log(scales)) _self.alter(mm.mobile, 'b = next(b_iter)', space={'b_iter': b_iter, 'next': next}) if not quiet: print(' xfit: %d atoms aligned' % (len(X)))
[docs] def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1, bfit=0, distribution='student', _self=cmd): ''' DESCRIPTION Weighted superposition of all states of an object to the intermediate structure over all states. The weights are estimated with maximum likelihood. The result should be very similar to "intra_theseus". Requires CSB, https://github.com/csb-toolbox/CSB ARGUMENTS selection = string: atom selection load_b = 0 or 1: save -log(weights) into B-factor column {default: 0} NOTE Assumes all states to have identical number of CA-atoms. SEE ALSO xfit, intra_fit, intra_theseus ''' from numpy import asarray, identity, log, dot, zeros from csb.bio.utils import wfit, fit from .querying import get_ensemble_coords cycles, quiet = int(cycles), int(quiet) if int(guide): selection = '(%s) and guide' % (selection) mobile_objs = _self.get_object_list(selection) n_states_objs = [] X = [] for obj in mobile_objs: X_obj = get_ensemble_coords('({}) & {}'.format(selection, obj), _self=_self) if X and len(X_obj) and len(X[0]) != len(X_obj[0]): raise CmdException('objects have different number of atoms') X.extend(X_obj) n_states_objs.append(len(X_obj)) n_models = len(X) X = asarray(X) R, t = [identity(3)] * n_models, [zeros(3)] * n_models if int(bfit): # adapted from csb.apps.bfite from csb.bio.utils import average_structure, distance from csb.statistics.scalemixture import ScaleMixture average = average_structure(X) mixture = ScaleMixture(scales=X.shape[1], prior=_bfit_get_prior(distribution), d=3) for i in range(n_models): R[i], t[i] = fit(X[i], average) for _ in range(cycles): data = asarray([distance(average, dot(X[i] - t[i], R[i])) for i in range(n_models)]) mixture.estimate(data.T) for i in range(n_models): R[i], t[i] = wfit(X[i], average, mixture.scales) scales = mixture.scales else: if int(seed): ensemble = X else: ensemble = [] for i in range(n_models): R[i], t[i] = fit(X[i], X[0]) ensemble.append(dot(X[i] - t[i], R[i])) for _ in range(cycles): ensemble = asarray(ensemble) average = ensemble.mean(0) data = ensemble.var(0).sum(1) scales = 1.0 / data.clip(1e-3) ensemble = [] for i in range(n_models): R[i], t[i] = wfit(X[i], average, scales) ensemble.append(dot(X[i] - t[i], R[i])) m = identity(4) back = identity(4) back[0:3, 0:3] = R[0] back[0:3, 3] = t[0] transformation_i = 0 for mobile_obj, n_states in zip(mobile_objs, n_states_objs): for state_i in range(n_states): m[0:3, 0:3] = R[transformation_i].T m[3, 0:3] = -t[transformation_i] _self.transform_object(mobile_obj, list(m.flat), state=state_i + 1) transformation_i += 1 # fit back to first state _self.transform_object(mobile_obj, list(back.flat), state=0) if int(load_b): b_iter = iter(-log(scales)) _self.alter('({}) & {} & state 1'.format(selection, mobile_obj), 'b = next(b_iter)', space={'b_iter': b_iter, 'next': next}) if not quiet: print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
[docs] def promix(mobile, target, K=0, prefix=None, mobile_state=-1, target_state=-1, match='align', guide=1, quiet=1, async_=-1, _self=cmd, **kwargs): ''' DESCRIPTION Finds rigid segments in two objects with different conformation. Requires CSB, https://github.com/csb-toolbox/CSB ARGUMENTS mobile, target = string: atom selections K = integer: Number of segments {default: guess} prefix = string: Prefix of named segment selections to make SEE ALSO intra_promix REFERENCE Mixture models for protein structure ensembles Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92 ''' from numpy import asarray from csb.statistics.mixtures import SegmentMixture as Mixture # noqa: F401 imported but unused from .querying import get_object_name K, guide, quiet = int(K), int(guide), int(quiet) async_ = int(kwargs.pop('async', async_)) mobile_state, target_state = int(mobile_state), int(target_state) if async_ < 0: async_ = not quiet if isinstance(target, str) and target.isdigit() and \ _self.count_atoms('?' + target) == 0 and _self.count_states(mobile) > 1: print(' Warning: sanity test suggest you want "intra_promix"') return intra_promix(mobile, target, prefix, 0, guide, quiet, async_) if guide: mobile = '(%s) and guide' % (mobile) target = '(%s) and guide' % (target) _self.color('gray', mobile) obj = get_object_name(mobile, _self=_self) mm = MatchMaker(mobile, target, match, _self=_self) selection = mm.mobile X = asarray([ _self.get_coords(mm.mobile, mobile_state), _self.get_coords(mm.target, target_state), ]) if not async_: _promix(**locals()) else: import threading t = threading.Thread(target=_promix, kwargs=locals()) t.setDaemon(1) t.start()
[docs] def intra_promix(selection, K=0, prefix=None, conformers=0, guide=1, quiet=1, async_=-1, _self=cmd, **kwargs): ''' DESCRIPTION Finds rigid segments in a multi-state object. Requires CSB, https://github.com/csb-toolbox/CSB ARGUMENTS selection = string: atom selection K = integer: Number of segments {default: guess} prefix = string: Prefix of named segment selections to make SEE ALSO promix REFERENCE Mixture models for protein structure ensembles Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92 ''' from numpy import asarray from csb.statistics import mixtures from .querying import get_ensemble_coords, get_object_name K, conformers = int(K), int(conformers) guide, quiet, async_ = int(guide), int(quiet), int(kwargs.pop('async', async_)) if async_ < 0: async_ = not quiet Mixture = mixtures.ConformerMixture if conformers else mixtures.SegmentMixture obj = get_object_name(selection, _self=_self) n_models = _self.count_states(obj) if guide: selection = '(%s) and guide' % (selection) if n_models < 2: raise CmdException('object needs multiple states') X = asarray(get_ensemble_coords(selection, _self=_self)) assert X.shape == (n_models, _self.count_atoms(selection), 3) if not async_: _promix(**locals()) else: import threading t = threading.Thread(target=_promix, kwargs=locals()) t.setDaemon(1) t.start()
def _promix(conformers=0, prefix=None, obj=NotImplemented, selection=NotImplemented, X=NotImplemented, K=NotImplemented, Mixture=NotImplemented, _self=cmd, **_): if not prefix: if conformers: prefix = obj + '_conformer' else: prefix = obj + '_segment' _self.delete(prefix + '_*') id_list = [] _self.iterate(selection, 'id_list.append(ID)', space=locals()) mixture = Mixture.new(X, K) membership = mixture.membership if conformers: states_list = [0] * mixture.K for (i, k) in enumerate(membership): states_list[k] += 1 name = '%s_%d' % (prefix, k + 1) _self.create(name, obj, i + 1, states_list[k]) else: _self.color('gray', selection) for k in range(mixture.K): name = '%s_%d' % (prefix, k + 1) id_list_k = [i for (i, m) in zip(id_list, membership) if m == k] _self.select_list(name, obj, id_list_k) _self.disable(name) _self.color(k + 2, name) for k, (sigma, w) in enumerate(zip(mixture.sigma, mixture.w)): print(' %s_%d: sigma = %6.3f, w = %.3f' % (prefix, k + 1, sigma, w)) print(' BIC: %.2f' % (mixture.BIC)) print(' Log Likelihood: %.2f' % (mixture.log_likelihood))
[docs] def intra_boxfit(selection="polymer", center=(0.5, 0.5, 0.5), *, _self=cmd): """ DESCRIPTION Center selection in simulation box. ARGUMENTS selection = str: atom selection to center {default: polymer} center = list-of-3-floats: Target position in fractional space {default: [0.5, 0.5, 0.5]} """ from numpy import dot from .xtal import cellbasis if isinstance(center, str): center = _self.safe_list_eval(center) objects = _self.get_object_list(selection) for state in range(1, _self.count_states(selection) + 1): selecenter = _self.get_coords(selection, state).mean(0) for obj in objects: sym = _self.get_symmetry(obj, state) if not sym: raise CmdException("no symmetry") basis = cellbasis(sym[3:6], sym[0:3])[:3, :3] cset = _self.get_coordset(obj, state, copy=0) cset += dot(basis, center) - selecenter _self.rebuild(selection)
[docs] @cmd.extend def intra_center(selection: str = "polymer", state: int = CURRENT_STATE, *, _self=cmd): """ DESCRIPTION Center frames on a selection. Like intra_fit, but without rotation. ARGUMENTS selection = str: atom selection to center {default: polymer} state = int: reference state {default: current state} """ objects = _self.get_object_list(selection) center = _self.get_coords(selection, state).mean(0) for state in range(1, _self.count_states(selection) + 1): offset = center - _self.get_coords(selection, state).mean(0) offset = offset.tolist() for obj in objects: _self.translate(offset, 'none', state, camera=0, object=obj, object_mode=1)
# all those have kwargs: mobile, target, mobile_state, target_state align_methods = ['align', 'super', 'cealign', 'tmalign', 'theseus', 'prosmart', 'xfit', 'mcsalign'] align_methods_sc = cmd.Shortcut(align_methods) # pymol commands cmd.extend('alignwithanymethod', alignwithanymethod) cmd.extend('tmalign', tmalign) cmd.extend('dyndom', dyndom) cmd.extend('gdt_ts', gdt_ts) cmd.extend('local_rms', local_rms) if 'extra_fit' not in cmd.keyword: cmd.extend('extra_fit', extra_fit) cmd.extend('intra_theseus', intra_theseus) cmd.extend('theseus', theseus) cmd.extend('prosmart', prosmart) cmd.extend('xfit', xfit) cmd.extend('intra_xfit', intra_xfit) cmd.extend('promix', promix) cmd.extend('intra_promix', intra_promix) cmd.extend('intra_boxfit', intra_boxfit) # autocompletion cmd.auto_arg[0].update([ ('alignwithanymethod', _auto_arg0_align), ('tmalign', _auto_arg0_align), ('dyndom', _auto_arg0_align), ('gdt_ts', _auto_arg0_align), ('local_rms', _auto_arg0_align), ('extra_fit', _auto_arg0_align), ('theseus', _auto_arg0_align), ('intra_theseus', _auto_arg1_align), ('prosmart', _auto_arg0_align), ('xfit', _auto_arg0_align), ('intra_xfit', _auto_arg0_align), ('promix', _auto_arg0_align), ('intra_promix', _auto_arg0_align), ('intra_boxfit', _auto_arg1_align), ('intra_center', _auto_arg1_align), ]) cmd.auto_arg[1].update([ ('alignwithanymethod', _auto_arg1_align), ('tmalign', _auto_arg1_align), ('dyndom', _auto_arg1_align), ('gdt_ts', _auto_arg1_align), ('local_rms', _auto_arg1_align), ('extra_fit', cmd.auto_arg[0]['disable']), ('theseus', _auto_arg1_align), ('prosmart', _auto_arg1_align), ('xfit', _auto_arg1_align), ('promix', _auto_arg0_align), ]) cmd.auto_arg[2].update([ ('extra_fit', [align_methods_sc, 'alignment method', '']), ]) # vi: ts=4:sw=4:smarttab:expandtab