Hi Beth,
Here is a macro that should be able to do that; it creates a simple table (sorry, it is quite a few lines).
Tip: use the filter table (FT) option to get the info that you need; you can also hide some columns.
If you have any questions, let us know.
BW,
Eliza
"""
Compare two chemical shift lists by walking the NmrChain/NmrAtom hierarchy, flagging atoms
with shifts in both lists and reporting whether the ppm difference stays within a user-defined tolerance.
The results are exported to a project DataTable for further review.
"""
from __future__ import annotations

import math
import re
from collections import OrderedDict
from datetime import datetime

import pandas as pd

from ccpn.ui.gui.popups.Dialog import CcpnDialogMainWidget
from ccpn.ui.gui.widgets import CheckBox
from ccpn.ui.gui.widgets import Entry
from ccpn.ui.gui.widgets.Label import Label
from ccpn.ui.gui.widgets.MessageDialog import showWarning, showMessage
from ccpn.ui.gui.widgets.PulldownListsForObjects import ChemicalShiftListPulldown, NmrChainPulldown
class CompareChemicalShiftLists(CcpnDialogMainWidget):
    """Dialog that compares two chemical shift lists within one NmrChain.

    The user picks a reference list, a comparison list, an NmrChain and two
    tolerances (hydrogen / heteroatom, in ppm). Pressing 'Compare' builds one
    row per NmrAtom found in either list and stores the rows in a new project
    DataTable.
    """

    title = 'Compare Chemical Shift Lists'

    def __init__(self, parent=None, mainWindow=None, title=title, **kwds):
        """Build the dialog and wire up its buttons.

        :param parent: parent widget passed through to the base dialog.
        :param mainWindow: when given, ``application``, ``current`` and
            ``project`` are read from it; otherwise all four attributes are
            set to None.
        :param title: window title (defaults to the class ``title``).
        :param kwds: extra keyword arguments forwarded to the base dialog.
        """
        super().__init__(parent, setLayout=True, windowTitle=title, size=(520, 270), **kwds)

        if mainWindow:
            self.mainWindow = mainWindow
            self.application = mainWindow.application
            self.current = self.application.current
            self.project = mainWindow.project
        else:
            self.mainWindow = None
            self.application = None
            self.current = None
            self.project = None

        self._createWidgets()

        self.setOkButton(text='Compare', callback=self._compareLists,
                         tipText='Create a DataTable summarising the chemical shift differences')
        self.setCancelButton(callback=self.reject)
        self.setCloseButton(callback=self.reject, tipText='Close')
        self.setDefaultButton(CcpnDialogMainWidget.CLOSEBUTTON)
        # NOTE(review): some CCPN releases appear to expect a post-initialisation
        # hook to be called once the buttons are configured - confirm against the
        # installed Dialog base class.

    def _createWidgets(self):
        """Create the pulldowns, tolerance entries and checkbox, one grid row each."""
        row = 0
        Label(self.mainWidget, text='Reference list:', grid=(row, 0), hAlign='right')
        self.referenceList = ChemicalShiftListPulldown(self.mainWidget,
                                                       mainWindow=self.mainWindow,
                                                       grid=(row, 1),
                                                       showSelectName=True,
                                                       labelText='')

        row += 1
        Label(self.mainWidget, text='Comparison list:', grid=(row, 0), hAlign='right')
        self.comparisonList = ChemicalShiftListPulldown(self.mainWidget,
                                                        mainWindow=self.mainWindow,
                                                        grid=(row, 1),
                                                        showSelectName=True,
                                                        labelText='')

        row += 1
        Label(self.mainWidget, text='NmrChain (required):', grid=(row, 0), hAlign='right')
        self.nmrChainWidget = NmrChainPulldown(self.mainWidget,
                                               mainWindow=self.mainWindow,
                                               grid=(row, 1),
                                               showSelectName=True,
                                               labelText='')

        row += 1
        Label(self.mainWidget, text='Hydrogen tolerance (ppm):', grid=(row, 0), hAlign='right')
        self.hydrogenToleranceEntry = Entry.Entry(self.mainWidget, text='0.02', grid=(row, 1), editable=True)

        row += 1
        Label(self.mainWidget, text='Heteroatom tolerance (ppm):', grid=(row, 0), hAlign='right')
        self.heteroToleranceEntry = Entry.Entry(self.mainWidget, text='0.20', grid=(row, 1), editable=True)

        row += 1
        self.onlyOutsideCheck = CheckBox.CheckBox(self.mainWidget,
                                                  text='Only report atoms outside tolerance',
                                                  checked=False,
                                                  grid=(row, 1),
                                                  hAlign='left')

    def _compareLists(self):
        """Validate the selections, build the comparison and store it as a DataTable.

        Shows a warning and returns None when a selection is missing, both
        pulldowns point at the same list, a tolerance is invalid, no atom has
        a shift in both lists, the 'only outside tolerance' filter leaves
        nothing, or the DataTable cannot be created. On success the dialog is
        accepted and the result of ``self.accept()`` is returned.
        """
        reference = self.referenceList.getSelectedObject()
        comparison = self.comparisonList.getSelectedObject()
        if not reference or not comparison:
            showWarning('Missing selection', 'Please select two chemical shift lists to compare.')
            return
        if reference.pid == comparison.pid:
            showWarning('Same list selected', 'Please choose two different chemical shift lists.')
            return

        tolerances = self._parseTolerances()
        if tolerances is None:
            return

        chain = self.nmrChainWidget.getSelectedObject()
        if not chain:
            showWarning('Missing NmrChain', 'Please select an NmrChain to centre the comparison.')
            return

        rows = self._build_comparison_rows(reference, comparison, tolerances, chain)
        # Rows also cover atoms present in only one list, so "no overlap" has to
        # be decided from the per-row flags rather than from the row count.
        if not any(row['InReference'] and row['InComparison'] for row in rows):
            showWarning('No overlaps', 'No atoms with chemical shifts in both lists were found.')
            return

        if self.onlyOutsideCheck.get():
            rows = [row for row in rows if not row['WithinTolerance']]
            if not rows:
                showWarning('No differences', 'All atoms fall within the supplied tolerance.')
                return

        df = pd.DataFrame(rows)
        # Replace every run of characters outside [A-Za-z0-9_] so the list names
        # can be embedded in the table name.
        safe_name = re.sub(r'[^A-Za-z0-9_]+', '_', f'{reference.name}_{comparison.name}')
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        table_name = f'CSComparison_{safe_name[:40]}_{timestamp}'
        comment = (f'Comparison between {reference.name} and {comparison.name} with '
                   f'hydrogen {tolerances[0]:.4g} ppm and hetero {tolerances[1]:.4g} ppm tolerances.')
        try:
            self.project.newDataTable(name=table_name, data=df, comment=comment)
        except Exception as exc:
            # GUI boundary: report any failure to the user instead of raising.
            showWarning('Creation failed', f'Unable to create DataTable: {exc}')
            return

        showMessage('Comparison ready', f'DataTable "{table_name}" now holds the comparison.')
        return self.accept()

    def _parseTolerances(self):
        """Read both tolerance entries.

        An empty entry counts as 0. Text that is not a number, non-finite
        values (nan / inf) and negative values trigger a warning.

        :return: ``(hydrogen_tol, hetero_tol)`` as floats, or None when a
            warning was shown.
        """
        try:
            hydrogen_tol = float(self.hydrogenToleranceEntry.get() or 0)
            hetero_tol = float(self.heteroToleranceEntry.get() or 0)
        except (TypeError, ValueError):
            showWarning('Invalid tolerance', 'Tolerance must be a valid number (ppm).')
            return None

        # float() happily parses 'nan' and 'inf'; nan would make every
        # `diff <= tolerance` test False and inf would make every one True.
        if not (math.isfinite(hydrogen_tol) and math.isfinite(hetero_tol)):
            showWarning('Invalid tolerance', 'Tolerance must be a valid number (ppm).')
            return None

        if hydrogen_tol < 0 or hetero_tol < 0:
            showWarning('Invalid tolerance', 'Tolerances must be zero or positive.')
            return None

        return hydrogen_tol, hetero_tol

    def _build_comparison_rows(self, reference, comparison, tolerances, chain_filter):
        """Collect one row per NmrAtom that has a shift in either selected list.

        :param reference: chemical shift list whose values fill the reference columns.
        :param comparison: chemical shift list whose values fill the comparison columns.
        :param tolerances: ``(hydrogen_tol, hetero_tol)`` pair in ppm.
        :param chain_filter: when truthy, only atoms whose NmrChain pid equals
            this chain's pid are kept.
        :return: list of OrderedDict rows. ``Difference (ppm)`` is None and
            ``WithinTolerance`` is False for atoms missing from one of the lists.
        """
        hydrogen_tol, hetero_tol = tolerances
        entries: dict[object, dict] = {}

        def _consider(shift_list, label):
            """Record every usable shift of *shift_list* under *label*."""
            for shift in shift_list.chemicalShifts:
                atom = shift.nmrAtom
                if not atom or not atom.nmrResidue:
                    continue
                nmr_chain = atom.nmrResidue.nmrChain
                if chain_filter and (not nmr_chain or nmr_chain.pid != chain_filter.pid):
                    continue
                entry = entries.get(atom)
                if entry is None:
                    entry = entries[atom] = {'atom': atom,
                                             'residue': atom.nmrResidue,
                                             'chain': nmr_chain,
                                             'shifts': {}}
                entry['shifts'][label] = shift

        _consider(reference, 'reference')
        _consider(comparison, 'comparison')

        rows = []
        ref_label = reference.name
        cmp_label = comparison.name
        for entry in entries.values():
            atom = entry['atom']
            residue = entry['residue']
            shifts = entry['shifts']
            shift_ref = shifts.get('reference')
            shift_cmp = shifts.get('comparison')
            value_ref = shift_ref.value if shift_ref else None
            value_cmp = shift_cmp.value if shift_cmp else None

            diff = None
            within_tol = False
            tolerance = hydrogen_tol if self._is_hydrogen(atom) else hetero_tol
            if value_ref is not None and value_cmp is not None:
                diff = abs(value_ref - value_cmp)
                within_tol = diff <= tolerance

            # Names of every shift list (not just the two selected) holding this atom.
            lists = sorted({cs.chemicalShiftList.name for cs in atom.chemicalShifts if cs.chemicalShiftList})
            all_lists = ', '.join(lists)

            row = OrderedDict()
            row['ResiduePid'] = residue.pid if residue else ''
            row['SequenceCode'] = residue.sequenceCode if residue else ''
            row['ResidueType'] = (residue.residueType or '') if residue else ''
            row['AtomPid'] = atom.pid
            row['AtomName'] = atom.name
            row['IsotopeCode'] = atom.isotopeCode or ''
            row[f'ChemicalShiftPid ({ref_label})'] = shift_ref.pid if shift_ref else ''
            row[f'ChemicalShiftPid ({cmp_label})'] = shift_cmp.pid if shift_cmp else ''
            row[f'Shift ({ref_label})'] = value_ref
            row[f'Shift ({cmp_label})'] = value_cmp
            row['Difference (ppm)'] = diff
            row['WithinTolerance'] = within_tol
            row['ToleranceUsed'] = tolerance
            row['InReference'] = bool(shift_ref)
            row['InComparison'] = bool(shift_cmp)
            row['ShiftLists'] = all_lists
            rows.append(row)

        return rows

    @staticmethod
    def _is_hydrogen(atom):
        """Return True when *atom* should use the hydrogen tolerance.

        The element symbol is taken from the isotope code (optional mass
        number followed by letters, e.g. '1H', '13C') and must be exactly 'H',
        so codes such as '3He' or '199Hg' are not mistaken for hydrogen. When
        the isotope code is empty or does not have that form, the decision
        falls back to whether the atom name starts with 'H'.
        """
        isotope = (atom.isotopeCode or '').strip()
        element = re.fullmatch(r'\d*([A-Za-z]+)', isotope)
        if element:
            return element.group(1).upper() == 'H'
        return (atom.name or '').upper().startswith('H')
if __name__ == "__main__":
    # `mainWindow` is not defined in this file; presumably the macro runner
    # supplies it in the execution namespace - confirm.
    dialog = CompareChemicalShiftLists(mainWindow=mainWindow)
    dialog.show()
    dialog.raise_()