# Create an NmrPipe-like file for relaxation analyses.

  1. """
  2. This macro writes an NmrPipe-like file using some of the CcpNmr peak properties.
  3.  
  4. Copy this macro inside the Macro Editor module and amend it as necessary (Main Menu -> Macro -> New).
  5. Post your changes/improvements on the Ccpn Forum!
  6.  
  7. If guessClusterId is set to True, it will add a cluster id for each peak.
  8. The algorithm used is based on a "connected component analysis" as seen in https://stackoverflow.com/questions/14607317/
  9. and might not produce the expected results! There are many other (more efficient) ways of doing this operation
  10. but is not the purpose of this macro! Set guessClusterId to False if not happy with the result.
  11. In Peak there isn`t a property for clusterID (although we use Multiplets) so for this macro
  12. is hack-ish stored as peak.annotation.
  13.  
  14. Requirements:
  15. - A spectrumGroup containing the necessary spectra. Create it from sidebar if None.
  16. - Fill the spectrumGroupName variable and outputPath in the User`s settings section
  17.  
  18. Gui Tips:
  19. - To cycle the peakLabels on display use the shortcut "p+l" until you find the one you need.
  20. - To cycle the peakSymbols on display use the shortcut "p+s" until you find the one you need.
  21. - Change a peak annotation from a peakTable, double click on its cell value.
  22.  
  23. Warnings:
  24. - Always backup the project before running macros!
  25. - Not tested on NMRPIPE
  26. - The macro will set peak annotations. This will help visualising the clusters ids. If overwrites your annotations,
  27. undo to revert to the previous state.
  28. - Before running the macro close all GuiTables to avoid waiting for Gui updates!
  29. - Check the assignment formatting if is as required. Default:
  30. format: `NmrChain.NmrResidue.NmrAtom1, NmrAtom2; ...`
  31. e.g.: `A.66.GLN.HE21, NE2`
  32. - Tested only for nD spectra and hard-coded on first two dimensions. Will not work on 1D as it is now.
  33.  
  34. """
  35.  
  36.  
  37. ############################## User`s settings #################################
  38.  
  39. spectrumGroupName = `group` # Pid for the spectrumGroup containing the spectra of interest
  40. peakListIndex = -1 # Use last added peakList as default index (-1)
  41.  
  42. export = True # Set to False if don`t want to export
  43. outputPath = `~/Desktop/myPath` # Path dir where to export the files. FileName from the spectrum Name.
  44.  
  45.  
  46. # clustering options
  47. guessClusterId = True # Guess a clusterId for all peaks. False to use just a serial
  48. tolDim1 = 8 # tolerance for the first dimension (in points). Used to find adjacent peaks
  49. tolDim2 = 8 # tolerance for the second dimension (in points)
  50. clusterByPositions = True # cluster peaks if their positions are within tolDim1 and tolDim2.
  51. clusterByLWs = False # cluster peaks with overlapping lineWidths. If True, clusterByPositions will be False
  52. increaseLWByNpercent = 50 # increase (on-the-fly) the lw by n% value for finding overlaps. No data is modified.
  53.  
  54.  
  55.  
  56. ############################## Start of the code #################################
  57.  
import itertools

import numpy as np
import pandas as pd

from ccpn.core.lib.ContextManagers import undoBlockWithoutSideBar
from ccpn.ui.gui.lib.GuiPeakListView import _getScreenPeakAnnotation
from ccpn.util.Common import percentage
from ccpn.util.Path import aPath, checkFilePath


############################## Units conversion #################################

  69. def _hzLW2pnt(lwHz, sw, npoints):
  70. """
  71. :param lwHz: float. A peak lineWidth (in Hz)
  72. :param sw: float. Spectral width (in Hz)
  73. :param npoints: int. Total number of points
  74. :return: float. A peak lineWidth in points (per dimension)
  75. """
  76. if not all([lwHz, sw, npoints]): return
  77. hzXpnt = sw/npoints
  78. lwPnt = lwHz/hzXpnt
  79. return lwPnt
  80.  
  81. def getLineWidthsPnt(peak):
  82. sp = peak.peakList.spectrum
  83. return tuple(_hzLW2pnt(lwHz,sp.spectralWidthsHz[i],sp.totalPointCounts[i]) for i,lwHz in enumerate(peak.lineWidths))
  84.  
############################## Guess peak clusters #################################

  87. def clusterOverlaps(nodes, adjacents):
  88. "Ref: https://stackoverflow.com/questions/14607317/"
  89. clusters = []
  90. nodes = list(nodes)
  91. while len(nodes):
  92. node = nodes[0]
  93. path = dfs(node, adjacents, nodes)
  94. clusters.append(path)
  95. for pt in path:
  96. nodes.remove(pt)
  97. return clusters
  98.  
  99. def dfs(start, adjacents, nodes):
  100. "Ref: https://stackoverflow.com/questions/14607317/"
  101. path = []
  102. q = [start]
  103. while q:
  104. node = q.pop(0)
  105. if path.count(node) >= nodes.count(node):
  106. continue
  107. path = path + [node]
  108. nextNodes = [p2 for p1,p2 in adjacents if p1 == node]
  109. q = nextNodes + q
  110. return path
  111.  
def _getOverlapsByLineWidths(peaks, tolDim1=0.01, tolDim2=0.01, increaseByNpercent=0):
    """
    Consider two peaks adjacent if the LW are overlapping in both dimensions.
    :param peaks: list of ccpn peak objs
    :param tolDim1: tolerance (in points) for the first dimension
    :param tolDim2: tolerance (in points) for the second dimension
    :param increaseByNpercent: increase (on-the-fly) the lw by n% value for finding overlaps. No data is modified.
    :return: A list of adjacent pairs of peaks
    """
    overlaps = []
    if len(peaks) == 0: return []
    sp = peaks[0].peakList.spectrum
    for pair in itertools.combinations(peaks, 2):
        peakA, peakB = pair
        # skip pairs with any missing/zero lineWidth: no interval can be built for them
        # NOTE(review): 'warning' is presumably injected by the CcpNmr macro namespace - confirm
        if not all(list(peakA.lineWidths) + list(peakB.lineWidths)):
            warning('LineWidths is set to None for some peaks. Peaks skipped: %s' % ''.join(map(str, pair)))
            continue
        # hard-coded to exactly two dimensions (see module docstring); the 2-name
        # unpacking below will raise for 1D or >2D spectra
        pos1A, pos2A = [peakA.pointPosition[i] for i in range(sp.dimensionCount)]
        lw1A, lw2A = [lw + percentage(increaseByNpercent, lw) for lw in getLineWidthsPnt(peakA)]
        pos1B, pos2B = [peakB.pointPosition[i] for i in range(sp.dimensionCount)]
        lw1B, lw2B = [lw + percentage(increaseByNpercent, lw) for lw in getLineWidthsPnt(peakB)]
        # sample each (position +/- lw/2) interval with 500 points...
        dim1a, dim2a = np.linspace(pos1A-(lw1A/2), pos1A+(lw1A/2),500), np.linspace(pos2A-(lw2A/2),pos2A+(lw2A/2),500)
        dim1b, dim2b = np.linspace(pos1B-(lw1B/2), pos1B+(lw1B/2),500), np.linspace(pos2B-(lw2B/2),pos2B+(lw2B/2),500)
        # ...and keep the B-samples that come within tolerance of ANY A-sample:
        # an approximate interval-overlap test (resolution ~ lw/500)
        inters1 = dim1b[(np.abs(dim1a[:,None] - dim1b) < tolDim1).any(0)]
        inters2 = dim2b[(np.abs(dim2a[:,None] - dim2b) < tolDim2).any(0)]
        # adjacent only if the sampled intervals intersect in BOTH dimensions
        if all([len(inters1)>0, len(inters2)>0]):
            overlaps.append(pair)
    return overlaps

  139. def _getOverlapPairsByPositions(peaks, tolDim1=10, tolDim2=10):
  140. """
  141. Consider two peaks adjacent if the PointPositions are within the tolerances.
  142. :param peaks:
  143. :param tolDim1: tolerance for the first dimension (in points)
  144. :param tolDim2: tolerance for the second dimension (in points)
  145. :return: A list of adjacent pairs of peaks
  146. """
  147. overlaps = []
  148. for pair in itertools.combinations(peaks, 2):
  149. dim1a, dim2a = np.array(pair[0].pointPosition[0]), np.array(pair[0].pointPosition[1])
  150. dim1b, dim2b = np.array(pair[1].pointPosition[0]), np.array(pair[1].pointPosition[1])
  151. if (np.abs(dim1a - dim1b) < tolDim1) and (np.abs(dim2a - dim2b) < tolDim2):
  152. overlaps.append(pair)
  153. return overlaps
  154.  
  155. def _getClustID(peak):
  156. v = None
  157. try:
  158. v = int(peak.annotation)
  159. except Exception as e:
  160. warning(`Error converting peak annotation in int format. %s` %e)
  161. return v
  162.  
  163.  
def setClusterIDs(peaks, guessClustID=True):
    """Assign a cluster id to every peak, stored (hack-ishly) as peak.annotation.

    When guessClustID is True, peaks are grouped into overlap clusters (by
    positions or by lineWidths, depending on the module-level flags) and all
    members of a cluster get the same id; otherwise each peak gets its own
    serial index as id.
    """
    if guessClustID:
        # module-level switches: position clustering wins unless LW clustering is requested
        if clusterByPositions and not clusterByLWs:
            overlappedPeaks = _getOverlapPairsByPositions(peaks, tolDim1=tolDim1, tolDim2=tolDim2)
        else:
            overlappedPeaks = _getOverlapsByLineWidths(peaks, increaseByNpercent=increaseLWByNpercent)
        # cluster on pointPositions (used as node identities), not on the peak objects
        # NOTE(review): two peaks sharing an identical pointPosition would be matched
        # by each other's group entries below - confirm this cannot happen in practice
        positions = [pk.pointPosition for pk in peaks]
        overlappedPositions = [(pair[0].pointPosition, pair[1].pointPosition) for pair in overlappedPeaks]
        result = clusterOverlaps(positions, overlappedPositions)
        with undoBlockWithoutSideBar():  # batch all annotation edits into one undo step
            allClusters = []
            for i, group in enumerate(result):
                peakCluster = []
                for peak in peaks:
                    for j in group:
                        if j == peak.pointPosition:
                            peakCluster.append(peak)
                            # the cluster id is the enumeration index, stored as a string
                            peak.annotation = str(i)
                allClusters.append(peakCluster)
    else:
        with undoBlockWithoutSideBar():
            # no clustering requested: annotate each peak with its own serial index
            for i, peak in enumerate(peaks):
                peak.annotation = str(i)

############################## Build DataFrame from peaks #################################

  190. VARS = [`INDEX`, `X_AXIS`, `Y_AXIS`, `DX`, `DY`, `X_PPM`, `Y_PPM`, `X_HZ`, `Y_HZ`, `XW`, `YW`, `XW_HZ`, `YW_HZ`, `X1`, `X3`, `Y1`, `Y3`, `HEIGHT`, `DHEIGHT`, `VOL`, `PCHI2`, `TYPE`, `ASS`, `CLUSTID`, `MEMCNT`]
  191. FORMAT = [`%5d`, `%9.3f`, `%9.3f`, `%6.3f`, `%6.3f`, `%8.3f`, `%8.3f`, `%9.3f`, `%9.3f`, `%7.3f`, `%7.3f`, `%8.3f`, `%8.3f`, `%4d`, `%4d`, `%4d`, `%4d`, `%+e`, `%+e`, `%+e`, `%.5f`, `%d`, `%s`, `%4d`, `%4d`]
  192. VFdict = {k:v for k,v in zip(VARS, FORMAT)}
  193.  
# NMRPIPE VARS
INDEX = 'INDEX'      # ARBITRARY - Peak number
X_AXIS = 'X_AXIS'    # REQUIRED - Peak position: in points in 1st dimension, from left of spectrum limit
Y_AXIS = 'Y_AXIS'    # REQUIRED - Peak position: in points in 2nd dimension, from bottom of spectrum limit
DX = 'DX'            # NOT REQUIRED - Line width in 1st dimension, in points
DY = 'DY'            # NOT REQUIRED - Line width in 2nd dimension, in points
X_PPM = 'X_PPM'      # NOT REQUIRED - Peak position: in ppm in 1st dimension
Y_PPM = 'Y_PPM'      # NOT REQUIRED - Peak position: in ppm in 2nd dimension
X_HZ = 'X_HZ'        # NOT REQUIRED - Peak position: in Hz in 1st dimension
Y_HZ = 'Y_HZ'        # NOT REQUIRED - Peak position: in Hz in 2nd dimension
XW = 'XW'            # REQUIRED - Peak width: in points in 1st dimension
YW = 'YW'            # REQUIRED - Peak width: in points in 2nd dimension
XW_HZ = 'XW_HZ'      # REQUIRED - Peak width in Hz in 1st dimension (VarsDict formats peak.lineWidths, which are Hz)
YW_HZ = 'YW_HZ'      # REQUIRED - Peak width in Hz in 2nd dimension
X1 = 'X1'            # NOT REQUIRED - Left border of peak in 1st dim, in points
X3 = 'X3'            # NOT REQUIRED - Right border of peak in 1st dim, in points
Y1 = 'Y1'            # NOT REQUIRED - Left border of peak in 2nd dim, in points
Y3 = 'Y3'            # NOT REQUIRED - Right border of peak in 2nd dim, in points
HEIGHT = 'HEIGHT'    # NOT REQUIRED - Peak height
DHEIGHT = 'DHEIGHT'  # NOT REQUIRED - Peak height error
VOL = 'VOL'          # NOT REQUIRED - Peak volume
PCHI2 = 'PCHI2'      # NOT REQUIRED - ??
TYPE = 'TYPE'        # NOT REQUIRED - ?? default 1
ASS = 'ASS'          # REQUIRED - Peak assignment
CLUSTID = 'CLUSTID'  # REQUIRED - Peak cluster id
MEMCNT = 'MEMCNT'    # REQUIRED - ?? default 1

NULLVALUE = -666     # sentinel exported for any missing/unset numeric value
NULLSTRING = '*'     # sentinel declared in the tab-file header for missing strings
UNKNOWN = 1          # default written for the TYPE and MEMCNT columns

  225. VarsDict = {
  226. INDEX : lambda x: VFdict.get(INDEX) % x.serial,
  227. DX : lambda x: NULLVALUE, #VFdict.get(DX) % NULLVALUE,
  228. DY : lambda x: NULLVALUE, #VFdict.get(DY) % NULLVALUE,
  229. X_PPM : lambda x: VFdict.get(X_PPM) % x.position[0] if x.position[0] else NULLVALUE,
  230. Y_PPM : lambda x: VFdict.get(Y_PPM) % x.position[1] if x.position[1] else NULLVALUE,
  231. X_HZ : lambda x: NULLVALUE, #VFdict.get(X_HZ) % NULLVALUE,
  232. Y_HZ : lambda x: NULLVALUE, #VFdict.get(Y_HZ) % NULLVALUE,
  233. X_AXIS : lambda x: VFdict.get(X_AXIS) % x.pointPosition[0] if x.pointPosition[0] else NULLVALUE,
  234. Y_AXIS : lambda x: VFdict.get(Y_AXIS) % x.pointPosition[1] if x.pointPosition[1] else NULLVALUE,
  235. XW : lambda x: VFdict.get(XW) % getLineWidthsPnt(x)[0] if getLineWidthsPnt(x)[0] else NULLVALUE,
  236. YW : lambda x: VFdict.get(YW) % getLineWidthsPnt(x)[1] if getLineWidthsPnt(x)[1] else NULLVALUE,
  237. XW_HZ : lambda x: VFdict.get(XW_HZ) % x.lineWidths[0] if x.lineWidths[0] else NULLVALUE,
  238. YW_HZ : lambda x: VFdict.get(YW_HZ) % x.lineWidths[1] if x.lineWidths[0] else NULLVALUE,
  239. X1 : lambda x: NULLVALUE, #VFdict.get(X1) % NULLVALUE,
  240. X3 : lambda x: NULLVALUE, #VFdict.get(X3) % NULLVALUE,
  241. Y1 : lambda x: NULLVALUE, #VFdict.get(Y1) % NULLVALUE,
  242. Y3 : lambda x: NULLVALUE, #VFdict.get(Y3) % NULLVALUE,
  243. HEIGHT : lambda x: VFdict.get(HEIGHT) % x.height if x.height else NULLVALUE,
  244. DHEIGHT : lambda x: VFdict.get(DHEIGHT) % x.heightError if x.heightError else NULLVALUE,
  245. VOL : lambda x: VFdict.get(VOL) % x.volume if x.volume else NULLVALUE,
  246. PCHI2 : lambda x: NULLVALUE, #VFdict.get(PCHI2) % NULLVALUE,
  247. TYPE : lambda x: VFdict.get(TYPE) % UNKNOWN,
  248. ASS : lambda x: VFdict.get(ASS) % _getScreenPeakAnnotation(x, usePid=True),
  249. CLUSTID : lambda x: VFdict.get(CLUSTID) % _getClustID(x) if _getClustID(x) else NULLVALUE,
  250. MEMCNT : lambda x: VFdict.get(MEMCNT) % UNKNOWN,
  251. }
  252.  
  253. def buildDataFrame(peaks):
  254. df = pd.DataFrame()
  255. for k, v in VarsDict.items():
  256. if v:
  257. df[k] = list(map(lambda x: v(x), peaks))
  258. else:
  259. df[k] = [None]*len(peaks)
  260. return df
  261.  
  262. def dfToTab(df):
  263. "Return NmrPipe (NIH) tab-file formatted string"
  264.  
  265. string = ``
  266. string += `VARS %s\n` % ` `.join(VARS)
  267. string += `FORMAT %s\n\n` % ` `.join(FORMAT)
  268. string += `NULLVALUE -666`
  269. string += `\nNULLSTRING *\n\n`
  270. string += df.to_string(header=False, index=False) # formats are already defined when building the dataframe.
  271. return string
  272.  
def runMacro():
    """Export one NmrPipe-like tab file per spectrum of the configured spectrumGroup.

    Uses the module-level settings (spectrumGroupName, peakListIndex, export,
    outputPath, guessClusterId). 'get' and 'warning' come from the CcpNmr macro
    namespace.
    """

    spectrumGroup = get('SG:'+spectrumGroupName)
    if spectrumGroup is None:
        raise RuntimeError('You need at least a SpectrumGroup! None found with Pid "%s". '
                           'Set the name at the beginning of this macro or create a new one with '
                           'the required spectra' % spectrumGroupName)

    for spectrum in spectrumGroup.spectra:
        # skip spectra with nothing to export, but keep processing the others
        if len(spectrum.peakLists) == 0:
            warning('You need at least a peakList for %s' %spectrum)
            continue
        if len(spectrum.peakLists[peakListIndex].peaks) == 0:
            warning('You need at least a peak. %s skipped' %spectrum)
            continue

        # NOTE(review): assumes .peaks returns a mutable list; if the API returns
        # a tuple the in-place sort below would fail - confirm against the ccpn
        # version in use
        peaks = spectrum.peakLists[peakListIndex].peaks
        peaks.sort(key=lambda x: x.pointPosition[0], reverse=False)
        setClusterIDs(peaks, guessClusterId)
        df = buildDataFrame(peaks)
        if export:
            isPathOk, msgErr = checkFilePath(aPath(outputPath))
            if not isPathOk:
                print('File not saved. Error: %s.'%msgErr)
            else:
                # output file named after the spectrum, inside outputPath
                fileName = aPath(outputPath).joinpath(spectrum.name)
                string = dfToTab(df)
                with open(fileName, 'w') as f:
                    f.write(string)
                print('File saved in: %s.' % fileName)

# Run only when executed as a script/macro, not on import
if __name__ == '__main__':
    runMacro()