You are here: Home V2 Software Software More ... Python Macro Scripts Assign Closest Metabolite Peak

Assign Closest Metabolite Peak

Example Python script to automatically assign a query peak list, representing a sample with a mixture of metabolites, using reference metabolite assignments from the CCPN Metabolomics Project.

AssignMetabolites.py — Python Source, 3 kB (3959 bytes)

File contents

# Import helper functions from CcpNmr library
# See: http://www.ccpn.ac.uk/documentation/analysis/codingLibrary.html

from ccpnmr.analysis.core.AssignmentBasic import propagatePeakAssignments
from ccpnmr.analysis.core.ExperimentBasic import getPrimaryDataDimRef
from ccpnmr.analysis.core.PeakBasic import findClosePeaks
from ccpnmr.analysis.core.PeakBasic import getPeaksOverlapScore

# Globals

TOCSY_EXP_TYPES = set(['H_H.TOCSY', 'H_H.relayed'])
CHSQC_EXP_TYPES = set(['H[C]', 'HC', 'CH'])

ALLOWED_EXP_TYPES = TOCSY_EXP_TYPES | CHSQC_EXP_TYPES  # Union

# CcpNmr Macro

def assignMetabolitesMacro(argServer):
  """
  Script to run the metabolite assignment function as
  a CcpNmr Analysis Macro
  """
  peakList = argServer.getPeakList()
  
  if peakList:
    message = assignPeakListMetabolites(peakList)
    argServer.showInfo(message)
  
  else:
    argServer.showInfo('Aborted')


# Main function

def assignPeakListMetabolites(peakList, tolerances={'13C':0.5,'1H':0.05,}):
  """
  Example script to automatically assign a query peak list
  representing a sample with a mixture of metabolites.
  
  Input: Nmr.PeakList (mixture query)
  Output: String (status message)
  """
  
  # Check there are query peaks
  if not peakList.peaks:
    return "Failure: Input peak list has no picked peaks"

  # Get the spectrum, experiment and NMR project
  # record sassociated with query peak list
  spectrum = peakList.dataSource
  experiment = spectrum.experiment
  nmrProject = experiment.nmrProject

  # Check that the experiment is the right type
  # Must be HH TOCSY or C HSQC
  refExperiment = experiment.refExperiment  
  if not refExperiment:
    return "Failure: Experiment type not set"
  
  expType = refExperiment.name
  if expType not in ALLOWED_EXP_TYPES:
    return "Failure: Experiment type must be HH TOCSY or C HSQC"  

  # Find equivalent metabolite reference peak list
  
  if expType in TOCSY_EXP_TYPES:
    mExperiment = nmrProject.findFirstExperiment(name='all_metabolitesHH')
  else:
    mExperiment = nmrProject.findFirstExperiment(name='all_metabolites')  
  
  if not mExperiment:
    return "Failure: CCPN project does not contain an all-metabolite reference peak list"
  
  # Get metabolite reference spectrum and peakList
  mSpectrum = mExperiment.findFirstDataSource()
  mPeakList = mSpectrum.findFirstPeakList()
  
  # Setup the query experiment to carry the same
  # molecular system assignments as the reference
  # this step merely eliminates warnings later
  experiment.molSystems = mExperiment.molSystems | experiment.molSystems
  
  # Setup the ppm match tolerances for each dim
  # by querying dim isotope type
  dimTolerances = []
  for dataDim in mSpectrum.sortedDataDims():
    dataDimRef = getPrimaryDataDimRef(dataDim)
    isotope = ''.join(dataDimRef.expDimRef.isotopeCodes)
    dimTolerances.append(tolerances.get(isotope))
  
  # Record number of peaks matched and not matched
  numMatched = 0
  numMissed = 0
  
  # Loop through all the query peaks
  for peak in peakList.peaks:
  
    # Find close metabolite matches
    closeMatches = findClosePeaks(peak, mPeakList, dimTolerances)
    
    # Record if no matches found and skip loop
    if not closeMatches:
      numMissed += 1
      continue
  
    # Get a score for each reference peak match
    # use tolerances to weight dimensions by isotope
    # collect pairs of (score, peak)
    peakScores = []
    for mPeak in closeMatches:
      score = getPeaksOverlapScore(peak, mPeak, dimTolerances)
      peakScores.append( (score, mPeak) )
    
    # Find best score and corresponding reference peak
    peakScores.sort()
    bestScore, bestPeak = peakScores[-1]
  
    # Assign closest matching peak the same
    # as the reference metabolite peak
    propagatePeakAssignments( [peak], refPeak=bestPeak,
                             tolerances=dimTolerances)
    numMatched += 1

  return "Success: Assigned %d peaks and missed %d" % (numMatched, numMissed)