Source code for gempipe.interface.sanity

import re
import importlib.metadata


import cobra


from gempipe.interface.gaps import get_solver
from gempipe.interface.gaps import get_objectives
from ..commons import fba_no_warnings



[docs] def close_boundaries(model): """Set all the EX_change reactions to (0, 0). Args: model (cobra.Model): target model. """ for r in model.reactions: if len(r.metabolites)==1 and r.id.startswith('EX_'): r.bounds = (0, 0)
[docs] def verify_egc(model, mid, escher=False, threshold=1e-5, verbose=True): """Test the presence of energy-generating cycles (EGCs). Can also output a model for Escher, with just the reactions composing the cycle. Args: model (cobra.Model): target model. Must be encoded with the BiGG notation. mid (str): metabolite ID for which the EGC must be checked. Warning: must be without compartment, so for example ``atp`` instead of ``atp_c``. escher (bool): save a reduced ``cobra.Model`` in the current directory. To be loaded in Escher. threshold (float): values below this treshold are considered as 0. verbose (bool): if True, show results of the test even if no EGC was detected. Returns: bool: ``True`` if an EGC is detected. """ # changes as not permament: with model: # close all the exchange reactions: close_boundaries(model) # create a dissipation reaction: dissip = cobra.Reaction(f'__dissip__{mid}') model.add_reactions([dissip]) dissip = model.reactions.get_by_id(f'__dissip__{mid}') # define the dissipation reaction: modeled_mids = [m.id for m in model.metabolites] if mid == 'atp': dissip_string = 'atp_c + h2o_c --> adp_c + pi_c + h_c' elif mid == 'ctp': dissip_string = 'ctp_c + h2o_c --> cdp_c + pi_c + h_c' elif mid == 'gtp': dissip_string = 'gtp_c + h2o_c --> gdp_c + pi_c + h_c' elif mid == 'utp': dissip_string = 'utp_c + h2o_c --> udp_c + pi_c + h_c' elif mid == 'itp': dissip_string = 'itp_c + h2o_c --> idp_c + pi_c + h_c' elif mid == 'nadh': dissip_string = 'nadh_c --> nad_c + h_c' elif mid == 'nadph': dissip_string = 'nadph_c --> nadp_c + h_c' elif mid == 'fadh2': dissip_string = 'fadh2_c --> fad_c + 2.0 h_c' elif mid == 'accoa': dissip_string = 'accoa_c + h2o_c --> ac_c + coa_c + h_c' elif mid == 'glu__L': if 'nh4_c' in modeled_mids : dissip_string = 'glu__L_c + h2o_c --> akg_c + nh4_c + 2.0 h_c' elif 'nh3_c' in modeled_mids : dissip_string = 'glu__L_c + h2o_c --> akg_c + nh3_c + 3.0 h_c' else: raise Exception("'nh4_c' or 'nh3_c' must be present in the model.") elif mid == 'q8h2': dissip_string = 'q8h2_c --> q8_c + 2.0 h_c' else: raise Exception("Metabolite ID (mid) not recognized.") dissip.build_reaction_from_string(dissip_string) # set the objective and optimize: model.objective = f'__dissip__{mid}' res, obj_value, status = fba_no_warnings(model) # apply the threshold: obj_value = res.objective_value status = res.status if abs(obj_value) < threshold: obj_value = 0 # log some messages if verbose: print(dissip.reaction) print(obj_value , ':', res.status ) # log some messages if obj_value > 0 and status == 'optimal' and verbose: # get suspect !=0 fluxes fluxes = res.fluxes print() # skip a line befor printing the EGC members # get interesting fluxes (0.001 tries to take into account the approximation in glpk and cplex solvers) fluxes_interesting = fluxes[((fluxes > 0.001) | (fluxes < -0.001)) & (fluxes.index != f'__dissip__{mid}')] print(fluxes_interesting.to_string()) # create a model for escher if escher: model_copy = model.copy() all_rids = [r.id for r in model_copy.reactions] to_delete = set(all_rids) - set(fluxes_interesting.index) model_copy.remove_reactions(to_delete) cobra.io.save_json_model(model_copy, f'__dissip__{mid}' + '.json') print(f'__dissip__{mid}' + '.json', "saved in current directory.") if obj_value > 0 and status == 'optimal': return True else: return False
[docs] def verify_egc_all(model): """Quickly check the presence of EGCs for the main metabolites. Internally calls `verify_egc()` over a list of metabolite IDs. Args: model (cobra.Model): target model. """ mids_to_check = ['atp','ctp','gtp','utp','itp','nadh','nadph','fadh2','accoa','glu__L','q8h2'] all_results = [] for mid in mids_to_check: result = verify_egc(model, mid, verbose=False) all_results.append(~result) if all(all_results): print("No energy-generating cycles (EGCs) found.")
[docs] def check_sinks(model, verbose=True): """Check presence of sink reactions. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of sink reactions found. """ found_rids = [] cnt = 0 for r in model.reactions: if len(list(r.metabolites))==1 and list(r.metabolites)[0].compartment!='e': if 0 in r.bounds == False: cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', r.bounds) found_rids.append(r.id) if found_rids == []: if verbose: print("No sink reactions found.") return found_rids
[docs] def check_demands(model, verbose=True): """Check presence of demand reactions. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of demand reactions found. """ found_rids = [] cnt = 0 for r in model.reactions: if len(list(r.metabolites))==1 and list(r.metabolites)[0].compartment!='e': if 0 in r.bounds: cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', r.bounds) found_rids.append(r.id) if found_rids == []: if verbose: print("No demand reactions found.") return found_rids
[docs] def check_sinks_demands(model, verbose=True): """Check presence of sink and demand reactions. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of sink/demand reactions found. """ found_rids = [] cnt = 0 for r in model.reactions: if len(list(r.metabolites))==1 and list(r.metabolites)[0].compartment!='e': cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', r.bounds) found_rids.append(r.id) if found_rids == []: if verbose: print("No sink/demand reactions found.") return found_rids
[docs] def check_exr_notation(model, verbose=True): """Check that every EX_change reaction ID begins with ``EX_``. Here EX_change reactions are defined as those reactions having just 1 metabolite involved, included in the extracellular compartment. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of EX_change reactions with bad ID. """ found_rids = [] cnt = 0 for r in model.reactions: if len(r.metabolites) == 1: if list(r.metabolites)[0].id.rsplit('_', 1)[1] == 'e': # extracellular compartment if r.id.startswith("EX_") == False: cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.reaction) found_rids.append(r.id) if found_rids == []: if verbose: print("No EX_change reaction with bad ID found.") return found_rids
[docs] def remove_EX_annots(model): """Remove all annotations from EX_change reactions. Args: model (cobra.Model): target model. """ cnt = 0 for r in model.reactions: if len(r.metabolites) == 1 and r.id.startswith("EX_"): if r.annotation != {}: r.annotation = {}
[docs] def check_missing_charges(model, verbose=True): """Check if all metabolites have a charge attribute. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of matabolites missing the charge attribute. """ found_mids = [] cnt = 0 for m in model.metabolites: cnt += 1 charge = m.charge if charge==None or type(charge)!=int: if verbose: print(cnt, ':', m.id) found_mids.append(m.id) if found_mids == []: if verbose: print("No metabolite with missing charge attribute found.") return found_mids
[docs] def check_missing_formulas(model, verbose=True): """Check if all metabolites have a formula attribute. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of matabolites missing the formula attribute. """ found_mids = [] cnt = 0 for m in model.metabolites: formula = m.formula if formula==None or formula=='': cnt += 1 if verbose: print(cnt, ':', m.id) found_mids.append(m.id) if found_mids == []: if verbose: print("No metabolite with missing formula attribute found.") return found_mids
[docs] def check_artificial_atoms(model, preview=None, verbose=True): """Check if artificial atoms like 'R' and 'X' are present. Args: model (cobra.Model): target model. preview (int): maximum number of metabolite IDs to show for each artificial atom. If ``None``, they will be all displayed. verbose (bool): if ``False``, don't print. Returns: list: IDs of matabolites with artificial atoms. """ found_mids = [] atom_to_mids = {} for m in model.metabolites: formula = m.formula if formula == None or formula=='': continue # there are dedicated functions for this # Matches any uppercase letter (A-Z) followed by zero or more lowercase letters (a-z) atoms = set(re.findall(r'[A-Z][a-z]*', formula)) base = set(['C', 'H', 'O', 'N', 'P', 'S']) metals = set(['Ag', 'Fe', 'Co', 'As', 'Ca', 'Cd', 'Cl', 'Cu', 'Hg', 'K', 'Mg', 'Mo', 'Na', 'Ni', 'Se', 'Zn', 'Mn']) safe_atoms = base.union(metals) strange = atoms - safe_atoms # popupale te 'found_mids' list if len(strange) > 0: found_mids.append(m.id) # iterate each 'strange' atom to create a dict: for atom in strange: if atom not in atom_to_mids.keys(): atom_to_mids[atom] = [] atom_to_mids[atom].append(f'{m.id}[{m.formula}]') # show the create 'atom_to_mids' dict: for i, atom in enumerate(atom_to_mids.keys()): mids_to_show = ', '.join(atom_to_mids[atom]) if preview != None: if len(atom_to_mids[atom]) > preview: mids_to_show = f"{', '.join(atom_to_mids[atom][:preview])}, ... ({len(atom_to_mids[atom])} in total)" if verbose: print(i+1, ':', atom, ':', mids_to_show) if found_mids == []: if verbose: print("No metabolite with artificial atoms found.") return found_mids
[docs] def get_unconstrained_bounds(model): """Get the minimum and maximum bounds used in the model. Usually they correspond to the "unconstrained" negative and positive flux constants. Args: model (cobra.Model): target model. Returns: tuple: min and max bounds. """ un_lb, un_ub = 0, 0 for r in model.reactions: if r.lower_bound < un_lb: un_lb = r.lower_bound if r.upper_bound > un_ub: un_ub = r.upper_bound return (un_lb, un_ub)
[docs] def reset_unconstrained_bounds(model): """Set the uncontrained flux constants to 1000. Args: model (cobra.Model): target model. """ un_lb, un_ub = get_unconstrained_bounds(model) for r in model.reactions: if r.lower_bound == un_lb: r.lower_bound = -1000 if r.upper_bound == un_ub: r.upper_bound = 1000
[docs] def check_constrained_metabolic(model, verbose=True): """Check the presence of constrained metabolic reactions. Metabolic reactions are here defined as those having more then 1 involved metabolites. Constrained reactions are here defined as those having bounds other then (0, 1000) or (-1000, 1000). Biomass assembly reactions are ignored. Args: model (cobra.Model): target model. verbose (bool): if ``False``, don't print. Returns: list: IDs of reactions with constrained metabolic reactions. """ found_rids = [] cnt = 0 biom_ids = search_biomass(model, verbose=False) for r in model.reactions: if len(r.metabolites) == 1 or r.id in biom_ids: continue # not interested in EXR/sink/demands, nor in biomass if r.bounds != (-1000, 1000) and r.bounds != (0, 1000): cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.bounds, ':', r.reaction) found_rids.append(r.id) if found_rids == []: if verbose: print("No constrained metabolic reactions found.") return found_rids
[docs] def check_mass_unbalances(model, threshold=1e-5, verbose=True): """Check the presence of mass-unbalaned reactions in the model. EExchange, sink, demand, and biomass reactions are ignored. Args: model (cobra.Model): target model. threshold (float): values below this treshold are considered as 0. verbose (bool): if ``False``, don't print. Returns: list: IDs of reactions with mass unbalanced. """ found_rids = [] cnt = 0 biom_ids = search_biomass(model, verbose=False) for r in model.reactions: # exchage, sink, demand, and biomass reactions are excluded. if len(r.metabolites)==1 or r.id in biom_ids: continue # get suggestions suggestions = r.check_mass_balance() # apply threshold: suggestions = {atom: suggestions[atom] for atom in suggestions.keys() if suggestions[atom] >= threshold} # exclude charge is present: suggestions = {atom: suggestions[atom] for atom in list(set(list(suggestions.keys())) - set(['charge']))} # log the suggestion: if len(suggestions.keys()) > 0: cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', suggestions) found_rids.append(r.id) if found_rids == []: if verbose: print("No mass-unbalanced reactions found.") return found_rids
[docs] def check_charge_unbalances(model, threshold=1e-5, verbose=True): """Check the presence of charge-unbalaned reactions in the model. Exchange, sink, demand, and biomass reactions are ignored. Args: model (cobra.Model): target model. threshold (float): values below this treshold are considered as 0. verbose (bool): if ``False``, don't print. Returns: list: IDs of reactions with charge unbalanced. """ found_rids = [] cnt = 0 biom_ids = search_biomass(model, verbose=False) for r in model.reactions: # exchage, sink, demand, and biomass reactions are excluded. if len(r.metabolites)==1 or r.id in biom_ids: continue # get suggestions suggestions = r.check_mass_balance() # retain only charge if present: suggestions = {atom: suggestions[atom] for atom in suggestions.keys() if atom == 'charge'} # apply threshold: suggestions = {atom: suggestions[atom] for atom in suggestions.keys() if suggestions[atom] >= threshold} # log the suggestion: if len(suggestions.keys()) > 0: cnt += 1 if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', suggestions) found_rids.append(r.id) if found_rids == []: if verbose: print("No charge-unbalanced reactions found.") return found_rids
[docs] def search_biomass(model, show_reaction=False, verbose=True): """Search for biomass reactions. Simple function involving just an exact match of substrings: 'biomass', 'growth', 'bof'. Args: model (cobra.Model): target model. show_reaction (bool): whether to show also the reaction string """ # define key objects: substrings = ['biomass', 'growth', 'bof'] found_rids = [] cnt = 0 def print_reaction(cnt, show_reaction, verbose): if verbose: if show_reaction: print(cnt, ':', r.id, ':', r.name, ':', r.reaction, ':', r.bounds) else: print(cnt, ':', r.id, ':', r.name, ':', r.bounds) # first try with reaction IDs: for r in model.reactions: for substring in substrings: if substring.lower() in r.id.lower(): cnt += 1 print_reaction(cnt, show_reaction, verbose) found_rids.append(r.id) if found_rids != []: return found_rids # then try with reaction names: for r in model.reactions: for substring in substrings: if substring.lower() in r.name.lower(): cnt += 1 print_reaction(cnt, show_reaction, verbose) found_rids.append(r.id) if found_rids != []: return found_rids # no biomass reactions found: if found_rids == []: if verbose: print("No biomass reactions found.") return found_rids
[docs] def sanity_report(model): """Print a small sanity report. Get a sanity report calling some of the gempipe.curate functions. Args: model (cobra.Model): target model. """ version = importlib.metadata.metadata("gempipe")["Version"] print(f"gempipe v{version} - sanity_report") print("model ID:", model.id) G = len(model.genes) R = len(model.reactions) M = len(model.metabolites) uM = len(set([m.id.rsplit('_', 1)[0] for m in model.metabolites])) groups = len(model.groups) print('G:', G, 'R:', R, 'M:', M, 'uM:', uM, 'groups:', groups) comps = [c for c in model.compartments] print("Compartments:", sorted(comps)) biom_ids = search_biomass(model, verbose=False) print("Biomass assemblies:", len(biom_ids), biom_ids) obj_ids = get_objectives(model) print("Objectives:", len(obj_ids), obj_ids) res = model.optimize() print("Optimization:", res.objective_value, res.status, f'({get_solver(model)})') print("Unconstrained LB-UB:", get_unconstrained_bounds(model)) print("Bad EX_change notation:", len(check_exr_notation(model, verbose=False))) print("Sinks/demands:", len(check_sinks_demands(model, verbose=False))) print("Constrained metabolic:", len(check_constrained_metabolic(model, verbose=False))) print("With 'artificial' atoms:", len(check_artificial_atoms(model, verbose=False))) print( "Missing formulas - charges:", len(check_missing_formulas(model, verbose=False)), '-', len(check_missing_charges(model, verbose=False)), ) print( "Mass - charge unbalances:", len(check_mass_unbalances(model, verbose=False)), '-', len(check_charge_unbalances(model, verbose=False)), )