import re
import importlib.metadata
import cobra
from gempipe.interface.gaps import get_solver
from gempipe.interface.gaps import get_objectives
from ..commons import fba_no_warnings
[docs]
def close_boundaries(model):
"""Set all the EX_change reactions to (0, 0).
Args:
model (cobra.Model): target model.
"""
for r in model.reactions:
if len(r.metabolites)==1 and r.id.startswith('EX_'):
r.bounds = (0, 0)
[docs]
def verify_egc(model, mid, escher=False, threshold=1e-5, verbose=True):
"""Test the presence of energy-generating cycles (EGCs).
Can also output a model for Escher, with just the reactions composing the cycle.
Args:
model (cobra.Model): target model. Must be encoded with the BiGG notation.
mid (str): metabolite ID for which the EGC must be checked. Warning: must be without compartment, so for example ``atp`` instead of ``atp_c``.
escher (bool): save a reduced ``cobra.Model`` in the current directory. To be loaded in Escher.
threshold (float): values below this treshold are considered as 0.
verbose (bool): if True, show results of the test even if no EGC was detected.
Returns:
bool: ``True`` if an EGC is detected.
"""
# changes as not permament:
with model:
# close all the exchange reactions:
close_boundaries(model)
# create a dissipation reaction:
dissip = cobra.Reaction(f'__dissip__{mid}')
model.add_reactions([dissip])
dissip = model.reactions.get_by_id(f'__dissip__{mid}')
# define the dissipation reaction:
modeled_mids = [m.id for m in model.metabolites]
if mid == 'atp':
dissip_string = 'atp_c + h2o_c --> adp_c + pi_c + h_c'
elif mid == 'ctp':
dissip_string = 'ctp_c + h2o_c --> cdp_c + pi_c + h_c'
elif mid == 'gtp':
dissip_string = 'gtp_c + h2o_c --> gdp_c + pi_c + h_c'
elif mid == 'utp':
dissip_string = 'utp_c + h2o_c --> udp_c + pi_c + h_c'
elif mid == 'itp':
dissip_string = 'itp_c + h2o_c --> idp_c + pi_c + h_c'
elif mid == 'nadh':
dissip_string = 'nadh_c --> nad_c + h_c'
elif mid == 'nadph':
dissip_string = 'nadph_c --> nadp_c + h_c'
elif mid == 'fadh2':
dissip_string = 'fadh2_c --> fad_c + 2.0 h_c'
elif mid == 'accoa':
dissip_string = 'accoa_c + h2o_c --> ac_c + coa_c + h_c'
elif mid == 'glu__L':
if 'nh4_c' in modeled_mids :
dissip_string = 'glu__L_c + h2o_c --> akg_c + nh4_c + 2.0 h_c'
elif 'nh3_c' in modeled_mids :
dissip_string = 'glu__L_c + h2o_c --> akg_c + nh3_c + 3.0 h_c'
else:
raise Exception("'nh4_c' or 'nh3_c' must be present in the model.")
elif mid == 'q8h2':
dissip_string = 'q8h2_c --> q8_c + 2.0 h_c'
else:
raise Exception("Metabolite ID (mid) not recognized.")
dissip.build_reaction_from_string(dissip_string)
# set the objective and optimize:
model.objective = f'__dissip__{mid}'
res, obj_value, status = fba_no_warnings(model)
# apply the threshold:
obj_value = res.objective_value
status = res.status
if abs(obj_value) < threshold:
obj_value = 0
# log some messages
if verbose:
print(dissip.reaction)
print(obj_value , ':', res.status )
# log some messages
if obj_value > 0 and status == 'optimal' and verbose:
# get suspect !=0 fluxes
fluxes = res.fluxes
print() # skip a line befor printing the EGC members
# get interesting fluxes (0.001 tries to take into account the approximation in glpk and cplex solvers)
fluxes_interesting = fluxes[((fluxes > 0.001) | (fluxes < -0.001)) & (fluxes.index != f'__dissip__{mid}')]
print(fluxes_interesting.to_string())
# create a model for escher
if escher:
model_copy = model.copy()
all_rids = [r.id for r in model_copy.reactions]
to_delete = set(all_rids) - set(fluxes_interesting.index)
model_copy.remove_reactions(to_delete)
cobra.io.save_json_model(model_copy, f'__dissip__{mid}' + '.json')
print(f'__dissip__{mid}' + '.json', "saved in current directory.")
if obj_value > 0 and status == 'optimal':
return True
else: return False
[docs]
def verify_egc_all(model):
"""Quickly check the presence of EGCs for the main metabolites.
Internally calls `verify_egc()` over a list of metabolite IDs.
Args:
model (cobra.Model): target model.
"""
mids_to_check = ['atp','ctp','gtp','utp','itp','nadh','nadph','fadh2','accoa','glu__L','q8h2']
all_results = []
for mid in mids_to_check:
result = verify_egc(model, mid, verbose=False)
all_results.append(~result)
if all(all_results):
print("No energy-generating cycles (EGCs) found.")
[docs]
def check_sinks(model, verbose=True):
"""Check presence of sink reactions.
Args:
model (cobra.Model): target model.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of sink reactions found.
"""
found_rids = []
cnt = 0
for r in model.reactions:
if len(list(r.metabolites))==1 and list(r.metabolites)[0].compartment!='e':
if 0 in r.bounds == False:
cnt += 1
if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', r.bounds)
found_rids.append(r.id)
if found_rids == []:
if verbose: print("No sink reactions found.")
return found_rids
[docs]
def check_demands(model, verbose=True):
"""Check presence of demand reactions.
Args:
model (cobra.Model): target model.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of demand reactions found.
"""
found_rids = []
cnt = 0
for r in model.reactions:
if len(list(r.metabolites))==1 and list(r.metabolites)[0].compartment!='e':
if 0 in r.bounds:
cnt += 1
if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', r.bounds)
found_rids.append(r.id)
if found_rids == []:
if verbose: print("No demand reactions found.")
return found_rids
[docs]
def check_sinks_demands(model, verbose=True):
"""Check presence of sink and demand reactions.
Args:
model (cobra.Model): target model.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of sink/demand reactions found.
"""
found_rids = []
cnt = 0
for r in model.reactions:
if len(list(r.metabolites))==1 and list(r.metabolites)[0].compartment!='e':
cnt += 1
if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', r.bounds)
found_rids.append(r.id)
if found_rids == []:
if verbose: print("No sink/demand reactions found.")
return found_rids
[docs]
def check_exr_notation(model, verbose=True):
"""Check that every EX_change reaction ID begins with ``EX_``.
Here EX_change reactions are defined as those reactions having just 1 metabolite involved, included in the extracellular compartment.
Args:
model (cobra.Model): target model.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of EX_change reactions with bad ID.
"""
found_rids = []
cnt = 0
for r in model.reactions:
if len(r.metabolites) == 1:
if list(r.metabolites)[0].id.rsplit('_', 1)[1] == 'e': # extracellular compartment
if r.id.startswith("EX_") == False:
cnt += 1
if verbose: print(cnt, ':', r.id, ':', r.reaction)
found_rids.append(r.id)
if found_rids == []:
if verbose: print("No EX_change reaction with bad ID found.")
return found_rids
[docs]
def remove_EX_annots(model):
"""Remove all annotations from EX_change reactions.
Args:
model (cobra.Model): target model.
"""
cnt = 0
for r in model.reactions:
if len(r.metabolites) == 1 and r.id.startswith("EX_"):
if r.annotation != {}:
r.annotation = {}
[docs]
def check_missing_charges(model, verbose=True):
"""Check if all metabolites have a charge attribute.
Args:
model (cobra.Model): target model.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of matabolites missing the charge attribute.
"""
found_mids = []
cnt = 0
for m in model.metabolites:
cnt += 1
charge = m.charge
if charge==None or type(charge)!=int:
if verbose: print(cnt, ':', m.id)
found_mids.append(m.id)
if found_mids == []:
if verbose: print("No metabolite with missing charge attribute found.")
return found_mids
[docs]
def check_artificial_atoms(model, preview=None, verbose=True):
"""Check if artificial atoms like 'R' and 'X' are present.
Args:
model (cobra.Model): target model.
preview (int): maximum number of metabolite IDs to show for each artificial atom. If ``None``, they will be all displayed.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of matabolites with artificial atoms.
"""
found_mids = []
atom_to_mids = {}
for m in model.metabolites:
formula = m.formula
if formula == None or formula=='':
continue # there are dedicated functions for this
# Matches any uppercase letter (A-Z) followed by zero or more lowercase letters (a-z)
atoms = set(re.findall(r'[A-Z][a-z]*', formula))
base = set(['C', 'H', 'O', 'N', 'P', 'S'])
metals = set(['Ag', 'Fe', 'Co', 'As', 'Ca', 'Cd', 'Cl', 'Cu', 'Hg', 'K', 'Mg', 'Mo', 'Na', 'Ni', 'Se', 'Zn', 'Mn'])
safe_atoms = base.union(metals)
strange = atoms - safe_atoms
# popupale te 'found_mids' list
if len(strange) > 0:
found_mids.append(m.id)
# iterate each 'strange' atom to create a dict:
for atom in strange:
if atom not in atom_to_mids.keys():
atom_to_mids[atom] = []
atom_to_mids[atom].append(f'{m.id}[{m.formula}]')
# show the create 'atom_to_mids' dict:
for i, atom in enumerate(atom_to_mids.keys()):
mids_to_show = ', '.join(atom_to_mids[atom])
if preview != None:
if len(atom_to_mids[atom]) > preview:
mids_to_show = f"{', '.join(atom_to_mids[atom][:preview])}, ... ({len(atom_to_mids[atom])} in total)"
if verbose: print(i+1, ':', atom, ':', mids_to_show)
if found_mids == []:
if verbose: print("No metabolite with artificial atoms found.")
return found_mids
[docs]
def get_unconstrained_bounds(model):
"""Get the minimum and maximum bounds used in the model.
Usually they correspond to the "unconstrained" negative and positive flux constants.
Args:
model (cobra.Model): target model.
Returns:
tuple: min and max bounds.
"""
un_lb, un_ub = 0, 0
for r in model.reactions:
if r.lower_bound < un_lb: un_lb = r.lower_bound
if r.upper_bound > un_ub: un_ub = r.upper_bound
return (un_lb, un_ub)
[docs]
def reset_unconstrained_bounds(model):
"""Set the uncontrained flux constants to 1000.
Args:
model (cobra.Model): target model.
"""
un_lb, un_ub = get_unconstrained_bounds(model)
for r in model.reactions:
if r.lower_bound == un_lb: r.lower_bound = -1000
if r.upper_bound == un_ub: r.upper_bound = 1000
[docs]
def check_mass_unbalances(model, threshold=1e-5, verbose=True):
"""Check the presence of mass-unbalaned reactions in the model.
EExchange, sink, demand, and biomass reactions are ignored.
Args:
model (cobra.Model): target model.
threshold (float): values below this treshold are considered as 0.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of reactions with mass unbalanced.
"""
found_rids = []
cnt = 0
biom_ids = search_biomass(model, verbose=False)
for r in model.reactions:
# exchage, sink, demand, and biomass reactions are excluded.
if len(r.metabolites)==1 or r.id in biom_ids:
continue
# get suggestions
suggestions = r.check_mass_balance()
# apply threshold:
suggestions = {atom: suggestions[atom] for atom in suggestions.keys() if suggestions[atom] >= threshold}
# exclude charge is present:
suggestions = {atom: suggestions[atom] for atom in list(set(list(suggestions.keys())) - set(['charge']))}
# log the suggestion:
if len(suggestions.keys()) > 0:
cnt += 1
if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', suggestions)
found_rids.append(r.id)
if found_rids == []:
if verbose: print("No mass-unbalanced reactions found.")
return found_rids
[docs]
def check_charge_unbalances(model, threshold=1e-5, verbose=True):
"""Check the presence of charge-unbalaned reactions in the model.
Exchange, sink, demand, and biomass reactions are ignored.
Args:
model (cobra.Model): target model.
threshold (float): values below this treshold are considered as 0.
verbose (bool): if ``False``, don't print.
Returns:
list: IDs of reactions with charge unbalanced.
"""
found_rids = []
cnt = 0
biom_ids = search_biomass(model, verbose=False)
for r in model.reactions:
# exchage, sink, demand, and biomass reactions are excluded.
if len(r.metabolites)==1 or r.id in biom_ids:
continue
# get suggestions
suggestions = r.check_mass_balance()
# retain only charge if present:
suggestions = {atom: suggestions[atom] for atom in suggestions.keys() if atom == 'charge'}
# apply threshold:
suggestions = {atom: suggestions[atom] for atom in suggestions.keys() if suggestions[atom] >= threshold}
# log the suggestion:
if len(suggestions.keys()) > 0:
cnt += 1
if verbose: print(cnt, ':', r.id, ':', r.reaction, ':', suggestions)
found_rids.append(r.id)
if found_rids == []:
if verbose: print("No charge-unbalanced reactions found.")
return found_rids
[docs]
def search_biomass(model, show_reaction=False, verbose=True):
"""Search for biomass reactions.
Simple function involving just an exact match of substrings: 'biomass', 'growth', 'bof'.
Args:
model (cobra.Model): target model.
show_reaction (bool): whether to show also the reaction string
"""
# define key objects:
substrings = ['biomass', 'growth', 'bof']
found_rids = []
cnt = 0
def print_reaction(cnt, show_reaction, verbose):
if verbose:
if show_reaction:
print(cnt, ':', r.id, ':', r.name, ':', r.reaction, ':', r.bounds)
else:
print(cnt, ':', r.id, ':', r.name, ':', r.bounds)
# first try with reaction IDs:
for r in model.reactions:
for substring in substrings:
if substring.lower() in r.id.lower():
cnt += 1
print_reaction(cnt, show_reaction, verbose)
found_rids.append(r.id)
if found_rids != []:
return found_rids
# then try with reaction names:
for r in model.reactions:
for substring in substrings:
if substring.lower() in r.name.lower():
cnt += 1
print_reaction(cnt, show_reaction, verbose)
found_rids.append(r.id)
if found_rids != []:
return found_rids
# no biomass reactions found:
if found_rids == []:
if verbose: print("No biomass reactions found.")
return found_rids
[docs]
def sanity_report(model):
"""Print a small sanity report.
Get a sanity report calling some of the gempipe.curate functions.
Args:
model (cobra.Model): target model.
"""
version = importlib.metadata.metadata("gempipe")["Version"]
print(f"gempipe v{version} - sanity_report")
print("model ID:", model.id)
G = len(model.genes)
R = len(model.reactions)
M = len(model.metabolites)
uM = len(set([m.id.rsplit('_', 1)[0] for m in model.metabolites]))
groups = len(model.groups)
print('G:', G, 'R:', R, 'M:', M, 'uM:', uM, 'groups:', groups)
comps = [c for c in model.compartments]
print("Compartments:", sorted(comps))
biom_ids = search_biomass(model, verbose=False)
print("Biomass assemblies:", len(biom_ids), biom_ids)
obj_ids = get_objectives(model)
print("Objectives:", len(obj_ids), obj_ids)
res = model.optimize()
print("Optimization:", res.objective_value, res.status, f'({get_solver(model)})')
print("Unconstrained LB-UB:", get_unconstrained_bounds(model))
print("Bad EX_change notation:", len(check_exr_notation(model, verbose=False)))
print("Sinks/demands:", len(check_sinks_demands(model, verbose=False)))
print("Constrained metabolic:", len(check_constrained_metabolic(model, verbose=False)))
print("With 'artificial' atoms:", len(check_artificial_atoms(model, verbose=False)))
print(
"Missing formulas - charges:",
len(check_missing_formulas(model, verbose=False)), '-',
len(check_missing_charges(model, verbose=False)),
)
print(
"Mass - charge unbalances:",
len(check_mass_unbalances(model, verbose=False)), '-',
len(check_charge_unbalances(model, verbose=False)),
)