Source code for pathme.wikipathways.convert_to_bel

# -*- coding: utf-8 -*-

"""This module contains the methods to convert a WikiPathways RDF network into a BELGraph."""

import logging
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple

import pybel
from bio2bel_hgnc import Manager
from pybel import BELGraph
from pybel.dsl import BaseEntity, abundance, activity, bioprocess, complex_abundance, gene, protein, rna

from .utils import check_multiple, evaluate_wikipathways_metadata, get_valid_gene_identifier
from ..constants import ACTIVITY_ALLOWED_MODIFIERS, HGNC
from ..utils import add_bel_metadata, parse_id_uri

__all__ = [
    'convert_to_bel',
]

log = logging.getLogger(__name__)


def _check_empty_complex(complex: Dict[str, Dict], nodes: Dict[str, BaseEntity]):
    members = {
        nodes[member_id]
        for member_id in complex['participants']
        if member_id in nodes
    }
    if not members:
        return False

    return True


[docs]def convert_to_bel( nodes: Dict[str, Dict], complexes: Dict[str, Dict], interactions: Dict[str, Dict], pathway_info, hgnc_manager: Manager, ) -> BELGraph: """Convert RDF graph info to BEL.""" graph = BELGraph( name=pathway_info['title'], version='1.0.0', description=evaluate_wikipathways_metadata(pathway_info['description']), authors="Sarah Mubeen, Daniel Domingo-Fernández & Josep Marín-Llaó", contact='daniel.domingo.fernandez@scai.fraunhofer.de', ) add_bel_metadata(graph) pathway_id = graph.graph['pathway_id'] = pathway_info['pathway_id'] nodes = { node_id: node_to_bel(node, hgnc_manager, pathway_id) for node_id, node in nodes.items() } nodes.update(complexes_to_bel(complexes, nodes, graph)) for interaction_key, interaction in interactions.items(): participants = interaction['participants'] add_edges(graph, participants, nodes, interactions, interaction) return graph
def node_to_bel(node: Dict, hgnc_manager: Manager, pathway_id) -> BaseEntity: """Create a BEL node.""" node_types = node['node_types'] uri_id = node['uri_id'] # Get identifier from if exists else use uri_id as identifier if 'identifier' in node: identifier = node['identifier'] else: identifier = uri_id identifier = check_multiple(identifier, 'identifier', pathway_id) uri_id = check_multiple(uri_id, 'uri_id', pathway_id) _, _, namespace, _ = parse_id_uri(uri_id) name = check_multiple(node['name'], 'name', pathway_id) # Get dictinoary of multiple identifiers if 'identifiers' in node: node_ids_dict = node['identifiers'] else: node_ids_dict = node if any(node_type in node_types for node_type in ('Protein', 'Rna', 'GeneProduct')): namespace, name, identifier = get_valid_gene_identifier(node_ids_dict, hgnc_manager, pathway_id) if 'Protein' in node_types: return protein(namespace=namespace.upper(), name=name, identifier=identifier) elif 'Rna' in node_types: return rna(namespace=namespace.upper(), name=name, identifier=identifier) else: # 'GeneProduct' in node_types return gene(namespace=HGNC, name=name, identifier=identifier) elif 'Metabolite' in node_types: # Parse URI to get namespace _, _, namespace, _ = parse_id_uri(uri_id) return abundance(namespace=namespace.upper(), name=name, identifier=identifier) elif '/wikipathways/WP' in str(uri_id) and {'DataNode'} == node_types: # Check the uri_id if is a Pathway _, _, namespace, _ = parse_id_uri(uri_id) return bioprocess(namespace=namespace.upper(), name=name, identifier=identifier) elif 'DataNode' in node_types: # Parse URI to get namespace _, _, namespace, _ = parse_id_uri(uri_id) return abundance(namespace=namespace.upper(), name=name, identifier=identifier) else: log.debug('Unknown %s [pathway=%s]', node_types, pathway_id) def complexes_to_bel( complexes: Dict[str, Dict], nodes: Dict[str, BaseEntity], graph: BELGraph, ) -> Dict[str, BaseEntity]: """Convert node to BEL.""" return { complex_id: complex_to_bel(complex_dict, nodes, graph) for complex_id, complex_dict in complexes.items() if _check_empty_complex(complex_dict, nodes) } def complex_to_bel(complex, nodes, graph: BELGraph): """Convert complex abundance to BEL.""" members = list({ nodes[member_id] for member_id in complex['participants'] if member_id in nodes }) _, _, _, identifier = parse_id_uri(complex['uri_id']) complex_bel_node = complex_abundance(members=members, identifier=identifier) graph.add_node_from_data(complex_bel_node) return complex_bel_node def get_reaction_node( participants: Iterable[Tuple[str, str]], nodes, interactions, ) -> pybel.dsl.Reaction: reactants = set() products = set() for source, target in participants: source = get_node(source, nodes, interactions) if source: reactants.add(source) else: logging.debug(f'Could not find source for reaction: {source}') target = get_node(target, nodes, interactions) if target: products.add(target) else: logging.debug(f'Could not find target for reaction: {target}') return pybel.dsl.Reaction(reactants=reactants, products=products) def get_node( node: str, nodes: Mapping[str, BaseEntity], interactions: Mapping[str, Any], ) -> Optional[BaseEntity]: if node in nodes: return nodes[node] if '/Interaction/' in str(node): _, _, _, identifier = parse_id_uri(node) if identifier in interactions: return get_reaction_node(interactions[identifier]['participants'], nodes, interactions) log.debug('No valid id for node %s', node) def add_edges(graph: BELGraph, participants, nodes, interactions: Dict, att: Dict): """Add edges to BELGraph.""" uri_id = att['uri_id'] edge_types = att['interaction_types'] _, _, namespace, interaction_id = parse_id_uri(uri_id) if 'Conversion' in edge_types: graph.add_node_from_data(get_reaction_node(participants, nodes, interactions)) else: for source, target in participants: u = get_node(source, nodes, interactions) v = get_node(target, nodes, interactions) if u and v: add_simple_edge(graph, u, v, edge_types, uri_id) if u is None: log.debug(f'Source is none: {source}') if v is None: log.debug(f'Target is none: {target}') def add_simple_edge(graph: BELGraph, u: BaseEntity, v: BaseEntity, edge_types, uri_id): """Add simple edge to graph. :param graph: BEL Graph :param u: source :param v: target :param edge_types: edge type dict :param uri_id: citation URI """ if 'Stimulation' in edge_types: graph.add_increases( u, v, citation=uri_id, evidence='Extracted from WikiPathways', object_modifier=activity() if isinstance(v, ACTIVITY_ALLOWED_MODIFIERS) else None, annotations={}, ) elif 'Inhibition' in edge_types: graph.add_decreases( u, v, citation=uri_id, evidence='Extracted from WikiPathways', object_modifier=activity() if isinstance(v, ACTIVITY_ALLOWED_MODIFIERS) else None, annotations={}, ) elif 'Catalysis' in edge_types: graph.add_increases( u, v, citation=uri_id, evidence='Extracted from WikiPathways', object_modifier=activity() if isinstance(v, ACTIVITY_ALLOWED_MODIFIERS) else None, annotations={}, ) elif 'DirectedInteraction' in edge_types: graph.add_regulates( u, v, citation=uri_id, evidence='Extracted from WikiPathways', annotations={ 'EdgeTypes': edge_types, }, ) elif 'Interaction' in edge_types: log.debug('No interaction subtype for %s', str(uri_id)) elif 'TranscriptionTranslation' in edge_types: graph.add_translation(u, v) else: log.debug('No handled edge type %s', str(uri_id))