Module scenographer.relations

RelationDAG

Expand source code
"""
RelationDAG
"""

from functools import lru_cache
from typing import Iterable, List, Mapping, NamedTuple, Tuple, Type

import matplotlib.pyplot as plot
import networkx
from loguru import logger
from networkx import DiGraph
from sqlalchemy import ForeignKey, MetaData
from sqlalchemy.schema import Column, Table
from sqlalchemy_utils import get_referencing_foreign_keys

from scenographer.database import Database, UUIDField


class Relation(NamedTuple):
    "Container for the associated pair of primary key and foreign key"
    # Referenced column: the column the foreign key points at.
    pk: Column
    # Referer column: the column that holds the foreign key.
    fk: Column

    @property
    def edge(self) -> Tuple[Table, Table, Mapping[str, "Relation"]]:
        """
        Returns a tuple that is accepted by networkx lib as a graph edge.

        The tuple is (table of `pk`, table of `fk`, attributes), where the
        attributes mapping stores this relation under the "relation" key.
        """
        return (self.pk.table, self.fk.table, {"relation": self})

    def __repr__(self) -> str:
        "REPL representation of a relation"
        pk_ref = f"database_tables.{self.pk.table.name}.c.{self.pk.name}"
        fk_ref = f"database_tables.{self.fk.table.name}.c.{self.fk.name}"
        return f"Relation(pk={pk_ref},fk={fk_ref})"

    @classmethod
    def from_foreign_key(cls: Type["Relation"], foreign_key: ForeignKey) -> "Relation":
        "Constructor for a relation given a ForeignKey object"
        return cls(
            pk=foreign_key.column,  # referenced column
            fk=foreign_key.parent,  # referer column
        )

    @classmethod
    def from_tables(
        cls: Type["Relation"], tables: Iterable[Table]
    ) -> List["Relation"]:
        """
        Constructor for a list of relations given a collection of tables.
        Creates one relation for each foreign key that
        `get_referencing_foreign_keys` returns for each of the tables.
        """
        return [
            cls.from_foreign_key(foreign_key)
            for table in tables
            for foreign_key in get_referencing_foreign_keys(table)
        ]


class RelationDAG(NamedTuple):
    "Wrapper for operations around the graph of relations"
    # Directed graph whose nodes are tables; each edge carries the
    # Relation it was built from under the "relation" attribute.
    graph: DiGraph

    @property
    @lru_cache()
    def tables(self) -> List[Table]:
        "Lists all tables taken into consideration for sampling"
        return self.graph.nodes

    @property
    @lru_cache()
    def entrypoints(self) -> List[Table]:
        "Lists tables which have no foreign keys"
        # Edges point at the table holding the foreign key, so a table
        # without incoming edges has no foreign keys in this graph.
        return [
            table
            for table, degree in self.graph.in_degree(self.graph.nodes)
            if degree == 0
        ]

    @property
    @lru_cache()
    def topologically_sorted(self) -> Iterable[Table]:
        """
        Returns all tables ordered in a way that
        if table X has a foreign key to Y, Y will always come first
        """
        # networkx.topological_sort yields lazily. The result is cached, so
        # it has to be materialised: a cached generator would be exhausted
        # after its first consumer and look empty to every later one.
        return list(networkx.topological_sort(self.graph))

    def write_plot(self, filepath: str = "graph.png") -> None:
        "Meh graph image representation"
        logger.debug("Writing image file with graph")
        pos = networkx.drawing.nx_agraph.graphviz_layout(self.graph, prog="dot")

        # The figure must exist before the title is set, otherwise the
        # title ends up on an implicit figure that is never saved.
        figure = plot.figure(figsize=(40, 40))
        try:
            plot.title("RelationDAG")

            networkx.draw_networkx_nodes(self.graph, pos, node_size=5000, alpha=0.9)
            networkx.draw_networkx_edges(
                self.graph,
                pos,
                arrows=True,
                node_size=5000,
            )
            networkx.draw_networkx_labels(self.graph, pos)

            plot.axis("off")
            plot.savefig(filepath)
        finally:
            # Release the figure so repeated calls don't pile up figures.
            plot.close(figure)

    def write_dot(self, filepath: str = "graph.dot") -> None:
        "Meh graph image representation. File must be rendered with `dot`"
        logger.debug("Writing dot file with graph")
        networkx.drawing.nx_agraph.write_dot(self.graph, filepath)

    @property
    @lru_cache()
    def key_schema(self) -> MetaData:
        """
        Create schema with only the primary keys and foreign keys of each table.
        Ensures that the resulting schema is compatible with sqlite3
        """
        metadata = MetaData()

        for table in self.graph.nodes:
            # Edges point from the referenced table to the referring one, so
            # the incoming edges of a table hold the relations whose foreign
            # keys are present in this table.
            relations = [
                attributes["relation"]
                for _, _, attributes in self.graph.in_edges(table, data=True)
            ]

            # We get PK from table data instead of relation data,
            # because a primary key doesn't necessarily form a relation.
            # Assumes PK either is a single column or it doesn't exist.
            primary_key = [
                Column(c.name, c.type, primary_key=True)
                for c in table.primary_key.columns
            ][:1]

            foreign_keys = [
                Column(r.fk.name, r.fk.type, ForeignKey(str(r.pk)))
                for r in relations
            ]
            columns = [*primary_key, *foreign_keys]

            # Map postgres UUIDs into sqlite compatible UUIDs
            for column in columns:
                type_name = str(column.type)
                if type_name not in ["UUID", "BIGINT", "INTEGER"]:
                    logger.warning(
                        "Key column {} has weird type {}",
                        f"{table.name}.{column.name}",
                        type_name,
                    )
                if type_name == "UUID":
                    column.type = UUIDField()

            # Instantiating the Table registers it in `metadata`.
            Table(table.name, metadata, *columns)

        return metadata

    @classmethod
    def from_graph(cls: Type["RelationDAG"], graph: DiGraph) -> "RelationDAG":
        """
        Instantiates a RelationDAG from a networkx DiGraph.
        It makes the graph immutable and raises if the graph is not a DAG.

        Raises ValueError when the graph contains a cycle.
        """
        new = cls(networkx.freeze(graph))
        if not networkx.is_directed_acyclic_graph(new.graph):
            message = "Generated graph is not a DAG."
            logger.error(message)
            raise ValueError(message)

        logger.debug(
            "DAG contains {} nodes and {} edges",
            len(graph.nodes),
            len(graph.edges),
        )
        return new

    @classmethod
    def load(
        cls: Type["RelationDAG"],
        database: Database,
        extend_relations: List[Relation],
        ignore_relations: List[Relation],
        ignore_tables: List[Table],
    ) -> "RelationDAG":
        """
        Create a RelationDAG.
        The data loaded from this method is sourced
        from the database and from the user config
        """
        graph = DiGraph(name="RelationDAG")

        # Get actual table instances
        tables = database.tables.__dict__.values()

        # Create relations from table data and add the ones specified in settings
        relations = Relation.from_tables(tables) + extend_relations

        # Create graph
        graph.add_nodes_from(tables)
        graph.add_edges_from([relation.edge for relation in relations])

        # Remove excluded entities (tables and relations) from the created graph
        graph.remove_edges_from([relation.edge for relation in ignore_relations])
        graph.remove_nodes_from(ignore_tables)

        # Create RelationDAG instance
        return cls.from_graph(graph)

    def __str__(self) -> str:
        "Return some useful information about the graph"
        # networkx.info was removed in networkx 3.0; fall back to the
        # graph's own string form when it is not available.
        describe = getattr(networkx, "info", None)
        if describe is None:
            return str(self.graph)
        return describe(self.graph)

    def __repr__(self) -> str:
        "For REPL use. Should work with only RelationDAG in scope."
        return "RelationDAG"

Classes

class Relation (pk: sqlalchemy.sql.schema.Column, fk: sqlalchemy.sql.schema.Column)

Container for the associated pair of primary key and foreign key

Expand source code
class Relation(NamedTuple):
    "Container for the associated pair of primary key and foreign key"
    # Referenced column: the column the foreign key points at.
    pk: Column
    # Referer column: the column that holds the foreign key.
    fk: Column

    @property
    def edge(self) -> Tuple[Table, Table, Mapping[str, "Relation"]]:
        """
        Returns a tuple that is accepted by networkx lib as a graph edge.

        The tuple is (table of `pk`, table of `fk`, attributes), where the
        attributes mapping stores this relation under the "relation" key.
        """
        return (self.pk.table, self.fk.table, {"relation": self})

    def __repr__(self) -> str:
        "REPL representation of a relation"
        pk_ref = f"database_tables.{self.pk.table.name}.c.{self.pk.name}"
        fk_ref = f"database_tables.{self.fk.table.name}.c.{self.fk.name}"
        return f"Relation(pk={pk_ref},fk={fk_ref})"

    @classmethod
    def from_foreign_key(cls: Type["Relation"], foreign_key: ForeignKey) -> "Relation":
        "Constructor for a relation given a ForeignKey object"
        return cls(
            pk=foreign_key.column,  # referenced column
            fk=foreign_key.parent,  # referer column
        )

    @classmethod
    def from_tables(
        cls: Type["Relation"], tables: Iterable[Table]
    ) -> List["Relation"]:
        """
        Constructor for a list of relations given a collection of tables.
        Creates one relation for each foreign key that
        `get_referencing_foreign_keys` returns for each of the tables.
        """
        return [
            cls.from_foreign_key(foreign_key)
            for table in tables
            for foreign_key in get_referencing_foreign_keys(table)
        ]

Ancestors

  • builtins.tuple

Static methods

def from_foreign_key(foreign_key: sqlalchemy.sql.schema.ForeignKey) ‑> Relation

Constructor for a relation given a ForeignKey object

Expand source code
@classmethod
def from_foreign_key(cls: Type["Relation"], foreign_key: ForeignKey) -> "Relation":
    """
    Build a relation out of a single ForeignKey object.

    The referenced column of the key becomes `pk` and the referer
    column (the key's parent) becomes `fk`.
    """
    referenced_column = foreign_key.column
    referer_column = foreign_key.parent
    return cls(pk=referenced_column, fk=referer_column)
def from_tables(tables: List[sqlalchemy.sql.schema.Table]) ‑> List[Relation]

Constructor for a list of relations given a list of tables. Creates one relation for each foreign key constraint associated with each table.

Expand source code
@classmethod
def from_tables(
    cls: Type["Relation"], tables: Iterable[Table]
) -> List["Relation"]:
    """
    Constructor for a list of relations given a collection of tables.
    Creates one relation for each foreign key that
    `get_referencing_foreign_keys` returns for each of the tables.
    """
    return [
        cls.from_foreign_key(foreign_key)
        for table in tables
        for foreign_key in get_referencing_foreign_keys(table)
    ]

Instance variables

var edge : Tuple[sqlalchemy.sql.schema.Table, sqlalchemy.sql.schema.Table, Mapping[str, sqlalchemy.sql.schema.Column]]

Returns a tuple that is accepted by networkx lib as a graph edge

Expand source code
@property
def edge(self) -> Tuple[Table, Table, Mapping[str, "Relation"]]:
    """
    Returns a tuple that is accepted by networkx lib as a graph edge.

    The tuple is (table of `pk`, table of `fk`, attributes), where the
    attributes mapping stores this relation under the "relation" key.
    """
    return (self.pk.table, self.fk.table, {"relation": self})
var fk : sqlalchemy.sql.schema.Column

Alias for field number 1

var pk : sqlalchemy.sql.schema.Column

Alias for field number 0

class RelationDAG (graph: networkx.classes.digraph.DiGraph)

Wrapper for operations around the graph of relations

Expand source code
class RelationDAG(NamedTuple):
    "Wrapper for operations around the graph of relations"
    # Directed graph whose nodes are tables; each edge carries the
    # Relation it was built from under the "relation" attribute.
    graph: DiGraph

    @property
    @lru_cache()
    def tables(self) -> List[Table]:
        "Lists all tables taken into consideration for sampling"
        return self.graph.nodes

    @property
    @lru_cache()
    def entrypoints(self) -> List[Table]:
        "Lists tables which have no foreign keys"
        # Edges point at the table holding the foreign key, so a table
        # without incoming edges has no foreign keys in this graph.
        return [
            table
            for table, degree in self.graph.in_degree(self.graph.nodes)
            if degree == 0
        ]

    @property
    @lru_cache()
    def topologically_sorted(self) -> Iterable[Table]:
        """
        Returns all tables ordered in a way that
        if table X has a foreign key to Y, Y will always come first
        """
        # networkx.topological_sort yields lazily. The result is cached, so
        # it has to be materialised: a cached generator would be exhausted
        # after its first consumer and look empty to every later one.
        return list(networkx.topological_sort(self.graph))

    def write_plot(self, filepath: str = "graph.png") -> None:
        "Meh graph image representation"
        logger.debug("Writing image file with graph")
        pos = networkx.drawing.nx_agraph.graphviz_layout(self.graph, prog="dot")

        # The figure must exist before the title is set, otherwise the
        # title ends up on an implicit figure that is never saved.
        figure = plot.figure(figsize=(40, 40))
        try:
            plot.title("RelationDAG")

            networkx.draw_networkx_nodes(self.graph, pos, node_size=5000, alpha=0.9)
            networkx.draw_networkx_edges(
                self.graph,
                pos,
                arrows=True,
                node_size=5000,
            )
            networkx.draw_networkx_labels(self.graph, pos)

            plot.axis("off")
            plot.savefig(filepath)
        finally:
            # Release the figure so repeated calls don't pile up figures.
            plot.close(figure)

    def write_dot(self, filepath: str = "graph.dot") -> None:
        "Meh graph image representation. File must be rendered with `dot`"
        logger.debug("Writing dot file with graph")
        networkx.drawing.nx_agraph.write_dot(self.graph, filepath)

    @property
    @lru_cache()
    def key_schema(self) -> MetaData:
        """
        Create schema with only the primary keys and foreign keys of each table.
        Ensures that the resulting schema is compatible with sqlite3
        """
        metadata = MetaData()

        for table in self.graph.nodes:
            # Edges point from the referenced table to the referring one, so
            # the incoming edges of a table hold the relations whose foreign
            # keys are present in this table.
            relations = [
                attributes["relation"]
                for _, _, attributes in self.graph.in_edges(table, data=True)
            ]

            # We get PK from table data instead of relation data,
            # because a primary key doesn't necessarily form a relation.
            # Assumes PK either is a single column or it doesn't exist.
            primary_key = [
                Column(c.name, c.type, primary_key=True)
                for c in table.primary_key.columns
            ][:1]

            foreign_keys = [
                Column(r.fk.name, r.fk.type, ForeignKey(str(r.pk)))
                for r in relations
            ]
            columns = [*primary_key, *foreign_keys]

            # Map postgres UUIDs into sqlite compatible UUIDs
            for column in columns:
                type_name = str(column.type)
                if type_name not in ["UUID", "BIGINT", "INTEGER"]:
                    logger.warning(
                        "Key column {} has weird type {}",
                        f"{table.name}.{column.name}",
                        type_name,
                    )
                if type_name == "UUID":
                    column.type = UUIDField()

            # Instantiating the Table registers it in `metadata`.
            Table(table.name, metadata, *columns)

        return metadata

    @classmethod
    def from_graph(cls: Type["RelationDAG"], graph: DiGraph) -> "RelationDAG":
        """
        Instantiates a RelationDAG from a networkx DiGraph.
        It makes the graph immutable and raises if the graph is not a DAG.

        Raises ValueError when the graph contains a cycle.
        """
        new = cls(networkx.freeze(graph))
        if not networkx.is_directed_acyclic_graph(new.graph):
            message = "Generated graph is not a DAG."
            logger.error(message)
            raise ValueError(message)

        logger.debug(
            "DAG contains {} nodes and {} edges",
            len(graph.nodes),
            len(graph.edges),
        )
        return new

    @classmethod
    def load(
        cls: Type["RelationDAG"],
        database: Database,
        extend_relations: List[Relation],
        ignore_relations: List[Relation],
        ignore_tables: List[Table],
    ) -> "RelationDAG":
        """
        Create a RelationDAG.
        The data loaded from this method is sourced
        from the database and from the user config
        """
        graph = DiGraph(name="RelationDAG")

        # Get actual table instances
        tables = database.tables.__dict__.values()

        # Create relations from table data and add the ones specified in settings
        relations = Relation.from_tables(tables) + extend_relations

        # Create graph
        graph.add_nodes_from(tables)
        graph.add_edges_from([relation.edge for relation in relations])

        # Remove excluded entities (tables and relations) from the created graph
        graph.remove_edges_from([relation.edge for relation in ignore_relations])
        graph.remove_nodes_from(ignore_tables)

        # Create RelationDAG instance
        return cls.from_graph(graph)

    def __str__(self) -> str:
        "Return some useful information about the graph"
        # networkx.info was removed in networkx 3.0; fall back to the
        # graph's own string form when it is not available.
        describe = getattr(networkx, "info", None)
        if describe is None:
            return str(self.graph)
        return describe(self.graph)

    def __repr__(self) -> str:
        "For REPL use. Should work with only RelationDAG in scope."
        return "RelationDAG"

Ancestors

  • builtins.tuple

Static methods

def from_graph(graph: networkx.classes.digraph.DiGraph) ‑> RelationDAG

Instantiates a RelationDAG from a networkx DiGraph. It makes the graph immutable and raises if the graph is not a DAG.

Expand source code
@classmethod
def from_graph(cls: Type["RelationDAG"], graph: DiGraph) -> "RelationDAG":
    """
    Instantiates a RelationDAG from a networkx DiGraph.
    It makes the graph immutable and raises if the graph is not a DAG.

    Raises ValueError when the graph contains a cycle.
    """
    new = cls(networkx.freeze(graph))
    if not networkx.is_directed_acyclic_graph(new.graph):
        # Carry the explanation in the exception too, so callers that
        # only see the ValueError know what went wrong.
        message = "Generated graph is not a DAG."
        logger.error(message)
        raise ValueError(message)

    logger.debug(
        "DAG contains {} nodes and {} edges",
        len(graph.nodes),
        len(graph.edges),
    )
    return new
def load(database: Database, extend_relations: List[Relation], ignore_relations: List[Relation], ignore_tables: List[sqlalchemy.sql.schema.Table]) ‑> RelationDAG

Create a RelationDAG. The data loaded from this method is sourced from the database and from the user config.

Expand source code
@classmethod
def load(
    cls: Type["RelationDAG"],
    database: Database,
    extend_relations: List[Relation],
    ignore_relations: List[Relation],
    ignore_tables: List[Table],
) -> "RelationDAG":
    """
    Build a RelationDAG out of the database tables and the user config.

    Relations found on the tables are combined with `extend_relations`;
    afterwards `ignore_relations` and `ignore_tables` are taken out of
    the graph before it is handed to `from_graph`.
    """
    # The table instances are the attribute values of `database.tables`
    all_tables = database.tables.__dict__.values()

    # Relations discovered from the tables, plus the ones from settings
    all_relations = Relation.from_tables(all_tables) + extend_relations
    edges_to_add = [relation.edge for relation in all_relations]
    edges_to_drop = [relation.edge for relation in ignore_relations]

    # Populate the graph first, then strip the excluded entities
    dag = DiGraph(name="RelationDAG")
    dag.add_nodes_from(all_tables)
    dag.add_edges_from(edges_to_add)
    dag.remove_edges_from(edges_to_drop)
    dag.remove_nodes_from(ignore_tables)

    return cls.from_graph(dag)

Instance variables

var entrypoints : List[sqlalchemy.sql.schema.Table]

Lists tables which have no foreign keys

Expand source code
@property
@lru_cache()
def entrypoints(self) -> List[Table]:
    "Tables without any foreign key, i.e. graph nodes with in-degree zero"
    roots = []
    for table, degree in self.graph.in_degree(self.graph.nodes):
        if degree == 0:
            roots.append(table)
    return roots
var graph : networkx.classes.digraph.DiGraph

Alias for field number 0

var key_schema : sqlalchemy.sql.schema.MetaData

Create schema with only the primary keys and foreign keys of each table. Ensures that the resulting schema is compatible with sqlite3

Expand source code
@property
@lru_cache()
def key_schema(self) -> MetaData:
    """
    Build a MetaData holding, for every table of the graph, a table
    with just its primary key and foreign key columns.
    UUID typed key columns are swapped to UUIDField so that the
    resulting schema is compatible with sqlite3
    """
    schema = MetaData()

    for table in self.graph.nodes:
        # Incoming edges of a table carry the relations whose
        # foreign keys are present in this table.
        relations = [
            attributes["relation"]
            for _, _, attributes in self.graph.in_edges(table, data=True)
        ]

        # The PK comes from the table itself rather than from a relation,
        # since a primary key doesn't necessarily form a relation.
        # Assumes PK either is a single column or it doesn't exist.
        pk_columns = [
            Column(pk.name, pk.type, primary_key=True)
            for pk in table.primary_key.columns
        ][:1]

        fk_columns = [
            Column(relation.fk.name, relation.fk.type, ForeignKey(str(relation.pk)))
            for relation in relations
        ]

        key_columns = pk_columns + fk_columns

        for key_column in key_columns:
            type_name = str(key_column.type)
            if type_name not in ["UUID", "BIGINT", "INTEGER"]:
                logger.warning(
                    "Key column {} has weird type {}",
                    f"{table.name}.{key_column.name}",
                    type_name,
                )
            # Map postgres UUIDs into sqlite compatible UUIDs
            if type_name == "UUID":
                key_column.type = UUIDField()

        # Instantiating the Table registers it in `schema`.
        Table(table.name, schema, *key_columns)

    return schema
var tables : List[sqlalchemy.sql.schema.Table]

Lists all tables taken into consideration for sampling

Expand source code
@property
@lru_cache()
def tables(self) -> List[Table]:
    "All tables taken into consideration for sampling (the graph nodes)"
    nodes = self.graph.nodes
    return nodes
var topologically_sorted : Iterable[sqlalchemy.sql.schema.Table]

Returns all tables ordered in a way that if table X has a foreign key to Y, Y will always come first

Expand source code
@property
@lru_cache()
def topologically_sorted(self) -> Iterable[Table]:
    """
    Returns all tables ordered in a way that
    if table X has a foreign key to Y, Y will always come first
    """
    # networkx.topological_sort yields lazily. The result is cached, so
    # it has to be materialised: a cached generator would be exhausted
    # after its first consumer and look empty to every later one.
    return list(networkx.topological_sort(self.graph))

Methods

def write_dot(self, filepath: str = 'graph.dot') ‑> NoneType

Meh graph image representation. File must be rendered with dot

Expand source code
def write_dot(self, filepath: str = "graph.dot") -> None:
    """
    Dump the graph to `filepath` in dot format.
    The file must be rendered with `dot` to get an image.
    """
    logger.debug("Writing dot file with graph")
    dot_writer = networkx.drawing.nx_agraph.write_dot
    dot_writer(self.graph, filepath)
def write_plot(self, filepath: str = 'graph.png') ‑> NoneType

Meh graph image representation

Expand source code
def write_plot(self, filepath: str = "graph.png") -> None:
    "Meh graph image representation, saved to `filepath`"
    logger.debug("Writing image file with graph")
    pos = networkx.drawing.nx_agraph.graphviz_layout(self.graph, prog="dot")

    # The figure must exist before the title is set, otherwise the
    # title ends up on an implicit figure that is never saved.
    figure = plot.figure(figsize=(40, 40))
    try:
        plot.title("RelationDAG")

        networkx.draw_networkx_nodes(self.graph, pos, node_size=5000, alpha=0.9)
        networkx.draw_networkx_edges(
            self.graph,
            pos,
            arrows=True,
            node_size=5000,
        )
        networkx.draw_networkx_labels(self.graph, pos)

        plot.axis("off")
        plot.savefig(filepath)
    finally:
        # Release the figure so repeated calls don't pile up figures.
        plot.close(figure)