Source code for multicorn

# -*- coding: utf-8 -*-
"""
Base multicorn module.

This module contains all the python code needed by the multicorn C extension
to postgresql.

You should install it in the python path available to the user running
postgresql (usually, the system wide python installation).

"""

import sys
from collections import namedtuple
try:
    from collections import OrderedDict
except ImportError:
    import collections
    from ordereddict import OrderedDict
    collections.OrderedDict = OrderedDict

__version__ = '__VERSION__'

ANY = object()
ALL = object()
UNBOUND = object()


SortKey = namedtuple("SortKey",
                              ["attname", "attnum", "is_reversed",
                               "nulls_first", "collate"])

"""
A SortKey describes the sort of one column an SQL query requested.

A query can request the sort of zero, one or multiple columns. Therefore, a list
of SortKey is provided to the ForeignDataWrapper, containing zero, one or more
SortKey.

Attributes:
    attname(str):       The name of the column to sort as defined in the postgresql
        table.
    attnum(int):        The position of the column to sort as defined in the
        postgresql table.
    is_reversed(bool):  True is the query requested a DESC order.
    nulls_first(bool):  If True, NULL values must appears at the beginning.
        Otherwise, they must appear at the end.
    collate(str):       The collation name to use to sort the data, as appearing
        in the postgresql cluster.
"""

[docs]class Qual(object): """A Qual describes a postgresql qualifier. A qualifier is here defined as an expression of the type:: col_name operator value For example:: mycolumn > 3 mycolumn = ANY(1,2,3) mycolumn ~~ ALL('A%','AB%', '%C') Attributes: field_name (str): The name of the column as defined in the postgresql table. operator (str or tuple): The name of the operator if a string. Example: =, <=, ~~ (for a like clause) If it is a tuple, then the tuple is of the form (operator name, ANY or ALL). The tuple represents a comparison of the form WHERE field = ANY(1, 2, 3), which is the internal representation of WHERE field IN (1, 2, 3) value (object): The constant value on the right side """ def __init__(self, field_name, operator, value): """Constructs a qual object. Instantiated from the C extension with the field name, operator and value extracted from the postgresql where clause. Accepts every field from the qual. """ self.field_name = field_name self.operator = operator self.value = value @property def is_list_operator(self): """ Returns: True if this qual represents an array expr, False otherwise """ return isinstance(self.operator, tuple) @property def list_any_or_all(self): """ Returns: ANY if and only if: - this qual is a list operator - the operator applies as an 'ANY' clause (eg, = ANY(1,2,3)) ALL if and only if: - this is a list operator - the operator applies as an 'ALL' clause (eg, > ALL(1, 2, 3)) None if this is not a list operator. """ if self.is_list_operator: return ANY if self.operator[1] else ALL return None def __repr__(self): if self.is_list_operator: value = '%s(%s)' % ( 'ANY' if self.list_any_or_all == ANY else 'ALL', self.value) operator = self.operator[0] else: value = self.value operator = self.operator return ("%s %s %s" % (self.field_name, operator, value)) def __eq__(self, other): if isinstance(other, Qual): return (self.field_name == other.field_name and self.operator == other.operator and self.value == other.value) return False def __hash__(self): return hash((self.field_name, self.operator, self.value))
[docs]class ForeignDataWrapper(object): """Base class for all foreign data wrapper instances. Though not required, ForeignDataWrapper implementation should inherit from this class. """ _startup_cost = 20
[docs] def __init__(self, fdw_options, fdw_columns): """The foreign data wrapper is initialized on the first query. Args: fdw_options (dict): The foreign data wrapper options. It is a dictionary mapping keys from the sql "CREATE FOREIGN TABLE" statement options. It is left to the implementor to decide what should be put in those options, and what to do with them. fdw_columns (dict): The foreign datawrapper columns. It is a dictionary mapping the column names to their ColumnDefinition. """ pass
[docs] def get_rel_size(self, quals, columns): """ Method called from the planner to estimate the resulting relation size for a scan. It will help the planner in deciding between different types of plans, according to their costs. Args: quals (list): A list of Qual instances describing the filters applied to this scan. columns (list): The list of columns that must be returned. Returns: A tuple of the form (expected_number_of_rows, avg_row_width (in bytes)) """ return (100000000, len(columns) * 100)
[docs] def can_sort(self, sortkeys): """ Method called from the planner to ask the FDW what are the sorts it can enforced, to avoid PostgreSQL to sort the data after retreiving all the rows. These sorts can come from explicit ORDER BY clauses, but also GROUP BY and DISTINCT clauses. The FDW has to inspect every sort, and respond which one are handled. The sorts are cumulatives. For example:: col1 ASC col2 DESC means that the FDW must render the tuples sorted by col1 ascending and col2 descending. Args: sortkeys (list): A list of :class:`SortKey` representing all the sorts the query must enforce. Return: The list of cumulative SortKey, for which the FDW can enforce the sort. """ return []
[docs] def get_path_keys(self): u""" Method called from the planner to add additional Path to the planner. By default, the planner generates an (unparameterized) path, which can be reasoned about like a SequentialScan, optionally filtered. This method allows the implementor to declare other Paths, corresponding to faster access methods for specific attributes. Such a parameterized path can be reasoned about like an IndexScan. For example, with the following query:: select * from foreign_table inner join local_table using(id); where foreign_table is a foreign table containing 100000 rows, and local_table is a regular table containing 100 rows. The previous query would probably be transformed to a plan similar to this one:: ┌────────────────────────────────────────────────────────────────────────────────────┐ │ QUERY PLAN │ ├────────────────────────────────────────────────────────────────────────────────────┤ │ Hash Join (cost=57.67..4021812.67 rows=615000 width=68) │ │ Hash Cond: (foreign_table.id = local_table.id) │ │ -> Foreign Scan on foreign_table (cost=20.00..4000000.00 rows=100000 width=40) │ │ -> Hash (cost=22.30..22.30 rows=1230 width=36) │ │ -> Seq Scan on local_table (cost=0.00..22.30 rows=1230 width=36) │ └────────────────────────────────────────────────────────────────────────────────────┘ But with a parameterized path declared on the id key, with the knowledge that this key is unique on the foreign side, the following plan might get chosen:: ┌───────────────────────────────────────────────────────────────────────┐ │ QUERY PLAN │ ├───────────────────────────────────────────────────────────────────────┤ │ Nested Loop (cost=20.00..49234.60 rows=615000 width=68) │ │ -> Seq Scan on local_table (cost=0.00..22.30 rows=1230 width=36) │ │ -> Foreign Scan on remote_table (cost=20.00..40.00 rows=1 width=40)│ │ Filter: (id = local_table.id) │ └───────────────────────────────────────────────────────────────────────┘ Returns: A list of tuples of the form: (key_columns, expected_rows), where key_columns is a tuple containing the columns on which the path can be used, and expected_rows is the number of rows this path might return for a simple lookup. For example, the return value corresponding to the previous scenario would be:: [(('id',), 1)] """ return []
[docs] def explain(self, quals, columns, sortkeys=None, verbose=False): """Hook called on explain. The arguments are the same as the :meth:`execute`, with the addition of a "verbose" keyword arg for when the EXPLAIN is called with the VERBOSE option. Returns: An iterable of strings to display in the EXPLAIN output. """ return []
[docs] def execute(self, quals, columns, sortkeys=None): """Execute a query in the foreign data wrapper. This method is called at the first iteration. This is where the actual remote query execution takes place. Multicorn makes no assumption about the particular behavior of a ForeignDataWrapper, and will NOT remove any qualifiers from the PostgreSQL quals list. That means the quals will be rechecked anyway. Typically, an implementation would: - initialize (or reuse) some sort of connection to the remote system - transform the quals and columns arguments to a representation suitable for the remote system - fetch the data according to this query - return it to the C-extension. Although any iterable can be returned, it is strongly advised to implement this method as a generator to prevent loading the whole dataset in memory. Args: quals (list): A list of :class:`Qual` instances, containing the basic where clauses in the query. columns (list): A list of columns that postgresql is going to need. You should return AT LEAST those columns when returning a dict. If returning a sequence, every column from the table should be in the sequence. sortkeys (list): A list of :class:`SortKey` that the FDW said it can enforce. Returns: An iterable of python objects which can be converted back to PostgreSQL. Currently, such objects are: - sequences containing exactly as much columns as the underlying tables - dictionaries mapping column names to their values. If the sortkeys wasn't empty, the FDW has to return the data in the expected order. """ pass
@property def rowid_column(self): """ Returns: A column name which will act as a rowid column, for delete/update operations. One can think of it as a primary key. This can be either an existing column name, or a made-up one. This column name should be subsequently present in every returned resultset. """ raise NotImplementedError("This FDW does not support the writable API")
[docs] def insert(self, values): """ Insert a tuple defined by ''values'' in the foreign table. Args: values (dict): a dictionary mapping column names to column values Returns: A dictionary containing the new values. These values can differ from the ``values`` argument if any one of them was changed or inserted by the foreign side. For example, if a key is auto generated. """ raise NotImplementedError("This FDW does not support the writable API")
[docs] def update(self, oldvalues, newvalues): """ Update a tuple containing ''oldvalues'' to the ''newvalues''. Args: oldvalues (dict): a dictionary mapping from column names to previously known values for the tuple. newvalues (dict): a dictionary mapping from column names to new values for the tuple. Returns: A dictionary containing the new values. See :method:``insert`` for information about this return value. """ raise NotImplementedError("This FDW does not support the writable API")
[docs] def delete(self, oldvalues): """ Delete a tuple identified by ``oldvalues`` Args: oldvalues (dict): a dictionary mapping from column names to previously known values for the tuple. Returns: None """ raise NotImplementedError("This FDW does not support the writable API")
[docs] def pre_commit(self): """ Hook called just before a commit is issued, on PostgreSQL >=9.3. This is where the transaction should tentatively commited. """ pass
[docs] def rollback(self): """ Hook called when the transaction is rollbacked. """ pass
[docs] def commit(self): """ Hook called at commit time. On PostgreSQL >= 9.3, the pre_commit hook should be preferred. """ pass
[docs] def end_scan(self): """ Hook called at the end of a foreign scan. """ pass
[docs] def end_modify(self): """ Hook called at the end of a foreign modify (DML operations) """ pass
[docs] def begin(self, serializable): """ Hook called at the beginning of a transaction. """ pass
[docs] def sub_begin(self, level): """ Hook called at the beginning of a subtransaction. """ pass
[docs] def sub_rollback(self, level): """ Hook called when a subtransaction is rollbacked. """ pass
[docs] def sub_commit(self, level): """ Hook called when a subtransaction is committed. """ pass
@classmethod
[docs] def import_schema(self, schema, srv_options, options, restriction_type, restricts): """ Hook called on an IMPORT FOREIGN SCHEMA command. Args: schema (str): the foreign schema to import srv_options (dict): options defined at the server level options (dict): options defined at the IMPORT FOREIGN SCHEMA statement level restriction_type (str): One of 'limit', 'except' or None restricts (list): a list of tables as passed to the LIMIT TO or EXCEPT clause Returns: list: a list of :class:`multicorn.TableDefinition` """ raise NotImplementedError( "This FDW does not support IMPORT FOREIGN SCHEMA")
class TransactionAwareForeignDataWrapper(ForeignDataWrapper): def __init__(self, fdw_options, fdw_columns): super(TransactionAwareForeignDataWrapper, self).__init__( fdw_options, fdw_columns) self._init_transaction_state() def _init_transaction_state(self): self.current_transaction_state = [] def insert(self, values): self.current_transaction_state.append(('insert', values)) def update(self, oldvalues, newvalues): self.current_transaction_state.append( ('update', (oldvalues, newvalues))) def delete(self, oldvalues): self.current_transaction_state.append(('delete', oldvalues)) def rollback(self): self._init_transaction_state() """Code from python2.7 importlib.import_module.""" """Backport of importlib.import_module from 3.x.""" # While not critical (and in no way guaranteed!), it would be nice to keep this # code compatible with Python 2.3. def _resolve_name(name, package, level): """Return the absolute name of the module to be imported.""" if not hasattr(package, 'rindex'): raise ValueError("'package' not set to a string") dot = len(package) for x in range(level, 1, -1): try: dot = package.rindex('.', 0, dot) except ValueError: raise ValueError("attempted relative import beyond top-level " "package") return "%s.%s" % (package[:dot], name) def import_module(name, package=None): """Import a module. The 'package' argument is required when performing a relative import. It specifies the package to use as the anchor point from which to resolve the relative import to an absolute import. """ if name.startswith('.'): if not package: raise TypeError("relative imports require the 'package' argument") level = 0 for character in name: if character != '.': break level += 1 name = _resolve_name(name[level:], package, level) __import__(name) return sys.modules[name] def get_class(module_path): """ Internal function called from c code to import a foreign data wrapper. Args: module_path (str): A fully qualified name for a class. to import Ex: multicorn.csvfdw.CsvFdw. Returns: the class designated by module_path. """ module_path.split(".") wrapper_class = module_path.split(".")[-1] module_name = ".".join(module_path.split(".")[:-1]) module = import_module(module_name) return getattr(module, wrapper_class) def quote_identifier(value): return '"' + value.replace('"', '""') + '"' def quote_option(value): return "'" + value.replace("'", "''") + "'" def dict_to_optionstring(options): return ",\n".join( "%s %s" % (key, quote_option(value)) for key, value in sorted(options.items()))
[docs]class ColumnDefinition(object): """ Definition of Foreign Table Column. Attributes: column_name (str): the name of the column type_oid (int): the internal OID of the PostgreSQL type typmod (int): the type modifier (ex: VARCHAR(12)) type_name (str): the formatted type name, with the modifier (ex: VARCHAR(12)) base_type_name (str): the base type name, withou modifier (ex: VARCHAR) options (dict): a mapping of option names to option values, as strings. """ def __init__(self, column_name, type_oid=0, typmod=0, type_name="", base_type_name="", options=None): self.column_name = column_name self.type_oid = type_oid self.typmod = typmod self.type_name = type_name self.base_type_name = base_type_name self.options = options or {} def __repr__(self): return "%s(%s, %i, %s%s)" % ( self.__class__.__name__, self.column_name, self.type_oid, self.type_name, " options %s" % self.options if self.options else "") def to_statement(self): stmt = "%s %s" % ( quote_identifier(self.column_name), self.type_name) if self.options: stmt += " OPTIONS ( %s )" % dict_to_optionstring(self.options) return stmt
[docs]class TableDefinition(object): """ Definition of a Foreign Table. Attributes: table_name (str): the name of the table columns (str): a list of :class:`ColumnDefinition` objects options (dict): a dictionary containing the table-level options. """ def __init__(self, table_name, schema=None, columns=None, options=None): self.table_name = table_name self.columns = columns or [] self.options = options or {}
[docs] def to_statement(self, schema_name, server_name): """ Generates the CREATE FOREIGN TABLE statement associated with this definition. """ parts = [] parts.append("CREATE FOREIGN TABLE %s.%s (" % (quote_identifier(schema_name), quote_identifier(self.table_name))) parts.append(",\n".join(col.to_statement() for col in self.columns)) parts.append(" \n ) SERVER %s " % quote_identifier(server_name)) if self.options: parts.append(" OPTIONS (") parts.append(dict_to_optionstring(self.options)) parts.append(")") return '\n'.join(parts)