Source code for evadb.interfaces.relational.relation

# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union

import pandas

from evadb.database import EvaDBDatabase
from evadb.interfaces.relational.utils import (
    create_limit_expression,
    create_star_expression,
    handle_select_clause,
    sql_predicate_to_expresssion_tree,
    sql_string_to_expresssion_list,
    string_to_lateral_join,
    try_binding,
)
from evadb.models.storage.batch import Batch
from evadb.parser.alias import Alias
from evadb.parser.select_statement import SelectStatement
from evadb.parser.statement import AbstractStatement
from evadb.parser.table_ref import JoinNode, TableRef
from evadb.parser.types import JoinType
from evadb.parser.utils import parse_sql_orderby_expr
from evadb.server.command_handler import execute_statement


class EvaDBQuery:
    def __init__(
        self,
        evadb: EvaDBDatabase,
        query_node: Union[AbstractStatement, TableRef],
        alias: Alias = None,
    ):
        self._evadb = evadb
        self._query_node = query_node
        self._alias = alias

    def alias(self, alias: str) -> "EvaDBQuery":
        """Returns a new Relation with an alias set.

        Args:
            alias (str): an alias name to be set for the Relation.

        Returns:
            EvaDBQuery: Aliased Relation.

        Examples:
            >>> relation = cursor.table("sample_table")
            >>> relation.alias('table')
        """
        self._alias = Alias(alias)

    def cross_apply(self, expr: str, alias: str) -> "EvaDBQuery":
        """Execute a expr on all the rows of the relation

        Args:
            expr (str): sql expression
            alias (str): alias of the output of the expr

        Returns:
            `EvaDBQuery`: relation

        Examples:

            Runs Yolo on all the frames of the input table

            >>> relation = cursor.table("videos")
            >>> relation.cross_apply("Yolo(data)", "objs(labels, bboxes, scores)")

            Runs Yolo on all the frames of the input table and unnest each object as separate row.

            >>> relation.cross_apply("unnest(Yolo(data))", "obj(label, bbox, score)")
        """
        assert self._query_node.from_table is not None

        table_ref = string_to_lateral_join(expr, alias=alias)
        join_table = TableRef(
            JoinNode(
                TableRef(self._query_node, alias=self._alias),
                table_ref,
                join_type=JoinType.LATERAL_JOIN,
            )
        )
        self._query_node = SelectStatement(
            target_list=create_star_expression(), from_table=join_table
        )
        # reset the alias as after join there isn't a single alias
        self._alias = Alias("Relation")
        try_binding(self._evadb.catalog, self._query_node)
        return self

[docs] def df(self, drop_alias: bool = True) -> pandas.DataFrame: """ Execute and fetch all rows as a pandas DataFrame Args: drop_alias (bool): whether to drop the table name in the output dataframe. Default: True. Returns: pandas.DataFrame: Example: Runs a SQL query and get a panda Dataframe. >>> cursor.query("SELECT * FROM MyTable;").df() col1 col2 0 1 2 1 3 4 2 5 6 """ batch = self.execute(drop_alias=drop_alias) assert batch.frames is not None, "relation execute failed" return batch.frames
def execute(self, drop_alias: bool = True) -> Batch: """Transform the relation into a result set Args: drop_alias (bool): whether to drop the table name in the output batch. Default: True. Returns: Batch: result as evadb Batch Example: Runs a SQL query and get a Batch >>> batch = cursor.query("SELECT * FROM MyTable;").execute() """ result = execute_statement(self._evadb, self._query_node.copy()) # TODO: this is a dirty implementation. Ideally this should be done in the final projection. if drop_alias: result.drop_column_alias() assert result is not None return result def filter(self, expr: str) -> "EvaDBQuery": """ Filters rows using the given condition. Multiple filters can be chained using `AND` Parameters: expr (str): The filter expression. Returns: EvaDBQuery : Filtered EvaDBQuery. Examples: >>> relation = cursor.table("sample_table") >>> relation.filter("col1 > 10") Filter by sql string >>> relation.filter("col1 > 10 AND col1 < 20") """ parsed_expr = sql_predicate_to_expresssion_tree(expr) self._query_node = handle_select_clause( self._query_node, self._alias, "where_clause", parsed_expr ) try_binding(self._evadb.catalog, self._query_node) return self def limit(self, num: int) -> "EvaDBQuery": """Limits the result count to the number specified. Args: num (int): Number of records to return. Will return num records or all records if the Relation contains fewer records. Returns: EvaDBQuery: Relation with subset of records Examples: >>> relation = cursor.table("sample_table") >>> relation.limit(10) """ limit_expr = create_limit_expression(num) self._query_node = handle_select_clause( self._query_node, self._alias, "limit_count", limit_expr ) try_binding(self._evadb.catalog, self._query_node) return self def order(self, order_expr: str) -> "EvaDBQuery": """Reorder the relation based on the order_expr Args: order_expr (str): sql expression to order the relation Returns: EvaDBQuery: A EvaDBQuery ordered based on the order_expr. Examples: >>> relation = cursor.table("PDFs") >>> relation.order("Similarity(SentenceTransformerFeatureExtractor('When was the NATO created?'), SentenceTransformerFeatureExtractor(data) ) DESC") """ parsed_expr = parse_sql_orderby_expr(order_expr) self._query_node = handle_select_clause( self._query_node, self._alias, "orderby_list", parsed_expr ) try_binding(self._evadb.catalog, self._query_node) return self def select(self, expr: str) -> "EvaDBQuery": """ Projects a set of expressions and returns a new EvaDBQuery. Parameters: exprs (Union[str, List[str]]): The expression(s) to be selected. If '*' is provided, it expands to all columns in the current EvaDBQuery. Returns: EvaDBQuery: A EvaDBQuery with subset (or all) of columns. Examples: >>> relation = cursor.table("sample_table") Select all columns in the EvaDBQuery. >>> relation.select("*") Select all subset of columns in the EvaDBQuery. >>> relation.select("col1") >>> relation.select("col1, col2") """ parsed_exprs = sql_string_to_expresssion_list(expr) self._query_node = handle_select_clause( self._query_node, self._alias, "target_list", parsed_exprs ) try_binding(self._evadb.catalog, self._query_node) return self def show(self) -> pandas.DataFrame: """Execute and fetch all rows as a pandas DataFrame Returns: pandas.DataFrame: """ batch = self.execute() assert batch is not None, "relation execute failed" return batch.frames def sql_query(self) -> str: """Get the SQL query that is equivalent to the relation Returns: str: the sql query Examples: >>> relation = cursor.table("sample_table").project('i') >>> relation.sql_query() """ return str(self._query_node)