# stage_L3/src/querying.py

import sqlite3
import numpy as np
from random import choice
from tprint import tprint

from joblib import Memory  # for persistent memoïzation

from query_generator import *
import orderankings as odrk

from config import CONFIG, DATABASE_CFG, VENV_HOME, DATABASE_FILE

# Persistent memoization: when enabled in the configuration, results of the
# functions decorated with `@memory.cache` are stored by joblib under the
# directory given below.
if CONFIG["persistent_query_memoization"]:
    memory = Memory(f"{VENV_HOME}/src/cache")
else:
    # Memoization is disabled: provide a stand-in object exposing the same
    # `cache` decorator, so that the `@memory.cache` decorations in this file
    # work unchanged in both configurations.
    class FalseMemory:
        def cache(self, func):
            """Return `func` unchanged (no-op replacement for a caching decorator)."""
            return func
    memory = FalseMemory()

# Switch for the debug prints of this module.
VERBOSE = CONFIG["verbose"]["querying"]

######################### Connection to SQLite database ########################

# The connection is opened at import time; CON and CUR are module-level
# objects, and `query` executes its statements on CUR.
if VERBOSE:
    print(f"connecting to {DATABASE_FILE}")

CON = sqlite3.connect(DATABASE_FILE)
CUR = CON.cursor()


@memory.cache  # persistent memoization (no-op when disabled in the config)
def query(q: str) -> list[tuple]:
    """Run a query on the module-level cursor and return every row.

    Args:
        q (str): The query to execute; it is converted with `str()` before
                 being handed to the cursor.
    Returns:
        list[tuple]: All the rows fetched from the cursor.
    """
    if VERBOSE:
        print(f'sending query : {q}')

    cursor = CUR.execute(str(q))

    if VERBOSE:
        print("got response", cursor)

    rows = cursor.fetchall()
    return rows


##################### Choice of the right query generator ######################


# Callable stored under the "query_generator" key of the database
# configuration; elsewhere in this file that same entry is called with keyword
# arguments to build a query object.
# NOTE(review): the functions visible in this file read
# DATABASE_CFG["query_generator"] directly instead of this alias — confirm
# whether other modules still rely on it.
QUERY_PARAM_GB_CONSTRUCTOR = DATABASE_CFG["query_generator"]


######################## orderings extraction functions ########################

def random_query() -> list[tuple]:
    """Build a query with a randomly drawn criteria and return its result.

    The criteria is picked at random among DATABASE_CFG["criterion"]; every
    other argument given to the query generator is read from DATABASE_CFG.

    Returns:
        list[tuple]: The rows returned by `query` for the generated query.
    """
    drawn_criteria = choice(DATABASE_CFG["criterion"])

    build_query = DATABASE_CFG["query_generator"]
    generator_kwargs = {
        "parameter": DATABASE_CFG["parameter"],
        "authorized_parameter_values": DATABASE_CFG["authorized_parameter_values"],
        "criteria": drawn_criteria,
        "summed_attribute": DATABASE_CFG["summed_attribute"],
    }
    generated = build_query(**generator_kwargs)

    # show the generated query
    if VERBOSE:
        print("query :", str(generated), sep="\n")

    # send it to the database
    rows = query(str(generated))

    # show what came back
    if VERBOSE:
        print("query result :")
        tprint(rows)

    return rows


def filter_correct_length_orderings(orderings: list[tuple], length: int) -> np.ndarray:
    """Keep only the orderings that can be cut to exactly `length` elements.

    Orderings shorter than `length` are dropped; longer ones are sliced down
    to their first `length` elements.

    Args:
        orderings (list[tuple]): The orderings to filter. Any iterable of
                                 sized, sliceable sequences works (this file
                                 also passes dict value views).
        length (int): The number of leading elements kept in each ordering.
    Returns:
        np.ndarray: The kept orderings, one per row. When nothing is kept the
                    array is empty with shape (0,).
    """
    kept = [ordering[:length] for ordering in orderings if len(ordering) >= length]
    # The result is a NumPy array, not a list: callers receive an ndarray.
    correct_length_orderings = np.array(kept)

    if VERBOSE:
        print(f"found {len(correct_length_orderings)} orderings :")
        tprint(correct_length_orderings)
    return correct_length_orderings


def rankings_from_table(query_result: list[tuple]):
    """Turn the result of a query into rankings.

    The orderings extracted from the table by `odrk` are filtered down to
    DATABASE_CFG["orders_length"] elements, then converted to rankings by
    `odrk.rankings_from_orderings`.

    Args:
        query_result (list[tuple]): The rows returned by a query.
    Returns:
        Whatever `odrk.rankings_from_orderings` returns for the kept orderings.
    """
    # only the values of the mapping are used, its keys are ignored
    table_orderings = odrk.get_all_orderings_from_table(query_result)
    kept_orderings = filter_correct_length_orderings(
        table_orderings.values(), DATABASE_CFG["orders_length"])

    if VERBOSE:
        print(kept_orderings)

    return odrk.rankings_from_orderings(kept_orderings)

@memory.cache  # persistent memoization (no-op when disabled in the config)
def find_orderings(parameter: str, summed_attribute: str, criterion: tuple[str, ...],
                   length: int,
                   authorized_parameter_values: tuple[str, ...] | None = None
                   ) -> np.ndarray:
    """Gather the list of every ordering returned by queries using given values
    of parameter, summed_attribute, and all given values of criterion.
    Args:
        parameter (str): The value of the parameter attribute in the query generator.
        summed_attribute (str): The attribute that you want to sum in order to sort the values.
        criterion (tuple[str]): The list of attributes that you want to group the query by.
                                One query is sent per element.
        length (int): The length of orderings, hence the number of different
                      values of parameter that you consider in the query.
        authorized_parameter_values (tuple[str] | None): Forwarded as-is to the
                      query generator under the same name. Defaults to None.
    Returns:
        np.ndarray: All found orderings, as returned by
                    `filter_correct_length_orderings` (shorter orderings are
                    dropped, longer ones are cut to `length`).
    """

    # Instantiate the query generator once; only its criteria changes from
    # one query to the next.
    qg = DATABASE_CFG["query_generator"](
        parameter=parameter,
        authorized_parameter_values=authorized_parameter_values,
        summed_attribute=summed_attribute,
        criteria=None)

    # Every ordering found so far, all criteria together. In each per-table
    # mapping, the key is the value in the criteria column; only the orderings
    # themselves are kept here.
    orderings = []

    for criteria in criterion:
        qg.criteria = criteria
        table = query(str(qg))
        if VERBOSE:
            print(f"request result with criteria '{criteria}' :")
            tprint(table, limit=10)
        table_orders = odrk.get_all_orderings_from_table(table)
        # update the global list of all found orders
        orderings.extend(table_orders.values())

    return filter_correct_length_orderings(orderings, length)