Files
stage_L3/src/querying.py
Oscar Plaisant 626b3a7327 update
2024-07-02 03:05:59 +02:00

146 lines
4.8 KiB
Python

import sqlite3
import numpy as np
from random import choice
from tprint import tprint
from joblib import Memory # for persistent memoïzation
from query_generator import *
import orderankings as odrk
from config import CONFIG, DATABASE_CFG, VENV_HOME, DATABASE_FILE
# Select the memoization backend behind the `@memory.cache` decorators.
if not CONFIG["persistent_query_memoization"]:
    # Memoization is disabled: provide an object with the same `cache`
    # decorator attribute, which leaves decorated functions untouched.
    class FalseMemory:
        """Stand-in for a memoization backend that never caches anything."""

        def cache(self, func):
            """Decorator that returns `func` unchanged."""
            return func

    memory = FalseMemory()
else:
    # Persistent memoization through joblib, stored under the cache directory.
    memory = Memory(f"{VENV_HOME}/src/cache")

# Verbosity switch for this module, taken from the configuration.
VERBOSE = CONFIG["verbose"]["querying"]

######################### Connection to sqlite database ########################
# The connection is opened once, when the module is imported. CUR is the
# cursor on which `query` executes its statements.
if VERBOSE:
    print(f"connecting to {DATABASE_FILE}")
CON = sqlite3.connect(DATABASE_FILE)
CUR = CON.cursor()
@memory.cache  # persistent memoization
def query(q: str) -> list[tuple]:
    """Execute the given query and return every result row.

    The query is converted with `str` and executed on the module-level
    cursor `CUR`. The function is wrapped by `memory.cache`.

    Args:
        q (str): The SQL query to execute.

    Returns:
        list[tuple]: All rows fetched from the cursor.
    """
    if VERBOSE:
        print(f'sending query : {q}')
    response = CUR.execute(str(q))
    if VERBOSE:
        print("got response", response)
    rows = response.fetchall()
    return rows
##################### Choice of the right query generator ######################
# Query generator callable selected in the database configuration.
# NOTE(review): the functions visible in this file read
# DATABASE_CFG["query_generator"] directly rather than this constant —
# confirm whether it is still used elsewhere.
QUERY_PARAM_GB_CONSTRUCTOR = DATABASE_CFG["query_generator"]
######################## orderings extraction functions ########################
def random_query() -> list[tuple]:
    """Run the configured query with one randomly chosen criterion.

    The criterion is picked with `random.choice` among
    DATABASE_CFG["criterion"]; the parameter, authorized parameter values
    and summed attribute are all read from DATABASE_CFG.

    Returns:
        list[tuple]: The rows returned by `query` for the generated query.
    """
    picked_criteria = choice(DATABASE_CFG["criterion"])
    make_generator = DATABASE_CFG["query_generator"]
    generator = make_generator(
        parameter=DATABASE_CFG["parameter"],
        authorized_parameter_values=DATABASE_CFG["authorized_parameter_values"],
        criteria=picked_criteria,
        summed_attribute=DATABASE_CFG["summed_attribute"],
    )

    # show the generated query
    if VERBOSE:
        print("query :", str(generator), sep="\n")

    # send the generated query to the database
    rows = query(str(generator))

    # show what came back
    if VERBOSE:
        print("query result :")
        tprint(rows)
    return rows
def filter_correct_length_orderings(orderings: list[tuple], length: int) -> list[tuple]:
    """Bring every ordering to exactly `length` items.

    Orderings with fewer than `length` items are discarded; longer ones are
    cut down to their first `length` items.

    Args:
        orderings (list[tuple]): The orderings to filter.
        length (int): The number of items each kept ordering must have.

    Returns:
        The kept orderings. Note that the result is built with `np.array`,
        so a NumPy array is returned despite the `list[tuple]` annotation.
    """
    kept = []
    for ordering in orderings:
        if len(ordering) < length:
            continue  # too short: drop it
        kept.append(ordering[:length])  # too long (or exact): keep the head
    kept = np.array(kept)

    if VERBOSE:
        print(f"found {len(kept)} orderings :")
        tprint(kept)
    return kept
def rankings_from_table(query_result: list[tuple]):
    """Turn a query result table into rankings.

    The orderings are extracted from the table with
    `odrk.get_all_orderings_from_table`, brought to the length configured in
    DATABASE_CFG["orders_length"], and then converted with
    `odrk.rankings_from_orderings`.

    Args:
        query_result (list[tuple]): The rows of a query result.

    Returns:
        The value returned by `odrk.rankings_from_orderings`.
    """
    orderings_by_key = odrk.get_all_orderings_from_table(query_result)
    fixed_length_orderings = filter_correct_length_orderings(
        orderings_by_key.values(),
        DATABASE_CFG["orders_length"],
    )
    if VERBOSE:
        print(fixed_length_orderings)
    return odrk.rankings_from_orderings(fixed_length_orderings)
@memory.cache  # persistent memoization
def find_orderings(parameter: str, summed_attribute: str, criterion: tuple[str, ...],
                   length: int,
                   authorized_parameter_values: tuple[str, ...] | None = None
                   ) -> list[list[str]]:
    """Collect the orderings produced by one query per value of `criterion`.

    A single query generator is built from `parameter`, `summed_attribute`
    and `authorized_parameter_values`; its `criteria` attribute is then set
    to each value of `criterion` in turn, and the orderings extracted from
    every query result are gathered together.

    Args:
        parameter (str): The value of the parameter attribute in the query generator.
        summed_attribute (str): The attribute that you want to sum in order to sort the values.
        criterion (tuple[str]): The attributes that you want to group the query by,
            one query being sent per attribute.
        length (int): The length of orderings, hence the number of different
            values of parameter that you consider in the query.
        authorized_parameter_values (tuple[str] | None): Passed as is to the
            query generator (presumably restricts the values of parameter
            that are considered; confirm against the query generator).

    Returns:
        All found orderings, brought to `length` items by
        `filter_correct_length_orderings`.
    """
    # instantiate the query generator; the criteria is set inside the loop
    generator = DATABASE_CFG["query_generator"](
        parameter=parameter,
        authorized_parameter_values=authorized_parameter_values,
        summed_attribute=summed_attribute,
        criteria=None)

    # every ordering found so far, over all criteria
    all_orderings = []
    for criteria in criterion:
        generator.criteria = criteria
        table = query(str(generator))
        if VERBOSE:
            print(f"request result with criteria '{criteria}' :")
            tprint(table, limit=10)
        # the orderings of this table are keyed by the value found in the
        # criteria column; only the orderings themselves are accumulated
        all_orderings += odrk.get_all_orderings_from_table(table).values()

    return filter_correct_length_orderings(all_orderings, length)