diff --git a/Makefile b/Makefile index b93f9d5..1c31939 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,7 @@ DATABASE_FILE=${DATABASE_FOLDER}/${DATABASE_NAME}.db all: execute-script execute-script: requirements.txt - source bin/activate; \ - python3 src/concentration_test.py; \ + source bin/activate && python3 src/concentration_test.py; requirements.txt: bin/pip3 install -r requirements.txt diff --git a/old_concentration_test.py b/old_concentration_test.py index b06a938..0a395e2 100644 --- a/old_concentration_test.py +++ b/old_concentration_test.py @@ -5,12 +5,15 @@ from tprint import tprint import orderankings as odrk from querying import find_orderings from kemeny_young import kendall_tau_dist, rank_aggregation +from losses import * from tqdm import tqdm from collections import Counter, defaultdict import joblib from functools import partial import random -import yaml + +# load configuration from config.yaml +from config import CONFIG as CFG # Random number generator for the whole program # RNG = np.random.default_rng(1234) @@ -18,61 +21,32 @@ import yaml ######################## YAML CONFIG (src/config.yaml) ######################### -with open('src/config.yaml') as config_file: - cfg = yaml.load(config_file, Loader=yaml.Loader) -DATABASE_NAME = cfg["database_name"] +DATABASE_NAME = CFG["database_name"] -VERBOSE = cfg["verbose"]["concentration_test"] +VERBOSE = CFG["verbose"]["concentration_test"] ################## DATA SETTINGS (parameters, hypothesis...) ################### # loaded from src/config.yaml -PARAMETER = tuple(cfg[DATABASE_NAME]["parameter"]) -SUMMED_ATTRIBUTE = tuple(cfg[DATABASE_NAME]["summmed_attribute"]) +PARAMETER = tuple(CFG[DATABASE_NAME]["parameter"]) +SUMMED_ATTRIBUTE = tuple(CFG[DATABASE_NAME]["summmed_attribute"]) # SUMMED_ATTRIBUTE = "lo_revenue" # SUMMED_ATTRIBUTE = "lo_extendedprice" -LENGTH = cfg[DATABASE_NAME]["orders_length"] +LENGTH = CFG[DATABASE_NAME]["orders_length"] -AUTHORIZED_PARAMETER_VALUES = tuple(cfg[DATABASE_NAME]["authorized_parameter_values"]) +AUTHORIZED_PARAMETER_VALUES = tuple(CFG[DATABASE_NAME]["authorized_parameter_values"]) -CRITERION = tuple(cfg[DATABASE_NAME]["criterion"]) +CRITERION = tuple(CFG[DATABASE_NAME]["criterion"]) -HYPOTHESIS_ORDERING = tuple(cfg[DATABASE_NAME]["hypothesis_ordering"]) +HYPOTHESIS_ORDERING = tuple(CFG[DATABASE_NAME]["hypothesis_ordering"]) assert len(HYPOTHESIS_ORDERING) == LENGTH -################################ LOSS FUNCTIONS ################################ - -def orderings_average_loss(orderings: list[list[str]], truth: list[str]) -> float:# {{{ - """This loss is the the average of kendall tau distances between the truth - and each ordering.""" - rankings = odrk.rankings_from_orderings(orderings) - true_ranking = odrk.rankings_from_orderings([truth])[0] - return rankings_average_loss(rankings, true_ranking)# }}} - - -def rankings_average_loss(rankings: list[list[int]], truth: list[int]) -> float:# {{{ - distance = sum(kendall_tau_dist(rkng, truth) for rkng in rankings) - length = len(rankings) - # apparently, this is what works for a good normalization - return distance / length - # return distance * 2 / (length * (length - 1))}}} - - -def kmny_dist_loss(orderings: list[list[str]], truth: list[str]) -> int:# {{{ - """Return the kendall tau distance between the truth and the kemeny-young - aggregation of orderings""" - _, agg_rank = rank_aggregation(odrk.rankings_from_orderings(orderings)) - aggregation = odrk.ordering_from_ranking(agg_rank, truth) - loss = kendall_tau_dist( - odrk.ranking_from_ordering(aggregation), - odrk.ranking_from_ordering(truth)) - return loss - # print(aggregation, HYPOTHESIS_ORDERING, kdl_agg_dist)}}} +################## APPLIED ON SAMPLES FOR CONCENTRATION TESTS ################## def get_loss_progression(): # {{{ grouped_orderings = find_orderings(parameter=PARAMETER, @@ -104,7 +78,6 @@ def get_loss_progression(): # {{{ return average_losses, kendal_aggregation_losses # }}} -################## APPLIED ON SAMPLES FOR CONCENTRATION TESTS ################## def plot_loss_progression(): # {{{ """Plot the progression of losses when using more and more of the values diff --git a/src/cache/joblib/querying/query/0e23f59dabd35a8f054d4a0e6f123a4c/metadata.json b/src/cache/joblib/querying/query/0e23f59dabd35a8f054d4a0e6f123a4c/metadata.json new file mode 100644 index 0000000..8c4d760 --- /dev/null +++ b/src/cache/joblib/querying/query/0e23f59dabd35a8f054d4a0e6f123a4c/metadata.json @@ -0,0 +1 @@ +{"duration": 16.941783666610718, "input_args": {"q": "\"\\n SELECT p_color, p_container, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, p_container\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876952.150381} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/23e274e15c4de03bd630d87ff9c3fdfe/output.pkl b/src/cache/joblib/querying/query/0e23f59dabd35a8f054d4a0e6f123a4c/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/23e274e15c4de03bd630d87ff9c3fdfe/output.pkl rename to src/cache/joblib/querying/query/0e23f59dabd35a8f054d4a0e6f123a4c/output.pkl diff --git a/src/cache/joblib/querying/query/161b65ddd38abeff053bd3b1f60fef9a/metadata.json b/src/cache/joblib/querying/query/161b65ddd38abeff053bd3b1f60fef9a/metadata.json deleted file mode 100644 index 8b89363..0000000 --- a/src/cache/joblib/querying/query/161b65ddd38abeff053bd3b1f60fef9a/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 0.014148950576782227, "input_args": {"q": "\"\\n SELECT departure_airport, airline, SUM(nb_flights)\\n FROM fact_table\\n INNER JOIN airport_dim ON airport_dim.iata_code = fact_table.departure_airport\\n NATURAL JOIN hour_dim\\n INNER JOIN time_dim ON time_dim.day = fact_table.date\\n WHERE departure_airport IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'IAH', 'LAS', 'SFO', 'PHX', 'MCO', 'SEA', 'CLT', 'MSP', 'LGA', 'DTW', 'EWR', 'BOS', 'BWI', 'SLC', 'JFK')\\n GROUP BY departure_airport, airline\\n ORDER BY SUM(nb_flights) DESC;\\n \""}, "time": 1717674727.832313} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/161b65ddd38abeff053bd3b1f60fef9a/output.pkl b/src/cache/joblib/querying/query/161b65ddd38abeff053bd3b1f60fef9a/output.pkl deleted file mode 100644 index 7fdca57..0000000 Binary files a/src/cache/joblib/querying/query/161b65ddd38abeff053bd3b1f60fef9a/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/23e274e15c4de03bd630d87ff9c3fdfe/metadata.json b/src/cache/joblib/querying/query/23e274e15c4de03bd630d87ff9c3fdfe/metadata.json deleted file mode 100644 index 1449d1c..0000000 --- a/src/cache/joblib/querying/query/23e274e15c4de03bd630d87ff9c3fdfe/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 12.216989040374756, "input_args": {"q": "\"\\n SELECT p_color, p_container, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, p_container\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717680541.325936} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/284629abd212b529f9344d18e5179806/metadata.json b/src/cache/joblib/querying/query/284629abd212b529f9344d18e5179806/metadata.json new file mode 100644 index 0000000..d1be328 --- /dev/null +++ b/src/cache/joblib/querying/query/284629abd212b529f9344d18e5179806/metadata.json @@ -0,0 +1 @@ +{"duration": 16.9809091091156, "input_args": {"q": "\"\\n SELECT p_color, p_category, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, p_category\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719564027.6609159} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/f44a9533a2bc2e5cc92ba7d62f9d34b7/output.pkl b/src/cache/joblib/querying/query/284629abd212b529f9344d18e5179806/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/f44a9533a2bc2e5cc92ba7d62f9d34b7/output.pkl rename to src/cache/joblib/querying/query/284629abd212b529f9344d18e5179806/output.pkl diff --git a/src/cache/joblib/querying/query/2bd5c6e61c3964fdf79ad628ee867aef/metadata.json b/src/cache/joblib/querying/query/2bd5c6e61c3964fdf79ad628ee867aef/metadata.json new file mode 100644 index 0000000..dfbcf38 --- /dev/null +++ b/src/cache/joblib/querying/query/2bd5c6e61c3964fdf79ad628ee867aef/metadata.json @@ -0,0 +1 @@ +{"duration": 12.007299900054932, "input_args": {"q": "\"\\n SELECT p_color, p_brand, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('azure', 'bisque', 'black', 'aquamarine')\\n\\n GROUP BY p_color, p_brand\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876425.192638} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/2bd5c6e61c3964fdf79ad628ee867aef/output.pkl b/src/cache/joblib/querying/query/2bd5c6e61c3964fdf79ad628ee867aef/output.pkl new file mode 100644 index 0000000..eaf28ea Binary files /dev/null and b/src/cache/joblib/querying/query/2bd5c6e61c3964fdf79ad628ee867aef/output.pkl differ diff --git a/src/cache/joblib/querying/query/38eb9029508ab26a653f100309b92ce2/metadata.json b/src/cache/joblib/querying/query/38eb9029508ab26a653f100309b92ce2/metadata.json deleted file mode 100644 index 09dfc88..0000000 --- a/src/cache/joblib/querying/query/38eb9029508ab26a653f100309b92ce2/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 15.925843238830566, "input_args": {"q": "\"\\n SELECT p_color, c_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, c_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717599419.778661} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/3d7671c3cba40b600e1061e676c7b26d/metadata.json b/src/cache/joblib/querying/query/3d7671c3cba40b600e1061e676c7b26d/metadata.json deleted file mode 100644 index bb265dc..0000000 --- a/src/cache/joblib/querying/query/3d7671c3cba40b600e1061e676c7b26d/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 12.698099851608276, "input_args": {"q": "\"\\n SELECT p_color, s_region, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, s_region\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717599488.394772} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/468a06c3fb4c088e4ece3745fa6bb563/metadata.json b/src/cache/joblib/querying/query/468a06c3fb4c088e4ece3745fa6bb563/metadata.json new file mode 100644 index 0000000..65a11b1 --- /dev/null +++ b/src/cache/joblib/querying/query/468a06c3fb4c088e4ece3745fa6bb563/metadata.json @@ -0,0 +1 @@ +{"duration": 12.975467920303345, "input_args": {"q": "\"\\n SELECT p_color, s_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\nWHERE p_color IN ('azure', 'bisque', 'black', 'aquamarine')\\n\\n GROUP BY p_color, s_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876439.027987} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/468a06c3fb4c088e4ece3745fa6bb563/output.pkl b/src/cache/joblib/querying/query/468a06c3fb4c088e4ece3745fa6bb563/output.pkl new file mode 100644 index 0000000..ebdede3 Binary files /dev/null and b/src/cache/joblib/querying/query/468a06c3fb4c088e4ece3745fa6bb563/output.pkl differ diff --git a/src/cache/joblib/querying/query/4d2f399a43b21e50df0ce5cc4f0f7ca4/metadata.json b/src/cache/joblib/querying/query/4d2f399a43b21e50df0ce5cc4f0f7ca4/metadata.json deleted file mode 100644 index ac2c11c..0000000 --- a/src/cache/joblib/querying/query/4d2f399a43b21e50df0ce5cc4f0f7ca4/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 13.360551118850708, "input_args": {"q": "\"\\n SELECT p_color, s_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, s_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717599475.656039} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/53202312495480449be1c57770e04769/metadata.json b/src/cache/joblib/querying/query/53202312495480449be1c57770e04769/metadata.json new file mode 100644 index 0000000..b4424d9 --- /dev/null +++ b/src/cache/joblib/querying/query/53202312495480449be1c57770e04769/metadata.json @@ -0,0 +1 @@ +{"duration": 12.97324800491333, "input_args": {"q": "\"\\n SELECT p_color, p_brand, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, p_brand\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719579491.6208699} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/986e56573188d2eca1b0a0f1b4cb1876/output.pkl b/src/cache/joblib/querying/query/53202312495480449be1c57770e04769/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/986e56573188d2eca1b0a0f1b4cb1876/output.pkl rename to src/cache/joblib/querying/query/53202312495480449be1c57770e04769/output.pkl diff --git a/src/cache/joblib/querying/query/57ba0946d28413e367789b8a9a7efa51/metadata.json b/src/cache/joblib/querying/query/57ba0946d28413e367789b8a9a7efa51/metadata.json deleted file mode 100644 index a546ab0..0000000 --- a/src/cache/joblib/querying/query/57ba0946d28413e367789b8a9a7efa51/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 0.02377486228942871, "input_args": {"q": "\"\\n SELECT departure_airport, day, SUM(nb_flights)\\n FROM fact_table\\n INNER JOIN airport_dim ON airport_dim.iata_code = fact_table.departure_airport\\n NATURAL JOIN hour_dim\\n INNER JOIN time_dim ON time_dim.day = fact_table.date\\n WHERE departure_airport IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'IAH', 'LAS', 'SFO', 'PHX', 'MCO', 'SEA', 'CLT', 'MSP', 'LGA', 'DTW', 'EWR', 'BOS', 'BWI', 'SLC', 'JFK')\\n GROUP BY departure_airport, day\\n ORDER BY SUM(nb_flights) DESC;\\n \""}, "time": 1717674727.8571048} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/57ba0946d28413e367789b8a9a7efa51/output.pkl b/src/cache/joblib/querying/query/57ba0946d28413e367789b8a9a7efa51/output.pkl deleted file mode 100644 index c6ef7a0..0000000 Binary files a/src/cache/joblib/querying/query/57ba0946d28413e367789b8a9a7efa51/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/5a01aed9d26496912a8cfd4740614c15/metadata.json b/src/cache/joblib/querying/query/5a01aed9d26496912a8cfd4740614c15/metadata.json new file mode 100644 index 0000000..883ce5e --- /dev/null +++ b/src/cache/joblib/querying/query/5a01aed9d26496912a8cfd4740614c15/metadata.json @@ -0,0 +1 @@ +{"duration": 17.964900255203247, "input_args": {"q": "\"\\n SELECT p_color, s_region, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, s_region\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876970.129612} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/3d7671c3cba40b600e1061e676c7b26d/output.pkl b/src/cache/joblib/querying/query/5a01aed9d26496912a8cfd4740614c15/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/3d7671c3cba40b600e1061e676c7b26d/output.pkl rename to src/cache/joblib/querying/query/5a01aed9d26496912a8cfd4740614c15/output.pkl diff --git a/src/cache/joblib/querying/query/5ae3400131ed99d42c87bf20fafb9a4d/metadata.json b/src/cache/joblib/querying/query/5ae3400131ed99d42c87bf20fafb9a4d/metadata.json deleted file mode 100644 index 0366c1a..0000000 --- a/src/cache/joblib/querying/query/5ae3400131ed99d42c87bf20fafb9a4d/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 0.00795602798461914, "input_args": {"q": "\"\\n SELECT departure_airport, month, SUM(nb_flights)\\n FROM fact_table\\n INNER JOIN airport_dim ON airport_dim.iata_code = fact_table.departure_airport\\n NATURAL JOIN hour_dim\\n INNER JOIN time_dim ON time_dim.day = fact_table.date\\n WHERE departure_airport IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'IAH', 'LAS', 'SFO', 'PHX', 'MCO', 'SEA', 'CLT', 'MSP', 'LGA', 'DTW', 'EWR', 'BOS', 'BWI', 'SLC', 'JFK')\\n GROUP BY departure_airport, month\\n ORDER BY SUM(nb_flights) DESC;\\n \""}, "time": 1717674727.8699038} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/5ae3400131ed99d42c87bf20fafb9a4d/output.pkl b/src/cache/joblib/querying/query/5ae3400131ed99d42c87bf20fafb9a4d/output.pkl deleted file mode 100644 index 7cc7b40..0000000 Binary files a/src/cache/joblib/querying/query/5ae3400131ed99d42c87bf20fafb9a4d/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/6bebee0ddcc3b6c76ca9a6d695b0e94a/metadata.json b/src/cache/joblib/querying/query/6bebee0ddcc3b6c76ca9a6d695b0e94a/metadata.json deleted file mode 100644 index 24dd6c3..0000000 --- a/src/cache/joblib/querying/query/6bebee0ddcc3b6c76ca9a6d695b0e94a/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 0.00851297378540039, "input_args": {"q": "\"\\n SELECT departure_airport, year, SUM(nb_flights)\\n FROM fact_table\\n INNER JOIN airport_dim ON airport_dim.iata_code = fact_table.departure_airport\\n NATURAL JOIN hour_dim\\n INNER JOIN time_dim ON time_dim.day = fact_table.date\\n WHERE departure_airport IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'IAH', 'LAS', 'SFO', 'PHX', 'MCO', 'SEA', 'CLT', 'MSP', 'LGA', 'DTW', 'EWR', 'BOS', 'BWI', 'SLC', 'JFK')\\n GROUP BY departure_airport, year\\n ORDER BY SUM(nb_flights) DESC;\\n \""}, "time": 1717674727.8793159} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/6bebee0ddcc3b6c76ca9a6d695b0e94a/output.pkl b/src/cache/joblib/querying/query/6bebee0ddcc3b6c76ca9a6d695b0e94a/output.pkl deleted file mode 100644 index 4a57bad..0000000 Binary files a/src/cache/joblib/querying/query/6bebee0ddcc3b6c76ca9a6d695b0e94a/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/75abaf2d4d0c36b2e51407897316ce85/metadata.json b/src/cache/joblib/querying/query/75abaf2d4d0c36b2e51407897316ce85/metadata.json new file mode 100644 index 0000000..7f72a5c --- /dev/null +++ b/src/cache/joblib/querying/query/75abaf2d4d0c36b2e51407897316ce85/metadata.json @@ -0,0 +1 @@ +{"duration": 14.970414876937866, "input_args": {"q": "\"\\n SELECT p_color, c_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('azure', 'bisque', 'black', 'aquamarine')\\n\\n GROUP BY p_color, c_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876469.760285} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/75abaf2d4d0c36b2e51407897316ce85/output.pkl b/src/cache/joblib/querying/query/75abaf2d4d0c36b2e51407897316ce85/output.pkl new file mode 100644 index 0000000..d5724cf Binary files /dev/null and b/src/cache/joblib/querying/query/75abaf2d4d0c36b2e51407897316ce85/output.pkl differ diff --git a/src/cache/joblib/querying/query/799a2ec09b37e6592b1586f7976666ba/metadata.json b/src/cache/joblib/querying/query/799a2ec09b37e6592b1586f7976666ba/metadata.json new file mode 100644 index 0000000..1880bc5 --- /dev/null +++ b/src/cache/joblib/querying/query/799a2ec09b37e6592b1586f7976666ba/metadata.json @@ -0,0 +1 @@ +{"duration": 12.126240015029907, "input_args": {"q": "\"\\n SELECT p_color, p_type, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('azure', 'bisque', 'black', 'aquamarine')\\n\\n GROUP BY p_color, p_type\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876753.84258} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/799a2ec09b37e6592b1586f7976666ba/output.pkl b/src/cache/joblib/querying/query/799a2ec09b37e6592b1586f7976666ba/output.pkl new file mode 100644 index 0000000..650409b Binary files /dev/null and b/src/cache/joblib/querying/query/799a2ec09b37e6592b1586f7976666ba/output.pkl differ diff --git a/src/cache/joblib/querying/query/7a18edd32faaa721067069d238939514/metadata.json b/src/cache/joblib/querying/query/7a18edd32faaa721067069d238939514/metadata.json deleted file mode 100644 index 9b71fc0..0000000 --- a/src/cache/joblib/querying/query/7a18edd32faaa721067069d238939514/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 12.400226831436157, "input_args": {"q": "\"\\n SELECT p_color, p_type, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, p_type\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717680529.093871} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/7a18edd32faaa721067069d238939514/output.pkl b/src/cache/joblib/querying/query/7a18edd32faaa721067069d238939514/output.pkl deleted file mode 100644 index 5cb6260..0000000 Binary files a/src/cache/joblib/querying/query/7a18edd32faaa721067069d238939514/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/80f0a111881101cdd4a94debb9ebfadf/metadata.json b/src/cache/joblib/querying/query/80f0a111881101cdd4a94debb9ebfadf/metadata.json new file mode 100644 index 0000000..3471dcf --- /dev/null +++ b/src/cache/joblib/querying/query/80f0a111881101cdd4a94debb9ebfadf/metadata.json @@ -0,0 +1 @@ +{"duration": 14.343026876449585, "input_args": {"q": "\"\\n SELECT p_color, c_region, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('bisque', 'blue')\\n\\n GROUP BY p_color, c_region\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876820.283672} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/80f0a111881101cdd4a94debb9ebfadf/output.pkl b/src/cache/joblib/querying/query/80f0a111881101cdd4a94debb9ebfadf/output.pkl new file mode 100644 index 0000000..34c3a38 Binary files /dev/null and b/src/cache/joblib/querying/query/80f0a111881101cdd4a94debb9ebfadf/output.pkl differ diff --git a/src/cache/joblib/querying/query/829c0b7bd51a86ea761cf24f640f0d4f/metadata.json b/src/cache/joblib/querying/query/829c0b7bd51a86ea761cf24f640f0d4f/metadata.json new file mode 100644 index 0000000..704b565 --- /dev/null +++ b/src/cache/joblib/querying/query/829c0b7bd51a86ea761cf24f640f0d4f/metadata.json @@ -0,0 +1 @@ +{"duration": 13.514874935150146, "input_args": {"q": "\"\\n SELECT p_color, p_category, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('azure', 'bisque', 'black', 'aquamarine')\\n\\n GROUP BY p_color, p_category\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876484.837832} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/829c0b7bd51a86ea761cf24f640f0d4f/output.pkl b/src/cache/joblib/querying/query/829c0b7bd51a86ea761cf24f640f0d4f/output.pkl new file mode 100644 index 0000000..6a7bfab Binary files /dev/null and b/src/cache/joblib/querying/query/829c0b7bd51a86ea761cf24f640f0d4f/output.pkl differ diff --git a/src/cache/joblib/querying/query/8a569d65af2925b8f4dd83fb317a642f/metadata.json b/src/cache/joblib/querying/query/8a569d65af2925b8f4dd83fb317a642f/metadata.json new file mode 100644 index 0000000..addcdc4 --- /dev/null +++ b/src/cache/joblib/querying/query/8a569d65af2925b8f4dd83fb317a642f/metadata.json @@ -0,0 +1 @@ +{"duration": 14.327998876571655, "input_args": {"q": "\"\\n SELECT p_color, c_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, c_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719583996.5475771} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/38eb9029508ab26a653f100309b92ce2/output.pkl b/src/cache/joblib/querying/query/8a569d65af2925b8f4dd83fb317a642f/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/38eb9029508ab26a653f100309b92ce2/output.pkl rename to src/cache/joblib/querying/query/8a569d65af2925b8f4dd83fb317a642f/output.pkl diff --git a/src/cache/joblib/querying/query/90d6946253c401dde28bae43087ebec1/metadata.json b/src/cache/joblib/querying/query/90d6946253c401dde28bae43087ebec1/metadata.json new file mode 100644 index 0000000..39e8717 --- /dev/null +++ b/src/cache/joblib/querying/query/90d6946253c401dde28bae43087ebec1/metadata.json @@ -0,0 +1 @@ +{"duration": 19.513118982315063, "input_args": {"q": "\"\\n SELECT p_color, c_region, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, c_region\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876989.662223} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/90d6946253c401dde28bae43087ebec1/output.pkl b/src/cache/joblib/querying/query/90d6946253c401dde28bae43087ebec1/output.pkl new file mode 100644 index 0000000..1f5b8c2 Binary files /dev/null and b/src/cache/joblib/querying/query/90d6946253c401dde28bae43087ebec1/output.pkl differ diff --git a/src/cache/joblib/querying/query/986e56573188d2eca1b0a0f1b4cb1876/metadata.json b/src/cache/joblib/querying/query/986e56573188d2eca1b0a0f1b4cb1876/metadata.json deleted file mode 100644 index 88e1791..0000000 --- a/src/cache/joblib/querying/query/986e56573188d2eca1b0a0f1b4cb1876/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 12.51117491722107, "input_args": {"q": "\"\\n SELECT p_color, p_brand, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, p_brand\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717599462.2399411} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/a0b37fcf91ab1f58c79b7422d1fe8b88/metadata.json b/src/cache/joblib/querying/query/a0b37fcf91ab1f58c79b7422d1fe8b88/metadata.json new file mode 100644 index 0000000..ee7a623 --- /dev/null +++ b/src/cache/joblib/querying/query/a0b37fcf91ab1f58c79b7422d1fe8b88/metadata.json @@ -0,0 +1 @@ +{"duration": 18.672475337982178, "input_args": {"q": "\"\\n SELECT p_color, s_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, s_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719564087.0910451} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/4d2f399a43b21e50df0ce5cc4f0f7ca4/output.pkl b/src/cache/joblib/querying/query/a0b37fcf91ab1f58c79b7422d1fe8b88/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/4d2f399a43b21e50df0ce5cc4f0f7ca4/output.pkl rename to src/cache/joblib/querying/query/a0b37fcf91ab1f58c79b7422d1fe8b88/output.pkl diff --git a/src/cache/joblib/querying/query/aa4248891060b704d862b2bf6d1b9d5b/metadata.json b/src/cache/joblib/querying/query/aa4248891060b704d862b2bf6d1b9d5b/metadata.json new file mode 100644 index 0000000..648f6fa --- /dev/null +++ b/src/cache/joblib/querying/query/aa4248891060b704d862b2bf6d1b9d5b/metadata.json @@ -0,0 +1 @@ +{"duration": 13.761728048324585, "input_args": {"q": "\"\\n SELECT p_color, p_container, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('azure', 'bisque', 'black', 'aquamarine')\\n\\n GROUP BY p_color, p_container\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719877064.6480262} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/aa4248891060b704d862b2bf6d1b9d5b/output.pkl b/src/cache/joblib/querying/query/aa4248891060b704d862b2bf6d1b9d5b/output.pkl new file mode 100644 index 0000000..5ed551d Binary files /dev/null and b/src/cache/joblib/querying/query/aa4248891060b704d862b2bf6d1b9d5b/output.pkl differ diff --git a/src/cache/joblib/querying/query/b685f2e2d6be7d4259ca981123292684/metadata.json b/src/cache/joblib/querying/query/b685f2e2d6be7d4259ca981123292684/metadata.json new file mode 100644 index 0000000..2fe29b4 --- /dev/null +++ b/src/cache/joblib/querying/query/b685f2e2d6be7d4259ca981123292684/metadata.json @@ -0,0 +1 @@ +{"duration": 14.175416707992554, "input_args": {"q": "\"\\n SELECT p_color, c_nation, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('bisque', 'blue')\\n\\n GROUP BY p_color, c_nation\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876849.762035} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/b685f2e2d6be7d4259ca981123292684/output.pkl b/src/cache/joblib/querying/query/b685f2e2d6be7d4259ca981123292684/output.pkl new file mode 100644 index 0000000..f86a67e Binary files /dev/null and b/src/cache/joblib/querying/query/b685f2e2d6be7d4259ca981123292684/output.pkl differ diff --git a/src/cache/joblib/querying/query/c7594935966c0a80db5a2ebe41c044d1/metadata.json b/src/cache/joblib/querying/query/c7594935966c0a80db5a2ebe41c044d1/metadata.json new file mode 100644 index 0000000..91c79fc --- /dev/null +++ b/src/cache/joblib/querying/query/c7594935966c0a80db5a2ebe41c044d1/metadata.json @@ -0,0 +1 @@ +{"duration": 14.510737180709839, "input_args": {"q": "\"\\n SELECT p_color, c_city, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN customer ON lo_custkey = c_custkey\\nINNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('azure', 'blue')\\n\\n GROUP BY p_color, c_city\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876891.030114} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/c7594935966c0a80db5a2ebe41c044d1/output.pkl b/src/cache/joblib/querying/query/c7594935966c0a80db5a2ebe41c044d1/output.pkl new file mode 100644 index 0000000..78f764a Binary files /dev/null and b/src/cache/joblib/querying/query/c7594935966c0a80db5a2ebe41c044d1/output.pkl differ diff --git a/src/cache/joblib/querying/query/c9f3d24ad36c4794af2e5e07582f3f19/metadata.json b/src/cache/joblib/querying/query/c9f3d24ad36c4794af2e5e07582f3f19/metadata.json deleted file mode 100644 index ad2c82e..0000000 --- a/src/cache/joblib/querying/query/c9f3d24ad36c4794af2e5e07582f3f19/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 11.881813049316406, "input_args": {"q": "\"\\n SELECT p_color, p_color, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, p_color\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717599431.864533} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/c9f3d24ad36c4794af2e5e07582f3f19/output.pkl b/src/cache/joblib/querying/query/c9f3d24ad36c4794af2e5e07582f3f19/output.pkl deleted file mode 100644 index 5248216..0000000 Binary files a/src/cache/joblib/querying/query/c9f3d24ad36c4794af2e5e07582f3f19/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/e1be94694225425f3332ec7fd0ff983b/metadata.json b/src/cache/joblib/querying/query/e1be94694225425f3332ec7fd0ff983b/metadata.json new file mode 100644 index 0000000..05da9aa --- /dev/null +++ b/src/cache/joblib/querying/query/e1be94694225425f3332ec7fd0ff983b/metadata.json @@ -0,0 +1 @@ +{"duration": 11.52425217628479, "input_args": {"q": "\"\\n SELECT p_color, p_container, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nWHERE p_color IN ('bisque', 'blue')\\n\\n GROUP BY p_color, p_container\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719876799.081373} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/e1be94694225425f3332ec7fd0ff983b/output.pkl b/src/cache/joblib/querying/query/e1be94694225425f3332ec7fd0ff983b/output.pkl new file mode 100644 index 0000000..63e89f0 Binary files /dev/null and b/src/cache/joblib/querying/query/e1be94694225425f3332ec7fd0ff983b/output.pkl differ diff --git a/src/cache/joblib/querying/query/ea66b46a7775f9cbe553c0c6082dfd01/metadata.json b/src/cache/joblib/querying/query/ea66b46a7775f9cbe553c0c6082dfd01/metadata.json deleted file mode 100644 index 4bb5e18..0000000 --- a/src/cache/joblib/querying/query/ea66b46a7775f9cbe553c0c6082dfd01/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 0.0241241455078125, "input_args": {"q": "\"\\n SELECT departure_airport, departure_hour, SUM(nb_flights)\\n FROM fact_table\\n INNER JOIN airport_dim ON airport_dim.iata_code = fact_table.departure_airport\\n NATURAL JOIN hour_dim\\n INNER JOIN time_dim ON time_dim.day = fact_table.date\\n WHERE departure_airport IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'IAH', 'LAS', 'SFO', 'PHX', 'MCO', 'SEA', 'CLT', 'MSP', 'LGA', 'DTW', 'EWR', 'BOS', 'BWI', 'SLC', 'JFK')\\n GROUP BY departure_airport, departure_hour\\n ORDER BY SUM(nb_flights) DESC;\\n \""}, "time": 1717674748.1134489} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/ea66b46a7775f9cbe553c0c6082dfd01/output.pkl b/src/cache/joblib/querying/query/ea66b46a7775f9cbe553c0c6082dfd01/output.pkl deleted file mode 100644 index 81a3583..0000000 Binary files a/src/cache/joblib/querying/query/ea66b46a7775f9cbe553c0c6082dfd01/output.pkl and /dev/null differ diff --git a/src/cache/joblib/querying/query/ec3e9481c64616b01f8273f0ef37492b/metadata.json b/src/cache/joblib/querying/query/ec3e9481c64616b01f8273f0ef37492b/metadata.json new file mode 100644 index 0000000..6050512 --- /dev/null +++ b/src/cache/joblib/querying/query/ec3e9481c64616b01f8273f0ef37492b/metadata.json @@ -0,0 +1 @@ +{"duration": 18.256238222122192, "input_args": {"q": "\"\\n SELECT p_color, s_nation, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\nWHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n\\n GROUP BY p_color, s_nation\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1719877024.142326} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/fb5b0a41ced2d8b021aa9e61416936c1/output.pkl b/src/cache/joblib/querying/query/ec3e9481c64616b01f8273f0ef37492b/output.pkl similarity index 100% rename from src/cache/joblib/querying/query/fb5b0a41ced2d8b021aa9e61416936c1/output.pkl rename to src/cache/joblib/querying/query/ec3e9481c64616b01f8273f0ef37492b/output.pkl diff --git a/src/cache/joblib/querying/query/f44a9533a2bc2e5cc92ba7d62f9d34b7/metadata.json b/src/cache/joblib/querying/query/f44a9533a2bc2e5cc92ba7d62f9d34b7/metadata.json deleted file mode 100644 index c37df91..0000000 --- a/src/cache/joblib/querying/query/f44a9533a2bc2e5cc92ba7d62f9d34b7/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 12.596222877502441, "input_args": {"q": "\"\\n SELECT p_color, p_category, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, p_category\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717599449.712944} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/fb5b0a41ced2d8b021aa9e61416936c1/metadata.json b/src/cache/joblib/querying/query/fb5b0a41ced2d8b021aa9e61416936c1/metadata.json deleted file mode 100644 index dfc3a27..0000000 --- a/src/cache/joblib/querying/query/fb5b0a41ced2d8b021aa9e61416936c1/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"duration": 13.08634901046753, "input_args": {"q": "\"\\n SELECT p_color, s_nation, SUM(lo_quantity)\\n FROM lineorder\\n INNER JOIN part ON lo_partkey = p_partkey\\nINNER JOIN supplier ON lo_suppkey = s_suppkey\\n\\n WHERE p_color IN ('aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen')\\n GROUP BY p_color, s_nation\\n ORDER BY SUM(lo_quantity) DESC;\\n \""}, "time": 1717680554.546965} \ No newline at end of file diff --git a/src/cache/joblib/querying/query/func_code.py b/src/cache/joblib/querying/query/func_code.py index e16aed0..bd2e0f4 100644 --- a/src/cache/joblib/querying/query/func_code.py +++ b/src/cache/joblib/querying/query/func_code.py @@ -1,8 +1,10 @@ -# first line: 29 +# first line: 34 @memory.cache # persistent memoïzation def query(q: str) -> list[tuple]: """Execute a given query and reture the result in a python list[tuple].""" - if VERBOSE: print(f'sending query : {q}') + if VERBOSE: + print(f'sending query : {q}') res = CUR.execute(str(q)) - if VERBOSE: print("got response", res) + if VERBOSE: + print("got response", res) return res.fetchall() diff --git a/src/concentration_test.py b/src/concentration_test.py index e69de29..96539a0 100644 --- a/src/concentration_test.py +++ b/src/concentration_test.py @@ -0,0 +1,66 @@ +import matplotlib.pyplot as plt +import numpy as np + +from tprint import tprint + +import orderankings as odrk +import querying as qry +import kemeny_young as ky + +from config import CONFIG as CFG, DATABASE_CFG + +######################## YAML CONFIG (src/config.yaml) ######################### + +DATABASE_NAME = CFG["database_name"] + +VERBOSE = CFG["verbose"]["concentration_test"] + +HYPOTHESIS_RANKING = odrk.ranking_from_ordering( + DATABASE_CFG["hypothesis_ordering"]) + +#################### CONCENTRATION TESTS ON RANDOM QUERIES ##################### + + + + +def rankings_loss(hypothesis_ranking, rankings: list[list[int]]) -> float: + """Return the loss for the distance between the hypothesis and the rankings. + It is the kendall-tau distance between the hypothesis, and the kemeny-young + winner of the rankings.""" + tau, agg_ranking = ky.rank_aggregation(rankings) + if VERBOSE: + print("rank aggregation fit (τ distance to each aggregated ranking) :", + tau) + print(hypothesis_ranking, agg_ranking) + return ky.kendall_tau_dist(hypothesis_ranking, agg_ranking) + + +def loss_of_random_query(hypothesis_ranking) -> float: + query = qry.random_query() + rankings = qry.rankings_from_table(query) + loss = rankings_loss(hypothesis_ranking, rankings) + if VERBOSE: + print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") + print("hypothesis ranking :") + print(hypothesis_ranking) + print("rankings :") + print(rankings) + print("loss :") + print(loss) + return loss + + +def concentration_test(hypothesis_ranking, N: int) -> list[float]: + loss_list = [] + for _ in range(N): + loss = loss_of_random_query(hypothesis_ranking) + loss_list.append(loss) + return loss_list + + +if __name__ == '__main__': + print(concentration_test(HYPOTHESIS_RANKING, 5)) + + + + diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..2cdadb9 --- /dev/null +++ b/src/config.py @@ -0,0 +1,29 @@ +""" +This module loads the yaml config from +""" +from yaml import load as yaml_load, Loader as yaml_Loader +from os import environ # access environment variables + +# absolute path to the home of the virtual environment +# doesn't have any trailing "/" +VENV_HOME = environ.get('VIRTUAL_ENV').rstrip('/') + +CONFIG_FILE_NAME = 'config.yaml' + +# absolute path to the yaml config file +CONFIG_FILE_PATH = f"{VENV_HOME}/src/{CONFIG_FILE_NAME}" + +# load the config into the CONFIG variable +with open(CONFIG_FILE_PATH) as config: + CONFIG = yaml_load(config, Loader=yaml_Loader) + +# name of the current database (from the config file) +DATABASE_NAME = CONFIG["database_name"] + +# configuration specific to the current database +DATABASE_CFG = CONFIG["database"][DATABASE_NAME] + +# absolute path to the sqlite database file +DATABASE_FILE = f"{VENV_HOME}/{DATABASE_NAME}_dataset/{DATABASE_NAME}.db" + + diff --git a/src/config.yaml b/src/config.yaml index c5148b0..3aa0edb 100644 --- a/src/config.yaml +++ b/src/config.yaml @@ -2,13 +2,14 @@ # database_name: flight_delay database_name: SSB -dataset_config: +database: SSB: # {{{ - orders_length: 2 + orders_length: 4 # hypothesis_ordering: ['bisque', 'aquamarine'] - hypothesis_ordering: ['bisque', 'blue'] + # hypothesis_ordering: ['azure', 'blue'] + hypothesis_ordering: ['azure', 'bisque', 'black', 'aquamarine'] # hypothesis_ordering: [30, 18] # hypothesis_ordering: [2, 32] @@ -18,7 +19,9 @@ dataset_config: # authorized_parameter_values: !!python/object/apply:builtins.range [0, 50] parameter: p_color - authorized_parameter_values: !!python/tuple ['aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen'] + # authorized_parameter_values: !!python/tuple ['aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen'] + authorized_parameter_values: ['azure', 'bisque', 'black', 'aquamarine'] + # authorized_parameter_values: ['azure', 'blue'] summed_attribute: lo_quantity # summed_attribute: lo_revenue @@ -26,22 +29,22 @@ dataset_config: criterion: ##### customer table - # - "c_region" + - "c_region" - "c_city" - # "c_nation" + - "c_nation" ##### part table - "p_category" - "p_brand" # - "p_mfgr" # - "p_color" - # - "p_type" - # - "p_container" + - "p_type" + - "p_container" ##### supplier table - "s_city" - # "s_nation" - # "s_region" + - "s_nation" + - "s_region" ##### order date # - "D_DATE" @@ -98,6 +101,6 @@ dataset_config: # set which parts of the program should ouput logs verbose: # queries to the database (src/querying.py) - querying: false - concentration_test: false + querying: true + concentration_test: true diff --git a/src/kemeny_young.py b/src/kemeny_young.py index 1c95958..34b4100 100644 --- a/src/kemeny_young.py +++ b/src/kemeny_young.py @@ -5,11 +5,10 @@ rankings, and the kemeny-young rank aggregation method. import numpy as np from numba import jit, njit from itertools import permutations -from tools import combinations_of_2 +from tools import combinations_of_2, Number from tqdm import tqdm from tprint import tprint -Number = int|float # original, unoptimized version, but it's more readable # def kendall_tau_dist(rank_a, rank_b) -> int: @@ -49,7 +48,8 @@ def rank_aggregation(rankings: list[list[int]]) -> tuple[int, tuple[int, ...]]: Args: ranks: A list of the ranks (2D numpy array) to elect from. Returns: - int, list: The minimal sum of distances to ranks, the rank of minimal distance. + int, list: The minimal sum of distances to ranks, the rank of minimal + distance. """ rankings = np.array(rankings) min_dist: int = np.inf @@ -82,17 +82,29 @@ if __name__ == '__main__': # print(rank_aggregation(ranks)) - # print(kendall_tau_dist([1, 2, 3], - # [3, 1, 2])) + rankings = np.argsort(list('abc')), np.argsort(list('bda')) + a, b = rankings[0], rankings[1] + print(a, b) + print(rank_aggregation(rankings)) + print(rank_aggregation([[1, 2, 3], [2, 4, 1]])) - ranks = np.array(list(permutations(range(7)))) - for _ in tqdm(range(10)): - selected_lines = np.random.randint(ranks.shape[0], size=30) - selected = ranks[selected_lines,:] - print(rank_aggregation(selected)) - # tprint(selected) - # print(ranks) - # print(kendalltau_dist(ranks[5], ranks[-1])) - # print(np_kendalltau_dist(ranks[5], ranks[-1])) + orderings = np.array([["salut", "coucou", "bonjour"], + ["coucou", "hello", "bonjour"], + ["hey", "salut", "coucou"], + ["bonjour", "coucou", "hey"]]) + print(rank_aggregation(np.argsort(orderings, axis=1))) + print(rank_aggregation(np.vectorize(hash)(orderings))) + print(np.vectorize(hash)(orderings)) + + + # ranks = np.array(list(permutations(range(7)))) + # for _ in tqdm(range(10)): + # selected_lines = np.random.randint(ranks.shape[0], size=30) + # selected = ranks[selected_lines,:] + # print(rank_aggregation(selected)) + # # tprint(selected) + # # print(ranks) + # # print(kendalltau_dist(ranks[5], ranks[-1])) + # # print(np_kendalltau_dist(ranks[5], ranks[-1])) diff --git a/src/losses.py b/src/losses.py new file mode 100644 index 0000000..3a94358 --- /dev/null +++ b/src/losses.py @@ -0,0 +1,32 @@ +from tools import Number +import orderankings as odrk +import kemeny_young as ky + + +def orderings_average_loss(orderings: list[list[str]], truth: list[str]) -> float:# {{{ + """This loss is the the average of kendall tau distances between the truth + and each ordering.""" + rankings = odrk.rankings_from_orderings(orderings) + true_ranking = odrk.rankings_from_orderings([truth])[0] + return rankings_average_loss(rankings, true_ranking)# }}} + + +def rankings_average_loss(rankings: list[list[int]], truth: list[int]) -> float:# {{{ + distance = sum(ky.kendall_tau_dist(rkng, truth) for rkng in rankings) + length = len(rankings) + # apparently, this is what works for a good normalization + return distance / length + # return distance * 2 / (length * (length - 1))}}} + + +def kmny_dist_loss(orderings: list[list[str]], truth: list[str]) -> Number:# {{{ + """Return the kendall tau distance between the truth and the kemeny-young + aggregation of orderings""" + _, agg_rank = ky.rank_aggregation(odrk.rankings_from_orderings(orderings)) + aggregation = odrk.ordering_from_ranking(agg_rank, truth) + loss = ky.kendall_tau_dist( + odrk.ranking_from_ordering(aggregation), + odrk.ranking_from_ordering(truth)) + return loss + # print(aggregation, HYPOTHESIS_ORDERING, kdl_agg_dist)}}} + diff --git a/src/orderankings.py b/src/orderankings.py index f16e34f..64a5d7b 100644 --- a/src/orderankings.py +++ b/src/orderankings.py @@ -12,10 +12,13 @@ you index to get back the values from the indexes. Rankings are similar to mathematical "permutations". """ import numpy as np -from tprint import tprint -from kemeny_young import rank_aggregation -VERBOSE=False +from kemeny_young import rank_aggregation +from tprint import tprint + +from collections import defaultdict + +VERBOSE = False # def inverse_permutation(permutation: list[int]) -> list[int]: # """Return the inverse of a given permutation.""" @@ -39,8 +42,7 @@ def inverse_permutation(permutation: list[int]) -> list[int]: return inverse - -def get_orderings_from_table(table: np.ndarray, column_index: int =0) -> list: +def get_orderings_from_table(table: np.ndarray, column_index: int = 0) -> list: """Extract a list of orderings from a table coming out of a sql query. This basically means that you extract values of the given column, while keeping order but removing duplicates. @@ -51,13 +53,19 @@ def get_orderings_from_table(table: np.ndarray, column_index: int =0) -> list: extract the orderings from. """ table = np.array(table) - values = table[:,column_index] + values = table[:, column_index] ranking, indexes = np.unique(values, return_index=True) return values[np.sort(indexes)] # distinct ordered values -def get_all_orderings_from_table(table: list[tuple]) -> dict: - orders = dict() +def get_all_orderings_from_table(table: list[list[str]]) -> dict: + """Return a dictionnary mapping a value of the criteria to the order you + get when selecting on this value. + This means you get all orders of a table, where the criteria is in the + second column. + IMPORTANT: this function assumes that values are already sorted + appropriately. If not, the resulting orders won't be correct.""" + orders = defaultdict() for line in table: parameter, criteria, sum_value = line if orders.get(criteria) is None: @@ -73,7 +81,8 @@ def rankings_from_orderings(orderings: list[list[str]]) -> list[list[int]]: matching ordering into alphabetical order. """ orderings = np.array(orderings) - rankings = np.argsort(orderings, axis=1) + # rankings = np.argsort(orderings, axis=1) + rankings = np.vectorize(hash)(orderings) if VERBOSE: print("found rankings :") tprint(rankings) @@ -83,6 +92,7 @@ def rankings_from_orderings(orderings: list[list[str]]) -> list[list[int]]: def ranking_from_ordering(ordering: list[str]) -> list[int]: return rankings_from_orderings([ordering])[0] + def ordering_from_ranking(ranking: list[int], values_to_order: list[str]) -> list[str]: """Get an order of values from a ranking of these values. This is basically the inverse function of *rankings_from_orderings*. @@ -99,25 +109,25 @@ def ordering_from_ranking(ranking: list[int], values_to_order: list[str]) -> lis return np.sort(values_to_order)[inversed_ranking] -# def ordering_from_ranking(ranking: list[int], -# reference_ordering: list[str], -# reference_ranking: list[int]): -# """Get an ordering of values from a ranking, using a reference ordering and -# ranking (the ranking must match the ordering).""" -# # make sure you are using numpy arrays -# ref_ordering = np.array(reference_ordering) -# ref_ranking = np.array(reference_ranking) -# # get back the best order from the best ranking -# ordering = ref_ordering[ref_ranking[[ranking]]][0] -# if VERBOSE: print("best ordering :", ordering) -# return ordering +def ordering_from_ranking(ranking: list[int], + reference_ordering: list[str], + reference_ranking: list[int]): + """Get an ordering of values from a ranking, using a reference ordering and + ranking (the ranking must match the ordering).""" + # make sure you are using numpy arrays + ref_ordering = np.array(reference_ordering) + ref_ranking = np.array(reference_ranking) + # get back the best order from the best ranking + ordering = ref_ordering[ref_ranking[[ranking]]][0] + if VERBOSE: print("best ordering :", ordering) + return ordering + def aggregate_rankings(rankings: list[list[int]]) -> tuple[int, ...]: """Calculate the aggregation of all given rankings, that is the ranking that is the nearest to all given rankings.""" min_dist, best_ranking = rank_aggregation(rankings) - if VERBOSE: print("best ranking :", best_ranking) + if VERBOSE: + print("best ranking :", best_ranking) return best_ranking - - diff --git a/src/querying.py b/src/querying.py index 197dfa2..ba994d6 100644 --- a/src/querying.py +++ b/src/querying.py @@ -1,66 +1,101 @@ import sqlite3 import numpy as np +from random import choice from tprint import tprint from joblib import Memory # for persistent memoïzation from query_generator import * import orderankings as odrk -import kemeny_young as km - -import yaml # to load config file -from os import environ # access environment variables +from config import CONFIG, DATABASE_CFG, VENV_HOME, DATABASE_FILE # persistent memoïzation -memory = Memory("src/cache") +memory = Memory(f"{VENV_HOME}/src/cache") -VENV_PATH = environ.get('VIRTUAL_ENV') +VERBOSE = CONFIG["verbose"]["querying"] -with open(VENV_PATH + "/src/config.yaml") as config_file: - cfg = yaml.load(config_file, Loader=yaml.Loader) - -VERBOSE = cfg["verbose"]["querying"] - -DATABASE_NAME = cfg["database_name"] -if VERBOSE: print("using database", DATABASE_NAME) - - -################################################################################ -# Connexion to sqlite database +######################### Connexion to sqlite database ######################### # initialize database connection -DATABASE_FILE = f"{DATABASE_NAME}_dataset/{DATABASE_NAME}.db" -if VERBOSE: print(f"connecting to {DATABASE_FILE}") +if VERBOSE: + print(f"connecting to {DATABASE_FILE}") + CON = sqlite3.connect(DATABASE_FILE) CUR = CON.cursor() + @memory.cache # persistent memoïzation def query(q: str) -> list[tuple]: """Execute a given query and reture the result in a python list[tuple].""" - if VERBOSE: print(f'sending query : {q}') + if VERBOSE: + print(f'sending query : {q}') res = CUR.execute(str(q)) - if VERBOSE: print("got response", res) + if VERBOSE: + print("got response", res) return res.fetchall() -################################################################################ -# Choice of the right query generator -if DATABASE_NAME == "flight_delay": - QUERY_PARAM_GB_FACTORY = QueryFlightWithParameterGroupedByCriteria -elif DATABASE_NAME == "SSB": - QUERY_PARAM_GB_FACTORY = QuerySSBWithParameterGroupedByCriteria -else: - raise ValueError(f"Unknown database : {DATABASE_NAME}") +##################### Choice of the right query generator ###################### -################################################################################ -# orderings extraction functions + +QUERY_PARAM_GB_CONSTRUCTOR = DATABASE_CFG["query_generator"] + + +######################## orderings extraction functions ######################## + +def random_query() -> list[tuple]: + random_criteria = choice(DATABASE_CFG["criterion"]) + + qg_constructor = DATABASE_CFG["query_generator"] + sql_query = qg_constructor( + parameter=DATABASE_CFG["parameter"], + authorized_parameter_values=DATABASE_CFG["authorized_parameter_values"], + criteria=random_criteria, + summed_attribute=DATABASE_CFG["summed_attribute"]) + + # print the query + if VERBOSE: print("query :", str(sql_query), sep="\n") + + result = query(str(sql_query)) # get result from database + + if VERBOSE: # print the result + print("query result :") + tprint(result) + + return result + + +def filter_correct_length_orderings(orderings: list[tuple], length: int) -> list[tuple]: + """Keep only orders that are of the specified length that means removing + too short ones, and slicing too long ones.""" + correct_length_orderings = np.array( + [ordrng[:length] for ordrng in orderings if len(ordrng) >= length] + ) + + if VERBOSE: + print(f"found {len(correct_length_orderings)} orderings :") + # print(correct_length_orderings) + tprint(correct_length_orderings) + return correct_length_orderings + + +def rankings_from_table(query_result: list[tuple]): + orderings_dict = odrk.get_all_orderings_from_table(query_result) + orderings = orderings_dict.values() + orderings = filter_correct_length_orderings( + orderings, + DATABASE_CFG["orders_length"]) + if VERBOSE: + print(orderings) + rankings = odrk.rankings_from_orderings(orderings) + return rankings @memory.cache # persistent memoïzation def find_orderings(parameter: str, summed_attribute: str, criterion: tuple[str, ...], length: int, - authorized_parameter_values: tuple[str, ...] | None =None + authorized_parameter_values: tuple[str, ...] | None = None ) -> list[list[str]]: """Gather the list of every ordering returned by queries using given values of parameter, summed_attribute, and all given values of criterion. @@ -73,11 +108,13 @@ def find_orderings(parameter: str, summed_attribute: str, criterion: tuple[str, Returns: list[list]: The list of all found orderings. """ + # instanciate the query generator - qg = QUERY_PARAM_GB_FACTORY(parameter=parameter, - authorized_parameter_values=authorized_parameter_values, - summed_attribute=summed_attribute, - criteria=None) + qg = DATABASE_CFG["query_generator"]( + parameter=parameter, + authorized_parameter_values=authorized_parameter_values, + summed_attribute=summed_attribute, + criteria=None) # ensemble de tous les ordres trouvés # la clef est la valeur dans la colonne criteria @@ -95,18 +132,6 @@ def find_orderings(parameter: str, summed_attribute: str, criterion: tuple[str, # update the global list of all found orders orderings.extend(table_orders.values()) - # keep only orders that are of the specified length - # that means removing too short ones, and slicing too long ones - correct_length_orderings = np.array( - [ordrng[:length] for ordrng in orderings if len(ordrng) >= length] - ) - - if VERBOSE: - print(f"found {len(correct_length_orderings)} orderings :") - print(correct_length_orderings) - # tprint(correct_length_orderings) + correct_length_orderings = filter_correct_length_orderings(orderings, length) return correct_length_orderings - - - diff --git a/src/tools.py b/src/tools.py index 9757f8f..541cfb4 100644 --- a/src/tools.py +++ b/src/tools.py @@ -2,6 +2,8 @@ import numpy as np from numba import jit from fastcache import lru_cache +Number = int | float + # @lru_cache(maxsize=16) def combinations_of_2(size: int): """Returns an array of size n*2, containing every pair of two integers