From 7eae62f912a8feec87aa909bcc204263bfd7990a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barth=C3=A9l=C3=A9my=20von=20Haller?= Date: Fri, 29 May 2020 08:58:07 +0200 Subject: [PATCH] Only clean the new objects (QC-339) (#409) --- Framework/script/RepoCleaner/Ccdb.py | 10 +++-- Framework/script/RepoCleaner/repoCleaner.py | 41 +++++++++++++++++++-- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/Framework/script/RepoCleaner/Ccdb.py b/Framework/script/RepoCleaner/Ccdb.py index 28d7a5cf40c59..027c1d958b225 100644 --- a/Framework/script/RepoCleaner/Ccdb.py +++ b/Framework/script/RepoCleaner/Ccdb.py @@ -51,15 +51,17 @@ def __init__(self, url): logging.info(f"Instantiate CCDB at {url}") self.url = url - def getObjectsList(self) -> List[str]: + def getObjectsList(self, added_since: int = 0) -> List[str]: ''' - Get the full list of objects in the CCDB. - + Get the full list of objects in the CCDB that have been created since added_since. + + :param added_since: if specified, only return objects added since this timestamp in epoch milliseconds. :return A list of strings, each containing a path to an object in the CCDB. 
''' + logging.debug(f"added_since : {added_since}") url_for_all_obj = self.url + '/latest/.*' logging.debug(f"Ccdb::getObjectsList -> {url_for_all_obj}") - headers = {'Accept':'application/json'} + headers = {'Accept':'application/json', 'If-Not-Before':str(added_since)} r = requests.get(url_for_all_obj, headers=headers) r.raise_for_status() try: diff --git a/Framework/script/RepoCleaner/repoCleaner.py b/Framework/script/RepoCleaner/repoCleaner.py index f82e968bc9ff0..7f95d251ea83c 100755 --- a/Framework/script/RepoCleaner/repoCleaner.py +++ b/Framework/script/RepoCleaner/repoCleaner.py @@ -22,12 +22,13 @@ import argparse import logging import requests -import os import re from typing import List +import tempfile import dryable import yaml +import time from Ccdb import Ccdb @@ -125,7 +126,7 @@ def findMatchingRule(rules, object_path): if object_path == None: logging.error(f"findMatchingRule: object_path is None") - return None + return None for rule in rules: pattern = re.compile(rule.object_path) @@ -136,6 +137,38 @@ def findMatchingRule(rules, object_path): logging.debug(" No rule found, skipping.") return None +filepath = tempfile.gettempdir() + "/repoCleaner.txt" +currentTimeStamp = time.time_ns() // 1000000 + +def getTimestampLastExecution(): + """ + Returns the timestamp of the last execution. + It is stored in a file in $TMP/repoCleaner.txt. + :return: the timestamp of the last execution or 0 if it cannot find it. + """ + try: + f = open(filepath, "r") + except IOError as e: + logging.info(f"File {filepath} not readable, we return 0 as timestamp.") + return 0 + timestamp = f.read() + logging.info(f"Timestamp retrieved from {filepath}: {timestamp}") + f.close() + return timestamp + +def storeSavedTimestamp(): + """ + Store the timestamp we saved at the beginning of the execution of this script. 
+ """ + try: + f = open(filepath, "w+") + except IOError: + logging.error(f"Could not write the saved timestamp to {filepath}") + f.write(str(currentTimeStamp)) + logging.info(f"Stored timestamp {currentTimeStamp} in {filepath}") + f.close() + + # **************** # We start here ! # **************** @@ -158,10 +191,11 @@ def main(): # Get list of objects from CCDB ccdb = Ccdb(ccdb_url) - paths = ccdb.getObjectsList() + paths = ccdb.getObjectsList(getTimestampLastExecution()) if args.only_path != '': paths = [item for item in paths if item.startswith(args.only_path)] logging.debug(paths) + logging.debug(len(paths)) # For each object call the first matching rule logging.info("Loop through the objects and apply first matching rule.") @@ -178,6 +212,7 @@ def main(): logging.info(f"{rule.policy} applied on {object_path}: {stats}") logging.info(f" *** DONE *** (total deleted: {ccdb.counter_deleted}, total updated: {ccdb.counter_validity_updated})") + storeSavedTimestamp() if __name__ == "__main__": # to be able to run the test code above when not imported. main()