From ace7039a6a82cebcd4026865d4aa5e60a8f5eccf Mon Sep 17 00:00:00 2001 From: richardzilincikPantheon Date: Tue, 13 Jun 2023 12:47:15 +0200 Subject: [PATCH] chore: elasticsearch -> opensearch --- .github/workflows/tests.yml | 11 +- .gitignore | 2 +- .vscode/launch.json | 2 +- Dockerfile | 2 +- api/globalConfig.py | 12 +- api/my_flask.py | 6 +- api/views/README.md | 2 +- api/views/admin.py | 6 +- api/views/health_check.py | 32 +-- api/views/redis_search.py | 2 +- api/views/yang_search/grep_search.py | 14 +- .../{elk_search.py => opensearch_query.py} | 37 ++-- api/views/yang_search/yang_search.py | 71 +++--- crontab | 4 +- documentation/source/index.html.md | 12 +- jobs/celery.py | 6 +- .../README.md | 12 +- .../__init__.py | 0 .../build_yindex.py | 22 +- .../create_indices.py | 20 +- .../json/completion.json | 0 .../json/draft_search.json | 0 .../json/initialize_autocomplete_index.json | 0 .../json/initialize_drafts_index.json | 0 .../json/initialize_modules_index.json | 0 .../json/initialize_test_index.json | 0 .../json/initialize_test_search_index.json | 0 .../json/initialize_yindex_index.json | 2 +- .../json/module_search.json | 0 .../json/show_node.json | 0 .../json/sorted_name_rev_query.json | 0 .../models/__init__.py | 0 .../models/index_build.py | 0 .../models/keywords_names.py | 0 .../models/opensearch_indices.py | 2 +- .../opensearch_manager.py | 203 +++++++++--------- .../opensearch_snapshots_manager.py | 56 ++--- .../process-drafts.py | 20 +- .../process_changed_mods.py | 20 +- .../pyang_plugin/README.md | 6 +- .../pyang_plugin/__init__.py | 0 .../pyang_plugin/json_tree.py | 0 .../yang_catalog_index_opensearch.py | 0 .../tests/resources/opensearch_test_data.json | 0 .../tests/test_opensearch_manager.py | 94 ++++---- parseAndPopulate/README.md | 4 +- .../modulesComplicatedAlgorithms.py | 2 +- parseAndPopulate/populate.py | 2 +- recovery/README.md | 20 +- recovery/{elk_fill.py => opensearch_fill.py} | 4 +- ...elk_recovery.py => opensearch_recovery.py} | 16 +- requirements.txt | 2 +- sandbox/compare_databases.py | 40 ++-- sandbox/reindex.py | 10 +- ..._aliases.py => swap_opensearch_aliases.py} | 10 +- ....py => update_opensearch_index_mapping.py} | 11 +- tests/resources/test.conf | 10 +- tests/test_celery_tasks.py | 2 +- tests/test_search.py | 40 ++-- utility/log.py | 2 +- ...asticsearch_util.py => opensearch_util.py} | 14 +- utility/remove_unused.py | 16 +- utility/script_config_dict.py | 12 +- 63 files changed, 466 insertions(+), 429 deletions(-) rename api/views/yang_search/{elk_search.py => opensearch_query.py} (90%) rename {elasticsearchIndexing => opensearch_indexing}/README.md (71%) rename {elasticsearchIndexing => opensearch_indexing}/__init__.py (100%) rename {elasticsearchIndexing => opensearch_indexing}/build_yindex.py (84%) rename {elasticsearchIndexing => opensearch_indexing}/create_indices.py (75%) rename {elasticsearchIndexing => opensearch_indexing}/json/completion.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/draft_search.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/initialize_autocomplete_index.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/initialize_drafts_index.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/initialize_modules_index.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/initialize_test_index.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/initialize_test_search_index.json (100%) rename {elasticsearchIndexing => 
opensearch_indexing}/json/initialize_yindex_index.json (98%) rename {elasticsearchIndexing => opensearch_indexing}/json/module_search.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/show_node.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/json/sorted_name_rev_query.json (100%) rename {elasticsearchIndexing => opensearch_indexing}/models/__init__.py (100%) rename {elasticsearchIndexing => opensearch_indexing}/models/index_build.py (100%) rename {elasticsearchIndexing => opensearch_indexing}/models/keywords_names.py (100%) rename elasticsearchIndexing/models/es_indices.py => opensearch_indexing/models/opensearch_indices.py (86%) rename elasticsearchIndexing/es_manager.py => opensearch_indexing/opensearch_manager.py (55%) rename elasticsearchIndexing/es_snapshots_manager.py => opensearch_indexing/opensearch_snapshots_manager.py (56%) rename {elasticsearchIndexing => opensearch_indexing}/process-drafts.py (80%) rename {elasticsearchIndexing => opensearch_indexing}/process_changed_mods.py (91%) rename {elasticsearchIndexing => opensearch_indexing}/pyang_plugin/README.md (58%) rename {elasticsearchIndexing => opensearch_indexing}/pyang_plugin/__init__.py (100%) rename {elasticsearchIndexing => opensearch_indexing}/pyang_plugin/json_tree.py (100%) rename elasticsearchIndexing/pyang_plugin/yang_catalog_index_es.py => opensearch_indexing/pyang_plugin/yang_catalog_index_opensearch.py (100%) rename elasticsearchIndexing/tests/resources/es_test_data.json => opensearch_indexing/tests/resources/opensearch_test_data.json (100%) rename elasticsearchIndexing/tests/test_es_manager.py => opensearch_indexing/tests/test_opensearch_manager.py (58%) rename recovery/{elk_fill.py => opensearch_fill.py} (95%) rename recovery/{elk_recovery.py => opensearch_recovery.py} (76%) rename sandbox/{swap_es_aliases.py => swap_opensearch_aliases.py} (73%) rename sandbox/{update_es_index_mapping.py => update_opensearch_index_mapping.py} (81%) rename utility/{elasticsearch_util.py => opensearch_util.py} (94%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9403cfe2..ef58a3b9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,7 +16,7 @@ jobs: matrix: python-version: ['3.10'] redis-version: [6] - elasticsearch-version: [7.10.1] + opensearch-version: [2.7.0] max-parallel: 1 steps: @@ -65,13 +65,10 @@ jobs: sudo sysctl -w vm.swappiness=1 sudo sysctl -w vm.max_map_count=262144 - - name: Setup Elasticsearch ${{ matrix.elasticsearch-version }} - uses: getong/elasticsearch-action@v1.2 + - name: Setup OpenSearch ${{ matrix.opensearch-version }} + uses: ankane/setup-opensearch@v1 with: - elasticsearch version: ${{ matrix.elasticsearch-version }} - host port: 9200 - container port: 9200 - discovery type: 'single-node' + opensearch-version: ${{ matrix.opensearch-version }} - name: Prepare environment run: | diff --git a/.gitignore b/.gitignore index a1ac1026..7f2c7cb4 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ api/standard **/*.html !statistics/template/stats.html !api/template/*.html -!elasticsearchIndexing/json/**/* +!opensearch_indexing/json/**/* !api/views/yang_search/json/* /venv yang.egg-info diff --git a/.vscode/launch.json b/.vscode/launch.json index fac29de8..aeb03f65 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -125,7 +125,7 @@ "name": "process_changed_mods", "type": "python", "request": "launch", - "program":
"${workspaceFolder}/opensearch_indexing/process_changed_mods.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}", "justMyCode": false diff --git a/Dockerfile b/Dockerfile index af91340c..31baf492 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 PYTHONUNBUFFERED=1 ENV VIRTUAL_ENV=/backend ENV BACKEND=/backend -ENV PYANG_PLUGINPATH="$BACKEND/elasticsearchIndexing/pyang_plugin" +ENV PYANG_PLUGINPATH="$BACKEND/opensearch_indexing/pyang_plugin" #Install Cron RUN apt-get -y update && apt-get -y install libv8-dev cron gunicorn logrotate curl mydumper rsync vim pcregrep diff --git a/api/globalConfig.py b/api/globalConfig.py index d7231af8..0df7b4e5 100644 --- a/api/globalConfig.py +++ b/api/globalConfig.py @@ -44,7 +44,9 @@ def load_config(self): self.credentials = ( config.get('Secrets-Section', 'confd-credentials', fallback='admin admin').strip('"').split(' ') ) - self.elk_credentials = config.get('Secrets-Section', 'elk-secret', fallback='').strip('"').split(' ') + self.opensearch_credentials = ( + config.get('Secrets-Section', 'opensearch-secret', fallback='').strip('"').split(' ') + ) self.cache_dir = config.get('Directory-Section', 'cache', fallback='tests/resources/cache') self.save_requests = config.get('Directory-Section', 'save-requests', fallback='/var/yang/test-requests') self.save_file_dir = config.get('Directory-Section', 'save-file-dir', fallback='/var/yang/all_modules') @@ -69,9 +71,9 @@ def load_config(self): 'yang-models-dir', fallback='tests/resources/yangmodels/yang', ) - self.es_host = config.get('DB-Section', 'es-host', fallback='localhost') - self.es_port = config.get('DB-Section', 'es-port', fallback='9200') - self.es_aws = config.get('DB-Section', 'es-aws', fallback=False) + self.opensearch_host = config.get('DB-Section', 'opensearch-host', fallback='localhost') + self.opensearch_port = config.get('DB-Section', 'opensearch-port', fallback='9200') + self.opensearch_aws = config.get('DB-Section', 'opensearch-aws', fallback=False) self.redis_host = config.get('DB-Section', 'redis-host', fallback='localhost') self.redis_port = config.get('DB-Section', 'redis-port', fallback='6379') self.json_ytree = config.get('Directory-Section', 'json-ytree', fallback='/var/yang/ytrees') @@ -81,7 +83,7 @@ def load_config(self): fallback='http://localhost/api', ) self.domain_prefix = config.get('Web-Section', 'domain-prefix', fallback='http://localhost') - self.es_aws = self.es_aws == 'True' + self.opensearch_aws = self.opensearch_aws == 'True' self.LOGGER = log.get_logger('api.yc_gc', '{}/yang.log'.format(self.logs_dir)) diff --git a/api/my_flask.py b/api/my_flask.py index baaf23f4..ddf3b8f9 100644 --- a/api/my_flask.py +++ b/api/my_flask.py @@ -20,8 +20,8 @@ import api.authentication.auth as auth from api.matomo_tracker import MatomoTrackerData, get_headers_dict, record_analytic -from elasticsearchIndexing.es_manager import ESManager from jobs.celery import celery_app +from opensearch_indexing.opensearch_manager import OpenSearchManager from redisConnections.redis_user_notifications_connection import RedisUserNotificationsConnection from redisConnections.redis_users_connection import RedisUsersConnection from redisConnections.redisConnection import RedisConnection @@ -92,9 +92,9 @@ def setup_logger(self): os.chmod(file_name_path, 0o664) def post_config_load(self): - self.config['S-ELK-CREDENTIALS'] = self.config.s_elk_secret.strip('"').split() + self.config['S-OPENSEARCH-CREDENTIALS'] = 
self.config.s_opensearch_secret.strip('"').split() self.config['S-CONFD-CREDENTIALS'] = self.config.s_confd_credentials.strip('"').split() - self.config['ES-MANAGER'] = ESManager() + self.config['OPENSEARCH-MANAGER'] = OpenSearchManager() celery_app.load_config() self.config['CELERY-APP'] = celery_app diff --git a/api/views/README.md b/api/views/README.md index 71641c25..360de408 100644 --- a/api/views/README.md +++ b/api/views/README.md @@ -42,7 +42,7 @@ ### [health_check.py](https://github.com/YangCatalog/backend/blob/master/api/views/health_check.py) * `/api/admin/healthcheck/services-list - ['GET']` -* `/api/admin/healthcheck/elk - ['GET']` +* `/api/admin/healthcheck/opensearch - ['GET']` * `/api/admin/healthcheck/confd - ['GET']` * `/api/admin/healthcheck/redis - ['GET']` * `/api/admin/healthcheck/nginx - ['GET']` diff --git a/api/views/admin.py b/api/views/admin.py index 8317d31f..c3cc8a6f 100644 --- a/api/views/admin.py +++ b/api/views/admin.py @@ -569,8 +569,8 @@ def get_script_names(): 'pull_local', 'statistics', 'recovery', - 'elk_recovery', - 'elk_fill', + 'opensearch_recovery', + 'opensearch_fill', 'redis_users_recovery', 'resolve_expiration', 'reviseSemver', @@ -592,7 +592,7 @@ def get_module_name(script_name): return 'ietfYangDraftPull' elif script_name in ('ietf_push', 'iana_push'): return 'automatic_push' - elif script_name in ('recovery', 'elk_recovery', 'elk_fill', 'redis_users_recovery'): + elif script_name in ('recovery', 'opensearch_recovery', 'opensearch_fill', 'redis_users_recovery'): return 'recovery' elif script_name == 'statistics': return 'statistic' diff --git a/api/views/health_check.py b/api/views/health_check.py index c3342540..57653801 100644 --- a/api/views/health_check.py +++ b/api/views/health_check.py @@ -57,7 +57,7 @@ def set_config(): def get_services_list(): response_body = [] service_endpoints = [ - 'elk', + 'opensearch', 'confd-admin', 'redis-admin', 'yang-search-admin', @@ -68,7 +68,7 @@ def get_services_list(): 'celery', ] service_names = [ - 'Elasticsearch', + 'OpenSearch', 'ConfD', 'Redis', 'YANG search', @@ -84,24 +84,24 @@ def get_services_list(): return make_response(jsonify(response_body), 200) -@bp.route('/elk', methods=['GET']) -def health_check_elk(): - service_name = 'Elasticsearch' +@bp.route('/opensearch', methods=['GET']) +def health_check_opensearch(): + service_name = 'OpenSearch' try: - # try to ping Elasticsearch - if app_config.es_manager.ping(): - bp.logger.info('Successfully connected to Elasticsearch') + # try to ping OpenSearch + if app_config.opensearch_manager.ping(): + bp.logger.info('Successfully connected to OpenSearch') # get health of cluster - health = app_config.es_manager.cluster_health() + health = app_config.opensearch_manager.cluster_health() health_status = health.get('status') bp.logger.info('Health status of cluster: {}'.format(health_status)) # get list of indices - indices = app_config.es_manager.get_indices() + indices = app_config.opensearch_manager.get_indices() if len(indices) > 0: return make_response( jsonify( { - 'info': 'Elasticsearch is running', + 'info': 'OpenSearch is running', 'status': 'running', 'message': 'Cluster status: {}'.format(health_status), }, @@ -112,7 +112,7 @@ def health_check_elk(): return make_response( jsonify( { - 'info': 'Elasticsearch is running', + 'info': 'OpenSearch is running', 'status': 'problem', 'message': 'Cluster status: {} Number of indices: {}'.format(health_status, len(indices)), }, @@ -120,19 +120,19 @@ def health_check_elk(): 200, ) else: - 
bp.logger.info('Cannot connect to Elasticsearch database') + bp.logger.info('Cannot connect to OpenSearch database') return make_response( jsonify( { - 'info': 'Not OK - Elasticsearch is not running', + 'info': 'Not OK - OpenSearch is not running', 'status': 'down', - 'error': 'Cannot ping Elasticsearch', + 'error': 'Cannot ping OpenSearch', }, ), 200, ) except Exception as err: - bp.logger.error('Cannot connect to Elasticsearch database. Error: {}'.format(err)) + bp.logger.error('Cannot connect to OpenSearch database. Error: {}'.format(err)) return make_response(jsonify(error_response(service_name, err)), 200) diff --git a/api/views/redis_search.py b/api/views/redis_search.py index 479894de..7fd4b87a 100644 --- a/api/views/redis_search.py +++ b/api/views/redis_search.py @@ -506,7 +506,7 @@ def filter_using_api(res_row, payload): reject = True break else: - # Module key has different value then serached for then reject + # Module key has a different value than the one searched for, so reject values = value.split(',') reject = True for val in values: diff --git a/api/views/yang_search/grep_search.py b/api/views/yang_search/grep_search.py index c142510d..3af05e14 100644 --- a/api/views/yang_search/grep_search.py +++ b/api/views/yang_search/grep_search.py @@ -7,8 +7,8 @@ from api.cache.api_cache import cache from api.views.yang_search.constants import GREP_SEARCH_CACHE_TIMEOUT -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility import log from utility.create_config import create_config from utility.staticVariables import ORGANIZATIONS @@ -23,8 +23,8 @@ class GrepSearch: def __init__( self, config: ConfigParser = create_config(), - es_manager: ESManager = ESManager(), - modules_es_index: ESIndices = ESIndices.AUTOCOMPLETE, + opensearch_manager: OpenSearchManager = OpenSearchManager(), + modules_es_index: OpenSearchIndices = OpenSearchIndices.AUTOCOMPLETE, starting_cursor: int = 0, ): self.previous_cursor = 0 @@ -40,10 +40,10 @@ def __init__( self.query = json.load(query_file) log_file_path = os.path.join(config.get('Directory-Section', 'logs'), 'yang.log') - self.logger = log.get_logger('yc-elasticsearch', log_file_path) + self.logger = log.get_logger('yc-opensearch', log_file_path) self.modules_es_index = modules_es_index - self._es_manager = es_manager + self._opensearch_manager = opensearch_manager self._searched_modules_amount = 0 def search( @@ -250,7 +250,7 @@ def _construct_db_query( def _get_results_from_db(self, already_found_results: t.Optional[list[dict]] = None) -> list[dict]: response = already_found_results or [] - es_response = self._es_manager.generic_search( + es_response = self._opensearch_manager.generic_search( index=self.modules_es_index, query=self.query, response_size=None, diff --git a/api/views/yang_search/elk_search.py b/api/views/yang_search/opensearch_query.py similarity index 90% rename from api/views/yang_search/elk_search.py rename to api/views/yang_search/opensearch_query.py index 39fa8c6e..66e2398b 100644 --- a/api/views/yang_search/elk_search.py +++ b/api/views/yang_search/opensearch_query.py @@ -20,12 +20,12 @@ import json import os -from elasticsearch import ConnectionTimeout +from opensearchpy import ConnectionTimeout import api.views.yang_search.search_params as sp from api.views.yang_search.response_row import ResponseRow -from
elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from redisConnections.redisConnection import RedisConnection from utility import log from utility.staticVariables import OUTPUT_COLUMNS @@ -34,9 +34,9 @@ RESERVED_CHARACTERS = ['"', '<'] -class ElkSearch: +class OpenSearchQuery: """ - Serves distinctly for yangcatalog search. This class will create a query that is sent to elasticsearch + Serves distinctly for yangcatalog search. This class will create a query that is sent to OpenSearch which returns output that needs to be processed. We process this into a list which is displayed as rows in a grid of yangcatalog search. """ @@ -44,18 +44,18 @@ class ElkSearch: def __init__( self, logs_dir: str, - es_manager: ESManager, + opensearch_manager: OpenSearchManager, redis_connection: RedisConnection, search_params: sp.SearchParams, ) -> None: """ - Initialization of search under Elasticsearch engine. We need to prepare a query - that will be used to search in Elasticsearch. + Initialization of search under OpenSearch engine. We need to prepare a query + that will be used to search in OpenSearch. Arguments: :param searched_term (str) String that we are searching for :param logs_dir (str) Directory to log files - :param es_manager (ESManager) Elasticsearch manager + :param opensearch_manager (OpenSearchManager) OpenSearch manager :param redis_connection (RedisConnection) Redis connection to modules db (db=1) :param search_params (SearchParams) Contains search parameters """ @@ -63,7 +63,7 @@ def __init__( search_query_path = os.path.join(os.environ['BACKEND'], 'api/views/yang_search/json/search.json') with open(search_query_path, encoding='utf-8') as reader: self.query: dict = json.load(reader) - self._es_manager = es_manager + self._opensearch_manager = opensearch_manager self._redis_connection = redis_connection self._latest_revisions = {} self._remove_columns = list(set(OUTPUT_COLUMNS) - set(self._search_params.output_columns)) @@ -71,7 +71,8 @@ def __init__( self._missing_modules = [] self.timeout = False log_file_path = os.path.join(logs_dir, 'yang.log') - self.logger = log.get_logger('yc-elasticsearch', log_file_path) + self.logger = log.get_logger('yc-opensearch', log_file_path) + self._construct_query() def alerts(self): """ @@ -83,9 +84,9 @@ def alerts(self): alerts.append(f'Module {missing} metadata does not exist in yangcatalog') return alerts - def construct_query(self): + def _construct_query(self): """ - Create a json query that is then sent to Elasticsearch. + Create a json query that is then sent to OpenSearch. Changes being made while creating query: - statement is a list of schema types. 
It is one or more or all of ['typedef', 'grouping', 'feature', 'identity', 'extension', 'rpc', 'container', 'list', 'leaf-list', 'leaf', 'notification', 'action'] @@ -182,13 +183,13 @@ def _retrieve_results(self, latest_revisions: bool) -> list[dict]: query.pop('aggs') try: self.logger.debug(json.dumps(query, indent=2)) - response = self._es_manager.generic_search( - ESIndices.YINDEX, + response = self._opensearch_manager.generic_search( + OpenSearchIndices.YINDEX, query, response_size=RESPONSE_SIZE, ) except ConnectionTimeout: - self.logger.exception('Error while searching in Elasticsearch') + self.logger.exception('Error while searching in OpenSearch') self.timeout = True return [] hits = response['hits']['hits'] @@ -216,7 +217,7 @@ def _process_hits(self, hits: list) -> list[dict]: module_data = self._redis_connection.get_module(module_key) module_data = json.loads(module_data) if not module_data: - self.logger.error(f'Failed to get module from Redis, but found in Elasticsearch: {module_key}') + self.logger.error(f'Failed to get module from Redis, but found in OpenSearch: {module_key}') reject.add(module_key) self._missing_modules.append(module_key) continue @@ -240,7 +241,7 @@ def _process_hits(self, hits: list) -> list[dict]: self._row_hashes.add(row_hash) response_rows.append(row.output_row) - self.logger.debug(f'ElkSearch finished with length {len(response_rows)}') + self.logger.debug(f'OpenSearch finished with length {len(response_rows)}') return response_rows def _rejects_mibs_or_versions(self, module_data: dict) -> bool: diff --git a/api/views/yang_search/yang_search.py b/api/views/yang_search/yang_search.py index 52eb1d10..41bfa620 100644 --- a/api/views/yang_search/yang_search.py +++ b/api/views/yang_search/yang_search.py @@ -37,10 +37,10 @@ from api.cache.api_cache import cache from api.my_flask import app from api.views.yang_search.constants import GREP_SEARCH_CACHE_TIMEOUT -from api.views.yang_search.elk_search import ElkSearch from api.views.yang_search.grep_search import GrepSearch -from elasticsearchIndexing.models.es_indices import ESIndices -from elasticsearchIndexing.models.keywords_names import KeywordsNames +from api.views.yang_search.opensearch_query import OpenSearchQuery +from opensearch_indexing.models.keywords_names import KeywordsNames +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices from utility.create_config import create_config from utility.staticVariables import MODULE_PROPERTIES_ORDER, OUTPUT_COLUMNS, SCHEMA_TYPES from utility.yangParser import create_context @@ -80,7 +80,11 @@ def grep_search(): cursor = int(value[0]) if (value := query_params.get('cursor')) else 0 config = create_config() try: - grep_search_instance = GrepSearch(config=config, es_manager=app_config.es_manager, starting_cursor=cursor) + grep_search_instance = GrepSearch( + config=config, + opensearch_manager=app_config.opensearch_manager, + starting_cursor=cursor, + ) results = grep_search_instance.search(organizations, search_string, inverted_search, case_sensitive) except ValueError as e: abort(400, description=str(e)) @@ -314,14 +318,18 @@ def search(): schema_types=is_list_in(payload, 'schema-types', SCHEMA_TYPES), output_columns=is_list_in(payload, 'output-columns', OUTPUT_COLUMNS), ) - elk_search = ElkSearch(app_config.d_logs, app_config.es_manager, app.redisConnection, search_params) - elk_search.construct_query() + opensearch_search = OpenSearchQuery( + app_config.d_logs, + app_config.opensearch_manager, + app.redisConnection, + search_params, + ) 
response = {} - response['rows'], response['max-hits'] = elk_search.search() + response['rows'], response['max-hits'] = opensearch_search.search() if payload.get('sub-search'): response['max-hits'] = False - response['warning'] = elk_search.alerts() - response['timeout'] = elk_search.timeout + response['warning'] = opensearch_search.alerts() + response['timeout'] = opensearch_search.timeout return response @@ -354,12 +362,16 @@ def advanced_search(): output_columns=is_list_in(payload, 'output-columns', OUTPUT_COLUMNS), include_drafts=is_boolean(payload, 'include-drafts', True), ) - elk_search = ElkSearch(app_config.d_logs, app_config.es_manager, app.redisConnection, search_params) - elk_search.construct_query() + opensearch_search = OpenSearchQuery( + app_config.d_logs, + app_config.opensearch_manager, + app.redisConnection, + search_params, + ) response = {} - response['rows'], response['max-hits'] = elk_search.search() - response['warning'] = elk_search.alerts() - response['timeout'] = elk_search.timeout + response['rows'], response['max-hits'] = opensearch_search.search() + response['warning'] = opensearch_search.alerts() + response['timeout'] = opensearch_search.timeout return response @@ -379,11 +391,15 @@ def get_services_list(keyword: str, pattern: str): return make_response(jsonify(result), 200) if keyword == 'organization': - result = app_config.es_manager.autocomplete(ESIndices.AUTOCOMPLETE, KeywordsNames.ORGANIZATION, pattern) + result = app_config.opensearch_manager.autocomplete( + OpenSearchIndices.AUTOCOMPLETE, + KeywordsNames.ORGANIZATION, + pattern, + ) if keyword == 'module': - result = app_config.es_manager.autocomplete(ESIndices.AUTOCOMPLETE, KeywordsNames.NAME, pattern) + result = app_config.opensearch_manager.autocomplete(OpenSearchIndices.AUTOCOMPLETE, KeywordsNames.NAME, pattern) if keyword == 'draft': - result = app_config.es_manager.autocomplete(ESIndices.DRAFTS, KeywordsNames.DRAFT, pattern) + result = app_config.opensearch_manager.autocomplete(OpenSearchIndices.DRAFTS, KeywordsNames.DRAFT, pattern) return make_response(jsonify(result), 200) @@ -428,7 +444,7 @@ def show_node_with_revision(name: str, path: str, revision: t.Optional[str] = No revision = get_latest_module_revision(name) module = {'name': name, 'revision': revision, 'path': path} - hits = app_config.es_manager.get_node(module)['hits']['hits'] + hits = app_config.opensearch_manager.get_node(module)['hits']['hits'] if not hits: abort(404, description=f'Could not find data for {name}@{revision} at {path}') @@ -466,11 +482,11 @@ def module_details(module: str, revision: t.Optional[str] = None, warnings: bool if revision is not None and (len(revision) != 10 or re.match(r'\d{4}[-/]\d{2}[-/]\d{2}', revision) is None): abort(400, description='Revision provided has wrong format - please use "YYYY-MM-DD" format') - elk_response = get_modules_revision_organization(module, None, warnings) + opensearch_response = get_modules_revision_organization(module, None, warnings) - if 'warning' in elk_response: - return elk_response - revisions, organization = elk_response + if 'warning' in opensearch_response: + return opensearch_response + revisions, organization = opensearch_response if len(revisions) == 0: if warnings: return {'warning': f'module {module} does not exists in API'} @@ -523,7 +539,7 @@ def get_yang_catalog_help(): """ revision = get_latest_module_revision('yang-catalog') module = {'name': 'yang-catalog', 'revision': revision} - hits = app_config.es_manager.get_module_by_name_revision(ESIndices.YINDEX, 
module) + hits = app_config.opensearch_manager.get_module_by_name_revision(OpenSearchIndices.YINDEX, module) module_details_data = {} skip_statement = ['typedef', 'grouping', 'identity'] for hit in hits: @@ -596,10 +612,10 @@ def get_modules_revision_organization(module_name: str, revision: t.Optional[str """ try: if revision is None: - hits = app_config.es_manager.get_sorted_module_revisions(ESIndices.YINDEX, module_name) + hits = app_config.opensearch_manager.get_sorted_module_revisions(OpenSearchIndices.YINDEX, module_name) else: module = {'name': module_name, 'revision': revision} - hits = app_config.es_manager.get_module_by_name_revision(ESIndices.YINDEX, module) + hits = app_config.opensearch_manager.get_module_by_name_revision(OpenSearchIndices.YINDEX, module) organization = hits[0]['_source']['organization'] revisions = [] @@ -630,7 +646,10 @@ def get_latest_module_revision(module_name: str) -> str: :return: latest revision of the module """ try: - es_result = app_config.es_manager.get_sorted_module_revisions(ESIndices.AUTOCOMPLETE, module_name) + es_result = app_config.opensearch_manager.get_sorted_module_revisions( + OpenSearchIndices.AUTOCOMPLETE, + module_name, + ) return es_result[0]['_source']['revision'] except IndexError: bp.logger.exception(f'Failed to get revision for {module_name}') diff --git a/crontab b/crontab index 3f5612a9..5a6dd203 100644 --- a/crontab +++ b/crontab @@ -20,7 +20,7 @@ BACKEND=/backend 5 22 * * * (cd ~ ; source bin/activate ; echo "`date` starting pull_local" >> /var/yang/logs/crons-log.log ; cd ietfYangDraftPull ; python pull_local.py ) 17 18 * * * (cd ~ ; source bin/activate ; echo "`date` starting recovery" >> /var/yang/logs/crons-log.log ; cd recovery ; python recovery.py --save) 30 15 * * * (cd ~ ; source bin/activate ; echo "`date` starting remove_unused" >> /var/yang/logs/crons-log.log ; cd utility ; python remove_unused.py) -*/3 * * * * (cd ~ ; source bin/activate ; cd elasticsearchIndexing ; python process_changed_mods.py) -0 2 */1 * * (cd ~ ; source bin/activate ; cd elasticsearchIndexing ; python process-drafts.py) +*/3 * * * * (cd ~ ; source bin/activate ; cd opensearch_indexing ; python process_changed_mods.py) +0 2 */1 * * (cd ~ ; source bin/activate ; cd opensearch_indexing ; python process-drafts.py) 0 */2 * * * (cd ~ ; source bin/activate ; cd utility ; python confdFullCheck.py) 0 0 1 * * (cd ~ ; source bin/activate ; cd recovery ; python redis_users_recovery.py --save) diff --git a/documentation/source/index.html.md b/documentation/source/index.html.md index 08514e64..13850d47 100644 --- a/documentation/source/index.html.md +++ b/documentation/source/index.html.md @@ -2398,12 +2398,12 @@ curl -X GET -H "Accept: application/json" "https://yangcatalog.org/api/admin/log ```json { "data": [ - "elasticsearch/gc", + "opensearch/gc", "crons-log", "confd/browser", "YANGgenericstats-daily", "confd/netconf", - "elasticsearch/elasticsearch_index_indexing_slowlog", + "opensearch/opensearch_index_indexing_slowlog", "healthcheck" ], "info": "success" @@ -2731,8 +2731,8 @@ curl -X GET -H "Accept: application/json" "https://yangcatalog.org/api/admin/scr "pull_local", "statistics", "recovery", - "elk_recovery", - "elk_fill", + "opensearch_recovery", + "opensearch_fill", "resolve_expiration", "reviseSemver" ], @@ -2964,8 +2964,8 @@ curl -X GET -H "Accept: application/json" -H "Content-type: application/json" ```json [ { - "endpoint": "elk", - "name": "Elasticsearch" + "endpoint": "opensearch", + "name": "OpenSearch" }, { "endpoint": "confd", diff 
--git a/jobs/celery.py b/jobs/celery.py index e5f123b9..2744a9bc 100644 --- a/jobs/celery.py +++ b/jobs/celery.py @@ -31,7 +31,7 @@ from redisConnections.redisConnection import RedisConnection, key_quote from utility import log from utility.create_config import create_config -from utility.elasticsearch_util import ESIndexingPaths, prepare_for_es_removal, send_for_es_indexing +from utility.opensearch_util import ESIndexingPaths, prepare_for_es_removal, send_for_es_indexing from utility.staticVariables import json_headers @@ -116,7 +116,7 @@ def process_vendor_deletion(params: dict[str, str]): Deleting vendors metadata. Deletes all the modules in the vendor branch of the yang-catalog.yang module on given path. If the module was added by a vendor and it doesn't contain any other implementations it will delete the whole module in the modules branch of the yang-catalog.yang module. - It will also call the indexing script to update Elasticsearch searching. + It will also call the indexing script to update OpenSearch searching. """ data_key = '' redis_vendor_key = '' @@ -235,7 +235,7 @@ def process_module_deletion(modules: list[dict[str, str]]): """ Delete modules. It deletes modules of given path from Redis. This will delete whole module in modules branch of the yang-catalog:yang module. - It will also call the indexing script to update ES. + It will also call the indexing script to update OpenSearch. """ try: all_modules_raw = celery_app.redis_connection.get_all_modules() diff --git a/elasticsearchIndexing/README.md b/opensearch_indexing/README.md similarity index 71% rename from elasticsearchIndexing/README.md rename to opensearch_indexing/README.md index 0033db70..df3e1dfc 100644 --- a/elasticsearchIndexing/README.md +++ b/opensearch_indexing/README.md @@ -1,22 +1,22 @@ # YANG Search Data Maintenance -## [process_changed_mods.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/process_changed_mods.py) +## [process_changed_mods.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/process_changed_mods.py) Executed every 3 minutes by the cronjob. Takes as optional argument `--config-path` - path to the configuration file. Reads two files: `changes-cache` and `delete-cache` (their actual paths can be found in the configuration file by these names) with new/changed and deleted modules respectively, making `.bak` file backups before truncating them to 0. Then all the deleted modules are being deleted from all indices and the new/changed modules are indexed in the `YINDEX` and `AUTOCOMPLETE` indices -with the help of the [build_yindex.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/build_yindex.py) module. +with the help of the [build_yindex.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/build_yindex.py) module. 
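As an editorial aside, the backup-then-truncate step described above is the part most worth getting right, since losing the cache files loses the list of modules still waiting to be indexed. Below is a minimal sketch of that step (illustrative only, not part of the patch; the real file paths are read from the configuration file under the `changes-cache` and `delete-cache` keys, and the paths shown here are hypothetical placeholders):

```python
# Sketch of the backup-then-truncate flow described above (not part of the patch).
import shutil


def drain_cache(cache_path: str) -> list[str]:
    """Back the cache file up to '<path>.bak', return its entries, truncate it to 0."""
    shutil.copyfile(cache_path, f'{cache_path}.bak')  # make the .bak backup first
    with open(cache_path, 'r+', encoding='utf-8') as f:
        entries = [line.strip() for line in f if line.strip()]
        f.seek(0)
        f.truncate(0)  # truncate to 0 bytes so the next cron run starts from a clean file
    return entries


# Hypothetical placeholder paths; the real values come from the configuration file.
changed_modules = drain_cache('/var/yang/changes-cache.dat')
deleted_modules = drain_cache('/var/yang/delete-cache.dat')
```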
**Note:** `changes-cache` and `delete-cache` files are created inside the [populate.py](https://github.com/YangCatalog/backend/blob/master/parseAndPopulate/populate.py) script -## [build_yindex.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/build_yindex.py) +## [build_yindex.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/build_yindex.py) -Contains functionality to parse a module data using the custom pyang plugin: [yang_catalog_index_es.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/pyang_plugin/yang_catalog_index_es.py) +Contains functionality to parse module data using the custom pyang plugin: [yang_catalog_index_opensearch.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/pyang_plugin/yang_catalog_index_opensearch.py) and add the module data in the `YINDEX` and `AUTOCOMPLETE` indices, previously deleting the module data from these indices. -## [process-drafts.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/process-drafts.py) +## [process-drafts.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/process-drafts.py) -Script run by the cronjob to add new drafts to the `DRAFTS` Elasticsearch index. +Script run by the cronjob to add new drafts to the `DRAFTS` OpenSearch index. diff --git a/elasticsearchIndexing/__init__.py b/opensearch_indexing/__init__.py similarity index 100% rename from elasticsearchIndexing/__init__.py rename to opensearch_indexing/__init__.py diff --git a/elasticsearchIndexing/build_yindex.py b/opensearch_indexing/build_yindex.py similarity index 84% rename from elasticsearchIndexing/build_yindex.py rename to opensearch_indexing/build_yindex.py index ac74e434..b7c7fd29 100644 --- a/elasticsearchIndexing/build_yindex.py +++ b/opensearch_indexing/build_yindex.py @@ -22,15 +22,15 @@ import logging import os.path -from elasticsearch import ConnectionError, ConnectionTimeout, RequestError +from opensearchpy import ConnectionError, ConnectionTimeout, RequestError from pyang import plugin from pyang.util import get_latest_revision -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices -from elasticsearchIndexing.models.index_build import BuildYINDEXModule -from elasticsearchIndexing.pyang_plugin.json_tree import emit_tree -from elasticsearchIndexing.pyang_plugin.yang_catalog_index_es import IndexerPlugin +from opensearch_indexing.models.index_build import BuildYINDEXModule +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager +from opensearch_indexing.pyang_plugin.json_tree import emit_tree +from opensearch_indexing.pyang_plugin.yang_catalog_index_opensearch import IndexerPlugin from utility import yangParser from utility.util import validate_revision @@ -38,7 +38,7 @@ def build_indices( - es_manager: ESManager, + opensearch_manager: OpenSearchManager, module: BuildYINDEXModule, save_file_dir: str, json_ytree: str, @@ -83,7 +83,7 @@ def build_indices( try: # Remove exisiting modules from all indices logger.debug('deleting data from index: modules') - es_manager.delete_from_indices(module) + opensearch_manager.delete_from_indices(module) # Remove existing submodules from index: yindex for subm in submodules: @@ -93,7 +93,7 @@ def build_indices( submodule = {'name': subm_n, 'revision': subm_r} try:
logger.debug('deleting data from index: yindex') - es_manager.delete_from_index(ESIndices.YINDEX, submodule) + opensearch_manager.delete_from_index(OpenSearchIndices.YINDEX, submodule) except RequestError: logger.exception(f'Problem while deleting {subm_n}@{subm_r}') @@ -102,12 +102,12 @@ def build_indices( chunks = [yindexes[key][i : i + ES_CHUNK_SIZE] for i in range(0, len(yindexes[key]), ES_CHUNK_SIZE)] for idx, chunk in enumerate(chunks, start=1): logger.debug(f'Pushing data to index: yindex {idx} out of {len(chunks)}') - es_manager.bulk_modules(ESIndices.YINDEX, chunk) + opensearch_manager.bulk_modules(OpenSearchIndices.YINDEX, chunk) # Index new modules to index: autocomplete logger.debug('pushing data to index: autocomplete') del module['path'] - es_manager.index_module(ESIndices.AUTOCOMPLETE, module) + opensearch_manager.index_module(OpenSearchIndices.AUTOCOMPLETE, module) break except (ConnectionTimeout, ConnectionError) as e: attempts -= 1 diff --git a/elasticsearchIndexing/create_indices.py b/opensearch_indexing/create_indices.py similarity index 75% rename from elasticsearchIndexing/create_indices.py rename to opensearch_indexing/create_indices.py index 8f72c653..e85b6991 100644 --- a/elasticsearchIndexing/create_indices.py +++ b/opensearch_indexing/create_indices.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Create an Elasticsearch index""" +"""Create an OpenSearch index""" __author__ = 'Richard Zilincik' __copyright__ = 'Copyright The IETF Trust 2022, All Rights Reserved' @@ -25,7 +25,7 @@ import json import os -from elasticsearchIndexing.es_manager import ESManager +from opensearch_indexing.opensearch_manager import OpenSearchManager def main(): @@ -34,37 +34,37 @@ def main(): '--index', type=str, help='Name of the new index. If not specified, indices are created ' - 'from all the initialization files found in elasticsearchIndexing/json.', + 'from all the initialization files found in opensearch_indexing/json.', ) parser.add_argument( '--schema', type=str, help='Path to a json schema file. Only valid if an index name was provided. 
' - 'Defaults to elasticsearchIndexing/json/initialize__index.json.', + 'Defaults to opensearch_indexing/json/initialize__index.json.', ) args = parser.parse_args() - es = ESManager().es - schema_dir = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json') + opensearch = OpenSearchManager().opensearch + schema_dir = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json') if args.index is None: for index_schema_base in glob.glob('initialize_*_index.json', root_dir=schema_dir): index_name = index_schema_base.removeprefix('initialize_').removesuffix('_index.json') - if es.indices.exists(index_name): + if opensearch.indices.exists(index_name): print(f'{index_name} index already exists') continue index_schema = os.path.join(schema_dir, index_schema_base) with open(index_schema) as f: schema_contents = json.load(f) - create_result = es.indices.create(index=index_name, body=schema_contents) + create_result = opensearch.indices.create(index=index_name, body=schema_contents) print(create_result) else: index_name = args.index - if es.indices.exists(index_name): + if opensearch.indices.exists(index_name): print(f'{index_name} index already exists') return index_schema = args.schema or os.path.join(schema_dir, f'initialize_{index_name}_index.json') with open(index_schema) as f: schema_contents = json.load(f) - create_result = es.indices.create(index=index_name, body=schema_contents, ignore=400) + create_result = opensearch.indices.create(index=index_name, body=schema_contents, ignore=400) print(create_result) diff --git a/elasticsearchIndexing/json/completion.json b/opensearch_indexing/json/completion.json similarity index 100% rename from elasticsearchIndexing/json/completion.json rename to opensearch_indexing/json/completion.json diff --git a/elasticsearchIndexing/json/draft_search.json b/opensearch_indexing/json/draft_search.json similarity index 100% rename from elasticsearchIndexing/json/draft_search.json rename to opensearch_indexing/json/draft_search.json diff --git a/elasticsearchIndexing/json/initialize_autocomplete_index.json b/opensearch_indexing/json/initialize_autocomplete_index.json similarity index 100% rename from elasticsearchIndexing/json/initialize_autocomplete_index.json rename to opensearch_indexing/json/initialize_autocomplete_index.json diff --git a/elasticsearchIndexing/json/initialize_drafts_index.json b/opensearch_indexing/json/initialize_drafts_index.json similarity index 100% rename from elasticsearchIndexing/json/initialize_drafts_index.json rename to opensearch_indexing/json/initialize_drafts_index.json diff --git a/elasticsearchIndexing/json/initialize_modules_index.json b/opensearch_indexing/json/initialize_modules_index.json similarity index 100% rename from elasticsearchIndexing/json/initialize_modules_index.json rename to opensearch_indexing/json/initialize_modules_index.json diff --git a/elasticsearchIndexing/json/initialize_test_index.json b/opensearch_indexing/json/initialize_test_index.json similarity index 100% rename from elasticsearchIndexing/json/initialize_test_index.json rename to opensearch_indexing/json/initialize_test_index.json diff --git a/elasticsearchIndexing/json/initialize_test_search_index.json b/opensearch_indexing/json/initialize_test_search_index.json similarity index 100% rename from elasticsearchIndexing/json/initialize_test_search_index.json rename to opensearch_indexing/json/initialize_test_search_index.json diff --git a/elasticsearchIndexing/json/initialize_yindex_index.json 
b/opensearch_indexing/json/initialize_yindex_index.json similarity index 98% rename from elasticsearchIndexing/json/initialize_yindex_index.json rename to opensearch_indexing/json/initialize_yindex_index.json index 75ffd15b..90fc0daf 100644 --- a/elasticsearchIndexing/json/initialize_yindex_index.json +++ b/opensearch_indexing/json/initialize_yindex_index.json @@ -103,7 +103,7 @@ "type": "boolean" }, "path": { - "type": "wildcard" + "type": "keyword" }, "maturity": { "type": "keyword" diff --git a/elasticsearchIndexing/json/module_search.json b/opensearch_indexing/json/module_search.json similarity index 100% rename from elasticsearchIndexing/json/module_search.json rename to opensearch_indexing/json/module_search.json diff --git a/elasticsearchIndexing/json/show_node.json b/opensearch_indexing/json/show_node.json similarity index 100% rename from elasticsearchIndexing/json/show_node.json rename to opensearch_indexing/json/show_node.json diff --git a/elasticsearchIndexing/json/sorted_name_rev_query.json b/opensearch_indexing/json/sorted_name_rev_query.json similarity index 100% rename from elasticsearchIndexing/json/sorted_name_rev_query.json rename to opensearch_indexing/json/sorted_name_rev_query.json diff --git a/elasticsearchIndexing/models/__init__.py b/opensearch_indexing/models/__init__.py similarity index 100% rename from elasticsearchIndexing/models/__init__.py rename to opensearch_indexing/models/__init__.py diff --git a/elasticsearchIndexing/models/index_build.py b/opensearch_indexing/models/index_build.py similarity index 100% rename from elasticsearchIndexing/models/index_build.py rename to opensearch_indexing/models/index_build.py diff --git a/elasticsearchIndexing/models/keywords_names.py b/opensearch_indexing/models/keywords_names.py similarity index 100% rename from elasticsearchIndexing/models/keywords_names.py rename to opensearch_indexing/models/keywords_names.py diff --git a/elasticsearchIndexing/models/es_indices.py b/opensearch_indexing/models/opensearch_indices.py similarity index 86% rename from elasticsearchIndexing/models/es_indices.py rename to opensearch_indexing/models/opensearch_indices.py index 4d2b2c7d..214906c9 100644 --- a/elasticsearchIndexing/models/es_indices.py +++ b/opensearch_indexing/models/opensearch_indices.py @@ -1,7 +1,7 @@ from enum import Enum -class ESIndices(Enum): +class OpenSearchIndices(Enum): YINDEX = 'yindex-alias' MODULES = 'modules-alias' DRAFTS = 'drafts-alias' diff --git a/elasticsearchIndexing/es_manager.py b/opensearch_indexing/opensearch_manager.py similarity index 55% rename from elasticsearchIndexing/es_manager.py rename to opensearch_indexing/opensearch_manager.py index a3c95955..7f616f6a 100644 --- a/elasticsearchIndexing/es_manager.py +++ b/opensearch_indexing/opensearch_manager.py @@ -22,201 +22,201 @@ import typing as t from configparser import ConfigParser -from elasticsearch import Elasticsearch -from elasticsearch.exceptions import AuthorizationException, NotFoundError, RequestError -from elasticsearch.helpers import parallel_bulk +from opensearchpy import OpenSearch +from opensearchpy.exceptions import AuthorizationException, NotFoundError, RequestError +from opensearchpy.helpers import parallel_bulk import utility.log as log -from elasticsearchIndexing.models.es_indices import ESIndices -from elasticsearchIndexing.models.keywords_names import KeywordsNames +from opensearch_indexing.models.keywords_names import KeywordsNames +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices from 
utility.create_config import create_config -class ESManager: - def __init__(self, es: t.Optional[Elasticsearch] = None): +class OpenSearchManager: + def __init__(self, opensearch: t.Optional[OpenSearch] = None): config = create_config() self.threads = int(config.get('General-Section', 'threads')) log_directory = config.get('Directory-Section', 'logs') - self.elk_repo_name = config.get('General-Section', 'elk-repo-name') - self.elk_request_timeout = int(config.get('General-Section', 'elk-request-timeout', fallback=60)) - self._setup_elasticsearch(config, es) - log_file_path = os.path.join(log_directory, 'jobs', 'es-manager.log') - self.logger = log.get_logger('es-manager', log_file_path) - - def _setup_elasticsearch(self, config: ConfigParser, es: t.Optional[Elasticsearch] = None): - if es: - self.es = es + self.opensearch_repo_name = config.get('General-Section', 'opensearch-repo-name') + self.opensearch_request_timeout = int(config.get('General-Section', 'opensearch-request-timeout', fallback=60)) + self._setup_opensearch(config, opensearch) + log_file_path = os.path.join(log_directory, 'jobs', 'opensearch-manager.log') + self.logger = log.get_logger('opensearch-manager', log_file_path) + + def _setup_opensearch(self, config: ConfigParser, opensearch: t.Optional[OpenSearch] = None): + if opensearch: + self.opensearch = opensearch return - es_aws = config.get('DB-Section', 'es-aws') - elk_credentials = config.get('Secrets-Section', 'elk-secret').strip('"').split(' ') - es_host_config = { - 'host': config.get('DB-Section', 'es-host', fallback='localhost'), - 'port': config.get('DB-Section', 'es-port', fallback='9200'), + opensearch_aws = config.get('DB-Section', 'opensearch-aws') + opensearch_credentials = config.get('Secrets-Section', 'opensearch-secret').strip('"').split(' ') + opensearch_host_config = { + 'host': config.get('DB-Section', 'opensearch-host', fallback='localhost'), + 'port': config.get('DB-Section', 'opensearch-port', fallback='9200'), } - if es_aws == 'True': - self.es = Elasticsearch( - hosts=[es_host_config], - http_auth=(elk_credentials[0], elk_credentials[1]), + if opensearch_aws == 'True': + self.opensearch = OpenSearch( + hosts=[opensearch_host_config], + http_auth=(opensearch_credentials[0], opensearch_credentials[1]), scheme='https', ) return - self.es = Elasticsearch(hosts=[es_host_config]) + self.opensearch = OpenSearch(hosts=[opensearch_host_config]) def ping(self) -> bool: - return self.es.ping() + return self.opensearch.ping() def cluster_health(self) -> dict: """Returns a brief representation of the cluster health""" - return self.es.cluster.health() + return self.opensearch.cluster.health() - def create_index(self, index: ESIndices): + def create_index(self, index: OpenSearchIndices): """ - Create Elasticsearch index with given name. + Create OpenSearch index with given name. 
Argument: - :param index (ESIndices) Index to be created + :param index (OpenSearchIndices) Index to be created """ index_name = index.value index_json_name = f'initialize_{index_name}_index.json' - index_json_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/', index_json_name) + index_json_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/', index_json_name) with open(index_json_path, encoding='utf-8') as reader: index_config = json.load(reader) create_result = None try: - create_result = self.es.indices.create(index=index_name, body=index_config, ignore=400) + create_result = self.opensearch.indices.create(index=index_name, body=index_config, ignore=400) except AuthorizationException: # https://discuss.elastic.co/t/forbidden-12-index-read-only-allow-delete-api/110282/4 self.logger.exception('Problem with index creation') read_only_query = {'index': {'blocks': {'read_only_allow_delete': 'false'}}} - self.es.indices.put_settings(index=index_name, body=read_only_query) - create_result = self.es.indices.create(index=index_name, body=index_config, ignore=400) + self.opensearch.indices.put_settings(index=index_name, body=read_only_query) + create_result = self.opensearch.indices.create(index=index_name, body=index_config, ignore=400) return create_result - def index_exists(self, index: ESIndices) -> bool: + def index_exists(self, index: OpenSearchIndices) -> bool: """ Check if the index already exists. Argument: - :param index (ESIndices) Index to be checked + :param index (OpenSearchIndices) Index to be checked """ name = index.value - return self.es.indices.exists(name) or self.es.indices.exists_alias(name) + return self.opensearch.indices.exists(name) or self.opensearch.indices.exists_alias(name) def get_indices(self) -> list: """Returns a list of existing indices.""" - return list(self.es.indices.get_alias().keys()) + return list(self.opensearch.indices.get_alias().keys()) - def put_index_mapping(self, index: ESIndices, body: dict) -> dict: + def put_index_mapping(self, index: OpenSearchIndices, body: dict) -> dict: """ Update mapping for provided index. Arguments: - :param index (ESIndices) Index whose mapping to update + :param index (OpenSearchIndices) Index whose mapping to update :param body (dict) Mapping definition """ - return self.es.indices.put_mapping(index=index.value, body=body, ignore=403) + return self.opensearch.indices.put_mapping(index=index.value, body=body, ignore=403) - def get_index_mapping(self, index: ESIndices) -> dict: + def get_index_mapping(self, index: OpenSearchIndices) -> dict: """ Get mapping for provided index. Argument: - :param index (ESIndices) Index whose mapping to get + :param index (OpenSearchIndices) Index whose mapping to get """ mapping = {} try: - mapping = self.es.indices.get_mapping(index=index.value) + mapping = self.opensearch.indices.get_mapping(index=index.value) except NotFoundError: self.logger.exception('Index not found') return mapping - def get_documents_count(self, index: ESIndices) -> int: + def get_documents_count(self, index: OpenSearchIndices) -> int: """ Get number of documents stored in provided index. 
Argument: - :param index (ESIndices) Index in which to search + :param index (OpenSearchIndices) Index in which to search """ count = 0 try: - count = self.es.count(index=index.value)['count'] + count = self.opensearch.count(index=index.value)['count'] except NotFoundError: self.logger.exception('Index not found') return count - def autocomplete(self, index: ESIndices, keyword: KeywordsNames, searched_term: str) -> list: + def autocomplete(self, index: OpenSearchIndices, keyword: KeywordsNames, searched_term: str) -> list: """ Get list of the modules which will be returned as autocomplete after entering the 'searched_term' by the user. Arguments: - :param index (ESIndices) Index in which to search + :param index (OpenSearchIndices) Index in which to search :param keyword (KeywordsNames) :param searched_term (str) String entered by the user """ - autocomplete_json_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/completion.json') + autocomplete_json_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/completion.json') with open(autocomplete_json_path, encoding='utf-8') as reader: autocomplete_query = json.load(reader) autocomplete_query['query']['bool']['must'][0]['term'] = {keyword.value: searched_term.lower()} autocomplete_query['aggs']['groupby_module']['terms']['field'] = f'{keyword.value}.keyword' - rows = self.es.search(index=index.value, body=autocomplete_query) + rows = self.opensearch.search(index=index.value, body=autocomplete_query) hits = rows['aggregations']['groupby_module']['buckets'] result = [hit['key'] for hit in hits] return result - def delete_from_index(self, index: ESIndices, module: dict) -> dict: + def delete_from_index(self, index: OpenSearchIndices, module: dict) -> dict: """ Delete module from the index. Arguments: - :param index (ESIndices) Target index from which to delete module + :param index (OpenSearchIndices) Target index from which to delete module :param module (dict) Document to delete """ self.logger.info(f'Deleting module: "{module}" from index: "{index}"') delete_module_query = self._get_name_revision_query(index, module) - return self.es.delete_by_query(index=index.value, body=delete_module_query, conflicts='proceed') + return self.opensearch.delete_by_query(index=index.value, body=delete_module_query, conflicts='proceed') def delete_from_indices(self, module: dict): - for index in ESIndices: + for index in OpenSearchIndices: self.delete_from_index(index, module) - def index_module(self, index: ESIndices, document: dict) -> dict: + def index_module(self, index: OpenSearchIndices, document: dict) -> dict: """ Creates or updates a 'document' in a selected index. 
Arguments: - :param index (ESIndices) Target index to be indexed + :param index (OpenSearchIndices) Target index to be indexed :param document (dict) Document to index """ # TODO: Remove this IF after reindexing and unification of both indices - if index in [ESIndices.MODULES, ESIndices.YINDEX]: + if index in [OpenSearchIndices.MODULES, OpenSearchIndices.YINDEX]: try: document['module'] = document.pop('name') except KeyError: pass - return self.es.index(index=index.value, body=document, request_timeout=self.elk_request_timeout) + return self.opensearch.index(index=index.value, body=document, request_timeout=self.opensearch_request_timeout) - def bulk_modules(self, index: ESIndices, chunk): + def bulk_modules(self, index: OpenSearchIndices, chunk): for success, info in parallel_bulk( - client=self.es, + client=self.opensearch, actions=chunk, index=index.value, thread_count=self.threads, - request_timeout=self.elk_request_timeout, + request_timeout=self.opensearch_request_timeout, ): if not success: - self.logger.error(f'Elasticsearch document failed with info: {info}') + self.logger.error(f'OpenSearch document failed with info: {info}') - def match_all(self, index: ESIndices) -> dict: + def match_all(self, index: OpenSearchIndices) -> dict: """ Return the dictionary of all modules that are in the index. Argument: - :param index (ESIndices) Index in which to search + :param index (OpenSearchIndices) Index in which to search """ def _store_hits(hits: list, all_results: dict): @@ -239,112 +239,121 @@ def _store_hits(hits: list, all_results: dict): all_results = {} match_all_query = {'query': {'match_all': {}}} total_index_docs = 0 - es_result = self.es.search(index=index.value, body=match_all_query, scroll=u'1m', size=250) - scroll_id = es_result.get('_scroll_id') - hits = es_result['hits']['hits'] + opensearch_result = self.opensearch.search(index=index.value, body=match_all_query, scroll=u'1m', size=250) + scroll_id = opensearch_result.get('_scroll_id') + hits = opensearch_result['hits']['hits'] _store_hits(hits, all_results) total_index_docs += len(hits) - while es_result['hits']['hits']: - es_result = self.scroll(scroll_id) + while opensearch_result['hits']['hits']: + opensearch_result = self.scroll(scroll_id) - scroll_id = es_result.get('_scroll_id') - hits = es_result['hits']['hits'] + scroll_id = opensearch_result.get('_scroll_id') + hits = opensearch_result['hits']['hits'] _store_hits(hits, all_results) total_index_docs += len(hits) self.clear_scroll(scroll_id) return all_results - def get_module_by_name_revision(self, index: ESIndices, module: dict) -> list: + def get_module_by_name_revision(self, index: OpenSearchIndices, module: dict) -> list: get_module_query = self._get_name_revision_query(index, module) - es_result = self.es.search(index=index.value, body=get_module_query, size=1000) + opensearch_result = self.opensearch.search(index=index.value, body=get_module_query, size=1000) - return es_result['hits']['hits'] + return opensearch_result['hits']['hits'] - def get_sorted_module_revisions(self, index: ESIndices, name: str): - query_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/sorted_name_rev_query.json') + def get_sorted_module_revisions(self, index: OpenSearchIndices, name: str): + query_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/sorted_name_rev_query.json') with open(query_path, encoding='utf-8') as reader: sorted_name_rev_query = json.load(reader) # TODO: Remove this IF after reindexing and unification of both indices - if 
index in [ESIndices.MODULES, ESIndices.YINDEX]: + if index in [OpenSearchIndices.MODULES, OpenSearchIndices.YINDEX]: del sorted_name_rev_query['query']['bool']['must'][0]['match_phrase']['name.keyword'] sorted_name_rev_query['query']['bool']['must'][0]['match_phrase'] = {'module.keyword': {'query': name}} else: sorted_name_rev_query['query']['bool']['must'][0]['match_phrase']['name.keyword']['query'] = name try: - es_result = self.es.search(index=index.value, body=sorted_name_rev_query) + es_result = self.opensearch.search(index=index.value, body=sorted_name_rev_query) except RequestError: return [] return es_result['hits']['hits'] def get_node(self, module: dict) -> dict: - query_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/show_node.json') + query_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/show_node.json') with open(query_path, encoding='utf-8') as reader: show_node_query = json.load(reader) show_node_query['query']['bool']['must'][0]['match_phrase']['module.keyword']['query'] = module['name'] show_node_query['query']['bool']['must'][1]['match_phrase']['path']['query'] = module['path'] show_node_query['query']['bool']['must'][2]['match_phrase']['revision']['query'] = module['revision'] - hits = self.es.search(index=ESIndices.YINDEX.value, body=show_node_query) + hits = self.opensearch.search(index=OpenSearchIndices.YINDEX.value, body=show_node_query) return hits def generic_search( self, - index: t.Union[ESIndices, str], + index: t.Union[OpenSearchIndices, str], query: dict, response_size: t.Optional[int] = 0, use_scroll: bool = False, ): index = index if isinstance(index, str) else index.value if use_scroll: - return self.es.search( + return self.opensearch.search( index=index, body=query, - request_timeout=self.elk_request_timeout, + request_timeout=self.opensearch_request_timeout, scroll=u'10m', size=response_size, ) - return self.es.search(index=index, body=query, request_timeout=self.elk_request_timeout, size=response_size) + return self.opensearch.search( + index=index, + body=query, + request_timeout=self.opensearch_request_timeout, + size=response_size, + ) def clear_scroll(self, scroll_id: str): - return self.es.clear_scroll(scroll_id=scroll_id, ignore=(404,)) + return self.opensearch.clear_scroll(scroll_id=scroll_id, ignore=(404,)) def scroll(self, scroll_id: str): - return self.es.scroll(scroll_id=scroll_id, scroll=u'10m', request_timeout=self.elk_request_timeout) + return self.opensearch.scroll( + scroll_id=scroll_id, + scroll=u'10m', + request_timeout=self.opensearch_request_timeout, + ) - def document_exists(self, index: ESIndices, module: dict) -> bool: + def document_exists(self, index: OpenSearchIndices, module: dict) -> bool: """ Check whether 'module' already exists in index - if count is greater than 0. 
Arguments: - :param index (ESIndices) Index in which to search + :param index (OpenSearchIndices) Index in which to search :param module (dict) Document to search """ - if index == ESIndices.DRAFTS: + if index == OpenSearchIndices.DRAFTS: get_query = self._get_draft_query(index, module) else: get_query = self._get_name_revision_query(index, module) try: - es_count = self.es.count(index=index.value, body=get_query) + es_count = self.opensearch.count(index=index.value, body=get_query) except RequestError: return False return es_count['count'] > 0 - def _get_name_revision_query(self, index: ESIndices, module: dict) -> dict: - module_search_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/module_search.json') + def _get_name_revision_query(self, index: OpenSearchIndices, module: dict) -> dict: + module_search_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/module_search.json') with open(module_search_path, encoding='utf-8') as reader: name_revision_query = json.load(reader) # TODO: Remove this IF after reindexing and unification of both indices - if index in [ESIndices.MODULES, ESIndices.YINDEX]: + if index in [OpenSearchIndices.MODULES, OpenSearchIndices.YINDEX]: del name_revision_query['query']['bool']['must'][0]['match_phrase']['name.keyword'] name_revision_query['query']['bool']['must'][0]['match_phrase'] = { 'module.keyword': {'query': module['name']}, @@ -355,8 +364,8 @@ def _get_name_revision_query(self, index: ESIndices, module: dict) -> dict: return name_revision_query - def _get_draft_query(self, index: ESIndices, draft: dict) -> dict: - draft_search_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/draft_search.json') + def _get_draft_query(self, index: OpenSearchIndices, draft: dict) -> dict: + draft_search_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/draft_search.json') with open(draft_search_path, encoding='utf-8') as reader: draft_query = json.load(reader) diff --git a/elasticsearchIndexing/es_snapshots_manager.py b/opensearch_indexing/opensearch_snapshots_manager.py similarity index 56% rename from elasticsearchIndexing/es_snapshots_manager.py rename to opensearch_indexing/opensearch_snapshots_manager.py index 49dd6783..ee407035 100644 --- a/elasticsearchIndexing/es_snapshots_manager.py +++ b/opensearch_indexing/opensearch_snapshots_manager.py @@ -20,40 +20,40 @@ import os from operator import itemgetter -from elasticsearch import Elasticsearch -from elasticsearch.exceptions import NotFoundError +from opensearchpy import OpenSearch +from opensearchpy.exceptions import NotFoundError import utility.log as log -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices from utility.create_config import create_config -class ESSnapshotsManager: +class OpenSearchSnapshotsManager: def __init__(self) -> None: config = create_config() log_directory = config.get('Directory-Section', 'logs') - es_aws = config.get('DB-Section', 'es-aws') - elk_credentials = config.get('Secrets-Section', 'elk-secret').strip('"').split(' ') - self.elk_repo_name = config.get('General-Section', 'elk-repo-name') - es_host_config = { - 'host': config.get('DB-Section', 'es-host', fallback='localhost'), - 'port': config.get('DB-Section', 'es-port', fallback='9200'), + opensearch_aws = config.get('DB-Section', 'opensearch-aws') + opensearch_credentials = config.get('Secrets-Section', 'opensearch-secret').strip('"').split(' ') + 
self.opensearch_repo_name = config.get('General-Section', 'opensearch-repo-name') + opensearch_host_config = { + 'host': config.get('DB-Section', 'opensearch-host', fallback='localhost'), + 'port': config.get('DB-Section', 'opensearch-port', fallback='9200'), } - if es_aws == 'True': - self.es = Elasticsearch( - hosts=[es_host_config], - http_auth=(elk_credentials[0], elk_credentials[1]), + if opensearch_aws == 'True': + self.opensearch = OpenSearch( + hosts=[opensearch_host_config], + http_auth=(opensearch_credentials[0], opensearch_credentials[1]), scheme='https', ) else: - self.es = Elasticsearch(hosts=[es_host_config]) - log_file_path = os.path.join(log_directory, 'jobs', 'es-manager.log') - self.LOGGER = log.get_logger('es-snapshots-manager', log_file_path) + self.opensearch = OpenSearch(hosts=[opensearch_host_config]) + log_file_path = os.path.join(log_directory, 'jobs', 'opensearch-manager.log') + self.LOGGER = log.get_logger('opensearch-snapshots-manager', log_file_path) def create_snapshot_repository(self, compress: bool) -> dict: """Register a snapshot repository.""" - body = {'type': 'fs', 'settings': {'location': self.elk_repo_name, 'compress': compress}} - return self.es.snapshot.create_repository(repository=self.elk_repo_name, body=body) + body = {'type': 'fs', 'settings': {'location': self.opensearch_repo_name, 'compress': compress}} + return self.opensearch.snapshot.create_repository(repository=self.opensearch_repo_name, body=body) def create_snapshot(self, snapshot_name: str) -> dict: """Creates a snapshot with given 'snapshot_name' in a snapshot repository. @@ -62,12 +62,16 @@ def create_snapshot(self, snapshot_name: str) -> dict: :param snapshot_name (str) Name of the snapshot to be created """ index_body = {'indices': '_all'} - return self.es.snapshot.create(repository=self.elk_repo_name, snapshot=snapshot_name, body=index_body) + return self.opensearch.snapshot.create( + repository=self.opensearch_repo_name, + snapshot=snapshot_name, + body=index_body, + ) def get_sorted_snapshots(self) -> list: """Return a sorted list of existing snapshots.""" try: - snapshots = self.es.snapshot.get(repository=self.elk_repo_name, snapshot='_all') + snapshots = self.opensearch.snapshot.get(repository=self.opensearch_repo_name, snapshot='_all') except NotFoundError: self.LOGGER.exception('Snapshots not found') return [] @@ -80,14 +84,14 @@ def restore_snapshot(self, snapshot_name: str) -> dict: :param snapshot_name (str) Name of the snapshot to restore """ index_body = {'indices': '_all'} - for index in ESIndices: + for index in OpenSearchIndices: try: - self.es.indices.close(index.value) + self.opensearch.indices.close(index.value) except NotFoundError: continue - return self.es.snapshot.restore( - repository=self.elk_repo_name, + return self.opensearch.snapshot.restore( + repository=self.opensearch_repo_name, snapshot=snapshot_name, body=index_body, wait_for_completion=True, @@ -99,4 +103,4 @@ def delete_snapshot(self, snapshot_name: str) -> dict: Argument: :param snapshot_name (str) Name of the snapshot to delete """ - return self.es.snapshot.delete(repository=self.elk_repo_name, snapshot=snapshot_name) + return self.opensearch.snapshot.delete(repository=self.opensearch_repo_name, snapshot=snapshot_name) diff --git a/elasticsearchIndexing/process-drafts.py b/opensearch_indexing/process-drafts.py similarity index 80% rename from elasticsearchIndexing/process-drafts.py rename to opensearch_indexing/process-drafts.py index dc6253cd..bc3ae8ee 100644 --- 
a/elasticsearchIndexing/process-drafts.py +++ b/opensearch_indexing/process-drafts.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Script for adding new drafts to the DRAFTS index in Elasticsearch, so they can be searched in our system. +Script for adding new drafts to the DRAFTS index in OpenSearch, so they can be searched in our system. """ __author__ = 'Dmytro Kyrychenko' @@ -24,8 +24,8 @@ import logging import os -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility import log from utility.create_config import create_config from utility.script_config_dict import script_config_dict @@ -57,14 +57,14 @@ def main(script_config: ScriptConfig = DEFAULT_SCRIPT_CONFIG.copy()) -> list[Job drafts = [filename[:-4] for filename in os.listdir(ietf_drafts_dir) if filename[-4:] == '.txt'] - logger.info('Trying to initialize Elasticsearch indices') - es_manager = ESManager() - if not es_manager.index_exists(ESIndices.DRAFTS): + logger.info('Trying to initialize OpenSearch indices') + opensearch_manager = OpenSearchManager() + if not opensearch_manager.index_exists(OpenSearchIndices.DRAFTS): error_message = 'Drafts index has not been created yet.' logger.error(error_message) raise RuntimeError(error_message) - logging.getLogger('elasticsearch').setLevel(logging.ERROR) + logging.getLogger('opensearch').setLevel(logging.ERROR) done = 0 for i, draft_name in enumerate(drafts, 1): @@ -73,8 +73,8 @@ def main(script_config: ScriptConfig = DEFAULT_SCRIPT_CONFIG.copy()) -> list[Job logger.info(f'Indexing draft {draft_name} - draft {i} out of {len(drafts)}') try: - if not es_manager.document_exists(ESIndices.DRAFTS, draft): - es_manager.index_module(ESIndices.DRAFTS, draft) + if not opensearch_manager.document_exists(OpenSearchIndices.DRAFTS, draft): + opensearch_manager.index_module(OpenSearchIndices.DRAFTS, draft) logger.info(f'added {draft_name} to index') done += 1 else: @@ -82,7 +82,7 @@ def main(script_config: ScriptConfig = DEFAULT_SCRIPT_CONFIG.copy()) -> list[Job except Exception: logger.exception(f'Problem while processing draft {draft_name}') logger.info('Job finished successfully') - return [JobLogMessage(label='Successful', message=f'Added {done} drafts to ElasticSearch')] + return [JobLogMessage(label='Successful', message=f'Added {done} drafts to OpenSearch')] if __name__ == '__main__': diff --git a/elasticsearchIndexing/process_changed_mods.py b/opensearch_indexing/process_changed_mods.py similarity index 91% rename from elasticsearchIndexing/process_changed_mods.py rename to opensearch_indexing/process_changed_mods.py index 02e86642..cb3922ca 100755 --- a/elasticsearchIndexing/process_changed_mods.py +++ b/opensearch_indexing/process_changed_mods.py @@ -23,9 +23,9 @@ import shutil import sys -from elasticsearchIndexing.build_yindex import build_indices -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.index_build import BuildYINDEXModule +from opensearch_indexing.build_yindex import build_indices +from opensearch_indexing.models.index_build import BuildYINDEXModule +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility import log from utility.create_config import create_config from utility.script_config_dict import script_config_dict @@ -78,7 +78,7 @@ def start_processing_changed_mods(self):
os.unlink(self.lock_file_cron) sys.exit() - self._initialize_es_manager() + self._initialize_opensearch_manager() self.logger.info('Running cache files backup') self._backup_cache_files(self.delete_cache_path) @@ -103,22 +103,22 @@ def _create_lock_files(self): self.logger.error('Temporary lock file could not be created although it is not locked') sys.exit() - def _initialize_es_manager(self): - self.es_manager = ESManager() - logging.getLogger('elasticsearch').setLevel(logging.ERROR) + def _initialize_opensearch_manager(self): + self.opensearch_manager = OpenSearchManager() + logging.getLogger('opensearch').setLevel(logging.ERROR) def _delete_modules_from_es(self): for module in self.delete_cache: name, rev_org = module.split('@') revision, organization = rev_org.split('/') revision = validate_revision(revision) - self.logger.info(f'Deleting {module} from es indices') + self.logger.info(f'Deleting {module} from opensearch indices') module = { 'name': name, 'revision': revision, 'organization': organization, } - self.es_manager.delete_from_indices(module) + self.opensearch_manager.delete_from_indices(module) def _change_modules_in_es(self): recursion_limit = sys.getrecursionlimit() @@ -136,7 +136,7 @@ def _change_modules_in_es(self): ) try: - build_indices(self.es_manager, module, self.save_file_dir, self.json_ytree, self.logger) + build_indices(self.opensearch_manager, module, self.save_file_dir, self.json_ytree, self.logger) except Exception: self.logger.exception(f'Problem while processing module {module_key}') try: diff --git a/elasticsearchIndexing/pyang_plugin/README.md b/opensearch_indexing/pyang_plugin/README.md similarity index 58% rename from elasticsearchIndexing/pyang_plugin/README.md rename to opensearch_indexing/pyang_plugin/README.md index 4d9becc6..764ef624 100644 --- a/elasticsearchIndexing/pyang_plugin/README.md +++ b/opensearch_indexing/pyang_plugin/README.md @@ -3,10 +3,10 @@ This directory contains all PYANG plugins used by Yang Search. It can be referenced by the environment variable `$PYANG_PLUGINPATH`. All the PYANG plugins should only be placed in the `$PYANG_PLUGINPATH` directory. -## [json_tree.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/pyang_plugin/json_tree.py) +## [json_tree.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/pyang_plugin/json_tree.py) PYANG plugin for generating a JSON-formatted output of the data node hierarchy of the YANG modules. -## [yang_catalog_index_es.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/pyang_plugin/yang_catalog_index_es.py) +## [yang_catalog_index_opensearch.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/pyang_plugin/yang_catalog_index_opensearch.py) -PYANG plugin to generate the Elasticsearch data for indexing from modules. +PYANG plugin to generate the OpenSearch data for indexing from modules. 
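For context, pyang loads any plugin found on `$PYANG_PLUGINPATH` at startup, so the renamed directory only needs to be exported before pyang is invoked. Below is a minimal sketch of driving the `json_tree` plugin from Python; the `json-tree` format name and the module and plugin paths are illustrative assumptions rather than values taken from this patch:

```python
# Sketch only: the format name and all paths are assumed, not part of this patch.
import os
import subprocess

env = dict(os.environ, PYANG_PLUGINPATH='/backend/opensearch_indexing/pyang_plugin')
proc = subprocess.run(
    ['pyang', '-f', 'json-tree', '/var/yang/all_modules/ietf-rip@2020-02-20.yang'],
    env=env,
    capture_output=True,  # collect stdout/stderr instead of inheriting them
    text=True,            # decode output as str rather than bytes
    check=True,           # raise CalledProcessError on a non-zero exit status
)
print(proc.stdout)  # JSON-formatted data node hierarchy of the module
```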
diff --git a/elasticsearchIndexing/pyang_plugin/__init__.py b/opensearch_indexing/pyang_plugin/__init__.py similarity index 100% rename from elasticsearchIndexing/pyang_plugin/__init__.py rename to opensearch_indexing/pyang_plugin/__init__.py diff --git a/elasticsearchIndexing/pyang_plugin/json_tree.py b/opensearch_indexing/pyang_plugin/json_tree.py similarity index 100% rename from elasticsearchIndexing/pyang_plugin/json_tree.py rename to opensearch_indexing/pyang_plugin/json_tree.py diff --git a/elasticsearchIndexing/pyang_plugin/yang_catalog_index_es.py b/opensearch_indexing/pyang_plugin/yang_catalog_index_opensearch.py similarity index 100% rename from elasticsearchIndexing/pyang_plugin/yang_catalog_index_es.py rename to opensearch_indexing/pyang_plugin/yang_catalog_index_opensearch.py diff --git a/elasticsearchIndexing/tests/resources/es_test_data.json b/opensearch_indexing/tests/resources/opensearch_test_data.json similarity index 100% rename from elasticsearchIndexing/tests/resources/es_test_data.json rename to opensearch_indexing/tests/resources/opensearch_test_data.json diff --git a/elasticsearchIndexing/tests/test_es_manager.py b/opensearch_indexing/tests/test_opensearch_manager.py similarity index 58% rename from elasticsearchIndexing/tests/test_es_manager.py rename to opensearch_indexing/tests/test_opensearch_manager.py index b2fcda13..4f837d8a 100644 --- a/elasticsearchIndexing/tests/test_es_manager.py +++ b/opensearch_indexing/tests/test_opensearch_manager.py @@ -22,43 +22,43 @@ import unittest from ddt import data, ddt -from elasticsearch import Elasticsearch +from opensearchpy import OpenSearch -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices -from elasticsearchIndexing.models.keywords_names import KeywordsNames +from opensearch_indexing.models.keywords_names import KeywordsNames +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility.create_config import create_config -class TestESManagerClass(unittest.TestCase): - es: Elasticsearch - test_index: ESIndices +class TestOpenSearchManagerClass(unittest.TestCase): + opensearch: OpenSearch + test_index: OpenSearchIndices @classmethod def setUpClass(cls): config = create_config() - es_host_config = { - 'host': config.get('DB-Section', 'es-host', fallback='localhost'), - 'port': config.get('DB-Section', 'es-port', fallback='9200'), + opensearch_host_config = { + 'host': config.get('DB-Section', 'opensearch-host', fallback='localhost'), + 'port': config.get('DB-Section', 'opensearch-port', fallback='9200'), } - cls.es = Elasticsearch(hosts=[es_host_config]) - cls.es_manager = ESManager(cls.es) - cls.test_index = ESIndices.TEST - resources_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/tests/resources') - with open(os.path.join(resources_path, 'es_test_data.json'), 'r') as reader: + cls.opensearch = OpenSearch(hosts=[opensearch_host_config]) + cls.opensearch_manager = OpenSearchManager(cls.opensearch) + cls.test_index = OpenSearchIndices.TEST + resources_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/tests/resources') + with open(os.path.join(resources_path, 'opensearch_test_data.json'), 'r') as reader: cls.test_data = json.load(reader) cls.ietf_rip_module = {'name': 'ietf-rip', 'revision': '2020-02-20', 'organization': 'ietf'} def setUp(self): - self.es.indices.delete(index=self.test_index.value, ignore=[400, 404]) + 
self.opensearch.indices.delete(index=self.test_index.value, ignore=[400, 404]) def tearDown(self): - self.es.indices.delete(index=self.test_index.value, ignore=[400, 404]) + self.opensearch.indices.delete(index=self.test_index.value, ignore=[400, 404]) -class TestESManagerWithoutIndexClass(TestESManagerClass): +class TestOpenSearchManagerWithoutIndexClass(TestOpenSearchManagerClass): def test_create_index(self): - create_result = self.es_manager.create_index(self.test_index) + create_result = self.opensearch_manager.create_index(self.test_index) self.assertIn('acknowledged', create_result) self.assertIn('index', create_result) @@ -66,26 +66,26 @@ def test_create_index(self): self.assertEqual(create_result['index'], self.test_index.value) def test_index_exists(self): - index_exists = self.es_manager.index_exists(self.test_index) + index_exists = self.opensearch_manager.index_exists(self.test_index) self.assertFalse(index_exists) -class TestESManagerWithEmptyIndexClass(TestESManagerClass): +class TestOpenSearchManagerWithEmptyIndexClass(TestOpenSearchManagerClass): @classmethod def setUpClass(cls): super().setUpClass() index_json_name = f'initialize_{cls.test_index.value}_index.json' - index_json_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/', index_json_name) + index_json_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/', index_json_name) with open(index_json_path, encoding='utf-8') as reader: cls.index_config = json.load(reader) def setUp(self): super().setUp() - self.es.indices.create(index=self.test_index.value, body=self.index_config, ignore=400) + self.opensearch.indices.create(index=self.test_index.value, body=self.index_config, ignore=400) def test_create_index_already_exists(self): - create_result = self.es_manager.create_index(self.test_index) + create_result = self.opensearch_manager.create_index(self.test_index) self.assertIn('error', create_result) self.assertIn('status', create_result) @@ -94,29 +94,29 @@ def test_create_index_already_exists(self): self.assertEqual(create_result['error']['type'], 'resource_already_exists_exception') def test_index_exists(self): - index_exists = self.es_manager.index_exists(self.test_index) + index_exists = self.opensearch_manager.index_exists(self.test_index) self.assertTrue(index_exists) def test_document_exists(self): - in_es = self.es_manager.document_exists(self.test_index, self.ietf_rip_module) + in_es = self.opensearch_manager.document_exists(self.test_index, self.ietf_rip_module) self.assertFalse(in_es) def test_autocomplete_no_results(self): searched_term = 'ietf-' - results = self.es_manager.autocomplete(self.test_index, KeywordsNames.NAME, searched_term) + results = self.opensearch_manager.autocomplete(self.test_index, KeywordsNames.NAME, searched_term) self.assertEqual(results, []) def test_delete_from_index(self): - delete_result = self.es_manager.delete_from_index(self.test_index, self.ietf_rip_module) + delete_result = self.opensearch_manager.delete_from_index(self.test_index, self.ietf_rip_module) self.assertIn('deleted', delete_result) self.assertEqual(delete_result['deleted'], 0) def test_index_module(self): - index_result = self.es_manager.index_module(self.test_index, self.ietf_rip_module) + index_result = self.opensearch_manager.index_module(self.test_index, self.ietf_rip_module) self.assertIn('result', index_result) self.assertEqual(index_result['result'], 'created') @@ -124,42 +124,42 @@ def test_index_module(self): self.assertEqual(index_result['_shards']['failed'], 0) def 
test_get_module_by_name_revision(self): - hits = self.es_manager.get_module_by_name_revision(self.test_index, self.ietf_rip_module) + hits = self.opensearch_manager.get_module_by_name_revision(self.test_index, self.ietf_rip_module) self.assertEqual(hits, []) def test_get_sorted_module_revisions(self): name = 'ietf-rip' - hits = self.es_manager.get_sorted_module_revisions(self.test_index, name) + hits = self.opensearch_manager.get_sorted_module_revisions(self.test_index, name) self.assertEqual(hits, []) def test_match_all(self): - all_es_modules = self.es_manager.match_all(self.test_index) + all_es_modules = self.opensearch_manager.match_all(self.test_index) self.assertEqual(all_es_modules, {}) @ddt -class TestESManagerAutocompleteIndexClass(TestESManagerClass): +class TestOpenSearchManagerAutocompleteIndexClass(TestOpenSearchManagerClass): @classmethod def setUpClass(cls): super().setUpClass() index_json_name = f'initialize_{cls.test_index.value}_index.json' - index_json_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing/json/', index_json_name) + index_json_path = os.path.join(os.environ['BACKEND'], 'opensearch_indexing/json/', index_json_name) with open(index_json_path, encoding='utf-8') as reader: cls.index_config = json.load(reader) cls.autocomplete_modules = cls.test_data['autocomplete_modules'] def setUp(self): super().setUp() - self.es.indices.create(index=self.test_index.value, body=self.index_config, ignore=400) + self.opensearch.indices.create(index=self.test_index.value, body=self.index_config, ignore=400) for module in self.autocomplete_modules: - self.es.index(index=self.test_index.value, body=module, refresh='true') + self.opensearch.index(index=self.test_index.value, body=module, refresh='true') @data('ietf-', 'IETF-R', '-yang-') def test_autocomplete(self, searched_term: str): - results = self.es_manager.autocomplete(self.test_index, KeywordsNames.NAME, searched_term) + results = self.opensearch_manager.autocomplete(self.test_index, KeywordsNames.NAME, searched_term) self.assertNotEqual(results, []) for result in results: @@ -167,13 +167,13 @@ def test_autocomplete(self, searched_term: str): @data('a', 'ab', 'ief-r') def test_autocomplete_no_results(self, searched_term: str): - results = self.es_manager.autocomplete(self.test_index, KeywordsNames.NAME, searched_term) + results = self.opensearch_manager.autocomplete(self.test_index, KeywordsNames.NAME, searched_term) self.assertEqual(results, []) @data('ietf', 'open') def test_autocomplete_organization(self, searched_term: str): - results = self.es_manager.autocomplete(self.test_index, KeywordsNames.ORGANIZATION, searched_term) + results = self.opensearch_manager.autocomplete(self.test_index, KeywordsNames.ORGANIZATION, searched_term) self.assertNotEqual(results, []) for result in results: @@ -181,23 +181,23 @@ def test_autocomplete_organization(self, searched_term: str): @data('i', 'ie', 'random') def test_autocomplete_organization_no_results(self, searched_term: str): - results = self.es_manager.autocomplete(self.test_index, KeywordsNames.ORGANIZATION, searched_term) + results = self.opensearch_manager.autocomplete(self.test_index, KeywordsNames.ORGANIZATION, searched_term) self.assertEqual(results, []) def test_delete_from_index(self): - delete_result = self.es_manager.delete_from_index(self.test_index, self.ietf_rip_module) + delete_result = self.opensearch_manager.delete_from_index(self.test_index, self.ietf_rip_module) self.assertIn('deleted', delete_result) self.assertNotEqual(delete_result['deleted'], 
0) def test_document_exists(self): - in_es = self.es_manager.document_exists(self.test_index, self.ietf_rip_module) + in_es = self.opensearch_manager.document_exists(self.test_index, self.ietf_rip_module) self.assertTrue(in_es) def test_get_module_by_name_revision(self): - hits = self.es_manager.get_module_by_name_revision(self.test_index, self.ietf_rip_module) + hits = self.opensearch_manager.get_module_by_name_revision(self.test_index, self.ietf_rip_module) self.assertNotEqual(hits, []) self.assertEqual(len(hits), 1) @@ -206,13 +206,13 @@ def test_get_module_by_name_revision(self): def test_get_module_by_name_revision_not_exists(self): module = {'name': 'random', 'revision': '2022-01-01', 'organization': 'random'} - hits = self.es_manager.get_module_by_name_revision(self.test_index, module) + hits = self.opensearch_manager.get_module_by_name_revision(self.test_index, module) self.assertEqual(hits, []) def test_get_sorted_module_revisions(self): name = 'ietf-rip' - hits = self.es_manager.get_sorted_module_revisions(self.test_index, name) + hits = self.opensearch_manager.get_sorted_module_revisions(self.test_index, name) self.assertNotEqual(hits, []) for hit in hits: @@ -220,12 +220,12 @@ def test_get_sorted_module_revisions(self): def test_get_sorted_module_revisions_not_exists(self): name = 'random' - hits = self.es_manager.get_sorted_module_revisions(self.test_index, name) + hits = self.opensearch_manager.get_sorted_module_revisions(self.test_index, name) self.assertEqual(hits, []) def test_match_all(self): - all_es_modules = self.es_manager.match_all(self.test_index) + all_es_modules = self.opensearch_manager.match_all(self.test_index) self.assertNotEqual(all_es_modules, {}) for module in all_es_modules.values(): diff --git a/parseAndPopulate/README.md b/parseAndPopulate/README.md index 77c3a181..68f314f7 100644 --- a/parseAndPopulate/README.md +++ b/parseAndPopulate/README.md @@ -21,8 +21,8 @@ The main scripts are: Firstly, it creates a temporary json directory, which will be used to store the needed files (like `prepare.json`, `normal.json`, `temp_hashes.json`). Secondly, it runs the [parse_directory](https://github.com/YangCatalog/backend/blob/master/parseAndPopulate/parse_directory.py) script which dumps new/updated modules and vendors data into the json dir mentioned above in the `prepare.json` and `normal.json` files respectively. - After populating ConfD and Redis, it will prepare and send modules data for Elasticsearch indexing (writes data to the - `changes-cache` and `delete-cache` files which are later used in the [process_changed_mods.py](https://github.com/YangCatalog/backend/blob/master/elasticsearchindexing/process_changed_mods.py) + After populating ConfD and Redis, it will prepare and send modules data for OpenSearch indexing (writes data to the + `changes-cache` and `delete-cache` files which are later used in the [process_changed_mods.py](https://github.com/YangCatalog/backend/blob/master/opensearch_indexing/process_changed_mods.py) script). Then the API will be restarted, so it can load all the new metadata into its cache. After that, this script will start to run more complicated algorithms on those parsed yang files. This will extract dependents, semantic versioning and tree types. 
When this is parsed it will once again populate ConfD and Redis, and restart API, diff --git a/parseAndPopulate/modulesComplicatedAlgorithms.py b/parseAndPopulate/modulesComplicatedAlgorithms.py index 69220c9d..2dec50a5 100644 --- a/parseAndPopulate/modulesComplicatedAlgorithms.py +++ b/parseAndPopulate/modulesComplicatedAlgorithms.py @@ -40,7 +40,7 @@ from pyang import plugin from pyang.plugins.tree import emit_tree -from elasticsearchIndexing.pyang_plugin.json_tree import emit_tree as emit_json_tree +from opensearch_indexing.pyang_plugin.json_tree import emit_tree as emit_json_tree from redisConnections.redisConnection import RedisConnection from utility import log, message_factory from utility.confdService import ConfdService diff --git a/parseAndPopulate/populate.py b/parseAndPopulate/populate.py index 3d15388e..cba38249 100644 --- a/parseAndPopulate/populate.py +++ b/parseAndPopulate/populate.py @@ -46,8 +46,8 @@ from redisConnections.redisConnection import RedisConnection from utility.confdService import ConfdService from utility.create_config import create_config -from utility.elasticsearch_util import ESIndexingPaths, prepare_for_es_indexing, send_for_es_indexing from utility.message_factory import MessageFactory +from utility.opensearch_util import ESIndexingPaths, prepare_for_es_indexing, send_for_es_indexing from utility.script_config_dict import script_config_dict from utility.scriptConfig import ScriptConfig from utility.staticVariables import json_headers diff --git a/recovery/README.md b/recovery/README.md index 2f6375eb..2bae1898 100644 --- a/recovery/README.md +++ b/recovery/README.md @@ -20,28 +20,28 @@ rebuilding by the `yc-api-recovery` container. providing this argument, the latest backup will be used. All the modules and vendors (metadata) from this backup will be loaded to Redis. -## [elk_recovery.py](https://github.com/YangCatalog/backend/blob/master/recovery/elk_recovery.py) +## [opensearch_recovery.py](https://github.com/YangCatalog/backend/blob/master/recovery/opensearch_recovery.py) This script can be called via the Admin page, and it saves/loads (using the `--save` and `--load` arguments respectively) -the Elasticsearch database (all indices) to/from a snapshot. +the OpenSearch database (all indices) to/from a snapshot. 1. Saving database When saving the database it will create a snapshot in the directory that must be specified in the - [elasticsearch.yml](https://github.com/YangCatalog/deployment/blob/master/elasticsearch/elasticsearch.yml) + [opensearch.yml](https://github.com/YangCatalog/deployment/blob/master/opensearch/opensearch.yml) file in the `path.repo`. The default name for the snapshot is current datetime in the special backup format, but - it can be changed using the `--file` argument, for example, to save some specified state of Elasticsearch for later. + it can be changed using the `--file` argument, for example, to save some specified state of OpenSearch for later. 2. Loading database When loading the database we can provide the path to a specific snapshot file using the `--file` argument, without - providing this argument, the latest snapshot will be used. Be aware that restoring an Elasticsearch snapshot will - replace the current state of the Elasticsearch indices with the data that was backed up in the snapshot, it will + providing this argument, the latest snapshot will be used. 
Be aware that restoring an OpenSearch snapshot will + replace the current state of the OpenSearch indices with the data that was backed up in the snapshot; it will replace the entire index, including any data that was added or modified since the snapshot was taken. - Elasticsearch will automatically close the index before restoring the data and then reopen it when the restore is complete. + OpenSearch will automatically close the index before restoring the data and then reopen it when the restore is complete. This means that the index will be unavailable while the restore is in progress. -## [elk_fill.py](https://github.com/YangCatalog/backend/blob/master/recovery/elk_fill.py) +## [opensearch_fill.py](https://github.com/YangCatalog/backend/blob/master/recovery/opensearch_fill.py) This script can be called via the Admin page, and it's used for creating a dictionary of all the modules which are currently stored in the Redis database. Dictionary contains key: value pairs in following format: @@ -50,8 +50,8 @@ which are currently stored in the Redis database. Dictionary contains key: value "@/": "/var/yang/all_modules/@.yang" } ``` -The entire dictionary is then stored in a JSON file: `elasticsearch_data.json` in the `temp` directory. -Content of this JSON file can then be used as an input for indexing modules into Elasticsearch. +The entire dictionary is then stored in a JSON file: `opensearch_data.json` in the `temp` directory. +Content of this JSON file can then be used as an input for indexing modules into OpenSearch. ## [redis_users_recovery.py](https://github.com/YangCatalog/backend/blob/master/recovery/redis_users_recovery.py) diff --git a/recovery/elk_fill.py b/recovery/opensearch_fill.py similarity index 95% rename from recovery/elk_fill.py rename to recovery/opensearch_fill.py index 5b0198d0..bbfd6847 100644 --- a/recovery/elk_fill.py +++ b/recovery/opensearch_fill.py @@ -14,7 +14,7 @@ """ This script will create JSON file which is used to -populate Elasticsearch from all the modules saved in Redis database. +populate OpenSearch from all the modules saved in Redis database. """ __author__ = 'Miroslav Kovac' @@ -70,7 +70,7 @@ def main(script_conf: ScriptConfig = DEFAULT_SCRIPT_CONFIG.copy()): value = f'{save_file_dir}/{name}@{revision}.yang' modules_dict[key] = value - output_path = os.path.join(temp, 'elasticsearch_data.json') + output_path = os.path.join(temp, 'opensearch_data.json') with open(output_path, 'w') as writer: json.dump(modules_dict, writer) diff --git a/recovery/elk_recovery.py b/recovery/opensearch_recovery.py similarity index 76% rename from recovery/elk_recovery.py rename to recovery/opensearch_recovery.py index 347e297d..45f6b142 100644 --- a/recovery/elk_recovery.py +++ b/recovery/opensearch_recovery.py @@ -14,7 +14,7 @@ # limitations under the License. """ -Create or restore backups of our Elasticsearch database. +Create or restore backups of our OpenSearch database.
""" __author__ = 'Miroslav Kovac' @@ -26,7 +26,7 @@ import os import sys -from elasticsearchIndexing.es_snapshots_manager import ESSnapshotsManager +from opensearch_indexing.opensearch_snapshots_manager import OpenSearchSnapshotsManager from utility.script_config_dict import script_config_dict from utility.scriptConfig import ScriptConfig from utility.staticVariables import BACKUP_DATE_FORMAT @@ -37,29 +37,29 @@ help=script_config_dict[FILENAME]['help'], args=script_config_dict[FILENAME].get('args'), arglist=None if __name__ == '__main__' else [], - mutually_exclusive_args=script_config_dict[FILENAME]['mutually_exclusive_args'], + mutually_exclusive_args=script_config_dict[FILENAME].get('mutually_exclusive_args'), ) def main(script_conf: ScriptConfig = DEFAULT_SCRIPT_CONFIG.copy()): args = script_conf.args - es_snapshots_manager = ESSnapshotsManager() - es_snapshots_manager.create_snapshot_repository(args.compress) + opensearch_snapshots_manager = OpenSearchSnapshotsManager() + opensearch_snapshots_manager.create_snapshot_repository(args.compress) if args.save: args.file = args.file or datetime.datetime.utcnow().strftime(BACKUP_DATE_FORMAT) - es_snapshots_manager.create_snapshot(args.file) + opensearch_snapshots_manager.create_snapshot(args.file) elif args.load: if args.file: snapshot_name = args.file else: - sorted_snapshots = es_snapshots_manager.get_sorted_snapshots() + sorted_snapshots = opensearch_snapshots_manager.get_sorted_snapshots() if not sorted_snapshots: print('There are no snapshots to restore') sys.exit(1) snapshot_name = sorted_snapshots[-1]['snapshot'] - restore_result = es_snapshots_manager.restore_snapshot(snapshot_name) + restore_result = opensearch_snapshots_manager.restore_snapshot(snapshot_name) print(f'Restore result:\n{restore_result}') diff --git a/requirements.txt b/requirements.txt index 0a094e85..2a1ede37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ prometheus-client==0.10.1 Crypto==1.4.1 pycryptodome==3.14.1 python-dateutil==2.8.1 -elasticsearch==7.10.1 +opensearch-py==2.2.0 Flask==2.3.2 Flask-HTTPAuth==4.4.0 flask-cors==3.0.10 diff --git a/sandbox/compare_databases.py b/sandbox/compare_databases.py index cab69204..3d93cbef 100644 --- a/sandbox/compare_databases.py +++ b/sandbox/compare_databases.py @@ -1,9 +1,9 @@ """ PHASE I: This script loops through each key in the Redis database (= each stored module) -and checks whether the necessary information about this module is also in the Elasticsearch database. -If module is missing in Elasticsearch database, check if it is stored in all_modules folder, -and dump it in format which can be directly used to insert to Elasticsearch. +and checks whether the necessary information about this module is also in the OpenSearch database. +If a module is missing from the OpenSearch database, check if it is stored in the all_modules folder, +and dump it in a format which can be directly used for insertion into OpenSearch. 
PHASE II: Search and scroll all the documents in the 'modules' index and check whether this modules is also stored in Redis database @@ -12,12 +12,12 @@ import json import os -from elasticsearch.exceptions import RequestError +from opensearchpy.exceptions import RequestError from redis import Redis import utility.log as log -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility.create_config import create_config @@ -48,19 +48,19 @@ def main(): global LOGGER LOGGER = log.get_logger('sandbox', '{}/sandbox.log'.format(log_directory)) - # Create Redis and Elasticsearch connections + # Create Redis and OpenSearch connections redis = Redis(host=redis_host, port=redis_port, db=1) - es_manager = ESManager() + opensearch_manager = OpenSearchManager() # Set up variables and counters - es_missing_modules = [] + opensearch_missing_modules = [] redis_missing_modules = [] incorrect_format_modules = [] modules_to_index_dict = {} modules_to_index_list = [] redis_modules = 0 - # PHASE I: Check modules from Redis in Elasticsearch + # PHASE I: Check modules from Redis in OpenSearch LOGGER.info('Starting PHASE I') for redis_key in redis.scan_iter(): try: @@ -72,11 +72,11 @@ def main(): except ValueError: continue try: - in_es = es_manager.document_exists(ESIndices.AUTOCOMPLETE, module) + in_es = opensearch_manager.document_exists(OpenSearchIndices.AUTOCOMPLETE, module) if in_es: continue - es_missing_modules.append(key) + opensearch_missing_modules.append(key) module_raw = redis.get(redis_key) module = json.loads(module_raw or '{}') # Check if this file is in /var/yang/all_modules folder @@ -95,25 +95,25 @@ def main(): except Exception: continue - # PHASE II: Check modules from Elasticsearch in Redis + # PHASE II: Check modules from OpenSearch in Redis LOGGER.info('Starting PHASE II') - all_es_modules = es_manager.match_all(ESIndices.AUTOCOMPLETE) - result = check_module_in_redis(all_es_modules, redis) + all_opensearch_modules = opensearch_manager.match_all(OpenSearchIndices.AUTOCOMPLETE) + result = check_module_in_redis(all_opensearch_modules, redis) redis_missing_modules.extend(result) # Log results LOGGER.info('REDIS') LOGGER.info('Number of modules in Redis: {}'.format(redis_modules)) LOGGER.info('Number of modules with incorrect format {}'.format(len(incorrect_format_modules))) - LOGGER.info('Number of Redis modules missing in ES: {}'.format(len(es_missing_modules))) + LOGGER.info('Number of Redis modules missing in OpenSearch: {}'.format(len(opensearch_missing_modules))) LOGGER.info('Number of missing modules which can be immediately indexed: {}'.format(len(modules_to_index_dict))) - LOGGER.info('ELASTICSEARCH') - LOGGER.info('Number of modules in Elasticsearch: {}'.format(len(all_es_modules))) - LOGGER.info('Number of ES modules missing in Redis: {}'.format(len(redis_missing_modules))) + LOGGER.info('OPENSEARCH') + LOGGER.info('Number of modules in OpenSearch: {}'.format(len(all_opensearch_modules))) + LOGGER.info('Number of OpenSearch modules missing in Redis: {}'.format(len(redis_missing_modules))) result = { - 'es_missing_modules_list': es_missing_modules, + 'opensearch_missing_modules_list': opensearch_missing_modules, 'redis_missing_modules_list': redis_missing_modules, 'incorrect_format_modules_list': incorrect_format_modules, 'modules_to_index': modules_to_index_dict, diff 
--git a/sandbox/reindex.py b/sandbox/reindex.py index e06980e8..232cb129 100644 --- a/sandbox/reindex.py +++ b/sandbox/reindex.py @@ -1,10 +1,10 @@ -"""Reindex an Elasticsearch index.""" +"""Reindex an OpenSearch index.""" import argparse import time import utility.log as log -from elasticsearchIndexing.es_manager import ESManager +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility.create_config import create_config @@ -16,13 +16,13 @@ def main(): config = create_config() log_directory = config.get('Directory-Section', 'logs', fallback='/var/yang/logs') logger = log.get_logger('reindex', '{}/sandbox.log'.format(log_directory)) - es = ESManager().es - task_id = es.reindex( + opensearch = OpenSearchManager().opensearch + task_id = opensearch.reindex( body={'source': {'index': args.source}, 'dest': {'index': args.dest}}, wait_for_completion=False, )['task'] while True: - task_info = es.tasks.get(task_id=task_id) + task_info = opensearch.tasks.get(task_id=task_id) logger.info(f'{task_info["task"]["status"]["updated"]} out of {task_info["task"]["status"]["total"]}') if task_info['completed']: break diff --git a/sandbox/swap_es_aliases.py b/sandbox/swap_opensearch_aliases.py similarity index 73% rename from sandbox/swap_es_aliases.py rename to sandbox/swap_opensearch_aliases.py index 3923857a..7d4c1b23 100644 --- a/sandbox/swap_es_aliases.py +++ b/sandbox/swap_opensearch_aliases.py @@ -1,8 +1,8 @@ -"""Swap the underlying indices of an Elasticsearch alias""" +"""Swap the underlying indices of an OpenSearch alias""" import argparse -from elasticsearchIndexing.es_manager import ESManager +from opensearch_indexing.opensearch_manager import OpenSearchManager def main(): @@ -12,8 +12,8 @@ def main(): parser.add_argument('add', type=str, help='New index to add to the alias') parser.add_argument('--delete', action='store_true', help='Delete the old index after removal') args = parser.parse_args() - es = ESManager().es - es.indices.update_aliases( + opensearch = OpenSearchManager().opensearch + opensearch.indices.update_aliases( body={ 'actions': [ {'remove': {'index': '*', 'alias': args.alias}}, @@ -22,7 +22,7 @@ def main(): }, ) if args.delete: - es.indices.delete(args.remove) + opensearch.indices.delete(args.remove) if __name__ == '__main__': diff --git a/sandbox/update_es_index_mapping.py b/sandbox/update_opensearch_index_mapping.py similarity index 81% rename from sandbox/update_es_index_mapping.py rename to sandbox/update_opensearch_index_mapping.py index ab12bfdb..37f67d67 100644 --- a/sandbox/update_es_index_mapping.py +++ b/sandbox/update_opensearch_index_mapping.py @@ -1,10 +1,10 @@ -"""Update Elasticsearch index's mapping""" +"""Update OpenSearch index's mapping""" import argparse import json import os -from elasticsearchIndexing.es_manager import ESManager +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility import log from utility.create_config import create_config @@ -39,12 +39,13 @@ def main(): ), ) args = parser.parse_args() - es = ESManager().es + opensearch = OpenSearchManager().opensearch with open(args.new_mapping_path, 'r') as new_mapping_file: new_mapping_body = json.load(new_mapping_file) - es.indices.put_mapping(index=args.index, body=new_mapping_body) + opensearch.indices.put_mapping(index=args.index, body=new_mapping_body) logger.info( - f'{args.index} index is updated and now has the following mapping:\n{es.indices.get_mapping(index=args.index)}', + f'{args.index} index is updated and now has the following mapping:\n' 
+ f'{opensearch.indices.get_mapping(index=args.index)}', ) diff --git a/tests/resources/test.conf b/tests/resources/test.conf index 13c4367f..b70c0b53 100644 --- a/tests/resources/test.conf +++ b/tests/resources/test.conf @@ -1,6 +1,6 @@ [General-Section] notify-index=False -elk-repo-name=yangcatalog_snapshots +opensearch-repo-name=yangcatalog_snapshots uwsgi=True threads=1 is-prod=False @@ -10,7 +10,7 @@ repo-config-email=test_email@example.com [Secrets-Section] flask-secret-key=S3CR3T rabbitmq-password=guest -elk-secret=test +opensearch-secret=test confd-credentials='test test' yang-catalog-token=test admin-token=test @@ -45,9 +45,9 @@ host=127.0.0.1 name-users=yang_catalog name-search=yang user=yang -es-host=localhost -es-port=9200 -es-aws=False +opensearch-host=localhost +opensearch-port=9200 +opensearch-aws=False redis-host=yc-redis redis-port=6379 redis-modules-db=11 diff --git a/tests/test_celery_tasks.py b/tests/test_celery_tasks.py index 47e17a26..88959c5b 100644 --- a/tests/test_celery_tasks.py +++ b/tests/test_celery_tasks.py @@ -32,7 +32,7 @@ from jobs.status_messages import StatusMessage from redisConnections.redisConnection import RedisConnection from utility.create_config import create_config -from utility.elasticsearch_util import ESIndexingPaths +from utility.opensearch_util import ESIndexingPaths class MockModulesComplicatedAlgorithms: diff --git a/tests/test_search.py b/tests/test_search.py index 9501a043..14a334ef 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -3,21 +3,21 @@ import unittest from configparser import ConfigParser -from elasticsearch import Elasticsearch +from opensearchpy import OpenSearch from api.cache.api_cache import cache from api.views.yang_search.grep_search import GrepSearch from api.yangcatalog_api import app # noqa: F401 -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from utility.create_config import create_config class TestGrepSearchClass(unittest.TestCase): resources_path: str - es: Elasticsearch - es_manager: ESManager - es_index: ESIndices + opensearch: OpenSearch + opensearch_manager: OpenSearchManager + opensearch_index: OpenSearchIndices @classmethod def setUpClass(cls): @@ -27,32 +27,36 @@ def setUpClass(cls): @classmethod def _configure_es(cls, config: ConfigParser): - es_host_config = { - 'host': config.get('DB-Section', 'es-host', fallback='localhost'), - 'port': config.get('DB-Section', 'es-port', fallback='9200'), + opensearch_host_config = { + 'host': config.get('DB-Section', 'opensearch-host', fallback='localhost'), + 'port': config.get('DB-Section', 'opensearch-port', fallback='9200'), } - cls.es = Elasticsearch(hosts=[es_host_config]) - cls.es_manager = ESManager(cls.es) - cls.es_index = ESIndices.TEST_SEARCH + cls.opensearch = OpenSearch(hosts=[opensearch_host_config]) + cls.opensearch_manager = OpenSearchManager(cls.opensearch) + cls.opensearch_index = OpenSearchIndices.TEST_SEARCH with open(os.path.join(cls.resources_path, 'test_search/search_test_data.json'), 'r') as reader: es_test_data = json.load(reader) - index_json_name = f'initialize_{cls.es_index.value}_index.json' - index_json_path = os.path.join(os.environ['BACKEND'], 'elasticsearchIndexing', 'json', index_json_name) + index_json_name = f'initialize_{cls.opensearch_index.value}_index.json' + index_json_path = 
os.path.join(os.environ['BACKEND'], 'opensearch_indexing', 'json', index_json_name) with open(index_json_path, encoding='utf-8') as reader: index_config = json.load(reader) - cls.es.indices.create(index=cls.es_index.value, body=index_config, ignore=400) + cls.opensearch.indices.create(index=cls.opensearch_index.value, body=index_config, ignore=400) all_modules = es_test_data['all_modules'] for module in all_modules: - cls.es.index(index=cls.es_index.value, body=module, refresh='true') + cls.opensearch.index(index=cls.opensearch_index.value, body=module, refresh='true') @classmethod def tearDownClass(cls): - cls.es.indices.delete(index=cls.es_index.value, ignore=[400, 404]) + cls.opensearch.indices.delete(index=cls.opensearch_index.value, ignore=[400, 404]) def setUp(self): with app.app_context(): cache.clear() - self.grep_search = GrepSearch(config=self.config, es_manager=self.es_manager, modules_es_index=self.es_index) + self.grep_search = GrepSearch( + config=self.config, + opensearch_manager=self.opensearch_manager, + modules_es_index=self.opensearch_index, + ) def tearDown(self): with app.app_context(): diff --git a/utility/log.py b/utility/log.py index 66b188ac..69bef026 100644 --- a/utility/log.py +++ b/utility/log.py @@ -41,7 +41,7 @@ def get_logger(name: str, file_name_path: str = 'yang.log', level: int = logging handler = logging.FileHandler(file_name_path) handler.setFormatter(logging.Formatter(format, datefmt)) logger = logging.getLogger(name) - logging.getLogger('elasticsearch').setLevel(logging.ERROR) + logging.getLogger('opensearch').setLevel(logging.ERROR) logger.setLevel(level) if len(logger.handlers) == 0: logger.addHandler(handler) diff --git a/utility/elasticsearch_util.py b/utility/opensearch_util.py similarity index 94% rename from utility/elasticsearch_util.py rename to utility/opensearch_util.py index 6bebd40a..664f0dc2 100644 --- a/utility/elasticsearch_util.py +++ b/utility/opensearch_util.py @@ -7,8 +7,8 @@ import requests -from elasticsearchIndexing.es_manager import ESManager -from elasticsearchIndexing.models.es_indices import ESIndices +from opensearch_indexing.models.opensearch_indices import OpenSearchIndices +from opensearch_indexing.opensearch_manager import OpenSearchManager from redisConnections.redisConnection import RedisConnection from utility import message_factory from utility.staticVariables import json_headers @@ -33,7 +33,7 @@ class ESIndexingPaths: def send_for_es_indexing(body_to_send: ESIndexingBody, logger: logging.Logger, paths: ESIndexingPaths): """ - Creates a json file that will be used for Elasticsearch indexing. + Creates a json file that will be used for OpenSearch indexing. Arguments: :param body_to_send: (dict) body that needs to be indexed @@ -151,7 +151,7 @@ def prepare_for_es_indexing( :param force_indexing (bool) Whether we should force indexing even if module exists in cache. 
""" mf = message_factory.MessageFactory() - es_manager = ESManager() + opensearch_manager = OpenSearchManager() with open(modules_to_index, 'r') as reader: sdos_json = json.load(reader) logger.debug(f'{len(sdos_json.get("module", []))} modules loaded from prepare.json') @@ -162,14 +162,14 @@ def prepare_for_es_indexing( response = requests.get(url, headers=json_headers) code = response.status_code - in_es = False + in_opensearch = False in_redis = code in [200, 201, 204] if in_redis: - in_es = es_manager.document_exists(ESIndices.AUTOCOMPLETE, module) + in_opensearch = opensearch_manager.document_exists(OpenSearchIndices.AUTOCOMPLETE, module) else: load_new_files_to_github = True - if force_indexing or not in_es or not in_redis: + if force_indexing or not in_opensearch or not in_redis: path = f'{save_file_dir}/{module.get("name")}@{module.get("revision")}.yang' key = f'{module["name"]}@{module["revision"]}/{module["organization"]}' post_body[key] = path diff --git a/utility/remove_unused.py b/utility/remove_unused.py index 7bbd81b0..e7fe9071 100644 --- a/utility/remove_unused.py +++ b/utility/remove_unused.py @@ -32,7 +32,7 @@ from datetime import datetime as dt import utility.log as log -from elasticsearchIndexing.es_snapshots_manager import ESSnapshotsManager +from opensearch_indexing.opensearch_snapshots_manager import OpenSearchSnapshotsManager from utility.create_config import create_config from utility.staticVariables import BACKUP_DATE_FORMAT from utility.util import get_list_of_backups, job_log @@ -64,7 +64,7 @@ def main(): log_directory = config.get('Directory-Section', 'logs') temp_dir = config.get('Directory-Section', 'temp') cache_directory = config.get('Directory-Section', 'cache') - es_aws = config.get('DB-Section', 'es-aws') + opensearch_aws = config.get('DB-Section', 'opensearch-aws') log_file_path = os.path.join(log_directory, 'jobs', 'removeUnused.log') logger = log.get_logger('remove_unused', log_file_path) @@ -117,14 +117,14 @@ def main(): except PermissionError: logger.exception(f'Problem while deleting {dir}') - if es_aws != 'True': - logger.info('Removing old elasticsearch snapshots') - es_snapshots_manager = ESSnapshotsManager() - es_snapshots_manager.create_snapshot_repository(args.compress) - sorted_snapshots = es_snapshots_manager.get_sorted_snapshots() + if opensearch_aws != 'True': + logger.info('Removing old opensea snapshots') + opensearch_snapshots_manager = OpenSearchSnapshotsManager() + opensearch_snapshots_manager.create_snapshot_repository(args.compress) + sorted_snapshots = opensearch_snapshots_manager.get_sorted_snapshots() for snapshot in sorted_snapshots[:-5]: - es_snapshots_manager.delete_snapshot(snapshot['snapshot']) + opensearch_snapshots_manager.delete_snapshot(snapshot['snapshot']) logger.info('Removing old cache json files') remove_old_backups(os.path.join(cache_directory, 'confd')) diff --git a/utility/script_config_dict.py b/utility/script_config_dict.py index 9bbe8e93..ea0fb5b2 100644 --- a/utility/script_config_dict.py +++ b/utility/script_config_dict.py @@ -42,7 +42,7 @@ class ScriptConfigInfo(BaseScriptConfigInfo, total=False): script_config_dict: dict[str, ScriptConfigInfo] = { 'process_changed_mods': { - 'help': 'Process added/changed/deleted modules, and reflect changes in Elasticsearch.', + 'help': 'Process added/changed/deleted modules, and reflect changes in OpenSearch.', 'args': [ { 'flag': '--config-path', @@ -53,7 +53,7 @@ class ScriptConfigInfo(BaseScriptConfigInfo, total=False): ], }, 'process-drafts': { - 'help': 'Add new 
drafts to the DRAFTS Elasticsearch index.', + 'help': 'Add new drafts to the DRAFTS OpenSearch index.', 'args': [ { 'flag': '--config-path', @@ -277,12 +277,12 @@ class ScriptConfigInfo(BaseScriptConfigInfo, total=False): ' to confd directory' ), }, - 'elk_fill': { + 'opensearch_fill': { 'help': ( 'This script creates a dictionary of all the modules currently stored in the Redis database. ' 'The key is in @/ format and the value is the path to the .yang file. ' 'The entire dictionary is then stored in a JSON file - ' - 'the content of this JSON file can then be used as an input for indexing modules into Elasticsearch.' + 'the content of this JSON file can then be used as an input for indexing modules into OpenSearch.' ), 'args': [ { @@ -293,8 +293,8 @@ class ScriptConfigInfo(BaseScriptConfigInfo, total=False): }, ], }, - 'elk_recovery': { - 'help': ' Create or restore backups of our Elasticsearch database. ', + 'opensearch_recovery': { + 'help': ' Create or restore backups of our OpenSearch database. ', 'mutually_exclusive_args': [ [ {