Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#339 : Remove separate pandas installations in nox tests #368

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .ci/run-repository.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ if [[ "$TASK_TYPE" == "test" ]]; then
--env "TEST_TYPE=server" \
--name opensearch-py-ml-test-runner \
opensearch-project/opensearch-py-ml \
nox -s "test-${PYTHON_VERSION}(pandas_version='${PANDAS_VERSION}')"
nox -s "test-${PYTHON_VERSION}"

docker cp opensearch-py-ml-test-runner:/code/opensearch-py-ml/junit/ ./junit/
docker rm opensearch-py-ml-test-runner
Expand All @@ -61,7 +61,7 @@ elif [[ "$TASK_TYPE" == "doc" ]]; then
--env "TEST_TYPE=server" \
--name opensearch-py-ml-doc-runner \
opensearch-project/opensearch-py-ml \
nox -s "docs-${PYTHON_VERSION}(pandas_version='${PANDAS_VERSION}')"
nox -s "docs-${PYTHON_VERSION}"

docker cp opensearch-py-ml-doc-runner:/code/opensearch-py-ml/docs/build/ ./docs/
docker rm opensearch-py-ml-doc-runner
Expand All @@ -84,7 +84,7 @@ elif [[ "$TASK_TYPE" == "trace" ]]; then
--env "TEST_TYPE=server" \
--name opensearch-py-ml-trace-runner \
opensearch-project/opensearch-py-ml \
nox -s "trace-${PYTHON_VERSION}(pandas_version='${PANDAS_VERSION}')" -- ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} -ed ${EMBEDDING_DIMENSION} -pm ${POOLING_MODE} -md ${MODEL_DESCRIPTION:+"$MODEL_DESCRIPTION"}
nox -s "trace-${PYTHON_VERSION}" -- ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} -ed ${EMBEDDING_DIMENSION} -pm ${POOLING_MODE} -md ${MODEL_DESCRIPTION:+"$MODEL_DESCRIPTION"}

docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/upload/ ./upload/
docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/trace_output/ ./trace_output/
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Enable the model upload workflow to add model_content_size_in_bytes & model_content_hash_value to model config automatically @thanawan-atc ([#291](https://github.com/opensearch-project/opensearch-py-ml/pull/291))
- Update pretrained_models_all_versions.json (2023-10-18 18:11:34) by @dhrubo-os ([#322](https://github.com/opensearch-project/opensearch-py-ml/pull/322))
- Update model upload history - sentence-transformers/paraphrase-mpnet-base-v2 (v.1.0.0)(BOTH) by @dhrubo-os ([#321](https://github.com/opensearch-project/opensearch-py-ml/pull/321))
- Replaced usage of `is_datetime_or_timedelta_dtype` with `is_timedelta64_dtype` and `is_datetime64_any_dtype`([#316](https://github.com/opensearch-project/opensearch-py-ml/pull/316))
- Replaced usage of `is_datetime_or_timedelta_dtype` with `is_timedelta64_dtype` and `is_datetime64_any_dtype` by @rawwar ([#316](https://github.com/opensearch-project/opensearch-py-ml/pull/316))
- Use try-except-else block for handling unexpected exceptions during integration tests by @rawwar ([#370](https://github.com/opensearch-project/opensearch-py-ml/pull/370))
- Removed pandas version pin in nox tests by @rawwar ([#368](https://github.com/opensearch-project/opensearch-py-ml/pull/368))

### Fixed
- Enable make_model_config_json to add model description to model config file by @thanawan-atc in ([#203](https://github.com/opensearch-project/opensearch-py-ml/pull/203))
Expand Down
1 change: 0 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import os
import sys


sys.path.insert(0, os.path.abspath("../../"))

# -- Project information -----------------------------------------------------
Expand Down
12 changes: 3 additions & 9 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,14 @@ def lint(session):


@nox.session(python=["3.8", "3.9", "3.10"])
@nox.parametrize("pandas_version", ["1.5.0"])
def test(session, pandas_version: str):
def test(session):
session.install(
"-r",
"requirements-dev.txt",
"--timeout",
"1500",
)
session.install(".")
session.run("python", "-m", "pip", "install", f"pandas~={pandas_version}")
session.run("python", "-m", "setup_tests")

junit_xml = join(abspath(dirname(__file__)), "junit", "opensearch-py-ml-junit.xml")
Expand Down Expand Up @@ -140,12 +138,10 @@ def test(session, pandas_version: str):


@nox.session(python=["3.9"])
@nox.parametrize("pandas_version", ["1.5.0"])
def docs(session, pandas_version: str):
def docs(session):
# Run this so users get an error if they don't have Pandoc installed.
session.install("-r", "docs/requirements-docs.txt")
session.install(".")
session.run("python", "-m", "pip", "install", f"pandas~={pandas_version}")

session.cd("docs")
session.run("make", "clean", external=True)
Expand All @@ -156,16 +152,14 @@ def docs(session, pandas_version: str):
# to automate the action workflow, leveraging its ability to set up the environment
# required for model autotracing.
@nox.session(python=["3.9"])
@nox.parametrize("pandas_version", ["1.5.0"])
def trace(session, pandas_version: str):
def trace(session):
session.install(
"-r",
"requirements-dev.txt",
"--timeout",
"1500",
)
session.install(".")
session.run("python", "-m", "pip", "install", f"pandas~={pandas_version}")

session.run(
"python",
Expand Down
6 changes: 4 additions & 2 deletions opensearch_py_ml/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,8 +978,10 @@ def _sizeof_fmt(num: float, size_qualifier: str) -> str:
elif verbose is False: # specifically set to False, not nesc None
_non_verbose_repr()
else:
_non_verbose_repr() if exceeds_info_cols else _verbose_repr(
number_of_columns
(
_non_verbose_repr()
if exceeds_info_cols
else _verbose_repr(number_of_columns)
)

# pandas 0.25.1 uses get_dtype_counts() here. This
Expand Down
6 changes: 3 additions & 3 deletions opensearch_py_ml/field_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,9 @@ def find_aggregatable(row, df):
try:
series = df.loc[df.os_field_name == os_field_name_keyword]
if not series.empty and series.is_aggregatable.squeeze():
row_as_dict[
"aggregatable_os_field_name"
] = os_field_name_keyword
row_as_dict["aggregatable_os_field_name"] = (
os_field_name_keyword
)
else:
row_as_dict["aggregatable_os_field_name"] = None
except KeyError:
Expand Down
6 changes: 3 additions & 3 deletions opensearch_py_ml/ml_commons/model_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def _register_model(
model_meta_json[TOTAL_CHUNKS_FIELD] = total_num_chunks

if MODEL_CONTENT_SIZE_IN_BYTES_FIELD not in model_meta_json:
model_meta_json[
MODEL_CONTENT_SIZE_IN_BYTES_FIELD
] = model_content_size_in_bytes
model_meta_json[MODEL_CONTENT_SIZE_IN_BYTES_FIELD] = (
model_content_size_in_bytes
)
if MODEL_CONTENT_HASH_VALUE not in model_meta_json:
# Generate the sha1 hash for the model zip file
hash_val_model_file = _generate_model_content_hash_value(model_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ def merge_events(
send = ends[ix]
sevents = candidates[ix, :]

merged: List[
Dict[str, torch.Tensor]
] = [] # merge in linear pass over time dimension
merged: List[Dict[str, torch.Tensor]] = (
[]
) # merge in linear pass over time dimension
currstart = torch.tensor([-1])
currend = torch.tensor([-1])
currevent = torch.ones(T) * -1.0
Expand Down
6 changes: 3 additions & 3 deletions opensearch_py_ml/ml_models/sentencetransformermodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1304,9 +1304,9 @@ def make_model_config_json(
model_config_content["model_content_size_in_bytes"] = os.stat(
model_zip_file_path
).st_size
model_config_content[
"model_content_hash_value"
] = _generate_model_content_hash_value(model_zip_file_path)
model_config_content["model_content_hash_value"] = (
_generate_model_content_hash_value(model_zip_file_path)
)

if verbose:
print("generating ml-commons_model_config.json file...\n")
Expand Down
8 changes: 5 additions & 3 deletions opensearch_py_ml/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,9 +1159,11 @@ def _map_pd_aggs_to_os_aggs(
# piggy-back on that single aggregation.
if extended_stats_calls >= 2:
os_aggs = [
("extended_stats", os_agg)
if os_agg in extended_stats_os_aggs
else os_agg
(
("extended_stats", os_agg)
if os_agg in extended_stats_os_aggs
else os_agg
)
for os_agg in os_aggs
]

Expand Down
8 changes: 5 additions & 3 deletions tests/dataframe/test_iterrows_itertuples_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@ def assert_tuples_almost_equal(left, right):
# Shim which uses pytest.approx() for floating point values inside tuples.
assert len(left) == len(right)
assert all(
(lt == rt) # Not floats? Use ==
if not isinstance(lt, float) and not isinstance(rt, float)
else (lt == pytest.approx(rt)) # If both are floats use pytest.approx()
(
(lt == rt) # Not floats? Use ==
if not isinstance(lt, float) and not isinstance(rt, float)
else (lt == pytest.approx(rt))
) # If both are floats use pytest.approx()
for lt, rt in zip(left, right)
)

Expand Down
6 changes: 3 additions & 3 deletions utils/model_uploader/update_models_upload_history_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ def create_model_json_obj(
"Model ID": model_id,
"Model Version": model_version,
"Model Format": model_format,
"Embedding Dimension": str(embedding_dimension)
if embedding_dimension is not None
else "N/A",
"Embedding Dimension": (
str(embedding_dimension) if embedding_dimension is not None else "N/A"
),
"Pooling Mode": pooling_mode if pooling_mode is not None else "N/A",
"Workflow Run ID": workflow_id if workflow_id is not None else "-",
}
Expand Down
Loading