Skip to content

Commit

Permalink
Rename old push_to_hub configs to "default" in dataset_infos (huggingface#6218)
Browse files Browse the repository at this point in the history

* rename old push_to_hub configs to "default" in dataset_infos

* always rename if there is one single config in json
  • Loading branch information
lhoestq authored Sep 6, 2023
1 parent 4de930c commit b3ac3b3
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,7 @@ def get_module(self) -> DatasetModule:
if self.data_files is not None or not metadata_configs:
builder_kwargs["data_files"] = data_files
builder_kwargs.update(default_builder_kwargs) # from _EXTENSION_TO_MODULE
# this file is deprecated and was created automatically in old versions of push_to_hub
if os.path.isfile(os.path.join(self.path, config.DATASETDICT_INFOS_FILENAME)):
with open(os.path.join(self.path, config.DATASETDICT_INFOS_FILENAME), encoding="utf-8") as f:
legacy_dataset_infos = DatasetInfosDict(
Expand All @@ -911,6 +912,10 @@ def get_module(self) -> DatasetModule:
for config_name, dataset_info_dict in json.load(f).items()
}
)
if len(legacy_dataset_infos) == 1:
# old config e.g. named "username--dataset_name"
legacy_config_name = next(iter(legacy_dataset_infos))
legacy_dataset_infos["default"] = legacy_dataset_infos.pop(legacy_config_name)
legacy_dataset_infos.update(dataset_infos)
dataset_infos = legacy_dataset_infos
if default_config_name is None and len(dataset_infos) == 1:
Expand Down Expand Up @@ -1096,6 +1101,7 @@ def get_module(self) -> DatasetModule:
if download_config.download_desc is None:
download_config.download_desc = "Downloading metadata"
try:
# this file is deprecated and was created automatically in old versions of push_to_hub
dataset_infos_path = cached_path(
hf_hub_url(self.name, config.DATASETDICT_INFOS_FILENAME, revision=self.revision),
download_config=download_config,
Expand All @@ -1107,6 +1113,10 @@ def get_module(self) -> DatasetModule:
for config_name, dataset_info_dict in json.load(f).items()
}
)
if len(legacy_dataset_infos) == 1:
# old config e.g. named "username--dataset_name"
legacy_config_name = next(iter(legacy_dataset_infos))
legacy_dataset_infos["default"] = legacy_dataset_infos.pop(legacy_config_name)
legacy_dataset_infos.update(dataset_infos)
dataset_infos = legacy_dataset_infos
except FileNotFoundError:
Expand Down

0 comments on commit b3ac3b3

Please sign in to comment.