Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport 6982 to 1.4.latest #7074

Merged
merged 3 commits into from
Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230215-104536.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Make use of hashlib.md5() FIPS compliant
time: 2023-02-15T10:45:36.755797+01:00
custom:
Author: nielspardon
Issue: "6900"
5 changes: 2 additions & 3 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
)
from typing_extensions import Protocol, runtime_checkable

import hashlib
import os

from dbt import flags, deprecations
Expand All @@ -30,7 +29,7 @@
from dbt.helper_types import NoValue
from dbt.semver import VersionSpecifier, versions_compatible
from dbt.version import get_installed_version
from dbt.utils import MultiDict
from dbt.utils import MultiDict, md5
from dbt.node_types import NodeType
from dbt.config.selectors import SelectorDict
from dbt.contracts.project import (
Expand Down Expand Up @@ -663,7 +662,7 @@ def from_project_root(
return partial.render(renderer)

def hashed_name(self):
return hashlib.md5(self.project_name.encode("utf-8")).hexdigest()
return md5(self.project_name)

def get_selector(self, name: str) -> Union[SelectionSpec, bool]:
if name not in self.selectors:
Expand Down
5 changes: 2 additions & 3 deletions core/dbt/contracts/connection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import abc
import itertools
import hashlib
from dataclasses import dataclass, field
from typing import (
Any,
Expand All @@ -13,7 +12,7 @@
Callable,
)
from dbt.exceptions import DbtInternalError
from dbt.utils import translate_aliases
from dbt.utils import translate_aliases, md5
from dbt.events.functions import fire_event
from dbt.events.types import NewConnectionOpening
from dbt.events.contextvars import get_node_info
Expand Down Expand Up @@ -142,7 +141,7 @@ def unique_field(self) -> str:
raise NotImplementedError("unique_field not implemented for base credentials class")

def hashed_unique_field(self) -> str:
return hashlib.md5(self.unique_field.encode("utf-8")).hexdigest()
return md5(self.unique_field)

def connection_info(self, *, with_aliases: bool = False) -> Iterable[Tuple[str, Any]]:
"""Return an ordered iterator of key/value pairs for pretty-printing."""
Expand Down
4 changes: 2 additions & 2 deletions core/dbt/deps/git.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import hashlib
from typing import List, Optional

from dbt.clients import git, system
Expand All @@ -12,10 +11,11 @@
from dbt.exceptions import ExecutableError, MultipleVersionGitDepsError
from dbt.events.functions import fire_event, warn_or_error
from dbt.events.types import EnsureGitInstalled, DepsUnpinned
from dbt.utils import md5


def md5sum(s: str):
return hashlib.md5(s.encode("latin-1")).hexdigest()
return md5(s, "latin-1")


class GitPackageMixin:
Expand Down
4 changes: 2 additions & 2 deletions core/dbt/parser/generic_test_builders.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import re
from copy import deepcopy
from dataclasses import dataclass
Expand Down Expand Up @@ -35,6 +34,7 @@
UndefinedMacroError,
)
from dbt.parser.search import FileBlock
from dbt.utils import md5


def synthesize_generic_test_names(
Expand Down Expand Up @@ -72,7 +72,7 @@ def synthesize_generic_test_names(

if len(full_name) >= 64:
test_trunc_identifier = test_identifier[:30]
label = hashlib.md5(full_name.encode("utf-8")).hexdigest()
label = md5(full_name)
short_name = "{}_{}".format(test_trunc_identifier, label)
else:
short_name = full_name
Expand Down
7 changes: 3 additions & 4 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pathlib

from abc import ABCMeta, abstractmethod
from hashlib import md5
from typing import Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type

from dbt.dataclass_schema import ValidationError, dbtClassMixin
Expand Down Expand Up @@ -80,7 +79,7 @@
TestBlock,
Testable,
)
from dbt.utils import get_pseudo_test_path, coerce_dict_str
from dbt.utils import get_pseudo_test_path, coerce_dict_str, md5


TestDef = Union[str, Dict[str, Any]]
Expand Down Expand Up @@ -222,8 +221,8 @@ def get_hashable_md(data: Union[str, int, float, List, Dict]) -> Union[str, List
return str(data)

hashable_metadata = repr(get_hashable_md(test_metadata))
hash_string = "".join([name, hashable_metadata]).encode("utf-8")
test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:]
hash_string = "".join([name, hashable_metadata])
test_hash = md5(hash_string)[-HASH_LENGTH:]

dct = {
"alias": name,
Expand Down
12 changes: 8 additions & 4 deletions core/dbt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import os
import requests
import sys
from tarfile import ReadError
import time
from pathlib import PosixPath, WindowsPath
Expand Down Expand Up @@ -252,16 +253,19 @@ def get_pseudo_hook_path(hook_name):
return os.path.join(*path_parts)


def md5(string):
return hashlib.md5(string.encode("utf-8")).hexdigest()
def md5(string, charset="utf-8"):
if sys.version_info >= (3, 9):
return hashlib.md5(string.encode(charset), usedforsecurity=False).hexdigest()
else:
return hashlib.md5(string.encode(charset)).hexdigest()


def get_hash(model):
return hashlib.md5(model.unique_id.encode("utf-8")).hexdigest()
return md5(model.unique_id)


def get_hashed_contents(model):
return hashlib.md5(model.raw_code.encode("utf-8")).hexdigest()
return md5(model.raw_code)


def flatten_nodes(dep_list):
Expand Down