From 57126b177a2ac86ea56faa1e20a0cc82c6e8459f Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 27 Nov 2023 13:01:46 +0800 Subject: [PATCH] Remove Data Qualifiers (#183) * feat: remove data qualifiers * feat: more qualifier removals --- README.md | 13 ----- demo_resources/demo_dataset.yml | 10 ---- demo_resources/demo_policy.yml | 1 - demo_resources/demo_system.yml | 2 - mkdocs/docs/explorer.md | 4 -- mkdocs/docs/index.md | 17 +------ mkdocs/docs/js/vis.js | 23 +-------- mkdocs/docs/js/vis2-absolute.js | 23 +-------- mkdocs/docs/js/vis2.js | 23 +-------- mkdocs/docs/resources/dataset.md | 6 --- mkdocs/docs/resources/policy.md | 8 +--- mkdocs/docs/resources/system.md | 6 +-- mkdocs/mkdocs.yml | 3 +- scripts/export_default_taxonomy.py | 1 - src/fideslang/__init__.py | 2 - src/fideslang/default_taxonomy/__init__.py | 2 - .../default_taxonomy/data_qualifiers.py | 45 ------------------ src/fideslang/default_taxonomy/utils.py | 4 +- src/fideslang/models.py | 47 +------------------ tests/conftest.py | 10 ---- .../failing_dataset_collection_taxonomy.yml | 4 -- tests/data/failing_dataset_field_taxonomy.yml | 4 -- tests/data/failing_dataset_taxonomy.yml | 4 -- tests/data/failing_declaration_taxonomy.yml | 2 - tests/data/failing_nested_dataset.yml | 2 - tests/data/passing_declaration_taxonomy.yml | 2 - tests/fideslang/test_default_taxonomy.py | 7 ++- tests/fideslang/test_models.py | 31 ------------ tests/fideslang/test_relationships.py | 12 ----- tests/fideslang/test_validation.py | 7 +-- 30 files changed, 18 insertions(+), 307 deletions(-) delete mode 100644 src/fideslang/default_taxonomy/data_qualifiers.py diff --git a/README.md b/README.md index d2d4729f..b8462107 100644 --- a/README.md +++ b/README.md @@ -48,19 +48,6 @@ Examples of a Data Subject are: Learn more about [Data Subject Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_subjects/). -### 4. Data Identification Qualifiers - -Data Identification Qualifiers describe the degree of identification of the given data. -Think of this as a spectrum: on one end is completely anonymous data, i.e. it is impossible to identify an individual from it; on the other end is data that specifically identifies an individual. - -Along this spectrum are labels that describe the degree of identification that a given data might provide, such as: - -- `identified_data` -- `anonymized_data` -- `aggregated_data` - -Learn more about [Data Identification Qualifiers in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_qualifiers/). - ### Extensibility & Interoperability The taxonomy is designed to support common privacy compliance regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. diff --git a/demo_resources/demo_dataset.yml b/demo_resources/demo_dataset.yml index 29867d45..97be8205 100644 --- a/demo_resources/demo_dataset.yml +++ b/demo_resources/demo_dataset.yml @@ -5,8 +5,6 @@ dataset: description: Data collected about users for our analytics system. meta: null data_categories: [] - data_qualifiers: - - aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: "30 days after account deletion" third_country_transfers: - GBR @@ -15,37 +13,29 @@ dataset: - name: users description: User information data_categories: [] - data_qualifiers: - - aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified fields: - name: created_at description: User's creation timestamp data_categories: - system.operations - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: email description: User's Email data_categories: - user.contact.email - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: Account termination - name: first_name description: User's first name data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: Account termination - name: food_preference description: User's favorite food data_categories: [] - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: state description: User's State data_categories: - user.contact.state - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: uuid description: User's unique ID data_categories: - user.unique_id - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/demo_resources/demo_policy.yml b/demo_resources/demo_policy.yml index 61ff9c3b..159175bb 100644 --- a/demo_resources/demo_policy.yml +++ b/demo_resources/demo_policy.yml @@ -17,4 +17,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/demo_resources/demo_system.yml b/demo_resources/demo_system.yml index 72448167..570765b9 100644 --- a/demo_resources/demo_system.yml +++ b/demo_resources/demo_system.yml @@ -23,7 +23,6 @@ system: data_use: improve.system data_subjects: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified ingress: - demo_users_dataset @@ -40,4 +39,3 @@ system: data_use: advertising data_subjects: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/mkdocs/docs/explorer.md b/mkdocs/docs/explorer.md index 61d29ac8..b3f7122a 100644 --- a/mkdocs/docs/explorer.md +++ b/mkdocs/docs/explorer.md @@ -9,7 +9,6 @@ The taxonomy explorer is a useful way to visualize and review the taxonomy for t -
@@ -35,6 +34,3 @@ The taxonomy explorer is a useful way to visualize and review the taxonomy for t
- - - diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index f0b2c567..b6ae41c8 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -17,7 +17,6 @@ The Fides taxonomy, or categorization, is made up of four main classification gr -
@@ -59,6 +58,7 @@ Data Uses are also hierarchical with natural inheritance, meaning you can easily Learn more about [Data Uses in the taxonomy reference now](taxonomy/data_uses.md). ### 3. Data Subjects + Data Subjects is a label commonly used in the regulatory world to describe the users of a system who's data is being processed. In many systems a generic user label may be sufficient, however the taxonomy is intended to provide greater control through specificity where needed. Examples of this are: @@ -71,23 +71,10 @@ Examples of this are: Learn more about [Data Subjects in the taxonomy reference now](taxonomy/data_subjects.md). -### 4. Data Qualifiers -Data Qualifiers describe the degree of identification of the given data. Think of this as a spectrum: on one end is completely anonymous data, i.e. it is impossible to identify an individual from it, and on the other end is data that specifically identifies an individual. - -Along this spectrum are labels that describe the degree of identification that a given data might provide, such as: - -- `identified` -- `anonymized` -- `aggregated` - -Learn more about [Data Qualifiers in the taxonomy reference now](taxonomy/data_qualifiers.md). - ### Extensibility and Interoperability + The taxonomy is designed to support common privacy compliance regulations `and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing class structures to ensure interoperability inside and outside your organization. If you have suggestions for missing classifications or concepts, please submit them for addition. - - - diff --git a/mkdocs/docs/js/vis.js b/mkdocs/docs/js/vis.js index 063d4fb2..014d02fe 100644 --- a/mkdocs/docs/js/vis.js +++ b/mkdocs/docs/js/vis.js @@ -733,8 +733,7 @@ Promise.all([ d3.csv("csv/data_categories.csv"), d3.csv("csv/data_uses.csv"), d3.csv("csv/data_subjects.csv"), - d3.csv("csv/data_qualifiers.csv"), -]).then(([categoriesCSV, usesCSV, subjectsCSV, qualifiersCSV]) => { +]).then(([categoriesCSV, usesCSV, subjectsCSV]) => { const tooltip = new VisTooltip(); const colors = { @@ -820,24 +819,6 @@ Promise.all([ "#f73ffc", "#fb409e", ]), - qualifiers: d3 - .scaleOrdinal() - .domain([ - "data_qualifier", - "aggregated", - "aggregated.anonymized", - "aggregated.anonymized.unlinked_pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - ]) - .range([ - "#2a3045", - "#0861ce", - "#8459cc", - "#c14cbb", - "#ed43a0", - "#ff4a7f", - ]), }; const accessor = { @@ -859,13 +840,11 @@ Promise.all([ const categoriesRoot = stratify(categoriesCSV); const usesRoot = stratify(usesCSV); const subjectsRoot = stratify(subjectsCSV); - const qualifiersRoot = stratify(qualifiersCSV); const chartData = { categories: categoriesRoot, uses: usesRoot, subjects: subjectsRoot, - qualifiers: qualifiersRoot, }; const chartDataButtons = d3 .select("#data-control") diff --git a/mkdocs/docs/js/vis2-absolute.js b/mkdocs/docs/js/vis2-absolute.js index 9a001726..71eef370 100644 --- a/mkdocs/docs/js/vis2-absolute.js +++ b/mkdocs/docs/js/vis2-absolute.js @@ -733,8 +733,7 @@ Promise.all([ d3.csv("https://ethyca.github.io/fideslang/csv/data_categories.csv"), d3.csv("https://ethyca.github.io/fideslang/csv/data_uses.csv"), d3.csv("https://ethyca.github.io/fideslang/csv/data_subjects.csv"), - d3.csv("https://ethyca.github.io/fideslang/csv/data_qualifiers.csv"), -]).then(([categoriesCSV, usesCSV, subjectsCSV, qualifiersCSV]) => { +]).then(([categoriesCSV, usesCSV, subjectsCSV]) => { const tooltip = new VisTooltip(); const colors = { @@ -820,24 +819,6 @@ Promise.all([ "#f73ffc", "#fb409e", ]), - qualifiers: d3 - .scaleOrdinal() - .domain([ - "data_qualifier", - "aggregated", - "aggregated.anonymized", - "aggregated.anonymized.unlinked_pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - ]) - .range([ - "#2a3045", - "#0861ce", - "#8459cc", - "#c14cbb", - "#ed43a0", - "#ff4a7f", - ]), }; const accessor = { @@ -859,13 +840,11 @@ Promise.all([ const categoriesRoot = stratify(categoriesCSV); const usesRoot = stratify(usesCSV); const subjectsRoot = stratify(subjectsCSV); - const qualifiersRoot = stratify(qualifiersCSV); const chartData = { categories: categoriesRoot, uses: usesRoot, subjects: subjectsRoot, - qualifiers: qualifiersRoot, }; const chartDataButtons = d3 .select("#data-control") diff --git a/mkdocs/docs/js/vis2.js b/mkdocs/docs/js/vis2.js index c06794c8..93051cea 100644 --- a/mkdocs/docs/js/vis2.js +++ b/mkdocs/docs/js/vis2.js @@ -733,8 +733,7 @@ Promise.all([ d3.csv("../csv/data_categories.csv"), d3.csv("../csv/data_uses.csv"), d3.csv("../csv/data_subjects.csv"), - d3.csv("../csv/data_qualifiers.csv"), -]).then(([categoriesCSV, usesCSV, subjectsCSV, qualifiersCSV]) => { +]).then(([categoriesCSV, usesCSV, subjectsCSV]) => { const tooltip = new VisTooltip(); const colors = { @@ -820,24 +819,6 @@ Promise.all([ "#f73ffc", "#fb409e", ]), - qualifiers: d3 - .scaleOrdinal() - .domain([ - "data_qualifier", - "aggregated", - "aggregated.anonymized", - "aggregated.anonymized.unlinked_pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - ]) - .range([ - "#2a3045", - "#0861ce", - "#8459cc", - "#c14cbb", - "#ed43a0", - "#ff4a7f", - ]), }; const accessor = { @@ -859,13 +840,11 @@ Promise.all([ const categoriesRoot = stratify(categoriesCSV); const usesRoot = stratify(usesCSV); const subjectsRoot = stratify(subjectsCSV); - const qualifiersRoot = stratify(qualifiersCSV); const chartData = { categories: categoriesRoot, uses: usesRoot, subjects: subjectsRoot, - qualifiers: qualifiersRoot, }; const chartDataButtons = d3 .select("#data-control") diff --git a/mkdocs/docs/resources/dataset.md b/mkdocs/docs/resources/dataset.md index 3c70c521..1a2a20cd 100644 --- a/mkdocs/docs/resources/dataset.md +++ b/mkdocs/docs/resources/dataset.md @@ -56,7 +56,6 @@ An optional array of contact information if a Joint Controller exists. This info An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset **data_categories**     [_string_]
-**data_qualifiers**     [_string_]
Arrays of Data Category and Data Qualifier resources, identified by `fides_key`, that apply to all collections in the Dataset. @@ -73,7 +72,6 @@ A UI-friendly label for the collection. A human-readable description of the collection. **collections.data_categories**     [_string_]
-**collections.data_qualifiers**     [_string_]
Arrays of Data Category and Data Qualifier resources, identified by `fides_key`, that apply to all fields in the collection. @@ -97,10 +95,6 @@ A human-readable description of the field. Arrays of Data Categories, identified by `fides_key`, that applies to this field. -**collections.fields.data_qualifier**     _string_
- -A Data Qualifier that applies to this field. Note that this field holds a single value, therefore, the property name is singular. - **collections.fields.retention**  _string_ An optional string to describe the retention policy for a field within a Dataset collection. diff --git a/mkdocs/docs/resources/policy.md b/mkdocs/docs/resources/policy.md index fa700054..ce7d1051 100644 --- a/mkdocs/docs/resources/policy.md +++ b/mkdocs/docs/resources/policy.md @@ -1,6 +1,6 @@ # Policy -A Policy is your privacy policy as code, it lists a set of acceptable and non-acceptable rules and uses all 4 privacy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`). The purpose of the policy is to state what types of data are allowed for certain usages. +A Policy is your privacy policy as code, it lists a set of acceptable and non-acceptable rules and uses all 3 privacy attributes (`data_category`, `data_use`, `data_subject`). The purpose of the policy is to state what types of data are allowed for certain usages. ``` organization @@ -34,10 +34,6 @@ The [Data Use](../../taxonomy/data_uses/) privacy attribute describes the variou The [Data Subjects](../../taxonomy/data_subjects/) privacy attribute describes the individual persons whose data your rule pertains to. -**data_qualifier**     _string_      - -The [Data Qualifier](../../taxonomy/data_qualifiers/) privacy attribute describes the acceptable or non-acceptable level of deidentification for this data. - **matches**     _enum_      * `ANY` @@ -76,7 +72,6 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified ``` **Demo manifest file:** `/fides/demo_resources/demo_policy.yml` @@ -113,7 +108,6 @@ policy: "customer" ] }, - "data_qualifier": "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified" } ] } diff --git a/mkdocs/docs/resources/system.md b/mkdocs/docs/resources/system.md index 09d27508..f5e457ec 100644 --- a/mkdocs/docs/resources/system.md +++ b/mkdocs/docs/resources/system.md @@ -1,6 +1,6 @@ # System -A System is a model for describing anything that processes data for your organization (applications, services, 3rd party APIs, etc.) and describes how these datasets are used for business functions of instances of your data resources. It contains all 4 privacy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`). +A System is a model for describing anything that processes data for your organization (applications, services, 3rd party APIs, etc.) and describes how these datasets are used for business functions of instances of your data resources. It contains all 3 privacy attributes (`data_category`, `data_use`, and `data_subject`). ``` organization @@ -64,7 +64,7 @@ The resources from which the System receives data. **privacy_declarations**     [array]      -The array of declarations describing the types of data in your system. This is a list of the privcy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`) for each of your systems. +The array of declarations describing the types of data in your system. This is a list of the privcy attributes (`data_category`, `data_use`, and `data_subject`) for each of your systems. If a dataset is referenced as part of the system, all applicable data categories set on the dataset are treated as part of the system. @@ -114,7 +114,6 @@ system: data_use: improve.system data_subjects: - customer - data_qualifier: identified_data egress: - another_demo_system ingress: @@ -166,7 +165,6 @@ system: "data_subjects": [ "customer" ], - "data_qualifier": "identified_data", "egress": ["another_demo_system"], "ingress": ["yet_another_demo_system"] } diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index cae7cf78..a76f02b7 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -13,7 +13,6 @@ nav: - Data Categories: taxonomy/data_categories.md - Data Uses: taxonomy/data_uses.md - Data Subjects: taxonomy/data_subjects.md - - Data Qualifiers: taxonomy/data_qualifiers.md - Resources: - Organization: resources/organization.md - Policy: resources/policy.md @@ -63,4 +62,4 @@ extra_css: - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.7.2/styles/default.min.css - css/fides.css - css/taxonomy.css - - css/logo.css \ No newline at end of file + - css/logo.css diff --git a/scripts/export_default_taxonomy.py b/scripts/export_default_taxonomy.py index fe1b3400..7d2d96cc 100644 --- a/scripts/export_default_taxonomy.py +++ b/scripts/export_default_taxonomy.py @@ -13,7 +13,6 @@ FILE_RESOURCE_PAIRS: Tuple[Tuple[str, str], ...] = ( ("data_categories", "data_category"), ("data_subjects", "data_subject"), - ("data_qualifiers", "data_qualifier"), ("data_uses", "data_use"), ) DATA_DIR = "data_files" diff --git a/src/fideslang/__init__.py b/src/fideslang/__init__.py index 4295434d..9d18a523 100644 --- a/src/fideslang/__init__.py +++ b/src/fideslang/__init__.py @@ -27,7 +27,6 @@ from .models import ( DataCategory, DataFlow, - DataQualifier, Dataset, DatasetField, DatasetFieldBase, @@ -51,7 +50,6 @@ FidesModelType = Union[Type[FidesModel], Type[Evaluation]] model_map: Dict[str, FidesModelType] = { "data_category": DataCategory, - "data_qualifier": DataQualifier, "data_subject": DataSubject, "data_use": DataUse, "dataset": Dataset, diff --git a/src/fideslang/default_taxonomy/__init__.py b/src/fideslang/default_taxonomy/__init__.py index 47937331..b032904b 100644 --- a/src/fideslang/default_taxonomy/__init__.py +++ b/src/fideslang/default_taxonomy/__init__.py @@ -3,7 +3,6 @@ from fideslang.models import Taxonomy from .data_categories import DEFAULT_DATA_CATEGORIES -from .data_qualifiers import DEFAULT_DATA_QUALIFIERS from .data_subjects import DEFAULT_DATA_SUBJECTS from .data_uses import DEFAULT_DATA_USES from .organizations import DEFAULT_ORGANIZATIONS @@ -14,7 +13,6 @@ DEFAULT_TAXONOMY = Taxonomy( data_category=sorted(DEFAULT_DATA_CATEGORIES, key=sort_data_types), - data_qualifier=sorted(DEFAULT_DATA_QUALIFIERS, key=sort_data_types), data_subject=sorted(DEFAULT_DATA_SUBJECTS, key=sort_data_types), data_use=sorted(DEFAULT_DATA_USES, key=sort_data_types), organization=DEFAULT_ORGANIZATIONS, diff --git a/src/fideslang/default_taxonomy/data_qualifiers.py b/src/fideslang/default_taxonomy/data_qualifiers.py deleted file mode 100644 index eb46e2cc..00000000 --- a/src/fideslang/default_taxonomy/data_qualifiers.py +++ /dev/null @@ -1,45 +0,0 @@ -from functools import partial - -from fideslang.models import DataQualifier - -from .utils import default_factory - -default_qualifier_factory = partial(default_factory, taxonomy_class=DataQualifier) - - -DEFAULT_DATA_QUALIFIERS = [ - default_qualifier_factory( - fides_key="aggregated", - organization_fides_key="default_organization", - name="Aggregated Data", - description="Statistical data that does not contain individually identifying information but includes information about groups of individuals that renders individual identification impossible.", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized", - organization_fides_key="default_organization", - name="Anonymized Data", - description="Data where all attributes have been sufficiently altered that the individaul cannot be reidentified by this data or in combination with other datasets.", - parent_key="aggregated", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized.unlinked_pseudonymized", - organization_fides_key="default_organization", - name="Unlinked Pseudonymized Data", - description="Data for which all identifiers have been substituted with unrelated values and linkages broken such that it may not be reversed, even by the party that performed the pseudonymization.", - parent_key="aggregated.anonymized", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - organization_fides_key="default_organization", - name="Pseudonymized Data", - description="Data for which all identifiers have been substituted with unrelated values, rendering the individual unidentifiable and cannot be reasonably reversed other than by the party that performed the pseudonymization.", - parent_key="aggregated.anonymized.unlinked_pseudonymized", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - organization_fides_key="default_organization", - name="Identified Data", - description="Data that directly identifies an individual.", - parent_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - ), -] diff --git a/src/fideslang/default_taxonomy/utils.py b/src/fideslang/default_taxonomy/utils.py index dc85cabf..f97cc94f 100644 --- a/src/fideslang/default_taxonomy/utils.py +++ b/src/fideslang/default_taxonomy/utils.py @@ -1,8 +1,8 @@ from typing import Dict, Union -from fideslang.models import DataCategory, DataQualifier, DataSubject, DataUse +from fideslang.models import DataCategory, DataSubject, DataUse -CustomType = Union[DataCategory, DataSubject, DataQualifier, DataUse] +CustomType = Union[DataCategory, DataSubject, DataUse] def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType: diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 599b8fd9..350caf17 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -286,15 +286,6 @@ class DataCategory(FidesModel, DefaultModel): _no_self_reference: classmethod = no_self_reference_validator -class DataQualifier(FidesModel, DefaultModel): - """The DataQualifier resource model.""" - - parent_key: Optional[FidesKey] - - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator - - class Cookies(BaseModel): """The Cookies resource model""" @@ -442,10 +433,6 @@ class MyDatasetField(DatasetFieldBase): data_categories: Optional[List[FidesKey]] = Field( description="Arrays of Data Categories, identified by `fides_key`, that applies to this field.", ) - data_qualifier: FidesKey = Field( - default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - description="A Data Qualifier that applies to this field. Note that this field holds a single value, therefore, the property name is singular.", - ) retention: Optional[str] = Field( description="An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", ) @@ -619,10 +606,6 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): data_categories: Optional[List[FidesKey]] = Field( description="Array of Data Category resources identified by `fides_key`, that apply to all fields in the collection.", ) - data_qualifier: FidesKey = Field( - default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - description="Array of Data Qualifier resources identified by `fides_key`, that apply to all fields in the collection.", - ) retention: Optional[str] = Field( description="An optional string to describe the retention policy for a Dataset collection. This field can also be applied more granularly at the field level of a Dataset.", ) @@ -687,9 +670,6 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): data_categories: Optional[List[FidesKey]] = Field( description="Array of Data Category resources identified by `fides_key`, that apply to all collections in the Dataset.", ) - data_qualifier: Optional[FidesKey] = Field( - description="Deprecated. Array of Data Qualifier resources identified by `fides_key`, that apply to all collections in the Dataset.", - ) fides_meta: Optional[DatasetMetadata] = Field( description=DatasetMetadata.__doc__, default=None ) @@ -720,9 +700,9 @@ def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of Dataset fields pending deprecation. """ + # TODO: Do we want to remove these for Fideslang 3? deprecated_fields = [ "joint_controller", - "data_qualifier", "retention", "third_country_transfers", ] @@ -748,9 +728,6 @@ class ViolationAttributes(BaseModel): data_uses: List[str] = Field( description="A list of data uses which led to an evaluation violation.", ) - data_qualifier: str = Field( - description="The data qualifier which led to an evaluation violation.", - ) class Violation(BaseModel): @@ -898,10 +875,6 @@ class PolicyRule(BaseModel): data_subjects: PrivacyRule = Field( description=PrivacyRule.__doc__, ) - data_qualifier: FidesKey = Field( - default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - description="The fides key of the data qualifier to be used in a privacy rule.", - ) class Policy(FidesModel): @@ -972,9 +945,6 @@ class PrivacyDeclaration(BaseModel): data_use: FidesKey = Field( description="The Data Use describing a system in a privacy declaration.", ) - data_qualifier: Optional[FidesKey] = Field( - description="Deprecated. The fides key of the data qualifier describing a system in a privacy declaration.", - ) data_subjects: List[FidesKey] = Field( default_factory=list, description="An array of data subjects describing a system in a privacy declaration.", @@ -1026,20 +996,6 @@ class PrivacyDeclaration(BaseModel): description="Cookies associated with this data use to deliver services and functionality", ) - @validator("data_qualifier") - @classmethod - def deprecate_data_qualifier(cls, value: FidesKey) -> FidesKey: - """ - Warn that the `data_qualifier` field is deprecated, if set. - """ - if value is not None: - warn( - "The data_qualifier field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - - return value - class Config: """Config for the Privacy Declaration""" @@ -1326,7 +1282,6 @@ class Taxonomy(BaseModel): data_category: List[DataCategory] = Field(default_factory=list) data_subject: Optional[List[DataSubject]] = Field(default_factory=list) data_use: Optional[List[DataUse]] = Field(default_factory=list) - data_qualifier: Optional[List[DataQualifier]] = Field(default_factory=list) dataset: Optional[List[Dataset]] = Field(default_factory=list) system: Optional[List[System]] = Field(default_factory=list) diff --git a/tests/conftest.py b/tests/conftest.py index e41f992f..89a099f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,12 +22,6 @@ def resources_dict(): name="Custom Data Category", description="Custom Data Category", ), - "data_qualifier": models.DataQualifier( - organization_fides_key=1, - fides_key="custom_data_qualifier", - name="Custom Data Qualifier", - description="Custom Data Qualifier", - ), "dataset": models.Dataset( organization_fides_key=1, fides_key="test_sample_db_dataset", @@ -47,14 +41,12 @@ def resources_dict(): description="A First Name Field", path="another.path", data_categories=["user.name"], - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", ), models.DatasetField( name="Email", description="User's Email", path="another.another.path", data_categories=["user.contact.email"], - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", ), ], ) @@ -93,7 +85,6 @@ def resources_dict(): data_categories=models.PrivacyRule(matches="NONE", values=[]), data_uses=models.PrivacyRule(matches="NONE", values=["provide.system"]), data_subjects=models.PrivacyRule(matches="ANY", values=[]), - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", ), "registry": models.Registry( organization_fides_key=1, @@ -115,7 +106,6 @@ def resources_dict(): data_categories=[], data_use="provide", data_subjects=[], - data_qualifier="aggregated_data", ) ], ), diff --git a/tests/data/failing_dataset_collection_taxonomy.yml b/tests/data/failing_dataset_collection_taxonomy.yml index 6b0ae18e..dd29d344 100644 --- a/tests/data/failing_dataset_collection_taxonomy.yml +++ b/tests/data/failing_dataset_collection_taxonomy.yml @@ -7,13 +7,11 @@ dataset: description: User's information data_categories: - user.political_opinion - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized fields: - name: First_Name description: A First Name Field data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified system: - fides_key: customer_data_sharing_system @@ -28,7 +26,6 @@ system: data_categories: - user data_use: advertising - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer ingress: @@ -53,4 +50,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated diff --git a/tests/data/failing_dataset_field_taxonomy.yml b/tests/data/failing_dataset_field_taxonomy.yml index 9891efcc..502b6eec 100644 --- a/tests/data/failing_dataset_field_taxonomy.yml +++ b/tests/data/failing_dataset_field_taxonomy.yml @@ -10,12 +10,10 @@ dataset: description: A First Name Field data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: political_opinion description: User's political opinion data_categories: - user.political_opinion - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized system: - fides_key: customer_data_sharing_system name: Customer Data Sharing System @@ -29,7 +27,6 @@ system: data_categories: - user data_use: advertising - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer ingress: @@ -54,4 +51,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated diff --git a/tests/data/failing_dataset_taxonomy.yml b/tests/data/failing_dataset_taxonomy.yml index 61e2fb92..01cff66b 100644 --- a/tests/data/failing_dataset_taxonomy.yml +++ b/tests/data/failing_dataset_taxonomy.yml @@ -4,7 +4,6 @@ dataset: description: This is a Sample Database Dataset data_categories: - user.political_opinion - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized collections: - name: users description: User's information @@ -13,7 +12,6 @@ dataset: description: A First Name Field data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified system: - fides_key: customer_data_sharing_system @@ -28,7 +26,6 @@ system: data_categories: - user data_use: advertising - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer ingress: @@ -53,4 +50,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated diff --git a/tests/data/failing_declaration_taxonomy.yml b/tests/data/failing_declaration_taxonomy.yml index 53b1a42c..ebca27b6 100644 --- a/tests/data/failing_declaration_taxonomy.yml +++ b/tests/data/failing_declaration_taxonomy.yml @@ -8,7 +8,6 @@ system: data_categories: - user.political_opinion data_use: third_party_sharing.payment_processing - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer @@ -31,4 +30,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/data/failing_nested_dataset.yml b/tests/data/failing_nested_dataset.yml index a0e005c2..2a1ac33d 100644 --- a/tests/data/failing_nested_dataset.yml +++ b/tests/data/failing_nested_dataset.yml @@ -34,7 +34,6 @@ system: data_use: improve.system data_subjects: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified ingress: - test_failing_nested_dataset_field @@ -57,4 +56,3 @@ policy: matches: OTHER values: - anonymous_user - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/data/passing_declaration_taxonomy.yml b/tests/data/passing_declaration_taxonomy.yml index d2ede112..aac30812 100644 --- a/tests/data/passing_declaration_taxonomy.yml +++ b/tests/data/passing_declaration_taxonomy.yml @@ -8,7 +8,6 @@ system: data_categories: - user.political_opinion data_use: third_party_sharing.payment_processing - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer @@ -31,4 +30,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/fideslang/test_default_taxonomy.py b/tests/fideslang/test_default_taxonomy.py index c47a749b..d04f8e9b 100644 --- a/tests/fideslang/test_default_taxonomy.py +++ b/tests/fideslang/test_default_taxonomy.py @@ -9,7 +9,6 @@ "data_category": 85, "data_use": 55, "data_subject": 15, - "data_qualifier": 5, } @@ -46,7 +45,11 @@ def test_name_uniqueness(self, data_type: str) -> None: @pytest.mark.parametrize("data_type", taxonomy_counts.keys()) def test_description_uniqueness(self, data_type: str) -> None: - keys = [x.description for x in getattr(DEFAULT_TAXONOMY, data_type) if not x.version_deprecated] + keys = [ + x.description + for x in getattr(DEFAULT_TAXONOMY, data_type) + if not x.version_deprecated + ] duplicate_keys = { key: value for key, value in Counter(keys).items() if value > 1 } diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index a9221282..45f3218d 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -69,7 +69,6 @@ class TestPrivacyDeclaration: def test_privacydeclaration_valid(self) -> None: assert PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=[], @@ -77,19 +76,6 @@ def test_privacydeclaration_valid(self) -> None: name="declaration-name", ) - def test_privacy_declaration_data_qualifier_deprecation(self) -> None: - with deprecated_call(match="data_qualifier"): - assert PrivacyDeclaration( - data_categories=[], - data_qualifier="aggregated_data", - data_subjects=[], - data_use="provide", - dataset_references=[], - egress=["test_system_2"], - ingress=["test_system_3"], - name="declaration-name", - ) - class TestSystem: # TODO: these tests are not effectively evaluating whether the provided constructor args @@ -121,7 +107,6 @@ def test_system_valid(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -172,7 +157,6 @@ def test_system_valid_nested_meta(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -209,7 +193,6 @@ def test_system_valid_no_meta(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -233,7 +216,6 @@ def test_system_valid_no_egress_or_ingress(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", name="declaration-name", @@ -262,7 +244,6 @@ def test_system_no_egress(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -293,7 +274,6 @@ def test_system_no_ingress(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -323,7 +303,6 @@ def test_system_user_ingress_valid(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", ingress=["user"], @@ -376,7 +355,6 @@ def test_expanded_system(self): "user.demographic", "user.privacy_preferences", ], - data_qualifier="aggregated_data", data_use="functional.storage", data_subjects=[], egress=["test_system_2"], @@ -500,20 +478,17 @@ def test_valid_dataset(self): } }, }, - data_qualifier="dataset_qualifier_1", data_categories=["dataset_data_category_1"], fides_meta={"after": ["other_dataset"]}, collections=[ DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fides_meta={"after": ["third_dataset.blue_collection"]}, fields=[ DatasetField( name="dataset_field_1", data_categories=["dataset_field_data_category_1"], - data_qualifier="dataset_field_data_qualifier_1", fides_meta={ "references": [ { @@ -530,14 +505,12 @@ def test_valid_dataset(self): ), DatasetCollection( name="dataset_collection_2", - data_qualifier="data_collection_data_qualifier_2", data_categories=["dataset_collection_data_category_2"], fides_meta={"after": ["orange_dataset.dataset_collection_1"]}, fields=[ DatasetField( name="dataset_field_2", data_categories=["dataset_field_data_category_2"], - data_qualifier="dataset_field_data_qualifier_2", fides_meta={ "identity": "email", "primary_key": False, @@ -552,7 +525,6 @@ def test_valid_dataset(self): @mark.parametrize( "deprecated_field,value", [ - ("data_qualifier", "dataset_qualifier_1"), ("joint_controller", {"name": "Controller_name"}), ("retention", "90 days"), ("third_country_transfers", ["IRL"]), @@ -571,7 +543,6 @@ def test_dataset_deprecated_fields(self, deprecated_field, value) -> None: def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fields=[], ) @@ -579,7 +550,6 @@ def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fides_meta={"after": ["third_dataset.blue_collection"]}, fields=[], @@ -589,7 +559,6 @@ def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fides_meta={"skip_processing": True}, fields=[], diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index d9628442..54ca20ac 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -74,7 +74,6 @@ def test_dont_find_other_objects(self) -> None: name="privacy_declaration_1", data_categories=["privacy_declaration_data_category_1"], data_use="privacy_declaration_data_use_1", - data_qualifier="privacy_declaration_data_qualifier_1", data_subjects=[ "privacy_declaration_data_subject_1", "privacy_declaration_data_subject_2", @@ -91,7 +90,6 @@ def test_dont_find_other_objects(self) -> None: "default_organization", "privacy_declaration_data_category_1", "privacy_declaration_data_use_1", - "privacy_declaration_data_qualifier_1", "privacy_declaration_data_subject_1", "privacy_declaration_data_subject_2", "privacy_declaration_data_set_1", @@ -192,7 +190,6 @@ def test_get_referenced_missing_privacy_declaration_keys(self): name="privacy_declaration_1", data_categories=["privacy_declaration_data_category_1"], data_use="privacy_declaration_data_use_1", - data_qualifier="privacy_declaration_data_qualifier_1", data_subjects=["privacy_declaration_data_subject_1"], dataset_references=["privacy_declaration_data_set_1"], ) @@ -204,7 +201,6 @@ def test_get_referenced_missing_privacy_declaration_keys(self): "default_organization", "privacy_declaration_data_category_1", "privacy_declaration_data_use_1", - "privacy_declaration_data_qualifier_1", "privacy_declaration_data_subject_1", "privacy_declaration_data_set_1", } @@ -231,7 +227,6 @@ def test_get_referenced_missing_policy_keys(self): "values": ["policy_rule_data_subject_1"], "matches": MatchesEnum.ANY, }, - data_qualifier="policy_rule_data_qualifier_1", ) ], ) @@ -242,7 +237,6 @@ def test_get_referenced_missing_policy_keys(self): "policy_rule_data_category_1", "policy_rule_data_use_1", "policy_rule_data_subject_1", - "policy_rule_data_qualifier_1", } referenced_keys = relationships.get_referenced_missing_keys(taxonomy) assert not referenced_keys.difference(expected_referenced_key) @@ -252,18 +246,15 @@ def test_get_referenced_missing_dataset_keys(self): dataset=[ Dataset( fides_key="dataset_1", - data_qualifier="dataset_qualifier_1", data_categories=["dataset_data_category_1"], collections=[ DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fields=[ DatasetField( name="dataset_field_1", data_categories=["dataset_field_data_category_1"], - data_qualifier="dataset_field_data_qualifier_1", ) ], ) @@ -273,12 +264,9 @@ def test_get_referenced_missing_dataset_keys(self): ) expected_referenced_key = { "default_organization", - "dataset_qualifier_1", "dataset_data_category_1", - "data_collection_data_qualifier_1", "dataset_collection_data_category_1", "dataset_field_data_category_1", - "dataset_field_data_qualifier_1", } referenced_keys = relationships.get_referenced_missing_keys(taxonomy) assert not referenced_keys.difference(expected_referenced_key) diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index a932f26b..7f87d141 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -8,7 +8,6 @@ Dataset, DataUse, DataSubject, - DataQualifier, DatasetCollection, DatasetField, DatasetMetadata, @@ -25,7 +24,7 @@ ) from fideslang.validation import FidesKey, FidesValidationError, valid_data_type -DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataQualifier, DataSubject] +DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataSubject] @pytest.mark.unit @@ -412,7 +411,6 @@ def test_valid_policy_rule(): data_categories=PrivacyRule(matches="NONE", values=[]), data_uses=PrivacyRule(matches="NONE", values=["provide.service"]), data_subjects=PrivacyRule(matches="ANY", values=[]), - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", ) @@ -444,7 +442,6 @@ def test_create_valid_system(): data_categories=[], data_use="provide.service", data_subjects=[], - data_qualifier="aggregated_data", dataset_references=[], ) ], @@ -477,7 +474,6 @@ def test_invalid_country_identifier(country_code: str): data_categories=[], data_use="provide.service", data_subjects=[], - data_qualifier="aggregated_data", dataset_references=["test_system"], ) ], @@ -503,7 +499,6 @@ def test_valid_country_identifier(country_code: str): data_categories=[], data_use="provide.service", data_subjects=[], - data_qualifier="aggregated_data", dataset_references=["test_system"], ) ],