dsa_tdb.types module

dsa_tdb.types.ALL_PLATFORMS_ENTRY_VALUE = 'All Platforms'

The name of the global option in the dropdown menu

dsa_tdb.types.ALL_PLATFORMS_PLATFORM_NAME = 'global'

The name of the fictitious global platform used to account for all the platforms

class dsa_tdb.types.AccountType(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the account type.

ACCOUNT_TYPE_BUSINESS = 'ACCOUNT_TYPE_BUSINESS'
ACCOUNT_TYPE_PRIVATE = 'ACCOUNT_TYPE_PRIVATE'
class dsa_tdb.types.AggregateFileFormat(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the aggregate file formats.

csv = 'csv'
parquet = 'parquet'
pickle = 'pickle'
class dsa_tdb.types.AggregateWriteMode(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the aggregate modes for the output file.

append = 'append'
error = 'error'
overwrite = 'overwrite'
class dsa_tdb.types.AggregationConfig(*, input_format: InputFileFormat = 'parquet', delete_original_columns: bool = False, horizontally_explode_columns: bool = False, normalize_platform_name: bool = False, normalize_content_type_other: bool = False, output_format: AggregateFileFormat = 'parquet', fillna_str_value: str | None = None, fillna_bool_value: bool | None = False, content_date_range: List[datetime] | None = None, decision_date_range: List[datetime] | None = None, created_at_date_range: List[datetime] | None = None, platforms_to_exclude: List[str] | None = None, columns_to_import: List[TDB_columnsFull] = ['uuid', 'decision_visibility', 'decision_visibility_other', 'decision_monetary', 'decision_monetary_other', 'decision_provision', 'decision_account', 'account_type', 'decision_ground', 'decision_ground_reference_url', 'illegal_content_legal_ground', 'illegal_content_explanation', 'incompatible_content_ground', 'incompatible_content_explanation', 'incompatible_content_illegal', 'category', 'category_addition', 'category_specification', 'category_specification_other', 'content_type', 'content_type_other', 'content_language', 'territorial_scope', 'decision_facts', 'source_type', 'source_identity', 'automated_detection', 'automated_decision', 'platform_name', 'platform_uid'], columns_datetime: List[TDB_datetimeColumns] = ['content_date', 'application_date', 'created_at', 'end_date_account_restriction', 'end_date_monetary_restriction', 'end_date_service_restriction', 'end_date_visibility_restriction'], columns_to_group: List[RawAndExplodedColumn] | None = None, columns_to_fill_str: List[RawAndExplodedColumn] = [], columns_to_fill_bool: List[RawAndExplodedColumn] = ['CONTENT_TYPE_APP', 'CONTENT_TYPE_AUDIO', 'CONTENT_TYPE_IMAGE', 'CONTENT_TYPE_PRODUCT', 'CONTENT_TYPE_SYNTHETIC_MEDIA', 'CONTENT_TYPE_TEXT', 'CONTENT_TYPE_VIDEO', 'CONTENT_TYPE_OTHER', 'CONTENT_TYPE_LINK', 'CONTENT_TYPE_ACCOUNT', 'CONTENT_TYPE_AD', 'CONTENT_TYPE_STICKER', 'CONTENT_TYPE_HASHTAG', 
'DECISION_VISIBILITY_CONTENT_REMOVED', 'DECISION_VISIBILITY_CONTENT_DISABLED', 'DECISION_VISIBILITY_CONTENT_DEMOTED', 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_LABELLED', 'DECISION_VISIBILITY_OTHER', 'STATEMENT_CATEGORY_ANIMAL_WELFARE', 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS', 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH', 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS', 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS', 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR', 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT', 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS', 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY', 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD', 'STATEMENT_CATEGORY_SELF_HARM', 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE', 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS', 'STATEMENT_CATEGORY_VIOLENCE', 'KEYWORD_ANIMAL_HARM', 'KEYWORD_ADULT_SEXUAL_MATERIAL', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS', 'KEYWORD_BIOMETRIC_DATA_BREACH', 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL', 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS', 'KEYWORD_COORDINATED_HARM', 'KEYWORD_COPYRIGHT_INFRINGEMENT', 'KEYWORD_DANGEROUS_TOYS', 'KEYWORD_DATA_FALSIFICATION', 'KEYWORD_DEFAMATION', 'KEYWORD_DESIGN_INFRINGEMENT', 'KEYWORD_DISCRIMINATION', 'KEYWORD_DISINFORMATION', 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION', 'KEYWORD_GENDER_BASED_VIOLENCE', 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT', 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS', 'KEYWORD_GOODS_SERVICES_NOT_PERMITTED', 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS', 'KEYWORD_HATE_SPEECH', 'KEYWORD_HUMAN_EXPLOITATION', 'KEYWORD_HUMAN_TRAFFICKING', 'KEYWORD_ILLEGAL_ORGANIZATIONS', 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE', 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING', 'KEYWORD_INAUTHENTIC_ACCOUNTS', 'KEYWORD_INAUTHENTIC_LISTINGS', 'KEYWORD_INAUTHENTIC_USER_REVIEWS', 
'KEYWORD_INCITEMENT_VIOLENCE_HATRED', 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS', 'KEYWORD_LANGUAGE_REQUIREMENTS', 'KEYWORD_MISINFORMATION', 'KEYWORD_MISSING_PROCESSING_GROUND', 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING', 'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE', 'KEYWORD_NUDITY', 'KEYWORD_ONLINE_BULLYING_INTIMIDATION', 'KEYWORD_PATENT_INFRINGEMENT', 'KEYWORD_PHISHING', 'KEYWORD_PYRAMID_SCHEMES', 'KEYWORD_REGULATED_GOODS_SERVICES', 'KEYWORD_RIGHT_TO_BE_FORGOTTEN', 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE', 'KEYWORD_RISK_PUBLIC_HEALTH', 'KEYWORD_SELF_MUTILATION', 'KEYWORD_STALKING', 'KEYWORD_SUICIDE', 'KEYWORD_TERRORIST_CONTENT', 'KEYWORD_TRADE_SECRET_INFRINGEMENT', 'KEYWORD_TRADEMARK_INFRINGEMENT', 'KEYWORD_UNLAWFUL_SALE_ANIMALS', 'KEYWORD_UNSAFE_CHALLENGES', 'KEYWORD_OTHER'], compute_time_to_action: bool = False, compute_restriction_duration: bool = False, write_mode: AggregateWriteMode = 'overwrite', created_at_dt_floor: str | None = 'day')

Bases: BaseModel

Configuration for the aggregation of data.

class Config

Bases: object

use_enum_values = True
columns_datetime: List[TDB_datetimeColumns]
columns_to_fill_bool: List[RawAndExplodedColumn]
columns_to_fill_str: List[RawAndExplodedColumn]
columns_to_group: List[RawAndExplodedColumn] | None
columns_to_import: List[TDB_columnsFull]
compute_restriction_duration: bool
compute_time_to_action: bool
content_date_range: List[datetime] | None
created_at_date_range: List[datetime] | None
created_at_dt_floor: str | None
decision_date_range: List[datetime] | None
delete_original_columns: bool
fillna_bool_value: bool | None
fillna_str_value: str | None
horizontally_explode_columns: bool
input_format: InputFileFormat
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

normalize_content_type_other: bool
normalize_platform_name: bool
output_format: AggregateFileFormat
platforms_to_exclude: List[str] | None
write_mode: AggregateWriteMode
class dsa_tdb.types.AutomatedDecision(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the automated decision.

AUTOMATED_DECISION_FULLY = 'AUTOMATED_DECISION_FULLY'
AUTOMATED_DECISION_NOT_AUTOMATED = 'AUTOMATED_DECISION_NOT_AUTOMATED'
AUTOMATED_DECISION_PARTIALLY = 'AUTOMATED_DECISION_PARTIALLY'
class dsa_tdb.types.AutomatedDetection(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the automated detection.

No = 'No'
Yes = 'Yes'
class dsa_tdb.types.BooleanOperator(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the boolean operators.

AND = 'AND'
OR = 'OR'
dsa_tdb.types.CELERY_TASK_QUEUE = 'dsa_tdb_queue'

The name of the queue to use for the celery tasks.

dsa_tdb.types.CHUNKED_FILES_SUBFOLDER_NAME = 'daily_dumps_chunked'

The subfolder where to save the chunked files.

dsa_tdb.types.CHUNKED_FILE_SUCCESS_NAME = 'COMPLETE'

The name of the file to save in the chunked folder to signal the completion of the chunking.

class dsa_tdb.types.Category(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

STATEMENT_CATEGORY_ANIMAL_WELFARE = 'STATEMENT_CATEGORY_ANIMAL_WELFARE'
STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS = 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS'
STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH = 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH'
STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS = 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS'
STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS = 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS'
STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR = 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR'
STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT = 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT'
STATEMENT_CATEGORY_PROTECTION_OF_MINORS = 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS'
STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY = 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY'
STATEMENT_CATEGORY_SCAMS_AND_FRAUD = 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD'
STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE = 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE'
STATEMENT_CATEGORY_SELF_HARM = 'STATEMENT_CATEGORY_SELF_HARM'
STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS = 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS'
STATEMENT_CATEGORY_VIOLENCE = 'STATEMENT_CATEGORY_VIOLENCE'
class dsa_tdb.types.CategoryAddition(*, category_addition: List[Category])

Bases: BaseModel

The base model to validate the category addition.

class Config

Bases: object

use_enum_values = True
category_addition: List[Category]
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

class dsa_tdb.types.CategorySpecification(*, category_specification: List[Keyword])

Bases: BaseModel

The base model to validate the category specification.

class Config

Bases: object

use_enum_values = True
category_specification: List[Keyword]
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

class dsa_tdb.types.ContentLanguage(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the content language.

BG = 'BG'
CS = 'CS'
DA = 'DA'
DE = 'DE'
EL = 'EL'
EN = 'EN'
ES = 'ES'
ET = 'ET'
FI = 'FI'
FR = 'FR'
GA = 'GA'
HR = 'HR'
HU = 'HU'
IT = 'IT'
LT = 'LT'
LV = 'LV'
MT = 'MT'
NL = 'NL'
PL = 'PL'
PT = 'PT'
RO = 'RO'
SK = 'SK'
SL = 'SL'
SV = 'SV'
class dsa_tdb.types.ContentType(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the content types.

CONTENT_TYPE_ACCOUNT = 'CONTENT_TYPE_ACCOUNT'
CONTENT_TYPE_AD = 'CONTENT_TYPE_AD'
CONTENT_TYPE_APP = 'CONTENT_TYPE_APP'
CONTENT_TYPE_AUDIO = 'CONTENT_TYPE_AUDIO'
CONTENT_TYPE_HASHTAG = 'CONTENT_TYPE_HASHTAG'
CONTENT_TYPE_IMAGE = 'CONTENT_TYPE_IMAGE'
CONTENT_TYPE_OTHER = 'CONTENT_TYPE_OTHER'
CONTENT_TYPE_PRODUCT = 'CONTENT_TYPE_PRODUCT'
CONTENT_TYPE_STICKER = 'CONTENT_TYPE_STICKER'
CONTENT_TYPE_SYNTHETIC_MEDIA = 'CONTENT_TYPE_SYNTHETIC_MEDIA'
CONTENT_TYPE_TEXT = 'CONTENT_TYPE_TEXT'
CONTENT_TYPE_VIDEO = 'CONTENT_TYPE_VIDEO'
dsa_tdb.types.DAILY_FILES_SUBFOLDER_REGEX = '(?P<platform>[\\w-]+)___(?P<version>\\w+)'

The subfolder regex of the daily files/sha1 once downloaded.

dsa_tdb.types.DAILY_FILES_SUBFOLDER_TEMPLATE = '{platform}___{version}'

The subfolder pattern of the daily files/sha1 once downloaded.

dsa_tdb.types.DAILY_FILES_TABLE_URL = 'https://transparency.dsa.ec.europa.eu/explore-data/download'

The url of the daily files table.

dsa_tdb.types.DAILY_FILE_CHECKSUM_EXTENSION = '.zip.sha1'

The extension of the daily files' sha1 checksums.

dsa_tdb.types.DAILY_FILE_DATE_FORMAT = '%Y-%m-%d'

The format of the date in the daily files/sha1.

dsa_tdb.types.DAILY_FILE_EXTENSION = '.zip'

The extension of the daily files.

dsa_tdb.types.DAILY_FILE_NAME_REGEX = 'sor-(?P<platform>[\\w-]+)-(?P<date>\\d{4}-\\d{2}-\\d{2})-(?P<version>\\w+)(?P<extension>\\.\\w+)'

The name regex of the daily files/sha1 once downloaded.

dsa_tdb.types.DAILY_FILE_NAME_TEMPLATE = 'sor-{platform}-{date}-{version}{extension}'

The name pattern of the daily files/sha1 once downloaded.

dsa_tdb.types.DAILY_FILE_URL_TEMPLATE = 'https://dsa-sor-data-dumps.s3.eu-central-1.amazonaws.com/sor-{platform}-{date}-{version}{extension}'

The url pattern of the daily files/sha1.

class dsa_tdb.types.DatetimeColumns(*, columns: List[TDB_datetimeColumns])

Bases: BaseModel

The base model to validate the datetime columns to use.

class Config

Bases: object

use_enum_values = True
columns: List[TDB_datetimeColumns]
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

class dsa_tdb.types.DecisionAccount(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the decision account.

DECISION_ACCOUNT_SUSPENDED = 'DECISION_ACCOUNT_SUSPENDED'
DECISION_ACCOUNT_TERMINATED = 'DECISION_ACCOUNT_TERMINATED'
class dsa_tdb.types.DecisionGround(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the decision ground.

DECISION_GROUND_ILLEGAL_CONTENT = 'DECISION_GROUND_ILLEGAL_CONTENT'
DECISION_GROUND_INCOMPATIBLE_CONTENT = 'DECISION_GROUND_INCOMPATIBLE_CONTENT'
class dsa_tdb.types.DecisionMonetary(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the decision monetary.

DECISION_MONETARY_OTHER = 'DECISION_MONETARY_OTHER'
DECISION_MONETARY_SUSPENSION = 'DECISION_MONETARY_SUSPENSION'
DECISION_MONETARY_TERMINATION = 'DECISION_MONETARY_TERMINATION'
class dsa_tdb.types.DecisionProvision(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the decision provision.

DECISION_PROVISION_PARTIAL_SUSPENSION = 'DECISION_PROVISION_PARTIAL_SUSPENSION'
DECISION_PROVISION_PARTIAL_TERMINATION = 'DECISION_PROVISION_PARTIAL_TERMINATION'
DECISION_PROVISION_TOTAL_SUSPENSION = 'DECISION_PROVISION_TOTAL_SUSPENSION'
DECISION_PROVISION_TOTAL_TERMINATION = 'DECISION_PROVISION_TOTAL_TERMINATION'
class dsa_tdb.types.DecisionVisibility(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the decision visibility.

DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED = 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED'
DECISION_VISIBILITY_CONTENT_DEMOTED = 'DECISION_VISIBILITY_CONTENT_DEMOTED'
DECISION_VISIBILITY_CONTENT_DISABLED = 'DECISION_VISIBILITY_CONTENT_DISABLED'
DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED = 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED'
DECISION_VISIBILITY_CONTENT_LABELLED = 'DECISION_VISIBILITY_CONTENT_LABELLED'
DECISION_VISIBILITY_CONTENT_REMOVED = 'DECISION_VISIBILITY_CONTENT_REMOVED'
DECISION_VISIBILITY_OTHER = 'DECISION_VISIBILITY_OTHER'
class dsa_tdb.types.EXPLODED_COLUMNS(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

The set of columns that we can obtain when exploding the columns.

CONTENT_TYPE_ACCOUNT = 'CONTENT_TYPE_ACCOUNT'
CONTENT_TYPE_AD = 'CONTENT_TYPE_AD'
CONTENT_TYPE_APP = 'CONTENT_TYPE_APP'
CONTENT_TYPE_AUDIO = 'CONTENT_TYPE_AUDIO'
CONTENT_TYPE_HASHTAG = 'CONTENT_TYPE_HASHTAG'
CONTENT_TYPE_IMAGE = 'CONTENT_TYPE_IMAGE'
CONTENT_TYPE_OTHER = 'CONTENT_TYPE_OTHER'
CONTENT_TYPE_PRODUCT = 'CONTENT_TYPE_PRODUCT'
CONTENT_TYPE_STICKER = 'CONTENT_TYPE_STICKER'
CONTENT_TYPE_SYNTHETIC_MEDIA = 'CONTENT_TYPE_SYNTHETIC_MEDIA'
CONTENT_TYPE_TEXT = 'CONTENT_TYPE_TEXT'
CONTENT_TYPE_VIDEO = 'CONTENT_TYPE_VIDEO'
DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED = 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED'
DECISION_VISIBILITY_CONTENT_DEMOTED = 'DECISION_VISIBILITY_CONTENT_DEMOTED'
DECISION_VISIBILITY_CONTENT_DISABLED = 'DECISION_VISIBILITY_CONTENT_DISABLED'
DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED = 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED'
DECISION_VISIBILITY_CONTENT_LABELLED = 'DECISION_VISIBILITY_CONTENT_LABELLED'
DECISION_VISIBILITY_CONTENT_REMOVED = 'DECISION_VISIBILITY_CONTENT_REMOVED'
DECISION_VISIBILITY_OTHER = 'DECISION_VISIBILITY_OTHER'
KEYWORD_ADULT_SEXUAL_MATERIAL = 'KEYWORD_ADULT_SEXUAL_MATERIAL'
KEYWORD_AGE_SPECIFIC_RESTRICTIONS = 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS'
KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS = 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS'
KEYWORD_ANIMAL_HARM = 'KEYWORD_ANIMAL_HARM'
KEYWORD_BIOMETRIC_DATA_BREACH = 'KEYWORD_BIOMETRIC_DATA_BREACH'
KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL = 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL'
KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS = 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS'
KEYWORD_COORDINATED_HARM = 'KEYWORD_COORDINATED_HARM'
KEYWORD_DANGEROUS_TOYS = 'KEYWORD_DANGEROUS_TOYS'
KEYWORD_DATA_FALSIFICATION = 'KEYWORD_DATA_FALSIFICATION'
KEYWORD_DEFAMATION = 'KEYWORD_DEFAMATION'
KEYWORD_DESIGN_INFRINGEMENT = 'KEYWORD_DESIGN_INFRINGEMENT'
KEYWORD_DISCRIMINATION = 'KEYWORD_DISCRIMINATION'
KEYWORD_DISINFORMATION = 'KEYWORD_DISINFORMATION'
KEYWORD_FOREIGN_INFORMATION_MANIPULATION = 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION'
KEYWORD_GENDER_BASED_VIOLENCE = 'KEYWORD_GENDER_BASED_VIOLENCE'
KEYWORD_GEOGRAPHICAL_REQUIREMENTS = 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS'
KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT = 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT'
KEYWORD_GOODS_SERVICES_NOT_PERMITTED = 'KEYWORD_GOODS_SERVICES_NOT_PERMITTED'
KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS = 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS'
KEYWORD_HATE_SPEECH = 'KEYWORD_HATE_SPEECH'
KEYWORD_HUMAN_EXPLOITATION = 'KEYWORD_HUMAN_EXPLOITATION'
KEYWORD_HUMAN_TRAFFICKING = 'KEYWORD_HUMAN_TRAFFICKING'
KEYWORD_ILLEGAL_ORGANIZATIONS = 'KEYWORD_ILLEGAL_ORGANIZATIONS'
KEYWORD_IMAGE_BASED_SEXUAL_ABUSE = 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE'
KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING = 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING'
KEYWORD_INAUTHENTIC_ACCOUNTS = 'KEYWORD_INAUTHENTIC_ACCOUNTS'
KEYWORD_INAUTHENTIC_LISTINGS = 'KEYWORD_INAUTHENTIC_LISTINGS'
KEYWORD_INAUTHENTIC_USER_REVIEWS = 'KEYWORD_INAUTHENTIC_USER_REVIEWS'
KEYWORD_INCITEMENT_VIOLENCE_HATRED = 'KEYWORD_INCITEMENT_VIOLENCE_HATRED'
KEYWORD_INSUFFICIENT_INFORMATION_TRADERS = 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS'
KEYWORD_LANGUAGE_REQUIREMENTS = 'KEYWORD_LANGUAGE_REQUIREMENTS'
KEYWORD_MISINFORMATION = 'KEYWORD_MISINFORMATION'
KEYWORD_MISSING_PROCESSING_GROUND = 'KEYWORD_MISSING_PROCESSING_GROUND'
KEYWORD_NON_CONSENSUAL_IMAGE_SHARING = 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING'
KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE = 'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE'
KEYWORD_NUDITY = 'KEYWORD_NUDITY'
KEYWORD_ONLINE_BULLYING_INTIMIDATION = 'KEYWORD_ONLINE_BULLYING_INTIMIDATION'
KEYWORD_OTHER = 'KEYWORD_OTHER'
KEYWORD_PATENT_INFRINGEMENT = 'KEYWORD_PATENT_INFRINGEMENT'
KEYWORD_PHISHING = 'KEYWORD_PHISHING'
KEYWORD_PYRAMID_SCHEMES = 'KEYWORD_PYRAMID_SCHEMES'
KEYWORD_REGULATED_GOODS_SERVICES = 'KEYWORD_REGULATED_GOODS_SERVICES'
KEYWORD_RIGHT_TO_BE_FORGOTTEN = 'KEYWORD_RIGHT_TO_BE_FORGOTTEN'
KEYWORD_RISK_ENVIRONMENTAL_DAMAGE = 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE'
KEYWORD_RISK_PUBLIC_HEALTH = 'KEYWORD_RISK_PUBLIC_HEALTH'
KEYWORD_SELF_MUTILATION = 'KEYWORD_SELF_MUTILATION'
KEYWORD_STALKING = 'KEYWORD_STALKING'
KEYWORD_SUICIDE = 'KEYWORD_SUICIDE'
KEYWORD_TERRORIST_CONTENT = 'KEYWORD_TERRORIST_CONTENT'
KEYWORD_TRADEMARK_INFRINGEMENT = 'KEYWORD_TRADEMARK_INFRINGEMENT'
KEYWORD_TRADE_SECRET_INFRINGEMENT = 'KEYWORD_TRADE_SECRET_INFRINGEMENT'
KEYWORD_UNLAWFUL_SALE_ANIMALS = 'KEYWORD_UNLAWFUL_SALE_ANIMALS'
KEYWORD_UNSAFE_CHALLENGES = 'KEYWORD_UNSAFE_CHALLENGES'
STATEMENT_CATEGORY_ANIMAL_WELFARE = 'STATEMENT_CATEGORY_ANIMAL_WELFARE'
STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS = 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS'
STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH = 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH'
STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS = 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS'
STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS = 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS'
STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR = 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR'
STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT = 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT'
STATEMENT_CATEGORY_PROTECTION_OF_MINORS = 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS'
STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY = 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY'
STATEMENT_CATEGORY_SCAMS_AND_FRAUD = 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD'
STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE = 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE'
STATEMENT_CATEGORY_SELF_HARM = 'STATEMENT_CATEGORY_SELF_HARM'
STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS = 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS'
STATEMENT_CATEGORY_VIOLENCE = 'STATEMENT_CATEGORY_VIOLENCE'
class dsa_tdb.types.FilteringConfig(*, input_format: InputFileFormat = 'parquet', output_format: AggregateFileFormat = 'parquet', write_mode: AggregateWriteMode = 'overwrite', delete_original_columns: bool = False, horizontally_explode_columns: bool = False, normalize_platform_name: bool = False, normalize_content_type_other: bool = False, fillna_str_value: str | None = None, fillna_bool_value: bool | None = False, content_date_range: List[datetime] | None = None, decision_date_range: List[datetime] | None = None, created_at_date_range: List[datetime] | None = None, platforms_to_exclude: List[str] | None = None, platforms_to_include: List[str] | None = None, created_at_dt_floor: str | None = 'day', columns_to_import: List[TDB_columnsFull] = ['uuid', 'decision_visibility', 'decision_visibility_other', 'decision_monetary', 'decision_monetary_other', 'decision_provision', 'decision_account', 'account_type', 'decision_ground', 'decision_ground_reference_url', 'illegal_content_legal_ground', 'illegal_content_explanation', 'incompatible_content_ground', 'incompatible_content_explanation', 'incompatible_content_illegal', 'category', 'category_addition', 'category_specification', 'category_specification_other', 'content_type', 'content_type_other', 'content_language', 'territorial_scope', 'decision_facts', 'source_type', 'source_identity', 'automated_detection', 'automated_decision', 'platform_name', 'platform_uid'], columns_datetime: List[TDB_datetimeColumns] = ['content_date', 'application_date', 'created_at', 'end_date_account_restriction', 'end_date_monetary_restriction', 'end_date_service_restriction', 'end_date_visibility_restriction'], upstream_sampling: float | None = None, downstream_sampling: float | None = None, bool_columns_to_check: List[RawAndExplodedColumn] | None = None, bool_columns_to_check_operator: BooleanOperator = 'OR', decision_monetary: List[DecisionMonetary] | None = None, decision_provision: List[DecisionProvision] | None = None, decision_account: 
List[DecisionAccount] | None = None, decision_visibility: List[DecisionVisibility] | None = None, category: List[Category] | None = None, decision_ground: List[DecisionGround] | None = None, automated_detection: List[AutomatedDetection] | None = None, automated_decision: List[AutomatedDecision] | None = None, source_type: List[SourceType] | None = None, account_type: List[AccountType] | None = None, incompatible_content_illegal: List[IncompatibleContentIllegal] | None = None, content_language: List[ContentLanguage] | None = None, content_type: List[ContentType] | None = None, category_addition: List[Category] | None = None, category_specification: List[Keyword] | None = None, territorial_scope: List[TerritorialScope] | None = None, decision_visibility_other: str | None = None, decision_visibility_other_to_lower: bool = False, decision_monetary_other: str | None = None, decision_monetary_other_to_lower: bool = False, decision_facts: str | None = None, decision_facts_to_lower: bool = False, decision_ground_reference_url: str | None = None, decision_ground_reference_url_to_lower: bool = False, illegal_content_legal_ground: str | None = None, illegal_content_legal_ground_to_lower: bool = False, illegal_content_explanation: str | None = None, illegal_content_explanation_to_lower: bool = False, incompatible_content_ground: str | None = None, incompatible_content_ground_to_lower: bool = False, incompatible_content_explanation: str | None = None, incompatible_content_explanation_to_lower: bool = False, content_type_other: str | None = None, content_type_other_to_lower: bool = False, category_specification_other: str | None = None, category_specification_other_to_lower: bool = False, source_identity: str | None = None, source_identity_to_lower: bool = False, end_date_account_restriction: datetime | None = None, end_date_monetary_restriction: datetime | None = None, end_date_service_restriction: datetime | None = None, end_date_visibility_restriction: datetime | None = None, 
columns_to_fill_str: List[RawAndExplodedColumn] = [], columns_to_fill_bool: List[RawAndExplodedColumn] = ['CONTENT_TYPE_APP', 'CONTENT_TYPE_AUDIO', 'CONTENT_TYPE_IMAGE', 'CONTENT_TYPE_PRODUCT', 'CONTENT_TYPE_SYNTHETIC_MEDIA', 'CONTENT_TYPE_TEXT', 'CONTENT_TYPE_VIDEO', 'CONTENT_TYPE_OTHER', 'CONTENT_TYPE_LINK', 'CONTENT_TYPE_ACCOUNT', 'CONTENT_TYPE_AD', 'CONTENT_TYPE_STICKER', 'CONTENT_TYPE_HASHTAG', 'DECISION_VISIBILITY_CONTENT_REMOVED', 'DECISION_VISIBILITY_CONTENT_DISABLED', 'DECISION_VISIBILITY_CONTENT_DEMOTED', 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_LABELLED', 'DECISION_VISIBILITY_OTHER', 'STATEMENT_CATEGORY_ANIMAL_WELFARE', 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS', 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH', 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS', 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS', 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR', 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT', 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS', 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY', 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD', 'STATEMENT_CATEGORY_SELF_HARM', 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE', 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS', 'STATEMENT_CATEGORY_VIOLENCE', 'KEYWORD_ANIMAL_HARM', 'KEYWORD_ADULT_SEXUAL_MATERIAL', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS', 'KEYWORD_BIOMETRIC_DATA_BREACH', 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL', 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS', 'KEYWORD_COORDINATED_HARM', 'KEYWORD_COPYRIGHT_INFRINGEMENT', 'KEYWORD_DANGEROUS_TOYS', 'KEYWORD_DATA_FALSIFICATION', 'KEYWORD_DEFAMATION', 'KEYWORD_DESIGN_INFRINGEMENT', 'KEYWORD_DISCRIMINATION', 'KEYWORD_DISINFORMATION', 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION', 'KEYWORD_GENDER_BASED_VIOLENCE', 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT', 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS', 
'KEYWORD_GOODS_SERVICES_NOT_PERMITTED', 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS', 'KEYWORD_HATE_SPEECH', 'KEYWORD_HUMAN_EXPLOITATION', 'KEYWORD_HUMAN_TRAFFICKING', 'KEYWORD_ILLEGAL_ORGANIZATIONS', 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE', 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING', 'KEYWORD_INAUTHENTIC_ACCOUNTS', 'KEYWORD_INAUTHENTIC_LISTINGS', 'KEYWORD_INAUTHENTIC_USER_REVIEWS', 'KEYWORD_INCITEMENT_VIOLENCE_HATRED', 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS', 'KEYWORD_LANGUAGE_REQUIREMENTS', 'KEYWORD_MISINFORMATION', 'KEYWORD_MISSING_PROCESSING_GROUND', 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING', 'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE', 'KEYWORD_NUDITY', 'KEYWORD_ONLINE_BULLYING_INTIMIDATION', 'KEYWORD_PATENT_INFRINGEMENT', 'KEYWORD_PHISHING', 'KEYWORD_PYRAMID_SCHEMES', 'KEYWORD_REGULATED_GOODS_SERVICES', 'KEYWORD_RIGHT_TO_BE_FORGOTTEN', 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE', 'KEYWORD_RISK_PUBLIC_HEALTH', 'KEYWORD_SELF_MUTILATION', 'KEYWORD_STALKING', 'KEYWORD_SUICIDE', 'KEYWORD_TERRORIST_CONTENT', 'KEYWORD_TRADE_SECRET_INFRINGEMENT', 'KEYWORD_TRADEMARK_INFRINGEMENT', 'KEYWORD_UNLAWFUL_SALE_ANIMALS', 'KEYWORD_UNSAFE_CHALLENGES', 'KEYWORD_OTHER'])

Bases: BaseModel

Configuration for the filtering of data.

class Config

Bases: object

use_enum_values = True
account_type: List[AccountType] | None
automated_decision: List[AutomatedDecision] | None
automated_detection: List[AutomatedDetection] | None
bool_columns_to_check: List[RawAndExplodedColumn] | None
bool_columns_to_check_operator: BooleanOperator
category: List[Category] | None
category_addition: List[Category] | None
category_specification: List[Keyword] | None
category_specification_other: str | None
category_specification_other_to_lower: bool
columns_datetime: List[TDB_datetimeColumns]
columns_to_fill_bool: List[RawAndExplodedColumn]
columns_to_fill_str: List[RawAndExplodedColumn]
columns_to_import: List[TDB_columnsFull]
content_date_range: List[datetime] | None
content_language: List[ContentLanguage] | None
content_type: List[ContentType] | None
content_type_other: str | None
content_type_other_to_lower: bool
created_at_date_range: List[datetime] | None
created_at_dt_floor: str | None
decision_account: List[DecisionAccount] | None
decision_date_range: List[datetime] | None
decision_facts: str | None
decision_facts_to_lower: bool
decision_ground: List[DecisionGround] | None
decision_ground_reference_url: str | None
decision_ground_reference_url_to_lower: bool
decision_monetary: List[DecisionMonetary] | None
decision_monetary_other: str | None
decision_monetary_other_to_lower: bool
decision_provision: List[DecisionProvision] | None
decision_visibility: List[DecisionVisibility] | None
decision_visibility_other: str | None
decision_visibility_other_to_lower: bool
delete_original_columns: bool
downstream_sampling: float | None
end_date_account_restriction: datetime | None
end_date_monetary_restriction: datetime | None
end_date_service_restriction: datetime | None
end_date_visibility_restriction: datetime | None
fillna_bool_value: bool | None
fillna_str_value: str | None
horizontally_explode_columns: bool
illegal_content_explanation: str | None
illegal_content_explanation_to_lower: bool
incompatible_content_explanation: str | None
incompatible_content_explanation_to_lower: bool
incompatible_content_ground: str | None
incompatible_content_ground_to_lower: bool
incompatible_content_illegal: List[IncompatibleContentIllegal] | None
input_format: InputFileFormat
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to `pydantic.config.ConfigDict`.

normalize_content_type_other: bool
normalize_platform_name: bool
output_format: AggregateFileFormat
platforms_to_exclude: List[str] | None
platforms_to_include: List[str] | None
source_identity: str | None
source_identity_to_lower: bool
source_type: List[SourceType] | None
territorial_scope: List[TerritorialScope] | None
upstream_sampling: float | None
write_mode: AggregateWriteMode
class dsa_tdb.types.IncompatibleContentIllegal(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the incompatible content illegal.

No = 'No'
Yes = 'Yes'
class dsa_tdb.types.InputFileFormat(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the input file formats.

csv = 'csv'
parquet = 'parquet'
class dsa_tdb.types.Keyword(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

KEYWORD_ADULT_SEXUAL_MATERIAL = 'KEYWORD_ADULT_SEXUAL_MATERIAL'
KEYWORD_AGE_SPECIFIC_RESTRICTIONS = 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS'
KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS = 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS'
KEYWORD_ANIMAL_HARM = 'KEYWORD_ANIMAL_HARM'
KEYWORD_BIOMETRIC_DATA_BREACH = 'KEYWORD_BIOMETRIC_DATA_BREACH'
KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL = 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL'
KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS = 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS'
KEYWORD_COORDINATED_HARM = 'KEYWORD_COORDINATED_HARM'
KEYWORD_DANGEROUS_TOYS = 'KEYWORD_DANGEROUS_TOYS'
KEYWORD_DATA_FALSIFICATION = 'KEYWORD_DATA_FALSIFICATION'
KEYWORD_DEFAMATION = 'KEYWORD_DEFAMATION'
KEYWORD_DESIGN_INFRINGEMENT = 'KEYWORD_DESIGN_INFRINGEMENT'
KEYWORD_DISCRIMINATION = 'KEYWORD_DISCRIMINATION'
KEYWORD_DISINFORMATION = 'KEYWORD_DISINFORMATION'
KEYWORD_FOREIGN_INFORMATION_MANIPULATION = 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION'
KEYWORD_GENDER_BASED_VIOLENCE = 'KEYWORD_GENDER_BASED_VIOLENCE'
KEYWORD_GEOGRAPHICAL_REQUIREMENTS = 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS'
KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT = 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT'
KEYWORD_GOODS_SERVICES_NOT_PERMITTED = 'KEYWORD_GOODS_SERVICES_NOT_PERMITTED'
KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS = 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS'
KEYWORD_HATE_SPEECH = 'KEYWORD_HATE_SPEECH'
KEYWORD_HUMAN_EXPLOITATION = 'KEYWORD_HUMAN_EXPLOITATION'
KEYWORD_HUMAN_TRAFFICKING = 'KEYWORD_HUMAN_TRAFFICKING'
KEYWORD_ILLEGAL_ORGANIZATIONS = 'KEYWORD_ILLEGAL_ORGANIZATIONS'
KEYWORD_IMAGE_BASED_SEXUAL_ABUSE = 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE'
KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING = 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING'
KEYWORD_INAUTHENTIC_ACCOUNTS = 'KEYWORD_INAUTHENTIC_ACCOUNTS'
KEYWORD_INAUTHENTIC_LISTINGS = 'KEYWORD_INAUTHENTIC_LISTINGS'
KEYWORD_INAUTHENTIC_USER_REVIEWS = 'KEYWORD_INAUTHENTIC_USER_REVIEWS'
KEYWORD_INCITEMENT_VIOLENCE_HATRED = 'KEYWORD_INCITEMENT_VIOLENCE_HATRED'
KEYWORD_INSUFFICIENT_INFORMATION_TRADERS = 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS'
KEYWORD_LANGUAGE_REQUIREMENTS = 'KEYWORD_LANGUAGE_REQUIREMENTS'
KEYWORD_MISINFORMATION = 'KEYWORD_MISINFORMATION'
KEYWORD_MISSING_PROCESSING_GROUND = 'KEYWORD_MISSING_PROCESSING_GROUND'
KEYWORD_NON_CONSENSUAL_IMAGE_SHARING = 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING'
KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE = 'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE'
KEYWORD_NUDITY = 'KEYWORD_NUDITY'
KEYWORD_ONLINE_BULLYING_INTIMIDATION = 'KEYWORD_ONLINE_BULLYING_INTIMIDATION'
KEYWORD_OTHER = 'KEYWORD_OTHER'
KEYWORD_PATENT_INFRINGEMENT = 'KEYWORD_PATENT_INFRINGEMENT'
KEYWORD_PHISHING = 'KEYWORD_PHISHING'
KEYWORD_PYRAMID_SCHEMES = 'KEYWORD_PYRAMID_SCHEMES'
KEYWORD_REGULATED_GOODS_SERVICES = 'KEYWORD_REGULATED_GOODS_SERVICES'
KEYWORD_RIGHT_TO_BE_FORGOTTEN = 'KEYWORD_RIGHT_TO_BE_FORGOTTEN'
KEYWORD_RISK_ENVIRONMENTAL_DAMAGE = 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE'
KEYWORD_RISK_PUBLIC_HEALTH = 'KEYWORD_RISK_PUBLIC_HEALTH'
KEYWORD_SELF_MUTILATION = 'KEYWORD_SELF_MUTILATION'
KEYWORD_STALKING = 'KEYWORD_STALKING'
KEYWORD_SUICIDE = 'KEYWORD_SUICIDE'
KEYWORD_TERRORIST_CONTENT = 'KEYWORD_TERRORIST_CONTENT'
KEYWORD_TRADEMARK_INFRINGEMENT = 'KEYWORD_TRADEMARK_INFRINGEMENT'
KEYWORD_TRADE_SECRET_INFRINGEMENT = 'KEYWORD_TRADE_SECRET_INFRINGEMENT'
KEYWORD_UNLAWFUL_SALE_ANIMALS = 'KEYWORD_UNLAWFUL_SALE_ANIMALS'
KEYWORD_UNSAFE_CHALLENGES = 'KEYWORD_UNSAFE_CHALLENGES'
class dsa_tdb.types.LoadFileArguments(*, dump_files_pattern: str | List, columns_to_import: List[TDB_columnsFull] | None = ['uuid', 'decision_visibility', 'decision_visibility_other', 'decision_monetary', 'decision_monetary_other', 'decision_provision', 'decision_account', 'account_type', 'decision_ground', 'decision_ground_reference_url', 'illegal_content_legal_ground', 'illegal_content_explanation', 'incompatible_content_ground', 'incompatible_content_explanation', 'incompatible_content_illegal', 'category', 'category_addition', 'category_specification', 'category_specification_other', 'content_type', 'content_type_other', 'content_language', 'territorial_scope', 'decision_facts', 'source_type', 'source_identity', 'automated_detection', 'automated_decision', 'platform_name', 'platform_uid'], columns_datetime: List[TDB_datetimeColumns] | None = ['content_date', 'application_date', 'created_at', 'end_date_account_restriction', 'end_date_monetary_restriction', 'end_date_service_restriction', 'end_date_visibility_restriction'], content_date_range: List[datetime] | None = None, decision_date_range: List[datetime] | None = None, created_at_date_range: List[datetime] | None = None, input_format: TDB_chunkFormat = TDB_chunkFormat.csv, del_original: bool = True, explode_cols: bool = True, fillna_str: str | None = None, fillna_bool: bool | None = False, columns_to_fill_str: List[RawAndExplodedColumn] = [], columns_to_fill_bool: List[RawAndExplodedColumn] = ['CONTENT_TYPE_APP', 'CONTENT_TYPE_AUDIO', 'CONTENT_TYPE_IMAGE', 'CONTENT_TYPE_PRODUCT', 'CONTENT_TYPE_SYNTHETIC_MEDIA', 'CONTENT_TYPE_TEXT', 'CONTENT_TYPE_VIDEO', 'CONTENT_TYPE_OTHER', 'CONTENT_TYPE_LINK', 'CONTENT_TYPE_ACCOUNT', 'CONTENT_TYPE_AD', 'CONTENT_TYPE_STICKER', 'CONTENT_TYPE_HASHTAG', 'DECISION_VISIBILITY_CONTENT_REMOVED', 'DECISION_VISIBILITY_CONTENT_DISABLED', 'DECISION_VISIBILITY_CONTENT_DEMOTED', 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED', 
'DECISION_VISIBILITY_CONTENT_LABELLED', 'DECISION_VISIBILITY_OTHER', 'STATEMENT_CATEGORY_ANIMAL_WELFARE', 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS', 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH', 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS', 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS', 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR', 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT', 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS', 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY', 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD', 'STATEMENT_CATEGORY_SELF_HARM', 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE', 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS', 'STATEMENT_CATEGORY_VIOLENCE', 'KEYWORD_ANIMAL_HARM', 'KEYWORD_ADULT_SEXUAL_MATERIAL', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS', 'KEYWORD_BIOMETRIC_DATA_BREACH', 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL', 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS', 'KEYWORD_COORDINATED_HARM', 'KEYWORD_COPYRIGHT_INFRINGEMENT', 'KEYWORD_DANGEROUS_TOYS', 'KEYWORD_DATA_FALSIFICATION', 'KEYWORD_DEFAMATION', 'KEYWORD_DESIGN_INFRINGEMENT', 'KEYWORD_DISCRIMINATION', 'KEYWORD_DISINFORMATION', 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION', 'KEYWORD_GENDER_BASED_VIOLENCE', 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT', 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS', 'KEYWORD_GOODS_SERVICES_NOT_PERMITTED', 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS', 'KEYWORD_HATE_SPEECH', 'KEYWORD_HUMAN_EXPLOITATION', 'KEYWORD_HUMAN_TRAFFICKING', 'KEYWORD_ILLEGAL_ORGANIZATIONS', 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE', 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING', 'KEYWORD_INAUTHENTIC_ACCOUNTS', 'KEYWORD_INAUTHENTIC_LISTINGS', 'KEYWORD_INAUTHENTIC_USER_REVIEWS', 'KEYWORD_INCITEMENT_VIOLENCE_HATRED', 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS', 'KEYWORD_LANGUAGE_REQUIREMENTS', 'KEYWORD_MISINFORMATION', 'KEYWORD_MISSING_PROCESSING_GROUND', 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING', 
'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE', 'KEYWORD_NUDITY', 'KEYWORD_ONLINE_BULLYING_INTIMIDATION', 'KEYWORD_PATENT_INFRINGEMENT', 'KEYWORD_PHISHING', 'KEYWORD_PYRAMID_SCHEMES', 'KEYWORD_REGULATED_GOODS_SERVICES', 'KEYWORD_RIGHT_TO_BE_FORGOTTEN', 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE', 'KEYWORD_RISK_PUBLIC_HEALTH', 'KEYWORD_SELF_MUTILATION', 'KEYWORD_STALKING', 'KEYWORD_SUICIDE', 'KEYWORD_TERRORIST_CONTENT', 'KEYWORD_TRADE_SECRET_INFRINGEMENT', 'KEYWORD_TRADEMARK_INFRINGEMENT', 'KEYWORD_UNLAWFUL_SALE_ANIMALS', 'KEYWORD_UNSAFE_CHALLENGES', 'KEYWORD_OTHER'], compute_time_to_action: bool = False, compute_restriction_duration: bool = False, normalize_platform_name: bool = False, normalize_content_type_other: bool = False)

Bases: BaseModel

The base model to validate the load file arguments.

class Config

Bases: object

use_enum_values = True
columns_datetime: List[TDB_datetimeColumns] | None
columns_to_fill_bool: List[RawAndExplodedColumn]
columns_to_fill_str: List[RawAndExplodedColumn]
columns_to_import: List[TDB_columnsFull] | None
compute_restriction_duration: bool
compute_time_to_action: bool
content_date_range: List[datetime] | None
created_at_date_range: List[datetime] | None
decision_date_range: List[datetime] | None
del_original: bool
dump_files_pattern: str | List
explode_cols: bool
fillna_bool: bool | None
fillna_str: str | None
input_format: TDB_chunkFormat
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to pydantic.config.ConfigDict.

normalize_content_type_other: bool
normalize_platform_name: bool
class dsa_tdb.types.PreprocessArguments(*, dump_files_folder: str, version: TDB_dailyDumpsVersion, platform: str, check_sha1: bool = True, force_sha1: bool = False, from_date: date | None = None, to_date: date | None = None, n_processes: int = 1, do_chunking: bool = False, platforms_to_exclude: List[str] | None = None, chunk_size: int = 1000000, chunk_format: TDB_chunkFormat = TDB_chunkFormat.csv, delete_original: bool = False, loglevel: int = 20, override_chunked_subfolder: str = 'daily_dumps_chunked', raise_on_error: bool = True)

Bases: BaseModel

The base model to validate the preprocess arguments.

class Config

Bases: object

use_enum_values = True
check_sha1: bool
chunk_format: TDB_chunkFormat
chunk_size: int
delete_original: bool
do_chunking: bool
dump_files_folder: str
force_sha1: bool
from_date: date | None
loglevel: int
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to pydantic.config.ConfigDict.

n_processes: int
override_chunked_subfolder: str
platform: str
platforms_to_exclude: List[str] | None
raise_on_error: bool
to_date: date | None
version: TDB_dailyDumpsVersion
class dsa_tdb.types.RawAndExplodedColumn(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

CONTENT_TYPE_ACCOUNT = 'CONTENT_TYPE_ACCOUNT'
CONTENT_TYPE_AD = 'CONTENT_TYPE_AD'
CONTENT_TYPE_APP = 'CONTENT_TYPE_APP'
CONTENT_TYPE_AUDIO = 'CONTENT_TYPE_AUDIO'
CONTENT_TYPE_HASHTAG = 'CONTENT_TYPE_HASHTAG'
CONTENT_TYPE_IMAGE = 'CONTENT_TYPE_IMAGE'
CONTENT_TYPE_OTHER = 'CONTENT_TYPE_OTHER'
CONTENT_TYPE_PRODUCT = 'CONTENT_TYPE_PRODUCT'
CONTENT_TYPE_STICKER = 'CONTENT_TYPE_STICKER'
CONTENT_TYPE_SYNTHETIC_MEDIA = 'CONTENT_TYPE_SYNTHETIC_MEDIA'
CONTENT_TYPE_TEXT = 'CONTENT_TYPE_TEXT'
CONTENT_TYPE_VIDEO = 'CONTENT_TYPE_VIDEO'
DECISION_ACCOUNT_SUSPENDED = 'DECISION_ACCOUNT_SUSPENDED'
DECISION_ACCOUNT_TERMINATED = 'DECISION_ACCOUNT_TERMINATED'
DECISION_MONETARY_OTHER = 'DECISION_MONETARY_OTHER'
DECISION_MONETARY_SUSPENSION = 'DECISION_MONETARY_SUSPENSION'
DECISION_MONETARY_TERMINATION = 'DECISION_MONETARY_TERMINATION'
DECISION_PROVISION_PARTIAL_SUSPENSION = 'DECISION_PROVISION_PARTIAL_SUSPENSION'
DECISION_PROVISION_PARTIAL_TERMINATION = 'DECISION_PROVISION_PARTIAL_TERMINATION'
DECISION_PROVISION_TOTAL_SUSPENSION = 'DECISION_PROVISION_TOTAL_SUSPENSION'
DECISION_PROVISION_TOTAL_TERMINATION = 'DECISION_PROVISION_TOTAL_TERMINATION'
DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED = 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED'
DECISION_VISIBILITY_CONTENT_DEMOTED = 'DECISION_VISIBILITY_CONTENT_DEMOTED'
DECISION_VISIBILITY_CONTENT_DISABLED = 'DECISION_VISIBILITY_CONTENT_DISABLED'
DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED = 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED'
DECISION_VISIBILITY_CONTENT_LABELLED = 'DECISION_VISIBILITY_CONTENT_LABELLED'
DECISION_VISIBILITY_CONTENT_REMOVED = 'DECISION_VISIBILITY_CONTENT_REMOVED'
DECISION_VISIBILITY_OTHER = 'DECISION_VISIBILITY_OTHER'
KEYWORD_ADULT_SEXUAL_MATERIAL = 'KEYWORD_ADULT_SEXUAL_MATERIAL'
KEYWORD_AGE_SPECIFIC_RESTRICTIONS = 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS'
KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS = 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS'
KEYWORD_ANIMAL_HARM = 'KEYWORD_ANIMAL_HARM'
KEYWORD_BIOMETRIC_DATA_BREACH = 'KEYWORD_BIOMETRIC_DATA_BREACH'
KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL = 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL'
KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS = 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS'
KEYWORD_COORDINATED_HARM = 'KEYWORD_COORDINATED_HARM'
KEYWORD_DANGEROUS_TOYS = 'KEYWORD_DANGEROUS_TOYS'
KEYWORD_DATA_FALSIFICATION = 'KEYWORD_DATA_FALSIFICATION'
KEYWORD_DEFAMATION = 'KEYWORD_DEFAMATION'
KEYWORD_DESIGN_INFRINGEMENT = 'KEYWORD_DESIGN_INFRINGEMENT'
KEYWORD_DISCRIMINATION = 'KEYWORD_DISCRIMINATION'
KEYWORD_DISINFORMATION = 'KEYWORD_DISINFORMATION'
KEYWORD_FOREIGN_INFORMATION_MANIPULATION = 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION'
KEYWORD_GENDER_BASED_VIOLENCE = 'KEYWORD_GENDER_BASED_VIOLENCE'
KEYWORD_GEOGRAPHICAL_REQUIREMENTS = 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS'
KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT = 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT'
KEYWORD_GOODS_SERVICES_NOT_PERMITTED = 'KEYWORD_GOODS_SERVICES_NOT_PERMITTED'
KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS = 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS'
KEYWORD_HATE_SPEECH = 'KEYWORD_HATE_SPEECH'
KEYWORD_HUMAN_EXPLOITATION = 'KEYWORD_HUMAN_EXPLOITATION'
KEYWORD_HUMAN_TRAFFICKING = 'KEYWORD_HUMAN_TRAFFICKING'
KEYWORD_ILLEGAL_ORGANIZATIONS = 'KEYWORD_ILLEGAL_ORGANIZATIONS'
KEYWORD_IMAGE_BASED_SEXUAL_ABUSE = 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE'
KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING = 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING'
KEYWORD_INAUTHENTIC_ACCOUNTS = 'KEYWORD_INAUTHENTIC_ACCOUNTS'
KEYWORD_INAUTHENTIC_LISTINGS = 'KEYWORD_INAUTHENTIC_LISTINGS'
KEYWORD_INAUTHENTIC_USER_REVIEWS = 'KEYWORD_INAUTHENTIC_USER_REVIEWS'
KEYWORD_INCITEMENT_VIOLENCE_HATRED = 'KEYWORD_INCITEMENT_VIOLENCE_HATRED'
KEYWORD_INSUFFICIENT_INFORMATION_TRADERS = 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS'
KEYWORD_LANGUAGE_REQUIREMENTS = 'KEYWORD_LANGUAGE_REQUIREMENTS'
KEYWORD_MISINFORMATION = 'KEYWORD_MISINFORMATION'
KEYWORD_MISSING_PROCESSING_GROUND = 'KEYWORD_MISSING_PROCESSING_GROUND'
KEYWORD_NON_CONSENSUAL_IMAGE_SHARING = 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING'
KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE = 'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE'
KEYWORD_NUDITY = 'KEYWORD_NUDITY'
KEYWORD_ONLINE_BULLYING_INTIMIDATION = 'KEYWORD_ONLINE_BULLYING_INTIMIDATION'
KEYWORD_OTHER = 'KEYWORD_OTHER'
KEYWORD_PATENT_INFRINGEMENT = 'KEYWORD_PATENT_INFRINGEMENT'
KEYWORD_PHISHING = 'KEYWORD_PHISHING'
KEYWORD_PYRAMID_SCHEMES = 'KEYWORD_PYRAMID_SCHEMES'
KEYWORD_REGULATED_GOODS_SERVICES = 'KEYWORD_REGULATED_GOODS_SERVICES'
KEYWORD_RIGHT_TO_BE_FORGOTTEN = 'KEYWORD_RIGHT_TO_BE_FORGOTTEN'
KEYWORD_RISK_ENVIRONMENTAL_DAMAGE = 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE'
KEYWORD_RISK_PUBLIC_HEALTH = 'KEYWORD_RISK_PUBLIC_HEALTH'
KEYWORD_SELF_MUTILATION = 'KEYWORD_SELF_MUTILATION'
KEYWORD_STALKING = 'KEYWORD_STALKING'
KEYWORD_SUICIDE = 'KEYWORD_SUICIDE'
KEYWORD_TERRORIST_CONTENT = 'KEYWORD_TERRORIST_CONTENT'
KEYWORD_TRADEMARK_INFRINGEMENT = 'KEYWORD_TRADEMARK_INFRINGEMENT'
KEYWORD_TRADE_SECRET_INFRINGEMENT = 'KEYWORD_TRADE_SECRET_INFRINGEMENT'
KEYWORD_UNLAWFUL_SALE_ANIMALS = 'KEYWORD_UNLAWFUL_SALE_ANIMALS'
KEYWORD_UNSAFE_CHALLENGES = 'KEYWORD_UNSAFE_CHALLENGES'
STATEMENT_CATEGORY_ANIMAL_WELFARE = 'STATEMENT_CATEGORY_ANIMAL_WELFARE'
STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS = 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS'
STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH = 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH'
STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS = 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS'
STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS = 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS'
STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR = 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR'
STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT = 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT'
STATEMENT_CATEGORY_PROTECTION_OF_MINORS = 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS'
STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY = 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY'
STATEMENT_CATEGORY_SCAMS_AND_FRAUD = 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD'
STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE = 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE'
STATEMENT_CATEGORY_SELF_HARM = 'STATEMENT_CATEGORY_SELF_HARM'
STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS = 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS'
STATEMENT_CATEGORY_VIOLENCE = 'STATEMENT_CATEGORY_VIOLENCE'
account_type = 'account_type'
application_date = 'application_date'
automated_decision = 'automated_decision'
automated_detection = 'automated_detection'
category = 'category'
category_addition = 'category_addition'
category_specification = 'category_specification'
category_specification_other = 'category_specification_other'
content_date = 'content_date'
content_language = 'content_language'
content_type = 'content_type'
content_type_other = 'content_type_other'
created_at = 'created_at'
decision_account = 'decision_account'
decision_facts = 'decision_facts'
decision_ground = 'decision_ground'
decision_ground_reference_url = 'decision_ground_reference_url'
decision_monetary = 'decision_monetary'
decision_monetary_other = 'decision_monetary_other'
decision_provision = 'decision_provision'
decision_visibility = 'decision_visibility'
decision_visibility_other = 'decision_visibility_other'
end_date_account_restriction = 'end_date_account_restriction'
end_date_monetary_restriction = 'end_date_monetary_restriction'
end_date_service_restriction = 'end_date_service_restriction'
end_date_visibility_restriction = 'end_date_visibility_restriction'
illegal_content_explanation = 'illegal_content_explanation'
incompatible_content_explanation = 'incompatible_content_explanation'
incompatible_content_ground = 'incompatible_content_ground'
incompatible_content_illegal = 'incompatible_content_illegal'
platform_name = 'platform_name'
platform_uid = 'platform_uid'
source_identity = 'source_identity'
source_type = 'source_type'
territorial_scope = 'territorial_scope'
uuid = 'uuid'
class dsa_tdb.types.RawAndExplodedColumns(*, columns: List[RawAndExplodedColumn])

Bases: BaseModel

The base model to validate the columns found when importing and/or exploding.

class Config

Bases: object

use_enum_values = True
columns: List[RawAndExplodedColumn]
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to pydantic.config.ConfigDict.

class dsa_tdb.types.SourceType(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the source type.

SOURCE_ARTICLE_16 = 'SOURCE_ARTICLE_16'
SOURCE_TRUSTED_FLAGGER = 'SOURCE_TRUSTED_FLAGGER'
SOURCE_TYPE_OTHER_NOTIFICATION = 'SOURCE_TYPE_OTHER_NOTIFICATION'
SOURCE_VOLUNTARY = 'SOURCE_VOLUNTARY'
class dsa_tdb.types.TDB_agg_data_folder_prefix(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the folder prefixes for the available formats of the aggregated files.

csv = 'csv'
parquet = 'pqt'
class dsa_tdb.types.TDB_agg_data_format(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the available formats of the aggregated files.

csv = 'csv'
parquet = 'parquet'
class dsa_tdb.types.TDB_agg_data_versions(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the available versions of the aggregated files.

complete = 'complete'
simple = 'simple'
class dsa_tdb.types.TDB_chunkFormat(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the formats of the chunks.

csv = 'csv'
parquet = 'parquet'
pickle = 'pickle'
class dsa_tdb.types.TDB_columnsFull(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of all columns in the DSA Transparency Database and full daily dumps.

account_type = 'account_type'
application_date = 'application_date'
automated_decision = 'automated_decision'
automated_detection = 'automated_detection'
category = 'category'
category_addition = 'category_addition'
category_specification = 'category_specification'
category_specification_other = 'category_specification_other'
content_date = 'content_date'
content_language = 'content_language'
content_type = 'content_type'
content_type_other = 'content_type_other'
created_at = 'created_at'
decision_account = 'decision_account'
decision_facts = 'decision_facts'
decision_ground = 'decision_ground'
decision_ground_reference_url = 'decision_ground_reference_url'
decision_monetary = 'decision_monetary'
decision_monetary_other = 'decision_monetary_other'
decision_provision = 'decision_provision'
decision_visibility = 'decision_visibility'
decision_visibility_other = 'decision_visibility_other'
end_date_account_restriction = 'end_date_account_restriction'
end_date_monetary_restriction = 'end_date_monetary_restriction'
end_date_service_restriction = 'end_date_service_restriction'
end_date_visibility_restriction = 'end_date_visibility_restriction'
illegal_content_explanation = 'illegal_content_explanation'
incompatible_content_explanation = 'incompatible_content_explanation'
incompatible_content_ground = 'incompatible_content_ground'
incompatible_content_illegal = 'incompatible_content_illegal'
platform_name = 'platform_name'
platform_uid = 'platform_uid'
source_identity = 'source_identity'
source_type = 'source_type'
territorial_scope = 'territorial_scope'
uuid = 'uuid'
class dsa_tdb.types.TDB_columnsLight(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of all columns in the light daily dump files.

account_type = 'account_type'
application_date = 'application_date'
automated_decision = 'automated_decision'
automated_detection = 'automated_detection'
category = 'category'
category_addition = 'category_addition'
category_specification = 'category_specification'
category_specification_other = 'category_specification_other'
content_date = 'content_date'
content_language = 'content_language'
content_type = 'content_type'
content_type_other = 'content_type_other'
created_at = 'created_at'
decision_account = 'decision_account'
decision_ground = 'decision_ground'
decision_ground_reference_url = 'decision_ground_reference_url'
decision_monetary = 'decision_monetary'
decision_monetary_other = 'decision_monetary_other'
decision_provision = 'decision_provision'
decision_visibility = 'decision_visibility'
decision_visibility_other = 'decision_visibility_other'
end_date_account_restriction = 'end_date_account_restriction'
end_date_monetary_restriction = 'end_date_monetary_restriction'
end_date_service_restriction = 'end_date_service_restriction'
end_date_visibility_restriction = 'end_date_visibility_restriction'
incompatible_content_ground = 'incompatible_content_ground'
incompatible_content_illegal = 'incompatible_content_illegal'
platform_name = 'platform_name'
platform_uid = 'platform_uid'
source_identity = 'source_identity'
source_type = 'source_type'
uuid = 'uuid'
class dsa_tdb.types.TDB_dailyDumpsVersion(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the versions of the daily dumps.

full = 'full'
light = 'light'
class dsa_tdb.types.TDB_datetimeColumns(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of all datetime columns in the DSA Transparency Database and full daily dumps.

application_date = 'application_date'
content_date = 'content_date'
created_at = 'created_at'
end_date_account_restriction = 'end_date_account_restriction'
end_date_monetary_restriction = 'end_date_monetary_restriction'
end_date_service_restriction = 'end_date_service_restriction'
end_date_visibility_restriction = 'end_date_visibility_restriction'
class dsa_tdb.types.TDB_freetextColumns(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

category_specification_other = 'category_specification_other'
content_type_other = 'content_type_other'
decision_facts = 'decision_facts'
decision_monetary_other = 'decision_monetary_other'
decision_visibility_other = 'decision_visibility_other'
illegal_content_explanation = 'illegal_content_explanation'
incompatible_content_explanation = 'incompatible_content_explanation'
class dsa_tdb.types.TerritorialScope(value, names=<not given>, *values, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: StrEnum

Enum of the territorial scope.

AT = 'AT'
BE = 'BE'
BG = 'BG'
CY = 'CY'
CZ = 'CZ'
DE = 'DE'
DK = 'DK'
EE = 'EE'
EEA = 'EEA'
EEA_no_IS = 'EEA_no_IS'
ES = 'ES'
EU = 'EU'
FI = 'FI'
FR = 'FR'
GR = 'GR'
HR = 'HR'
HU = 'HU'
IE = 'IE'
IS = 'IS'
IT = 'IT'
LI = 'LI'
LT = 'LT'
LU = 'LU'
LV = 'LV'
MT = 'MT'
NL = 'NL'
NO = 'NO'
PL = 'PL'
PT = 'PT'
RO = 'RO'
SE = 'SE'
SI = 'SI'
SK = 'SK'
class dsa_tdb.types.UseColumns(*, columns: List[TDB_columnsFull])

Bases: BaseModel

The base model to validate the columns to use.

class Config

Bases: object

use_enum_values = True
columns: List[TDB_columnsFull]
model_config: ClassVar[ConfigDict] = {'use_enum_values': True}

Configuration for the model; it should be a dictionary conforming to pydantic.config.ConfigDict.

dsa_tdb.types.all_columns = ['uuid', 'decision_visibility', 'decision_visibility_other', 'end_date_visibility_restriction', 'decision_monetary', 'decision_monetary_other', 'end_date_monetary_restriction', 'decision_provision', 'end_date_service_restriction', 'decision_account', 'end_date_account_restriction', 'account_type', 'decision_ground', 'decision_ground_reference_url', 'illegal_content_legal_ground', 'illegal_content_explanation', 'incompatible_content_ground', 'incompatible_content_explanation', 'incompatible_content_illegal', 'category', 'category_addition', 'category_specification', 'category_specification_other', 'content_type', 'content_type_other', 'content_language', 'content_date', 'territorial_scope', 'application_date', 'decision_facts', 'source_type', 'source_identity', 'automated_detection', 'automated_decision', 'platform_name', 'platform_uid', 'created_at']

The list of all the columns in the DSA Transparency Database and full daily dumps.

dsa_tdb.types.all_columns_light = ['uuid', 'decision_visibility', 'decision_visibility_other', 'end_date_visibility_restriction', 'decision_monetary', 'decision_monetary_other', 'end_date_monetary_restriction', 'decision_provision', 'end_date_service_restriction', 'decision_account', 'end_date_account_restriction', 'account_type', 'decision_ground', 'decision_ground_reference_url', 'illegal_content_legal_ground', 'incompatible_content_ground', 'incompatible_content_illegal', 'category', 'category_addition', 'category_specification', 'category_specification_other', 'content_type', 'content_type_other', 'content_language', 'content_date', 'application_date', 'source_type', 'source_identity', 'automated_detection', 'automated_decision', 'platform_name', 'platform_uid', 'created_at']

The list of all the columns in the daily light dump files. These are the same columns as in the full files, except for the {‘decision_facts’, ‘illegal_content_explanation’, ‘incompatible_content_explanation’, ‘territorial_scope’} columns, which are not included in the light version.

dsa_tdb.types.columns_common_prefixes = {'category': 'STATEMENT_CATEGORY_', 'category_addition': 'STATEMENT_CATEGORY_', 'category_specification': 'KEYWORD_', 'content_type': 'CONTENT_TYPE_', 'decision_visibility': 'DECISION_VISIBILITY_'}

The common prefixes to remove from the columns having multiple values.

dsa_tdb.types.columns_to_explode = {'category_addition': ['STATEMENT_CATEGORY_ANIMAL_WELFARE', 'STATEMENT_CATEGORY_DATA_PROTECTION_AND_PRIVACY_VIOLATIONS', 'STATEMENT_CATEGORY_ILLEGAL_OR_HARMFUL_SPEECH', 'STATEMENT_CATEGORY_INTELLECTUAL_PROPERTY_INFRINGEMENTS', 'STATEMENT_CATEGORY_NEGATIVE_EFFECTS_ON_CIVIC_DISCOURSE_OR_ELECTIONS', 'STATEMENT_CATEGORY_NON_CONSENSUAL_BEHAVIOUR', 'STATEMENT_CATEGORY_PORNOGRAPHY_OR_SEXUALIZED_CONTENT', 'STATEMENT_CATEGORY_PROTECTION_OF_MINORS', 'STATEMENT_CATEGORY_RISK_FOR_PUBLIC_SECURITY', 'STATEMENT_CATEGORY_SCAMS_AND_FRAUD', 'STATEMENT_CATEGORY_SELF_HARM', 'STATEMENT_CATEGORY_SCOPE_OF_PLATFORM_SERVICE', 'STATEMENT_CATEGORY_UNSAFE_AND_ILLEGAL_PRODUCTS', 'STATEMENT_CATEGORY_VIOLENCE'], 'category_specification': ['KEYWORD_ANIMAL_HARM', 'KEYWORD_ADULT_SEXUAL_MATERIAL', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS_MINORS', 'KEYWORD_AGE_SPECIFIC_RESTRICTIONS', 'KEYWORD_BIOMETRIC_DATA_BREACH', 'KEYWORD_CHILD_SEXUAL_ABUSE_MATERIAL', 'KEYWORD_CONTENT_PROMOTING_EATING_DISORDERS', 'KEYWORD_COORDINATED_HARM', 'KEYWORD_COPYRIGHT_INFRINGEMENT', 'KEYWORD_DANGEROUS_TOYS', 'KEYWORD_DATA_FALSIFICATION', 'KEYWORD_DEFAMATION', 'KEYWORD_DESIGN_INFRINGEMENT', 'KEYWORD_DISCRIMINATION', 'KEYWORD_DISINFORMATION', 'KEYWORD_FOREIGN_INFORMATION_MANIPULATION', 'KEYWORD_GENDER_BASED_VIOLENCE', 'KEYWORD_GEOGRAPHIC_INDICATIONS_INFRINGEMENT', 'KEYWORD_GEOGRAPHICAL_REQUIREMENTS', 'KEYWORD_GOODS_SERVICES_NOT_PERMITTED', 'KEYWORD_GROOMING_SEXUAL_ENTICEMENT_MINORS', 'KEYWORD_HATE_SPEECH', 'KEYWORD_HUMAN_EXPLOITATION', 'KEYWORD_HUMAN_TRAFFICKING', 'KEYWORD_ILLEGAL_ORGANIZATIONS', 'KEYWORD_IMAGE_BASED_SEXUAL_ABUSE', 'KEYWORD_IMPERSONATION_ACCOUNT_HIJACKING', 'KEYWORD_INAUTHENTIC_ACCOUNTS', 'KEYWORD_INAUTHENTIC_LISTINGS', 'KEYWORD_INAUTHENTIC_USER_REVIEWS', 'KEYWORD_INCITEMENT_VIOLENCE_HATRED', 'KEYWORD_INSUFFICIENT_INFORMATION_TRADERS', 'KEYWORD_LANGUAGE_REQUIREMENTS', 'KEYWORD_MISINFORMATION', 'KEYWORD_MISSING_PROCESSING_GROUND', 'KEYWORD_NON_CONSENSUAL_IMAGE_SHARING', 
'KEYWORD_NON_CONSENSUAL_ITEMS_DEEPFAKE', 'KEYWORD_NUDITY', 'KEYWORD_ONLINE_BULLYING_INTIMIDATION', 'KEYWORD_PATENT_INFRINGEMENT', 'KEYWORD_PHISHING', 'KEYWORD_PYRAMID_SCHEMES', 'KEYWORD_REGULATED_GOODS_SERVICES', 'KEYWORD_RIGHT_TO_BE_FORGOTTEN', 'KEYWORD_RISK_ENVIRONMENTAL_DAMAGE', 'KEYWORD_RISK_PUBLIC_HEALTH', 'KEYWORD_SELF_MUTILATION', 'KEYWORD_STALKING', 'KEYWORD_SUICIDE', 'KEYWORD_TERRORIST_CONTENT', 'KEYWORD_TRADE_SECRET_INFRINGEMENT', 'KEYWORD_TRADEMARK_INFRINGEMENT', 'KEYWORD_UNLAWFUL_SALE_ANIMALS', 'KEYWORD_UNSAFE_CHALLENGES', 'KEYWORD_OTHER'], 'content_type': ['CONTENT_TYPE_APP', 'CONTENT_TYPE_AUDIO', 'CONTENT_TYPE_IMAGE', 'CONTENT_TYPE_PRODUCT', 'CONTENT_TYPE_SYNTHETIC_MEDIA', 'CONTENT_TYPE_TEXT', 'CONTENT_TYPE_VIDEO', 'CONTENT_TYPE_OTHER', 'CONTENT_TYPE_LINK', 'CONTENT_TYPE_ACCOUNT', 'CONTENT_TYPE_AD', 'CONTENT_TYPE_STICKER', 'CONTENT_TYPE_HASHTAG'], 'decision_visibility': ['DECISION_VISIBILITY_CONTENT_REMOVED', 'DECISION_VISIBILITY_CONTENT_DISABLED', 'DECISION_VISIBILITY_CONTENT_DEMOTED', 'DECISION_VISIBILITY_CONTENT_AGE_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_INTERACTION_RESTRICTED', 'DECISION_VISIBILITY_CONTENT_LABELLED', 'DECISION_VISIBILITY_OTHER']}

The dictionary mapping the name of each multi-valued column to the list of possible values it can contain.

dsa_tdb.types.datetime_columns = ['content_date', 'application_date', 'created_at', 'end_date_account_restriction', 'end_date_monetary_restriction', 'end_date_service_restriction', 'end_date_visibility_restriction']

The list of the columns containing datetime values.

dsa_tdb.types.datetime_format = 'ISO8601'

The format of the datetime values.

dsa_tdb.types.datetime_format_strftime = '%Y-%m-%d %H:%M:%S'

The format of the datetime values for the strftime method.

dsa_tdb.types.territorial_scopes = {'EEA': {'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'LI', 'LT', 'LU', 'LV', 'MT', 'NL', 'NO', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK'}, 'EEA_no_IS': {'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IT', 'LI', 'LT', 'LU', 'LV', 'MT', 'NL', 'NO', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK'}, 'EU': {'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK'}}

The dictionary mapping the name of each aggregate territorial scope to the set of country codes it covers. TODO: this must be ported to a pydantic type.