Version: 0.17.23
SparkAzureBlobStorageDatasource
-
class great_expectations.datasource.fluent.SparkAzureBlobStorageDatasource(*,
type: Literal['spark_abs'] = 'spark_abs',
name: str,
id: Optional[uuid.UUID] = None,
assets: List[Union[great_expectations.datasource.fluent.spark_file_path_datasource.CSVAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DirectoryCSVAsset, great_expectations.datasource.fluent.spark_file_path_datasource.ParquetAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DirectoryParquetAsset, great_expectations.datasource.fluent.spark_file_path_datasource.ORCAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DirectoryORCAsset, great_expectations.datasource.fluent.spark_file_path_datasource.JSONAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DirectoryJSONAsset, great_expectations.datasource.fluent.spark_file_path_datasource.TextAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DirectoryTextAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DeltaAsset, great_expectations.datasource.fluent.spark_file_path_datasource.DirectoryDeltaAsset]] = [],
spark_config: Optional[Dict[pydantic.v1.types.StrictStr, Union[pydantic.v1.types.StrictStr, pydantic.v1.types.StrictInt, pydantic.v1.types.StrictFloat, pydantic.v1.types.StrictBool]]] = None,
force_reuse_spark_context: bool = True,
persist: bool = True,
azure_options: Dict[str, Union[great_expectations.datasource.fluent.config_str.ConfigStr, Any]] = {})
-
-
add_csv_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
schema: Optional[Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None,
sep: Optional[str] = None,
encoding: Optional[str] = None,
quote: Optional[str] = None,
escape: Optional[str] = None,
comment: Optional[str] = None,
header: Optional[Union[bool, str]] = None,
inferSchema: Optional[Union[bool, str]] = None,
ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = None,
ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = None,
nullValue: Optional[str] = None,
nanValue: Optional[str] = None,
positiveInf: Optional[str] = None,
negativeInf: Optional[str] = None,
dateFormat: Optional[str] = None,
timestampFormat: Optional[str] = None,
maxColumns: Optional[Union[int, str]] = None,
maxCharsPerColumn: Optional[Union[int, str]] = None,
maxMalformedLogPerPartition: Optional[Union[int, str]] = None,
mode: Optional[Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None,
columnNameOfCorruptRecord: Optional[str] = None,
multiLine: Optional[Union[bool, str]] = None,
charToEscapeQuoteEscaping: Optional[str] = None,
samplingRatio: Optional[Union[float, str]] = None,
enforceSchema: Optional[Union[bool, str]] = None,
emptyValue: Optional[str] = None,
locale: Optional[str] = None,
lineSep: Optional[str] = None,
unescapedQuoteHandling: Optional[Literal['STOP_AT_CLOSING_QUOTE', 'BACK_TO_DELIMITER', 'STOP_AT_DELIMITER', 'SKIP_VALUE', 'RAISE_ERROR']] = None)
→
pydantic.BaseModel
-
add_delta_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
timestampAsOf: Optional[str] = None,
versionAsOf: Optional[str] = None)
→
pydantic.BaseModel
-
add_directory_csv_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
schema: Optional[Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None,
sep: Optional[str] = None,
encoding: Optional[str] = None,
quote: Optional[str] = None,
escape: Optional[str] = None,
comment: Optional[str] = None,
header: Optional[Union[bool, str]] = None,
inferSchema: Optional[Union[bool, str]] = None,
ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = None,
ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = None,
nullValue: Optional[str] = None,
nanValue: Optional[str] = None,
positiveInf: Optional[str] = None,
negativeInf: Optional[str] = None,
dateFormat: Optional[str] = None,
timestampFormat: Optional[str] = None,
maxColumns: Optional[Union[int, str]] = None,
maxCharsPerColumn: Optional[Union[int, str]] = None,
maxMalformedLogPerPartition: Optional[Union[int, str]] = None,
mode: Optional[Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None,
columnNameOfCorruptRecord: Optional[str] = None,
multiLine: Optional[Union[bool, str]] = None,
charToEscapeQuoteEscaping: Optional[str] = None,
samplingRatio: Optional[Union[float, str]] = None,
enforceSchema: Optional[Union[bool, str]] = None,
emptyValue: Optional[str] = None,
locale: Optional[str] = None,
lineSep: Optional[str] = None,
unescapedQuoteHandling: Optional[Literal['STOP_AT_CLOSING_QUOTE', 'BACK_TO_DELIMITER', 'STOP_AT_DELIMITER', 'SKIP_VALUE', 'RAISE_ERROR']] = None,
data_directory: pathlib.Path)
→
pydantic.BaseModel
-
add_directory_delta_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
timestampAsOf: Optional[str] = None,
versionAsOf: Optional[str] = None,
data_directory: pathlib.Path)
→
pydantic.BaseModel
-
add_directory_json_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
schema: Optional[Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None,
primitivesAsString: Optional[Union[bool, str]] = None,
prefersDecimal: Optional[Union[bool, str]] = None,
allowComments: Optional[Union[bool, str]] = None,
allowUnquotedFieldNames: Optional[Union[bool, str]] = None,
allowSingleQuotes: Optional[Union[bool, str]] = None,
allowNumericLeadingZero: Optional[Union[bool, str]] = None,
allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = None,
mode: Optional[Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None,
columnNameOfCorruptRecord: Optional[str] = None,
dateFormat: Optional[str] = None,
timestampFormat: Optional[str] = None,
multiLine: Optional[Union[bool, str]] = None,
allowUnquotedControlChars: Optional[Union[bool, str]] = None,
lineSep: Optional[str] = None,
samplingRatio: Optional[Union[float, str]] = None,
dropFieldIfAllNull: Optional[Union[bool, str]] = None,
encoding: Optional[str] = None,
locale: Optional[str] = None,
allowNonNumericNumbers: Optional[Union[bool, str]] = None,
data_directory: pathlib.Path)
→
pydantic.BaseModel
-
add_directory_orc_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
mergeSchema: Optional[Union[bool, str]] = False,
data_directory: pathlib.Path)
→
pydantic.BaseModel
-
add_directory_parquet_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
mergeSchema: Optional[Union[bool, str]] = None,
datetimeRebaseMode: Optional[Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None,
int96RebaseMode: Optional[Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None,
data_directory: pathlib.Path)
→
pydantic.BaseModel
-
add_directory_text_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
wholetext: bool = False,
lineSep: Optional[str] = None,
data_directory: pathlib.Path)
→
pydantic.BaseModel
-
add_json_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
schema: Optional[Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType, str]] = None,
primitivesAsString: Optional[Union[bool, str]] = None,
prefersDecimal: Optional[Union[bool, str]] = None,
allowComments: Optional[Union[bool, str]] = None,
allowUnquotedFieldNames: Optional[Union[bool, str]] = None,
allowSingleQuotes: Optional[Union[bool, str]] = None,
allowNumericLeadingZero: Optional[Union[bool, str]] = None,
allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = None,
mode: Optional[Literal['PERMISSIVE', 'DROPMALFORMED', 'FAILFAST']] = None,
columnNameOfCorruptRecord: Optional[str] = None,
dateFormat: Optional[str] = None,
timestampFormat: Optional[str] = None,
multiLine: Optional[Union[bool, str]] = None,
allowUnquotedControlChars: Optional[Union[bool, str]] = None,
lineSep: Optional[str] = None,
samplingRatio: Optional[Union[float, str]] = None,
dropFieldIfAllNull: Optional[Union[bool, str]] = None,
encoding: Optional[str] = None,
locale: Optional[str] = None,
allowNonNumericNumbers: Optional[Union[bool, str]] = None)
→
pydantic.BaseModel
-
add_orc_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
mergeSchema: Optional[Union[bool, str]] = False)
→
pydantic.BaseModel
-
add_parquet_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
mergeSchema: Optional[Union[bool, str]] = None,
datetimeRebaseMode: Optional[Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None,
int96RebaseMode: Optional[Literal['EXCEPTION', 'CORRECTED', 'LEGACY']] = None)
→
pydantic.BaseModel
-
add_text_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
pathGlobFilter: Optional[Union[bool, str]] = None,
recursiveFileLookup: Optional[Union[bool, str]] = None,
modifiedBefore: Optional[Union[bool, str]] = None,
modifiedAfter: Optional[Union[bool, str]] = None,
wholetext: bool = False,
lineSep: Optional[str] = None)
→
pydantic.BaseModel