Version: 0.17.23
-
class great_expectations.datasource.fluent.PandasS3Datasource(*,
type: Literal['pandas_s3'] = 'pandas_s3',
name: str,
id: Optional[uuid.UUID] = None,
assets: List[great_expectations.datasource.fluent.file_path_data_asset._FilePathDataAsset] = [],
bucket: str,
boto3_options: Dict[str, Union[great_expectations.datasource.fluent.config_str.ConfigStr, Any]] = {})
-
-
add_csv_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
sep: Optional[str] = None,
delimiter: Optional[str] = None,
header: Union[int, Sequence[int], None, Literal['infer']] = 'infer',
names: Union[Sequence[str], None] = None,
index_col: Union[IndexLabel, Literal[False], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[CSVEngine, None] = None,
converters: Any = None,
true_values: Any = None,
false_values: Any = None,
skipinitialspace: bool = False,
skiprows: Optional[Union[Sequence[int], int]] = None,
skipfooter: int = 0,
nrows: Optional[int] = None,
na_values: Any = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
skip_blank_lines: bool = True,
parse_dates: Union[bool, Sequence[str], None] = None,
infer_datetime_format: bool = None,
keep_date_col: bool = False,
date_parser: Any = None,
date_format: Optional[str] = None,
dayfirst: bool = False,
cache_dates: bool = True,
iterator: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
thousands: Optional[str] = None,
decimal: str = '.',
lineterminator: Optional[str] = None,
quotechar: str = '"',
quoting: int = 0,
doublequote: bool = True,
escapechar: Optional[str] = None,
comment: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
dialect: Optional[str] = None,
on_bad_lines: str = 'error',
delim_whitespace: bool = False,
low_memory: Any = True,
memory_map: bool = False,
float_precision: Union[Literal['high', 'legacy'], None] = None,
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel
-
add_excel_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
sheet_name: Optional[Union[str, int, List[Union[int, str]]]] = 0,
header: Union[int, Sequence[int], None] = 0,
names: Optional[List[str]] = None,
index_col: Union[int, Sequence[int], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb'], None] = None,
true_values: Union[Iterable[str], None] = None,
false_values: Union[Iterable[str], None] = None,
skiprows: Optional[Union[Sequence[int], int]] = None,
nrows: Optional[int] = None,
na_values: Any = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
parse_dates: Union[List, Dict, bool] = False,
date_format: Optional[str] = None,
thousands: Optional[str] = None,
decimal: str = '.',
comment: Optional[str] = None,
skipfooter: int = 0,
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel
-
add_feather_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
columns: Union[Sequence[str], None] = None,
use_threads: bool = True,
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel
-
add_fwf_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
colspecs: Union[Sequence[Tuple[int, int]], str, None] = 'infer',
widths: Union[Sequence[int], None] = None,
infer_nrows: int = 100,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**data)
→
pydantic.BaseModel
-
add_hdf_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
key: Any = None,
mode: str = 'r',
errors: str = 'strict',
where: Optional[Union[str, List]] = None,
start: Optional[int] = None,
stop: Optional[int] = None,
columns: Optional[List[str]] = None,
iterator: bool = False,
chunksize: Optional[int] = None,
kwargs: Optional[dict] = None,
**data)
→
pydantic.BaseModel
-
add_html_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
match: Union[str, Pattern] = '.+',
flavor: Optional[str] = None,
header: Union[int, Sequence[int], None] = None,
index_col: Union[int, Sequence[int], None] = None,
skiprows: Optional[Union[Sequence[int], int]] = None,
attrs: Optional[Dict[str, str]] = None,
parse_dates: bool = False,
thousands: Optional[str] = ',',
encoding: Optional[str] = None,
decimal: str = '.',
converters: Optional[Dict] = None,
na_values: Union[Iterable[object], None] = None,
keep_default_na: bool = True,
displayed_only: bool = True,
extract_links: Literal[None, 'header', 'footer', 'body', 'all'] = None,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel
-
add_json_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
orient: Optional[str] = None,
typ: Literal['frame', 'series'] = 'frame',
dtype: Optional[dict] = None,
convert_axes: Any = None,
convert_dates: Union[bool, List[str]] = True,
keep_default_dates: bool = True,
precise_float: bool = False,
date_unit: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
lines: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
nrows: Optional[int] = None,
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel
-
add_orc_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
columns: Optional[List[str]] = None,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**data)
→
pydantic.BaseModel
-
add_parquet_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
engine: str = 'auto',
columns: Optional[List[str]] = None,
storage_options: StorageOptions = None,
use_nullable_dtypes: bool = None,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**data)
→
pydantic.BaseModel
-
add_pickle_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
compression: CompressionOptions = 'infer',
storage_options: StorageOptions = None,
**data)
→
pydantic.BaseModel
-
add_sas_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
format: Optional[str] = None,
index: Optional[str] = None,
encoding: Optional[str] = None,
chunksize: Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
**data)
→
pydantic.BaseModel
-
add_spss_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
convert_categoricals: bool = True,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel
-
add_stata_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
convert_dates: bool = True,
convert_categoricals: bool = True,
index_col: Optional[str] = None,
convert_missing: bool = False,
preserve_dtypes: bool = True,
columns: Union[Sequence[str], None] = None,
order_categoricals: bool = True,
chunksize: Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
storage_options: StorageOptions = None,
**data)
→
pydantic.BaseModel
-
add_xml_asset(name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batching_regex: Pattern = re.compile('.*'),
connect_options: Mapping = None,
splitter: Optional[Union[great_expectations.datasource.fluent.spark_generic_splitters.SplitterColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterMultiColumnValue, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDividedInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterModInteger, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYear, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonth, great_expectations.datasource.fluent.spark_generic_splitters.SplitterYearAndMonthAndDay, great_expectations.datasource.fluent.spark_generic_splitters.SplitterDatetimePart]] = None,
xpath: str = './*',
namespaces: Optional[Dict[str, str]] = None,
elems_only: bool = False,
attrs_only: bool = False,
names: Union[Sequence[str], None] = None,
dtype: Optional[dict] = None,
encoding: Optional[str] = 'utf-8',
stylesheet: Union[FilePath, None] = None,
iterparse: Optional[Dict[str, List[str]]] = None,
compression: CompressionOptions = 'infer',
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend = None,
**data)
→
pydantic.BaseModel