roboto.domain.topics.parquet.arrow_to_roboto#

Module Contents#

roboto.domain.topics.parquet.arrow_to_roboto.arrow_type_to_canonical_type(arrow_type)#
Parameters:

arrow_type (pyarrow.DataType)

Return type:

roboto.domain.topics.record.CanonicalDataType

roboto.domain.topics.parquet.arrow_to_roboto.compute_boolean_statistics(data)#
Parameters:

data (Union[pyarrow.Array, pyarrow.ChunkedArray])

Return type:

dict[str, Any]

roboto.domain.topics.parquet.arrow_to_roboto.compute_dictionary_metadata(column_name, data, max_dictionary_size=2048)#
Parameters:
  • column_name (str)

  • data (Union[pyarrow.Array, pyarrow.ChunkedArray])

  • max_dictionary_size (int)

Return type:

dict[str, Any]

roboto.domain.topics.parquet.arrow_to_roboto.compute_numeric_statistics(data)#
Parameters:

data (Union[pyarrow.Array, pyarrow.ChunkedArray])

Return type:

dict[str, Any]

roboto.domain.topics.parquet.arrow_to_roboto.field_to_message_path_request(field, parquet_file, timestamp)#
Parameters:
  • field

  • parquet_file

  • timestamp

Return type:

roboto.domain.topics.operations.AddMessagePathRequest

roboto.domain.topics.parquet.arrow_to_roboto.generate_metadata_for_field(field, parquet_parser, timestamp)#
Parameters:
  • field

  • parquet_parser

  • timestamp

Return type:

dict[str, Any]

roboto.domain.topics.parquet.arrow_to_roboto.logger#

roboto.domain.topics.parquet.arrow_to_roboto.sanitize_column_name(field)#
Parameters:

field (pyarrow.Field)

Return type:

str