Reading Data From Amorphic Datalake
The following examples show how to read data from the Amorphic data lake using amorphicutils.
1. Python shell
1.1. Reading CSV data
1.1.1. Reading full dataset
# Example: read a complete CSV dataset and keep the returned data.
from amorphicutils.python import read
# GlueSpark is imported explicitly: the bare name `pyspark` is never bound in
# this snippet, so `pyspark.infra.gluespark.GlueSpark()` raised NameError.
from amorphicutils.pyspark.infra.gluespark import GlueSpark

# Fill in these placeholders before running.
lz_bucket = ""  # bucket name handed to read.Read (presumably the landing-zone bucket -- confirm)
domain = ""
dataset = ""

# Obtain the Glue context and the Spark session it exposes.
glue_spark = GlueSpark()
glue_context = glue_spark.get_glue_context()
# NOTE(review): `spark` is not passed to the reader below -- confirm whether
# this example needs the Glue/Spark setup at all.
spark = glue_context.spark_session

reader = read.Read(bucket_name=lz_bucket)
# The first line of each file is treated as a header; fields are comma-separated.
result = reader.read_csv_data(domain, dataset, schema=None, header=True, delimiter=",")

# The result is indexed by "message" and "data".
print(result["message"])
df = result["data"]
# Example: read a Parquet dataset and keep the returned data.
from amorphicutils.python import read
# GlueSpark is imported explicitly: the bare name `pyspark` is never bound in
# this snippet, so `pyspark.infra.gluespark.GlueSpark()` raised NameError.
from amorphicutils.pyspark.infra.gluespark import GlueSpark

# Fill in these placeholders before running.
lz_bucket = ""  # bucket name handed to read.Read (presumably the landing-zone bucket -- confirm)
domain = ""
dataset = ""

# Obtain the Glue context and the Spark session it exposes.
glue_spark = GlueSpark()
glue_context = glue_spark.get_glue_context()
# NOTE(review): `spark` is not passed to the reader below -- confirm whether
# this example needs the Glue/Spark setup at all.
spark = glue_context.spark_session

reader = read.Read(bucket_name=lz_bucket)
result = reader.read_parquet(domain, dataset)

# The result is indexed by "message" and "data".
print(result["message"])
df = result["data"]