from
pyspark.sql
import
SparkSession
from
pyspark.sql.functions
import
udf
from
pyspark.sql.types
import
StructType, StructField, StringType, IntegerType, ArrayType
# Obtain the SparkSession for this script via the builder's getOrCreate().
spark_session = SparkSession.builder.getOrCreate()
# Sample rows: ((first, middle, last), date_of_birth, gender, fees).
# NOTE(review): '2000-21-02' has month "21", so it is not a valid
# year-month-day date; it is kept verbatim here -- confirm the intended value.
data_set = [
    (('Ishita', 'Rai', 'Pundir'), '2000-21-02', 'Male', 13000),
    (('Aia', 'Singh', 'Rajput'), '2004-01-06', 'Female', 10000),
]
# Explicit schema for the sample rows: a nested Full_Name struct made of
# three nullable string parts, followed by three nullable scalar columns.
schema = StructType([
    StructField(
        'Full_Name',
        StructType([
            StructField(part, StringType(), True)
            for part in ('First_Name', 'Middle_Name', 'Last_Name')
        ]),
    ),
    StructField('Date_Of_Birth', StringType(), True),
    StructField('Gender', StringType(), True),
    StructField('Fees', IntegerType(), True),
])
# Build the DataFrame from the sample rows using the explicit schema.
df = spark_session.createDataFrame(data_set, schema)
def _sort_name_parts(name):
    """Return the values of a struct as an ascending list of strings.

    The schema declares the struct and each of its fields nullable, so the
    input may be ``None`` or may contain ``None`` parts. A null struct yields
    null; null parts are kept and placed first (the same convention as
    Spark's built-in ``sort_array`` in ascending order).
    """
    if name is None:
        return None
    # Sort on (is-not-null, value) so None is never compared with a str,
    # which would raise TypeError under plain sorted().
    return sorted(name, key=lambda part: (part is not None, part or ""))


# UDF that sorts the fields of a struct column into an array<string>.
udf_sort = udf(_sort_name_parts, ArrayType(StringType()))
# Append the sorted name parts as a new column and print every row in full
# (truncate=False keeps long cell values from being cut off).
(
    df
    .withColumn('Sorted_Full_Name', udf_sort(df["Full_Name"]))
    .show(truncate=False)
)