"""Demonstrate a full outer join of two small PySpark DataFrames on `ID`."""
import pyspark
from pyspark.sql import SparkSession

# Create (or reuse) a SparkSession for this script.
spark = SparkSession.builder.appName('sparkdf').getOrCreate()

# Employee records: ID, NAME, Company.
data = [
    ["1", "sravan", "company 1"],
    ["2", "ojaswi", "company 1"],
    ["3", "rohith", "company 2"],
    ["4", "sridevi", "company 1"],
    ["5", "bobby", "company 1"],
]
columns = ['ID', 'NAME', 'Company']
dataframe = spark.createDataFrame(data, columns)

# Salary records: ID, salary, department.  IDs 3 and 4 have no salary row
# and ID 6 has no employee row, so the full join produces nulls both ways.
data1 = [
    ["1", "45000", "IT"],
    ["2", "145000", "Manager"],
    ["6", "45000", "HR"],
    ["5", "34000", "Sales"],
]
# Distinct name for the second schema; the original rebound `columns`,
# which invites accidental reuse of the wrong schema.
columns1 = ['ID', 'salary', 'department']
dataframe1 = spark.createDataFrame(data1, columns1)

# "full" = full outer join: keep unmatched rows from both sides.
dataframe.join(dataframe1, dataframe.ID == dataframe1.ID, "full").show()