Filter
Spark Gem
Filters a DataFrame based on the provided filter condition.
Parameters
Parameter | Description | Required |
---|---|---|
DataFrame | Input DataFrame on which the filter condition will be applied. | True |
Filter Condition | BooleanType column or boolean expression. Supports SQL, Python and Scala expressions. | True |
Example
Spark Code
- Python
- Scala
def Filter_Orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    """Return the rows of ``in0`` that satisfy the order filter.

    A row is kept when all of the following hold:

    * ``order_category`` equals ``"Marketing"``
    * ``order_status`` equals ``"Finished"`` or ``"Approved"``
    * ``is_discounted`` is not true

    Args:
        spark: Active Spark session; accepted but not used by this function.
        in0: Input DataFrame the condition is applied to.

    Returns:
        The filtered DataFrame.
    """
    # Build each predicate separately so the final condition reads top-down.
    is_marketing = col("order_category") == lit("Marketing")
    is_finished = col("order_status") == lit("Finished")
    is_approved = col("order_status") == lit("Approved")
    not_discounted = ~col("is_discounted")

    # Column objects overload the bitwise operators; parentheses keep the
    # OR grouped before it is AND-ed with the other predicates.
    condition = (is_marketing & (is_finished | is_approved)) & not_discounted
    return in0.filter(condition)
object Filter_Orders {

  /** Returns the rows of `in` that satisfy the order filter.
    *
    * A row is kept when `order_category` equals "Marketing", `order_status`
    * equals "Finished" or "Approved", and `is_discounted` is not true.
    *
    * @param spark active Spark session; accepted but not used here
    * @param in    input DataFrame the condition is applied to
    * @return the filtered DataFrame
    */
  def apply(spark: SparkSession, in: DataFrame): DataFrame = {
    // Name each predicate so the combined condition is easy to read.
    val isMarketing   = col("order_category") === lit("Marketing")
    val isFinished    = col("order_status") === lit("Finished")
    val isApproved    = col("order_status") === lit("Approved")
    val notDiscounted = !col("is_discounted")

    // `&&` / `||` are the operator forms of Column.and / Column.or.
    in.filter((isMarketing && (isFinished || isApproved)) && notDiscounted)
  }
}