from pyspark.sql.functions import col
from delta.tables import DeltaTable
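# Assumed setup (not part of the original walk-through): outside a Delta-enabled
# notebook such as Databricks, create a SparkSession with the Delta Lake
# extensions so the "delta" format is available.
from pyspark.sql import SparkSession
spark = (
    SparkSession.builder.appName("delta-rollback-demo")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog",
            "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)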
# Create sample Delta table
data = spark.range(0, 4).withColumn("value", col("id") * 2)
data.show()
data.write.format("delta").mode("overwrite").save("delta-table")
# Update the Delta table
delta_table = DeltaTable.forPath(spark, "delta-table")
delta_table.update(
    condition=col("id") % 2 == 0,
    set={"value": col("value") + 1}
)
delta_table.toDF().orderBy("id").show()
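# Optional check (output not shown below): each write creates a new table version,
# so the update above should appear as version 1 in the table history.
delta_table.history().select("version", "operation").show()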
# "Rollback" to version 0 using time-travel
rollback_version = 0
rollback_data = (
    spark.read.format("delta")
    .option("versionAsOf", rollback_version)
    .load("delta-table")
)
rollback_data.orderBy("id").show()
Output of the first show() (the table as created, version 0):

+---+-----+
| id|value|
+---+-----+
|  0|    0|
|  1|    2|
|  2|    4|
|  3|    6|
+---+-----+

Output after the update (version 1); rows with an even id now have value + 1:

+---+-----+
| id|value|
+---+-----+
|  0|    1|
|  1|    2|
|  2|    5|
|  3|    6|
+---+-----+

Output of the time-travel read with versionAsOf = 0; the original data is back:

+---+-----+
| id|value|
+---+-----+
|  0|    0|
|  1|    2|
|  2|    4|
|  3|    6|
+---+-----+
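Reading with versionAsOf only returns the old snapshot; it does not change the current state of the table. To make the rollback permanent, write that snapshot back over the table (sketched below), or use DeltaTable.restoreToVersion if your delta-spark release includes the RESTORE command.

# Persist the rollback by overwriting the current table with the version-0 snapshot
rollback_data.write.format("delta").mode("overwrite").save("delta-table")
# The current table now returns the original rows again
spark.read.format("delta").load("delta-table").orderBy("id").show()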
That is how to roll back a Delta table to a previous version using time travel: update data in Delta Lake as usual, then use the versionAsOf read option to travel back to the version you want and recover the earlier data.
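Time travel also works by timestamp instead of version number, via the timestampAsOf read option. A minimal sketch (the timestamp below is a placeholder, not one of the versions created above):

historical = (
    spark.read.format("delta")
    .option("timestampAsOf", "2024-01-01 00:00:00")  # placeholder timestamp
    .load("delta-table")
)
historical.orderBy("id").show()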