Class: SparkConnect::DataFrameWriterV2

Inherits:
Object
  • Object
show all
Defined in:
lib/spark_connect/writer.rb

Overview

The DataSourceV2 write interface, returned by SparkConnect::DataFrame#write_to. Mirrors PySpark's DataFrameWriterV2.

Examples:

df.write_to("catalog.db.table").using("parquet").create
df.write_to("catalog.db.table").append

Constant Summary collapse

Proto =
SparkConnect::Proto
WO2 =
Proto::WriteOperationV2

Instance Method Summary collapse

Constructor Details

#initialize(df, table) ⇒ DataFrameWriterV2

Returns a new instance of DataFrameWriterV2.

Parameters:



149
150
151
152
153
154
155
156
# File 'lib/spark_connect/writer.rb', line 149

# Sets up a writer for the given DataFrame and target table.
#
# @param df [Object] the source DataFrame; stored unchanged in @df
# @param table [#to_s] target table identifier; stored in its string form
def initialize(df, table)
  @table = table.to_s
  @df = df
  # Builder state starts out unset/empty until the fluent setters are called.
  @provider = nil
  @table_properties = {}
  @options = {}
  @partitioning = []
end

Instance Method Details

#append ⇒ Object

Appends rows to the table. Returns nothing (void).



189
190
# File 'lib/spark_connect/writer.rb', line 189

# Appends rows by delegating to #run with the :MODE_APPEND mode.
#
# @return [void]
def append
  run(:MODE_APPEND)
end
# Overwrite rows matching `condition`. @return [void]

#create ⇒ Object

Creates the table. Returns nothing (void).



183
184
# File 'lib/spark_connect/writer.rb', line 183

# Creates the table by delegating to #run with the :MODE_CREATE mode.
#
# @return [void]
def create
  run(:MODE_CREATE)
end
# Replace the table. @return [void]

#create_or_replace ⇒ Object

Creates or replaces the table. Returns nothing (void).



187
188
# File 'lib/spark_connect/writer.rb', line 187

# Creates or replaces the table by delegating to #run with the
# :MODE_CREATE_OR_REPLACE mode.
#
# @return [void]
def create_or_replace
  run(:MODE_CREATE_OR_REPLACE)
end
# Append rows. @return [void]

#option(key, value) ⇒ self

Sets a write option and returns self.

Returns:

  • (self)

    set a write option.



165
166
167
168
# File 'lib/spark_connect/writer.rb', line 165

# Records a write option. Both the key and the value are stored as strings.
#
# @param key [#to_s] option name
# @param value [#to_s] option value
# @return [self] the writer, so calls can be chained
def option(key, value)
  name = key.to_s
  setting = value.to_s
  @options.store(name, setting)
  self
end

#overwrite(condition) ⇒ Object

Overwrites rows matching `condition`. Returns nothing (void).



191
192
# File 'lib/spark_connect/writer.rb', line 191

# Overwrites rows matching `condition` by delegating to #run with the
# :MODE_OVERWRITE mode.
#
# @param condition [Object] converted with Column.to_col and then #to_expr
#   before being passed on as the overwrite condition
# @return [void]
def overwrite(condition)
  predicate = Column.to_col(condition).to_expr
  run(:MODE_OVERWRITE, overwrite_condition: predicate)
end
# Dynamically overwrite partitions. @return [void]

#overwrite_partitions ⇒ Object

Dynamically overwrites partitions. Returns nothing (void).



193
# File 'lib/spark_connect/writer.rb', line 193

# Dynamically overwrites partitions by delegating to #run with the
# :MODE_OVERWRITE_PARTITIONS mode.
#
# @return [void]
def overwrite_partitions
  run(:MODE_OVERWRITE_PARTITIONS)
end

#partition_by(*cols) ⇒ self

Partitions by the given expressions/columns and returns self.

Returns:

  • (self)

    partition by the given expressions/columns.



177
178
179
180
# File 'lib/spark_connect/writer.rb', line 177

# Replaces the partitioning with the given expressions/columns.
#
# Nested arrays are flattened. Column instances are used directly; anything
# else is stringified and wrapped with Functions.col. Each entry is stored
# in its #to_expr form.
#
# @param cols [Array] columns or column names (possibly nested in arrays)
# @return [self] the writer, so calls can be chained
def partition_by(*cols)
  @partitioning = cols.flatten.map do |entry|
    column = entry.is_a?(Column) ? entry : Functions.col(entry.to_s)
    column.to_expr
  end
  self
end

#replace ⇒ Object

Replaces the table. Returns nothing (void).



185
186
# File 'lib/spark_connect/writer.rb', line 185

# Replaces the table by delegating to #run with the :MODE_REPLACE mode.
#
# @return [void]
def replace
  run(:MODE_REPLACE)
end
# Create or replace the table. @return [void]

#table_property(key, value) ⇒ self

Sets a table property and returns self.

Returns:

  • (self)

    set a table property.



171
172
173
174
# File 'lib/spark_connect/writer.rb', line 171

# Records a table property. Both the key and the value are stored as strings.
#
# @param key [#to_s] property name
# @param value [#to_s] property value
# @return [self] the writer, so calls can be chained
def table_property(key, value)
  tap { @table_properties.store(key.to_s, value.to_s) }
end

#using(provider) ⇒ self

Sets the table provider/format and returns self.

Returns:

  • (self)

    set the table provider/format.



159
160
161
162
# File 'lib/spark_connect/writer.rb', line 159

def using(provider)
  @provider = provider.to_s
  self
end