Skip to content

Using from C and C++

C and C++ programs load libadbc_driver_spark through the ADBC driver manager and drive it with the plain ADBC C API declared in adbc.h. The library, which connects to Apache Spark Connect, exports the standard AdbcDriverInit entrypoint, so the driver manager can dlopen() the shared library and resolve that symbol without you setting an entrypoint option.

Prerequisites

  • The shared library libadbc_driver_spark.{so,dylib,dll}, either downloaded from Releases or built from source (see Installation).
  • The ADBC driver manager (libadbc_driver_manager) and adbc.h. The header shipped with this project lives at c/arrow-adbc/adbc.h.
  • A running Spark Connect server reachable at the sc:// URI (default sc://localhost:15002).

The example program

The runnable example lives at examples/c/quickstart.c. It creates a database handle, points the driver manager at the Spark Connect shared library through the driver option, sets the uri, opens a connection, runs a query, and reads the Arrow result stream. To stay free of an Arrow C++ or nanoarrow dependency, it only prints the column names and counts rows and batches; to inspect individual column values, consume the returned ArrowArrayStream with nanoarrow or directly through the Arrow C data interface.

/* SPDX-License-Identifier: Apache-2.0 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <adbc.h>

/* Report a failed ADBC call on stderr and hand the status back unchanged.
 *
 * what   - label for the call, used as the prefix of the diagnostic line.
 * status - the code the ADBC call returned.
 * error  - the error struct that call was given; if it carries a release
 *          callback, that callback is invoked once the message is printed.
 *
 * Nothing is printed or released when status is ADBC_STATUS_OK. Returning the
 * status lets callers write `if (check(...) != ADBC_STATUS_OK)` or
 * `return check(...)`. */
static AdbcStatusCode check(const char* what, AdbcStatusCode status,
                            struct AdbcError* error) {
  if (status == ADBC_STATUS_OK) {
    return status;
  }

  /* The message pointer may be unset; print a placeholder in that case. */
  const char* detail = error->message;
  if (!detail) {
    detail = "(no message)";
  }
  fprintf(stderr, "%s failed (status %d): %s\n", what, (int)status, detail);

  if (error->release) {
    error->release(error);
  }
  return status;
}

int main(void) {
  const char* driver_path = getenv("SPARK_DRIVER");
  const char* uri = getenv("SPARK_REMOTE");
  if (!driver_path) {
    fprintf(stderr, "set SPARK_DRIVER to the libadbc_driver_spark path\n");
    return EXIT_FAILURE;
  }
  if (!uri) {
    uri = "sc://localhost:15002";
  }

  /* Every Adbc* call takes an AdbcError that must be zero-initialized. */
  struct AdbcError error = {0};
  struct AdbcDatabase database = {0};
  struct AdbcConnection connection = {0};
  struct AdbcStatement statement = {0};
  struct ArrowArrayStream stream = {0};
  int rc = EXIT_FAILURE;

  /* 1. Create the database handle and configure it. Setting "driver" tells the
   *    driver manager which shared library to load; "uri" is the Spark Connect
   *    endpoint. */
  if (check("AdbcDatabaseNew", AdbcDatabaseNew(&database, &error), &error) != ADBC_STATUS_OK) {
    return EXIT_FAILURE;
  }
  if (check("set driver", AdbcDatabaseSetOption(&database, "driver", driver_path, &error),
            &error) != ADBC_STATUS_OK) {
    goto release_db;
  }
  if (check("set uri", AdbcDatabaseSetOption(&database, "uri", uri, &error), &error) !=
      ADBC_STATUS_OK) {
    goto release_db;
  }
  if (check("AdbcDatabaseInit", AdbcDatabaseInit(&database, &error), &error) !=
      ADBC_STATUS_OK) {
    goto release_db;
  }

  /* 2. Create and initialize a connection (one Spark Connect session). */
  if (check("AdbcConnectionNew", AdbcConnectionNew(&connection, &error), &error) !=
      ADBC_STATUS_OK) {
    goto release_db;
  }
  if (check("AdbcConnectionInit", AdbcConnectionInit(&connection, &database, &error),
            &error) != ADBC_STATUS_OK) {
    goto release_conn;
  }

  /* 3. Create a statement, set the SQL, and execute it. */
  if (check("AdbcStatementNew", AdbcStatementNew(&connection, &statement, &error),
            &error) != ADBC_STATUS_OK) {
    goto release_conn;
  }
  if (check("AdbcStatementSetSqlQuery",
            AdbcStatementSetSqlQuery(&statement,
                                     "SELECT id, id * id AS square FROM range(5)", &error),
            &error) != ADBC_STATUS_OK) {
    goto release_stmt;
  }

  int64_t rows_affected = -1;
  if (check("AdbcStatementExecuteQuery",
            AdbcStatementExecuteQuery(&statement, &stream, &rows_affected, &error),
            &error) != ADBC_STATUS_OK) {
    goto release_stmt;
  }

  /* 4. Read the result as an Arrow C stream. The driver returns native Arrow
   *    data; here we just count rows and batches to keep the example free of an
   *    Arrow C++/nanoarrow dependency. */
  struct ArrowSchema schema = {0};
  if (stream.get_schema(&stream, &schema) != 0) {
    fprintf(stderr, "get_schema failed: %s\n", stream.get_last_error(&stream));
    goto release_stream;
  }
  printf("result has %d column(s):", (int)schema.n_children);
  for (int64_t i = 0; i < schema.n_children; i++) {
    printf(" %s", schema.children[i]->name);
  }
  printf("\n");
  if (schema.release) {
    schema.release(&schema);
  }

  int64_t total_rows = 0;
  int batches = 0;
  for (;;) {
    struct ArrowArray array = {0};
    if (stream.get_next(&stream, &array) != 0) {
      fprintf(stderr, "get_next failed: %s\n", stream.get_last_error(&stream));
      goto release_stream;
    }
    /* A released/NULL array marks end of stream. */
    if (array.release == NULL) {
      break;
    }
    total_rows += array.length;
    batches++;
    array.release(&array);
  }
  printf("read %lld row(s) in %d batch(es)\n", (long long)total_rows, batches);
  rc = EXIT_SUCCESS;

release_stream:
  if (stream.release) {
    stream.release(&stream);
  }
release_stmt:
  AdbcStatementRelease(&statement, &error);
release_conn:
  AdbcConnectionRelease(&connection, &error);
release_db:
  AdbcDatabaseRelease(&database, &error);
  return rc;
}

Building

Compile against the vendored ADBC headers and link the driver manager. From the repository root:

cc examples/c/quickstart.c \
    -Ic/arrow-adbc \
    -ladbc_driver_manager \
    -o quickstart

Add -I and -L flags pointing at wherever the driver manager headers and library live on your system if they are not on the default search path.

Running

Point SPARK_DRIVER at the Spark Connect shared library. Download the tarball for your platform from the Releases page and extract it. Each tarball extracts to the current directory and contains libadbc_driver_spark.{so,dylib,dll} plus LICENSE and NOTICE:

# Download the shared library for your platform from the Releases page
curl -fsSL -o adbc-spark.tar.gz \
  https://github.com/HyukjinKwon/adbc-driver-spark/releases/latest/download/libadbc_driver_spark-linux-x86_64.tar.gz
tar xzf adbc-spark.tar.gz
export SPARK_DRIVER="$PWD/libadbc_driver_spark.so"   # .dylib on macOS, .dll on Windows

export SPARK_REMOTE=sc://localhost:15002
./quickstart

Pick the matching asset for your platform: libadbc_driver_spark-linux-x86_64.tar.gz, libadbc_driver_spark-linux-aarch64.tar.gz, libadbc_driver_spark-macos-x86_64.tar.gz, libadbc_driver_spark-macos-arm64.tar.gz, or libadbc_driver_spark-windows-x86_64.tar.gz.

Alternatively, if you already have the Python package installed (pip install adbc-driver-spark), you can locate the library bundled with it like this:

# Quote the substitution so an install path containing spaces is not word-split.
export SPARK_DRIVER="$(python -c \
  "import adbc_driver_spark, pathlib; \
   print(next(pathlib.Path(adbc_driver_spark.__file__).parent.glob('libadbc_driver_spark.*')))")"

At run time the loader must also be able to find libadbc_driver_manager if it is not on the default search path:

# Linux
LD_LIBRARY_PATH=/path/to/lib ./quickstart
# macOS
DYLD_LIBRARY_PATH=/path/to/lib ./quickstart

Tip

The example passes the library's absolute path as the driver option (SPARK_DRIVER), for example /opt/adbc/libadbc_driver_spark.so. Using the absolute path instead of a bare name avoids loader-path issues.

Note

Authentication and other settings are plain database options, set before calling AdbcDatabaseInit. To use a bearer token over TLS, add AdbcDatabaseSetOption(&database, "adbc.spark.token", "...", &error) and AdbcDatabaseSetOption(&database, "adbc.spark.tls.enabled", "true", &error). See the Configuration Reference.