Skip to main content

Module functions

toradb.local(path: str) -> Database

Open or create a database at path.
import toradb
db = toradb.local("./my_db")

toradb.connect(path: str) -> Database

Alias for local.

Database

Database(path: str)

Same as toradb.local(path).

create_table(name, mode=None, schema=None) -> Table

name
str
required
Table name.
mode
str
default:"text"
text or hybrid.
schema
dict
Column definitions for hybrid tables, e.g. {"embedding": "vector[8]"}.
docs = db.create_table("docs", mode="text")

table(name: str) -> Table

Open an existing table without running CREATE TABLE.

list_tables() -> list[str]

Return table names in the database.

sql(query: str) -> str | SearchResults | AnalyticsResults

Execute one SQL statement. Return type depends on the statement:
  • Retrieval SELECT → SearchResults
  • Analytics SELECT with GROUP BY → AnalyticsResults (an analytics object with .to_pandas())
  • DDL / catalog → str message

sql_stream(query: str, batch_size=128) -> list[SearchResults]

Page through a retrieval SELECT. Does not support GROUP BY or EXPLAIN.

reindex(table, using=None, column=None) -> str

using
str
default:"BM25"
Index type: BM25, HNSW, DISKANN, etc.
column
str
default:"text"
Column to index.
db.reindex("articles", using="BM25")

begin_bulk_ingest(table: str) -> None

Start a bulk load session on table. Defers per-batch dense index rebuilds and table-level index writes until finish_bulk_ingest.

bulk_ingest_active(table: str) -> bool

Return whether table is in an active bulk ingest session.

finish_bulk_ingest(table, compact=False, reindex_bm25=False) -> None

Finalize indexes after bulk load: builds deferred segment BM25 sidecars, merges table indexes, and reloads corpus texts for search.
compact
bool
default:False
Run segment compaction after writing table indexes.
reindex_bm25
bool
default:False
Extra CREATE INDEX BM25 pass (usually unnecessary — finish already builds BM25).
db.begin_bulk_ingest("docs")
# ... many add / add_arrow calls ...
db.finish_bulk_ingest("docs", compact=True)

fetch_documents(table: str, ids: list[int]) -> dict[int, dict]

Load text and metadata for specific document ids. Uses the in-memory corpus when loaded; otherwise reads only the Parquet segments that can contain those ids (via segment_id_ranges in the table manifest). On disk, ToraDB maps contiguous ids to row offsets and uses Parquet row selection (plus optional page indexes) instead of scanning whole segments. Pairs with segment_only bulk ingest when search returns ids without loading the full table into RAM.
results = db.table("passages").search("diabetes treatment", top_k=10)
ids = [int(doc_id) for doc_id in results.to_pandas()["id"]]
docs = db.fetch_documents("passages", ids)
print(docs[ids[0]]["text"])

index_build_status(table: str) -> dict | None

Read {table}/indexes/build_status.json without loading the full corpus. Returns None when no build is in progress and no status file exists.

resume_index_build(table: str, compact=False) -> None

Resume or rerun an index build after a crash or a partial finish. The operation is idempotent: segment BM25 builds are skipped when their sidecars are already current. See Bulk load.