Skip to main content

Overview

The KnowledgeGraph class is the core data structure in sift-kg. It’s a wrapper around NetworkX’s MultiDiGraph with specialized methods for entity and relation management.
from sift_kg import KnowledgeGraph

Constructor

kg = KnowledgeGraph(
    canonicalize_relations=True,
    confidence_aggregation="product_complement",
)

Parameters

canonicalize_relations
bool
default:True
If True, repeated mentions of the same source/relation/target triple are merged into one canonical edge, whose confidence is aggregated across all mentions (see confidence_aggregation).
confidence_aggregation
str
default:"product_complement"
How to aggregate confidence scores across multiple mentions:
  • "product_complement": Independent weak signals reinforce one another: $1 - \prod_i (1 - c_i)$, where $c_i$ is the confidence of mention $i$
  • "mean": Average confidence across all mentions
  • "max": Take highest confidence

Example

from sift_kg import KnowledgeGraph

kg = KnowledgeGraph(
    canonicalize_relations=True,
    confidence_aggregation="product_complement",
)

Class Methods

load

@classmethod
KnowledgeGraph.load(path: str | Path) -> KnowledgeGraph
Load a knowledge graph from a JSON file.
path
str | Path
required
Path to the graph JSON file (typically graph_data.json)
Returns: KnowledgeGraph instance
from pathlib import Path
from sift_kg import KnowledgeGraph

kg = KnowledgeGraph.load(Path("./output/graph_data.json"))
print(f"Loaded: {kg.entity_count} entities, {kg.relation_count} relations")

Instance Methods

add_entity

kg.add_entity(
    entity_id: str,
    entity_type: str,
    name: str,
    confidence: float = 0.5,
    source_documents: list[str] | None = None,
    **attrs: Any,
) -> None
Add or update an entity node. If the entity already exists, the new data is merged into it: the higher confidence is kept and the source documents list is extended.
entity_id
str
required
Unique entity identifier (e.g. "person:alice", "org:acme_corp")
entity_type
str
required
Entity type (e.g. "PERSON", "ORGANIZATION", "LOCATION")
name
str
required
Display name for the entity
confidence
float
default:0.5
Extraction confidence (0.0 to 1.0)
source_documents
list[str] | None
default:None
List of source document IDs where this entity was found
**attrs
Any
Additional attributes (e.g. context, attributes dict, custom fields)
kg.add_entity(
    entity_id="person:alice",
    entity_type="PERSON",
    name="Alice Johnson",
    confidence=0.95,
    source_documents=["doc1.pdf", "doc2.pdf"],
    context="CEO of TechCorp",
    attributes={"role": "executive", "department": "leadership"},
)

add_relation

kg.add_relation(
    relation_id: str,
    source_id: str,
    target_id: str,
    relation_type: str,
    confidence: float = 0.5,
    evidence: str = "",
    source_document: str = "",
    canonicalize: bool | None = None,
    confidence_aggregation: str | None = None,
) -> bool
Add a relation edge between two entities. Returns False if the source or target entity doesn’t exist.
relation_id
str
required
Unique relation identifier
source_id
str
required
Source entity ID
target_id
str
required
Target entity ID
relation_type
str
required
Relation type (e.g. "WORKS_FOR", "LOCATED_IN", "COLLABORATES_WITH")
confidence
float
default:0.5
Extraction confidence (0.0 to 1.0)
evidence
str
default:""
Text evidence supporting this relation
source_document
str
default:""
Document ID where this relation was found
canonicalize
bool | None
default:None
Override the instance-level canonicalize_relations setting
confidence_aggregation
str | None
default:None
Override the instance-level confidence_aggregation method
Returns: bool - True if added successfully, False if source/target missing
# Add a relation
success = kg.add_relation(
    relation_id="rel_001",
    source_id="person:alice",
    target_id="org:techcorp",
    relation_type="WORKS_FOR",
    confidence=0.92,
    evidence="Alice Johnson is the CEO of TechCorp.",
    source_document="annual_report.pdf",
)

if not success:
    print("Failed to add relation: entity not found")

get_entity

kg.get_entity(entity_id: str) -> dict[str, Any] | None
Get entity data by ID.
entity_id
str
required
Entity identifier
Returns: dict[str, Any] | None - Entity data dict or None if not found
entity = kg.get_entity("person:alice")
if entity:
    print(f"Name: {entity['name']}")
    print(f"Type: {entity['entity_type']}")
    print(f"Confidence: {entity['confidence']}")
    print(f"Documents: {entity['source_documents']}")

get_relations

kg.get_relations(
    entity_id: str,
    direction: str = "both",
) -> list[dict[str, Any]]
Get all relations for an entity.
entity_id
str
required
Entity identifier
direction
str
default:"both"
Direction to query:
  • "in": Incoming edges (entity is target)
  • "out": Outgoing edges (entity is source)
  • "both": All edges
Returns: list[dict[str, Any]] - List of relation dicts with source, target, and all edge attributes
# Get all relations
all_relations = kg.get_relations("person:alice", direction="both")

# Get only outgoing relations
out_relations = kg.get_relations("person:alice", direction="out")

for rel in out_relations:
    print(f"{rel['source']} --[{rel['relation_type']}]--> {rel['target']}")
    print(f"  Evidence: {rel['evidence']}")
    print(f"  Confidence: {rel['confidence']:.2f}")

save

kg.save(path: str | Path) -> None
Save the knowledge graph to a JSON file.
path
str | Path
required
Output file path
from pathlib import Path

kg.save(Path("./output/graph_data.json"))

export

kg.export(include_mentions: bool = True) -> dict[str, Any]
Export graph as a JSON-serializable dictionary.
include_mentions
bool
default:True
Whether to include individual relation mentions (the output can be large for graphs with many duplicate relations)
Returns: dict[str, Any] with keys:
  • metadata: Graph metadata (entity count, relation count, timestamps, etc.)
  • nodes: List of entity dicts
  • links: List of relation dicts
graph_data = kg.export(include_mentions=False)

print(f"Entities: {len(graph_data['nodes'])}")
print(f"Relations: {len(graph_data['links'])}")
print(f"Metadata: {graph_data['metadata']}")

Properties

entity_count

kg.entity_count -> int
Number of entities in the graph.
print(f"Graph has {kg.entity_count} entities")

relation_count

kg.relation_count -> int
Number of relations in the graph.
print(f"Graph has {kg.relation_count} relations")

Complete Example

from pathlib import Path
from sift_kg import KnowledgeGraph

# Create a new graph
kg = KnowledgeGraph(
    canonicalize_relations=True,
    confidence_aggregation="product_complement",
)

# Add entities
kg.add_entity(
    entity_id="person:alice",
    entity_type="PERSON",
    name="Alice Johnson",
    confidence=0.95,
    source_documents=["doc1.pdf"],
)

kg.add_entity(
    entity_id="org:techcorp",
    entity_type="ORGANIZATION",
    name="TechCorp Inc.",
    confidence=0.98,
    source_documents=["doc1.pdf", "doc2.pdf"],
)

# Add a relation
kg.add_relation(
    relation_id="rel_001",
    source_id="person:alice",
    target_id="org:techcorp",
    relation_type="WORKS_FOR",
    confidence=0.92,
    evidence="Alice Johnson is the CEO of TechCorp.",
    source_document="doc1.pdf",
)

# Add another mention of the same relation (will be canonicalized)
kg.add_relation(
    relation_id="rel_002",
    source_id="person:alice",
    target_id="org:techcorp",
    relation_type="WORKS_FOR",
    confidence=0.88,
    evidence="Alice leads TechCorp as its chief executive.",
    source_document="doc2.pdf",
)

# Query the graph
print(f"Entities: {kg.entity_count}")
print(f"Relations: {kg.relation_count}")  # Will be 1 (canonicalized)

entity = kg.get_entity("person:alice")
print(f"Entity: {entity['name']} ({entity['entity_type']})")

relations = kg.get_relations("person:alice", direction="out")
for rel in relations:
    print(f"  → {rel['relation_type']} {rel['target']}")
    print(f"    Confidence: {rel['confidence']:.2f}")
    print(f"    Support: {rel['support_count']} mentions from {rel['support_doc_count']} docs")

# Save the graph
kg.save(Path("./my_graph.json"))

# Load it back
kg_loaded = KnowledgeGraph.load(Path("./my_graph.json"))
print(f"Loaded: {kg_loaded.entity_count} entities")

NetworkX Integration

The underlying NetworkX graph is accessible via kg.graph:
import networkx as nx

# Access the raw NetworkX graph
g = kg.graph

# Use NetworkX algorithms
page_rank = nx.pagerank(g)
print(f"Top entities by PageRank: {sorted(page_rank.items(), key=lambda x: x[1], reverse=True)[:5]}")

# Community detection
communities = nx.community.louvain_communities(g.to_undirected())
print(f"Found {len(communities)} communities")

# Shortest path (both nodes must exist in the graph; otherwise NetworkX raises NodeNotFound)
if nx.has_path(g, "person:alice", "person:bob"):
    path = nx.shortest_path(g, "person:alice", "person:bob")
    print(f"Shortest path: {path}")

Build docs developers (and LLMs) love