// taskgraph_ts/src/graph/construction.ts

// TaskGraph class construction — fromTasks, fromRecords, fromJSON, incremental building

import { DirectedGraph } from 'graphology';
import { subgraph as graphologySubgraph } from 'graphology-operators';
import { Value } from '@alkdev/typebox/value';
import type {
  TaskGraphNodeAttributes,
  TaskGraphEdgeAttributes,
  TaskGraphSerialized,
  TaskInput,
  DependencyEdge,
} from '../schema/index.js';
import { TaskGraphSerialized as TaskGraphSerializedSchema } from '../schema/index.js';
import {
  DuplicateNodeError,
  DuplicateEdgeError,
  TaskNotFoundError,
  InvalidInputError,
  type ValidationError,
  type GraphValidationError,
  type AnyValidationError,
} from '../error/index.js';
import {
  removeTask as _removeTask,
  removeDependency as _removeDependency,
  updateTask as _updateTask,
  updateEdgeAttributes as _updateEdgeAttributes,
} from './mutation.js';
import {
  hasCycles as _hasCycles,
  findCycles as _findCycles,
  topologicalOrder as _topologicalOrder,
  dependencies as _dependencies,
  dependents as _dependents,
  taskCount as _taskCount,
  getTask as _getTask,
} from './queries.js';
import {
  validateSchema as _validateSchema,
  validateGraph as _validateGraph,
  validate as _validate,
} from './validation.js';

/**
 * Internal graph type alias: graphology's `DirectedGraph` parameterized with
 * the TaskGraph node and edge attribute types.
 *
 * This is the concrete type of the underlying graphology instance wrapped by
 * `TaskGraph`, and the type exposed through `TaskGraph.raw`.
 */
export type TaskGraphInner = DirectedGraph<TaskGraphNodeAttributes, TaskGraphEdgeAttributes>;

// ---------------------------------------------------------------------------
// Helper: strip null → undefined for TaskInput → TaskGraphNodeAttributes
// ---------------------------------------------------------------------------

/**
 * Project a `TaskInput` onto the attribute set stored on a graph node.
 *
 * - `name` is always copied.
 * - Each categorical field (`status`, `scope`, `risk`, `impact`, `level`,
 *   `priority`) is copied only when it is neither `null` nor `undefined`, so
 *   "null" and "absent" on the input both end up as "not stored" on the node.
 * - Every other input field (tags, assignee, due, created, modified, ...) is
 *   left out: those belong to the caller/consumer, not to the graph.
 *
 * @param input - The task input to convert
 * @returns A fresh attributes object containing `name` plus the categorical
 *          fields that carried a value
 */
function taskInputToNodeAttrs(input: TaskInput): TaskGraphNodeAttributes {
  const { name, status, scope, risk, impact, level, priority } = input;

  // `x != null && { x }` spreads nothing when the field is null/undefined,
  // so unset categorical fields never appear as keys on the result.
  return {
    name,
    ...(status != null && { status }),
    ...(scope != null && { scope }),
    ...(risk != null && { risk }),
    ...(impact != null && { impact }),
    ...(level != null && { level }),
    ...(priority != null && { priority }),
  };
}

/**
 * Quality retention assigned to a dependency edge when the caller does not
 * supply one (`fromTasks`, `fromRecords` edges without `qualityRetention`,
 * and `addDependency` without its third argument).
 */
const DEFAULT_QUALITY_RETENTION = 0.9;

/**
 * One edge record in graphology's serialized format, as assembled by the
 * bulk constructors (`fromTasks`, `fromRecords`) before import.
 */
interface SerializedEdgeEntry {
  key: string;
  source: string;
  target: string;
  attributes: TaskGraphEdgeAttributes;
}

/**
 * TaskGraph wraps a graphology DirectedGraph and provides the foundation
 * for construction, mutation, and query methods.
 *
 * Edges follow the **prerequisite → dependent** convention:
 * if task B has `dependsOn: ["A"]`, the edge is A → B.
 *
 * Constraints enforced by the underlying graph options:
 * - **No parallel edges** (`multi: false`): between any node pair, at most one edge.
 * - **No self-loops** (`allowSelfLoops: false`): a node cannot depend on itself.
 * - **Directed** (`type: 'directed'`): all edges have a direction.
 *
 * Edge keys are deterministic: `${source}->${target}` (per ADR-006).
 *
 * > **Warning on `raw`**: Mutating the underlying graphology instance directly
 * > bypasses TaskGraph's validation and invariants. Consumers using `raw`
 * > should treat the graph as read-only for structural changes and use
 * > TaskGraph methods for all mutations.
 */
export class TaskGraph {
  /** The underlying graphology DirectedGraph instance. */
  private readonly _graph: TaskGraphInner;

  /**
   * Create a new TaskGraph.
   *
   * @param data - Optional serialized graph data to initialize from (delegates to `fromJSON`).
   *               When provided, the graph is populated from the serialized data.
   *               When omitted, creates an empty graph.
   * @throws {InvalidInputError} if `data` is provided and fails schema validation
   */
  constructor(data?: TaskGraphSerialized) {
    this._graph = new DirectedGraph<TaskGraphNodeAttributes, TaskGraphEdgeAttributes>({
      type: 'directed',
      multi: false,
      allowSelfLoops: false,
    });

    if (data) {
      TaskGraph.fromJSON(data, this);
    }
  }

  /**
   * Returns the underlying graphology DirectedGraph instance.
   *
   * Use this for read-only access (queries, event listeners) or for
   * operations not yet exposed by TaskGraph. Avoid mutating the graph
   * directly — prefer TaskGraph methods for all structural changes.
   */
  get raw(): TaskGraphInner {
    return this._graph;
  }

  /**
   * Produce a deterministic edge key from source and target node keys.
   *
   * Format: `${source}->${target}` (per ADR-006).
   *
   * This is used internally by addDependency and the bulk construction
   * methods, so every edge TaskGraph creates is keyed the same way.
   *
   * @param source - Source (prerequisite) node key
   * @param target - Target (dependent) node key
   * @returns Deterministic edge key string
   */
  protected _edgeKey(source: string, target: string): string {
    return `${source}->${target}`;
  }

  // ---------------------------------------------------------------------------
  // Mutation methods
  // ---------------------------------------------------------------------------

  /**
   * Remove a task (node) from the graph.
   *
   * No-op if the node doesn't exist. Graphology automatically removes
   * all edges attached to the dropped node (cascade edge removal).
   *
   * @param id - The task ID to remove
   */
  removeTask(id: string): void {
    _removeTask(this._graph, id);
  }

  /**
   * Remove a dependency (edge) from the graph.
   *
   * No-op if the edge doesn't exist. Uses the deterministic edge key
   * `${prerequisite}->${dependent}` to identify the edge (per ADR-006).
   *
   * @param prerequisite - Source (prerequisite) task ID
   * @param dependent - Target (dependent) task ID
   */
  removeDependency(prerequisite: string, dependent: string): void {
    _removeDependency(this._graph, prerequisite, dependent);
  }

  /**
   * Update a task's attributes with a partial merge.
   *
   * Throws `TaskNotFoundError` if the task ID doesn't exist.
   * Uses a shallow merge of the provided attributes into the existing
   * node attributes.
   *
   * @param id - The task ID to update
   * @param attributes - Partial attributes to merge into the existing node
   * @throws {TaskNotFoundError} If the task ID doesn't exist in the graph
   */
  updateTask(id: string, attributes: Partial<TaskGraphNodeAttributes>): void {
    _updateTask(this._graph, id, attributes);
  }

  /**
   * Update an edge's attributes with a partial merge.
   *
   * Throws `TaskNotFoundError` if the edge doesn't exist.
   * Uses the deterministic edge key `${prerequisite}->${dependent}` to
   * identify the edge (per ADR-006).
   *
   * @param prerequisite - Source (prerequisite) task ID
   * @param dependent - Target (dependent) task ID
   * @param attrs - Partial edge attributes to merge into the existing edge
   * @throws {TaskNotFoundError} If the edge doesn't exist in the graph
   */
  updateEdgeAttributes(prerequisite: string, dependent: string, attrs: Partial<TaskGraphEdgeAttributes>): void {
    _updateEdgeAttributes(this._graph, prerequisite, dependent, attrs);
  }

  // ---------------------------------------------------------------------------
  // Static construction methods
  // ---------------------------------------------------------------------------

  /**
   * Import prepared node and edge records into `target` in a single
   * `graph.import()` call.
   *
   * Shared by `fromTasks` and `fromRecords` so both build the serialized blob
   * (graph options, node list, edge list) identically.
   *
   * @param target - The TaskGraph whose underlying graph receives the records
   * @param nodes - Node attributes keyed by task ID; map order becomes node order
   * @param edges - Edge records, already deduplicated and keyed
   */
  private static _importBulk(
    target: TaskGraph,
    nodes: Map<string, TaskGraphNodeAttributes>,
    edges: SerializedEdgeEntry[],
  ): void {
    const serialized = {
      attributes: {} as Record<string, unknown>,
      options: {
        type: 'directed' as const,
        multi: false as const,
        allowSelfLoops: false as const,
      },
      nodes: Array.from(nodes, ([key, attributes]) => ({ key, attributes })),
      edges,
    };

    target._graph.import(serialized);
  }

  /**
   * Construct a TaskGraph from an array of TaskInput objects.
   *
   * Transforms `TaskInput[]` into node data + edge data, builds a serialized
   * blob, and calls `graph.import()` once instead of issuing N individual
   * addNode/addEdge calls.
   *
   * Semantics:
   * - Each `dependsOn` entry creates an edge with default `qualityRetention: 0.9`.
   * - `dependsOn` targets not matching any task ID become **orphan nodes**
   *   with default attributes (`{ name: <dep-id> }`), appended after the
   *   declared tasks in order of first appearance.
   * - Duplicate task IDs throw `DuplicateNodeError` before anything is imported.
   * - Duplicate `dependsOn` entries for the same pair create only one edge
   *   (deduplicated via the deterministic edge key).
   * - Cycles are NOT rejected at construction time — call `hasCycles()` or
   *   `validateGraph()` to detect.
   *
   * @param tasks - Array of TaskInput objects
   * @returns A new TaskGraph populated from the task inputs
   * @throws {DuplicateNodeError} if duplicate task IDs are found
   */
  static fromTasks(tasks: TaskInput[]): TaskGraph {
    // Register every declared task first, so duplicate IDs are rejected before
    // any graph is touched and orphan detection below sees the full ID set.
    const nodeMap = new Map<string, TaskGraphNodeAttributes>();
    for (const task of tasks) {
      if (nodeMap.has(task.id)) {
        throw new DuplicateNodeError(task.id);
      }
      nodeMap.set(task.id, taskInputToNodeAttrs(task));
    }

    const tg = new TaskGraph();

    // Keyed by edge key so repeated dependsOn entries collapse to one edge
    // while first-seen order is preserved.
    const edgeEntries = new Map<string, SerializedEdgeEntry>();

    for (const task of tasks) {
      for (const dep of task.dependsOn) {
        const key = tg._edgeKey(dep, task.id);
        if (!edgeEntries.has(key)) {
          edgeEntries.set(key, {
            key,
            source: dep,
            target: task.id,
            attributes: { qualityRetention: DEFAULT_QUALITY_RETENTION },
          });
        }

        // A dependsOn target that is not a declared task becomes an orphan
        // node with default attributes.
        if (!nodeMap.has(dep)) {
          nodeMap.set(dep, { name: dep });
        }
      }
    }

    TaskGraph._importBulk(tg, nodeMap, Array.from(edgeEntries.values()));
    return tg;
  }

  /**
   * Construct a TaskGraph from explicit task and edge arrays.
   *
   * Unlike `fromTasks`, edges are provided explicitly with per-edge `qualityRetention`
   * (defaulting to 0.9 when an edge omits it). This method is strict:
   * - Edges must reference tasks that exist in the `tasks` array —
   *   throws `TaskNotFoundError` for dangling references.
   * - Duplicate task IDs throw `DuplicateNodeError`.
   * - Duplicate edges (same prerequisite→dependent pair) throw `DuplicateEdgeError`.
   * - Cycles are NOT rejected at construction time.
   *
   * @param tasks - Array of TaskInput objects
   * @param edges - Array of DependencyEdge objects
   * @returns A new TaskGraph populated from the records
   * @throws {DuplicateNodeError} if duplicate task IDs are found
   * @throws {DuplicateEdgeError} if duplicate prerequisite→dependent pairs are found
   * @throws {TaskNotFoundError} if an edge references a task ID not in the tasks array
   */
  static fromRecords(tasks: TaskInput[], edges: DependencyEdge[]): TaskGraph {
    // Register tasks, rejecting duplicate IDs.
    const nodeMap = new Map<string, TaskGraphNodeAttributes>();
    for (const task of tasks) {
      if (nodeMap.has(task.id)) {
        throw new DuplicateNodeError(task.id);
      }
      nodeMap.set(task.id, taskInputToNodeAttrs(task));
    }

    const tg = new TaskGraph();
    const edgeEntries = new Map<string, SerializedEdgeEntry>();

    for (const edge of edges) {
      const { from: prerequisite, to: dependent } = edge;

      // Both endpoints must be declared tasks; the prerequisite is checked first.
      if (!nodeMap.has(prerequisite)) {
        throw new TaskNotFoundError(prerequisite);
      }
      if (!nodeMap.has(dependent)) {
        throw new TaskNotFoundError(dependent);
      }

      const key = tg._edgeKey(prerequisite, dependent);
      if (edgeEntries.has(key)) {
        throw new DuplicateEdgeError(prerequisite, dependent);
      }

      edgeEntries.set(key, {
        key,
        source: prerequisite,
        target: dependent,
        attributes: {
          qualityRetention: edge.qualityRetention ?? DEFAULT_QUALITY_RETENTION,
        },
      });
    }

    TaskGraph._importBulk(tg, nodeMap, Array.from(edgeEntries.values()));
    return tg;
  }

  /**
   * Construct a TaskGraph from serialized data (graphology native JSON format).
   *
   * Validates input against the `TaskGraphSerialized` schema using TypeBox
   * `Value.Check`. Invalid data throws an `InvalidInputError` derived from
   * the first TypeBox validation error.
   *
   * If a `target` TaskGraph is provided, it is populated in-place and returned.
   * Otherwise, a new TaskGraph is created and populated.
   *
   * Orphan nodes in the JSON are preserved (graphology import doesn't enforce
   * connectivity).
   *
   * @param data - Serialized graph data in graphology native JSON format
   * @param target - Optional existing TaskGraph to populate (used by constructor)
   * @returns A TaskGraph populated from the serialized data
   * @throws {InvalidInputError} if data fails schema validation
   */
  static fromJSON(data: TaskGraphSerialized, target?: TaskGraph): TaskGraph {
    // Validate input against TaskGraphSerialized schema
    if (!Value.Check(TaskGraphSerializedSchema, data)) {
      const errors = Value.Errors(TaskGraphSerializedSchema, data);
      const firstError = errors.First();
      if (firstError) {
        throw InvalidInputError.fromTypeBoxError(firstError);
      }
      // Fallback if no specific error found (shouldn't happen, but be safe)
      throw new InvalidInputError('data', 'Input does not match TaskGraphSerialized schema');
    }

    const graph = target ?? new TaskGraph();
    graph._graph.import(data);
    return graph;
  }

  // ---------------------------------------------------------------------------
  // Export methods
  // ---------------------------------------------------------------------------

  /**
   * Export the graph as a serialized object in graphology native JSON format.
   *
   * The returned object conforms to the `TaskGraphSerialized` schema and
   * includes all node attributes (name, scope, risk, etc.) and edge attributes
   * (including `qualityRetention`).
   *
   * The output can be passed to `TaskGraph.fromJSON()` for a round-trip.
   *
   * @returns A `TaskGraphSerialized` object representing this graph
   */
  export(): TaskGraphSerialized {
    return this._graph.export() as TaskGraphSerialized;
  }

  /**
   * Alias for `export()`. Enables `JSON.stringify(graph)` to produce
   * the serialized graph representation automatically.
   *
   * @returns A `TaskGraphSerialized` object representing this graph
   */
  toJSON(): TaskGraphSerialized {
    return this.export();
  }

  // ---------------------------------------------------------------------------
  // Incremental construction methods
  // ---------------------------------------------------------------------------

  /**
   * Add a task (node) to the graph.
   *
   * @param id - Unique task identifier (used as the node key)
   * @param attributes - Node attributes for the task
   * @throws {DuplicateNodeError} if a node with the given ID already exists
   */
  addTask(id: string, attributes: TaskGraphNodeAttributes): void {
    if (this._graph.hasNode(id)) {
      throw new DuplicateNodeError(id);
    }
    this._graph.addNode(id, attributes);
  }

  /**
   * Add a dependency (edge) between two tasks.
   *
   * Creates an edge from `prerequisite` to `dependent` using a deterministic
   * edge key (`${prerequisite}->${dependent}`) per ADR-006.
   *
   * A duplicate is detected both by that key and by the endpoint pair, so an
   * existing prerequisite→dependent edge stored under a different key (for
   * example one loaded through `fromJSON`) is also reported as
   * `DuplicateEdgeError`.
   *
   * @param prerequisite - Source node (must exist in the graph)
   * @param dependent - Target node (must exist in the graph)
   * @param qualityRetention - Optional edge quality retention (default: 0.9)
   * @throws {TaskNotFoundError} if either endpoint doesn't exist
   * @throws {DuplicateEdgeError} if an edge between the two nodes already exists
   */
  addDependency(
    prerequisite: string,
    dependent: string,
    qualityRetention: number = DEFAULT_QUALITY_RETENTION,
  ): void {
    // Validate both endpoints exist
    if (!this._graph.hasNode(prerequisite)) {
      throw new TaskNotFoundError(prerequisite);
    }
    if (!this._graph.hasNode(dependent)) {
      throw new TaskNotFoundError(dependent);
    }

    // Check for a duplicate by key AND by endpoint pair: the graph forbids
    // parallel edges, and an imported edge may not carry the deterministic key.
    const edgeKey = this._edgeKey(prerequisite, dependent);
    if (this._graph.hasEdge(edgeKey) || this._graph.hasDirectedEdge(prerequisite, dependent)) {
      throw new DuplicateEdgeError(prerequisite, dependent);
    }

    this._graph.addEdgeWithKey(edgeKey, prerequisite, dependent, { qualityRetention });
  }

  // ---------------------------------------------------------------------------
  // Query methods
  // ---------------------------------------------------------------------------

  /**
   * Check whether the graph contains any cycles.
   *
   * Uses `graphology-dag.hasCycle()` as a fast boolean check.
   */
  hasCycles(): boolean {
    return _hasCycles(this._graph);
  }

  /**
   * Find all cycle paths in the graph.
   *
   * Uses `stronglyConnectedComponents()` as a fast pre-check, then runs a
   * custom 3-color DFS (WHITE/GREY/BLACK) to extract cycle paths.
   *
   * Returns **one representative cycle per back edge**, not an exhaustive
   * enumeration of all simple cycles. Each inner array is an ordered node
   * sequence where the last node has an edge back to the first:
   * `[A, B, C]` means A → B → C → A.
   */
  findCycles(): string[][] {
    return _findCycles(this._graph);
  }

  /**
   * Return task IDs in topological (prerequisite → dependent) order.
   *
   * Uses `graphology-dag.topologicalSort()` for the actual sort.
   *
   * @throws {CircularDependencyError} When the graph is cyclic, with `cycles`
   *   populated from `findCycles()`.
   */
  topologicalOrder(): string[] {
    return _topologicalOrder(this._graph);
  }

  /**
   * Return the prerequisite task IDs for a given task.
   *
   * @throws {TaskNotFoundError} If `taskId` doesn't exist in the graph.
   */
  dependencies(taskId: string): string[] {
    return _dependencies(this._graph, taskId);
  }

  /**
   * Return the dependent task IDs for a given task.
   *
   * @throws {TaskNotFoundError} If `taskId` doesn't exist in the graph.
   */
  dependents(taskId: string): string[] {
    return _dependents(this._graph, taskId);
  }

  /**
   * Return the number of tasks (nodes) in the graph.
   */
  taskCount(): number {
    return _taskCount(this._graph);
  }

  /**
   * Return the attributes of a task node, or `undefined` if it doesn't exist.
   */
  getTask(taskId: string): TaskGraphNodeAttributes | undefined {
    return _getTask(this._graph, taskId);
  }

  // ---------------------------------------------------------------------------
  // Subgraph method
  // ---------------------------------------------------------------------------

  /**
   * Extract a subgraph containing only nodes that pass the filter predicate.
   *
   * Per ADR-007, returns only edges where **both endpoints** are in the
   * filtered set (internal-only). External edges (where only one endpoint
   * matches) are excluded. This produces a valid (potentially disconnected)
   * subgraph suitable for all graph algorithms.
   *
   * Uses `graphology-operators.subgraph` under the hood, which preserves
   * node and edge attributes.
   *
   * Does not mutate the original graph — returns a new `TaskGraph` instance.
   *
   * @param filter - Predicate function receiving taskId and attributes for each node
   * @returns A new TaskGraph instance with matching nodes and internal-only edges
   */
  subgraph(filter: (taskId: string, attrs: TaskGraphNodeAttributes) => boolean): TaskGraph {
    // Build the set of node keys that pass the filter
    const filteredNodes = new Set<string>();
    for (const node of this._graph.nodes()) {
      const attrs = this._graph.getNodeAttributes(node);
      if (filter(node, attrs)) {
        filteredNodes.add(node);
      }
    }

    // Use graphology-operators subgraph which only keeps edges where
    // both endpoints are in the filtered set (internal-only per ADR-007)
    const subGraph = graphologySubgraph(this._graph, filteredNodes);

    // Create a new TaskGraph and transfer the subgraph data
    const result = new TaskGraph();
    result._graph.import(subGraph.export());
    return result;
  }

  // ---------------------------------------------------------------------------
  // Validation methods
  // ---------------------------------------------------------------------------

  /**
   * Validate all node attributes against the TaskGraphNodeAttributes schema.
   *
   * Uses TypeBox `Value.Check()` and `Value.Errors()` on each node's attributes.
   * Returns structured `ValidationError[]` with `type: "schema"`, `taskId`,
   * `field`, `message`, and optional `value`.
   *
   * Validation never throws — it collects all issues and returns them.
   * This allows consumers to implement "collect all errors" strategies.
   */
  validateSchema(): ValidationError[] {
    return _validateSchema(this._graph);
  }

  /**
   * Validate graph-level invariants: cycles and dangling references.
   *
   * Runs `findCycles()` and checks for dangling dependency references
   * (edges where one endpoint doesn't exist as a node).
   *
   * Returns structured `GraphValidationError[]` with:
   * - `type: "graph"`
   * - `category: "cycle"` for cycle errors, with cycle paths in `details`
   * - `category: "dangling-reference"` for dangling references, with `taskId`
   *
   * Validation never throws — it collects all issues and returns them.
   */
  validateGraph(): GraphValidationError[] {
    return _validateGraph(this._graph);
  }

  /**
   * Run both schema and graph validation, returning combined results.
   *
   * Convenience method that runs `validateSchema()` and `validateGraph()`
   * and concatenates the results into a single array.
   *
   * Validation never throws — it collects all issues and returns them.
   */
  validate(): AnyValidationError[] {
    return _validate(this._graph);
  }
}