-- =============================================================================
-- MIGRATION 002: System Settings & Enhanced GraphRAG
-- =============================================================================
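-- Adds:
--   - system_settings table for centralized configuration
--   - usage_logs table for API credit tracking (Tavily, OpenAI)
--   - concept_relationships table for semantic graph edges
--   - helper functions: get_tavily_credits, record_tavily_usage,
--     reset_tavily_credits, get_semantic_relations
--   - views: v_credit_usage_daily, v_concept_graph
--
-- Requires (not created by this file): the book_projects, concepts and chunks
-- tables and the update_updated_at() trigger function.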
--
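-- Run with (supply the password through the PGPASSWORD environment variable
-- or a ~/.pgpass entry; never commit real credentials to source control):
--   psql -h localhost -U research_dev_user \
--     -d research_dev_db -f database/migrations/002_system_settings.sql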
-- =============================================================================

-- -----------------------------------------------------------------------------
-- System Settings Table
-- -----------------------------------------------------------------------------
-- Centralized key-value store for system-wide settings.
-- Replaces file-based configuration where appropriate.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS system_settings (
    -- Unique name of the setting; this is the lookup key.
    setting_key   VARCHAR(100),

    -- Setting payload. Any JSON value is accepted (object, string, JSON null),
    -- but SQL NULL is rejected.
    setting_value JSONB NOT NULL,

    -- Optional human-readable explanation of the setting.
    description   TEXT,

    -- Time-zone-naive timestamp, filled with the current time on insert.
    updated_at    TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    -- Who last changed the row; defaults to 'system' when not supplied.
    updated_by    VARCHAR(100) DEFAULT 'system',

    PRIMARY KEY (setting_key)
);

-- Seed the default settings.
-- Keys that already exist are skipped (ON CONFLICT DO NOTHING), so re-running
-- the migration never overwrites values that are already stored.
INSERT INTO system_settings
    (setting_key, setting_value, description)
VALUES
    (
        'tavily_credits',
        '{"used": 0, "limit": 1000, "reset_date": null}'::JSONB,
        'Tavily API credit tracking'
    ),
    (
        'openai_usage',
        '{"tokens_used": 0, "cost_usd": 0.0, "requests": 0}'::JSONB,
        'OpenAI API usage tracking'
    ),
    (
        'pipeline_version',
        '"2.1.0"'::JSONB,
        'Current pipeline version'
    ),
    (
        -- JSON null (not SQL NULL): no maintenance run recorded yet.
        'last_maintenance',
        'null'::JSONB,
        'Timestamp of last maintenance run'
    )
ON CONFLICT (setting_key) DO NOTHING;

-- Row-level BEFORE UPDATE trigger on system_settings.
-- Dropped and recreated so the migration can be run repeatedly.
-- update_updated_at() is not defined in this file and must already exist;
-- presumably it refreshes the updated_at column (confirm against its definition).
DROP TRIGGER IF EXISTS tr_settings_updated_at
    ON system_settings;

CREATE TRIGGER tr_settings_updated_at
    BEFORE UPDATE
    ON system_settings
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

-- -----------------------------------------------------------------------------
-- Usage Logs Table
-- -----------------------------------------------------------------------------
-- Detailed log of API usage for credit tracking and cost analysis.
-- Supports multi-user environments and different API providers.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS usage_logs (
    -- Auto-incrementing surrogate key.
    log_id           SERIAL,

    -- Which API was called, and what kind of call it was
    service          VARCHAR(50)  NOT NULL,   -- e.g. 'tavily', 'openai', 'local_llm'
    operation        VARCHAR(100) NOT NULL,   -- e.g. 'search', 'embedding', 'chat', 'completion'

    -- Request / response details
    query_text       TEXT,                    -- Query or prompt; writers are expected to truncate it for privacy
    result_count     INTEGER,                 -- How many results came back
    tokens_used      INTEGER,                 -- Token count (LLM operations only)

    -- Cost accounting
    credits_used     INTEGER        DEFAULT 0, -- Credits consumed (Tavily)
    cost_usd         NUMERIC(10, 6) DEFAULT 0, -- Estimated cost in USD (OpenAI)

    -- Context of the call
    project_id       INTEGER,                 -- Owning project; cleared if the project is deleted
    session_id       VARCHAR(100),            -- Groups operations that belong together
    user_id          VARCHAR(100) DEFAULT 'default',

    -- Performance
    response_time_ms INTEGER,

    -- Outcome and free-form extras
    success          BOOLEAN DEFAULT TRUE,
    error_message    TEXT,
    metadata         JSONB,                   -- Service-specific additional data

    -- Time-zone-naive timestamp, filled with the current time on insert.
    created_at       TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (log_id),
    FOREIGN KEY (project_id)
        REFERENCES book_projects(project_id)
        ON DELETE SET NULL
);

-- Lookup indexes on usage_logs, one per common filter column.
CREATE INDEX IF NOT EXISTS idx_usage_service
    ON usage_logs USING btree (service);

-- Descending so "most recent first" scans read the index in order.
CREATE INDEX IF NOT EXISTS idx_usage_created
    ON usage_logs USING btree (created_at DESC);

CREATE INDEX IF NOT EXISTS idx_usage_user
    ON usage_logs USING btree (user_id);

CREATE INDEX IF NOT EXISTS idx_usage_session
    ON usage_logs USING btree (session_id);

CREATE INDEX IF NOT EXISTS idx_usage_project
    ON usage_logs USING btree (project_id);

-- Expression index on (service, calendar day of created_at); the second key is
-- the same DATE(created_at) expression that v_credit_usage_daily groups by.
CREATE INDEX IF NOT EXISTS idx_usage_service_date
    ON usage_logs USING btree (service, DATE(created_at));

-- -----------------------------------------------------------------------------
-- Concept Relationships Table (Enhanced GraphRAG)
-- -----------------------------------------------------------------------------
-- Stores semantic relationships between concepts with typed edges.
-- Enables more sophisticated graph traversal than co-occurrence alone.
--
-- Relationship types include:
--   - influences: A has an effect on B
--   - part_of: A is a component of B
--   - opposes: A is contrary to B
--   - related_to: General semantic relation
--   - derived_from: A originates from B
--   - enables: A makes B possible
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS concept_relationships (
    -- Auto-incrementing surrogate key.
    relationship_id   SERIAL,

    -- Endpoints of the directed edge (source -> target).
    -- Deleting either concept deletes the edge.
    source_concept_id INTEGER NOT NULL,
    target_concept_id INTEGER NOT NULL,

    -- Edge semantics
    relationship_type VARCHAR(50) NOT NULL,   -- e.g. 'influences', 'part_of', 'opposes'
    description       TEXT,                   -- Context explaining why the edge exists

    -- Strength and confidence. Intended range is 0-1; the column type itself
    -- does not enforce that range.
    weight            NUMERIC(5, 4) DEFAULT 0.5,  -- Relationship strength
    confidence        NUMERIC(5, 4) DEFAULT 0.5,  -- Extraction confidence

    -- Provenance
    source_chunk_id   VARCHAR(100),           -- Chunk the edge came from; cleared if the chunk is deleted
    extraction_method VARCHAR(50),            -- e.g. 'manual', 'llm_extracted', 'rule_based'

    -- Flags
    is_bidirectional  BOOLEAN DEFAULT FALSE,  -- TRUE when the edge also holds target -> source
    verified          BOOLEAN DEFAULT FALSE,  -- TRUE once a human has verified the edge

    -- Time-zone-naive timestamps, filled with the current time on insert.
    created_at        TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (relationship_id),

    FOREIGN KEY (source_concept_id)
        REFERENCES concepts(concept_id)
        ON DELETE CASCADE,
    FOREIGN KEY (target_concept_id)
        REFERENCES concepts(concept_id)
        ON DELETE CASCADE,
    FOREIGN KEY (source_chunk_id)
        REFERENCES chunks(chunk_id)
        ON DELETE SET NULL,

    -- At most one edge of a given type per ordered (source, target) pair.
    UNIQUE (source_concept_id, target_concept_id, relationship_type)
);

-- Traversal indexes: find edges by either endpoint or by edge type.
CREATE INDEX IF NOT EXISTS idx_rel_source
    ON concept_relationships USING btree (source_concept_id);

CREATE INDEX IF NOT EXISTS idx_rel_target
    ON concept_relationships USING btree (target_concept_id);

CREATE INDEX IF NOT EXISTS idx_rel_type
    ON concept_relationships USING btree (relationship_type);

-- Partial index: covers only the edges flagged as bidirectional.
CREATE INDEX IF NOT EXISTS idx_rel_bidirectional
    ON concept_relationships USING btree (source_concept_id, target_concept_id)
    WHERE is_bidirectional = TRUE;

-- Row-level BEFORE UPDATE trigger on concept_relationships.
-- Dropped and recreated so the migration can be run repeatedly.
-- update_updated_at() is not defined in this file and must already exist;
-- presumably it refreshes the updated_at column (confirm against its definition).
DROP TRIGGER IF EXISTS tr_rel_updated_at
    ON concept_relationships;

CREATE TRIGGER tr_rel_updated_at
    BEFORE UPDATE
    ON concept_relationships
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

-- -----------------------------------------------------------------------------
-- Helper Functions
-- -----------------------------------------------------------------------------

-- Function: Get current Tavily credits
--
-- Reads the 'tavily_credits' row of system_settings and returns exactly one row:
--   used         - value of the "used" key
--   remaining    - "limit" minus "used"
--   credit_limit - value of the "limit" key
--   reset_date   - value of the "reset_date" key, cast to TIMESTAMP
-- A column is NULL when its key is absent or JSON null; every column is NULL
-- when the settings row itself does not exist.
CREATE OR REPLACE FUNCTION get_tavily_credits()
RETURNS TABLE(used INTEGER, remaining INTEGER, credit_limit INTEGER, reset_date TIMESTAMP) AS $$
DECLARE
    v_settings JSONB;
BEGIN
    SELECT s.setting_value
    INTO v_settings
    FROM system_settings AS s
    WHERE s.setting_key = 'tavily_credits';

    -- Fill the output columns directly, then emit them as a single row.
    used         := CAST(v_settings->>'used' AS INTEGER);
    credit_limit := CAST(v_settings->>'limit' AS INTEGER);
    remaining    := credit_limit - used;
    reset_date   := CAST(v_settings->>'reset_date' AS TIMESTAMP);

    RETURN NEXT;
END;
$$ LANGUAGE plpgsql;

-- Function: Record Tavily usage
--
-- Adds p_credits to the "used" counter stored under 'tavily_credits' in
-- system_settings and appends a row to usage_logs describing the search.
--
-- Parameters:
--   p_query      - search query; only the first 500 characters are logged
--   p_credits    - credits consumed by this call
--   p_results    - number of results returned
--   p_session_id - optional session identifier for grouping related calls
--   p_project_id - optional project the call belongs to
--
-- Returns: the new cumulative "used" value. If the 'tavily_credits' row does
-- not exist, no counter is updated and p_credits is returned; the usage_logs
-- row is written either way.
CREATE OR REPLACE FUNCTION record_tavily_usage(
    p_query TEXT,
    p_credits INTEGER,
    p_results INTEGER,
    p_session_id VARCHAR DEFAULT NULL,
    p_project_id INTEGER DEFAULT NULL
) RETURNS INTEGER AS $$
DECLARE
    new_used INTEGER;
BEGIN
    -- Increment in a single UPDATE so the read and the write hit the same row
    -- version. A separate SELECT followed by UPDATE would let two concurrent
    -- calls read the same "used" value and lose one of the increments.
    UPDATE system_settings AS s
    SET setting_value = jsonb_set(
        s.setting_value,
        '{used}',
        to_jsonb(COALESCE((s.setting_value->>'used')::INTEGER, 0) + p_credits)
    )
    WHERE s.setting_key = 'tavily_credits'
    RETURNING (s.setting_value->>'used')::INTEGER INTO new_used;

    -- No settings row: there is no stored counter, so count from zero.
    IF NOT FOUND THEN
        new_used := COALESCE(NULL::INTEGER, 0) + p_credits;
    END IF;

    -- Log the usage (query text truncated to 500 characters)
    INSERT INTO usage_logs (
        service,
        operation,
        query_text,
        result_count,
        credits_used,
        session_id,
        project_id
    )
    VALUES (
        'tavily',
        'search',
        LEFT(p_query, 500),
        p_results,
        p_credits,
        p_session_id,
        p_project_id
    );

    RETURN new_used;
END;
$$ LANGUAGE plpgsql;

-- Function: Reset Tavily credits (for new billing period)
--
-- Replaces the 'tavily_credits' setting with a fresh object:
--   used       = 0
--   limit      = p_new_limit if given, otherwise the currently stored limit,
--                otherwise 1000
--   reset_date = current time
-- Does nothing if the 'tavily_credits' row does not exist.
--
-- Parameters:
--   p_new_limit - optional new credit limit; NULL keeps the existing one
CREATE OR REPLACE FUNCTION reset_tavily_credits(p_new_limit INTEGER DEFAULT NULL)
RETURNS VOID AS $$
BEGIN
    -- The stored limit is read inside the same UPDATE that writes the new
    -- object, so a concurrent change to the limit cannot be overwritten with a
    -- stale value read by an earlier, separate SELECT.
    UPDATE system_settings AS s
    SET setting_value = jsonb_build_object(
        'used', 0,
        'limit', COALESCE(p_new_limit, (s.setting_value->>'limit')::INTEGER, 1000),
        'reset_date', NOW()
    )
    WHERE s.setting_key = 'tavily_credits';
END;
$$ LANGUAGE plpgsql;

-- Function: Get related concepts via semantic relationships
--
-- Returns the neighbours of a concept in concept_relationships, strongest first.
--
-- Parameters:
--   p_concept_id         - concept whose neighbours are wanted
--   p_relationship_types - optional list of relationship types to keep;
--                          NULL keeps every type
--   p_min_weight         - minimum edge weight (inclusive); edges with a NULL
--                          weight never match
--
-- Result rows:
--   'outgoing' - edges where p_concept_id is the source; the neighbour is the target
--   'incoming' - edges where p_concept_id is the target AND the edge is flagged
--                is_bidirectional; the neighbour is the source
CREATE OR REPLACE FUNCTION get_semantic_relations(
    p_concept_id INTEGER,
    p_relationship_types VARCHAR[] DEFAULT NULL,
    p_min_weight DECIMAL DEFAULT 0.3
) RETURNS TABLE(
    concept_id INTEGER,
    concept_name VARCHAR,
    relationship_type VARCHAR,
    direction VARCHAR,
    weight DECIMAL,
    description TEXT
) AS $$
BEGIN
    -- Every column reference is table-qualified because the output column names
    -- of this function are also visible as PL/pgSQL variables inside the body.
    RETURN QUERY
    -- Edges leaving the requested concept
    SELECT
        neighbour.concept_id,
        neighbour.name,
        edge.relationship_type,
        CAST('outgoing' AS VARCHAR) AS direction,
        edge.weight,
        edge.description
    FROM concept_relationships AS edge
    INNER JOIN concepts AS neighbour
        ON neighbour.concept_id = edge.target_concept_id
    WHERE edge.source_concept_id = p_concept_id
      AND (edge.relationship_type = ANY(p_relationship_types) OR p_relationship_types IS NULL)
      AND edge.weight >= p_min_weight

    UNION ALL

    -- Edges arriving at the requested concept, bidirectional ones only
    SELECT
        neighbour.concept_id,
        neighbour.name,
        edge.relationship_type,
        CAST('incoming' AS VARCHAR) AS direction,
        edge.weight,
        edge.description
    FROM concept_relationships AS edge
    INNER JOIN concepts AS neighbour
        ON neighbour.concept_id = edge.source_concept_id
    WHERE edge.target_concept_id = p_concept_id
      AND edge.is_bidirectional = TRUE
      AND (edge.relationship_type = ANY(p_relationship_types) OR p_relationship_types IS NULL)
      AND edge.weight >= p_min_weight

    -- Sorts the combined result by its "weight" output column
    ORDER BY weight DESC;
END;
$$ LANGUAGE plpgsql;

-- -----------------------------------------------------------------------------
-- Views
-- -----------------------------------------------------------------------------

-- View: Credit usage summary by day
--
-- One row per (calendar day, service), newest day first. Only calls whose
-- success flag is TRUE are counted; rows with a FALSE or NULL flag are left out.
-- Each total is NULL when every contributing value for that group is NULL.
CREATE OR REPLACE VIEW v_credit_usage_daily AS
SELECT
    DATE(ul.created_at)  AS usage_date,
    ul.service,
    COUNT(*)             AS request_count,
    SUM(ul.credits_used) AS total_credits,
    SUM(ul.cost_usd)     AS total_cost_usd,
    SUM(ul.tokens_used)  AS total_tokens
FROM usage_logs AS ul
WHERE ul.success IS TRUE
GROUP BY
    DATE(ul.created_at),
    ul.service
ORDER BY
    usage_date DESC,
    ul.service;

-- View: Concept relationship graph (for visualization)
--
-- One row per edge in concept_relationships, with the ids and names of both
-- endpoint concepts resolved, ordered from the heaviest edge to the lightest.
CREATE OR REPLACE VIEW v_concept_graph AS
SELECT
    rel.relationship_id,
    src.concept_id AS source_id,
    src.name       AS source_name,
    dst.concept_id AS target_id,
    dst.name       AS target_name,
    rel.relationship_type,
    rel.weight,
    rel.confidence,
    rel.is_bidirectional,
    rel.verified
FROM concept_relationships AS rel
INNER JOIN concepts AS src
    ON src.concept_id = rel.source_concept_id
INNER JOIN concepts AS dst
    ON dst.concept_id = rel.target_concept_id
ORDER BY rel.weight DESC;

-- =============================================================================
-- MIGRATION COMPLETE
-- =============================================================================
-- Verify with:
--   \dt system_settings
--   \dt usage_logs
--   \dt concept_relationships
--   SELECT * FROM get_tavily_credits();
-- =============================================================================
