diff --git a/apps/operator/package.json b/apps/operator/package.json index c3cb05b..add0c15 100644 --- a/apps/operator/package.json +++ b/apps/operator/package.json @@ -19,6 +19,7 @@ "@repo/logger": "workspace:*", "drizzle-orm": "0.45.2", "hono": "4.12.9", + "node-html-markdown": "2.0.0", "openai": "6.33.0", "zod": "4.3.6" }, diff --git a/apps/operator/src/modules/telegram/controller.ts b/apps/operator/src/modules/telegram/controller.ts index ef51ec7..8665ebe 100644 --- a/apps/operator/src/modules/telegram/controller.ts +++ b/apps/operator/src/modules/telegram/controller.ts @@ -14,6 +14,7 @@ import { TelegramService } from "../../services/telegram"; import type { AppEnv } from "../../types/env"; import type { TelegramUpdate } from "../../types/telegram"; import { splitMessage } from "../../utils/message"; +import { validateSourceUrl } from "../../utils/url-validator"; const DAYS = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; @@ -200,9 +201,11 @@ const handleWebhook = async (c: Context) => { return { error: validation.error.message }; } - // Monitor support (source_url) is not yet implemented — reject if (input.sourceUrl) { - return { error: "URL monitors are not yet supported" }; + const urlCheck = validateSourceUrl(input.sourceUrl); + if (!urlCheck.valid) { + return { error: urlCheck.reason }; + } } // Store as pending in D1 — require user confirmation diff --git a/apps/operator/src/scheduled.ts b/apps/operator/src/scheduled.ts index ebea7c3..8c4e38a 100644 --- a/apps/operator/src/scheduled.ts +++ b/apps/operator/src/scheduled.ts @@ -2,9 +2,11 @@ import { Logger } from "@repo/logger"; import { OpenAiService } from "./services/openai"; import { ScheduleService } from "./services/schedule"; +import { scrapeUrl } from "./services/scrape"; import { TelegramService } from "./services/telegram"; import type { AppEnv } from "./types/env"; import { splitMessage } from "./utils/message"; +import { validateSourceUrl } from "./utils/url-validator"; type Env = 
AppEnv["Bindings"]; @@ -41,11 +43,73 @@ const handleScheduled = async ( const results = await Promise.allSettled( claimed.map(async (schedule) => { - // Monitor path (Stage 2 — stub) + // Monitor path if (schedule.sourceUrl) { - logger.info("monitor execution not yet implemented, skipping", { - scheduleId: schedule.id, + const urlCheck = validateSourceUrl(schedule.sourceUrl); + if (!urlCheck.valid) { + logger.error("monitor URL validation failed at execution", { + scheduleId: schedule.id, + reason: urlCheck.reason, + }); + return; + } + + const scrapeResult = await scrapeUrl(schedule.sourceUrl); + if (!scrapeResult.ok) { + throw new Error(`Scrape failed: ${scrapeResult.error}`); + } + + let previousState: string | null = null; + if (schedule.stateJson) { + try { + const parsed = JSON.parse(schedule.stateJson) as Record< + string, + unknown + >; + previousState = + typeof parsed.lastContent === "string" + ? parsed.lastContent + : null; + } catch { + logger.warn("malformed stateJson, treating as first run", { + scheduleId: schedule.id, + }); + } + } + + let scrapedContent = scrapeResult.text; + if (scrapeResult.truncated) { + scrapedContent += + "\n\n[Note: Page content was truncated. Analysis is based on partial content.]"; + } + + const openai = new OpenAiService(env.OPENAI_API_KEY, logger); + const analysis = await openai.analyzeMonitor({ + task: schedule.messagePrompt ?? 
schedule.description, + scrapedContent, + previousState, }); + + if (analysis.notify) { + for (const chunk of splitMessage(analysis.message)) { + await telegram.sendMessage({ chat_id: chatId, text: chunk }); + } + logger.info("monitor notification sent", { + scheduleId: schedule.id, + }); + } else { + logger.info("monitor check — no notification needed", { + scheduleId: schedule.id, + }); + } + + await scheduleService.updateState( + schedule.id, + JSON.stringify({ + lastContent: analysis.newState, + lastScrapedAt: now.toISOString(), + }) + ); return; } diff --git a/apps/operator/src/services/openai.test.ts b/apps/operator/src/services/openai.test.ts index edf6ce8..85d6556 100644 --- a/apps/operator/src/services/openai.test.ts +++ b/apps/operator/src/services/openai.test.ts @@ -280,4 +280,144 @@ describe("OpenAiService", () => { expect(executor).not.toHaveBeenCalled(); }); }); + + describe("analyzeMonitor", () => { + it("returns parsed monitor analysis", async () => { + const analysis = { + notify: true, + message: "Seinfeld is on TV at 20:00 on MTV3", + newState: "Current listings include Seinfeld at 20:00", + }; + createMock.mockResolvedValueOnce({ + choices: [{ message: { content: JSON.stringify(analysis) } }], + }); + + const result = await service.analyzeMonitor({ + task: "Check if Seinfeld is on today", + scrapedContent: "# TV Listings\n- 20:00 Seinfeld (MTV3)", + previousState: null, + }); + + expect(result).toEqual(analysis); + }); + + it("sends structured request with json_object format", async () => { + createMock.mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + notify: false, + message: "", + newState: "no changes", + }), + }, + }, + ], + }); + + await service.analyzeMonitor({ + task: "Check for changes", + scrapedContent: "content", + previousState: "old state", + }); + + expect(createMock).toHaveBeenCalledWith( + expect.objectContaining({ + response_format: { type: "json_object" }, + max_completion_tokens: 4096, + }) + 
); + }); + + it("includes previous state in user message", async () => { + createMock.mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + notify: true, + message: "Changed", + newState: "new", + }), + }, + }, + ], + }); + + await service.analyzeMonitor({ + task: "Check diff", + scrapedContent: "new content", + previousState: "old content summary", + }); + + const callArgs = createMock.mock.calls[0] as [ + { messages: { content: string }[] }, + ]; + const userMsg = callArgs[0].messages[1].content; + expect(userMsg).toContain("old content summary"); + }); + + it("uses placeholder when no previous state", async () => { + createMock.mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + notify: true, + message: "First check", + newState: "initial", + }), + }, + }, + ], + }); + + await service.analyzeMonitor({ + task: "Monitor page", + scrapedContent: "content", + previousState: null, + }); + + const callArgs = createMock.mock.calls[0] as [ + { messages: { content: string }[] }, + ]; + const userMsg = callArgs[0].messages[1].content; + expect(userMsg).toContain("First check"); + }); + + it("throws on empty response", async () => { + createMock.mockResolvedValueOnce({ + choices: [{ message: { content: null } }], + }); + + await expect( + service.analyzeMonitor({ + task: "test", + scrapedContent: "content", + previousState: null, + }) + ).rejects.toThrow("OpenAI returned empty response for monitor analysis"); + }); + + it("throws on invalid JSON structure", async () => { + createMock.mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ wrong: "structure" }), + }, + }, + ], + }); + + await expect( + service.analyzeMonitor({ + task: "test", + scrapedContent: "content", + previousState: null, + }) + ).rejects.toThrow(); + }); + }); }); diff --git a/apps/operator/src/services/openai.ts b/apps/operator/src/services/openai.ts index 759453d..eef2ef0 100644 --- 
a/apps/operator/src/services/openai.ts +++ b/apps/operator/src/services/openai.ts @@ -1,6 +1,7 @@ import type { Logger } from "@repo/logger"; import OpenAI from "openai"; import type { ChatCompletionTool } from "openai/resources/chat/completions"; +import { z } from "zod"; const SYSTEM_PROMPT = `You are a helpful personal assistant called Switch Operator. Be concise and helpful. @@ -16,6 +17,14 @@ Schedule types: Use fixed_message for exact text or message_prompt for AI-generated content. +You can also create web monitors that scrape a URL on a schedule and notify based on conditions. +When the user wants to monitor a website for changes or check for specific content, use create_schedule with source_url + message_prompt. +The message_prompt should describe what to look for or how to analyze the page content. + +Monitor examples: +- "Notify me when a specific show is on TV" → source_url with the TV listings page, message_prompt: "Check if [show name] appears in today's listings. Notify with channel and time if found." +- "Weekly report changes" → source_url with the report page, message_prompt: "Compare this week's content to last week. Summarize key changes." + When listing schedules, format them as a numbered list (1, 2, 3...) with key details like description, type, time, and next run. When the user asks to delete a schedule by number, first call list_schedules to get the current list, then use the ID from the matching position to call delete_schedule.`; @@ -64,6 +73,11 @@ const SCHEDULE_TOOLS: ChatCompletionTool[] = [ description: "Prompt for AI-generated message. Mutually exclusive with fixed_message.", }, + source_url: { + type: "string", + description: + "URL to monitor/scrape. When set, the schedule becomes a monitor: it will fetch this URL on each run, analyze the content using message_prompt, and notify only if the condition is met. Requires message_prompt. 
Cannot be used with fixed_message.", + }, description: { type: "string", description: "Short description of this schedule (max 200 chars).", @@ -216,7 +230,73 @@ class OpenAiService { throw new Error("Tool calling exceeded maximum iterations"); } + + async analyzeMonitor(params: { + task: string; + scrapedContent: string; + previousState: string | null; + }): Promise { + this.logger.debug("analyzing monitor", { + taskLength: params.task.length, + contentLength: params.scrapedContent.length, + hasPreviousState: params.previousState != null, + }); + + const previousStateText = + params.previousState ?? "First check — no previous state."; + + const response = await this.client.chat.completions.create({ + model: "gpt-5.4-mini", + max_completion_tokens: 4096, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: MONITOR_ANALYSIS_PROMPT }, + { + role: "user", + content: `## Task\n${params.task}\n\n## Current page content\n${params.scrapedContent}\n\n## Previous state\n${previousStateText}`, + }, + ], + }); + + const content = response.choices[0]?.message.content; + if (!content) { + throw new Error("OpenAI returned empty response for monitor analysis"); + } + + const parsed: unknown = JSON.parse(content); + return monitorAnalysisSchema.parse(parsed); + } } +const MONITOR_ANALYSIS_PROMPT = `You are analyzing a web page for a monitoring task. + +The user will provide: +1. A task describing what to check or monitor +2. The current page content (scraped and converted to markdown) +3. 
Previous state from the last check (or "First check" if this is the first run) + +Respond in JSON with exactly these fields: +{ + "notify": true or false, + "message": "notification message to send to the user (max 4000 chars, use markdown formatting)", + "newState": "concise summary of current state for comparison next time (max 5000 chars)" +} + +Rules: +- Only set "notify" to true if the condition described in the task is met +- For diff/change detection tasks: compare current content to previous state and summarize what changed. Notify if there are meaningful changes. +- For condition check tasks: evaluate whether the specific condition is satisfied. Notify only if it is. +- The "message" should be informative and actionable — include relevant details from the page +- The "newState" should contain enough information to compare against next time. Keep it concise. +- If this is the first check, always set notify to true with a summary of current state`; + +const monitorAnalysisSchema = z.object({ + notify: z.boolean(), + message: z.string().max(4000), + newState: z.string().max(5000), +}); + +type MonitorAnalysis = z.infer; + export { MAX_TOOL_ITERATIONS, OpenAiService, SCHEDULE_TOOLS }; -export type { ToolExecutor, ToolResult }; +export type { MonitorAnalysis, ToolExecutor, ToolResult }; diff --git a/apps/operator/src/services/scrape.test.ts b/apps/operator/src/services/scrape.test.ts new file mode 100644 index 0000000..b121c64 --- /dev/null +++ b/apps/operator/src/services/scrape.test.ts @@ -0,0 +1,351 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { collapseWhitespace, convertContent, scrapeUrl } from "./scrape"; + +const createMockResponse = ( + body: string, + init?: { status?: number; headers?: Record } +): Response => + new Response(body, { + status: init?.status ?? 200, + headers: init?.headers ?? 
{ "content-type": "text/html" }, + }); + +describe("scrapeUrl", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("converts HTML response to markdown", async () => { + vi.stubGlobal( + "fetch", + vi + .fn() + .mockResolvedValueOnce(createMockResponse("

<h1>Hello</h1><p>World</p>

")) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.text).toContain("Hello"); + expect(result.text).toContain("World"); + expect(result.truncated).toBe(false); + } + }); + + it("handles JSON response with pretty-print", async () => { + const json = JSON.stringify({ key: "value", nested: { a: 1 } }); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + createMockResponse(json, { + headers: { "content-type": "application/json" }, + }) + ) + ); + + const result = await scrapeUrl("https://api.example.com/data"); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.text).toContain("```json"); + expect(result.text).toContain('"key": "value"'); + expect(result.text).toContain("```"); + } + }); + + it("handles plain text response", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + createMockResponse("Plain text content", { + headers: { "content-type": "text/plain" }, + }) + ) + ); + + const result = await scrapeUrl("https://example.com/robots.txt"); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.text).toBe("Plain text content"); + } + }); + + it("returns error for unsupported content type", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + createMockResponse("binary", { + headers: { "content-type": "application/octet-stream" }, + }) + ) + ); + + const result = await scrapeUrl("https://example.com/file.bin"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toContain("Unsupported content type"); + } + }); + + it("returns error for non-ok HTTP status", async () => { + vi.stubGlobal( + "fetch", + vi + .fn() + .mockResolvedValueOnce(createMockResponse("Forbidden", { status: 403 })) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("Blocked by site"); + 
expect(result.statusCode).toBe(403); + } + }); + + it("returns error for 404", async () => { + vi.stubGlobal( + "fetch", + vi + .fn() + .mockResolvedValueOnce(createMockResponse("Not found", { status: 404 })) + ); + + const result = await scrapeUrl("https://example.com/missing"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("Page not found"); + expect(result.statusCode).toBe(404); + } + }); + + it("returns generic error for unmapped status codes", async () => { + vi.stubGlobal( + "fetch", + vi + .fn() + .mockResolvedValueOnce(createMockResponse("Error", { status: 502 })) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("HTTP 502"); + } + }); + + it("truncates text exceeding maxTextLength", async () => { + const longHtml = `

<html><body><p>${"a".repeat(1000)}</p></body></html>

`; + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce(createMockResponse(longHtml)) + ); + + const result = await scrapeUrl("https://example.com", 100); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.text.length).toBeLessThanOrEqual(100); + expect(result.truncated).toBe(true); + } + }); + + it("returns error on fetch failure", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockRejectedValueOnce(new Error("Network error")) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("Network error"); + } + }); + + it("returns error on timeout", async () => { + const timeoutError = new DOMException("Timeout", "TimeoutError"); + vi.stubGlobal("fetch", vi.fn().mockRejectedValueOnce(timeoutError)); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("Request timed out"); + } + }); + + it("follows safe redirects", async () => { + const mockFetch = vi + .fn() + .mockResolvedValueOnce( + new Response(null, { + status: 302, + headers: { location: "https://example.com/final" }, + }) + ) + .mockResolvedValueOnce(createMockResponse("

<p>Redirected</p>

")); + vi.stubGlobal("fetch", mockFetch); + + const result = await scrapeUrl("https://example.com/start"); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.text).toContain("Redirected"); + } + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + + it("rejects redirects to localhost", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(null, { + status: 302, + headers: { location: "https://127.0.0.1/secret" }, + }) + ) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toContain("Redirect to unsafe URL"); + } + }); + + it("rejects redirects to HTTP", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(null, { + status: 301, + headers: { location: "http://example.com/page" }, + }) + ) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toContain("Redirect to unsafe URL"); + } + }); + + it("returns error on too many redirects", async () => { + const redirectResponse = new Response(null, { + status: 302, + headers: { location: "https://example.com/loop" }, + }); + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(redirectResponse)); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("Too many redirects"); + } + }); + + it("returns error when response body exceeds 2MB", async () => { + const largeBody = "x".repeat(3 * 1024 * 1024); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce(createMockResponse(largeBody)) + ); + + const result = await scrapeUrl("https://example.com"); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBe("Response exceeds 2MB size limit"); + } + }); +}); + +describe("convertContent", () => { + it("converts HTML to markdown", () => { + const result = 
convertContent( + "

<h1>Title</h1><p>Paragraph</p>

", + "text/html" + ); + expect("text" in result).toBe(true); + if ("text" in result) { + expect(result.text).toContain("Title"); + expect(result.text).toContain("Paragraph"); + } + }); + + it("handles application/xhtml+xml as HTML", () => { + const result = convertContent("

<p>Content</p>

", "application/xhtml+xml"); + expect("text" in result).toBe(true); + if ("text" in result) { + expect(result.text).toContain("Content"); + } + }); + + it("pretty-prints JSON", () => { + const result = convertContent('{"a":1}', "application/json"); + expect("text" in result).toBe(true); + if ("text" in result) { + expect(result.text).toContain("```json"); + expect(result.text).toContain('"a": 1'); + } + }); + + it("handles +json content types", () => { + const result = convertContent( + '{"data":"test"}', + "application/vnd.api+json" + ); + expect("text" in result).toBe(true); + if ("text" in result) { + expect(result.text).toContain("```json"); + } + }); + + it("wraps invalid JSON in plain code block", () => { + const result = convertContent("not json", "application/json"); + expect("text" in result).toBe(true); + if ("text" in result) { + expect(result.text).toContain("```\nnot json\n```"); + } + }); + + it("passes plain text through", () => { + const result = convertContent("hello world", "text/plain"); + expect("text" in result).toBe(true); + if ("text" in result) { + expect(result.text).toBe("hello world"); + } + }); + + it("returns error for unsupported types", () => { + const result = convertContent("data", "application/octet-stream"); + expect("error" in result).toBe(true); + }); +}); + +describe("collapseWhitespace", () => { + it("collapses 3+ newlines to double", () => { + expect(collapseWhitespace("a\n\n\n\nb")).toBe("a\n\nb"); + }); + + it("collapses multiple spaces to single", () => { + expect(collapseWhitespace("a b")).toBe("a b"); + }); + + it("trims leading and trailing whitespace", () => { + expect(collapseWhitespace(" hello ")).toBe("hello"); + }); +}); diff --git a/apps/operator/src/services/scrape.ts b/apps/operator/src/services/scrape.ts new file mode 100644 index 0000000..b033c42 --- /dev/null +++ b/apps/operator/src/services/scrape.ts @@ -0,0 +1,205 @@ +import { NodeHtmlMarkdown } from "node-html-markdown"; + +import { validateSourceUrl } 
from "../utils/url-validator"; + +type ScrapeOk = { ok: true; text: string; truncated: boolean }; +type ScrapeError = { ok: false; error: string; statusCode?: number }; +type ScrapeResult = ScrapeOk | ScrapeError; + +const MAX_BODY_BYTES = 2 * 1024 * 1024; // 2 MB +const DEFAULT_MAX_TEXT_LENGTH = 80_000; +const FETCH_TIMEOUT_MS = 15_000; +const MAX_REDIRECTS = 5; + +const USER_AGENT = "SwitchOperator/1.0"; + +const ERROR_MAP: Partial> = { + 403: "Blocked by site", + 404: "Page not found", + 429: "Rate limited by site", +}; + +const isHtml = (contentType: string): boolean => + contentType.includes("text/html") || + contentType.includes("application/xhtml+xml"); + +const isJson = (contentType: string): boolean => + contentType.includes("application/json") || contentType.includes("+json"); + +const isPlainText = (contentType: string): boolean => + contentType.includes("text/plain"); + +const concatChunks = (chunks: Uint8Array[], totalBytes: number): Uint8Array => { + const result = new Uint8Array(totalBytes); + let offset = 0; + for (const chunk of chunks) { + result.set(chunk, offset); + offset += chunk.byteLength; + } + return result; +}; + +const readBodyWithLimit = async ( + body: ReadableStream +): Promise<{ bytes: Uint8Array } | { error: string }> => { + const reader = body.getReader(); + const chunks: Uint8Array[] = []; + let totalBytes = 0; + + try { + for (;;) { + const { done, value } = await reader.read(); + if (done) { + break; + } + totalBytes += value.byteLength; + if (totalBytes > MAX_BODY_BYTES) { + await reader.cancel(); + return { error: "Response exceeds 2MB size limit" }; + } + chunks.push(value); + } + } finally { + reader.releaseLock(); + } + + return { bytes: concatChunks(chunks, totalBytes) }; +}; + +const collapseWhitespace = (text: string): string => + text + .replace(/\n{3,}/g, "\n\n") + .replace(/[ \t]{2,}/g, " ") + .trim(); + +const convertContent = ( + raw: string, + rawContentType: string +): { text: string } | { error: string } => { + 
const contentType = rawContentType.toLowerCase(); + if (isHtml(contentType)) { + const markdown = NodeHtmlMarkdown.translate(raw); + return { text: collapseWhitespace(markdown) }; + } + + if (isJson(contentType)) { + try { + const parsed: unknown = JSON.parse(raw); + const pretty = JSON.stringify(parsed, null, 2); + return { text: `\`\`\`json\n${pretty}\n\`\`\`` }; + } catch { + return { text: `\`\`\`\n${raw}\n\`\`\`` }; + } + } + + if (isPlainText(contentType)) { + return { text: collapseWhitespace(raw) }; + } + + return { error: `Unsupported content type: ${contentType}` }; +}; + +const isRedirect = (status: number): boolean => + status === 301 || + status === 302 || + status === 303 || + status === 307 || + status === 308; + +const fetchWithSafeRedirects = async ( + initialUrl: string +): Promise => { + let currentUrl = initialUrl; + + for (let i = 0; i <= MAX_REDIRECTS; i++) { + const response = await fetch(currentUrl, { + headers: { "User-Agent": USER_AGENT }, + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + redirect: "manual", + }); + + if (!isRedirect(response.status)) { + return response; + } + + const location = response.headers.get("location"); + if (!location) { + return response; + } + + const resolved = new URL(location, currentUrl).toString(); + const check = validateSourceUrl(resolved); + if (!check.valid) { + throw new Error(`Redirect to unsafe URL: ${check.reason}`); + } + + currentUrl = resolved; + } + + throw new Error("Too many redirects"); +}; + +const scrapeUrl = async ( + url: string, + maxTextLength = DEFAULT_MAX_TEXT_LENGTH +): Promise => { + let response: Response; + try { + response = await fetchWithSafeRedirects(url); + } catch (error) { + if (error instanceof DOMException && error.name === "TimeoutError") { + return { ok: false, error: "Request timed out" }; + } + return { + ok: false, + error: error instanceof Error ? 
error.message : "Fetch failed", + }; + } + + if (!response.ok) { + const mapped = ERROR_MAP[response.status]; + return { + ok: false, + error: mapped ?? `HTTP ${String(response.status)}`, + statusCode: response.status, + }; + } + + if (!response.body) { + return { ok: false, error: "Empty response body" }; + } + + const bodyResult = await readBodyWithLimit( + response.body as ReadableStream + ); + if ("error" in bodyResult) { + return { ok: false, error: bodyResult.error }; + } + + const raw = new TextDecoder().decode(bodyResult.bytes); + const contentType = response.headers.get("content-type") ?? ""; + + const converted = convertContent(raw, contentType); + if ("error" in converted) { + return { ok: false, error: converted.error }; + } + + const truncated = converted.text.length > maxTextLength; + const text = truncated + ? converted.text.slice(0, maxTextLength) + : converted.text; + + return { ok: true, text, truncated }; +}; + +export { + collapseWhitespace, + convertContent, + DEFAULT_MAX_TEXT_LENGTH, + FETCH_TIMEOUT_MS, + MAX_BODY_BYTES, + MAX_REDIRECTS, + scrapeUrl, + USER_AGENT, +}; +export type { ScrapeResult }; diff --git a/apps/operator/src/utils/url-validator.test.ts b/apps/operator/src/utils/url-validator.test.ts index 0d8a9f8..a8eb001 100644 --- a/apps/operator/src/utils/url-validator.test.ts +++ b/apps/operator/src/utils/url-validator.test.ts @@ -1,62 +1,50 @@ import { describe, expect, it } from "vitest"; -import { parseAllowedDomains, validateSourceUrl } from "./url-validator"; - -const DOMAINS = ["telsu.fi", "blackrock.com"]; +import { validateSourceUrl } from "./url-validator"; describe("validateSourceUrl", () => { - it("accepts valid HTTPS URL on allowed domain", () => { - expect(validateSourceUrl("https://www.telsu.fi/", DOMAINS)).toEqual({ + it("accepts valid HTTPS URL", () => { + expect(validateSourceUrl("https://www.example.com/")).toEqual({ valid: true, }); }); - it("accepts subdomain of allowed domain", () => { + it("accepts HTTPS URL with 
path", () => { expect( - validateSourceUrl( - "https://www.blackrock.com/us/individual/insights", - DOMAINS - ) + validateSourceUrl("https://www.example.org/us/individual/insights") ).toEqual({ valid: true }); }); - it("accepts exact domain match", () => { - expect(validateSourceUrl("https://telsu.fi/page", DOMAINS)).toEqual({ - valid: true, - }); - }); - it("rejects HTTP URLs", () => { - const result = validateSourceUrl("http://www.telsu.fi/", DOMAINS); + const result = validateSourceUrl("http://www.example.com/"); expect(result).toEqual({ valid: false, reason: "Only HTTPS URLs are allowed", }); }); - it("rejects non-allowlisted domains", () => { - const result = validateSourceUrl("https://evil.com/", DOMAINS); - expect(result.valid).toBe(false); - expect(result).toHaveProperty("reason"); - }); - it("rejects localhost", () => { - const result = validateSourceUrl("https://localhost/", DOMAINS); + const result = validateSourceUrl("https://localhost/"); expect(result.valid).toBe(false); }); it("rejects 127.0.0.1", () => { - const result = validateSourceUrl("https://127.0.0.1/", DOMAINS); + const result = validateSourceUrl("https://127.0.0.1/"); expect(result.valid).toBe(false); }); it("rejects [::1]", () => { - const result = validateSourceUrl("https://[::1]/", DOMAINS); + const result = validateSourceUrl("https://[::1]/"); + expect(result.valid).toBe(false); + }); + + it("rejects 0.0.0.0", () => { + const result = validateSourceUrl("https://0.0.0.0/"); expect(result.valid).toBe(false); }); it("rejects URLs with credentials", () => { - const result = validateSourceUrl("https://user:pass@telsu.fi/", DOMAINS); + const result = validateSourceUrl("https://user:pass@example.com/"); expect(result).toEqual({ valid: false, reason: "URLs with credentials are not allowed", @@ -64,10 +52,7 @@ describe("validateSourceUrl", () => { }); it("rejects URLs over 2048 chars", () => { - const result = validateSourceUrl( - "https://telsu.fi/" + "a".repeat(2048), - DOMAINS - ); + const 
result = validateSourceUrl("https://example.com/" + "a".repeat(2048)); expect(result).toEqual({ valid: false, reason: "URL exceeds 2048 character limit", @@ -75,12 +60,12 @@ describe("validateSourceUrl", () => { }); it("rejects invalid URLs", () => { - const result = validateSourceUrl("not-a-url", DOMAINS); + const result = validateSourceUrl("not-a-url"); expect(result).toEqual({ valid: false, reason: "Invalid URL" }); }); it("rejects FTP URLs", () => { - const result = validateSourceUrl("ftp://telsu.fi/", DOMAINS); + const result = validateSourceUrl("ftp://example.com/"); expect(result).toEqual({ valid: false, reason: "Only HTTPS URLs are allowed", @@ -88,41 +73,17 @@ describe("validateSourceUrl", () => { }); it("rejects file URLs", () => { - const result = validateSourceUrl("file:///etc/passwd", DOMAINS); + const result = validateSourceUrl("file:///etc/passwd"); expect(result.valid).toBe(false); }); - it("rejects when allowed domains list is empty", () => { - const result = validateSourceUrl("https://telsu.fi/", []); + it("rejects 127.0.0.2 (full loopback range)", () => { + const result = validateSourceUrl("https://127.0.0.2/"); expect(result.valid).toBe(false); }); - it("prevents domain suffix attacks (eviltelsu.fi)", () => { - const result = validateSourceUrl("https://eviltelsu.fi/", DOMAINS); + it("rejects 127.255.255.255 (loopback range upper bound)", () => { + const result = validateSourceUrl("https://127.255.255.255/"); expect(result.valid).toBe(false); }); }); - -describe("parseAllowedDomains", () => { - it("parses comma-separated domains", () => { - expect(parseAllowedDomains("telsu.fi,blackrock.com")).toEqual([ - "telsu.fi", - "blackrock.com", - ]); - }); - - it("trims whitespace", () => { - expect(parseAllowedDomains(" telsu.fi , blackrock.com ")).toEqual([ - "telsu.fi", - "blackrock.com", - ]); - }); - - it("returns empty array for undefined", () => { - expect(parseAllowedDomains(undefined)).toEqual([]); - }); - - it("returns empty array for empty 
string", () => { - expect(parseAllowedDomains("")).toEqual([]); - }); -}); diff --git a/apps/operator/src/utils/url-validator.ts b/apps/operator/src/utils/url-validator.ts index 6f19fde..f32f731 100644 --- a/apps/operator/src/utils/url-validator.ts +++ b/apps/operator/src/utils/url-validator.ts @@ -1,15 +1,15 @@ /** - * Validates a source URL against the SSRF safety policy: + * Validates a source URL against the safety policy: * - HTTPS only - * - Hostname must be in the allowed domains list + * - No localhost / loopback addresses + * - No credentials in URL * - Max 2048 characters * * DNS-level private IP checks are deferred to the fetch layer * (Cloudflare Workers already block fetches to private IPs). */ const validateSourceUrl = ( - url: string, - allowedDomains: string[] + url: string ): { valid: true } | { valid: false; reason: string } => { if (url.length > 2048) { return { valid: false, reason: "URL exceeds 2048 character limit" }; @@ -34,36 +34,15 @@ const validateSourceUrl = ( if ( hostname === "localhost" || - hostname === "127.0.0.1" || + hostname.startsWith("127.") || + hostname === "::1" || hostname === "[::1]" || hostname === "0.0.0.0" ) { return { valid: false, reason: "Localhost URLs are not allowed" }; } - const isAllowed = allowedDomains.some((domain) => { - const d = domain.toLowerCase(); - return hostname === d || hostname.endsWith(`.${d}`); - }); - - if (!isAllowed) { - return { - valid: false, - reason: `Domain "${hostname}" is not in the allowed list`, - }; - } - return { valid: true }; }; -const parseAllowedDomains = (envValue: string | undefined): string[] => { - if (!envValue) { - return []; - } - return envValue - .split(",") - .map((d) => d.trim()) - .filter((d) => d.length > 0); -}; - -export { parseAllowedDomains, validateSourceUrl }; +export { validateSourceUrl }; diff --git a/apps/operator/wrangler.jsonc b/apps/operator/wrangler.jsonc index 87760ea..931f8fe 100644 --- a/apps/operator/wrangler.jsonc +++ b/apps/operator/wrangler.jsonc 
@@ -6,7 +6,7 @@ "compatibility_flags": ["nodejs_compat"], "observability": { "logs": { - "enabled": false, + "enabled": true, "invocation_logs": true, }, }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 59580f1..c8ecd16 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -38,6 +38,9 @@ importers: hono: specifier: 4.12.9 version: 4.12.9 + node-html-markdown: + specifier: 2.0.0 + version: 2.0.0 openai: specifier: 6.33.0 version: 6.33.0(ws@8.18.0)(zod@4.3.6) @@ -1340,6 +1343,9 @@ packages: blake3-wasm@2.1.5: resolution: {integrity: sha512-F1+K8EbfOZE49dtoPtmxUQrpXaBIl3ICvasLh+nJta0xkz+9kF/7uet9fLnwKqhDrmj6g+6K3Tw9yQPUg2ka5g==} + boolbase@1.0.0: + resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} + brace-expansion@1.1.13: resolution: {integrity: sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==} @@ -1402,6 +1408,13 @@ packages: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} + css-select@5.2.2: + resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==} + + css-what@6.2.2: + resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==} + engines: {node: '>= 6'} + data-view-buffer@1.0.2: resolution: {integrity: sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ==} engines: {node: '>= 0.4'} @@ -1450,6 +1463,19 @@ packages: resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==} engines: {node: '>=0.10.0'} + dom-serializer@2.0.0: + resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} + + domelementtype@2.3.0: + resolution: {integrity: 
sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==} + + domhandler@5.0.3: + resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} + engines: {node: '>= 4'} + + domutils@3.2.2: + resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==} + drizzle-kit@0.31.10: resolution: {integrity: sha512-7OZcmQUrdGI+DUNNsKBn1aW8qSoKuTH7d0mYgSP8bAzdFzKoovxEFnoGQp2dVs82EOJeYycqRtciopszwUf8bw==} hasBin: true @@ -1553,6 +1579,10 @@ packages: electron-to-chromium@1.5.328: resolution: {integrity: sha512-QNQ5l45DzYytThO21403XN3FvK0hOkWDG8viNf6jqS42msJ8I4tGDSpBCgvDRRPnkffafiwAym2X2eHeGD2V0w==} + entities@4.5.0: + resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + error-stack-parser-es@1.0.5: resolution: {integrity: sha512-5qucVt2XcuGMcEGgWI7i+yZpmpByQ8J1lHhcL7PwqCwu9FPP3VUXzT4ltHe5i2z9dePwEHcDVOAfSnHsOlCXRA==} @@ -1831,6 +1861,10 @@ packages: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} + he@1.2.0: + resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==} + hasBin: true + hermes-estree@0.25.1: resolution: {integrity: sha512-0wUoCcLp+5Ev5pDW2OriHC2MJCbwLwuRx+gAqMTOkGKJJiBCLjtrvy4PWUGn6MIVefecRpzoOZ/UV6iGdOr+Cw==} @@ -2137,9 +2171,19 @@ packages: resolution: {integrity: sha512-pyFS63ptit/P5WqUkt+UUfe+4oevH+bFeIiPPdfb0pFeYEu/1ELnJu5l+5EcTKYL5M7zaAa7S8ddywgXypqKCw==} engines: {node: '>= 0.4'} + node-html-markdown@2.0.0: + resolution: {integrity: sha512-DqUC3GGP7pwSYxS93SwHoP+qCw78xcMP6C6H2DuC8rPD2AweJRjBzQb5SdXpKtDlqAQ7hVotJcfhgU7hU5Gthw==} + engines: {node: '>=20.0.0'} + + node-html-parser@6.1.13: + resolution: {integrity: 
sha512-qIsTMOY4C/dAa5Q5vsobRpOOvPfC4pB61UVW2uSwZNUp0QU/jCekTal1vMmbO0DgdHeLUJpv/ARmDqErVxA3Sg==} + node-releases@2.0.36: resolution: {integrity: sha512-TdC8FSgHz8Mwtw9g5L4gR/Sh9XhSP/0DEkQxfEFXOpiul5IiHgHan2VhYYb6agDSfp4KuvltmGApc8HMgUrIkA==} + nth-check@2.1.1: + resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -3630,6 +3674,8 @@ snapshots: blake3-wasm@2.1.5: {} + boolbase@1.0.0: {} + brace-expansion@1.1.13: dependencies: balanced-match: 1.0.2 @@ -3695,6 +3741,16 @@ snapshots: shebang-command: 2.0.0 which: 2.0.2 + css-select@5.2.2: + dependencies: + boolbase: 1.0.0 + css-what: 6.2.2 + domhandler: 5.0.3 + domutils: 3.2.2 + nth-check: 2.1.1 + + css-what@6.2.2: {} + data-view-buffer@1.0.2: dependencies: call-bound: 1.0.4 @@ -3741,6 +3797,24 @@ snapshots: dependencies: esutils: 2.0.3 + dom-serializer@2.0.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + entities: 4.5.0 + + domelementtype@2.3.0: {} + + domhandler@5.0.3: + dependencies: + domelementtype: 2.3.0 + + domutils@3.2.2: + dependencies: + dom-serializer: 2.0.0 + domelementtype: 2.3.0 + domhandler: 5.0.3 + drizzle-kit@0.31.10: dependencies: '@drizzle-team/brocli': 0.10.2 @@ -3760,6 +3834,8 @@ snapshots: electron-to-chromium@1.5.328: {} + entities@4.5.0: {} + error-stack-parser-es@1.0.5: {} es-abstract@1.24.1: @@ -4222,6 +4298,8 @@ snapshots: dependencies: function-bind: 1.1.2 + he@1.2.0: {} + hermes-estree@0.25.1: {} hermes-parser@0.25.1: @@ -4510,8 +4588,21 @@ snapshots: object.entries: 1.1.9 semver: 6.3.1 + node-html-markdown@2.0.0: + dependencies: + node-html-parser: 6.1.13 + + node-html-parser@6.1.13: + dependencies: + css-select: 5.2.2 + he: 1.2.0 + node-releases@2.0.36: {} + nth-check@2.1.1: + dependencies: + boolbase: 1.0.0 + object-assign@4.1.1: {} 
object-inspect@1.13.4: {}