// Poison Fountain setup

import type { Context, Config } from '@netlify/edge-functions';

/**
 * Upstream endpoint fetched on every request from a user agent listed in
 * `poisonPatterns`; its response body is relayed to that client as HTML.
 */
const PoisonURL = 'https://RNSAFFN.com/poison2/';

/**
 * Case-insensitive User-Agent matchers for crawlers that receive the content
 * fetched from `PoisonURL` instead of the real page.
 *
 * Each entry is a literal substring of the crawler's User-Agent; none of the
 * tokens contain regex metacharacters, so they compile to plain substring
 * matches.
 */
const poisonPatterns = [
  'DuckAssistBot',
  'Claude-SearchBot',
  'ChatGPT',
  'Scrapy',
  'OAI-SearchBot',
  'Applebot',
  'DotBot',
  'Amazonbot',
  'MistralAI',
  'iaskspider',
  'Bytespider',
  'GoogleOther',
  'Google-NotebookLM',
  'ClaudeBot',
  'PerplexityBot',
  'PetalBot',
  'Brightbot',
].map((token) => new RegExp(token, 'i'));

/**
 * Case-insensitive User-Agent matchers for automated clients that are refused
 * with the 403 "AI Use Prohibited" page.
 *
 * Each entry is a literal substring of the client's User-Agent; none of the
 * tokens contain regex metacharacters, so they compile to plain substring
 * matches.
 */
const blockPatterns = [
  'headlesschrome',
  'headlesschromium',
  'lightpanda',
  'puppeteer',
  'AhrefsBot',
  'AhrefsSiteAudit',
  'KStandBot',
  'ev-crawler',
  'NetcraftSurveyAgent',
  'BitSightBot',
  'Mediapartners-Google',
  'Pandalytics',
  'MetaInspector',
  'InternetMeasurement',
  'Thinkbot',
  'BrightEdge Crawler',
  'Timpibot',
  'wpbot',
  'Slackbot',
  'l9scan',
  'CensysInspect',
  'Nutch',
  'TerraCotta',
  'Flyriverbot',
  'Storebot-Google',
  'MarketGoo',
  'HubSpot',
  'panscient',
].map((token) => new RegExp(token, 'i'));

/**
 * Static "AI Use Prohibited" page returned with HTTP 403 to user agents that
 * match `blockPatterns`.
 *
 * Besides the visible notice, the body starts with a visually hidden link
 * (`sr-only`, `aria-hidden="true"`, `tabindex="-1"`) to
 * `/.netlify/functions/pf`. That function is not defined in this file;
 * judging by the link text it is presumably a trap for crawlers that follow
 * links despite robots.txt — confirm against the function's source.
 */
const html = `<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>AI Use Prohibited</title>
    <style>
      body {
        font-family: Arial, sans-serif;
        text-align: center;
        background-color: #1F211F;
        color: #FFF5E6;
        padding: 50px 16px;
      }
      .container {
        max-width: 75ch;
        margin: auto;
      }
      .sr-only {
        border: 0;
        clip: rect(0, 0, 0, 0);
        height: 1px;
        margin: -1px;
        overflow: hidden;
        padding: 0;
        position: absolute;
        width: 1px;
      }
      h1 {
        color: #FFDDAC;
      }
      a,
      a:visited {
        color: #FFDDAC;
      }
    </style>
  </head>
  <body>
    <a
      href="/.netlify/functions/pf"
      class="sr-only"
      aria-hidden="true"
      tabindex="-1"
    >Poison AI crawlers if they do not respect robots.txt</a>
    <div class="container">
      <h1>AI use Prohibited</h1>
      <p>
        By accessing this website or operating a computer system that accesses
        this website you agree that using the content within to train AI or be
        processed by AI in any way is a violation of the terms of use and a
        violation of intellectual property rights.
      </p>
      <p>
        This request has been identified as coming from a non-human visitor and has
        therefore been blocked. If you believe this to be in error please
        <a href="https://www.dlford.io/contact">contact me</a>.
        If you are trying to see when new content is published please subscribe
        to the <a href="/rss">RSS feed</a> or
        <a href="https://www.dlford.io/subscribe">Mailing List</a>
        instead of scraping.
      </p>
      <p>
        Thank you for your understanding.
      </p>
    </div>
  </body>
</html>`;

/** Reports whether `userAgent` matches at least one pattern in `patterns`. */
const matchesAny = (patterns: readonly RegExp[], userAgent: string): boolean =>
  patterns.some((pattern) => pattern.test(userAgent));

/** Builds the 403 response carrying the static "AI Use Prohibited" page. */
const blockedResponse = (): Response =>
  new Response(html, {
    status: 403,
    headers: { 'Content-Type': 'text/html' },
  });

/**
 * Edge function entry point: classifies the request by its User-Agent header.
 *
 * - Matches `poisonPatterns`: responds 200 with the body fetched from
 *   `PoisonURL`. If that fetch rejects or returns a non-2xx status, the client
 *   is served the 403 block page instead, so a listed crawler never falls
 *   through to the real site because the poison source is unavailable.
 * - Matches `blockPatterns`: responds 403 with the block page.
 * - Anything else, including a missing or empty User-Agent: the request is
 *   passed on via `context.next()`.
 *
 * @param request - Incoming request; only its `User-Agent` header is read.
 * @param context - Netlify edge context; supplies `ip` and `url` for logging
 *   and `next()` to continue the request chain.
 * @returns The poisoned response, the block page, or the downstream response.
 */
export default async (request: Request, context: Context) => {
  const userAgent = request.headers.get('User-Agent');

  // Nothing to classify without a User-Agent; let the request through.
  if (!userAgent) {
    return context.next();
  }

  const requestInfo = `IP="${context.ip}" path="${context.url.pathname}" UserAgent="${userAgent}"`;

  if (matchesAny(poisonPatterns, userAgent)) {
    try {
      const res = await fetch(PoisonURL);
      // fetch only rejects on network failure; an HTTP error status must be
      // checked explicitly so an upstream error page is not relayed as a 200.
      if (!res.ok) {
        throw new Error(`poison source responded with HTTP ${res.status}`);
      }
      const data = await res.text();

      console.log(`POISONED: ${requestInfo}`);

      return new Response(data, {
        status: 200,
        headers: { 'Content-Type': 'text/html' },
      });
    } catch (error: unknown) {
      // Throwing here would trigger the function's `onError: 'bypass'` config
      // and serve the crawler the real page, so refuse the request instead.
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`POISON FAILED (${reason}), blocking instead: ${requestInfo}`);
      return blockedResponse();
    }
  }

  if (matchesAny(blockPatterns, userAgent)) {
    console.log(`BLOCKED: ${requestInfo}`);
    return blockedResponse();
  }

  return context.next();
};

/**
 * Netlify edge function configuration.
 *
 * The function is declared for every path (`/*`) except the listed static and
 * infrastructure routes. `onError: 'bypass'` selects Netlify's bypass error
 * mode, which (per Netlify's documentation) lets the request continue without
 * this function if the handler throws.
 */
export const config: Config = {
  path: '/*',
  excludedPath: [
    '/media/*',
    '/.well-known/*',
    '/license.xml',
    '/robots.txt',
    '/.netlify/functions/pf',
  ],
  onError: 'bypass',
};