feat(telegram): support outbound media groups via sendMediaGroup

Send 2-10 photos/videos as Telegram albums using sendMediaGroup when
all items are groupable (photo/video only). Falls back to existing
per-item delivery for single items, GIFs, audio, documents, or mixed
non-groupable media.

Caption is placed on the first album item (subject to 1024-char limit)
with overflow sent as a follow-up text message. Buttons are sent as a
follow-up since sendMediaGroup does not support reply_markup. Threading,
reply-to, silent, and pin behaviors are preserved.

Closes #13620
Supersedes #21309

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
shinya 2026-03-21 14:08:24 +08:00
parent 8a05c05596
commit 204b19991f
3 changed files with 576 additions and 30 deletions

View File

@ -1,4 +1,5 @@
import { type Bot, GrammyError, InputFile } from "grammy";
import { type Bot, GrammyError, InputFile, InputMediaBuilder } from "grammy";
import type { InputMediaPhoto, InputMediaVideo } from "grammy/types";
import type { ReplyToMode } from "openclaw/plugin-sdk/config-runtime";
import type { MarkdownTableMode } from "openclaw/plugin-sdk/config-runtime";
import { fireAndForgetHook } from "openclaw/plugin-sdk/hook-runtime";
@ -30,6 +31,7 @@ import { buildInlineKeyboard } from "../send.js";
import { resolveTelegramVoiceSend } from "../voice.js";
import {
buildTelegramSendParams,
sendTelegramMediaGroup,
sendTelegramText,
sendTelegramWithThreadFallback,
} from "./delivery.send.js";
@ -90,6 +92,144 @@ function markDelivered(progress: DeliveryProgress): void {
progress.deliveredCount += 1;
}
// Telegram's sendMediaGroup accepts between 2 and 10 items per album.
const MEDIA_GROUP_MIN = 2;
const MEDIA_GROUP_MAX = 10;
// The only InputMedia kinds this module will put in an album.
type GroupableMediaKind = "image" | "video";
/**
 * Cheap pre-check for album delivery: true when the media count is within
 * Telegram's sendMediaGroup bounds (2-10) and the reply is not a voice-note
 * reply (audioAsVoice goes through the dedicated voice path).
 *
 * NOTE: this does NOT verify that every item is actually a photo/video —
 * content types are only known after the media is loaded, so
 * deliverMediaGroupReply performs the final per-item check and returns null
 * to trigger the per-item fallback when an item is not groupable.
 */
function isGroupableMediaList(mediaList: string[], reply: ReplyPayload): boolean {
  if (reply.audioAsVoice) return false;
  return mediaList.length >= MEDIA_GROUP_MIN && mediaList.length <= MEDIA_GROUP_MAX;
}
/**
* Sends 2-10 photos/videos as a single Telegram album via sendMediaGroup.
* Caption is placed on the first item (subject to 1024-char limit).
* Buttons and overflow text are sent as follow-up messages because
* sendMediaGroup does not support reply_markup.
*
* Returns the first delivered message_id, or undefined on empty delivery.
* Returns `null` when the loaded media turns out to be non-groupable
* (e.g. GIF, audio, document) so the caller can fall back to per-item delivery.
*/
/**
 * Sends 2-10 photos/videos as a single Telegram album via sendMediaGroup.
 *
 * The caption (subject to Telegram's 1024-char album-caption limit) is placed
 * on the first item only; overflow text and inline buttons are delivered as
 * follow-up messages because sendMediaGroup does not support reply_markup.
 * Threading, reply-to, and silent behaviors are forwarded unchanged.
 *
 * @returns the first delivered message_id, or `null` when a loaded item turns
 *   out to be non-groupable (GIF, audio, document, unknown MIME) so the
 *   caller can fall back to per-item delivery.
 */
async function deliverMediaGroupReply(params: {
  reply: ReplyPayload;
  mediaList: string[];
  bot: Bot;
  chatId: string;
  runtime: RuntimeEnv;
  thread?: TelegramThreadSpec | null;
  tableMode?: MarkdownTableMode;
  mediaLocalRoots?: readonly string[];
  chunkText: ChunkTextFn;
  mediaLoader: typeof loadWebMedia;
  linkPreview?: boolean;
  silent?: boolean;
  // Accepted for signature parity with deliverMediaReply; not used on the
  // album path.
  replyQuoteText?: string;
  replyMarkup?: ReturnType<typeof buildInlineKeyboard>;
  replyToId?: number;
  replyToMode: ReplyToMode;
  progress: DeliveryProgress;
}): Promise<number | undefined | null> {
  // Split the reply text once up front: `caption` rides on the first album
  // item, `followUpText` (overflow beyond the caption limit) goes out as a
  // separate text message after the album.
  const { caption, followUpText } = splitTelegramCaption(params.reply.text ?? undefined);
  // Load all media and build InputMedia entries, bailing out if any item is
  // not a groupable type (photo/video).
  const inputMedia: Array<InputMediaPhoto | InputMediaVideo> = [];
  for (let i = 0; i < params.mediaList.length; i++) {
    const mediaUrl = params.mediaList[i]!;
    const media = await params.mediaLoader(
      mediaUrl,
      buildOutboundMediaLoadOptions({ mediaLocalRoots: params.mediaLocalRoots }),
    );
    const kind = kindFromMime(media.contentType ?? undefined);
    const isGif = isGifMedia({
      contentType: media.contentType,
      fileName: media.fileName,
    });
    // GIFs must go through per-item delivery (sendAnimation); audio,
    // documents, and unknown types are not album-compatible either.
    const groupKind: GroupableMediaKind | undefined =
      kind === "image" || kind === "video" ? kind : undefined;
    if (isGif || !groupKind) {
      return null;
    }
    const file = new InputFile(media.buffer, media.fileName ?? "file");
    // Caption is only attached to the first item in the group.
    let captionOpts: { caption?: string; parse_mode?: "HTML" } = {};
    if (i === 0 && caption) {
      const htmlCaption = renderTelegramHtmlText(caption, { tableMode: params.tableMode });
      captionOpts = { caption: htmlCaption, parse_mode: "HTML" };
    }
    inputMedia.push(
      groupKind === "video"
        ? InputMediaBuilder.video(file, captionOpts)
        : InputMediaBuilder.photo(file, captionOpts),
    );
  }
  const replyToMessageId = resolveReplyToForSend({
    replyToId: params.replyToId,
    replyToMode: params.replyToMode,
    progress: params.progress,
  });
  const firstMessageId = await sendTelegramMediaGroup({
    bot: params.bot,
    chatId: params.chatId,
    media: inputMedia,
    runtime: params.runtime,
    thread: params.thread,
    replyToMessageId,
    silent: params.silent,
  });
  // Every album item counts toward delivery progress.
  for (let i = 0; i < inputMedia.length; i++) {
    markDelivered(params.progress);
  }
  markReplyApplied(params.progress, replyToMessageId);
  // Caption overflow and/or buttons require follow-up messages, since
  // sendMediaGroup does not support reply_markup.
  if (followUpText) {
    await sendPendingFollowUpText({
      bot: params.bot,
      chatId: params.chatId,
      runtime: params.runtime,
      thread: params.thread,
      chunkText: params.chunkText,
      text: followUpText,
      replyMarkup: params.replyMarkup,
      linkPreview: params.linkPreview,
      silent: params.silent,
      replyToId: params.replyToId,
      replyToMode: params.replyToMode,
      progress: params.progress,
    });
  } else if (params.replyMarkup) {
    // No overflow text, but buttons still need a carrier message; a
    // zero-width space keeps the carrier visually empty.
    await sendTelegramText(params.bot, params.chatId, "\u200B", params.runtime, {
      thread: params.thread,
      textMode: "html",
      plainText: "\u200B",
      silent: params.silent,
      replyMarkup: params.replyMarkup,
    });
    markDelivered(params.progress);
  }
  return firstMessageId;
}
async function deliverTextReply(params: {
bot: Bot;
chatId: string;
@ -671,26 +811,56 @@ export async function deliverReplies(params: {
progress,
});
} else {
firstDeliveredMessageId = await deliverMediaReply({
reply,
mediaList,
bot: params.bot,
chatId: params.chatId,
runtime: params.runtime,
thread: params.thread,
tableMode: params.tableMode,
mediaLocalRoots: params.mediaLocalRoots,
chunkText,
mediaLoader,
onVoiceRecording: params.onVoiceRecording,
linkPreview: params.linkPreview,
silent: params.silent,
replyQuoteText: params.replyQuoteText,
replyMarkup,
replyToId,
replyToMode: params.replyToMode,
progress,
});
// Try sending as a media group (album) when criteria are met.
let usedMediaGroup = false;
if (isGroupableMediaList(mediaList, reply)) {
const groupResult = await deliverMediaGroupReply({
reply,
mediaList,
bot: params.bot,
chatId: params.chatId,
runtime: params.runtime,
thread: params.thread,
tableMode: params.tableMode,
mediaLocalRoots: params.mediaLocalRoots,
chunkText,
mediaLoader,
linkPreview: params.linkPreview,
silent: params.silent,
replyQuoteText: params.replyQuoteText,
replyMarkup,
replyToId,
replyToMode: params.replyToMode,
progress,
});
// null means the loaded media was not groupable; fall back to per-item.
if (groupResult !== null) {
firstDeliveredMessageId = groupResult;
usedMediaGroup = true;
}
}
if (!usedMediaGroup) {
firstDeliveredMessageId = await deliverMediaReply({
reply,
mediaList,
bot: params.bot,
chatId: params.chatId,
runtime: params.runtime,
thread: params.thread,
tableMode: params.tableMode,
mediaLocalRoots: params.mediaLocalRoots,
chunkText,
mediaLoader,
onVoiceRecording: params.onVoiceRecording,
linkPreview: params.linkPreview,
silent: params.silent,
replyQuoteText: params.replyQuoteText,
replyMarkup,
replyToId,
replyToMode: params.replyToMode,
progress,
});
}
}
await maybePinFirstDeliveredMessage({
shouldPin: shouldPinFirstMessage,

View File

@ -1,4 +1,5 @@
import { type Bot, GrammyError } from "grammy";
import type { InputMediaPhoto, InputMediaVideo } from "grammy/types";
import { formatErrorMessage } from "openclaw/plugin-sdk/infra-runtime";
import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env";
import { withTelegramApiErrorLogging } from "../api-logging.js";
@ -92,6 +93,32 @@ export function buildTelegramSendParams(opts?: {
return params;
}
/**
 * Delivers a prepared photo/video album through Bot API sendMediaGroup,
 * routed through the shared thread-fallback wrapper so topic/thread errors
 * are handled the same way as single-message sends.
 *
 * @returns the message_id of the first message in the returned batch, which
 *   downstream code uses for pinning and hooks.
 *   NOTE(review): an empty API result collapses to 0 here — confirm callers
 *   never treat 0 as a real message_id (e.g. when pinning).
 */
export async function sendTelegramMediaGroup(params: {
  bot: Bot;
  chatId: string;
  media: ReadonlyArray<InputMediaPhoto | InputMediaVideo>;
  runtime: RuntimeEnv;
  thread?: TelegramThreadSpec | null;
  replyToMessageId?: number;
  silent?: boolean;
}): Promise<number> {
  const { bot, chatId, media, runtime, thread, replyToMessageId, silent } = params;
  const baseParams = buildTelegramSendParams({ replyToMessageId, thread, silent });
  const sent = await sendTelegramWithThreadFallback({
    operation: "sendMediaGroup",
    runtime,
    thread,
    requestParams: baseParams,
    send: (effectiveParams) => bot.api.sendMediaGroup(chatId, media, { ...effectiveParams }),
  });
  // sendMediaGroup resolves to an array of messages; only the first id is
  // needed by callers.
  const [firstMessage] = sent;
  return firstMessage?.message_id ?? 0;
}
export async function sendTelegramText(
bot: Bot,
chatId: string,

View File

@ -59,6 +59,18 @@ vi.mock("grammy", () => ({
GrammyError: class GrammyError extends Error {
description = "";
},
InputMediaBuilder: {
photo: (media: unknown, opts?: Record<string, unknown>) => ({
type: "photo",
media,
...opts,
}),
video: (media: unknown, opts?: Record<string, unknown>) => ({
type: "video",
media,
...opts,
}),
},
}));
function createRuntime(withLog = true): RuntimeStub {
@ -830,19 +842,26 @@ describe("deliverReplies", () => {
}
});
it("replyToMode 'first' only applies reply-to to first media item", async () => {
it("replyToMode 'first' only applies reply-to to first media item (per-item path)", async () => {
// Use a GIF + photo to force per-item delivery (non-groupable mix).
const runtime = createRuntime();
const sendPhoto = vi.fn().mockResolvedValue({
const sendAnimation = vi.fn().mockResolvedValue({
message_id: 30,
chat: { id: "123" },
});
const bot = createBot({ sendPhoto });
const sendPhoto = vi.fn().mockResolvedValue({
message_id: 31,
chat: { id: "123" },
});
const bot = createBot({ sendAnimation, sendPhoto });
mockMediaLoad("a.jpg", "image/jpeg", "img1");
mockMediaLoad("a.gif", "image/gif", "gif1");
// Media group fallback re-loads (first item detected as GIF).
mockMediaLoad("a.gif", "image/gif", "gif1");
mockMediaLoad("b.jpg", "image/jpeg", "img2");
await deliverReplies({
replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"], replyToId: "900" }],
replies: [{ mediaUrls: ["https://a.gif", "https://b.jpg"], replyToId: "900" }],
chatId: "123",
token: "tok",
runtime,
@ -851,13 +870,14 @@ describe("deliverReplies", () => {
textLimit: 4000,
});
expect(sendPhoto).toHaveBeenCalledTimes(2);
// First media should have reply_to_message_id
expect(sendPhoto.mock.calls[0][2]).toEqual(
expect(sendAnimation).toHaveBeenCalledTimes(1);
expect(sendPhoto).toHaveBeenCalledTimes(1);
// First media (GIF) should have reply_to_message_id
expect(sendAnimation.mock.calls[0][2]).toEqual(
expect.objectContaining({ reply_to_message_id: 900 }),
);
// Second media should NOT have reply_to_message_id
expect(sendPhoto.mock.calls[1][2]).not.toHaveProperty("reply_to_message_id");
expect(sendPhoto.mock.calls[0][2]).not.toHaveProperty("reply_to_message_id");
});
it("pins the first delivered text message when telegram pin is requested", async () => {
@ -917,4 +937,333 @@ describe("deliverReplies", () => {
expect(sendVoice).toHaveBeenCalledTimes(1);
expect(sendMessage).not.toHaveBeenCalled();
});
describe("media groups (albums)", () => {
  // Shared fixture: sendMediaGroup resolves with two messages (messageId and
  // messageId + 1); sendMessage/sendPhoto stubs cover follow-up and fallback
  // sends. Tests destructure only the stubs they assert on.
  function createMediaGroupHarness(messageId = 50) {
    const runtime = createRuntime();
    const sendMediaGroup = vi.fn().mockResolvedValue([
      { message_id: messageId, chat: { id: "123" } },
      { message_id: messageId + 1, chat: { id: "123" } },
    ]);
    const sendMessage = vi.fn().mockResolvedValue({
      message_id: messageId + 10,
      chat: { id: "123" },
    });
    const sendPhoto = vi.fn().mockResolvedValue({
      message_id: messageId + 20,
      chat: { id: "123" },
    });
    const bot = createBot({ sendMediaGroup, sendMessage, sendPhoto });
    return { runtime, sendMediaGroup, sendMessage, sendPhoto, bot };
  }
  it("sends 2 photos as a media group via sendMediaGroup", async () => {
    const { runtime, sendMediaGroup, sendPhoto, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"] }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).toHaveBeenCalledTimes(1);
    expect(sendPhoto).not.toHaveBeenCalled();
    // Second positional arg of bot.api.sendMediaGroup is the InputMedia[].
    const media = sendMediaGroup.mock.calls[0][1];
    expect(media).toHaveLength(2);
    expect(media[0].type).toBe("photo");
    expect(media[1].type).toBe("photo");
  });
  it("sends mixed photos and videos as a media group", async () => {
    const { runtime, sendMediaGroup, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.mp4", "video/mp4", "vid1");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.mp4"] }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).toHaveBeenCalledTimes(1);
    const media = sendMediaGroup.mock.calls[0][1];
    expect(media[0].type).toBe("photo");
    expect(media[1].type).toBe("video");
  });
  it("places caption only on the first item in the group", async () => {
    const { runtime, sendMediaGroup, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"], text: "album caption" }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).toHaveBeenCalledTimes(1);
    const media = sendMediaGroup.mock.calls[0][1];
    expect(media[0].caption).toBe("album caption");
    expect(media[0].parse_mode).toBe("HTML");
    expect(media[1].caption).toBeUndefined();
  });
  it("sends caption overflow as a follow-up text message", async () => {
    const { runtime, sendMediaGroup, sendMessage, bot } = createMediaGroupHarness();
    // 1025 chars: one over Telegram's 1024-char caption limit.
    const longCaption = "x".repeat(1025);
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"], text: longCaption }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).toHaveBeenCalledTimes(1);
    // Caption should not be on any media item when it overflows.
    const media = sendMediaGroup.mock.calls[0][1];
    expect(media[0].caption).toBeUndefined();
    // Follow-up text message should be sent.
    expect(sendMessage).toHaveBeenCalledTimes(1);
    expect(sendMessage.mock.calls[0][1]).toContain("x".repeat(100));
  });
  it("falls back to per-item when a GIF is in the list", async () => {
    const runtime = createRuntime();
    const sendMediaGroup = vi.fn();
    const sendAnimation = vi.fn().mockResolvedValue({
      message_id: 60,
      chat: { id: "123" },
    });
    const sendPhoto = vi.fn().mockResolvedValue({
      message_id: 61,
      chat: { id: "123" },
    });
    const bot = createBot({ sendMediaGroup, sendAnimation, sendPhoto });
    // Mock-load queue order matters: the group attempt consumes one load,
    // then the per-item fallback consumes the rest.
    // First media is a GIF — detected during group loading, triggers fallback.
    mockMediaLoad("anim.gif", "image/gif", "gif1");
    // Per-item fallback will re-load the media.
    mockMediaLoad("anim.gif", "image/gif", "gif1");
    mockMediaLoad("b.jpg", "image/jpeg", "img1");
    await deliverWith({
      replies: [{ mediaUrls: ["https://anim.gif", "https://b.jpg"] }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).not.toHaveBeenCalled();
    expect(sendAnimation).toHaveBeenCalledTimes(1);
    expect(sendPhoto).toHaveBeenCalledTimes(1);
  });
  it("falls back to per-item when audio is in the list", async () => {
    const runtime = createRuntime();
    const sendMediaGroup = vi.fn();
    const sendPhoto = vi.fn().mockResolvedValue({
      message_id: 70,
      chat: { id: "123" },
    });
    const sendAudio = vi.fn().mockResolvedValue({
      message_id: 71,
      chat: { id: "123" },
    });
    const bot = createBot({ sendMediaGroup, sendPhoto, sendAudio });
    // First media is audio — not groupable.
    mockMediaLoad("song.mp3", "audio/mpeg", "audio1");
    // Per-item fallback re-loads.
    mockMediaLoad("song.mp3", "audio/mpeg", "audio1");
    mockMediaLoad("b.jpg", "image/jpeg", "img1");
    await deliverWith({
      replies: [{ mediaUrls: ["https://song.mp3", "https://b.jpg"] }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).not.toHaveBeenCalled();
    expect(sendAudio).toHaveBeenCalledTimes(1);
    expect(sendPhoto).toHaveBeenCalledTimes(1);
  });
  it("does not use media group for a single item", async () => {
    const { runtime, sendMediaGroup, bot } = createMediaGroupHarness();
    const sendPhoto = vi.fn().mockResolvedValue({
      message_id: 80,
      chat: { id: "123" },
    });
    const singleBot = createBot({ sendMediaGroup, sendPhoto });
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    await deliverWith({
      replies: [{ mediaUrl: "https://a.jpg" }],
      runtime,
      bot: singleBot,
    });
    expect(sendMediaGroup).not.toHaveBeenCalled();
    expect(sendPhoto).toHaveBeenCalledTimes(1);
  });
  it("sends buttons as a follow-up message since sendMediaGroup has no reply_markup", async () => {
    const { runtime, sendMediaGroup, sendMessage, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [
        {
          mediaUrls: ["https://a.jpg", "https://b.jpg"],
          text: "short caption",
          channelData: {
            telegram: {
              buttons: [[{ text: "Click", callback_data: "click" }]],
            },
          },
        },
      ],
      runtime,
      bot,
    });
    expect(sendMediaGroup).toHaveBeenCalledTimes(1);
    // Buttons should be in a follow-up message.
    expect(sendMessage).toHaveBeenCalledTimes(1);
    expect(sendMessage.mock.calls[0][2]).toEqual(
      expect.objectContaining({
        reply_markup: {
          inline_keyboard: [[{ text: "Click", callback_data: "click" }]],
        },
      }),
    );
  });
  it("includes message_thread_id for DM topics in media groups", async () => {
    const { runtime, sendMediaGroup, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"] }],
      runtime,
      bot,
      thread: { id: 42, scope: "dm" },
    });
    expect(sendMediaGroup).toHaveBeenCalledWith(
      "123",
      expect.any(Array),
      expect.objectContaining({
        message_thread_id: 42,
      }),
    );
  });
  it("sets disable_notification when silent is true for media groups", async () => {
    const { runtime, sendMediaGroup, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"] }],
      runtime,
      bot,
      silent: true,
    });
    expect(sendMediaGroup).toHaveBeenCalledWith(
      "123",
      expect.any(Array),
      expect.objectContaining({
        disable_notification: true,
      }),
    );
  });
  it("applies reply_to_message_id on media group", async () => {
    const { runtime, sendMediaGroup, bot } = createMediaGroupHarness();
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    // Uses deliverReplies directly so replyToMode can be set explicitly.
    await deliverReplies({
      ...baseDeliveryParams,
      replies: [{ mediaUrls: ["https://a.jpg", "https://b.jpg"], replyToId: "500" }],
      runtime,
      bot,
      replyToMode: "first",
      mediaLoader: loadWebMedia,
    });
    expect(sendMediaGroup).toHaveBeenCalledWith(
      "123",
      expect.any(Array),
      expect.objectContaining({
        reply_to_message_id: 500,
      }),
    );
  });
  it("does not use media group for audioAsVoice replies", async () => {
    const runtime = createRuntime();
    const sendMediaGroup = vi.fn();
    const sendVoice = vi.fn().mockResolvedValue({
      message_id: 90,
      chat: { id: "123" },
    });
    const bot = createBot({ sendMediaGroup, sendVoice });
    mockMediaLoad("a.ogg", "audio/ogg", "voice1");
    mockMediaLoad("b.ogg", "audio/ogg", "voice2");
    await deliverWith({
      replies: [{ mediaUrls: ["https://a.ogg", "https://b.ogg"], audioAsVoice: true }],
      runtime,
      bot,
    });
    expect(sendMediaGroup).not.toHaveBeenCalled();
  });
  it("pins the first message from a media group when pin is requested", async () => {
    const runtime = createRuntime();
    const sendMediaGroup = vi.fn().mockResolvedValue([
      { message_id: 100, chat: { id: "123" } },
      { message_id: 101, chat: { id: "123" } },
    ]);
    const pinChatMessage = vi.fn().mockResolvedValue(true);
    const bot = createBot({ sendMediaGroup, pinChatMessage });
    mockMediaLoad("a.jpg", "image/jpeg", "img1");
    mockMediaLoad("b.jpg", "image/jpeg", "img2");
    await deliverWith({
      replies: [
        {
          mediaUrls: ["https://a.jpg", "https://b.jpg"],
          channelData: { telegram: { pin: true } },
        },
      ],
      runtime,
      bot,
    });
    // Only the FIRST album message (id 100) should be pinned.
    expect(pinChatMessage).toHaveBeenCalledTimes(1);
    expect(pinChatMessage).toHaveBeenCalledWith("123", 100, { disable_notification: true });
  });
});
});