diff --git a/collector/__tests__/utils/downloadURIToFile/index.test.js b/collector/__tests__/utils/downloadURIToFile/index.test.js
new file mode 100644
index 000000000..167afb923
--- /dev/null
+++ b/collector/__tests__/utils/downloadURIToFile/index.test.js
@@ -0,0 +1,96 @@
+const path = require("path");
+const { SUPPORTED_FILETYPE_CONVERTERS } = require("../../../utils/constants");
+const { mimeToExtension } = require("../../../utils/downloadURIToFile");
+
+/**
+ * Test-local mirror of the filename-building step in downloadURIToFile,
+ * used to check that a missing extension is inferred from the Content-Type.
+ */
+function buildFilenameWithExtension(sluggedFilename, contentType) {
+  const currentExt = path.extname(sluggedFilename).toLowerCase();
+  // A supported extension is already present: keep the name untouched.
+  if (SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(currentExt))
+    return sluggedFilename;
+
+  // Drop parameters such as "; charset=utf-8" before the MIME lookup.
+  const bareMime = contentType?.toLowerCase()?.split(";")[0]?.trim();
+  const suffix = mimeToExtension(bareMime);
+  return suffix ? sluggedFilename + suffix : sluggedFilename;
+}
+
+describe("mimeToExtension", () => {
+  test("returns null for invalid or unknown input", () => {
+    // Nullish values and an unmapped MIME type must all yield null.
+    for (const input of [null, undefined, "application/octet-stream"])
+      expect(mimeToExtension(input)).toBeNull();
+  });
+
+  test("returns first extension from ACCEPTED_MIMES for known types", () => {
+    expect(mimeToExtension("application/pdf")).toBe(".pdf");
+  });
+});
+
+describe("buildFilenameWithExtension", () => {
+  test("appends .pdf when URL path has no recognized extension (arxiv case)", () => {
+    // Source URL: https://arxiv.org/pdf/2307.10265
+    // Its slugged name may carry no extension at all, e.g. "arxiv.org-pdf-230710265".
+    const slug = "arxiv.org-pdf-230710265";
+    const named = buildFilenameWithExtension(slug, "application/pdf");
+    expect(named).toBe(`${slug}.pdf`);
+  });
+
+  test("appends .pdf when URL has numeric-looking extension", () => {
+    // path.extname("arxiv.org-pdf-2307.10265") yields ".10265", which is not
+    // a key of SUPPORTED_FILETYPE_CONVERTERS, so inference must still happen.
+    const slug = "arxiv.org-pdf-2307.10265";
+    const mime = "application/pdf; charset=utf-8";
+    const named = buildFilenameWithExtension(slug, mime);
+
+    expect(named).toBe(`${slug}.pdf`);
+  });
+
+  test("does NOT append extension when file already has a supported extension", () => {
+    const slug = "example.com-document.pdf";
+    const named = buildFilenameWithExtension(slug, "application/pdf");
+    expect(named).toBe(slug);
+  });
+
+  test("does NOT append extension when file has .txt extension", () => {
+    const slug = "example.com-readme.txt";
+    const named = buildFilenameWithExtension(slug, "text/plain");
+    expect(named).toBe(slug);
+  });
+
+  test("does not append extension for unknown content type", () => {
+    // The name must come back unchanged when the MIME type has no mapping.
+    const slug = "example.com-binary-blob";
+    const mime = "application/octet-stream";
+    const named = buildFilenameWithExtension(slug, mime);
+
+    expect(named).toBe(slug);
+  });
+
+  test("does not append extension when content type is null", () => {
+    const slug = "example.com-unknown";
+    const named = buildFilenameWithExtension(slug, null);
+    expect(named).toBe(slug);
+  });
+
+  test("appends .docx for word document MIME type", () => {
+    const slug = "sharepoint.com-documents-report";
+    const docxMime =
+      "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
+    const named = buildFilenameWithExtension(slug, docxMime);
+
+    expect(named).toBe(`${slug}.docx`);
+  });
+
+  test("handles content type with charset parameter correctly", () => {
+    // Parameters after ";" must be ignored when resolving the MIME type.
+    const slug = "api.example.com-export-data";
+    const mime = "text/csv; charset=utf-8";
+    const named = buildFilenameWithExtension(slug, mime);
+
+    expect(named).toBe(`${slug}.csv`);
+  });
+});
diff --git a/collector/processLink/helpers/index.js b/collector/processLink/helpers/index.js
index 88b74b2c2..dc00f8f4d 100644
--- a/collector/processLink/helpers/index.js
+++ b/collector/processLink/helpers/index.js
@@ -5,6 +5,16 @@ const { downloadURIToFile } = require("../../utils/downloadURIToFile");
 const { ACCEPTED_MIMES } = require("../../utils/constants");
 const { validYoutubeVideoUrl } = require("../../utils/url");
 
+/**
+ * Reduce a raw Content-Type header to its lowercased MIME type, dropping any parameters such as charset.
+ * @param {string|null} contentTypeHeader - Raw header value, e.g. "text/html; charset=utf-8"
+ * @returns {string|null} - Bare MIME type (e.g., "application/pdf"), or null for a missing/blank header
+ */
+function parseContentType(contentTypeHeader) {
+  const [mimePart] = (contentTypeHeader || "").toLowerCase().split(";");
+  return mimePart.trim() || null;
+}
+
 /**
  * Get the content type of a resource
  * - Sends a HEAD request to the URL and returns the Content-Type header with a 5 second timeout
@@ -34,8 +44,9 @@ async function getContentTypeFromURL(url) {
         contentType: null,
       };
 
-    const contentType = res.headers.get("Content-Type")?.toLowerCase();
-    const contentTypeWithoutCharset = contentType?.split(";")[0].trim();
+    const contentTypeWithoutCharset = parseContentType(
+      res.headers.get("Content-Type")
+    );
     if (!contentTypeWithoutCharset)
       return {
         success: false,
@@ -171,6 +182,7 @@ async function processAsFile({ uri, saveAsDocument = true }) {
 }
 
 module.exports = {
+  parseContentType,
   returnResult,
   getContentTypeFromURL,
   determineContentType,
diff --git a/collector/utils/downloadURIToFile/index.js b/collector/utils/downloadURIToFile/index.js
index f7326658e..ce912648a 100644
--- a/collector/utils/downloadURIToFile/index.js
+++ b/collector/utils/downloadURIToFile/index.js
@@ -1,10 +1,26 @@
-const { WATCH_DIRECTORY } = require("../constants");
+const { WATCH_DIRECTORY, ACCEPTED_MIMES } = require("../constants");
 const fs = require("fs");
 const path = require("path");
 const { pipeline } = require("stream/promises");
 const { validURL } = require("../url");
 const { default: slugify } = require("slugify");
 
+// Extend slugify's character map so "/" maps to "-", letting URL pathnames be passed to slugify directly.
+slugify.extend({ "/": "-" });
+
+/**
+ * Resolve the preferred file extension for a MIME type via the ACCEPTED_MIMES table.
+ * The first extension listed for the type wins; a falsy MIME type, one that is not
+ * a key of ACCEPTED_MIMES, or one with no listed extensions resolves to null.
+ * @param {string} mimeType - The MIME type to look up (e.g., "application/pdf")
+ * @returns {string|null} - The file extension (e.g., ".pdf") or null
+ */
+function mimeToExtension(mimeType) {
+  const isKnown = Boolean(mimeType) && ACCEPTED_MIMES.hasOwnProperty(mimeType);
+  const candidates = (isKnown ? ACCEPTED_MIMES[mimeType] : null) ?? [];
+  return candidates.length === 0 ? null : candidates[0];
+}
+
 /**
  * Download a file to the hotdir
  * @param {string} url - The URL of the file to download
@@ -33,10 +49,29 @@ async function downloadURIToFile(url, maxTimeout = 10_000) {
       .finally(() => clearTimeout(timeout));
 
     const urlObj = new URL(url);
-    const filename = `${urlObj.hostname}-${slugify(
-      urlObj.pathname.replace(/\//g, "-"),
-      { lower: true }
-    )}`;
+    const sluggedPath = slugify(urlObj.pathname, { lower: true });
+    let filename = `${urlObj.hostname}-${sluggedPath}`;
+
+    const existingExt = path.extname(filename).toLowerCase();
+    const { SUPPORTED_FILETYPE_CONVERTERS } = require("../constants");
+
+    // If the filename does not already have a supported file extension,
+    // try to infer one from the response Content-Type header.
+    // This handles URLs like https://arxiv.org/pdf/2307.10265 where the
+    // path has no explicit extension but the server responds with
+    // Content-Type: application/pdf.
+    if (!SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(existingExt)) {
+      const { parseContentType } = require("../../processLink/helpers");
+      const contentType = parseContentType(res.headers.get("Content-Type"));
+      const inferredExt = mimeToExtension(contentType);
+      if (inferredExt) {
+        console.log(
+          `[Collector] URL path has no recognized extension. Inferred ${inferredExt} from Content-Type: ${contentType}`
+        );
+        filename += inferredExt;
+      }
+    }
+
     const localFilePath = path.join(WATCH_DIRECTORY, filename);
     const writeStream = fs.createWriteStream(localFilePath);
     await pipeline(res.body, writeStream);
@@ -51,4 +86,5 @@ async function downloadURIToFile(url, maxTimeout = 10_000) {
 
 module.exports = {
   downloadURIToFile,
+  mimeToExtension,
 };