Reapply "Remove illegal chars for Windows on files (#5364)"

This reverts commit 869be87ef6.
This commit is contained in:
Timothy Carambat
2026-04-06 14:05:25 -07:00
parent 869be87ef6
commit 0645f3c4bf
4 changed files with 44 additions and 13 deletions

View File

@@ -132,8 +132,9 @@ function writeToServerDocuments({
if (!fs.existsSync(destination))
fs.mkdirSync(destination, { recursive: true });
const safeFilename = sanitizeFileName(filename);
const destinationFilePath = normalizePath(
path.resolve(destination, filename) + ".json"
path.resolve(destination, safeFilename) + ".json"
);
fs.writeFileSync(destinationFilePath, JSON.stringify(data, null, 4), {
@@ -210,10 +211,19 @@ function normalizePath(filepath = "") {
return result;
}
/**
* Strips characters that are illegal in Windows filenames, including Unicode
* quotation marks (U+201C, U+201D, etc.) that can get corrupted into ASCII
* double-quotes during charset conversion in the upload pipeline.
* @param {string} fileName - The filename to sanitize.
* @returns {string} - The sanitized filename.
*/
function sanitizeFileName(fileName) {
// Falsy input (undefined, null, "") is handed back untouched.
if (!fileName) return fileName;
// NOTE(review): two return statements appear back to back here, presumably the
// pre-change and post-change versions of the same line. As written, only the
// first return can execute; the second one (which additionally removes the
// Unicode quotation marks U+2018..U+201F) is unreachable. Confirm which of the
// two the real file keeps.
//eslint-disable-next-line
return fileName.replace(/[<>:"\/\\|?*]/g, "");
return fileName.replace(
/[<>:"/\\|?*\u201C\u201D\u201E\u201F\u2018\u2019\u201A\u201B]/g,
""
);
}
module.exports = {

View File

@@ -17,7 +17,12 @@ const { Telemetry } = require("../../models/telemetry");
const { CollectorApi } = require("../collectorApi");
const fs = require("fs");
const path = require("path");
const { hotdirPath, normalizePath, isWithin } = require("../files");
const {
hotdirPath,
normalizePath,
isWithin,
sanitizeFileName,
} = require("../files");
/**
* @typedef ResponseObject
* @property {string} id - uuid of response
@@ -72,8 +77,8 @@ async function processDocumentAttachments(attachments = []) {
if (dataUriMatch) base64Data = dataUriMatch[1];
const buffer = Buffer.from(base64Data, "base64");
const filename = normalizePath(
attachment.name || `attachment-${uuidv4()}`
const filename = sanitizeFileName(
normalizePath(attachment.name || `attachment-${uuidv4()}`)
);
const filePath = normalizePath(path.join(hotdirPath, filename));
if (!isWithin(hotdirPath, filePath))

View File

@@ -284,6 +284,21 @@ function normalizePath(filepath = "") {
return result;
}
/**
 * Strips characters that are illegal in Windows filenames so the resulting
 * name can be written to disk on any platform.
 *
 * Removed characters:
 *  - the reserved punctuation `< > : " / \ | ? *`
 *  - ASCII control characters U+0000 through U+001F, which Windows also rejects
 *  - Unicode quotation marks U+2018 through U+201F, which can get corrupted
 *    into ASCII double-quotes during charset conversion in the upload pipeline
 *
 * Falsy input (`undefined`, `null`, `""`) is returned unchanged.
 * @param {string} fileName - The filename to sanitize.
 * @returns {string} - The sanitized filename.
 */
function sanitizeFileName(fileName) {
  if (!fileName) return fileName;
  return fileName.replace(
    // U+2018-U+201F is the same eight quote code points that were previously
    // listed one by one; the control range is intentional.
    // eslint-disable-next-line no-control-regex
    /[<>:"/\\|?*\u0000-\u001F\u2018-\u201F]/g,
    ""
  );
}
// Check if the vector-cache folder is empty or not
// useful if the user is changing embedders, as this will
// break the previous cache.
@@ -500,4 +515,5 @@ module.exports = {
purgeEntireVectorCache,
getDocumentsByFolder,
hotdirPath,
sanitizeFileName,
};

View File

@@ -2,7 +2,7 @@ const multer = require("multer");
const path = require("path");
const fs = require("fs");
const { v4 } = require("uuid");
const { normalizePath } = require(".");
const { normalizePath, sanitizeFileName } = require(".");
/**
* Handle File uploads for auto-uploading.
@@ -17,8 +17,8 @@ const fileUploadStorage = multer.diskStorage({
cb(null, uploadOutput);
},
filename: function (_, file, cb) {
file.originalname = normalizePath(
Buffer.from(file.originalname, "latin1").toString("utf8")
file.originalname = sanitizeFileName(
normalizePath(Buffer.from(file.originalname, "latin1").toString("utf8"))
);
cb(null, file.originalname);
},
@@ -37,8 +37,8 @@ const fileAPIUploadStorage = multer.diskStorage({
cb(null, uploadOutput);
},
filename: function (_, file, cb) {
file.originalname = normalizePath(
Buffer.from(file.originalname, "latin1").toString("utf8")
file.originalname = sanitizeFileName(
normalizePath(Buffer.from(file.originalname, "latin1").toString("utf8"))
);
cb(null, file.originalname);
},
@@ -55,8 +55,8 @@ const assetUploadStorage = multer.diskStorage({
return cb(null, uploadOutput);
},
filename: function (_, file, cb) {
file.originalname = normalizePath(
Buffer.from(file.originalname, "latin1").toString("utf8")
file.originalname = sanitizeFileName(
normalizePath(Buffer.from(file.originalname, "latin1").toString("utf8"))
);
cb(null, file.originalname);
},