Add support for /v1/document/upload* endpoints to support auto-add to workspace (#3692)

This commit is contained in:
Timothy Carambat
2025-04-21 08:28:33 -07:00
committed by GitHub
parent 7afbc6d11b
commit 05ced11650
3 changed files with 112 additions and 11 deletions

View File

@@ -43,6 +43,10 @@ function apiDocumentEndpoints(app) {
type: 'string',
format: 'binary',
description: 'The file to upload'
},
addToWorkspaces: {
type: 'string',
description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
}
},
required: ['file']
@@ -87,6 +91,7 @@ function apiDocumentEndpoints(app) {
try {
const Collector = new CollectorApi();
const { originalname } = request.file;
const { addToWorkspaces = "" } = reqBody(request);
const processingOnline = await Collector.online();
if (!processingOnline) {
@@ -117,6 +122,12 @@ function apiDocumentEndpoints(app) {
await EventLogs.logEvent("api_document_uploaded", {
documentName: originalname,
});
if (!!addToWorkspaces)
await Document.api.uploadToWorkspace(
addToWorkspaces,
documents?.[0].location
);
response.status(200).json({ success: true, error: null, documents });
} catch (e) {
console.error(e.message, e);
@@ -152,6 +163,10 @@ function apiDocumentEndpoints(app) {
type: 'string',
format: 'binary',
description: 'The file to upload'
},
addToWorkspaces: {
type: 'string',
description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
}
}
}
@@ -206,6 +221,7 @@ function apiDocumentEndpoints(app) {
*/
try {
const { originalname } = request.file;
const { addToWorkspaces = "" } = reqBody(request);
let folder = request.params?.folderName || "custom-documents";
folder = normalizePath(folder);
const targetFolderPath = path.join(documentsPath, folder);
@@ -276,6 +292,12 @@ function apiDocumentEndpoints(app) {
documentName: originalname,
folder,
});
if (!!addToWorkspaces)
await Document.api.uploadToWorkspace(
addToWorkspaces,
documents?.[0].location
);
response.status(200).json({ success: true, error: null, documents });
} catch (e) {
console.error(e.message, e);
@@ -290,16 +312,17 @@ function apiDocumentEndpoints(app) {
async (request, response) => {
/*
#swagger.tags = ['Documents']
#swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
#swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.'
#swagger.requestBody = {
description: 'Link of web address to be scraped.',
description: 'Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.',
required: true,
content: {
"application/json": {
schema: {
type: 'object',
example: {
"link": "https://anythingllm.com"
"link": "https://anythingllm.com",
"addToWorkspaces": "workspace1,workspace2"
}
}
}
@@ -342,7 +365,7 @@ function apiDocumentEndpoints(app) {
*/
try {
const Collector = new CollectorApi();
const { link } = reqBody(request);
const { link, addToWorkspaces = "" } = reqBody(request);
const processingOnline = await Collector.online();
if (!processingOnline) {
@@ -373,6 +396,12 @@ function apiDocumentEndpoints(app) {
await EventLogs.logEvent("api_link_uploaded", {
link,
});
if (!!addToWorkspaces)
await Document.api.uploadToWorkspace(
addToWorkspaces,
documents?.[0].location
);
response.status(200).json({ success: true, error: null, documents });
} catch (e) {
console.error(e.message, e);
@@ -397,11 +426,12 @@ function apiDocumentEndpoints(app) {
type: 'object',
example: {
"textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
"addToWorkspaces": "workspace1,workspace2",
"metadata": {
"title": "This key is required. See in /server/endpoints/api/document/index.js:287",
keyOne: "valueOne",
keyTwo: "valueTwo",
etc: "etc"
"keyOne": "valueOne",
"keyTwo": "valueTwo",
"etc": "etc"
}
}
}
@@ -446,7 +476,11 @@ function apiDocumentEndpoints(app) {
try {
const Collector = new CollectorApi();
const requiredMetadata = ["title"];
const { textContent, metadata = {} } = reqBody(request);
const {
textContent,
metadata = {},
addToWorkspaces = "",
} = reqBody(request);
const processingOnline = await Collector.online();
if (!processingOnline) {
@@ -506,6 +540,12 @@ function apiDocumentEndpoints(app) {
);
await Telemetry.sendTelemetry("raw_document_uploaded");
await EventLogs.logEvent("api_raw_document_uploaded");
if (!!addToWorkspaces)
await Document.api.uploadToWorkspace(
addToWorkspaces,
documents?.[0].location
);
response.status(200).json({ success: true, error: null, documents });
} catch (e) {
console.error(e.message, e);

View File

@@ -255,6 +255,57 @@ const Document = {
return sourceString;
},
/**
* Functions for the backend API endpoints - not to be used by the frontend or elsewhere.
* @namespace api
*/
api: {
/**
* Process a document upload from the API and upsert it into the database. This
* functionality should only be used by the backend /v1/documents/upload endpoints for post-upload embedding.
* @param {string} wsSlugs - The slugs of the workspaces to embed the document into, will be comma-separated list of workspace slugs
* @param {string} docLocation - The location/path of the document that was uploaded
* @returns {Promise<boolean>} - True if the document was uploaded successfully, false otherwise
*/
uploadToWorkspace: async function (wsSlugs = "", docLocation = null) {
if (!docLocation)
return console.log(
"No document location provided for embedding",
docLocation
);
const slugs = wsSlugs
.split(",")
.map((slug) => String(slug)?.trim()?.toLowerCase());
if (slugs.length === 0)
return console.log(`No workspaces provided got: ${wsSlugs}`);
const { Workspace } = require("./workspace");
const workspaces = await Workspace.where({ slug: { in: slugs } });
if (workspaces.length === 0)
return console.log("No valid workspaces found for slugs: ", slugs);
// Upsert the document into each workspace - do this sequentially
// because the document may be large and we don't want to overwhelm the embedder, plus on the first
// upsert we will then have the cache of the document - making n+1 embeds faster. If we parallelize this
// we will have to do a lot of extra work to ensure that the document is not embedded more than once.
for (const workspace of workspaces) {
const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
workspace,
[docLocation]
);
if (failedToEmbed.length > 0)
return console.log(
`Failed to embed document into workspace ${workspace.slug}`,
errors
);
console.log(`Document embedded into workspace ${workspace.slug}...`);
}
return true;
},
},
};
module.exports = { Document };

View File

@@ -909,6 +909,10 @@
"type": "string",
"format": "binary",
"description": "The file to upload"
},
"addToWorkspaces": {
"type": "string",
"description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
}
}
}
@@ -1010,6 +1014,10 @@
"type": "string",
"format": "binary",
"description": "The file to upload"
},
"addToWorkspaces": {
"type": "string",
"description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
}
}
}
@@ -1023,7 +1031,7 @@
"tags": [
"Documents"
],
"description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding.",
"description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.",
"parameters": [],
"responses": {
"200": {
@@ -1076,14 +1084,15 @@
}
},
"requestBody": {
"description": "Link of web address to be scraped.",
"description": "Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.",
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"example": {
"link": "https://anythingllm.com"
"link": "https://anythingllm.com",
"addToWorkspaces": "workspace1,workspace2"
}
}
}
@@ -1160,6 +1169,7 @@
"type": "object",
"example": {
"textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
"addToWorkspaces": "workspace1,workspace2",
"metadata": {
"title": "This key is required. See in /server/endpoints/api/document/index.js:287",
"keyOne": "valueOne",