mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
Add support for /v1/document/upload* endpoints to support auto-add to workspace (#3692)
This commit is contained in:
@@ -43,6 +43,10 @@ function apiDocumentEndpoints(app) {
|
||||
type: 'string',
|
||||
format: 'binary',
|
||||
description: 'The file to upload'
|
||||
},
|
||||
addToWorkspaces: {
|
||||
type: 'string',
|
||||
description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
|
||||
}
|
||||
},
|
||||
required: ['file']
|
||||
@@ -87,6 +91,7 @@ function apiDocumentEndpoints(app) {
|
||||
try {
|
||||
const Collector = new CollectorApi();
|
||||
const { originalname } = request.file;
|
||||
const { addToWorkspaces = "" } = reqBody(request);
|
||||
const processingOnline = await Collector.online();
|
||||
|
||||
if (!processingOnline) {
|
||||
@@ -117,6 +122,12 @@ function apiDocumentEndpoints(app) {
|
||||
await EventLogs.logEvent("api_document_uploaded", {
|
||||
documentName: originalname,
|
||||
});
|
||||
|
||||
if (!!addToWorkspaces)
|
||||
await Document.api.uploadToWorkspace(
|
||||
addToWorkspaces,
|
||||
documents?.[0].location
|
||||
);
|
||||
response.status(200).json({ success: true, error: null, documents });
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
@@ -152,6 +163,10 @@ function apiDocumentEndpoints(app) {
|
||||
type: 'string',
|
||||
format: 'binary',
|
||||
description: 'The file to upload'
|
||||
},
|
||||
addToWorkspaces: {
|
||||
type: 'string',
|
||||
description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -206,6 +221,7 @@ function apiDocumentEndpoints(app) {
|
||||
*/
|
||||
try {
|
||||
const { originalname } = request.file;
|
||||
const { addToWorkspaces = "" } = reqBody(request);
|
||||
let folder = request.params?.folderName || "custom-documents";
|
||||
folder = normalizePath(folder);
|
||||
const targetFolderPath = path.join(documentsPath, folder);
|
||||
@@ -276,6 +292,12 @@ function apiDocumentEndpoints(app) {
|
||||
documentName: originalname,
|
||||
folder,
|
||||
});
|
||||
|
||||
if (!!addToWorkspaces)
|
||||
await Document.api.uploadToWorkspace(
|
||||
addToWorkspaces,
|
||||
documents?.[0].location
|
||||
);
|
||||
response.status(200).json({ success: true, error: null, documents });
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
@@ -290,16 +312,17 @@ function apiDocumentEndpoints(app) {
|
||||
async (request, response) => {
|
||||
/*
|
||||
#swagger.tags = ['Documents']
|
||||
#swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
|
||||
#swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.'
|
||||
#swagger.requestBody = {
|
||||
description: 'Link of web address to be scraped.',
|
||||
description: 'Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.',
|
||||
required: true,
|
||||
content: {
|
||||
"application/json": {
|
||||
schema: {
|
||||
type: 'object',
|
||||
example: {
|
||||
"link": "https://anythingllm.com"
|
||||
"link": "https://anythingllm.com",
|
||||
"addToWorkspaces": "workspace1,workspace2"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -342,7 +365,7 @@ function apiDocumentEndpoints(app) {
|
||||
*/
|
||||
try {
|
||||
const Collector = new CollectorApi();
|
||||
const { link } = reqBody(request);
|
||||
const { link, addToWorkspaces = "" } = reqBody(request);
|
||||
const processingOnline = await Collector.online();
|
||||
|
||||
if (!processingOnline) {
|
||||
@@ -373,6 +396,12 @@ function apiDocumentEndpoints(app) {
|
||||
await EventLogs.logEvent("api_link_uploaded", {
|
||||
link,
|
||||
});
|
||||
|
||||
if (!!addToWorkspaces)
|
||||
await Document.api.uploadToWorkspace(
|
||||
addToWorkspaces,
|
||||
documents?.[0].location
|
||||
);
|
||||
response.status(200).json({ success: true, error: null, documents });
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
@@ -397,11 +426,12 @@ function apiDocumentEndpoints(app) {
|
||||
type: 'object',
|
||||
example: {
|
||||
"textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
|
||||
"addToWorkspaces": "workspace1,workspace2",
|
||||
"metadata": {
|
||||
"title": "This key is required. See in /server/endpoints/api/document/index.js:287",
|
||||
keyOne: "valueOne",
|
||||
keyTwo: "valueTwo",
|
||||
etc: "etc"
|
||||
"keyOne": "valueOne",
|
||||
"keyTwo": "valueTwo",
|
||||
"etc": "etc"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -446,7 +476,11 @@ function apiDocumentEndpoints(app) {
|
||||
try {
|
||||
const Collector = new CollectorApi();
|
||||
const requiredMetadata = ["title"];
|
||||
const { textContent, metadata = {} } = reqBody(request);
|
||||
const {
|
||||
textContent,
|
||||
metadata = {},
|
||||
addToWorkspaces = "",
|
||||
} = reqBody(request);
|
||||
const processingOnline = await Collector.online();
|
||||
|
||||
if (!processingOnline) {
|
||||
@@ -506,6 +540,12 @@ function apiDocumentEndpoints(app) {
|
||||
);
|
||||
await Telemetry.sendTelemetry("raw_document_uploaded");
|
||||
await EventLogs.logEvent("api_raw_document_uploaded");
|
||||
|
||||
if (!!addToWorkspaces)
|
||||
await Document.api.uploadToWorkspace(
|
||||
addToWorkspaces,
|
||||
documents?.[0].location
|
||||
);
|
||||
response.status(200).json({ success: true, error: null, documents });
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
|
||||
@@ -255,6 +255,57 @@ const Document = {
|
||||
|
||||
return sourceString;
|
||||
},
|
||||
|
||||
/**
|
||||
* Functions for the backend API endpoints - not to be used by the frontend or elsewhere.
|
||||
* @namespace api
|
||||
*/
|
||||
api: {
|
||||
/**
|
||||
* Process a document upload from the API and upsert it into the database. This
|
||||
* functionality should only be used by the backend /v1/documents/upload endpoints for post-upload embedding.
|
||||
* @param {string} wsSlugs - The slugs of the workspaces to embed the document into, will be comma-separated list of workspace slugs
|
||||
* @param {string} docLocation - The location/path of the document that was uploaded
|
||||
* @returns {Promise<boolean>} - True if the document was uploaded successfully, false otherwise
|
||||
*/
|
||||
uploadToWorkspace: async function (wsSlugs = "", docLocation = null) {
|
||||
if (!docLocation)
|
||||
return console.log(
|
||||
"No document location provided for embedding",
|
||||
docLocation
|
||||
);
|
||||
|
||||
const slugs = wsSlugs
|
||||
.split(",")
|
||||
.map((slug) => String(slug)?.trim()?.toLowerCase());
|
||||
if (slugs.length === 0)
|
||||
return console.log(`No workspaces provided got: ${wsSlugs}`);
|
||||
|
||||
const { Workspace } = require("./workspace");
|
||||
const workspaces = await Workspace.where({ slug: { in: slugs } });
|
||||
if (workspaces.length === 0)
|
||||
return console.log("No valid workspaces found for slugs: ", slugs);
|
||||
|
||||
// Upsert the document into each workspace - do this sequentially
|
||||
// because the document may be large and we don't want to overwhelm the embedder, plus on the first
|
||||
// upsert we will then have the cache of the document - making n+1 embeds faster. If we parallelize this
|
||||
// we will have to do a lot of extra work to ensure that the document is not embedded more than once.
|
||||
for (const workspace of workspaces) {
|
||||
const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
|
||||
workspace,
|
||||
[docLocation]
|
||||
);
|
||||
if (failedToEmbed.length > 0)
|
||||
return console.log(
|
||||
`Failed to embed document into workspace ${workspace.slug}`,
|
||||
errors
|
||||
);
|
||||
console.log(`Document embedded into workspace ${workspace.slug}...`);
|
||||
}
|
||||
|
||||
return true;
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
module.exports = { Document };
|
||||
|
||||
@@ -909,6 +909,10 @@
|
||||
"type": "string",
|
||||
"format": "binary",
|
||||
"description": "The file to upload"
|
||||
},
|
||||
"addToWorkspaces": {
|
||||
"type": "string",
|
||||
"description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1010,6 +1014,10 @@
|
||||
"type": "string",
|
||||
"format": "binary",
|
||||
"description": "The file to upload"
|
||||
},
|
||||
"addToWorkspaces": {
|
||||
"type": "string",
|
||||
"description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1023,7 +1031,7 @@
|
||||
"tags": [
|
||||
"Documents"
|
||||
],
|
||||
"description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding.",
|
||||
"description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.",
|
||||
"parameters": [],
|
||||
"responses": {
|
||||
"200": {
|
||||
@@ -1076,14 +1084,15 @@
|
||||
}
|
||||
},
|
||||
"requestBody": {
|
||||
"description": "Link of web address to be scraped.",
|
||||
"description": "Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.",
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"example": {
|
||||
"link": "https://anythingllm.com"
|
||||
"link": "https://anythingllm.com",
|
||||
"addToWorkspaces": "workspace1,workspace2"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1160,6 +1169,7 @@
|
||||
"type": "object",
|
||||
"example": {
|
||||
"textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
|
||||
"addToWorkspaces": "workspace1,workspace2",
|
||||
"metadata": {
|
||||
"title": "This key is required. See in /server/endpoints/api/document/index.js:287",
|
||||
"keyOne": "valueOne",
|
||||
|
||||
Reference in New Issue
Block a user