mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-28 02:18:22 +02:00
Feature/drupalwiki collector (#3693)
* Implement DrupalWiki collector * Add attachment downloading and processing functionality (#3) * linting * Linting Add citation image small refactors add URL for citation identifier --------- Co-authored-by: em <eugen.mayer@kontextwork.de> Co-authored-by: rexjohannes <53578137+rexjohannes@users.noreply.github.com> Co-authored-by: Eugen Mayer <136934+EugenMayer@users.noreply.github.com>
This commit is contained in:
102
collector/utils/extensions/DrupalWiki/index.js
Normal file
102
collector/utils/extensions/DrupalWiki/index.js
Normal file
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* Copyright 2024
|
||||
*
|
||||
* Authors:
|
||||
* - Eugen Mayer (KontextWork)
|
||||
*/
|
||||
|
||||
const { DrupalWiki } = require("./DrupalWiki");
|
||||
const { validBaseUrl } = require("../../../utils/http");
|
||||
|
||||
/**
 * Normalizes user supplied space IDs into a list of numbers.
 *
 * Empty segments (e.g. from a trailing or doubled comma) are skipped, because
 * `Number("")` is `0` and would otherwise be treated as a real space ID.
 * Anything that is not made of digits only is reported back as invalid
 * instead of being converted to `NaN`.
 *
 * @param {string|number|Array<string|number>} spaceIds - Comma separated IDs or an array of IDs.
 * @returns {{ids: number[], invalid: string[]}} Parsed IDs in input order and the rejected entries.
 */
function parseSpaceIds(spaceIds) {
  const entries = Array.isArray(spaceIds)
    ? spaceIds
    : String(spaceIds).split(",");
  const ids = [];
  const invalid = [];

  for (const entry of entries) {
    const text = String(entry).trim();
    if (text === "") continue;
    if (/^\d+$/.test(text)) {
      ids.push(Number(text));
    } else {
      invalid.push(text);
    }
  }

  return { ids, invalid };
}

/**
 * Loads all pages of the requested Drupal Wiki spaces and stores them via the
 * DrupalWiki client, one space after the other. Processing stops at the first
 * space that fails.
 *
 * @param {Object} params
 * @param {string|null} params.baseUrl - Base URL of the wiki, e.g. https://mywiki.drupal-wiki.net.
 * @param {string|number|Array<string|number>|null} params.spaceIds - Space IDs, comma separated ("21,56,67") or as an array.
 * @param {string|null} params.accessToken - REST API token passed to the DrupalWiki client.
 * @param {Object} response - Object whose `locals.encryptionWorker` is forwarded to the client.
 * @returns {Promise<{success: boolean, reason: string|null, data?: Object}>}
 *   On success `data` holds the `spaceIds` as they were passed in and the
 *   client's `storagePath` as `destination`.
 */
async function loadAndStoreSpaces(
  { baseUrl = null, spaceIds = null, accessToken = null } = {},
  response
) {
  const missingSpaceIdsReason =
    "Please provide a list of spaceIds like 21,56,67 you want to extract";

  if (!baseUrl) {
    return {
      success: false,
      reason:
        "Please provide your baseUrl like https://mywiki.drupal-wiki.net.",
    };
  } else if (!validBaseUrl(baseUrl)) {
    return {
      success: false,
      reason: "Provided base URL is not a valid URL.",
    };
  }

  if (!spaceIds) {
    return {
      success: false,
      reason: missingSpaceIdsReason,
    };
  }

  if (!accessToken) {
    return {
      success: false,
      reason: "Please provide a REST API-Token.",
    };
  }

  // Validate the IDs before touching the wiki so a typo does not trigger
  // requests for space 0 or NaN.
  const { ids: spaceIdsArr, invalid } = parseSpaceIds(spaceIds);
  if (invalid.length > 0) {
    return {
      success: false,
      reason: `Invalid spaceIds: ${invalid.join(", ")}. ${missingSpaceIdsReason}`,
    };
  }
  if (spaceIdsArr.length === 0) {
    return {
      success: false,
      reason: missingSpaceIdsReason,
    };
  }

  console.log(`-- Working Drupal Wiki ${baseUrl} for spaceIds: ${spaceIds} --`);
  const drupalWiki = new DrupalWiki({ baseUrl, accessToken });

  const encryptionWorker = response.locals.encryptionWorker;

  for (const spaceId of spaceIdsArr) {
    try {
      await drupalWiki.loadAndStoreAllPagesForSpace(spaceId, encryptionWorker);
      console.log(`--- Finished space ${spaceId} ---`);
    } catch (e) {
      console.error(e);
      return {
        success: false,
        reason: e.message,
        data: {},
      };
    }
  }
  console.log(`-- Finished all spaces--`);

  return {
    success: true,
    reason: null,
    data: {
      spaceIds,
      destination: drupalWiki.storagePath,
    },
  };
}
|
||||
|
||||
/**
 * Gets the processed content of one specific Drupal Wiki page, not all pages
 * of a space.
 *
 * Never rejects: any error raised while creating the client or fetching the
 * page is logged and reported through the returned object.
 *
 * @param {Object} params
 * @param {string} params.baseUrl - Base URL of the Drupal Wiki instance.
 * @param {string|number} params.pageId - Identifier passed to the client's `loadPage`.
 * @param {string} params.accessToken - REST API token passed to the DrupalWiki client.
 * @returns {Promise<{success: boolean, reason: string|null, content: *}>}
 *   `content` is the page's `processedBody` on success and `null` on failure.
 */
async function loadPage({ baseUrl, pageId, accessToken }) {
  console.log(`-- Working Drupal Wiki Page ${pageId} of ${baseUrl} --`);
  try {
    // Constructed inside the try so a failing constructor is reported the
    // same way as a failing fetch.
    const drupalWiki = new DrupalWiki({ baseUrl, accessToken });
    const page = await drupalWiki.loadPage(pageId);
    return {
      success: true,
      reason: null,
      content: page.processedBody,
    };
  } catch (e) {
    // Keep the underlying cause visible in the logs; the caller only gets
    // the generic reason below.
    console.error(e);
    return {
      success: false,
      reason: `Failed (re)-fetching DrupalWiki page ${pageId} from ${baseUrl}`,
      content: null,
    };
  }
}
|
||||
|
||||
// Public entry points of the Drupal Wiki collector extension.
module.exports = { loadAndStoreSpaces, loadPage };
|
||||
Reference in New Issue
Block a user