Feature/drupalwiki collector (#3693)

* Implement DrupalWiki collector

* Add attachment downloading and processing functionality (#3)

* linting

* Linting
Add citation image
small refactors
add URL for citation identifier

---------

Co-authored-by: em <eugen.mayer@kontextwork.de>
Co-authored-by: rexjohannes <53578137+rexjohannes@users.noreply.github.com>
Co-authored-by: Eugen Mayer <136934+EugenMayer@users.noreply.github.com>
This commit is contained in:
Timothy Carambat
2025-04-21 09:17:24 -07:00
committed by GitHub
parent fe59e22397
commit fd4929b4d2
15 changed files with 782 additions and 12 deletions

View File

@@ -154,6 +154,32 @@ function extensions(app) {
return;
}
);
// POST /ext/drupalwiki
// Runs the DrupalWiki space import for the request body and relays the
// collector result. Any thrown error is answered with HTTP 400 and the
// error message as `reason`.
app.post(
  "/ext/drupalwiki",
  [verifyPayloadIntegrity, setDataSigner],
  async function (request, response) {
    try {
      // Loaded on demand, inside the handler, so a load failure is also reported as a 400.
      const {
        loadAndStoreSpaces: importSpaces,
      } = require("../utils/extensions/DrupalWiki");
      const { success, reason, data } = await importSpaces(
        reqBody(request),
        response
      );
      const payload = { success, reason, data };
      response.status(200).json(payload);
    } catch (error) {
      console.error(error);
      const failure = {
        success: false,
        reason: error.message,
        data: {
          title: null,
          author: null,
        },
      };
      response.status(400).json(failure);
    }
  }
);
}
module.exports = extensions;

View File

@@ -2,7 +2,7 @@ const { getLinkText } = require("../../processLink");
/**
* Fetches the content of a raw link. Returns the content as a text string of the link in question.
* @param {object} data - metadata from document (eg: link)
* @param {object} data - metadata from document (eg: link)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncLink({ link }, response) {
@@ -24,7 +24,7 @@ async function resyncLink({ link }, response) {
* Fetches the content of a YouTube link. Returns the content as a text string of the video in question.
* We offer this as there may be some videos where a transcription could be manually edited after initial scraping
* but in general - transcriptions often never change.
* @param {object} data - metadata from document (eg: link)
* @param {object} data - metadata from document (eg: link)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncYouTube({ link }, response) {
@@ -44,9 +44,9 @@ async function resyncYouTube({ link }, response) {
}
/**
* Fetches the content of a specific confluence page via its chunkSource.
* Fetches the content of a specific confluence page via its chunkSource.
* Returns the content as a text string of the page in question and only that page.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncConfluence({ chunkSource }, response) {
@@ -76,9 +76,9 @@ async function resyncConfluence({ chunkSource }, response) {
}
/**
* Fetches the content of a specific confluence page via its chunkSource.
* Fetches the content of a specific confluence page via its chunkSource.
* Returns the content as a text string of the page in question and only that page.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncGithub({ chunkSource }, response) {
@@ -106,9 +106,48 @@ async function resyncGithub({ chunkSource }, response) {
}
}
/**
* Fetches the content of a specific DrupalWiki page via its chunkSource.
* Returns the content as a text string of the page in question and only that page.
* @param {object} data - metadata from document (eg: chunkSource)
* @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
*/
async function resyncDrupalWiki({ chunkSource }, response) {
if (!chunkSource) throw new Error('Invalid source property provided');
try {
// DrupalWiki data is `payload` encrypted. So we need to expand its
// encrypted payload back into query params so we can reFetch the page with same access token/params.
const source = response.locals.encryptionWorker.expandPayload(chunkSource);
const { loadPage } = require("../../utils/extensions/DrupalWiki");
const { success, reason, content } = await loadPage({
baseUrl: source.searchParams.get('baseUrl'),
pageId: source.searchParams.get('pageId'),
accessToken: source.searchParams.get('accessToken'),
});
if (!success) {
console.error(`Failed to sync DrupalWiki page content. ${reason}`);
response.status(200).json({
success: false,
content: null,
});
} else {
response.status(200).json({ success, content });
}
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
content: null,
});
}
}
module.exports = {
link: resyncLink,
youtube: resyncYouTube,
confluence: resyncConfluence,
github: resyncGithub,
}
drupalwiki: resyncDrupalWiki,
}

View File

@@ -0,0 +1,320 @@
/**
* Copyright 2024
*
* Authors:
* - Eugen Mayer (KontextWork)
*/
const { htmlToText } = require("html-to-text");
const { tokenizeString } = require("../../../tokenizer");
const { sanitizeFileName, writeToServerDocuments } = require("../../../files");
const { default: slugify } = require("slugify");
const path = require("path");
const fs = require("fs");
const { processSingleFile } = require("../../../../processSingleFile");
const {
WATCH_DIRECTORY,
SUPPORTED_FILETYPE_CONVERTERS,
} = require("../../../constants");
/**
 * Plain value holder for one page as used by the DrupalWiki loader.
 */
class Page {
  /**
   * @param {Object} fields
   * @param {number} fields.id
   * @param {string} fields.title
   * @param {string} fields.created
   * @param {string} fields.type
   * @param {string} fields.processedBody
   * @param {string} fields.url
   * @param {number} fields.spaceId
   */
  constructor(fields) {
    const { id, title, created, type, processedBody, url, spaceId } = fields;
    // Only the known fields are copied; anything else on `fields` is ignored.
    Object.assign(this, {
      id,
      title,
      url,
      created,
      type,
      processedBody,
      spaceId,
    });
  }
}
/**
 * Small client for the DrupalWiki REST API. It lists the pages of a space,
 * fetches each page, writes it to the server documents folder and hands
 * supported attachments to the single-file processor.
 */
class DrupalWiki {
  /**
   * Note: constructing an instance creates the storage folder on disk.
   * @param {Object} config
   * @param {string} config.baseUrl - Base URL of the wiki; it is interpolated as-is into every request URL.
   * @param {string} config.accessToken - Token sent as `Authorization: Bearer <token>`.
   * @throws {TypeError} when baseUrl cannot be parsed by `new URL`.
   */
  constructor({ baseUrl, accessToken }) {
    this.baseUrl = baseUrl;
    this.accessToken = accessToken;
    this.storagePath = this.#prepareStoragePath(baseUrl);
  }

  /**
   * Load all pages for the given space, fetching and storing each page one by one
   * to minimize the memory usage. A failure on a single page is logged and the
   * loop continues with the next page.
   *
   * @param {number} spaceId
   * @param {import("../../EncryptionWorker").EncryptionWorker} encryptionWorker
   * @returns {Promise<void>}
   * @throws when the page index of the space cannot be fetched.
   */
  async loadAndStoreAllPagesForSpace(spaceId, encryptionWorker) {
    const pageIndex = await this.#getPageIndexForSpace(spaceId);
    for (const pageId of pageIndex) {
      try {
        const page = await this.loadPage(pageId);
        // The page fetch does not populate `spaceId`. Without this the stored
        // document id would contain the literal text "undefined".
        if (page.spaceId == null) page.spaceId = spaceId;

        // Pages with an empty body will lead to embedding issues / exceptions.
        // NOTE(review): processedBody always starts with the "Link/URL:" line, so
        // the else branch looks unreachable today - confirm before relying on it.
        if (page.processedBody.trim() !== "") {
          this.#storePage(page, encryptionWorker);
          await this.#downloadAndProcessAttachments(page.id);
        } else {
          console.log(`Skipping page (${page.id}) since it has no content`);
        }
      } catch (e) {
        console.error(
          `Could not process DrupalWiki page ${pageId} (skipping and continuing): `
        );
        console.error(e);
      }
    }
  }

  /**
   * Fetches a single page and converts its body to plain text.
   * @param {number} pageId
   * @returns {Promise<Page>}
   */
  async loadPage(pageId) {
    return this.#fetchPage(pageId);
  }

  /**
   * Collects the ids of all pages of a space by walking the paginated listing.
   * Errors while fetching the index are fatal and propagate to the caller.
   * @param {number} spaceId
   * @returns {Promise<number[]>} array of pageIds
   */
  async #getPageIndexForSpace(spaceId) {
    const pageIds = [];
    let pageNr = 0;
    let hasNext = true;
    do {
      const { isLast, pageIdsForPage } = await this.#getPagesForSpacePaginated(
        spaceId,
        pageNr
      );
      pageIds.push(...pageIdsForPage);
      // Also stop on an empty result page so a response that never reports
      // `last` cannot keep this loop running forever.
      hasNext = !isLast && pageIdsForPage.length > 0;
      pageNr++;
    } while (hasNext);
    return pageIds;
  }

  /**
   * Fetches one result page (up to 100 entries) of the page listing of a space.
   * The response is read as `{ content: Page[], last: boolean }`.
   * @param {number} spaceId
   * @param {number} pageNr - zero-based result page number
   * @returns {Promise<{isLast: boolean, pageIdsForPage: number[]}>}
   */
  async #getPagesForSpacePaginated(spaceId, pageNr) {
    const data = await this._doFetch(
      `${this.baseUrl}/api/rest/scope/api/page?size=100&space=${spaceId}&page=${pageNr}`
    );
    const pageIds = data.content.map((page) => Number(page.id));
    return {
      isLast: data.last,
      pageIdsForPage: pageIds,
    };
  }

  /**
   * Fetches a page and maps the response onto a `Page`. The `created` field is
   * filled from the response's `lastModified`.
   * @param {number} pageId
   * @returns {Promise<Page>}
   */
  async #fetchPage(pageId) {
    const data = await this._doFetch(
      `${this.baseUrl}/api/rest/scope/api/page/${pageId}`
    );
    const url = `${this.baseUrl}/node/${data.id}`;
    return new Page({
      id: data.id,
      title: data.title,
      created: data.lastModified,
      type: data.type,
      processedBody: this.#processPageBody({
        body: data.body,
        title: data.title,
        url: url,
      }),
      url: url,
    });
  }

  /**
   * Writes the page as a document into the storage folder of this wiki.
   * @param {Page} page
   * @param {import("../../EncryptionWorker").EncryptionWorker} encryptionWorker
   */
  #storePage(page, encryptionWorker) {
    const { hostname } = new URL(this.baseUrl);

    // This UUID will ensure that re-importing the same page without any changes will not
    // show up (deduplication).
    const targetUUID = `${hostname}.${page.spaceId}.${page.id}.${page.created}`;
    const wordCount = page.processedBody.split(" ").length;
    const tokenCount =
      page.processedBody.length > 0
        ? tokenizeString(page.processedBody).length
        : 0;
    const data = {
      id: targetUUID,
      url: `drupalwiki://${page.url}`,
      title: page.title,
      docAuthor: this.baseUrl,
      description: page.title,
      docSource: `${this.baseUrl} DrupalWiki`,
      chunkSource: this.#generateChunkSource(page.id, encryptionWorker),
      published: new Date().toLocaleString(),
      wordCount: wordCount,
      pageContent: page.processedBody,
      token_count_estimate: tokenCount,
    };

    const fileName = sanitizeFileName(`${slugify(page.title)}-${data.id}`);
    console.log(
      `[DrupalWiki Loader]: Saving page '${page.title}' (${page.id}) to '${this.storagePath}/${fileName}'`
    );
    writeToServerDocuments(data, fileName, this.storagePath);
  }

  /**
   * Generate the full chunkSource for a specific DrupalWiki page so that we can resync it later.
   * Base URL, page id and access token are encrypted into a single `payload` query param
   * so the same request can be replayed when the document is re-synced.
   * @param {number} pageId
   * @param {import("../../EncryptionWorker").EncryptionWorker} encryptionWorker
   * @returns {string}
   */
  #generateChunkSource(pageId, encryptionWorker) {
    const payload = {
      baseUrl: this.baseUrl,
      pageId: pageId,
      accessToken: this.accessToken,
    };
    return `drupalwiki://${this.baseUrl}?payload=${encryptionWorker.encrypt(
      JSON.stringify(payload)
    )}`;
  }

  /**
   * Performs an authenticated GET and returns the parsed JSON body.
   * @param {string} url
   * @returns {Promise<any>}
   * @throws {Error} when the response status is not ok.
   */
  async _doFetch(url) {
    const response = await fetch(url, {
      headers: this.#getHeaders(),
    });
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    return response.json();
  }

  /**
   * Headers sent with every API request.
   * @returns {Object<string, string>}
   */
  #getHeaders() {
    return {
      "Content-Type": "application/json",
      Accept: "application/json",
      Authorization: `Bearer ${this.accessToken}`,
    };
  }

  /**
   * Resolves (and creates if missing) the documents sub-folder for this wiki,
   * named after the hostname of the base URL.
   * @param {string} baseUrl
   * @returns {string} absolute folder path
   */
  #prepareStoragePath(baseUrl) {
    const { hostname } = new URL(baseUrl);
    const subFolder = slugify(`drupalwiki-${hostname}`).toLowerCase();
    const outFolder =
      process.env.NODE_ENV === "development"
        ? path.resolve(
            __dirname,
            `../../../../server/storage/documents/${subFolder}`
          )
        : path.resolve(process.env.STORAGE_DIR, `documents/${subFolder}`);
    if (!fs.existsSync(outFolder)) {
      fs.mkdirSync(outFolder, { recursive: true });
    }
    return outFolder;
  }

  /**
   * Converts the HTML body of a page into plain text prefixed with the page link.
   * @param {Object} page
   * @param {string|null|undefined} page.body - HTML body; anything that is not a string counts as empty
   * @param {string} page.url
   * @param {string} page.title - used as the content when the body is empty
   * @returns {string}
   * @private
   */
  #processPageBody({ body, url, title }) {
    // A missing body must not abort the page - treat it like an empty one.
    const htmlBody = typeof body === "string" ? body : "";
    // use the title as content if there is none
    const textContent = htmlBody.trim() !== "" ? htmlBody : title ?? "";

    const plainTextContent = htmlToText(textContent, {
      wordwrap: false,
      preserveNewlines: true,
    });
    // preserve structure, but collapse runs of blank lines
    const plainBody = plainTextContent.replace(/\n{3,}/g, "\n\n");
    // add the link to the document
    return `Link/URL: ${url}\n\n${plainBody}`;
  }

  /**
   * Downloads every attachment of a page whose file name ends with a supported
   * extension into the watch directory and processes it. Failures are logged
   * and never propagate to the caller.
   * @param {number} pageId
   * @returns {Promise<void>}
   */
  async #downloadAndProcessAttachments(pageId) {
    try {
      const data = await this._doFetch(
        `${this.baseUrl}/api/rest/scope/api/attachment?pageId=${pageId}&size=2000`
      );

      const extensionsList = Object.keys(SUPPORTED_FILETYPE_CONVERTERS);
      for (const attachment of data.content || data) {
        const { fileName, id: attachId } = attachment;
        if (typeof fileName !== "string") continue;

        // The file name comes from the remote server. Keep only its last path
        // segment so a name like "../../x.txt" cannot be written outside of the
        // watch directory.
        const safeName = path.basename(fileName.replace(/\\/g, "/"));
        const lowerName = safeName.toLowerCase();
        if (!extensionsList.some((ext) => lowerName.endsWith(ext))) {
          continue;
        }

        const downloadUrl = `${this.baseUrl}/api/rest/scope/api/attachment/${attachId}/download`;
        const attachmentResponse = await fetch(downloadUrl, {
          headers: this.#getHeaders(),
        });
        if (!attachmentResponse.ok) {
          console.log(`Skipping attachment: ${fileName} - Download failed`);
          continue;
        }

        const buffer = await attachmentResponse.arrayBuffer();
        const localFilePath = path.join(WATCH_DIRECTORY, safeName);
        fs.writeFileSync(localFilePath, Buffer.from(buffer));
        await processSingleFile(safeName);
      }
    } catch (err) {
      console.error(`Fetching/processing attachments failed:`, err);
    }
  }
}
module.exports = { DrupalWiki };

View File

@@ -0,0 +1,102 @@
/**
* Copyright 2024
*
* Authors:
* - Eugen Mayer (KontextWork)
*/
const { DrupalWiki } = require("./DrupalWiki");
const { validBaseUrl } = require("../../../utils/http");
/**
 * Imports every page of the given DrupalWiki spaces into the documents folder.
 *
 * All inputs are validated before any work starts. Validation failures are
 * returned as `{ success: false, reason }`; a failure while loading a space
 * stops the import and is returned as `{ success: false, reason, data: {} }`.
 * @param {Object} options
 * @param {string|null} options.baseUrl - base URL of the wiki
 * @param {string|Array<string|number>|null} options.spaceIds - comma separated list (or array) of numeric space ids; blank entries are ignored
 * @param {string|null} options.accessToken - REST API token
 * @param {import("../../../middleware/setDataSigner").ResponseWithSigner} response - only `response.locals.encryptionWorker` is read
 * @returns {Promise<{success: boolean, reason: string|null, data?: object}>}
 */
async function loadAndStoreSpaces(
  { baseUrl = null, spaceIds = null, accessToken = null } = {},
  response
) {
  if (!baseUrl) {
    return {
      success: false,
      reason:
        "Please provide your baseUrl like https://mywiki.drupal-wiki.net.",
    };
  } else if (!validBaseUrl(baseUrl)) {
    return {
      success: false,
      reason: "Provided base URL is not a valid URL.",
    };
  }

  if (!spaceIds) {
    return {
      success: false,
      reason:
        "Please provide a list of spaceIds like 21,56,67 you want to extract",
    };
  }

  if (!accessToken) {
    return {
      success: false,
      reason: "Please provide a REST API-Token.",
    };
  }

  // Blank entries (eg: a trailing comma) are dropped instead of turning into
  // space 0, and anything that is not a plain number is rejected up front
  // instead of being requested from the wiki as `NaN`.
  const rawSpaceIds = Array.isArray(spaceIds)
    ? spaceIds
    : String(spaceIds).split(",");
  const cleanedSpaceIds = rawSpaceIds
    .map((id) => String(id).trim())
    .filter((id) => id !== "");
  if (
    cleanedSpaceIds.length === 0 ||
    !cleanedSpaceIds.every((id) => /^\d+$/.test(id))
  ) {
    return {
      success: false,
      reason:
        "Space IDs must be a comma separated list of numbers like 21,56,67.",
    };
  }
  const spaceIdsArr = cleanedSpaceIds.map(Number);

  console.log(`-- Working Drupal Wiki ${baseUrl} for spaceIds: ${spaceIds} --`);
  // Constructed only after validation since this creates the storage folder.
  const drupalWiki = new DrupalWiki({ baseUrl, accessToken });
  const encryptionWorker = response.locals.encryptionWorker;

  for (const spaceId of spaceIdsArr) {
    try {
      await drupalWiki.loadAndStoreAllPagesForSpace(spaceId, encryptionWorker);
      console.log(`--- Finished space ${spaceId} ---`);
    } catch (e) {
      console.error(e);
      return {
        success: false,
        reason: e.message,
        data: {},
      };
    }
  }
  console.log(`-- Finished all spaces--`);

  return {
    success: true,
    reason: null,
    data: {
      spaceIds,
      destination: drupalWiki.storagePath,
    },
  };
}
/**
 * Gets the page content of one specific DrupalWiki page, not all pages of a space.
 * Never throws for fetch or setup problems: every failure is logged and reported
 * through the returned `reason`.
 * @param {Object} options
 * @param {string} options.baseUrl - base URL of the wiki
 * @param {number|string} options.pageId - id of the page to load
 * @param {string} options.accessToken - REST API token
 * @returns {Promise<{success: boolean, reason: string|null, content: string|null}>}
 */
async function loadPage({ baseUrl, pageId, accessToken }) {
  console.log(`-- Working Drupal Wiki Page ${pageId} of ${baseUrl} --`);
  try {
    // Created inside the try block: the constructor can throw as well
    // (eg: on an unparsable baseUrl) and should end up as a failed result too.
    const drupalWiki = new DrupalWiki({ baseUrl, accessToken });
    const page = await drupalWiki.loadPage(pageId);
    return {
      success: true,
      reason: null,
      content: page.processedBody,
    };
  } catch (e) {
    // Keep the underlying cause visible, both in the log and in the reason.
    console.error(e);
    return {
      success: false,
      reason: `Failed (re)-fetching DrupalWiki page ${pageId} from ${baseUrl}: ${e.message}`,
      content: null,
    };
  }
}
module.exports = {
loadAndStoreSpaces,
loadPage,
};

View File

@@ -12,7 +12,24 @@ function queryParams(request) {
return request.query;
}
/**
 * Tells whether `baseUrl` can be parsed as a URL at all.
 * - No check is made that the URL is reachable or accessible.
 * - No further validation is applied (unlike `validURL` in `utils/url/index.js`).
 * @param {string} baseUrl
 * @returns {boolean} true when `new URL(baseUrl)` succeeds, false otherwise
 */
function validBaseUrl(baseUrl) {
  let parsable = true;
  try {
    // Only the parse attempt matters, the resulting URL object is discarded.
    void new URL(baseUrl);
  } catch (e) {
    parsable = false;
  }
  return parsable;
}
module.exports = {
reqBody,
queryParams,
validBaseUrl,
};

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

View File

@@ -3,6 +3,7 @@ import GitLab from "./gitlab.svg";
import YouTube from "./youtube.svg";
import Link from "./link.svg";
import Confluence from "./confluence.jpeg";
import DrupalWiki from "./drupalwiki.jpg";
const ConnectorImages = {
github: GitHub,
@@ -10,6 +11,7 @@ const ConnectorImages = {
youtube: YouTube,
websiteDepth: Link,
confluence: Confluence,
drupalwiki: DrupalWiki,
};
export default ConnectorImages;

View File

@@ -0,0 +1,190 @@
/**
* Copyright 2024
*
* Authors:
* - Eugen Mayer (KontextWork)
*/
import { useState } from "react";
import System from "@/models/system";
import showToast from "@/utils/toast";
import { Warning } from "@phosphor-icons/react";
import { Tooltip } from "react-tooltip";
export default function DrupalWikiOptions() {
const [loading, setLoading] = useState(false);
const handleSubmit = async (e) => {
e.preventDefault();
const form = new FormData(e.target);
try {
setLoading(true);
showToast(
"Fetching all pages for the given Drupal Wiki spaces - this may take a while.",
"info",
{
clear: true,
autoClose: false,
}
);
const { data, error } = await System.dataConnectors.drupalwiki.collect({
baseUrl: form.get("baseUrl"),
spaceIds: form.get("spaceIds"),
accessToken: form.get("accessToken"),
});
if (!!error) {
showToast(error, "error", { clear: true });
setLoading(false);
return;
}
showToast(
`Pages collected from Drupal Wiki spaces ${data.spaceIds}. Output folder is ${data.destination}.`,
"success",
{ clear: true }
);
e.target.reset();
setLoading(false);
} catch (e) {
console.error(e);
showToast(e.message, "error", { clear: true });
setLoading(false);
}
};
return (
<div className="flex w-full">
<div className="flex flex-col w-full px-1 md:pb-6 pb-16">
<form className="w-full" onSubmit={handleSubmit}>
<div className="w-full flex flex-col py-2">
<div className="w-full flex flex-col gap-4">
<div className="flex flex-col pr-10">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold flex gap-x-2 items-center">
<p className="font-bold text-white">Drupal Wiki base URL</p>
</label>
<p className="text-xs font-normal text-theme-text-secondary">
This is the base URL of your&nbsp;
<a
href="https://drupal-wiki.com"
target="_blank"
rel="noopener noreferrer"
className="underline"
>
Drupal Wiki
</a>
.
</p>
</div>
<input
type="url"
name="baseUrl"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="eg: https://mywiki.drupal-wiki.net, https://drupalwiki.mycompany.tld, etc..."
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col pr-10">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold">
Drupal Wiki Space IDs
</label>
<p className="text-xs font-normal text-theme-text-secondary">
Comma seperated Space IDs you want to extract. See the&nbsp;
<a
href="https://help.drupal-wiki.com/node/606"
target="_blank"
rel="noopener noreferrer"
className="underline"
onClick={(e) => e.stopPropagation()}
>
manual
</a>
&nbsp; on how to retrieve the Space IDs. Be sure that your
'API-Token User' has access to those spaces.
</p>
</div>
<input
type="text"
name="spaceIds"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="eg: 12,34,69"
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col pr-10">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold flex gap-x-2 items-center">
<p className="font-bold text-white">
Drupal Wiki API Token
</p>
<Warning
size={14}
className="ml-1 text-orange-500 cursor-pointer"
data-tooltip-id="access-token-tooltip"
data-tooltip-place="right"
/>
<Tooltip
delayHide={300}
id="access-token-tooltip"
className="max-w-xs z-99"
clickable={true}
>
<p className="text-sm font-light text-theme-text-primary">
You need to provide an API token for authentication. See
the Drupal Wiki&nbsp;
<a
href="https://help.drupal-wiki.com/node/605#2-Zugriffs-Token-generieren"
target="_blank"
rel="noopener noreferrer"
className="underline"
>
manual
</a>
&nbsp;on how to generate an API-Token for your user.
</p>
</Tooltip>
</label>
<p className="text-xs font-normal text-theme-text-secondary">
Access token for authentication.
</p>
</div>
<input
type="password"
name="accessToken"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="pat:123"
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
</div>
</div>
<div className="flex flex-col gap-y-2 w-full pr-10">
<button
type="submit"
disabled={loading}
className="mt-2 w-full justify-center border border-slate-200 px-4 py-2 rounded-lg text-dark-text text-sm font-bold items-center flex gap-x-2 bg-slate-200 hover:bg-slate-300 hover:text-slate-800 disabled:bg-slate-300 disabled:cursor-not-allowed"
>
{loading ? "Collecting pages..." : "Submit"}
</button>
{loading && (
<p className="text-xs text-theme-text-secondary">
Once complete, all pages will be available for embedding into
workspaces.
</p>
)}
</div>
</form>
</div>
</div>
);
}

View File

@@ -5,6 +5,7 @@ import GithubOptions from "./Connectors/Github";
import GitlabOptions from "./Connectors/Gitlab";
import YoutubeOptions from "./Connectors/Youtube";
import ConfluenceOptions from "./Connectors/Confluence";
import DrupalWikiOptions from "./Connectors/DrupalWiki";
import { useState } from "react";
import ConnectorOption from "./ConnectorOption";
import WebsiteDepthOptions from "./Connectors/WebsiteDepth";
@@ -40,6 +41,12 @@ export const getDataConnectors = (t) => ({
description: t("connectors.confluence.description"),
options: <ConfluenceOptions />,
},
drupalwiki: {
name: "Drupal Wiki",
image: ConnectorImages.drupalwiki,
description: "Import Drupal Wiki spaces in a single click.",
options: <DrupalWikiOptions />,
},
});
export default function DataConnectors() {

View File

@@ -15,6 +15,7 @@ import {
YoutubeLogo,
} from "@phosphor-icons/react";
import ConfluenceLogo from "@/media/dataConnectors/confluence.png";
import DrupalWikiLogo from "@/media/dataConnectors/drupalwiki.png";
import { toPercentString } from "@/utils/numbers";
function combineLikeSources(sources) {
@@ -197,14 +198,17 @@ function parseChunkSource({ title = "", chunks = [] }) {
!chunks.length ||
(!chunks[0].chunkSource?.startsWith("link://") &&
!chunks[0].chunkSource?.startsWith("confluence://") &&
!chunks[0].chunkSource?.startsWith("github://"))
!chunks[0].chunkSource?.startsWith("github://") &&
!chunks[0].chunkSource?.startsWith("drupalwiki://"))
)
return nullResponse;
try {
const url = new URL(
chunks[0].chunkSource.split("link://")[1] ||
chunks[0].chunkSource.split("confluence://")[1] ||
chunks[0].chunkSource.split("github://")[1]
chunks[0].chunkSource.split("github://")[1] ||
chunks[0].chunkSource.split("drupalwiki://")[1]
);
let text = url.host + url.pathname;
let icon = "link";
@@ -224,6 +228,11 @@ function parseChunkSource({ title = "", chunks = [] }) {
icon = "confluence";
}
if (url.host.includes("drupal-wiki.net")) {
text = title;
icon = "drupalwiki";
}
return {
isUrl: true,
href: url.toString(),
@@ -239,10 +248,16 @@ const ConfluenceIcon = ({ ...props }) => (
<img src={ConfluenceLogo} {...props} />
);
// Patch to render DrupalWiki icon as a element like we do with Phosphor
const DrupalWikiIcon = ({ ...props }) => (
<img src={DrupalWikiLogo} {...props} />
);
// Icon components keyed by icon name. `parseChunkSource` in this file picks
// names such as "link", "confluence" and "drupalwiki". The confluence and
// drupalwiki entries are <img> wrapper components defined just above.
const ICONS = {
file: FileText,
link: Link,
youtube: YoutubeLogo,
github: GithubLogo,
confluence: ConfluenceIcon,
drupalwiki: DrupalWikiIcon,
};

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -162,6 +162,29 @@ const DataConnector = {
});
},
},
drupalwiki: {
collect: async function ({ baseUrl, spaceIds, accessToken }) {
return await fetch(`${API_BASE}/ext/drupalwiki`, {
method: "POST",
headers: baseHeaders(),
body: JSON.stringify({
baseUrl,
spaceIds,
accessToken,
}),
})
.then((res) => res.json())
.then((res) => {
if (!res.success) throw new Error(res.reason);
return { data: res.data, error: null };
})
.catch((e) => {
console.error(e);
return { data: null, error: e.message };
});
},
},
};
export default DataConnector;

View File

@@ -127,6 +127,27 @@ function extensionEndpoints(app) {
}
}
);
// POST /ext/drupalwiki (admin and manager roles only)
// Forwards the request body to the collector's DrupalWiki endpoint, records the
// telemetry event and relays the collector's reply. Any error yields a 500.
app.post(
  "/ext/drupalwiki",
  [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
  async (request, response) => {
    try {
      const collector = new CollectorApi();
      const collectorReply = await collector.forwardExtensionRequest({
        endpoint: "/ext/drupalwiki",
        method: "POST",
        body: request.body,
      });

      const telemetryDetails = { type: "drupalwiki" };
      await Telemetry.sendTelemetry("extension_invoked", telemetryDetails);

      response.status(200).json(collectorReply);
    } catch (error) {
      console.error(error);
      response.sendStatus(500).end();
    }
  }
);
}
module.exports = { extensionEndpoints };

View File

@@ -34,7 +34,7 @@ const { DocumentSyncRun } = require('../models/documentSyncRun.js');
continue;
}
if (type === 'link' || type === 'youtube') {
if (['link', 'youtube'].includes(type)) {
const response = await collector.forwardExtensionRequest({
endpoint: "/ext/resync-source-document",
method: "POST",
@@ -46,7 +46,7 @@ const { DocumentSyncRun } = require('../models/documentSyncRun.js');
newContent = response?.content;
}
if (type === 'confluence' || type === 'github' || type === 'gitlab') {
if (['confluence', 'github', 'gitlab', 'drupalwiki'].includes(type)) {
const response = await collector.forwardExtensionRequest({
endpoint: "/ext/resync-source-document",
method: "POST",

View File

@@ -10,7 +10,14 @@ const { Telemetry } = require("./telemetry");
const DocumentSyncQueue = {
featureKey: "experimental_live_file_sync",
// update the validFileTypes and .canWatch properties when adding elements here.
validFileTypes: ["link", "youtube", "confluence", "github", "gitlab"],
validFileTypes: [
"link",
"youtube",
"confluence",
"github",
"gitlab",
"drupalwiki",
],
defaultStaleAfter: 604800000,
maxRepeatFailures: 5, // How many times a run can fail in a row before pruning.
writable: [],
@@ -52,6 +59,7 @@ const DocumentSyncQueue = {
if (chunkSource.startsWith("confluence://")) return true; // If is a confluence document link
if (chunkSource.startsWith("github://")) return true; // If is a GitHub file reference
if (chunkSource.startsWith("gitlab://")) return true; // If is a GitLab file reference
if (chunkSource.startsWith("drupalwiki://")) return true; // If is a DrupalWiki document link
return false;
},