mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
Allow user to specify args for chromium process so they dont need SYS_ADMIN on container. (#4397)
* allow user to specify args for chromium process so they dont need SYS_ADMIN perms * use arg flag content * update console outputs
This commit is contained in:
2
.github/workflows/dev-build.yaml
vendored
2
.github/workflows/dev-build.yaml
vendored
@@ -6,7 +6,7 @@ concurrency:
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
|
||||
branches: ['3999-chromium-flags'] # put your current branch to create a build. Core team only.
|
||||
paths-ignore:
|
||||
- '**.md'
|
||||
- 'cloud-deployments/*'
|
||||
|
||||
@@ -5,6 +5,7 @@ const {
|
||||
const { writeToServerDocuments } = require("../../utils/files");
|
||||
const { tokenizeString } = require("../../utils/tokenizer");
|
||||
const { default: slugify } = require("slugify");
|
||||
const RuntimeSettings = require("../../utils/runtimeSettings");
|
||||
|
||||
/**
|
||||
* Scrape a generic URL and return the content in the specified format
|
||||
@@ -106,10 +107,12 @@ function validatedHeaders(headers = {}) {
|
||||
async function getPageContent({ link, captureAs = "text", headers = {} }) {
|
||||
try {
|
||||
let pageContents = [];
|
||||
const runtimeSettings = new RuntimeSettings();
|
||||
const loader = new PuppeteerWebBaseLoader(link, {
|
||||
launchOptions: {
|
||||
headless: "new",
|
||||
ignoreHTTPSErrors: true,
|
||||
args: runtimeSettings.get("browserLaunchArgs"),
|
||||
},
|
||||
gotoOptions: {
|
||||
waitUntil: "networkidle2",
|
||||
|
||||
@@ -27,6 +27,16 @@ class RuntimeSettings {
|
||||
// Value must be explicitly "true" or "false" as a string
|
||||
validate: (value) => String(value) === "true",
|
||||
},
|
||||
browserLaunchArgs: {
|
||||
default: [],
|
||||
validate: (value) => {
|
||||
let args = [];
|
||||
if (Array.isArray(value)) args = value.map((arg) => String(arg.trim()));
|
||||
if (typeof value === "string")
|
||||
args = value.split(",").map((arg) => arg.trim());
|
||||
return args;
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
constructor() {
|
||||
|
||||
@@ -363,4 +363,9 @@ GID='1000'
|
||||
# Specify the target languages for when using OCR to parse images and PDFs.
|
||||
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
|
||||
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
|
||||
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
|
||||
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
|
||||
|
||||
# Runtime flags for the built-in Puppeteer Chromium instance
|
||||
# This is only required on Linux machines running AnythingLLM via Docker
|
||||
# and do not want to use the --cap-add=SYS_ADMIN docker argument
|
||||
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"
|
||||
@@ -362,3 +362,8 @@ TTS_PROVIDER="native"
|
||||
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
|
||||
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
|
||||
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
|
||||
|
||||
# Runtime flags for the built-in Puppeteer Chromium instance
|
||||
# This is only required on Linux machines running AnythingLLM via Docker
|
||||
# and do not want to use the --cap-add=SYS_ADMIN docker argument
|
||||
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"
|
||||
@@ -38,6 +38,7 @@ class CollectorApi {
|
||||
},
|
||||
runtimeSettings: {
|
||||
allowAnyIp: process.env.COLLECTOR_ALLOW_ANY_IP ?? "false",
|
||||
browserLaunchArgs: process.env.ANYTHINGLLM_CHROMIUM_ARGS ?? [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1167,6 +1167,9 @@ function dumpENV() {
|
||||
|
||||
// Allow disabling of streaming for generic openai
|
||||
"GENERIC_OPENAI_STREAMING_DISABLED",
|
||||
|
||||
// Specify Chromium args for collector
|
||||
"ANYTHINGLLM_CHROMIUM_ARGS",
|
||||
];
|
||||
|
||||
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
|
||||
|
||||
Reference in New Issue
Block a user