Allow user to specify args for the Chromium process so they don't need SYS_ADMIN on the container. (#4397)

* allow user to specify args for chromium process so they don't need SYS_ADMIN perms

* use arg flag content

* update console outputs
This commit is contained in:
Timothy Carambat
2025-09-17 16:31:08 -07:00
committed by GitHub
parent 7864e1a970
commit 95557ee16f
7 changed files with 29 additions and 2 deletions

View File

@@ -6,7 +6,7 @@ concurrency:
on:
push:
branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
branches: ['3999-chromium-flags'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'

View File

@@ -5,6 +5,7 @@ const {
const { writeToServerDocuments } = require("../../utils/files");
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
const RuntimeSettings = require("../../utils/runtimeSettings");
/**
* Scrape a generic URL and return the content in the specified format
@@ -106,10 +107,12 @@ function validatedHeaders(headers = {}) {
async function getPageContent({ link, captureAs = "text", headers = {} }) {
try {
let pageContents = [];
const runtimeSettings = new RuntimeSettings();
const loader = new PuppeteerWebBaseLoader(link, {
launchOptions: {
headless: "new",
ignoreHTTPSErrors: true,
args: runtimeSettings.get("browserLaunchArgs"),
},
gotoOptions: {
waitUntil: "networkidle2",

View File

@@ -27,6 +27,16 @@ class RuntimeSettings {
// Value must be explicitly "true" or "false" as a string
validate: (value) => String(value) === "true",
},
browserLaunchArgs: {
default: [],
validate: (value) => {
let args = [];
if (Array.isArray(value)) args = value.map((arg) => String(arg.trim()));
if (typeof value === "string")
args = value.split(",").map((arg) => arg.trim());
return args;
},
},
};
constructor() {

View File

@@ -363,4 +363,9 @@ GID='1000'
# Specify the target languages for when using OCR to parse images and PDFs.
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
# Runtime flags for the built-in Puppeteer Chromium instance.
# This is only required on Linux machines running AnythingLLM via Docker
# when you do not want to use the --cap-add=SYS_ADMIN docker argument.
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"

View File

@@ -362,3 +362,8 @@ TTS_PROVIDER="native"
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
# Runtime flags for the built-in Puppeteer Chromium instance.
# This is only required on Linux machines running AnythingLLM via Docker
# when you do not want to use the --cap-add=SYS_ADMIN docker argument.
# ANYTHINGLLM_CHROMIUM_ARGS="--no-sandbox,--disable-setuid-sandbox"

View File

@@ -38,6 +38,7 @@ class CollectorApi {
},
runtimeSettings: {
allowAnyIp: process.env.COLLECTOR_ALLOW_ANY_IP ?? "false",
browserLaunchArgs: process.env.ANYTHINGLLM_CHROMIUM_ARGS ?? [],
},
};
}

View File

@@ -1167,6 +1167,9 @@ function dumpENV() {
// Allow disabling of streaming for generic openai
"GENERIC_OPENAI_STREAMING_DISABLED",
// Specify Chromium args for collector
"ANYTHINGLLM_CHROMIUM_ARGS",
];
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.