mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
Merge branch 'master' into 4792-feat-refactor-workspacepfp-image
This commit is contained in:
2
.github/workflows/dev-build.yaml
vendored
2
.github/workflows/dev-build.yaml
vendored
@@ -6,7 +6,7 @@ concurrency:
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['4774-feat-refactor-collector-to-remove-fluent-ffmpeg-package'] # put your current branch to create a build. Core team only.
|
||||
branches: ['upgrade-yt-scraper'] # put your current branch to create a build. Core team only.
|
||||
paths-ignore:
|
||||
- '**.md'
|
||||
- 'cloud-deployments/*'
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
process.env.STORAGE_DIR = "test-storage"; // needed for tests to run
|
||||
const { YoutubeTranscript } = require("../../../../../utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js");
|
||||
|
||||
/**
 * Suite for YoutubeTranscript.fetchTranscript.
 *
 * When the GITHUB_ACTIONS environment variable is set, only a placeholder
 * test is registered (the log message explains that the URLs will not
 * resolve there). Otherwise two tests call fetchTranscript against specific
 * video ids and check that a non-empty string comes back.
 */
describe("YoutubeTranscript", () => {
  const runningInGithubActions = Boolean(process.env.GITHUB_ACTIONS);

  // Guard clause: register the stub and stop before the network-backed tests.
  if (runningInGithubActions) {
    console.log("Skipping YoutubeTranscript test in GitHub Actions as the URLs will not resolve.");
    it("is stubbed in GitHub Actions", () => expect(true).toBe(true));
    return;
  }

  // Per-test timeout (ms) passed as the third argument to `it`.
  const TEST_TIMEOUT_MS = 30000;

  /**
   * Fetches the transcript for `videoId` in `lang` and asserts that the
   * result is a defined, non-empty string, then logs a short preview.
   */
  const fetchAndVerify = async (videoId, lang) => {
    const text = await YoutubeTranscript.fetchTranscript(videoId, { lang });

    expect(text).toBeDefined();
    expect(typeof text).toBe("string");
    expect(text.length).toBeGreaterThan(0);
    console.log("First 200 characters:", `${text.substring(0, 200)}...`);
  };

  it(
    "should fetch transcript from YouTube video",
    () => fetchAndVerify("BJjsfNO5JTo", "en"),
    TEST_TIMEOUT_MS
  );

  it(
    "should fetch non asr transcript from YouTube video",
    () => fetchAndVerify("D111ao6wWH0", "zh-HK"),
    TEST_TIMEOUT_MS
  );
});
|
||||
@@ -44,11 +44,17 @@
|
||||
"uuid": "^9.0.0",
|
||||
"wavefile": "^11.0.0",
|
||||
"winston": "^3.13.0",
|
||||
"youtube-transcript-plus": "^1.1.2",
|
||||
"youtubei.js": "^9.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"cross-env": "^7.0.3",
|
||||
"nodemon": "^2.0.22",
|
||||
"prettier": "^2.4.1"
|
||||
},
|
||||
"resolutions": {
|
||||
"string-width": "^4.2.3",
|
||||
"strip-ansi": "^6.0.1",
|
||||
"wrap-ansi": "^7.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,13 +54,15 @@ class YoutubeLoader {
|
||||
source: this.#videoId,
|
||||
};
|
||||
try {
|
||||
const { YoutubeTranscript } = require("./youtube-transcript");
|
||||
transcript = await YoutubeTranscript.fetchTranscript(this.#videoId, {
|
||||
const fetchTranscript = await import("youtube-transcript-plus").then(
|
||||
(module) => module.fetchTranscript
|
||||
);
|
||||
const transcriptSegments = await fetchTranscript(this.#videoId, {
|
||||
lang: this.#language,
|
||||
});
|
||||
if (!transcript) {
|
||||
if (!transcriptSegments || transcriptSegments.length === 0)
|
||||
throw new Error("Transcription not found");
|
||||
}
|
||||
transcript = this.#convertTranscriptSegmentsToText(transcriptSegments);
|
||||
if (this.#addVideoInfo) {
|
||||
const { Innertube } = require("youtubei.js");
|
||||
const youtube = await Innertube.create();
|
||||
@@ -82,6 +84,16 @@ class YoutubeLoader {
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
#convertTranscriptSegmentsToText(transcriptSegments) {
|
||||
return transcriptSegments
|
||||
.map((segment) =>
|
||||
typeof segment === "string" ? segment : segment.text || ""
|
||||
)
|
||||
.join(" ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
}
|
||||
}
|
||||
|
||||
module.exports.YoutubeLoader = YoutubeLoader;
|
||||
|
||||
@@ -504,11 +504,6 @@ ansi-regex@^5.0.1:
|
||||
resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304"
|
||||
integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==
|
||||
|
||||
ansi-regex@^6.0.1:
|
||||
version "6.2.2"
|
||||
resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.2.2.tgz#60216eea464d864597ce2832000738a0589650c1"
|
||||
integrity sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==
|
||||
|
||||
ansi-styles@^4.0.0:
|
||||
version "4.3.0"
|
||||
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
|
||||
@@ -521,11 +516,6 @@ ansi-styles@^5.0.0:
|
||||
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b"
|
||||
integrity sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==
|
||||
|
||||
ansi-styles@^6.1.0:
|
||||
version "6.2.3"
|
||||
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.3.tgz#c044d5dcc521a076413472597a1acb1f103c4041"
|
||||
integrity sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==
|
||||
|
||||
anymatch@~3.1.2:
|
||||
version "3.1.3"
|
||||
resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.3.tgz#790c58b19ba1720a84205b57c618d5ad8524973e"
|
||||
@@ -1259,11 +1249,6 @@ dunder-proto@^1.0.1:
|
||||
es-errors "^1.3.0"
|
||||
gopd "^1.2.0"
|
||||
|
||||
eastasianwidth@^0.2.0:
|
||||
version "0.2.0"
|
||||
resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb"
|
||||
integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==
|
||||
|
||||
ee-first@1.1.1:
|
||||
version "1.1.1"
|
||||
resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
|
||||
@@ -1274,11 +1259,6 @@ emoji-regex@^8.0.0:
|
||||
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
|
||||
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
|
||||
|
||||
emoji-regex@^9.2.2:
|
||||
version "9.2.2"
|
||||
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72"
|
||||
integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==
|
||||
|
||||
enabled@2.0.x:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/enabled/-/enabled-2.0.0.tgz#f9dd92ec2d6f4bbc0d5d1e64e21d61cd4665e7c2"
|
||||
@@ -3403,7 +3383,7 @@ streamx@^2.15.0, streamx@^2.21.0:
|
||||
fast-fifo "^1.3.2"
|
||||
text-decoder "^1.1.0"
|
||||
|
||||
"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
|
||||
"string-width-cjs@npm:string-width@^4.2.0":
|
||||
version "4.2.3"
|
||||
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
|
||||
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
|
||||
@@ -3412,14 +3392,14 @@ streamx@^2.15.0, streamx@^2.21.0:
|
||||
is-fullwidth-code-point "^3.0.0"
|
||||
strip-ansi "^6.0.1"
|
||||
|
||||
string-width@^5.0.1, string-width@^5.1.2:
|
||||
version "5.1.2"
|
||||
resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794"
|
||||
integrity sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==
|
||||
string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3, string-width@^5.1.2:
|
||||
version "4.2.3"
|
||||
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
|
||||
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
|
||||
dependencies:
|
||||
eastasianwidth "^0.2.0"
|
||||
emoji-regex "^9.2.2"
|
||||
strip-ansi "^7.0.1"
|
||||
emoji-regex "^8.0.0"
|
||||
is-fullwidth-code-point "^3.0.0"
|
||||
strip-ansi "^6.0.1"
|
||||
|
||||
string_decoder@^1.1.1, string_decoder@^1.3.0:
|
||||
version "1.3.0"
|
||||
@@ -3435,19 +3415,19 @@ string_decoder@~1.1.1:
|
||||
dependencies:
|
||||
safe-buffer "~5.1.0"
|
||||
|
||||
"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
|
||||
"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
|
||||
version "6.0.1"
|
||||
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
|
||||
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
|
||||
dependencies:
|
||||
ansi-regex "^5.0.1"
|
||||
|
||||
strip-ansi@^7.0.1:
|
||||
version "7.1.2"
|
||||
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.2.tgz#132875abde678c7ea8d691533f2e7e22bb744dba"
|
||||
integrity sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==
|
||||
strip-ansi@^6.0.0, strip-ansi@^6.0.1, strip-ansi@^7.0.1:
|
||||
version "6.0.1"
|
||||
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
|
||||
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
|
||||
dependencies:
|
||||
ansi-regex "^6.0.1"
|
||||
ansi-regex "^5.0.1"
|
||||
|
||||
strip-dirs@^2.0.0:
|
||||
version "2.1.0"
|
||||
@@ -3837,7 +3817,7 @@ winston@^3.13.0:
|
||||
triple-beam "^1.3.0"
|
||||
winston-transport "^4.9.0"
|
||||
|
||||
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
|
||||
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
|
||||
version "7.0.0"
|
||||
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
|
||||
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
|
||||
@@ -3846,14 +3826,14 @@ winston@^3.13.0:
|
||||
string-width "^4.1.0"
|
||||
strip-ansi "^6.0.0"
|
||||
|
||||
wrap-ansi@^8.1.0:
|
||||
version "8.1.0"
|
||||
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"
|
||||
integrity sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==
|
||||
wrap-ansi@^7.0.0, wrap-ansi@^8.1.0:
|
||||
version "7.0.0"
|
||||
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
|
||||
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
|
||||
dependencies:
|
||||
ansi-styles "^6.1.0"
|
||||
string-width "^5.0.1"
|
||||
strip-ansi "^7.0.1"
|
||||
ansi-styles "^4.0.0"
|
||||
string-width "^4.1.0"
|
||||
strip-ansi "^6.0.0"
|
||||
|
||||
wrappy@1:
|
||||
version "1.0.2"
|
||||
@@ -3928,6 +3908,11 @@ yauzl@^2.10.0, yauzl@^2.4.2:
|
||||
buffer-crc32 "~0.2.3"
|
||||
fd-slicer "~1.1.0"
|
||||
|
||||
youtube-transcript-plus@^1.1.2:
|
||||
version "1.1.2"
|
||||
resolved "https://registry.yarnpkg.com/youtube-transcript-plus/-/youtube-transcript-plus-1.1.2.tgz#f86851852a056088c11f4f6523ab0f8dba7d9711"
|
||||
integrity sha512-bLlqkA6gVVUorZpcc+THuECXyAwOpnHqW2lOav9g6gGovxAP3FCD8s9GBFVjmSl3cWWwwPPXtG/zY1nD+GvQ7A==
|
||||
|
||||
youtubei.js@^9.1.0:
|
||||
version "9.4.0"
|
||||
resolved "https://registry.yarnpkg.com/youtubei.js/-/youtubei.js-9.4.0.tgz#ccccaf4a295b96e3e17134a66730bbc82461594b"
|
||||
|
||||
@@ -38,7 +38,7 @@ async function recoverAccount(username = "", recoveryCodes = []) {
|
||||
// because this is a user who has not logged out and back in since upgrade.
|
||||
const allUserHashes = await RecoveryCode.hashesForUser(user.id);
|
||||
if (allUserHashes.length < 4)
|
||||
return { success: false, error: "Invalid recovery codes" };
|
||||
return { success: false, error: "Invalid recovery codes." };
|
||||
|
||||
// If they tried to send more than two unique codes, we only take the first two
|
||||
const uniqueRecoveryCodes = [...new Set(recoveryCodes)]
|
||||
@@ -55,7 +55,7 @@ async function recoverAccount(username = "", recoveryCodes = []) {
|
||||
});
|
||||
return valid;
|
||||
});
|
||||
if (!validCodes) return { success: false, error: "Invalid recovery codes" };
|
||||
if (!validCodes) return { success: false, error: "Invalid recovery codes." };
|
||||
|
||||
const { passwordResetToken, error } = await PasswordResetToken.create(
|
||||
user.id
|
||||
|
||||
Reference in New Issue
Block a user