Merge branch 'master' into 4792-feat-refactor-workspacepfp-image

This commit is contained in:
shatfield4
2026-01-05 10:57:02 -08:00
6 changed files with 52 additions and 82 deletions

View File

@@ -6,7 +6,7 @@ concurrency:
on:
push:
branches: ['4774-feat-refactor-collector-to-remove-fluent-ffmpeg-package'] # put your current branch to create a build. Core team only.
branches: ['upgrade-yt-scraper'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'

View File

@@ -1,33 +0,0 @@
process.env.STORAGE_DIR = "test-storage"; // needed for tests to run
const {
  YoutubeTranscript,
} = require("../../../../../utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js");

// Per-test timeout (ms) passed to jest for the tests that hit the network.
const FETCH_TIMEOUT_MS = 30000;

/**
 * Fetches the transcript for a video and asserts it is a non-empty string,
 * then logs a short preview of it.
 * @param {string} videoId - YouTube video id to fetch the transcript for.
 * @param {string} lang - Language code forwarded to `fetchTranscript`.
 * @returns {Promise<void>}
 */
async function assertTranscriptIsFetched(videoId, lang) {
  const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang });

  expect(transcript).toBeDefined();
  expect(typeof transcript).toBe("string");
  expect(transcript.length).toBeGreaterThan(0);
  console.log("First 200 characters:", transcript.substring(0, 200) + "...");
}

describe("YoutubeTranscript", () => {
  // In GitHub Actions only a placeholder test is registered; the real tests
  // need live YouTube URLs.
  if (process.env.GITHUB_ACTIONS) {
    console.log("Skipping YoutubeTranscript test in GitHub Actions as the URLs will not resolve.");
    it('is stubbed in GitHub Actions', () => expect(true).toBe(true));
    return;
  }

  it(
    "should fetch transcript from YouTube video",
    () => assertTranscriptIsFetched("BJjsfNO5JTo", "en"),
    FETCH_TIMEOUT_MS
  );

  it(
    "should fetch non asr transcript from YouTube video",
    () => assertTranscriptIsFetched("D111ao6wWH0", "zh-HK"),
    FETCH_TIMEOUT_MS
  );
});

View File

@@ -44,11 +44,17 @@
"uuid": "^9.0.0",
"wavefile": "^11.0.0",
"winston": "^3.13.0",
"youtube-transcript-plus": "^1.1.2",
"youtubei.js": "^9.1.0"
},
"devDependencies": {
"cross-env": "^7.0.3",
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
},
"resolutions": {
"string-width": "^4.2.3",
"strip-ansi": "^6.0.1",
"wrap-ansi": "^7.0.0"
}
}

View File

@@ -54,13 +54,15 @@ class YoutubeLoader {
source: this.#videoId,
};
try {
const { YoutubeTranscript } = require("./youtube-transcript");
transcript = await YoutubeTranscript.fetchTranscript(this.#videoId, {
const fetchTranscript = await import("youtube-transcript-plus").then(
(module) => module.fetchTranscript
);
const transcriptSegments = await fetchTranscript(this.#videoId, {
lang: this.#language,
});
if (!transcript) {
if (!transcriptSegments || transcriptSegments.length === 0)
throw new Error("Transcription not found");
}
transcript = this.#convertTranscriptSegmentsToText(transcriptSegments);
if (this.#addVideoInfo) {
const { Innertube } = require("youtubei.js");
const youtube = await Innertube.create();
@@ -82,6 +84,16 @@ class YoutubeLoader {
},
];
}
#convertTranscriptSegmentsToText(transcriptSegments) {
return transcriptSegments
.map((segment) =>
typeof segment === "string" ? segment : segment.text || ""
)
.join(" ")
.replace(/\s+/g, " ")
.trim();
}
}
module.exports.YoutubeLoader = YoutubeLoader;

View File

@@ -504,11 +504,6 @@ ansi-regex@^5.0.1:
resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304"
integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==
ansi-regex@^6.0.1:
version "6.2.2"
resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-6.2.2.tgz#60216eea464d864597ce2832000738a0589650c1"
integrity sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==
ansi-styles@^4.0.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
@@ -521,11 +516,6 @@ ansi-styles@^5.0.0:
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b"
integrity sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==
ansi-styles@^6.1.0:
version "6.2.3"
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-6.2.3.tgz#c044d5dcc521a076413472597a1acb1f103c4041"
integrity sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==
anymatch@~3.1.2:
version "3.1.3"
resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.3.tgz#790c58b19ba1720a84205b57c618d5ad8524973e"
@@ -1259,11 +1249,6 @@ dunder-proto@^1.0.1:
es-errors "^1.3.0"
gopd "^1.2.0"
eastasianwidth@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/eastasianwidth/-/eastasianwidth-0.2.0.tgz#696ce2ec0aa0e6ea93a397ffcf24aa7840c827cb"
integrity sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==
ee-first@1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
@@ -1274,11 +1259,6 @@ emoji-regex@^8.0.0:
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
emoji-regex@^9.2.2:
version "9.2.2"
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72"
integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==
enabled@2.0.x:
version "2.0.0"
resolved "https://registry.yarnpkg.com/enabled/-/enabled-2.0.0.tgz#f9dd92ec2d6f4bbc0d5d1e64e21d61cd4665e7c2"
@@ -3403,7 +3383,7 @@ streamx@^2.15.0, streamx@^2.21.0:
fast-fifo "^1.3.2"
text-decoder "^1.1.0"
"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
"string-width-cjs@npm:string-width@^4.2.0":
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
@@ -3412,14 +3392,14 @@ streamx@^2.15.0, streamx@^2.21.0:
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"
string-width@^5.0.1, string-width@^5.1.2:
version "5.1.2"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794"
integrity sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==
string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3, string-width@^5.1.2:
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
dependencies:
eastasianwidth "^0.2.0"
emoji-regex "^9.2.2"
strip-ansi "^7.0.1"
emoji-regex "^8.0.0"
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"
string_decoder@^1.1.1, string_decoder@^1.3.0:
version "1.3.0"
@@ -3435,19 +3415,19 @@ string_decoder@~1.1.1:
dependencies:
safe-buffer "~5.1.0"
"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
dependencies:
ansi-regex "^5.0.1"
strip-ansi@^7.0.1:
version "7.1.2"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.2.tgz#132875abde678c7ea8d691533f2e7e22bb744dba"
integrity sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==
strip-ansi@^6.0.0, strip-ansi@^6.0.1, strip-ansi@^7.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
dependencies:
ansi-regex "^6.0.1"
ansi-regex "^5.0.1"
strip-dirs@^2.0.0:
version "2.1.0"
@@ -3837,7 +3817,7 @@ winston@^3.13.0:
triple-beam "^1.3.0"
winston-transport "^4.9.0"
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
@@ -3846,14 +3826,14 @@ winston@^3.13.0:
string-width "^4.1.0"
strip-ansi "^6.0.0"
wrap-ansi@^8.1.0:
version "8.1.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"
integrity sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==
wrap-ansi@^7.0.0, wrap-ansi@^8.1.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
dependencies:
ansi-styles "^6.1.0"
string-width "^5.0.1"
strip-ansi "^7.0.1"
ansi-styles "^4.0.0"
string-width "^4.1.0"
strip-ansi "^6.0.0"
wrappy@1:
version "1.0.2"
@@ -3928,6 +3908,11 @@ yauzl@^2.10.0, yauzl@^2.4.2:
buffer-crc32 "~0.2.3"
fd-slicer "~1.1.0"
youtube-transcript-plus@^1.1.2:
version "1.1.2"
resolved "https://registry.yarnpkg.com/youtube-transcript-plus/-/youtube-transcript-plus-1.1.2.tgz#f86851852a056088c11f4f6523ab0f8dba7d9711"
integrity sha512-bLlqkA6gVVUorZpcc+THuECXyAwOpnHqW2lOav9g6gGovxAP3FCD8s9GBFVjmSl3cWWwwPPXtG/zY1nD+GvQ7A==
youtubei.js@^9.1.0:
version "9.4.0"
resolved "https://registry.yarnpkg.com/youtubei.js/-/youtubei.js-9.4.0.tgz#ccccaf4a295b96e3e17134a66730bbc82461594b"

View File

@@ -38,7 +38,7 @@ async function recoverAccount(username = "", recoveryCodes = []) {
// because this is a user who has not logged out and back in since upgrade.
const allUserHashes = await RecoveryCode.hashesForUser(user.id);
if (allUserHashes.length < 4)
return { success: false, error: "Invalid recovery codes" };
return { success: false, error: "Invalid recovery codes." };
// If they tried to send more than two unique codes, we only take the first two
const uniqueRecoveryCodes = [...new Set(recoveryCodes)]
@@ -55,7 +55,7 @@ async function recoverAccount(username = "", recoveryCodes = []) {
});
return valid;
});
if (!validCodes) return { success: false, error: "Invalid recovery codes" };
if (!validCodes) return { success: false, error: "Invalid recovery codes." };
const { passwordResetToken, error } = await PasswordResetToken.create(
user.id