diff --git a/package-lock.json b/package-lock.json index a43b49d..03fde30 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,6 +14,8 @@ "@as-integrations/express5": "^1.1.2", "@aws-sdk/client-s3": "^3.1000.0", "@aws-sdk/s3-request-presigner": "^3.1000.0", + "@langchain/core": "^1.1.29", + "@langchain/google-genai": "^2.1.22", "@nestjs/apollo": "^13.2.4", "@nestjs/common": "^11.1.14", "@nestjs/config": "^4.0.3", @@ -22,11 +24,13 @@ "@nestjs/platform-express": "^11.0.1", "@prisma/adapter-pg": "^7.4.2", "@prisma/client": "^7.4.2", + "cheerio": "^1.2.0", "class-transformer": "^0.5.1", "class-validator": "^0.15.1", "graphql": "^16.13.0", "graphql-type-json": "^0.3.2", "graphql-upload-ts": "^2.1.3", + "pdf-parse": "^2.4.5", "pg": "^8.19.0", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.2", @@ -1889,6 +1893,12 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/@cfworker/json-schema": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz", + "integrity": "sha512-gAmrUZSGtKc3AiBL71iNWxDsyUC5uMaKKGdvzYsBoTW/xi42JQHl7eKV2OYzCUqvc+D2RCcf7EXY2iCyFIk6og==", + "license": "MIT" + }, "node_modules/@chevrotain/cst-dts-gen": { "version": "10.5.0", "resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-10.5.0.tgz", @@ -2182,6 +2192,15 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, + "node_modules/@google/generative-ai": { + "version": "0.24.1", + "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", + "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@graphql-tools/merge": { "version": "9.1.7", "resolved": "https://registry.npmjs.org/@graphql-tools/merge/-/merge-9.1.7.tgz", @@ -3352,6 +3371,67 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@langchain/core": { + "version": "1.1.29", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.29.tgz", + "integrity": "sha512-BPoegTtIdZX4gl2kxcSXAlLrrJFl1cxeRsk9DM/wlIuvyPrFwjWqrEK5NwF5diDt5XSArhQxIFaifGAl4F7fgw==", + "license": "MIT", + "dependencies": { + "@cfworker/json-schema": "^4.0.2", + "ansi-styles": "^5.0.0", + "camelcase": "6", + "decamelize": "1.2.0", + "js-tiktoken": "^1.0.12", + "langsmith": ">=0.5.0 <1.0.0", + "mustache": "^4.2.0", + "p-queue": "^6.6.2", + "uuid": "^11.1.0", + "zod": "^3.25.76 || ^4" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/@langchain/core/node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/@langchain/core/node_modules/camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@langchain/google-genai": { + "version": "2.1.22", + "resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-2.1.22.tgz", + "integrity": "sha512-01dplEIhEgwdY5dGwjWQXl5jhaAcz8DDXIlyCCms1QnR3HhVckZY1YrpAUiY1atoOcPXN+6/DrSPWUAM0mZVUQ==", + "license": "MIT", + "dependencies": { + "@google/generative-ai": "^0.24.0", + "uuid": "^11.1.0" + }, + "engines": { + "node": ">=20" + }, + "peerDependencies": { + "@langchain/core": "^1.1.29" + } + }, "node_modules/@lukeed/csprng": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@lukeed/csprng/-/csprng-1.1.0.tgz", @@ -3375,6 +3455,190 @@ "node": ">=16" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.80.tgz", + "integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==", + "license": "MIT", + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.80", + "@napi-rs/canvas-darwin-arm64": "0.1.80", + "@napi-rs/canvas-darwin-x64": "0.1.80", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.80", + "@napi-rs/canvas-linux-arm64-musl": "0.1.80", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-musl": "0.1.80", + "@napi-rs/canvas-win32-x64-msvc": "0.1.80" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.80.tgz", + "integrity": "sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.80.tgz", + "integrity": "sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.80.tgz", + "integrity": "sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.80.tgz", + "integrity": "sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.80.tgz", + "integrity": "sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.80.tgz", + "integrity": "sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.80.tgz", + "integrity": "sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.80.tgz", + "integrity": "sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.80.tgz", + "integrity": "sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz", + "integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.12", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", @@ -5481,6 +5745,12 @@ "@types/superagent": "^8.1.0" } }, + "node_modules/@types/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", + "license": "MIT" + }, "node_modules/@types/validator": { "version": "13.15.10", "resolved": "https://registry.npmjs.org/@types/validator/-/validator-13.15.10.tgz", @@ -6645,7 +6915,6 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -6711,6 +6980,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/bowser": { "version": "2.14.1", "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.14.1.tgz", @@ -6999,6 +7274,48 @@ "dev": true, "license": "MIT" }, + "node_modules/cheerio": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", + "integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.1.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.19.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/chevrotain": { "version": "10.5.0", "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-10.5.0.tgz", @@ -7293,6 +7610,15 @@ "node": "^14.18.0 || >=16.10.0" } }, + "node_modules/console-table-printer": { + "version": "2.15.0", + "resolved": "https://registry.npmjs.org/console-table-printer/-/console-table-printer-2.15.0.tgz", + "integrity": "sha512-SrhBq4hYVjLCkBVOWaTzceJalvn5K1Zq5aQA6wXC/cYjI3frKWNPEMK3sZsJfNNQApvCQmgBcc13ZKmFj8qExw==", + "license": "MIT", + "dependencies": { + "simple-wcswidth": "^1.1.2" + } + }, "node_modules/content-disposition": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", @@ -7431,6 +7757,34 @@ "node": ">= 8" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/cssfilter": { "version": "0.0.10", "resolved": "https://registry.npmjs.org/cssfilter/-/cssfilter-0.0.10.tgz", @@ -7462,6 +7816,15 @@ } } }, + "node_modules/decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/dedent": { "version": "1.7.1", "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.7.1.tgz", @@ -7608,6 +7971,61 @@ "node": ">=0.3.1" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "16.6.1", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", @@ -7719,6 +8137,31 @@ "node": ">= 0.8" } }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/encoding-sniffer/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/enhanced-resolve": { "version": "5.20.0", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.20.0.tgz", @@ -7733,6 +8176,18 @@ "node": ">=10.13.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/error-ex": { "version": "1.3.4", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz", @@ -9068,6 +9523,37 @@ "dev": true, "license": "MIT" }, + "node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -10228,6 +10714,15 @@ "jiti": "lib/jiti-cli.mjs" } }, + "node_modules/js-tiktoken": { + "version": "1.0.21", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz", + "integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==", + "license": "MIT", + "dependencies": { + "base64-js": "^1.5.1" + } + }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -10332,6 +10827,65 @@ "json-buffer": "3.0.1" } }, + "node_modules/langsmith": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.5.7.tgz", + "integrity": "sha512-FjYf2oBGMoSXnaT4SRaFguIiGJaonZ5VKWKJDPl9awLZjz2RkN29AcQWceecSINVzXzTvtRWPOjAWT+XggqNNg==", + "license": "MIT", + "dependencies": { + "@types/uuid": "^10.0.0", + "chalk": "^5.6.2", + "console-table-printer": "^2.12.1", + "p-queue": "^6.6.2", + "semver": "^7.6.3", + "uuid": "^10.0.0" + }, + "peerDependencies": { + "@opentelemetry/api": "*", + "@opentelemetry/exporter-trace-otlp-proto": "*", + "@opentelemetry/sdk-trace-base": "*", + "openai": "*" + }, + "peerDependenciesMeta": { + "@opentelemetry/api": { + "optional": true + }, + "@opentelemetry/exporter-trace-otlp-proto": { + "optional": true + }, + "@opentelemetry/sdk-trace-base": { + "optional": true + }, + "openai": { + "optional": true + } + } + }, + "node_modules/langsmith/node_modules/chalk": { + "version": "5.6.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz", + "integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==", + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/langsmith/node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/leven": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", @@ -10759,6 +11313,15 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==", + "license": "MIT", + "bin": { + "mustache": "bin/mustache" + } + }, "node_modules/mute-stream": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-2.0.0.tgz", @@ -10902,6 +11465,18 @@ "node": ">=8" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/nypm": { "version": "0.6.5", "resolved": "https://registry.npmjs.org/nypm/-/nypm-0.6.5.tgz", @@ -11043,6 +11618,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/p-finally": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", + "integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/p-limit": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", @@ -11075,6 +11659,40 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/p-queue": { + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz", + "integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==", + "license": "MIT", + "dependencies": { + "eventemitter3": "^4.0.4", + "p-timeout": "^3.2.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-queue/node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "license": "MIT" + }, + "node_modules/p-timeout": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", + "integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", + "license": "MIT", + "dependencies": { + "p-finally": "^1.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/p-try": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", @@ -11124,6 +11742,55 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -11217,6 +11884,38 @@ "devOptional": true, "license": "MIT" }, + "node_modules/pdf-parse": { + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-2.4.5.tgz", + "integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==", + "license": "Apache-2.0", + "dependencies": { + "@napi-rs/canvas": "0.1.80", + "pdfjs-dist": "5.4.296" + }, + "bin": { + "pdf-parse": "bin/cli.mjs" + }, + "engines": { + "node": ">=20.16.0 <21 || >=22.3.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/mehmet-kozan" + } + }, + "node_modules/pdfjs-dist": { + "version": "5.4.296", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz", + "integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.16.0 || >=22.3.0" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.80" + } + }, "node_modules/perfect-debounce": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/perfect-debounce/-/perfect-debounce-1.0.0.tgz", @@ -12013,7 +12712,6 @@ "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", - "dev": true, "license": "ISC", "bin": { "semver": "bin/semver.js" @@ -12234,6 +12932,12 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/simple-wcswidth": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/simple-wcswidth/-/simple-wcswidth-1.1.2.tgz", + "integrity": "sha512-j7piyCjAeTDSjzTSQ7DokZtMNwNlEAyxqSZeCS+CXH7fJ4jx3FuJ/mTW3mE+6JLs4VJBbcll0Kjn+KXI5t21Iw==", + "license": "MIT" + }, "node_modules/slash": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", @@ -13244,6 +13948,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/undici": { + "version": "7.22.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.22.0.tgz", + "integrity": "sha512-RqslV2Us5BrllB+JeiZnK4peryVTndy9Dnqq62S3yYRRTj0tFQCwEniUy2167skdGOy3vqRzEvl1Dm4sV2ReDg==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", @@ -13619,6 +14332,31 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/whatwg-mimetype": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", diff --git a/package.json b/package.json index 33d54f5..8d6d80e 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,8 @@ "@as-integrations/express5": "^1.1.2", "@aws-sdk/client-s3": "^3.1000.0", "@aws-sdk/s3-request-presigner": "^3.1000.0", + "@langchain/core": "^1.1.29", + "@langchain/google-genai": "^2.1.22", "@nestjs/apollo": "^13.2.4", "@nestjs/common": "^11.1.14", "@nestjs/config": "^4.0.3", @@ -33,11 +35,13 @@ "@nestjs/platform-express": "^11.0.1", "@prisma/adapter-pg": "^7.4.2", "@prisma/client": "^7.4.2", + "cheerio": "^1.2.0", "class-transformer": "^0.5.1", "class-validator": "^0.15.1", "graphql": "^16.13.0", "graphql-type-json": "^0.3.2", "graphql-upload-ts": "^2.1.3", + "pdf-parse": "^2.4.5", "pg": "^8.19.0", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.2", diff --git a/src/cases/parser/parser.service.ts b/src/cases/parser/parser.service.ts new file mode 100644 index 0000000..a90914e --- /dev/null +++ b/src/cases/parser/parser.service.ts @@ -0,0 +1,98 @@ +import { Injectable, BadRequestException } from '@nestjs/common'; +import * as cheerio from 'cheerio'; +import { z } from 'zod'; +import { ChatGoogleGenerativeAI } from '@langchain/google-genai'; +import { PDFParse } from 'pdf-parse'; + +const CaseMetadataSchema = z.object({ + title: z.string().describe("The official title or heading of the case."), + decisionType: z.string().describe("The formal category of the ruling (e.g., Judgment, Final Decision, Stay of Execution, or Administrative Order etc.)."), + decisionDate: z.string().describe("The date the decision was rendered"), + office: z.string().describe("The specific department, chamber, or administrative division within the court system."), + court: z.string().describe("The court's full official name of the adjudicating body. Do not use relative references; instead, identify the institution from the document header, the formal seal, or the specific treaty/statute that establishes its jurisdiction."), + caseNumber: z.string().describe( + "The formal administrative or legal identifier assigned to the matter." + ), + summary: z.string().describe( + "A concise brief of the dispute and the final determination." + ), +}); + +export interface ExtractedData extends z.infer { + metadata?: any; + decisionDateAsDate?: Date; +} + +@Injectable() +export class ParserService { + static async parse(buffer: Buffer, mimetype: string, job?: any): Promise { + const parser = new ParserService(); + return parser.process(buffer, mimetype, job); + } + + async process(buffer: Buffer, mimetype: string, job?: any): Promise { + let text = ''; + + if (mimetype === 'application/pdf') { + const parser = new PDFParse({ data: buffer as any }); + const textResult = await parser.getText(); + text = textResult.text; + await parser.destroy(); + } else if (mimetype === 'text/html') { + const $ = cheerio.load(buffer.toString()); + // Strip elements that add noise — scripts, styles, navigation etc. + $('script, style, nav, footer, header, noscript, iframe').remove(); + text = $('body').text().replace(/\s+/g, ' ').trim() || buffer.toString(); + } else { + throw new BadRequestException(`Unsupported file type: ${mimetype}`); + } + + if (job) await job.updateProgress('🤖 Analyzing case structure with Gemini AI...'); + return this.extractWithAI(text); + } + + private async extractWithAI(text: string): Promise { + const apiKey = process.env.GOOGLE_API_KEY; + if (!apiKey) { + throw new Error('GOOGLE_API_KEY not configured.'); + } + + const model = new ChatGoogleGenerativeAI({ + apiKey, + model: 'gemini-2.5-flash', + temperature: 0, + }); + + const structuredLlm = model.withStructuredOutput(CaseMetadataSchema); + + // I read that the most important part of the document for metadata extraction is the + // start/end of the document. But I am not a lawyer and uncertain so decided not to + // risk it. In the end I just set a hard cap at 500k characters to avoid abuse. + const maxChars = 500_000; + const documentText = text.length > maxChars ? text.substring(0, maxChars) : text; + + try { + const result = await structuredLlm.invoke(` + You are a specialized legal document processing assistant. + Extract the case metadata from the following court decision text. + All fields should be extracted without exception. + + TEXT TO ANALYZE: + ${documentText} + `); + return { + ...result, + decisionDateAsDate: result.decisionDate ? new Date(result.decisionDate) : undefined, + metadata: { + rawLength: text.length, + model: 'gemini-2.5-flash', + extractedAt: new Date().toISOString(), + }, + }; + } catch (error) { + throw new Error(`AI Extraction failed: ${error.message}`); + } + } +} + +