First draft of parser service

This commit is contained in:
GeorgeWebberley 2026-03-01 12:27:08 +01:00
parent 36f34257d2
commit 42eaac6a39
3 changed files with 842 additions and 2 deletions

742
package-lock.json generated
View file

@ -14,6 +14,8 @@
"@as-integrations/express5": "^1.1.2",
"@aws-sdk/client-s3": "^3.1000.0",
"@aws-sdk/s3-request-presigner": "^3.1000.0",
"@langchain/core": "^1.1.29",
"@langchain/google-genai": "^2.1.22",
"@nestjs/apollo": "^13.2.4",
"@nestjs/common": "^11.1.14",
"@nestjs/config": "^4.0.3",
@ -22,11 +24,13 @@
"@nestjs/platform-express": "^11.0.1",
"@prisma/adapter-pg": "^7.4.2",
"@prisma/client": "^7.4.2",
"cheerio": "^1.2.0",
"class-transformer": "^0.5.1",
"class-validator": "^0.15.1",
"graphql": "^16.13.0",
"graphql-type-json": "^0.3.2",
"graphql-upload-ts": "^2.1.3",
"pdf-parse": "^2.4.5",
"pg": "^8.19.0",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.2",
@ -1889,6 +1893,12 @@
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/@cfworker/json-schema": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz",
"integrity": "sha512-gAmrUZSGtKc3AiBL71iNWxDsyUC5uMaKKGdvzYsBoTW/xi42JQHl7eKV2OYzCUqvc+D2RCcf7EXY2iCyFIk6og==",
"license": "MIT"
},
"node_modules/@chevrotain/cst-dts-gen": {
"version": "10.5.0",
"resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-10.5.0.tgz",
@ -2182,6 +2192,15 @@
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
}
},
"node_modules/@google/generative-ai": {
"version": "0.24.1",
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
"integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==",
"license": "Apache-2.0",
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@graphql-tools/merge": {
"version": "9.1.7",
"resolved": "https://registry.npmjs.org/@graphql-tools/merge/-/merge-9.1.7.tgz",
@ -3352,6 +3371,67 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@langchain/core": {
"version": "1.1.29",
"resolved": "https://registry.npmjs.org/@langchain/core/-/core-1.1.29.tgz",
"integrity": "sha512-BPoegTtIdZX4gl2kxcSXAlLrrJFl1cxeRsk9DM/wlIuvyPrFwjWqrEK5NwF5diDt5XSArhQxIFaifGAl4F7fgw==",
"license": "MIT",
"dependencies": {
"@cfworker/json-schema": "^4.0.2",
"ansi-styles": "^5.0.0",
"camelcase": "6",
"decamelize": "1.2.0",
"js-tiktoken": "^1.0.12",
"langsmith": ">=0.5.0 <1.0.0",
"mustache": "^4.2.0",
"p-queue": "^6.6.2",
"uuid": "^11.1.0",
"zod": "^3.25.76 || ^4"
},
"engines": {
"node": ">=20"
}
},
"node_modules/@langchain/core/node_modules/ansi-styles": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz",
"integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==",
"license": "MIT",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/@langchain/core/node_modules/camelcase": {
"version": "6.3.0",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz",
"integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==",
"license": "MIT",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@langchain/google-genai": {
"version": "2.1.22",
"resolved": "https://registry.npmjs.org/@langchain/google-genai/-/google-genai-2.1.22.tgz",
"integrity": "sha512-01dplEIhEgwdY5dGwjWQXl5jhaAcz8DDXIlyCCms1QnR3HhVckZY1YrpAUiY1atoOcPXN+6/DrSPWUAM0mZVUQ==",
"license": "MIT",
"dependencies": {
"@google/generative-ai": "^0.24.0",
"uuid": "^11.1.0"
},
"engines": {
"node": ">=20"
},
"peerDependencies": {
"@langchain/core": "^1.1.29"
}
},
"node_modules/@lukeed/csprng": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@lukeed/csprng/-/csprng-1.1.0.tgz",
@ -3375,6 +3455,190 @@
"node": ">=16"
}
},
"node_modules/@napi-rs/canvas": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.80.tgz",
"integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==",
"license": "MIT",
"workspaces": [
"e2e/*"
],
"engines": {
"node": ">= 10"
},
"optionalDependencies": {
"@napi-rs/canvas-android-arm64": "0.1.80",
"@napi-rs/canvas-darwin-arm64": "0.1.80",
"@napi-rs/canvas-darwin-x64": "0.1.80",
"@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80",
"@napi-rs/canvas-linux-arm64-gnu": "0.1.80",
"@napi-rs/canvas-linux-arm64-musl": "0.1.80",
"@napi-rs/canvas-linux-riscv64-gnu": "0.1.80",
"@napi-rs/canvas-linux-x64-gnu": "0.1.80",
"@napi-rs/canvas-linux-x64-musl": "0.1.80",
"@napi-rs/canvas-win32-x64-msvc": "0.1.80"
}
},
"node_modules/@napi-rs/canvas-android-arm64": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.80.tgz",
"integrity": "sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-darwin-arm64": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.80.tgz",
"integrity": "sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-darwin-x64": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.80.tgz",
"integrity": "sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.80.tgz",
"integrity": "sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm64-gnu": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.80.tgz",
"integrity": "sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm64-musl": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.80.tgz",
"integrity": "sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.80.tgz",
"integrity": "sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==",
"cpu": [
"riscv64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-x64-gnu": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.80.tgz",
"integrity": "sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-x64-musl": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.80.tgz",
"integrity": "sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-win32-x64-msvc": {
"version": "0.1.80",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz",
"integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/wasm-runtime": {
"version": "0.2.12",
"resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz",
@ -5481,6 +5745,12 @@
"@types/superagent": "^8.1.0"
}
},
"node_modules/@types/uuid": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz",
"integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==",
"license": "MIT"
},
"node_modules/@types/validator": {
"version": "13.15.10",
"resolved": "https://registry.npmjs.org/@types/validator/-/validator-13.15.10.tgz",
@ -6645,7 +6915,6 @@
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"dev": true,
"funding": [
{
"type": "github",
@ -6711,6 +6980,12 @@
"url": "https://opencollective.com/express"
}
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
"license": "ISC"
},
"node_modules/bowser": {
"version": "2.14.1",
"resolved": "https://registry.npmjs.org/bowser/-/bowser-2.14.1.tgz",
@ -6999,6 +7274,48 @@
"dev": true,
"license": "MIT"
},
"node_modules/cheerio": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz",
"integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==",
"license": "MIT",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.2.2",
"encoding-sniffer": "^0.2.1",
"htmlparser2": "^10.1.0",
"parse5": "^7.3.0",
"parse5-htmlparser2-tree-adapter": "^7.1.0",
"parse5-parser-stream": "^7.1.2",
"undici": "^7.19.0",
"whatwg-mimetype": "^4.0.0"
},
"engines": {
"node": ">=20.18.1"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/chevrotain": {
"version": "10.5.0",
"resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-10.5.0.tgz",
@ -7293,6 +7610,15 @@
"node": "^14.18.0 || >=16.10.0"
}
},
"node_modules/console-table-printer": {
"version": "2.15.0",
"resolved": "https://registry.npmjs.org/console-table-printer/-/console-table-printer-2.15.0.tgz",
"integrity": "sha512-SrhBq4hYVjLCkBVOWaTzceJalvn5K1Zq5aQA6wXC/cYjI3frKWNPEMK3sZsJfNNQApvCQmgBcc13ZKmFj8qExw==",
"license": "MIT",
"dependencies": {
"simple-wcswidth": "^1.1.2"
}
},
"node_modules/content-disposition": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz",
@ -7431,6 +7757,34 @@
"node": ">= 8"
}
},
"node_modules/css-select": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz",
"integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz",
"integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cssfilter": {
"version": "0.0.10",
"resolved": "https://registry.npmjs.org/cssfilter/-/cssfilter-0.0.10.tgz",
@ -7462,6 +7816,15 @@
}
}
},
"node_modules/decamelize": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
"integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/dedent": {
"version": "1.7.1",
"resolved": "https://registry.npmjs.org/dedent/-/dedent-1.7.1.tgz",
@ -7608,6 +7971,61 @@
"node": ">=0.3.1"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "BSD-2-Clause"
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"license": "BSD-2-Clause",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz",
"integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==",
"license": "BSD-2-Clause",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/dotenv": {
"version": "16.6.1",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz",
@ -7719,6 +8137,31 @@
"node": ">= 0.8"
}
},
"node_modules/encoding-sniffer": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz",
"integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==",
"license": "MIT",
"dependencies": {
"iconv-lite": "^0.6.3",
"whatwg-encoding": "^3.1.1"
},
"funding": {
"url": "https://github.com/fb55/encoding-sniffer?sponsor=1"
}
},
"node_modules/encoding-sniffer/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"license": "MIT",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/enhanced-resolve": {
"version": "5.20.0",
"resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.20.0.tgz",
@ -7733,6 +8176,18 @@
"node": ">=10.13.0"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/error-ex": {
"version": "1.3.4",
"resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.4.tgz",
@ -9068,6 +9523,37 @@
"dev": true,
"license": "MIT"
},
"node_modules/htmlparser2": {
"version": "10.1.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz",
"integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.2.2",
"entities": "^7.0.1"
}
},
"node_modules/htmlparser2/node_modules/entities": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz",
"integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/http-errors": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz",
@ -10228,6 +10714,15 @@
"jiti": "lib/jiti-cli.mjs"
}
},
"node_modules/js-tiktoken": {
"version": "1.0.21",
"resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
"integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==",
"license": "MIT",
"dependencies": {
"base64-js": "^1.5.1"
}
},
"node_modules/js-tokens": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@ -10332,6 +10827,65 @@
"json-buffer": "3.0.1"
}
},
"node_modules/langsmith": {
"version": "0.5.7",
"resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.5.7.tgz",
"integrity": "sha512-FjYf2oBGMoSXnaT4SRaFguIiGJaonZ5VKWKJDPl9awLZjz2RkN29AcQWceecSINVzXzTvtRWPOjAWT+XggqNNg==",
"license": "MIT",
"dependencies": {
"@types/uuid": "^10.0.0",
"chalk": "^5.6.2",
"console-table-printer": "^2.12.1",
"p-queue": "^6.6.2",
"semver": "^7.6.3",
"uuid": "^10.0.0"
},
"peerDependencies": {
"@opentelemetry/api": "*",
"@opentelemetry/exporter-trace-otlp-proto": "*",
"@opentelemetry/sdk-trace-base": "*",
"openai": "*"
},
"peerDependenciesMeta": {
"@opentelemetry/api": {
"optional": true
},
"@opentelemetry/exporter-trace-otlp-proto": {
"optional": true
},
"@opentelemetry/sdk-trace-base": {
"optional": true
},
"openai": {
"optional": true
}
}
},
"node_modules/langsmith/node_modules/chalk": {
"version": "5.6.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-5.6.2.tgz",
"integrity": "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==",
"license": "MIT",
"engines": {
"node": "^12.17.0 || ^14.13 || >=16.0.0"
},
"funding": {
"url": "https://github.com/chalk/chalk?sponsor=1"
}
},
"node_modules/langsmith/node_modules/uuid": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz",
"integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"license": "MIT",
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/leven": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz",
@ -10759,6 +11313,15 @@
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"license": "MIT"
},
"node_modules/mustache": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz",
"integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==",
"license": "MIT",
"bin": {
"mustache": "bin/mustache"
}
},
"node_modules/mute-stream": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-2.0.0.tgz",
@ -10902,6 +11465,18 @@
"node": ">=8"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/nypm": {
"version": "0.6.5",
"resolved": "https://registry.npmjs.org/nypm/-/nypm-0.6.5.tgz",
@ -11043,6 +11618,15 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-finally": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz",
"integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==",
"license": "MIT",
"engines": {
"node": ">=4"
}
},
"node_modules/p-limit": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
@ -11075,6 +11659,40 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-queue": {
"version": "6.6.2",
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz",
"integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==",
"license": "MIT",
"dependencies": {
"eventemitter3": "^4.0.4",
"p-timeout": "^3.2.0"
},
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-queue/node_modules/eventemitter3": {
"version": "4.0.7",
"resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==",
"license": "MIT"
},
"node_modules/p-timeout": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz",
"integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==",
"license": "MIT",
"dependencies": {
"p-finally": "^1.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/p-try": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
@ -11124,6 +11742,55 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/parse5": {
"version": "7.3.0",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz",
"integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==",
"license": "MIT",
"dependencies": {
"entities": "^6.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz",
"integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==",
"license": "MIT",
"dependencies": {
"domhandler": "^5.0.3",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-parser-stream": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz",
"integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==",
"license": "MIT",
"dependencies": {
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5/node_modules/entities": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz",
"integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/parseurl": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
@ -11217,6 +11884,38 @@
"devOptional": true,
"license": "MIT"
},
"node_modules/pdf-parse": {
"version": "2.4.5",
"resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-2.4.5.tgz",
"integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==",
"license": "Apache-2.0",
"dependencies": {
"@napi-rs/canvas": "0.1.80",
"pdfjs-dist": "5.4.296"
},
"bin": {
"pdf-parse": "bin/cli.mjs"
},
"engines": {
"node": ">=20.16.0 <21 || >=22.3.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/mehmet-kozan"
}
},
"node_modules/pdfjs-dist": {
"version": "5.4.296",
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz",
"integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==",
"license": "Apache-2.0",
"engines": {
"node": ">=20.16.0 || >=22.3.0"
},
"optionalDependencies": {
"@napi-rs/canvas": "^0.1.80"
}
},
"node_modules/perfect-debounce": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/perfect-debounce/-/perfect-debounce-1.0.0.tgz",
@ -12013,7 +12712,6 @@
"version": "7.7.4",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
"integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
"dev": true,
"license": "ISC",
"bin": {
"semver": "bin/semver.js"
@ -12234,6 +12932,12 @@
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/simple-wcswidth": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/simple-wcswidth/-/simple-wcswidth-1.1.2.tgz",
"integrity": "sha512-j7piyCjAeTDSjzTSQ7DokZtMNwNlEAyxqSZeCS+CXH7fJ4jx3FuJ/mTW3mE+6JLs4VJBbcll0Kjn+KXI5t21Iw==",
"license": "MIT"
},
"node_modules/slash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
@ -13244,6 +13948,15 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/undici": {
"version": "7.22.0",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.22.0.tgz",
"integrity": "sha512-RqslV2Us5BrllB+JeiZnK4peryVTndy9Dnqq62S3yYRRTj0tFQCwEniUy2167skdGOy3vqRzEvl1Dm4sV2ReDg==",
"license": "MIT",
"engines": {
"node": ">=20.18.1"
}
},
"node_modules/undici-types": {
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
@ -13619,6 +14332,31 @@
"url": "https://opencollective.com/webpack"
}
},
"node_modules/whatwg-encoding": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
"integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==",
"deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation",
"license": "MIT",
"dependencies": {
"iconv-lite": "0.6.3"
},
"engines": {
"node": ">=18"
}
},
"node_modules/whatwg-encoding/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"license": "MIT",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/whatwg-mimetype": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz",

View file

@ -25,6 +25,8 @@
"@as-integrations/express5": "^1.1.2",
"@aws-sdk/client-s3": "^3.1000.0",
"@aws-sdk/s3-request-presigner": "^3.1000.0",
"@langchain/core": "^1.1.29",
"@langchain/google-genai": "^2.1.22",
"@nestjs/apollo": "^13.2.4",
"@nestjs/common": "^11.1.14",
"@nestjs/config": "^4.0.3",
@ -33,11 +35,13 @@
"@nestjs/platform-express": "^11.0.1",
"@prisma/adapter-pg": "^7.4.2",
"@prisma/client": "^7.4.2",
"cheerio": "^1.2.0",
"class-transformer": "^0.5.1",
"class-validator": "^0.15.1",
"graphql": "^16.13.0",
"graphql-type-json": "^0.3.2",
"graphql-upload-ts": "^2.1.3",
"pdf-parse": "^2.4.5",
"pg": "^8.19.0",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.2",

View file

@ -0,0 +1,98 @@
import { Injectable, BadRequestException } from '@nestjs/common';
import * as cheerio from 'cheerio';
import { z } from 'zod';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { PDFParse } from 'pdf-parse';
const CaseMetadataSchema = z.object({
title: z.string().describe("The official title or heading of the case."),
decisionType: z.string().describe("The formal category of the ruling (e.g., Judgment, Final Decision, Stay of Execution, or Administrative Order etc.)."),
decisionDate: z.string().describe("The date the decision was rendered"),
office: z.string().describe("The specific department, chamber, or administrative division within the court system."),
court: z.string().describe("The court's full official name of the adjudicating body. Do not use relative references; instead, identify the institution from the document header, the formal seal, or the specific treaty/statute that establishes its jurisdiction."),
caseNumber: z.string().describe(
"The formal administrative or legal identifier assigned to the matter."
),
summary: z.string().describe(
"A concise brief of the dispute and the final determination."
),
});
export interface ExtractedData extends z.infer<typeof CaseMetadataSchema> {
metadata?: any;
decisionDateAsDate?: Date;
}
@Injectable()
export class ParserService {
static async parse(buffer: Buffer, mimetype: string, job?: any): Promise<ExtractedData> {
const parser = new ParserService();
return parser.process(buffer, mimetype, job);
}
async process(buffer: Buffer, mimetype: string, job?: any): Promise<ExtractedData> {
let text = '';
if (mimetype === 'application/pdf') {
const parser = new PDFParse({ data: buffer as any });
const textResult = await parser.getText();
text = textResult.text;
await parser.destroy();
} else if (mimetype === 'text/html') {
const $ = cheerio.load(buffer.toString());
// Strip elements that add noise — scripts, styles, navigation etc.
$('script, style, nav, footer, header, noscript, iframe').remove();
text = $('body').text().replace(/\s+/g, ' ').trim() || buffer.toString();
} else {
throw new BadRequestException(`Unsupported file type: ${mimetype}`);
}
if (job) await job.updateProgress('🤖 Analyzing case structure with Gemini AI...');
return this.extractWithAI(text);
}
private async extractWithAI(text: string): Promise<ExtractedData> {
const apiKey = process.env.GOOGLE_API_KEY;
if (!apiKey) {
throw new Error('GOOGLE_API_KEY not configured.');
}
const model = new ChatGoogleGenerativeAI({
apiKey,
model: 'gemini-2.5-flash',
temperature: 0,
});
const structuredLlm = model.withStructuredOutput(CaseMetadataSchema);
// I read that the most important part of the document for metadata extraction is the
// start/end of the document. But I am not a lawyer and uncertain so decided not to
// risk it. In the end I just set a hard cap at 500k characters to avoid abuse.
const maxChars = 500_000;
const documentText = text.length > maxChars ? text.substring(0, maxChars) : text;
try {
const result = await structuredLlm.invoke(`
You are a specialized legal document processing assistant.
Extract the case metadata from the following court decision text.
All fields should be extracted without exception.
TEXT TO ANALYZE:
${documentText}
`);
return {
...result,
decisionDateAsDate: result.decisionDate ? new Date(result.decisionDate) : undefined,
metadata: {
rawLength: text.length,
model: 'gemini-2.5-flash',
extractedAt: new Date().toISOString(),
},
};
} catch (error) {
throw new Error(`AI Extraction failed: ${error.message}`);
}
}
}