From 44415babdb61584de34f7ea495956cf59a779083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20David=20M=C3=BCller?= <57191390+t0mdavid-m@users.noreply.github.com> Date: Fri, 20 Feb 2026 15:24:08 +0200 Subject: [PATCH 01/11] Add Matomo analytics integration with GDPR consent support (#341) * Add Matomo Tag Manager as third analytics tracking mode Adds Matomo Tag Manager support alongside existing Google Analytics and Piwik Pro integrations. Includes settings.json configuration (url + tag), build-time script injection via hook-analytics.py, Klaro GDPR consent banner integration, and runtime consent granting via MTM data layer API. https://claude.ai/code/session_0165AXHkmRZ6bx23n7Tbyz8h * Fix Matomo Tag Manager snippet to match official docs - Accept full container JS URL instead of separate url + tag fields, supporting both self-hosted and Matomo Cloud URL patterns - Match the official snippet: var _mtm alias, _mtm.push shorthand - Remove redundant type="text/javascript" attribute - Remove unused "tag" field from settings.json https://claude.ai/code/session_0165AXHkmRZ6bx23n7Tbyz8h * Split Matomo config into base url + tag fields Separate the Matomo setting into `url` (base URL, e.g. https://cdn.matomo.cloud/openms.matomo.cloud) and `tag` (container ID, e.g. yDGK8bfY), consistent with how other providers use a tag field. 
The script constructs the full path: {url}/container_{tag}.js https://claude.ai/code/session_0165AXHkmRZ6bx23n7Tbyz8h * install matomo tag --------- Co-authored-by: Claude --- gdpr_consent/dist/bundle.js | 2 +- gdpr_consent/src/main.ts | 10 ++++++++++ hooks/hook-analytics.py | 21 +++++++++++++++++++++ settings.json | 7 ++++++- src/common/captcha_.py | 5 +++-- src/common/common.py | 17 +++++++++++++++++ 6 files changed, 58 insertions(+), 4 deletions(-) diff --git a/gdpr_consent/dist/bundle.js b/gdpr_consent/dist/bundle.js index 2d2d814..8614457 100644 --- a/gdpr_consent/dist/bundle.js +++ b/gdpr_consent/dist/bundle.js @@ -235,7 +235,7 @@ eval("__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpac /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => { "use strict"; -eval("__webpack_require__.r(__webpack_exports__);\n/* harmony import */ var streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! streamlit-component-lib */ \"./node_modules/streamlit-component-lib/dist/index.js\");\nvar __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n};\nvar __generator = (undefined && undefined.__generator) || function (thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? 
op[1] : void 0, done: true };\n }\n};\n\n// Defines the configuration for Klaro\nvar klaroConfig = {\n mustConsent: true,\n acceptAll: true,\n services: []\n};\n// This will make klaroConfig globally accessible\nwindow.klaroConfig = klaroConfig;\n// Function to safely access the Klaro manager\nfunction getKlaroManager() {\n var _a;\n return ((_a = window.klaro) === null || _a === void 0 ? void 0 : _a.getManager) ? window.klaro.getManager() : null;\n}\n// Waits until Klaro Manager is available\nfunction waitForKlaroManager() {\n return __awaiter(this, arguments, void 0, function (maxWaitTime, interval) {\n var startTime, klaroManager;\n if (maxWaitTime === void 0) { maxWaitTime = 5000; }\n if (interval === void 0) { interval = 100; }\n return __generator(this, function (_a) {\n switch (_a.label) {\n case 0:\n startTime = Date.now();\n _a.label = 1;\n case 1:\n if (!(Date.now() - startTime < maxWaitTime)) return [3 /*break*/, 3];\n klaroManager = getKlaroManager();\n if (klaroManager) {\n return [2 /*return*/, klaroManager];\n }\n return [4 /*yield*/, new Promise(function (resolve) { return setTimeout(resolve, interval); })];\n case 2:\n _a.sent();\n return [3 /*break*/, 1];\n case 3: throw new Error(\"Klaro manager did not become available within the allowed time.\");\n }\n });\n });\n}\n// Helper function to handle unknown errors\nfunction handleError(error) {\n if (error instanceof Error) {\n console.error(\"Error:\", error.message);\n }\n else {\n console.error(\"Unknown error:\", error);\n }\n}\n// Tracking was accepted\nfunction callback() {\n return __awaiter(this, void 0, void 0, function () {\n var manager, return_vals, _i, _a, service, error_1;\n return __generator(this, function (_b) {\n switch (_b.label) {\n case 0:\n _b.trys.push([0, 2, , 3]);\n return [4 /*yield*/, waitForKlaroManager()];\n case 1:\n manager = _b.sent();\n if (manager.confirmed) {\n return_vals = {};\n for (_i = 0, _a = klaroConfig.services; _i < _a.length; _i++) {\n service = _a[_i];\n 
return_vals[service.name] = manager.getConsent(service.name);\n }\n streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentValue(return_vals);\n }\n return [3 /*break*/, 3];\n case 2:\n error_1 = _b.sent();\n handleError(error_1);\n return [3 /*break*/, 3];\n case 3: return [2 /*return*/];\n }\n });\n });\n}\n// Stores if the component has been rendered before\nvar rendered = false;\nfunction onRender(event) {\n // Klaro does not work if embedded multiple times\n if (rendered) {\n return;\n }\n rendered = true;\n var data = event.detail;\n if (data.args['google_analytics']) {\n klaroConfig.services.push({\n name: 'google-analytics',\n cookies: [\n /^_ga(_.*)?/ // we delete the Google Analytics cookies if the user declines its use\n ],\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['piwik_pro']) {\n klaroConfig.services.push({\n name: 'piwik-pro',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n // Create a new script element\n var script = document.createElement('script');\n // Set the necessary attributes\n script.defer = true;\n script.type = 'application/javascript';\n script.src = 'https://cdn.kiprotect.com/klaro/v0.7/klaro.js';\n // Set the klaro config\n script.setAttribute('data-config', 'klaroConfig');\n // Append the script to the head or body\n document.head.appendChild(script);\n}\n// Attach our `onRender` handler to Streamlit's render event.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.events.addEventListener(streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.RENDER_EVENT, onRender);\n// Tell Streamlit we're ready to start receiving data. 
We won't get our\n// first RENDER_EVENT until we call this function.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentReady();\n// Finally, tell Streamlit to update the initial height.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setFrameHeight(1000);\n\n\n//# sourceURL=webpack://gdpr_consent/./src/main.ts?"); +eval("__webpack_require__.r(__webpack_exports__);\n/* harmony import */ var streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! streamlit-component-lib */ \"./node_modules/streamlit-component-lib/dist/index.js\");\nvar __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n};\nvar __generator = (undefined && undefined.__generator) || function (thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n};\n\n// Defines the configuration for Klaro\nvar klaroConfig = {\n mustConsent: true,\n acceptAll: true,\n services: []\n};\n// This will make klaroConfig globally accessible\nwindow.klaroConfig = klaroConfig;\n// Function to safely access the Klaro manager\nfunction getKlaroManager() {\n var _a;\n return ((_a = window.klaro) === null || _a === void 0 ? void 0 : _a.getManager) ? 
window.klaro.getManager() : null;\n}\n// Waits until Klaro Manager is available\nfunction waitForKlaroManager() {\n return __awaiter(this, arguments, void 0, function (maxWaitTime, interval) {\n var startTime, klaroManager;\n if (maxWaitTime === void 0) { maxWaitTime = 5000; }\n if (interval === void 0) { interval = 100; }\n return __generator(this, function (_a) {\n switch (_a.label) {\n case 0:\n startTime = Date.now();\n _a.label = 1;\n case 1:\n if (!(Date.now() - startTime < maxWaitTime)) return [3 /*break*/, 3];\n klaroManager = getKlaroManager();\n if (klaroManager) {\n return [2 /*return*/, klaroManager];\n }\n return [4 /*yield*/, new Promise(function (resolve) { return setTimeout(resolve, interval); })];\n case 2:\n _a.sent();\n return [3 /*break*/, 1];\n case 3: throw new Error(\"Klaro manager did not become available within the allowed time.\");\n }\n });\n });\n}\n// Helper function to handle unknown errors\nfunction handleError(error) {\n if (error instanceof Error) {\n console.error(\"Error:\", error.message);\n }\n else {\n console.error(\"Unknown error:\", error);\n }\n}\n// Tracking was accepted\nfunction callback() {\n return __awaiter(this, void 0, void 0, function () {\n var manager, return_vals, _i, _a, service, error_1;\n return __generator(this, function (_b) {\n switch (_b.label) {\n case 0:\n _b.trys.push([0, 2, , 3]);\n return [4 /*yield*/, waitForKlaroManager()];\n case 1:\n manager = _b.sent();\n if (manager.confirmed) {\n return_vals = {};\n for (_i = 0, _a = klaroConfig.services; _i < _a.length; _i++) {\n service = _a[_i];\n return_vals[service.name] = manager.getConsent(service.name);\n }\n streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentValue(return_vals);\n }\n return [3 /*break*/, 3];\n case 2:\n error_1 = _b.sent();\n handleError(error_1);\n return [3 /*break*/, 3];\n case 3: return [2 /*return*/];\n }\n });\n });\n}\n// Stores if the component has been rendered before\nvar rendered = false;\nfunction 
onRender(event) {\n // Klaro does not work if embedded multiple times\n if (rendered) {\n return;\n }\n rendered = true;\n var data = event.detail;\n if (data.args['google_analytics']) {\n klaroConfig.services.push({\n name: 'google-analytics',\n cookies: [\n /^_ga(_.*)?/ // we delete the Google Analytics cookies if the user declines its use\n ],\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['piwik_pro']) {\n klaroConfig.services.push({\n name: 'piwik-pro',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['matomo']) {\n klaroConfig.services.push({\n name: 'matomo',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n // Create a new script element\n var script = document.createElement('script');\n // Set the necessary attributes\n script.defer = true;\n script.type = 'application/javascript';\n script.src = 'https://cdn.kiprotect.com/klaro/v0.7/klaro.js';\n // Set the klaro config\n script.setAttribute('data-config', 'klaroConfig');\n // Append the script to the head or body\n document.head.appendChild(script);\n}\n// Attach our `onRender` handler to Streamlit's render event.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.events.addEventListener(streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.RENDER_EVENT, onRender);\n// Tell Streamlit we're ready to start receiving data. 
We won't get our\n// first RENDER_EVENT until we call this function.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentReady();\n// Finally, tell Streamlit to update the initial height.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setFrameHeight(1000);\n\n\n//# sourceURL=webpack://gdpr_consent/./src/main.ts?"); /***/ }), diff --git a/gdpr_consent/src/main.ts b/gdpr_consent/src/main.ts index f7219ff..059fef8 100644 --- a/gdpr_consent/src/main.ts +++ b/gdpr_consent/src/main.ts @@ -114,6 +114,16 @@ function onRender(event: Event): void { } ) } + if (data.args['matomo']) { + klaroConfig.services.push( + { + name: 'matomo', + purposes: ['analytics'], + onAccept: callback, + onDecline: callback, + } + ) + } // Create a new script element var script = document.createElement('script') diff --git a/hooks/hook-analytics.py b/hooks/hook-analytics.py index 6b8b2da..c47f0c2 100644 --- a/hooks/hook-analytics.py +++ b/hooks/hook-analytics.py @@ -56,6 +56,21 @@ def piwik_pro_body(piwik_tag): """ +def matomo_head(matomo_url, matomo_tag): + return f""" + + + + """ + + if __name__ == '__main__': # Load configuration @@ -79,6 +94,12 @@ def piwik_pro_body(piwik_tag): piwik_tag = settings['analytics']['piwik-pro']['tag'] index = patch_body(index, piwik_pro_body(piwik_tag)) + # Configure matomo tag manager + if settings['analytics']['matomo']['enabled']: + matomo_url = settings['analytics']['matomo']['url'] + matomo_tag = settings['analytics']['matomo']['tag'] + index = patch_head(index, matomo_head(matomo_url, matomo_tag)) + # Save index.html with open(index_path, 'w') as f: f.write(index) \ No newline at end of file diff --git a/settings.json b/settings.json index 34189b8..e073396 100644 --- a/settings.json +++ b/settings.json @@ -9,8 +9,13 @@ "tag": "" }, "piwik-pro": { + "enabled": false, + "tag": "" + }, + "matomo": { "enabled": true, - "tag": "57690c44-d635-43b0-ab43-f8bd3064ca06" + "url": "https://cdn.matomo.cloud/openms.matomo.cloud", 
+ "tag": "yDGK8bfY" } }, "online_deployment": false, diff --git a/src/common/captcha_.py b/src/common/captcha_.py index 5c7afd7..498b133 100644 --- a/src/common/captcha_.py +++ b/src/common/captcha_.py @@ -208,12 +208,13 @@ def captcha_control(): # Check if consent for tracking was given ga = st.session_state.settings['analytics']['google-analytics']['enabled'] pp = st.session_state.settings['analytics']['piwik-pro']['enabled'] - if (ga or pp) and (st.session_state.tracking_consent is None): + mt = st.session_state.settings['analytics']['matomo']['enabled'] + if (ga or pp or mt) and (st.session_state.tracking_consent is None): consent_component = st_components.declare_component("gdpr_consent", path=Path("gdpr_consent")) with st.spinner(): # Ask for consent st.session_state.tracking_consent = consent_component( - google_analytics=ga, piwik_pro=pp + google_analytics=ga, piwik_pro=pp, matomo=mt ) if st.session_state.tracking_consent is None: # No response by user yet diff --git a/src/common/common.py b/src/common/common.py index 4d24e76..b29d14f 100644 --- a/src/common/common.py +++ b/src/common/common.py @@ -405,6 +405,23 @@ def page_setup(page: str = "") -> dict[str, Any]: width=1, height=1, ) + if (st.session_state.settings["analytics"]["matomo"]["enabled"]) and ( + st.session_state.tracking_consent["matomo"] == True + ): + html( + """ + + + + + + """, + width=1, + height=1, + ) # Determine the workspace for the current session if ("workspace" not in st.session_state) or ( From c65d503b225c9eb9cb448dbf69d152cf32c1f500 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:33:47 +0100 Subject: [PATCH 02/11] Remove duplicate `address` key in `.streamlit/config.toml` (#346) * Initial plan * fix: remove duplicate address entry in config.toml Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> 
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com> --- .streamlit/config.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/.streamlit/config.toml b/.streamlit/config.toml index daf71ea..e3d442e 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -8,7 +8,6 @@ developmentMode = false address = "0.0.0.0" maxUploadSize = 200 #MB port = 8501 # should be same as configured in deployment repo -address = "0.0.0.0" enableCORS = false enableXsrfProtection = false From c81fee60f318630ac628cea888a1ebaf452f1ec2 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 18:40:41 +0100 Subject: [PATCH 03/11] Fix integration test failures caused by sys.modules pollution and shutil.SameFileError (#349) * Initial plan * Fix integration test failures: restore sys.modules mocks, handle SameFileError, update CI workflow Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com> * Remove unnecessary pyopenms mock from test_topp_workflow_parameter.py, simplify test_parameter_presets.py Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com> * Fix Windows build: correct site-packages path in cleanup step Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com> --- .../build-windows-executable-app.yaml | 2 +- .github/workflows/ci.yml | 9 ++---- test_gui.py | 10 +++++-- tests/test_parameter_presets.py | 28 ++++++++++++++----- tests/test_topp_workflow_parameter.py | 11 -------- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/.github/workflows/build-windows-executable-app.yaml b/.github/workflows/build-windows-executable-app.yaml index ff5ea81..e6ef4a9 100644 --- a/.github/workflows/build-windows-executable-app.yaml +++ b/.github/workflows/build-windows-executable-app.yaml 
@@ -248,7 +248,7 @@ jobs: - name: Clean up unnecessary Python files shell: pwsh run: | - $sitePkgs = "py/Lib/site-packages" + $sitePkgs = "python-${{ env.PYTHON_VERSION }}/Lib/site-packages" # Remove __pycache__ directories Get-ChildItem -Path $sitePkgs -Recurse -Directory -Filter "__pycache__" | Remove-Item -Recurse -Force diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dce98b6..7baf813 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,19 +8,14 @@ jobs: strategy: matrix: os: [ubuntu-latest] - # Requirements file generated with python=3.11 + # Requirements file generated with python=3.12; tested with python=3.11 python-version: ["3.11"] steps: - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v3 + - uses: actions/setup-python@v4 with: - activate-environment: openms python-version: ${{ matrix.python-version }} - channels: defaults,bioconda,conda-forge - - name: Install OpenMS - run: | - conda install openms -y - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/test_gui.py b/test_gui.py index 101865c..10f13b6 100644 --- a/test_gui.py +++ b/test_gui.py @@ -93,7 +93,10 @@ def test_view_raw_ms_data(launch, example): # Copy files from example-data/mzML to workspace mzML directory, add to selected files for f in Path("example-data", "mzML").glob("*.mzML"): - shutil.copy(f, mzML_dir) + try: + shutil.copy(f, mzML_dir) + except shutil.SameFileError: + pass # File already exists as a symlink to the same source (on Linux) launch.run() ## TODO: Figure out a way to select a spectrum to be displayed @@ -119,7 +122,10 @@ def test_run_workflow(launch, example): # Copy files from example-data/mzML to workspace mzML directory, add to selected files for f in Path("example-data", "mzML").glob("*.mzML"): - shutil.copy(f, mzML_dir) + try: + shutil.copy(f, mzML_dir) + except shutil.SameFileError: + pass # File already exists as a symlink to the same source (on Linux) launch.run() ## 
Select experiments to process diff --git a/tests/test_parameter_presets.py b/tests/test_parameter_presets.py index 5104abc..30e0fc0 100644 --- a/tests/test_parameter_presets.py +++ b/tests/test_parameter_presets.py @@ -16,19 +16,33 @@ PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(PROJECT_ROOT) -# Create mock for streamlit before importing ParameterManager +# Mock streamlit before importing ParameterManager so that the imported module +# uses a controllable `st.session_state` (a plain dict) instead of the real one, +# which requires a running Streamlit app context. This allows unit-testing +# ParameterManager's preset logic (apply_preset, clear_parameter_session_state) +# in isolation. mock_streamlit = MagicMock() mock_streamlit.session_state = {} -sys.modules['streamlit'] = mock_streamlit -# Create mock for pyopenms -mock_pyopenms = MagicMock() -mock_pyopenms.__version__ = "2.9.1" -sys.modules['pyopenms'] = mock_pyopenms +# Temporarily replace streamlit in sys.modules so that ParameterManager's +# `import streamlit as st` picks up the mock. Restore immediately after import +# so other test files (e.g., test_gui.py AppTest) get the real streamlit. +_original_streamlit = sys.modules.get('streamlit') +sys.modules['streamlit'] = mock_streamlit -# Now import after mocks are set up from src.workflow.ParameterManager import ParameterManager +if _original_streamlit is not None: + sys.modules['streamlit'] = _original_streamlit +else: + sys.modules.pop('streamlit', None) + +# Remove cached src.workflow modules that were imported with mocked streamlit so +# that AppTest (in test_gui.py) re-imports them fresh with the real package. 
+for _key in list(sys.modules.keys()): + if _key.startswith('src.workflow'): + sys.modules.pop(_key, None) + @pytest.fixture def temp_workflow_dir(): diff --git a/tests/test_topp_workflow_parameter.py b/tests/test_topp_workflow_parameter.py index fa4d75e..20ea1d2 100644 --- a/tests/test_topp_workflow_parameter.py +++ b/tests/test_topp_workflow_parameter.py @@ -14,11 +14,6 @@ PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(PROJECT_ROOT) -# Create mock for pyopenms to avoid dependency on actual OpenMS installation -mock_pyopenms = MagicMock() -mock_pyopenms.__version__ = "2.9.1" # Mock version for testing -sys.modules['pyopenms'] = mock_pyopenms - @pytest.fixture def mock_streamlit(): """Mock essential Streamlit components for testing parameter display.""" @@ -47,12 +42,6 @@ def mock_streamlit(): } -def test_mock_pyopenms(): - """Verify that pyopenms mock is working correctly.""" - import pyopenms - assert hasattr(pyopenms, '__version__') - - def test_topp_parameter_correctness(): """Test that TOPP parameters are displayed with correct values.""" # Define expected parameters with values From fb2fe67a2c392abd53a882d07f953dd7a2289957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20David=20M=C3=BCller?= <57191390+t0mdavid-m@users.noreply.github.com> Date: Sat, 14 Mar 2026 19:21:22 +0100 Subject: [PATCH 04/11] Remove server address from bundled config.toml for Windows installer (#351) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, 0.0.0.0 is not a valid connect address — the browser fails to open http://0.0.0.0:8501. By removing the address entry from the bundled .streamlit/config.toml, Streamlit defaults to localhost, which works correctly for local deployments. Docker deployments are unaffected as they pass --server.address 0.0.0.0 on the command line. 
https://claude.ai/code/session_016amsLCZeFogTksmtk1geb5 Co-authored-by: Claude --- .github/workflows/build-windows-executable-app.yaml | 4 ++++ .github/workflows/test-win-exe-w-embed-py.yaml | 4 ++++ docs/win_exe_with_embed_py.md | 6 ++++++ docs/win_exe_with_pyinstaller.md | 6 ++++++ 4 files changed, 20 insertions(+) diff --git a/.github/workflows/build-windows-executable-app.yaml b/.github/workflows/build-windows-executable-app.yaml index e6ef4a9..cc2e23c 100644 --- a/.github/workflows/build-windows-executable-app.yaml +++ b/.github/workflows/build-windows-executable-app.yaml @@ -311,6 +311,10 @@ jobs: Copy-Item "openms-bin/${file}.exe" -Destination "streamlit_exe/${file}.exe" } + - name: Remove server address for local Windows deployment + run: | + (Get-Content streamlit_exe/.streamlit/config.toml) -notmatch '^address' | Set-Content streamlit_exe/.streamlit/config.toml + - name: Generate Readme.txt shell: bash run: | diff --git a/.github/workflows/test-win-exe-w-embed-py.yaml b/.github/workflows/test-win-exe-w-embed-py.yaml index 57a588b..b543d0e 100644 --- a/.github/workflows/test-win-exe-w-embed-py.yaml +++ b/.github/workflows/test-win-exe-w-embed-py.yaml @@ -62,6 +62,10 @@ jobs: cp default-parameters.json streamlit_exe cp ${{ env.APP_NAME }}.bat streamlit_exe + - name: Remove server address for local Windows deployment + run: | + (Get-Content streamlit_exe/.streamlit/config.toml) -notmatch '^address' | Set-Content streamlit_exe/.streamlit/config.toml + - name: Generate Readme.txt shell: bash run: | diff --git a/docs/win_exe_with_embed_py.md b/docs/win_exe_with_embed_py.md index 8852ba2..cb3d054 100644 --- a/docs/win_exe_with_embed_py.md +++ b/docs/win_exe_with_embed_py.md @@ -96,6 +96,12 @@ python -m pip install -r requirements.txt --target python-3.11.9/Lib/site-packag cp app.py ../streamlit_exe ``` +3. 
Remove the server address from the bundled config to use `localhost` (default) instead of `0.0.0.0`, which doesn't work as a connect address on Windows: + + ```powershell + (Get-Content streamlit_exe/.streamlit/config.toml) -notmatch '^address' | Set-Content streamlit_exe/.streamlit/config.toml + ``` + #### 🚀 After successfully completing all these steps, the Streamlit app will be available by running the run_app.bat file. :pencil: You can still change the configuration of Streamlit app with .streamlit/config.toml file, e.g., provide a different port, change upload size, etc. diff --git a/docs/win_exe_with_pyinstaller.md b/docs/win_exe_with_pyinstaller.md index 566f2a4..7112691 100644 --- a/docs/win_exe_with_pyinstaller.md +++ b/docs/win_exe_with_pyinstaller.md @@ -103,6 +103,12 @@ cp -r src dist/src cp -r assets dist/assets cp app.py dist/ cp presets.json dist/ +``` + +Remove the server address from the bundled config so Streamlit uses `localhost` (default) instead of `0.0.0.0`, which doesn't work as a connect address on Windows: + +```powershell +(Get-Content dist/.streamlit/config.toml) -notmatch '^address' | Set-Content dist/.streamlit/config.toml ``` From 128da6dcd4eda6350cd5871d8369c5e15651b53f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20David=20M=C3=BCller?= <57191390+t0mdavid-m@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:51:01 +0100 Subject: [PATCH 05/11] reenable cross origin protection --- .streamlit/config.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.streamlit/config.toml b/.streamlit/config.toml index e3d442e..00c6abb 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -8,8 +8,6 @@ developmentMode = false address = "0.0.0.0" maxUploadSize = 200 #MB port = 8501 # should be same as configured in deployment repo -enableCORS = false -enableXsrfProtection = false [theme] From 5949251d0e6843237ba8122df1de80ce26f5329d Mon Sep 17 00:00:00 2001 From: Tom David Mueller Date: Tue, 7 Apr 2026 13:31:34 +0200 Subject: [PATCH 
06/11] Fix tool instance name handling and move custom defaults to params.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the issue where multiple instances of the same TOPP tool could not have independent custom defaults. The .ini file (keyed by tool name) only supported one set of defaults — the first instance's defaults were baked in and subsequent instances were silently ignored. Changes: - Add get_merged_params() to ParameterManager for centralized three-layer parameter resolution (ini defaults < _defaults < user overrides) - Replace .ini file mutation with _defaults seeding in params.json, keyed by tool instance name - Update save_parameters() to compare against _defaults when determining which values are non-default - Update run_topp() and non_default_params_summary() to use merged params - Resolve instance names to real tool names across save_parameters, input_TOPP, run_topp, and get_topp_parameters --- .streamlit/config.toml | 2 + src/workflow/CommandExecutor.py | 32 +-- src/workflow/ParameterManager.py | 72 ++++-- src/workflow/StreamlitUI.py | 78 ++++--- tests/test_parameter_defaults.py | 363 +++++++++++++++++++++++++++++++ tests/test_tool_instance_name.py | 268 +++++++++++++++++++++++ 6 files changed, 756 insertions(+), 59 deletions(-) create mode 100644 tests/test_parameter_defaults.py create mode 100644 tests/test_tool_instance_name.py diff --git a/.streamlit/config.toml b/.streamlit/config.toml index 00c6abb..e3d442e 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -8,6 +8,8 @@ developmentMode = false address = "0.0.0.0" maxUploadSize = 200 #MB port = 8501 # should be same as configured in deployment repo +enableCORS = false +enableXsrfProtection = false [theme] diff --git a/src/workflow/CommandExecutor.py b/src/workflow/CommandExecutor.py index 86f265b..bfea082 100644 --- a/src/workflow/CommandExecutor.py +++ b/src/workflow/CommandExecutor.py @@ -216,7 +216,7 @@ def 
read_stderr(): stdout_thread.join() stderr_thread.join() - def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> bool: + def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}, tool_instance_name: str = None) -> bool: """ Constructs and executes commands for the specified tool OpenMS TOPP tool based on the given input and output configurations. Ensures that all input/output file lists @@ -234,6 +234,9 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b tool (str): The executable name or path of the tool. input_output (dict): A dictionary specifying the input/output parameter names (as key) and their corresponding file paths (as value). custom_params (dict): A dictionary of custom parameters to pass to the tool. + tool_instance_name (str, optional): A unique instance name for this tool + invocation, used for parameter lookup when multiple instances of the + same tool exist. If not provided, defaults to the tool name. Returns: bool: True if all commands succeeded, False if any failed. @@ -242,6 +245,8 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b ValueError: If the lengths of input/output file lists are inconsistent, except for single string inputs. """ + # Use tool_instance_name for parameter lookup, fall back to tool name + params_key = tool_instance_name if tool_instance_name else tool # check input: any input lists must be same length, other items can be a single string # e.g. 
input_mzML : [list of n mzML files], output_featureXML : [list of n featureXML files], input_database : database.tsv io_lengths = [len(v) for v in input_output.values() if len(v) > 1] @@ -261,8 +266,8 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b commands = [] - # Load parameters for non-defaults - params = self.parameter_manager.get_parameters_from_json() + # Load merged parameters (_defaults + user overrides) for this tool instance + merged_params = self.parameter_manager.get_merged_params(params_key) # Construct commands for each process for i in range(n_processes): command = [tool] @@ -281,17 +286,16 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> b # standard case, files was a list of strings, take the file name at index else: command += [value[i]] - # Add non-default TOPP tool parameters - if tool in params.keys(): - for k, v in params[tool].items(): - command += [f"-{k}"] - # Skip only empty strings (pass flag with no value) - # Note: 0 and 0.0 are valid values, so use explicit check - if v != "" and v is not None: - if isinstance(v, str) and "\n" in v: - command += v.split("\n") - else: - command += [str(v)] + # Add merged TOPP tool parameters (_defaults + user overrides) + for k, v in merged_params.items(): + command += [f"-{k}"] + # Skip only empty strings (pass flag with no value) + # Note: 0 and 0.0 are valid values, so use explicit check + if v != "" and v is not None: + if isinstance(v, str) and "\n" in v: + command += v.split("\n") + else: + command += [str(v)] # Add custom parameters for k, v in custom_params.items(): command += [f"-{k}"] diff --git a/src/workflow/ParameterManager.py b/src/workflow/ParameterManager.py index b0c3626..c4eaf3e 100644 --- a/src/workflow/ParameterManager.py +++ b/src/workflow/ParameterManager.py @@ -65,7 +65,7 @@ def save_parameters(self) -> None: # Advanced parameters are only in session state if the view is active json_params = 
self.get_parameters_from_json() | json_params - # get a list of TOPP tools which are in session state + # get a list of TOPP tools (or tool instance names) which are in session state current_topp_tools = list( set( [ @@ -75,12 +75,16 @@ def save_parameters(self) -> None: ] ) ) - # for each TOPP tool, open the ini file + # Retrieve the instance-name β†’ real-tool-name mapping (set by input_TOPP) + tool_instance_map = st.session_state.get("_topp_tool_instance_map", {}) + # for each TOPP tool (or instance name), open the ini file for tool in current_topp_tools: - if not self.create_ini(tool): + # Resolve instance name to real tool name for create_ini / ini loading + real_tool = tool_instance_map.get(tool, tool) + if not self.create_ini(real_tool): # Could not create ini file - skip this tool continue - ini_path = Path(self.ini_dir, f"{tool}.ini") + ini_path = Path(self.ini_dir, f"{real_tool}.ini") if tool not in json_params: json_params[tool] = {} # load the param object @@ -92,19 +96,26 @@ def save_parameters(self) -> None: # Skip display keys used by multiselect widgets if key.endswith("_display"): continue - # get ini_key - ini_key = key.replace(self.topp_param_prefix, "").encode() + # get ini_key – map instance name back to real tool name + ini_key = key.replace(self.topp_param_prefix, "") + if tool != real_tool: + ini_key = ini_key.replace(f"{tool}:1:", f"{real_tool}:1:", 1) + ini_key = ini_key.encode() # get ini (default) value by ini_key ini_value = param.getValue(ini_key) is_list_param = isinstance(ini_value, list) - # check if value is different from default OR is an empty list parameter + # Effective default: _defaults value if present, else ini value + short_key = key.split(":1:")[1] + defaults = json_params.get("_defaults", {}).get(tool, {}) + default_value = defaults.get(short_key, ini_value) + # check if value is different from effective default OR is an empty list parameter if ( - (ini_value != value) - or (key.split(":1:")[1] in json_params[tool]) + 
(default_value != value) + or (short_key in json_params[tool]) or (is_list_param and not value) # Always save empty list params ): # store non-default value - json_params[tool][key.split(":1:")[1]] = value + json_params[tool][short_key] = value # Save to json file with open(self.params_file, "w", encoding="utf-8") as f: json.dump(json_params, f, indent=4) @@ -130,17 +141,44 @@ def get_parameters_from_json(self) -> dict: st.error("**ERROR**: Attempting to load an invalid JSON parameter file. Reset to defaults.") return {} - def get_topp_parameters(self, tool: str) -> dict: + def get_merged_params(self, tool_instance_name: str, ini_params: dict = None) -> dict: + """ + Three-layer parameter merge: ini defaults < _defaults < user overrides. + + Args: + tool_instance_name: Instance name (or tool name) to look up in params.json. + ini_params: Base parameters from the .ini file. Optional β€” callers that + don't need the ini layer (e.g., run_topp, which passes -ini separately) + can omit this. + + Returns: + Merged dict with the effective value for each parameter. + """ + params = self.get_parameters_from_json() + defaults = params.get("_defaults", {}).get(tool_instance_name, {}) + user = params.get(tool_instance_name, {}) + + merged = {} + if ini_params: + merged.update(ini_params) + merged.update(defaults) + merged.update(user) + return merged + + def get_topp_parameters(self, tool: str, tool_instance_name: str = None) -> dict: """ Get all parameters for a TOPP tool, merging defaults with user values. Args: - tool: Name of the TOPP tool (e.g., "CometAdapter") + tool: Name of the TOPP tool executable (e.g., "CometAdapter") + tool_instance_name: Optional instance name used for parameter storage + (e.g., "IDFilter_step1"). If not provided, defaults to tool name. Returns: Dict with parameter names as keys (without tool prefix) and their values. Returns empty dict if ini file doesn't exist. 
""" + instance_name = tool_instance_name or tool ini_path = Path(self.ini_dir, f"{tool}.ini") if not ini_path.exists(): return {} @@ -151,18 +189,14 @@ def get_topp_parameters(self, tool: str) -> dict: # Build dict from ini (extract short key names) prefix = f"{tool}:1:" - full_params = {} + ini_params = {} for key in param.keys(): key_str = key.decode() if isinstance(key, bytes) else str(key) if prefix in key_str: short_key = key_str.split(prefix, 1)[1] - full_params[short_key] = param.getValue(key) - - # Override with user-modified values from JSON - user_params = self.get_parameters_from_json().get(tool, {}) - full_params.update(user_params) + ini_params[short_key] = param.getValue(key) - return full_params + return self.get_merged_params(instance_name, ini_params=ini_params) def reset_to_default_parameters(self) -> None: """ diff --git a/src/workflow/StreamlitUI.py b/src/workflow/StreamlitUI.py index 3cca528..c088096 100644 --- a/src/workflow/StreamlitUI.py +++ b/src/workflow/StreamlitUI.py @@ -616,6 +616,7 @@ def input_TOPP( display_subsections: bool = True, display_subsection_tabs: bool = False, custom_defaults: dict = {}, + tool_instance_name: str = None, ) -> None: """ Generates input widgets for TOPP tool parameters dynamically based on the tool's @@ -631,29 +632,43 @@ def input_TOPP( display_subsections (bool, optional): Whether to split parameters into subsections based on the prefix. Defaults to True. display_subsection_tabs (bool, optional): Whether to display main subsections in separate tabs (if more than one main section). Defaults to False. custom_defaults (dict, optional): Dictionary of custom defaults to use. Defaults to an empty dict. + tool_instance_name (str, optional): A unique instance name for this tool + invocation. Allows multiple instances of the same TOPP tool with + independent parameters (e.g., two IDFilter calls). If not provided, + defaults to topp_tool_name. 
The instance name is used for session + state keys and parameter storage, while topp_tool_name is used for + the actual tool executable and ini file creation. """ + # Default instance name to the tool name when not provided + if tool_instance_name is None: + tool_instance_name = topp_tool_name + + # Register instance-name β†’ real-tool-name mapping in session state + if "_topp_tool_instance_map" not in st.session_state: + st.session_state["_topp_tool_instance_map"] = {} + st.session_state["_topp_tool_instance_map"][tool_instance_name] = topp_tool_name if not display_subsections: display_subsection_tabs = False if display_subsection_tabs: display_subsections = True - # write defaults ini files + # Create pristine ini file (never mutated with custom defaults) ini_file_path = Path(self.parameter_manager.ini_dir, f"{topp_tool_name}.ini") - ini_existed = ini_file_path.exists() if not self.parameter_manager.create_ini(topp_tool_name): st.error(f"TOPP tool **'{topp_tool_name}'** not found.") return - if not ini_existed: - # update custom defaults if necessary - if custom_defaults: - param = poms.Param() - poms.ParamXMLFile().load(str(ini_file_path), param) - for key, value in custom_defaults.items(): - encoded_key = f"{topp_tool_name}:1:{key}".encode() - if encoded_key in param.keys(): - param.setValue(encoded_key, value) - poms.ParamXMLFile().store(str(ini_file_path), param) + + # Seed custom defaults into params.json under _defaults key + if custom_defaults: + params = self.parameter_manager.get_parameters_from_json() + if "_defaults" not in params: + params["_defaults"] = {} + params["_defaults"][tool_instance_name] = custom_defaults + with open(self.parameter_manager.params_file, "w", encoding="utf-8") as f: + json.dump(params, f, indent=4) + # Refresh self.params so widget resolution sees _defaults + self.params = self.parameter_manager.get_parameters_from_json() # read into Param object param = poms.Param() @@ -730,18 +745,18 @@ def _matches_parameter(pattern: str, 
key: bytes) -> bool: ) params.append(p) - # for each parameter in params_decoded - # if a parameter with custom default value exists, use that value - # else check if the parameter is already in self.params, if yes take the value from self.params + # Build ini_params dict for three-layer merge + ini_params = {} + for p in params: + name = p["key"].decode().split(":1:")[1] + ini_params[name] = p["value"] + + # Resolve effective values: ini < _defaults < user overrides + merged = self.parameter_manager.get_merged_params(tool_instance_name, ini_params=ini_params) for p in params: name = p["key"].decode().split(":1:")[1] - if topp_tool_name in self.params: - if name in self.params[topp_tool_name]: - p["value"] = self.params[topp_tool_name][name] - elif name in custom_defaults: - p["value"] = custom_defaults[name] - elif name in custom_defaults: - p["value"] = custom_defaults[name] + if name in merged: + p["value"] = merged[name] # Ensure list parameters stay as lists after loading from JSON # (JSON may store single-item lists as strings) if p["original_is_list"] and isinstance(p["value"], str): @@ -775,7 +790,7 @@ def _matches_parameter(pattern: str, key: bytes) -> bool: # Display tool name if required if display_tool_name: - st.markdown(f"**{topp_tool_name}**") + st.markdown(f"**{tool_instance_name}**") tab_names = [k for k in param_sections.keys() if ":" not in k] tabs = None @@ -803,8 +818,11 @@ def display_TOPP_params(params: dict, num_cols): cols = st.columns(num_cols) i = 0 for p in params: - # get key and name - key = f"{self.parameter_manager.topp_param_prefix}{p['key'].decode()}" + # get key and name – use tool_instance_name in session state key + key_str = p['key'].decode() + if tool_instance_name != topp_tool_name: + key_str = key_str.replace(f"{topp_tool_name}:1:", f"{tool_instance_name}:1:", 1) + key = f"{self.parameter_manager.topp_param_prefix}{key_str}" name = p["name"] try: # sometimes strings with newline, handle as list @@ -1371,7 +1389,8 @@ def 
remove_full_paths(d: dict) -> dict: general = {} for k, v in params.items(): - # skip if v is a file path + if k == "_defaults": + continue if isinstance(v, dict): topp[k] = v elif ".py" in k: @@ -1382,6 +1401,13 @@ def remove_full_paths(d: dict) -> dict: else: general[k] = v + # Merge _defaults into topp so summary shows custom defaults + user overrides + defaults = params.get("_defaults", {}) + for tool_name, default_vals in defaults.items(): + if tool_name not in topp: + topp[tool_name] = {} + topp[tool_name] = {**default_vals, **topp.get(tool_name, {})} + markdown = [] def dict_to_markdown(d: dict): diff --git a/tests/test_parameter_defaults.py b/tests/test_parameter_defaults.py new file mode 100644 index 0000000..9ccd679 --- /dev/null +++ b/tests/test_parameter_defaults.py @@ -0,0 +1,363 @@ +""" +Tests for get_merged_params() and the refactored get_topp_parameters(). + +This module verifies the three-layer parameter merge: + ini defaults < _defaults < user overrides +""" +import os +import sys +import json +import pytest +import tempfile +from pathlib import Path +from unittest.mock import MagicMock + +# Add project root to path for imports +PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(PROJECT_ROOT) + +# Mock streamlit before importing ParameterManager so that the imported module +# uses a controllable `st.session_state` (a plain dict) instead of the real one, +# which requires a running Streamlit app context. +mock_streamlit = MagicMock() +mock_streamlit.session_state = {} + +# Temporarily replace streamlit in sys.modules so that ParameterManager's +# `import streamlit as st` picks up the mock. Restore immediately after import +# so other test files (e.g., test_gui.py AppTest) get the real streamlit. 
+_original_streamlit = sys.modules.get('streamlit') +sys.modules['streamlit'] = mock_streamlit + +from src.workflow.ParameterManager import ParameterManager + +if _original_streamlit is not None: + sys.modules['streamlit'] = _original_streamlit +else: + sys.modules.pop('streamlit', None) + +# Remove cached src.workflow modules that were imported with mocked streamlit so +# that AppTest (in test_gui.py) re-imports them fresh with the real package. +for _key in list(sys.modules.keys()): + if _key.startswith('src.workflow'): + sys.modules.pop(_key, None) + + +@pytest.fixture(autouse=True) +def reset_streamlit_state(): + """Reset mock streamlit session state before each test.""" + mock_streamlit.session_state.clear() + yield + + +@pytest.fixture +def temp_workflow_dir(): + """Create a temporary workflow directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + workflow_dir = Path(tmpdir) / "test-workflow" + workflow_dir.mkdir() + ini_dir = workflow_dir / "ini" + ini_dir.mkdir() + yield workflow_dir + + +class TestGetMergedParams: + """Tests for ParameterManager.get_merged_params().""" + + def test_returns_ini_params_when_no_json(self, temp_workflow_dir): + """ini params returned when params.json doesn't exist.""" + pm = ParameterManager(temp_workflow_dir) + ini_params = {"algorithm:param": 1.0, "algorithm:other": "hello"} + + result = pm.get_merged_params("SomeTool", ini_params=ini_params) + + assert result == {"algorithm:param": 1.0, "algorithm:other": "hello"} + + def test_defaults_override_ini(self, temp_workflow_dir): + """_defaults layer overrides ini values.""" + pm = ParameterManager(temp_workflow_dir) + params_json = { + "_defaults": {"SomeTool": {"algorithm:param": 42.0}} + } + with open(pm.params_file, "w") as f: + json.dump(params_json, f) + + result = pm.get_merged_params("SomeTool", ini_params={"algorithm:param": 1.0}) + + assert result["algorithm:param"] == 42.0 + + def test_user_overrides_defaults(self, temp_workflow_dir): + """User 
overrides take priority over _defaults.""" + pm = ParameterManager(temp_workflow_dir) + params_json = { + "_defaults": {"SomeTool": {"algorithm:param": 42.0}}, + "SomeTool": {"algorithm:param": 99.0} + } + with open(pm.params_file, "w") as f: + json.dump(params_json, f) + + result = pm.get_merged_params("SomeTool", ini_params={"algorithm:param": 1.0}) + + assert result["algorithm:param"] == 99.0 + + def test_full_three_layer_merge(self, temp_workflow_dir): + """All three layers merge correctly: ini < _defaults < user.""" + pm = ParameterManager(temp_workflow_dir) + params_json = { + "_defaults": { + "SomeTool": { + "algorithm:param_a": 10.0, # overrides ini + "algorithm:param_b": 20.0, # overrides ini, NOT overridden by user + } + }, + "SomeTool": { + "algorithm:param_a": 99.0, # overrides _defaults + "algorithm:param_c": 55.0, # only in user + } + } + with open(pm.params_file, "w") as f: + json.dump(params_json, f) + + ini_params = { + "algorithm:param_a": 1.0, + "algorithm:param_b": 2.0, + "algorithm:param_d": 3.0, # only in ini + } + + result = pm.get_merged_params("SomeTool", ini_params=ini_params) + + assert result["algorithm:param_a"] == 99.0 # user wins + assert result["algorithm:param_b"] == 20.0 # _defaults wins over ini + assert result["algorithm:param_c"] == 55.0 # user-only key present + assert result["algorithm:param_d"] == 3.0 # ini-only key present + + def test_no_ini_params(self, temp_workflow_dir): + """Works when ini_params is None.""" + pm = ParameterManager(temp_workflow_dir) + params_json = { + "_defaults": {"SomeTool": {"algorithm:param": 42.0}}, + "SomeTool": {"algorithm:other": 7.0} + } + with open(pm.params_file, "w") as f: + json.dump(params_json, f) + + result = pm.get_merged_params("SomeTool") + + assert result["algorithm:param"] == 42.0 + assert result["algorithm:other"] == 7.0 + + def test_different_instances_same_tool(self, temp_workflow_dir): + """Different instance names get independent _defaults and user overrides.""" + pm = 
ParameterManager(temp_workflow_dir) + params_json = { + "_defaults": { + "IDFilter_step1": {"score:min": 0.05}, + "IDFilter_step2": {"score:min": 0.01}, + }, + "IDFilter_step1": {"score:min": 0.001}, + } + with open(pm.params_file, "w") as f: + json.dump(params_json, f) + + result1 = pm.get_merged_params("IDFilter_step1", ini_params={"score:min": 0.5}) + result2 = pm.get_merged_params("IDFilter_step2", ini_params={"score:min": 0.5}) + + assert result1["score:min"] == 0.001 # user override for step1 + assert result2["score:min"] == 0.01 # _defaults for step2 (no user override) + + def test_empty_params_json(self, temp_workflow_dir): + """Returns empty dict when params.json is empty and no ini_params.""" + pm = ParameterManager(temp_workflow_dir) + with open(pm.params_file, "w") as f: + json.dump({}, f) + + result = pm.get_merged_params("SomeTool") + + assert result == {} + + +class TestGetToppParametersWithDefaults: + + def test_get_topp_parameters_includes_defaults(self, temp_workflow_dir): + """get_topp_parameters merges _defaults between ini and user values.""" + pm = ParameterManager(temp_workflow_dir) + params_json = { + "_defaults": {"SomeTool": {"algorithm:param": 42.0}}, + "SomeTool": {"algorithm:other": 99.0} + } + with open(pm.params_file, "w") as f: + json.dump(params_json, f) + + result = pm.get_merged_params("SomeTool", ini_params={"algorithm:param": 1.0}) + assert result["algorithm:param"] == 42.0 + assert result["algorithm:other"] == 99.0 + + +class TestDefaultsSeeding: + + def test_seed_writes_defaults_to_params_json(self, temp_workflow_dir): + """Seeding creates _defaults entry in params.json.""" + pm = ParameterManager(temp_workflow_dir) + custom_defaults = {"param_a": 10.0, "param_b": "fast"} + + # Simulate what input_TOPP seeding does + params = pm.get_parameters_from_json() + if "_defaults" not in params: + params["_defaults"] = {} + params["_defaults"]["MyTool"] = custom_defaults + with open(pm.params_file, "w") as f: + json.dump(params, f) + + 
# Verify + loaded = pm.get_parameters_from_json() + assert loaded["_defaults"]["MyTool"] == {"param_a": 10.0, "param_b": "fast"} + + def test_seed_is_idempotent(self, temp_workflow_dir): + """Seeding the same tool twice overwrites cleanly.""" + pm = ParameterManager(temp_workflow_dir) + + # First seed + params = {"_defaults": {"Tool": {"p1": 1.0}}, "other_key": "keep"} + with open(pm.params_file, "w") as f: + json.dump(params, f) + + # Second seed with updated defaults + params = pm.get_parameters_from_json() + params["_defaults"]["Tool"] = {"p1": 2.0} + with open(pm.params_file, "w") as f: + json.dump(params, f) + + loaded = pm.get_parameters_from_json() + assert loaded["_defaults"]["Tool"]["p1"] == 2.0 + assert loaded["other_key"] == "keep" + + def test_seed_multiple_instances(self, temp_workflow_dir): + """Different instances of the same tool get independent _defaults.""" + pm = ParameterManager(temp_workflow_dir) + params = { + "_defaults": { + "IDFilter_strict": {"score:pep": 0.01}, + "IDFilter_lenient": {"score:pep": 0.05}, + } + } + with open(pm.params_file, "w") as f: + json.dump(params, f) + + loaded = pm.get_parameters_from_json() + assert loaded["_defaults"]["IDFilter_strict"]["score:pep"] == 0.01 + assert loaded["_defaults"]["IDFilter_lenient"]["score:pep"] == 0.05 + + +try: + import pyopenms as poms + HAS_PYOPENMS = True +except ImportError: + HAS_PYOPENMS = False + + +@pytest.mark.skipif(not HAS_PYOPENMS, reason="pyopenms not available") +class TestSaveParametersWithDefaults: + + def _create_fake_ini(self, pm, tool_name, params_dict): + """Create a fake .ini file with given parameters.""" + param = poms.Param() + for key, value in params_dict.items(): + param.setValue(f"{tool_name}:1:{key}".encode(), value) + poms.ParamXMLFile().store(str(Path(pm.ini_dir, f"{tool_name}.ini")), param) + + def test_value_matching_custom_default_not_saved(self, temp_workflow_dir): + """A value equal to the _defaults entry should not be saved as a user override.""" + pm = 
ParameterManager(temp_workflow_dir) + + # Create a fake ini with a default value + self._create_fake_ini(pm, "Tool", {"param_a": 10.0}) + + # Pre-seed _defaults with a different value than ini + params = {"_defaults": {"Tool": {"param_a": 42.0}}} + with open(pm.params_file, "w") as f: + json.dump(params, f) + + # Session state has value matching the custom default (42.0), not the ini default (10.0) + mock_streamlit.session_state[f"{pm.topp_param_prefix}Tool:1:param_a"] = 42.0 + mock_streamlit.session_state["_topp_tool_instance_map"] = {"Tool": "Tool"} + + pm.save_parameters() + + with open(pm.params_file, "r") as f: + saved = json.load(f) + + # param_a should NOT appear under Tool (it matches the _defaults value) + assert "param_a" not in saved.get("Tool", {}) + # _defaults should still be present + assert saved["_defaults"]["Tool"]["param_a"] == 42.0 + + def test_value_different_from_custom_default_saved(self, temp_workflow_dir): + """A value different from _defaults entry should be saved as user override.""" + pm = ParameterManager(temp_workflow_dir) + + # Create a fake ini with a default value + self._create_fake_ini(pm, "Tool", {"param_a": 10.0}) + + params = {"_defaults": {"Tool": {"param_a": 42.0}}} + with open(pm.params_file, "w") as f: + json.dump(params, f) + + mock_streamlit.session_state[f"{pm.topp_param_prefix}Tool:1:param_a"] = 99.0 + mock_streamlit.session_state["_topp_tool_instance_map"] = {"Tool": "Tool"} + + pm.save_parameters() + + with open(pm.params_file, "r") as f: + saved = json.load(f) + + assert saved["Tool"]["param_a"] == 99.0 + + +class TestNonDefaultParamsSummaryDefaults: + + def test_defaults_key_excluded_from_classification(self): + """_defaults dict should not appear as a TOPP tool in the summary.""" + params = { + "_defaults": {"Tool": {"p1": 10}}, + "Tool": {"p1": 20}, + "general_param": "value" + } + # Simulate the classification logic + topp = {} + general = {} + for k, v in params.items(): + if k == "_defaults": + continue + if 
isinstance(v, dict): + topp[k] = v + else: + general[k] = v + + assert "_defaults" not in topp + assert "Tool" in topp + assert "general_param" in general + + def test_defaults_merged_into_summary(self): + """_defaults values should appear in summary merged with user overrides.""" + params = { + "_defaults": { + "ToolA": {"p1": 10, "p2": 20}, + "ToolB": {"p3": 30} + }, + "ToolA": {"p1": 99} + } + # Simulate the merge logic for summary + topp = {} + for k, v in params.items(): + if k == "_defaults": + continue + if isinstance(v, dict): + topp[k] = v + + defaults = params.get("_defaults", {}) + for tool_name, default_vals in defaults.items(): + if tool_name not in topp: + topp[tool_name] = {} + topp[tool_name] = {**default_vals, **topp.get(tool_name, {})} + + assert topp["ToolA"] == {"p1": 99, "p2": 20} # user override wins for p1 + assert topp["ToolB"] == {"p3": 30} # defaults-only tool appears diff --git a/tests/test_tool_instance_name.py b/tests/test_tool_instance_name.py new file mode 100644 index 0000000..cd060ca --- /dev/null +++ b/tests/test_tool_instance_name.py @@ -0,0 +1,268 @@ +""" +Tests for the tool_instance_name functionality. + +This module verifies that save_parameters correctly resolves tool instance names +to real tool names when calling create_ini, and that parameters are stored and +retrieved using the instance name as the key. 
+""" +import os +import sys +import json +import pytest +import tempfile +from pathlib import Path +from unittest.mock import patch, MagicMock, call + +# Add project root to path for imports +PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(PROJECT_ROOT) + +# Mock streamlit before importing ParameterManager +mock_streamlit = MagicMock() +mock_streamlit.session_state = {} + +_original_streamlit = sys.modules.get('streamlit') +sys.modules['streamlit'] = mock_streamlit + +from src.workflow.ParameterManager import ParameterManager + +if _original_streamlit is not None: + sys.modules['streamlit'] = _original_streamlit +else: + sys.modules.pop('streamlit', None) + +# Remove cached src.workflow modules +for _key in list(sys.modules.keys()): + if _key.startswith('src.workflow'): + sys.modules.pop(_key, None) + + +@pytest.fixture +def temp_workflow_dir(): + """Create a temporary workflow directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + workflow_dir = Path(tmpdir) / "test-workflow" + workflow_dir.mkdir() + ini_dir = workflow_dir / "ini" + ini_dir.mkdir() + yield workflow_dir + + +@pytest.fixture(autouse=True) +def reset_streamlit_state(): + """Reset mock streamlit session state before each test.""" + mock_streamlit.session_state.clear() + yield + + +class TestSaveParametersWithInstanceName: + """Tests for save_parameters correctly resolving tool instance names.""" + + def test_save_parameters_uses_real_tool_name_for_create_ini(self, temp_workflow_dir): + """Test that save_parameters resolves instance name to real tool name + before calling create_ini.""" + pm = ParameterManager(temp_workflow_dir) + + # Simulate session state with an instance name (IDFilter_step1) + # that differs from the real tool name (IDFilter) + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step1:1:score:pep"] = 0.05 + # Register instance mapping (as input_TOPP would) + 
mock_streamlit.session_state["_topp_tool_instance_map"] = { + "IDFilter_step1": "IDFilter" + } + + # Mock create_ini to track calls - return False (tool not found) + # to prevent further processing that requires actual ini files + with patch.object(pm, 'create_ini', return_value=False) as mock_create_ini: + pm.save_parameters() + + # Verify create_ini was called with the REAL tool name, not the instance name + mock_create_ini.assert_called_once_with("IDFilter") + + def test_save_parameters_without_instance_map_uses_tool_name_directly(self, temp_workflow_dir): + """Test that save_parameters works normally when no instance map exists + (backward compatibility).""" + pm = ParameterManager(temp_workflow_dir) + + # Simulate session state with a normal tool name (no instance mapping) + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter:1:score:pep"] = 0.05 + + with patch.object(pm, 'create_ini', return_value=False) as mock_create_ini: + pm.save_parameters() + + # Should use the tool name directly + mock_create_ini.assert_called_once_with("IDFilter") + + def test_save_parameters_stores_under_instance_name(self, temp_workflow_dir): + """Test that parameters are stored in JSON under the instance name, not + the real tool name.""" + pm = ParameterManager(temp_workflow_dir) + + # Create a mock ini file for IDFilter + ini_path = temp_workflow_dir / "ini" / "IDFilter.ini" + ini_path.touch() + + # Set up instance mapping + mock_streamlit.session_state["_topp_tool_instance_map"] = { + "IDFilter_step1": "IDFilter" + } + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step1:1:score:pep"] = 0.05 + + # Mock pyopenms Param and ParamXMLFile to avoid needing real ini files + mock_param = MagicMock() + mock_param.getValue.return_value = 0.01 # Different from session state value + + with patch.object(pm, 'create_ini', return_value=True), \ + patch('pyopenms.Param', return_value=mock_param), \ + patch('pyopenms.ParamXMLFile') as mock_xml: + pm.save_parameters() + + 
# Load saved parameters + with open(pm.params_file, "r") as f: + saved = json.load(f) + + # Parameters should be stored under the instance name + assert "IDFilter_step1" in saved + assert saved["IDFilter_step1"]["score:pep"] == 0.05 + + def test_save_parameters_multiple_instances_same_tool(self, temp_workflow_dir): + """Test that two instances of the same tool get separate parameter entries.""" + pm = ParameterManager(temp_workflow_dir) + + ini_path = temp_workflow_dir / "ini" / "IDFilter.ini" + ini_path.touch() + + # Set up two instances with different parameter values + mock_streamlit.session_state["_topp_tool_instance_map"] = { + "IDFilter_step1": "IDFilter", + "IDFilter_step2": "IDFilter", + } + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step1:1:score:pep"] = 0.01 + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step2:1:score:pep"] = 0.05 + + mock_param = MagicMock() + mock_param.getValue.return_value = 0.0 # Default differs from both + + with patch.object(pm, 'create_ini', return_value=True), \ + patch('pyopenms.Param', return_value=mock_param), \ + patch('pyopenms.ParamXMLFile'): + pm.save_parameters() + + with open(pm.params_file, "r") as f: + saved = json.load(f) + + # Both instances should have separate entries + assert "IDFilter_step1" in saved + assert "IDFilter_step2" in saved + assert saved["IDFilter_step1"]["score:pep"] == 0.01 + assert saved["IDFilter_step2"]["score:pep"] == 0.05 + + def test_save_parameters_ini_key_maps_instance_to_real_tool(self, temp_workflow_dir): + """Test that ini_key correctly maps instance name back to real tool name + for param.getValue lookup.""" + pm = ParameterManager(temp_workflow_dir) + + ini_path = temp_workflow_dir / "ini" / "IDFilter.ini" + ini_path.touch() + + mock_streamlit.session_state["_topp_tool_instance_map"] = { + "IDFilter_step1": "IDFilter" + } + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step1:1:score:pep"] = 0.05 + + mock_param = MagicMock() + 
mock_param.getValue.return_value = 0.01 + + with patch.object(pm, 'create_ini', return_value=True), \ + patch('pyopenms.Param', return_value=mock_param), \ + patch('pyopenms.ParamXMLFile'): + pm.save_parameters() + + # Verify that param.getValue was called with the REAL tool name key + # (IDFilter:1:score:pep), not the instance name key (IDFilter_step1:1:score:pep) + mock_param.getValue.assert_called_with(b"IDFilter:1:score:pep") + + def test_save_parameters_display_keys_skipped_with_instance_name(self, temp_workflow_dir): + """Test that _display keys are still skipped when using instance names.""" + pm = ParameterManager(temp_workflow_dir) + + ini_path = temp_workflow_dir / "ini" / "IDFilter.ini" + ini_path.touch() + + mock_streamlit.session_state["_topp_tool_instance_map"] = { + "IDFilter_step1": "IDFilter" + } + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step1:1:score:pep"] = 0.05 + mock_streamlit.session_state[f"{pm.topp_param_prefix}IDFilter_step1:1:score:pep_display"] = ["0.05"] + + mock_param = MagicMock() + mock_param.getValue.return_value = 0.01 + + with patch.object(pm, 'create_ini', return_value=True), \ + patch('pyopenms.Param', return_value=mock_param), \ + patch('pyopenms.ParamXMLFile'): + pm.save_parameters() + + with open(pm.params_file, "r") as f: + saved = json.load(f) + + # _display key should not be stored + assert "score:pep_display" not in saved.get("IDFilter_step1", {}) + assert "score:pep" in saved.get("IDFilter_step1", {}) + + +class TestGetToppParametersWithInstanceName: + """Tests for get_topp_parameters with tool_instance_name.""" + + def test_get_topp_parameters_with_instance_name(self, temp_workflow_dir): + """Test that get_topp_parameters uses instance name for JSON lookup.""" + pm = ParameterManager(temp_workflow_dir) + + # Create params.json with instance-keyed parameters + params = { + "IDFilter_step1": { + "score:pep": 0.05 + } + } + with open(pm.params_file, "w") as f: + json.dump(params, f) + + # Create a mock 
ini file + ini_path = temp_workflow_dir / "ini" / "IDFilter.ini" + ini_path.touch() + + mock_param = MagicMock() + mock_param.keys.return_value = [b"IDFilter:1:score:pep"] + mock_param.getValue.return_value = 0.01 # default + + with patch('pyopenms.Param', return_value=mock_param), \ + patch('pyopenms.ParamXMLFile'): + result = pm.get_topp_parameters("IDFilter", tool_instance_name="IDFilter_step1") + + # Should return the instance-specific value + assert result["score:pep"] == 0.05 + + def test_get_topp_parameters_without_instance_name_backward_compat(self, temp_workflow_dir): + """Test that get_topp_parameters works without instance name (backward compat).""" + pm = ParameterManager(temp_workflow_dir) + + params = { + "IDFilter": { + "score:pep": 0.05 + } + } + with open(pm.params_file, "w") as f: + json.dump(params, f) + + ini_path = temp_workflow_dir / "ini" / "IDFilter.ini" + ini_path.touch() + + mock_param = MagicMock() + mock_param.keys.return_value = [b"IDFilter:1:score:pep"] + mock_param.getValue.return_value = 0.01 + + with patch('pyopenms.Param', return_value=mock_param), \ + patch('pyopenms.ParamXMLFile'): + result = pm.get_topp_parameters("IDFilter") + + assert result["score:pep"] == 0.05 From e3c3665a7e53814f1c057aa4b57a0bd01f7a21fb Mon Sep 17 00:00:00 2001 From: Tom David Mueller Date: Tue, 7 Apr 2026 13:40:39 +0200 Subject: [PATCH 07/11] Remove redundant -ini flag from run_topp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .ini file is no longer mutated with custom defaults — it stays pristine. All parameters (_defaults + user overrides) are already passed as CLI flags via get_merged_params(), making -ini redundant. 
--- src/workflow/CommandExecutor.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/workflow/CommandExecutor.py b/src/workflow/CommandExecutor.py index bfea082..14d28a3 100644 --- a/src/workflow/CommandExecutor.py +++ b/src/workflow/CommandExecutor.py @@ -310,11 +310,6 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}, tool command += ["-threads", str(threads_per_command)] commands.append(command) - # check if a ini file has been written, if yes use it (contains custom defaults) - ini_path = Path(self.parameter_manager.ini_dir, tool + ".ini") - if ini_path.exists(): - command += ["-ini", str(ini_path)] - # Run command(s) if len(commands) == 1: return self.run_command(commands[0]) From ef8c83a4d02e19df10aac02f9fb979a7da10e5c2 Mon Sep 17 00:00:00 2001 From: Yoo HoJun Date: Fri, 22 May 2026 11:43:36 +0900 Subject: [PATCH 08/11] feat: add TMT analysis result visualization pages and full pipeline workflow - Add visualization scripts for abundance, database search, heatmap, etc. - Add `get_abundance_data` helper in `results_helpers.py` to load data. - Update `src/Workflow.py` to support TMT analysis pipeline and group assignment UI. - Register new result visualization pages and clean up legacy menus in `app.py`. 
--- app.py | 34 +- content/results_abundance.py | 132 ++++ content/results_database_search.py | 79 +++ content/results_filtered.py | 79 +++ content/results_heatmap.py | 76 ++ content/results_pathway.py | 253 +++++++ content/results_pca.py | 103 +++ content/results_rescoring.py | 80 +++ content/results_volcano.py | 109 +++ src/Workflow.py | 1040 ++++++++++++++++++++++++++-- src/common/results_helpers.py | 483 +++++++++++++ 11 files changed, 2390 insertions(+), 78 deletions(-) create mode 100644 content/results_abundance.py create mode 100644 content/results_database_search.py create mode 100644 content/results_filtered.py create mode 100644 content/results_heatmap.py create mode 100644 content/results_pathway.py create mode 100644 content/results_pca.py create mode 100644 content/results_rescoring.py create mode 100644 content/results_volcano.py create mode 100644 src/common/results_helpers.py diff --git a/app.py b/app.py index d9b3d73..cec8d4e 100644 --- a/app.py +++ b/app.py @@ -8,33 +8,35 @@ with open("settings.json", "r") as f: st.session_state.settings = json.load(f) +# Initialize session state for workspace +if "chosen-workspace" not in st.session_state: + if "workspace" in st.session_state: + st.session_state["chosen-workspace"] = str(st.session_state.workspace.stem) + else: + st.session_state["chosen-workspace"] = "default" + if __name__ == '__main__': pages = { str(st.session_state.settings["app-name"]) : [ st.Page(Path("content", "quickstart.py"), title="Quickstart", icon="πŸ‘‹"), st.Page(Path("content", "documentation.py"), title="Documentation", icon="πŸ“–"), ], - "pyOpenMS Toolbox": [ - st.Page(Path("content", "digest.py"), title="In Silico Digest", icon="βœ‚οΈ"), - st.Page(Path("content", "peptide_mz_calculator.py"), title="m/z Calculator", icon="βš–οΈ"), - st.Page(Path("content", "isotope_pattern_generator.py"), title="Isotopic Pattern Calculator", icon="πŸ“Ά"), - st.Page(Path("content", "fragmentation.py"), title="Fragment Ion Generation", 
icon="πŸ’₯"), - ], "TOPP Workflow Framework": [ st.Page(Path("content", "topp_workflow_file_upload.py"), title="File Upload", icon="πŸ“"), st.Page(Path("content", "topp_workflow_parameter.py"), title="Configure", icon="βš™οΈ"), st.Page(Path("content", "topp_workflow_execution.py"), title="Run", icon="πŸš€"), - st.Page(Path("content", "topp_workflow_results.py"), title="Results", icon="πŸ“Š"), - ], - "pyOpenMS Workflow" : [ - st.Page(Path("content", "file_upload.py"), title="File Upload", icon="πŸ“‚"), - st.Page(Path("content", "raw_data_viewer.py"), title="View MS data", icon="πŸ‘€"), - st.Page(Path("content", "run_example_workflow.py"), title="Run Workflow", icon="βš™οΈ"), - st.Page(Path("content", "download_section.py"), title="Download Results", icon="⬇️"), + # st.Page(Path("content", "topp_workflow_results.py"), title="Results", icon="πŸ“Š"), ], - "Others Topics": [ - st.Page(Path("content", "simple_workflow.py"), title="Simple Workflow", icon="βš™οΈ"), - st.Page(Path("content", "run_subprocess.py"), title="Run Subprocess", icon="πŸ–₯️"), + "Results": [ + st.Page(Path("content", "results_database_search.py"), title="Database Search", icon="πŸ”¬"), + st.Page(Path("content", "results_rescoring.py"), title="Rescoring", icon="πŸ“ˆ"), + st.Page(Path("content", "results_filtered.py"), title="Filtered PSMs", icon="🎯"), + st.Page(Path("content", "results_abundance.py"), title="Abundance", icon="πŸ“‹"), + st.Page(Path("content", "results_volcano.py"), title="Volcano", icon="πŸŒ‹"), + st.Page(Path("content", "results_pca.py"), title="PCA", icon="πŸ“Š"), + st.Page(Path("content", "results_heatmap.py"), title="Heatmap", icon="πŸ”₯"), + # st.Page(Path("content", "results_library.py"), title="Spectral Library", icon="πŸ“š"), + st.Page(Path("content", "results_pathway.py"), title="Pathway Analysis", icon="πŸ§ͺ"), ] } diff --git a/content/results_abundance.py b/content/results_abundance.py new file mode 100644 index 0000000..baf5cb9 --- /dev/null +++ 
b/content/results_abundance.py @@ -0,0 +1,132 @@ +"""Abundance (ProteomicsLFQ) Results Page.""" +import streamlit as st +import pandas as pd +import numpy as np +from pathlib import Path +from scipy.stats import ttest_ind +from src.common.common import page_setup +from statsmodels.stats.multitest import multipletests +from src.common.results_helpers import get_workflow_dir, get_abundance_data + +params = page_setup() +st.title("Abundance Quantification") + +st.markdown( + """ +View protein and PSM-level quantification from **ProteomicsLFQ**. +This page calculates differential expression statistics between sample groups. +""" +) + +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +results_dir = Path(st.session_state["workspace"]) / "topp-workflow" / "results" / "quant_results" +consensus_out = results_dir / "openms_design_protein_openms.csv" + +@st.cache_data +def load_data(file_path): + return pd.read_csv(file_path, sep="\t", comment="#", engine="python") + +if consensus_out.exists(): + + # df = load_data(consensus_out) + # # ratio column removal + # df = df.loc[:, ~df.columns.str.contains('ratio', case=False)] + + pre_processing_tab, protein_tab = st.tabs(["Pre-processing", "Protein Table"]) + + with pre_processing_tab: + # result = get_abundance_data(st.session_state["workspace"]) + # DEBUG: 상세 원인 좜λ ₯ (μž„μ‹œ) + try: + result = get_abundance_data(st.session_state["workspace"]) + except Exception as e: + st.exception(e) + result = None + + if result is None: + ws = st.session_state.get("workspace") + st.error("Debug: get_abundance_data returned None") + st.write("workspace:", ws) + wf = Path(ws) / "topp-workflow" + st.write("workflow dir exists:", wf.exists(), "->", wf) + qdir = wf / "results" / "quant_results" + st.write("quant_dir exists:", qdir.exists(), "->", qdir) + if qdir.exists(): + st.write("csv files:", sorted([p.name for p in qdir.glob('*.csv')])) + # show cached param snapshot if available + 
try: + from src.workflow.ParameterManager import ParameterManager + pm = ParameterManager(wf) + st.write("parameters keys (sample):", list(pm.get_parameters_from_json().items())[:20]) + except Exception as e: + st.write("Param manager error:", e) + st.stop() + + if result is None: + st.info("πŸ’‘ Please complete the configuration in the 'Configure' page to see results.") + st.stop() + + pivot_df, expr_df, group_map = result + + st.write("### Final Results (Group row removed, Stats added)") + st.dataframe(pivot_df.head(10)) + + + with protein_tab: + st.markdown("### Protein-Level Abundance Table") + + st.info( + "This protein-level table is generated by grouping all PSMs that map to the " + "same protein and aggregating their intensities across samples.\n\n" + "Additionally, log2 fold change and p-values are calculated between sample groups." + ) + + # Display group comparison info + groups = sorted(set(group_map.values())) + if len(groups) >= 2: + group1, group2 = sorted(groups)[:2] + st.info(f"Statistical comparison: **{group2} vs {group1}**") + + exclude_cols = ["protein", "log2FC", "p-value", "p-adj", + "n_proteins", "n_peptides", "protein_score"] + + # Get sample columns (between stats and PeptideSequence) + sample_cols = [c for c in pivot_df.columns if c + not in exclude_cols and "ratio" not in c.lower()] + + # Create bar chart column with log2-transformed values + pivot_df["Intensity"] = pivot_df[sample_cols].apply( + lambda row: [np.log2(v + 1) for v in row], axis=1 + ) + + # Reorder columns: place Intensity after p-value + display_cols = ["protein", "log2FC", "p-value", "Intensity"] + sample_cols + available_cols = [c for c in display_cols if c in pivot_df.columns] + + st.dataframe( + pivot_df[available_cols].sort_values("p-value"), + column_config={ + "Intensity": st.column_config.BarChartColumn( + "Intensity", + help="Sample intensities (log2 scale)", + width="small", + y_min=0, + ), + }, + width="stretch" + ) +else: + st.warning(f"File not found: 
{consensus_out}") + +st.markdown("---") +st.markdown("**Next steps:** Explore statistical visualizations") +col1, col2, col3 = st.columns(3) +with col1: + st.page_link("content/results_volcano.py", label="Volcano Plot", icon="πŸŒ‹") +with col2: + st.page_link("content/results_pca.py", label="PCA", icon="πŸ“Š") +with col3: + st.page_link("content/results_heatmap.py", label="Heatmap", icon="πŸ”₯") diff --git a/content/results_database_search.py b/content/results_database_search.py new file mode 100644 index 0000000..cd04e69 --- /dev/null +++ b/content/results_database_search.py @@ -0,0 +1,79 @@ +"""Database Search (Comet) Results Page.""" +import streamlit as st +from pathlib import Path +from src.common.common import page_setup +from src.common.results_helpers import get_workflow_dir +from openms_insight import Table, Heatmap, LinePlot, SequenceView, StateManager + +params = page_setup() +st.title("Database Search Results") + +st.markdown( + """ +View peptide-spectrum matches (PSMs) identified by **Comet** database search. +Click on a PSM to view the annotated spectrum and peptide sequence. +""" +) + +st.info( + "**Score:** The e-value (expectation value) represents the expected number of random PSMs " + "with an equal or better score. Lower values indicate higher confidence identifications." +) + +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +workflow_dir = get_workflow_dir(st.session_state["workspace"]) +comet_dir = workflow_dir / "results" / "comet_results" +cache_dir = workflow_dir / "results" / "insight_cache" + +if not comet_dir.exists(): + st.info("No database search results available yet. 
Please run the workflow first.") + st.page_link("content/workflow_run.py", label="Go to Run", icon="πŸš€") + st.stop() + +comet_files = sorted(comet_dir.glob("*.idXML")) + +if not comet_files: + st.warning("No identification output files found.") + st.stop() + +selected_file = st.selectbox( + "Select identification result file", + comet_files, + format_func=lambda x: x.name +) + +cache_id_prefix = selected_file.stem + +# Check if cache exists +if not (cache_dir / f"table_{cache_id_prefix}").is_dir(): + st.warning("Visualization cache not found. Please re-run the workflow.") + st.stop() + +# Initialize state manager for cross-component linking +state_manager = StateManager() + +# Load components from cache (no data parameter needed) +table = Table(cache_id=f"table_{cache_id_prefix}", cache_path=str(cache_dir)) +heatmap = Heatmap(cache_id=f"heatmap_{cache_id_prefix}", cache_path=str(cache_dir)) +seq_view = SequenceView(cache_id=f"seqview_{cache_id_prefix}", cache_path=str(cache_dir)) +line_plot = LinePlot(cache_id=f"lineplot_{cache_id_prefix}", cache_path=str(cache_dir)) + +# Render components +st.subheader("PSM Overview") +heatmap(state_manager=state_manager, height=350) + +st.subheader("PSM Table") +table(state_manager=state_manager, height=533) + +st.subheader("Peptide Sequence") +seq_view(key=f"seqview_{cache_id_prefix}", state_manager=state_manager, height=533) + +st.subheader("MS2 Spectrum") +line_plot(key=f"lineplot_{cache_id_prefix}", state_manager=state_manager, height=450, sequence_view_key=f"seqview_{cache_id_prefix}") + +st.markdown("---") +st.markdown("**Next step:** View rescoring results") +st.page_link("content/results_rescoring.py", label="Go to Rescoring", icon="πŸ“ˆ") diff --git a/content/results_filtered.py b/content/results_filtered.py new file mode 100644 index 0000000..a108ebd --- /dev/null +++ b/content/results_filtered.py @@ -0,0 +1,79 @@ +"""Filtered PSMs Results Page.""" +import streamlit as st +from pathlib import Path +from 
src.common.common import page_setup +from src.common.results_helpers import get_workflow_dir +from openms_insight import Table, Heatmap, LinePlot, SequenceView, StateManager + +params = page_setup() +st.title("Filtered PSMs") + +st.markdown( + """ +View FDR-controlled peptide identifications after **IDFilter** processing. +These are high-confidence PSMs that passed the specified FDR threshold. +Click on a PSM to view the annotated spectrum and peptide sequence. +""" +) + +st.info( + "**Score:** The q-value represents the minimum false discovery rate (FDR) at which this PSM " + "would be accepted. Lower values indicate higher confidence identifications." +) + +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +workflow_dir = get_workflow_dir(st.session_state["workspace"]) +filter_dir = workflow_dir / "results" / "psm_filter" +cache_dir = workflow_dir / "results" / "insight_cache" + +if not filter_dir.exists(): + st.info("No filtered results available yet. Please run the workflow first.") + st.stop() + +filter_files = sorted(filter_dir.glob("*.idXML")) + +if not filter_files: + st.warning("No filtering output files found.") + st.stop() + +selected_file = st.selectbox( + "Select filtering result file", + filter_files, + format_func=lambda x: x.name +) + +cache_id_prefix = selected_file.stem + +# Check if cache exists +if not (cache_dir / f"table_{cache_id_prefix}").is_dir(): + st.warning("Visualization cache not found. 
Please re-run the workflow.") + st.stop() + +# Initialize state manager for cross-component linking +state_manager = StateManager() + +# Load components from cache (no data parameter needed) +table = Table(cache_id=f"table_{cache_id_prefix}", cache_path=str(cache_dir)) +heatmap = Heatmap(cache_id=f"heatmap_{cache_id_prefix}", cache_path=str(cache_dir)) +seq_view = SequenceView(cache_id=f"seqview_{cache_id_prefix}", cache_path=str(cache_dir)) +line_plot = LinePlot(cache_id=f"lineplot_{cache_id_prefix}", cache_path=str(cache_dir)) + +# Render components +st.subheader("PSM Overview") +heatmap(state_manager=state_manager, height=350) + +st.subheader("PSM Table") +table(state_manager=state_manager, height=533) + +st.subheader("Peptide Sequence") +seq_view(key=f"seqview_{cache_id_prefix}", state_manager=state_manager, height=533) + +st.subheader("MS2 Spectrum") +line_plot(key=f"lineplot_{cache_id_prefix}", state_manager=state_manager, height=450, sequence_view_key=f"seqview_{cache_id_prefix}") + +st.markdown("---") +st.markdown("**Next step:** View abundance quantification") +st.page_link("content/results_abundance.py", label="Go to Abundance", icon="πŸ“‹") diff --git a/content/results_heatmap.py b/content/results_heatmap.py new file mode 100644 index 0000000..5a7ba47 --- /dev/null +++ b/content/results_heatmap.py @@ -0,0 +1,76 @@ +"""Heatmap Results Page.""" +import streamlit as st +import numpy as np +import plotly.express as px +from scipy.cluster.hierarchy import linkage, leaves_list +from scipy.spatial.distance import pdist +from src.common.common import page_setup +from src.common.results_helpers import get_abundance_data + +params = page_setup() +st.title("Heatmap") + +st.markdown( + """ +Hierarchically clustered heatmap of protein-level abundance (Z-score normalized). +Proteins and samples are ordered by similarity. 
+""" +) + +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +result = get_abundance_data(st.session_state["workspace"]) +if result is None: + st.info("Abundance data not available. Please run the workflow and configure sample groups first.") + st.page_link("content/results_abundance.py", label="Go to Abundance", icon="πŸ“‹") + st.stop() + +pivot_df, expr_df, group_map = result + +top_n = st.slider("Number of proteins", 20, 200, 50, key="heatmap_top_n") + +var_series = expr_df.var(axis=1) +top_proteins = var_series.sort_values(ascending=False).head(top_n).index +heatmap_df = expr_df.loc[top_proteins] +heatmap_z = heatmap_df.sub(heatmap_df.mean(axis=1), axis=0).div(heatmap_df.std(axis=1), axis=0) +heatmap_z = heatmap_z.replace([np.inf, -np.inf], np.nan).dropna() + +if not heatmap_z.empty: + row_linkage = linkage(pdist(heatmap_z.values), method="average") + row_order = leaves_list(row_linkage) + + col_linkage = linkage(pdist(heatmap_z.T.values), method="average") + col_order = leaves_list(col_linkage) + + heatmap_clustered = heatmap_z.iloc[row_order, col_order] + + fig_heatmap = px.imshow( + heatmap_clustered, + labels=dict(x="Sample", y="Protein", color="Z-score"), + aspect="auto", + color_continuous_scale=[[0.0, "#3b6fb6"], [0.5, "white"], [1.0, "#b40426"]], + zmin=-3, zmax=3 + ) + + fig_heatmap.update_layout( + height=700, + xaxis={'side': 'bottom'}, + yaxis={'side': 'left'} + ) + + fig_heatmap.update_xaxes(tickfont=dict(size=10)) + fig_heatmap.update_yaxes(tickfont=dict(size=8)) + + st.plotly_chart(fig_heatmap, width="stretch") +else: + st.warning("Insufficient data to generate the heatmap.") + +st.markdown("---") +st.markdown("**Other visualizations:**") +col1, col2 = st.columns(2) +with col1: + st.page_link("content/results_volcano.py", label="Volcano Plot", icon="πŸŒ‹") +with col2: + st.page_link("content/results_pca.py", label="PCA", icon="πŸ“Š") diff --git a/content/results_pathway.py 
b/content/results_pathway.py new file mode 100644 index 0000000..e6b1376 --- /dev/null +++ b/content/results_pathway.py @@ -0,0 +1,253 @@ +import mygene +import streamlit as st +import pandas as pd +import numpy as np +import plotly.express as px +from pathlib import Path +from collections import defaultdict +from scipy.stats import fisher_exact +from src.common.common import page_setup +from src.common.results_helpers import get_abundance_data + + +# ================================ +# Page setup +# ================================ +params = page_setup() +st.title("ProteomicsLFQ Results") + +# ================================ +# Workspace check +# ================================ +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +# ================================ +# _run_go_enrichment function +# ================================ +def _run_go_enrichment(pivot_df: pd.DataFrame, results_dir: Path): + p_cutoff = 0.05 + fc_cutoff = 1.0 + + analysis_df = pivot_df.dropna(subset=["p-value", "log2FC"]).copy() + + if analysis_df.empty: + st.error("No valid statistical data found for GO enrichment.") + st.write("❗ analysis_df is empty") + else: + with st.spinner("Fetching GO terms from MyGene.info API..."): + mg = mygene.MyGeneInfo() + + def get_clean_uniprot(name): + parts = str(name).split("|") + return parts[1] if len(parts) >= 2 else parts[0] + + analysis_df["UniProt"] = analysis_df["protein"].apply(get_clean_uniprot) + + bg_ids = analysis_df["UniProt"].dropna().astype(str).unique().tolist() + fg_ids = analysis_df[ + (analysis_df["p-value"] < p_cutoff) & + (analysis_df["log2FC"].abs() >= fc_cutoff) + ]["UniProt"].dropna().astype(str).unique().tolist() + # st.write("βœ… get_clean_uniprot applied") + + if len(fg_ids) < 3: + st.warning( + f"Not enough significant proteins " + f"(p < {p_cutoff}, |log2FC| β‰₯ {fc_cutoff}). 
" + f"Found: {len(fg_ids)}" + ) + st.write("❗ Not enough significant proteins") + else: + res_list = mg.querymany( + bg_ids, scopes="uniprot", fields="go", as_dataframe=False + ) + res_go = pd.DataFrame(res_list) + if "notfound" in res_go.columns: + res_go = res_go[res_go["notfound"] != True] + + def extract_go_terms(go_data, go_type): + if not isinstance(go_data, dict) or go_type not in go_data: + return [] + terms = go_data[go_type] + if isinstance(terms, dict): + terms = [terms] + return list({t.get("term") for t in terms if "term" in t}) + + for go_type in ["BP", "CC", "MF"]: + res_go[f"{go_type}_terms"] = res_go["go"].apply( + lambda x: extract_go_terms(x, go_type) + ) + + annotated_ids = set(res_go["query"].astype(str)) + fg_set = annotated_ids.intersection(fg_ids) + bg_set = annotated_ids + # st.write(f"βœ… fg_set bg_set are set") + + def run_go(go_type): + go2fg = defaultdict(set) + go2bg = defaultdict(set) + + for _, row in res_go.iterrows(): + uid = str(row["query"]) + for term in row[f"{go_type}_terms"]: + go2bg[term].add(uid) + if uid in fg_set: + go2fg[term].add(uid) + + records = [] + N_fg = len(fg_set) + N_bg = len(bg_set) + + for term, fg_genes in go2fg.items(): + a = len(fg_genes) + if a == 0: + continue + b = N_fg - a + c = len(go2bg[term]) - a + d = N_bg - (a + b + c) + + _, p = fisher_exact([[a, b], [c, d]], alternative="greater") + records.append({ + "GO_Term": term, + "Count": a, + "GeneRatio": f"{a}/{N_fg}", + "p_value": p, + }) + + df = pd.DataFrame(records) + if df.empty: + return None, None + + df["-log10(p)"] = -np.log10(df["p_value"].replace(0, 1e-10)) + df = df.sort_values("p_value").head(20) + + # βœ… Plotly Figure + fig = px.bar( + df, + x="-log10(p)", + y="GO_Term", + orientation="h", + title=f"GO Enrichment ({go_type})", + ) + + # st.write(f"βœ… Plotly Figure generated") + + fig.update_layout( + yaxis=dict(autorange="reversed"), + height=500, + margin=dict(l=10, r=10, t=40, b=10), + ) + + return fig, df + + go_results = {} + + for 
go_type in ["BP", "CC", "MF"]: + fig, df_go = run_go(go_type) + if fig is not None: + go_results[go_type] = { + "fig": fig, + "df": df_go + } + # st.write(f"βœ… go_type generated") + + go_dir = results_dir / "go-terms" + go_dir.mkdir(parents=True, exist_ok=True) + + import json + go_data = {} + + for go_type in ["BP", "CC", "MF"]: + if go_type in go_results: + fig = go_results[go_type]["fig"] + df = go_results[go_type]["df"] + + go_data[go_type] = { + "fig_json": fig.to_json(), # Figure β†’ JSON string + "df_dict": df.to_dict(orient="records") # DataFrame β†’ list of dicts + } + + go_json_file = go_dir / "go_results.json" + with open(go_json_file, "w") as f: + json.dump(go_data, f) + st.session_state["go_results"] = go_results + st.session_state["go_ready"] = True if go_data else False + # st.write("βœ… GO enrichment analysis complete") + +results_dir = Path(st.session_state["workspace"]) / "topp-workflow" / "results" / "quant" +result = get_abundance_data(st.session_state["workspace"]) +if result is None: + st.info("Abundance data not available. Please run the workflow and configure sample groups first.") + st.page_link("content/results_abundance.py", label="Go to Abundance", icon="πŸ“‹") + st.stop() + +pivot_df, expr_df, group_map = result +_run_go_enrichment(pivot_df, results_dir) + +# ================================ +# Tabs +# ================================ +protein_tab, = st.tabs(["🧬 Protein Table"]) + +# ================================ +# Protein-level results +# ================================ +with protein_tab: + st.markdown("### 🧬 Protein-Level Abundance Table") + st.info( + "This protein-level table is generated by grouping all PSMs that map to the " + "same protein and aggregating their intensities across samples.\n\n" + "Additionally, log2 fold change and p-values are calculated between sample groups." 
+ ) + + if pivot_df.empty: + st.info("No protein-level data available.") + else: + st.session_state["pivot_df"] = pivot_df + st.dataframe(pivot_df.sort_values("p-value"), width="stretch") + +# ====================================================== +# GO Enrichment Results +# ====================================================== +st.markdown("---") +st.subheader("🧬 GO Enrichment Analysis") + +go_json_file = results_dir / "go-terms" / "go_results.json" + +if not go_json_file.exists(): + st.info("GO Enrichment results are not available yet. Please run the analysis first.") +else: + import json + import plotly.io as pio + + with open(go_json_file, "r") as f: + go_data = json.load(f) + + bp_tab, cc_tab, mf_tab = st.tabs([ + "🧬 Biological Process", + "🏠 Cellular Component", + "βš™οΈ Molecular Function", + ]) + + for tab, go_type in zip([bp_tab, cc_tab, mf_tab], ["BP", "CC", "MF"]): + with tab: + if go_type not in go_data: + st.info(f"No enriched {go_type} terms found.") + continue + + fig_json = go_data[go_type]["fig_json"] + df_dict = go_data[go_type]["df_dict"] + + fig = pio.from_json(fig_json) + + df_go = pd.DataFrame(df_dict) + + if df_go.empty: + st.info(f"No enriched {go_type} terms found.") + else: + st.plotly_chart(fig, width="stretch") + + st.markdown(f"#### {go_type} Enrichment Results") + st.dataframe(df_go, width="stretch") \ No newline at end of file diff --git a/content/results_pca.py b/content/results_pca.py new file mode 100644 index 0000000..d15475d --- /dev/null +++ b/content/results_pca.py @@ -0,0 +1,103 @@ +"""PCA Results Page.""" +import streamlit as st +import pandas as pd +import plotly.express as px +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler +from src.common.common import page_setup +from src.common.results_helpers import get_abundance_data + +params = page_setup() +st.title("PCA Analysis") + +st.markdown( + """ +Principal Component Analysis (PCA) of protein-level abundance. 
+Samples are colored by group assignment to visualize clustering. +""" +) + +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +result = get_abundance_data(st.session_state["workspace"]) +if result is None: + st.info("Abundance data not available. Please run the workflow and configure sample groups first.") + st.page_link("content/results_abundance.py", label="Go to Abundance", icon="πŸ“‹") + st.stop() + +pivot_df, expr_df, group_map = result + +top_n = 500 + +top_proteins = ( + pivot_df + .dropna(subset=["p-adj"]) + .sort_values("p-adj", ascending=True) + .head(top_n)["protein"] +) + +expr_df_pca = expr_df.loc[ + expr_df.index.intersection(top_proteins) +] + +if expr_df_pca.shape[0] < 2: + st.info("Not enough proteins after p-value filtering for PCA.") + st.stop() + +X = expr_df_pca.T +X_scaled = StandardScaler().fit_transform(X) + +pca = PCA(n_components=2) +pcs = pca.fit_transform(X_scaled) + +pca_df = pd.DataFrame( + pcs, + columns=["PC1", "PC2"], + index=X.index +) + +actual_sample_names = pca_df.index.tolist() + +norm_map = {} + +for k, v in group_map.items(): + try: + sample_idx = int(k) + 1 + target_substring = f"sample{sample_idx}[" + real_full_name = next((name for name in actual_sample_names if target_substring in name), None) + + if real_full_name: + norm_map[real_full_name] = v if v and v.strip() else "Unassigned" + except ValueError: + continue + +pca_df["Group"] = pca_df.index.map(norm_map) + +fig_pca = px.scatter( + pca_df, + x="PC1", + y="PC2", + color="Group", + text=pca_df.index, +) + +fig_pca.update_traces(textposition="top center") +fig_pca.update_layout( + xaxis_title=f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)", + yaxis_title=f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)", + height=600, +) + +st.plotly_chart(fig_pca, width="stretch") + +st.markdown(f"**Proteins used:** {expr_df_pca.shape[0]} (top {top_n} by p-adj)") + +st.markdown("---") +st.markdown("**Other 
"""Rescoring (Percolator) Results Page.

Lets the user pick one Percolator output file and renders the linked
heatmap / table / sequence / spectrum components from the visualization
cache stored under ``results/insight_cache``.
"""
import streamlit as st
from pathlib import Path
from src.common.common import page_setup
from src.common.results_helpers import get_workflow_dir
from openms_insight import Table, Heatmap, LinePlot, SequenceView, StateManager

params = page_setup()
st.title("Rescoring Results")

st.markdown(
    """
View PSMs after **Percolator** statistical validation. Percolator uses machine learning
to re-score PSMs and estimate false discovery rates (FDR) for more accurate results.
Click on a PSM to view the annotated spectrum and peptide sequence.
"""
)

st.info(
    "**Score:** The Posterior Error Probability (PEP) represents the probability that this PSM "
    "is incorrect. Lower values indicate higher confidence identifications."
)

# Every later step needs the workspace, so bail out early without it.
if "workspace" not in st.session_state:
    st.warning("Please initialize your workspace first.")
    st.stop()

workflow_dir = get_workflow_dir(st.session_state["workspace"])
perc_dir = workflow_dir / "results" / "percolator"
cache_dir = workflow_dir / "results" / "insight_cache"

if not perc_dir.exists():
    st.info("No rescoring results available yet. Please run the workflow first.")
    st.stop()

# Sorted so the select box order is stable between reruns.
perc_files = sorted(perc_dir.glob("*.idXML"))

if not perc_files:
    st.warning("No rescoring output files found.")
    st.stop()

selected_file = st.selectbox(
    "Select rescoring result file",
    perc_files,
    format_func=lambda x: x.name,
)

# Cache ids are "<component>_<idXML stem>".
cache_id_prefix = selected_file.stem

# Only the table cache directory is probed; it stands in for the whole set.
if not (cache_dir / f"table_{cache_id_prefix}").is_dir():
    st.warning("Visualization cache not found. Please re-run the workflow.")
    st.stop()

# Shared state manager links selections across the four components.
state_manager = StateManager()

# Load components from cache (no data parameter needed).
cache_path = str(cache_dir)
seqview_id = f"seqview_{cache_id_prefix}"
lineplot_id = f"lineplot_{cache_id_prefix}"

table = Table(cache_id=f"table_{cache_id_prefix}", cache_path=cache_path)
heatmap = Heatmap(cache_id=f"heatmap_{cache_id_prefix}", cache_path=cache_path)
seq_view = SequenceView(cache_id=seqview_id, cache_path=cache_path)
line_plot = LinePlot(cache_id=lineplot_id, cache_path=cache_path)

# Render components.
st.subheader("PSM Overview")
heatmap(state_manager=state_manager, height=350)

st.subheader("PSM Table")
table(state_manager=state_manager, height=533)

st.subheader("Peptide Sequence")
seq_view(key=seqview_id, state_manager=state_manager, height=533)

st.subheader("MS2 Spectrum")
line_plot(
    key=lineplot_id,
    state_manager=state_manager,
    height=450,
    sequence_view_key=seqview_id,
)

st.markdown("---")
st.markdown("**Next step:** View filtered PSMs")
# Icon restored from mis-decoded UTF-8 so it is a real emoji again.
st.page_link("content/results_filtered.py", label="Go to Filtered PSMs", icon="🎯")
"""Volcano Plot Results Page.

Plots log2 fold change against -log10(adjusted p-value) for every protein
returned by ``get_abundance_data`` and colours points by user-selected
significance thresholds.
"""
import streamlit as st
import plotly.express as px
import numpy as np
import pandas as pd
from src.common.common import page_setup
from src.common.results_helpers import get_abundance_data
from statsmodels.stats.multitest import multipletests
from scipy.stats import ttest_ind
from pathlib import Path

params = page_setup()
st.title("Volcano Plot")

st.markdown(
    """
Visualize differential expression analysis with a volcano plot.
Points represent proteins colored by significance status.
"""
)

if "workspace" not in st.session_state:
    st.warning("Please initialize your workspace first.")
    st.stop()

result = get_abundance_data(st.session_state["workspace"])
if result is None:
    st.info("Abundance data not available. Please run the workflow and configure sample groups first.")
    st.page_link("content/results_abundance.py", label="Go to Abundance", icon="📋")
    st.stop()

# Unpack the tuple returned by get_abundance_data; only pivot_df is used here.
pivot_df, expr_df, group_map = result

# Threshold Selection UI
st.divider()
c1, c2 = st.columns(2)
with c1:
    fc_thresh = st.slider(
        "log2 Fold Change threshold",
        min_value=0.1,
        max_value=3.0,
        value=1.0,
        step=0.1,
    )
with c2:
    p_thresh = st.slider(
        "p-adj (FDR) threshold",
        min_value=0.001,
        max_value=0.1,
        value=0.05,
        step=0.001,
    )

volcano_df = pivot_df.dropna(subset=["log2FC", "p-adj"]).copy()

# Without any complete rows the axis range below would be NaN and the plot
# empty, so stop with an explanation instead.
if volcano_df.empty:
    st.info("No proteins with both log2FC and p-adj values are available for the volcano plot.")
    st.stop()

# Clamp p-adj to the smallest positive float so that an adjusted p-value of
# exactly 0 maps to a large finite value rather than +inf (which cannot be drawn).
min_positive = np.finfo(float).tiny
volcano_df["neg_log10_padj"] = -np.log10(volcano_df["p-adj"].clip(lower=min_positive))

# Classify each protein; both conditions share the p-adj cut-off.
passes_p = volcano_df["p-adj"] <= p_thresh
volcano_df["Significance"] = "Not significant"
volcano_df.loc[passes_p & (volcano_df["log2FC"] >= fc_thresh), "Significance"] = "Up-regulated"
volcano_df.loc[passes_p & (volcano_df["log2FC"] <= -fc_thresh), "Significance"] = "Down-regulated"

fig_volcano = px.scatter(
    volcano_df,
    x="log2FC",
    y="neg_log10_padj",
    color="Significance",
    hover_data=["protein", "log2FC", "p-value", "p-adj"],
    color_discrete_map={
        "Up-regulated": "red",
        "Down-regulated": "blue",
        "Not significant": "lightgrey",
    },
)

# Dashed guides at the chosen thresholds.
fig_volcano.add_vline(x=fc_thresh, line_dash="dash")
fig_volcano.add_vline(x=-fc_thresh, line_dash="dash")
fig_volcano.add_hline(y=-np.log10(p_thresh), line_dash="dash")

# Make x-axis symmetric around zero, with 10% padding on each side.
max_abs_fc = volcano_df["log2FC"].abs().max()
x_range = [-max_abs_fc * 1.1, max_abs_fc * 1.1]

fig_volcano.update_layout(
    xaxis_title="log2 Fold Change",
    yaxis_title="-log10(p-adj)",
    xaxis_range=x_range,
    height=600,
)

st.plotly_chart(fig_volcano, width="stretch")

up_count = (volcano_df["Significance"] == "Up-regulated").sum()
down_count = (volcano_df["Significance"] == "Down-regulated").sum()
st.markdown(f"**Up-regulated:** {up_count} | **Down-regulated:** {down_count}")

st.markdown("---")
st.markdown("**Other visualizations:**")
col1, col2 = st.columns(2)
with col1:
    st.page_link("content/results_pca.py", label="PCA", icon="📊")
with col2:
    st.page_link("content/results_heatmap.py", label="Heatmap", icon="🔥")
self.ui.upload_widget( @@ -26,98 +32,1008 @@ def upload(self) -> None: fallback=[str(f) for f in Path("example-data", "mzML").glob("*.mzML")], ) + with t[1]: + self.ui.upload_widget( + key="fasta-file", + name="Protein FASTA database", + file_types=("fasta", "fa"), + fallback=[str(f) for f in Path("example-data", "db").glob("*.fasta")], + ) + @st.fragment def configure(self) -> None: # Allow users to select mzML files for the analysis. self.ui.select_input_file("mzML-files", multiple=True) + self.ui.select_input_file("fasta-file", multiple=False) # Create tabs for different analysis steps. t = st.tabs( - ["**Feature Detection**", "**Feature Linking**", "**Python Custom Tool**"] + ["**IsobaricAnalyzer**", "**CometAdapter**", "**PercolatorAdapter**", "**IDFilter**", "**IDMapper**", "**FileMerger**", + "**ProteinInference**", "**IDFilter**", "**IDConflictResolver**", "**ProteinQuantifier**", "**Group Selection**"] ) with t[0]: - # Parameters for FeatureFinderMetabo TOPP tool. + # Checkbox for decoy generation + # reactive=True ensures the parent configure() fragment re-runs when checkbox changes, + # so conditional UI (DecoyDatabase settings) updates immediately + self.ui.input_widget( + key="generate-decoys", + default=True, + name="Generate Decoy Database", + widget_type="checkbox", + help="Generate reversed decoy sequences for FDR calculation. Disable if your FASTA already contains decoys.", + reactive=True, + ) + + # Reload params to get current checkbox value after it was saved + self.params = self.parameter_manager.get_parameters_from_json() + + # Show DecoyDatabase settings if generating decoys + if self.params.get("generate-decoys", True): + st.info(""" + **Decoy Database Settings:** + * **method**: How decoy sequences are generated from target protein sequences. + *Reverse* creates decoys by reversing each sequence, while *shuffle* randomly + rearranges the amino acids. 
Both methods preserve the amino acid composition + of the original protein, ensuring decoys have similar properties to real sequences + for accurate false discovery rate (FDR) estimation. + """) + self.ui.input_TOPP( + "DecoyDatabase", + custom_defaults={ + "decoy_string": "rev_", + "decoy_string_position": "prefix", + "method": "reverse", + }, + include_parameters=["method"], + ) + + comet_info = """ + **Identification (Comet):** + * **enzyme**: The enzyme used for peptide digestion. + * **missed_cleavages**: Number of possible cleavage sites missed by the enzyme. It has no effect if enzyme is unspecific cleavage. + * **fixed_modifications**: Fixed modifications, specified using Unimod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)' or 'Oxidation (M)' + * **variable_modifications**: Variable modifications, specified using Unimod (www.unimod.org) terms, e.g. 'Carbamidomethyl (C)' or 'Oxidation (M)' + * **instrument**: Type of instrument (high_res or low_res). Use 'high_res' for high-resolution MS2 (Orbitrap, TOF), 'low_res' for ion trap. + * **fragment_mass_tolerance**: Fragment mass tolerance for MS2 matching. + * **fragment_bin_offset**: Offset for binning MS2 spectra. Typically 0.0 for high-res, 0.4 for low-res instruments. + """ + if not self.params.get("generate-decoys", True): + comet_info += """* **PeptideIndexing:decoy_string**: String that was appended (or prefixed - see 'decoy_string_position' flag below) to the accessions + in the protein database to indicate decoy proteins. 
+ """ + st.info(comet_info) + + st.write(Path(self.workflow_dir, "results")) + + comet_include = [":enzyme", "missed_cleavages", "fixed_modifications", "variable_modifications", + "instrument", "fragment_mass_tolerance", "fragment_error_units", "fragment_bin_offset"] + if not self.params.get("generate-decoys", True): + # Only show decoy_string when not generating decoys + comet_include.append("PeptideIndexing:decoy_string") + self.ui.input_TOPP( - "FeatureFinderMetabo", - custom_defaults={"algorithm:common:noise_threshold_int": 1000.0}, + "IsobaricAnalyzer", + custom_defaults={ + "tmt11plex:reference_channel": 126, + "type": "tmt11plex", + "extraction:select_activation": "auto", + "extraction:reporter_mass_shift": 0.002, + "extraction:min_reporter_intensity": 0.0, + "extraction:min_precursor_purity": 0.0, + "extraction:precursor_isotope_deviation": 10.0, + "quantification:isotope_correction": "false", + } ) with t[1]: - # Parameters for MetaboliteAdductDecharger TOPP tool. - self.ui.input_TOPP("FeatureLinkerUnlabeledKD") + # Parameters for FeatureFinderMetabo TOPP tool. 
+ # self.ui.input_TOPP( + # "FeatureFinderMetabo", + # custom_defaults={"algorithm:common:noise_threshold_int": 1000.0}, + # ) + comet_include = [":enzyme", "missed_cleavages", "fixed_modifications", "variable_modifications", + "instrument", "fragment_mass_tolerance", "fragment_error_units", "fragment_bin_offset", "PeptideIndexing:IL_equivalent"] + self.ui.input_TOPP( + "CometAdapter", + custom_defaults={ + "PeptideIndexing:IL_equivalent": True, + "clip_nterm_methionine": "true", + "instrument": "high_res", + "missed_cleavages": 2, + "min_peptide_length": 6, + "max_peptide_length": 40, + "enzyme": "Trypsin/P", + "PeptideIndexing:unmatched_action": "warn", + "max_variable_mods_in_peptide": 3, + "precursor_mass_tolerance": 4.5, + "isotope_error": "0/1", + "precursor_error_units": "ppm", + "num_hits": 1, + "num_enzyme_termini": "fully", + "fragment_bin_offset": 0.0, + "minimum_peaks": 10, + "precursor_charge": "2:4", + "fragment_mass_tolerance": 0.015, + "PeptideIndexing:unmatched_action": "warn", + "variable_modifications": "Oxidation (M)\nAcetyl (Protein N-term)\nTMT6plex (K)\nTMT6plex (N-term)", + "debug": 0, + "force": True, + }, + include_parameters=comet_include, + flag_parameters=["PeptideIndexing:IL_equivalent", "force"], + exclude_parameters=["second_enzyme"], + ) with t[2]: + st.info(""" + **Filtering (IDFilter):** + * **score:type_peptide**: Score used for filtering. If empty, the main score is used. + * **score:psm**: The score which should be reached by a peptide hit to be kept. (use 'NAN' to disable this filter) + """) + self.ui.input_TOPP( + "PercolatorAdapter", + custom_defaults={ + "subset_max_train": 300000, + "decoy_pattern": "DECOY_", + "score_type": "pep", + "post_processing_tdc": True, + "debug": 0, + }, + flag_parameters=["post_processing_tdc"], + ) + + with t[3]: + # Parameters for MetaboliteAdductDecharger TOPP tool. 
+ # self.ui.input_TOPP("FeatureLinkerUnlabeledKD") + self.ui.input_TOPP( + "IDFilter", + custom_defaults={ + "score:type_peptide": "q-value", + "score:psm": 0.10, + }, + tool_instance_name="IDFilter-strict", + ) + with t[4]: + st.info(""" + **Quantification (ProteomicsLFQ):** + * **intThreshold**: Peak intensity threshold applied in seed detection. + * **psmFDR**: FDR threshold for sub-protein level (e.g. 0.05=5%). Use -FDR_type to choose the level. Cutoff is applied at the highest level. If Bayesian inference was chosen, it is equivalent with a peptide FDR + * **proteinFDR**: Protein FDR threshold (0.05=5%). + """) + self.ui.input_TOPP( + "IDMapper", + custom_defaults={ + "threads": 8, + "debug": 0, + } + ) + with t[5]: + self.ui.input_TOPP( + "FileMerger", + custom_defaults={ + "in_type": "consensusXML", + "append_method": "append_cols", + "annotate_file_origin": True, + "threads": 8, + }, + flag_parameters=["annotate_file_origin"] + ) + with t[6]: + self.ui.input_TOPP( + "ProteinInference", + custom_defaults={ + "threads": 8, + "picked_decoy_string": "DECOY_", + "picked_fdr": "true", + "protein_fdr": "true", + "Algorithm:use_shared_peptides": "true", + "Algorithm:annotate_indistinguishable_groups": "true", + "Algorithm:score_type": "PEP", + "Algorithm:score_aggregation_method": "best", + "Algorithm:min_peptides_per_protein": 1, + } + ) + with t[7]: # A single checkbox widget for workflow logic. - self.ui.input_widget("run-python-script", False, "Run custom Python script") + # self.ui.input_widget("run-python-script", False, "Run custom Python script") * # Generate input widgets for a custom Python tool, located at src/python-tools. # Parameters are specified within the file in the DEFAULTS dictionary. 
- self.ui.input_python("example") + # self.ui.input_python("example") * + self.ui.input_TOPP( + "IDFilter", + custom_defaults={ + "score:type_protein": "q-value", + "score:proteingroup": 0.01, + "score:psm": 0.01, + "delete_unreferenced_peptide_hits": True, + "remove_decoys": True + }, + flag_parameters=["delete_unreferenced_peptide_hits", "remove_decoys"], + tool_instance_name="IDFilter-lenient", + ) + with t[8]: + self.ui.input_TOPP( + "IDConflictResolver", + custom_defaults={ + "threads": 4, + } + ) + + with t[9]: + self.ui.input_TOPP( + "ProteinQuantifier", + custom_defaults={ + "method": "top", + "top:N": 3, + "top:aggregate": "median", + "top:include_all": True, + "ratios": True, + "threads": 8, + "debug": 0, + }, + flag_parameters=["top:include_all", "ratios"] + ) + with t[10]: + st.markdown("### πŸ§ͺ TMT Sample Group Assignment") + + # 1. Determine TMT type (e.g., tmt10plex, tmt16plex) + target_key = f"{self.parameter_manager.topp_param_prefix}IsobaricAnalyzer:1:type" + selected_tmt = st.session_state.get(target_key, "tmt12plex") + + if "tmt" in selected_tmt: + import re + # Extract the number to determine the plex count + num_plex_match = re.search(r'\d+', selected_tmt) + if num_plex_match: + num_plex = int(num_plex_match.group()) + all_channels = [f"sample{i+1}" for i in range(num_plex)] + + st.info( + "Enter a group name for each TMT channel.\n\n" + "Type **'skip'** for channels you wish to skip. (e.g., control, case, skip)" + ) + + # 2. Create an input_widget for each channel (automatically saved to params.json) + cols = st.columns(2) + for i, ch in enumerate(all_channels): + with cols[i % 2]: + self.ui.input_widget( + key=f"TMT-group-{ch}", + default="", + name=f"Group for {ch}", + widget_type="text", + help="Enter group name or 'skip' to ignore this channel.", + ) + + # 3. 
Read values from params.json and construct a dictionary in tmt_group_map format + # (This can be used later to filter DataFrames in subsequent logic) + self.params = self.parameter_manager.get_parameters_from_json() + + tmt_group_map = {} + for i, ch in enumerate(all_channels): + # Retrieve stored value (default is empty string) + group_val = self.params.get(f"TMT-group-{ch}", "") + tmt_group_map[str(i)] = group_val + + # For data inspection (remove if not needed) + if st.checkbox("Show current TMT mapping"): + st.json(tmt_group_map) + + # 4. Clean up parameters from unused/previous TMT settings + all_possible_channel_keys = {f"TMT-group-{ch}" for ch in all_channels} + orphaned_keys = [ + k for k in self.params.keys() + if k.startswith("TMT-group-") and k not in all_possible_channel_keys + ] + + if orphaned_keys: + for key in orphaned_keys: + del self.params[key] + self.parameter_manager.save_parameters() + + else: + st.warning("Please select a TMT type in the parameters first.") + # with t[10]: + # st.markdown("### πŸ§ͺ Sample Group Assignment") + # target_key = f"{self.parameter_manager.topp_param_prefix}IsobaricAnalyzer:1:type" + # selected_tmt = st.session_state.get(target_key, "tmt12plex") + + # if "tmt" in selected_tmt: + # import re + # num_plex = int(re.search(r'\d+', selected_tmt).group()) + # all_channels = [f"sample{i+1}" for i in range(num_plex)] + + # @st.fragment + # def render_group_assignment(): + # exclude_channels = st.multiselect( + # "Select samples to exclude.", + # options=all_channels, + # key="exclude_selector", + # help="Samples selected here will be excluded from the dataframe." 
+ # ) + # st.write("exclude_channels:", exclude_channels) + + # exclude_indices = [i for i, ch in enumerate(all_channels) if ch in exclude_channels] + + # st.write("exclude_indices:", exclude_indices) + # st.session_state["tmt_exclude_indices"] = exclude_indices + + # keep_channels = [ch for ch in all_channels if ch not in exclude_channels] + + # st.info("Enter group names for remaining samples.") + # group_mapping = {} + + # if keep_channels: + # cols = st.columns(2) + # for idx, ch in enumerate(keep_channels): + # original_idx = all_channels.index(ch) + # with cols[idx % 2]: + # group_name = st.text_input( + # f"Group for {ch}", + # value=st.session_state.get("tmt_group_map", {}).get(original_idx, ""), + # placeholder="e.g. Control or Case", + # key=f"input_{ch}_{len(keep_channels)}" + # ) + # group_mapping[original_idx] = group_name + # else: + # st.warning("All samples were selected for exclusion.") + + # st.session_state["tmt_group_map"] = group_mapping + + # if st.checkbox("Saved session data check"): + # st.write("Indices to exclude:", st.session_state["tmt_exclude_indices"]) + # st.write("Groups for remaining indices:", st.session_state["tmt_group_map"]) + + # render_group_assignment() + + # else: + # st.warning("Please select a TMT type in the parameters first.") + def execution(self) -> None: # Any parameter checks, here simply checking if mzML files are selected if not self.params["mzML-files"]: self.logger.log("ERROR: No mzML files selected.") return + + if not self.params.get("fasta-file"): + st.error("No FASTA file selected.") + return False # Get mzML files with FileManager in_mzML = self.file_manager.get_files(self.params["mzML-files"]) + fasta_file = self.file_manager.get_files([self.params["fasta-file"]])[0] + + if len(in_mzML) < 1: + st.error("At least one mzML file is required.") + return False + + fasta_path = Path(fasta_file) + + self.logger.log(f"πŸ“‚ Loaded {len(in_mzML)} sample(s)") + + if self.params.get("generate-decoys", True): + 
decoy_fasta = fasta_path.with_suffix(".decoy.fasta") + # Get decoy_string from DecoyDatabase params + decoy_string = self.params.get("DecoyDatabase", {}).get("decoy_string", "rev_") + + if not decoy_fasta.exists(): + self.logger.log("🧬 Generating decoy database...") + st.info("Generating decoy FASTA database...") + if not self.executor.run_topp( + "DecoyDatabase", + {"in": [str(fasta_path)], "out": [str(decoy_fasta)]}, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… Decoy database ready") + st.success(f"Using decoy FASTA: {decoy_fasta.name}") + database_fasta = decoy_fasta + else: + # Get decoy_string from CometAdapter params + decoy_string = self.params.get("CometAdapter", {}).get("PeptideIndexing:decoy_string", "rev_") + self.logger.log("πŸ“„ Using existing FASTA database") + st.info(f"Using original FASTA: {fasta_path.name}") + database_fasta = fasta_path # Log any messages. self.logger.log(f"Number of input mzML files: {len(in_mzML)}") - # Prepare output files for feature detection. - out_ffm = self.file_manager.get_files( - in_mzML, "featureXML", "feature-detection" - ) + results_dir = Path(self.workflow_dir, "results") + iso_dir = results_dir / "isobaric_consensusXML" + comet_dir = results_dir / "comet_results" + perc_dir = results_dir / "percolator" + psm_filter_dir = results_dir / "psm_filter" + map_dir = results_dir / "idmapper" + merge_dir = results_dir / "merged" + protein_dir = results_dir / "protein" + msstats_dir = results_dir / "msstats" + quant_dir = results_dir / "quant_results" - # Run FeatureFinderMetabo tool with input and output files. 
- self.logger.log("Detecting features...") - self.executor.run_topp( - "FeatureFinderMetabo", input_output={"in": in_mzML, "out": out_ffm} - ) + iso_consensus = [] + comet_results = [] + percolator_results = [] + psm_filtered = [] + mapped_ids = [] - # Prepare input and output files for feature linking - in_fl = self.file_manager.get_files(out_ffm, collect=True) - out_fl = self.file_manager.get_files( - "feature_matrix.consensusXML", set_results_dir="feature-linking" - ) + for d in [ + iso_dir, comet_dir, perc_dir, psm_filter_dir, + map_dir, merge_dir, protein_dir, msstats_dir, quant_dir + ]: + d.mkdir(parents=True, exist_ok=True) - # Run FeatureLinkerUnlabaeledKD with all feature maps passed at once - self.logger.log("Linking features...") - self.executor.run_topp( - "FeatureLinkerUnlabeledKD", input_output={"in": in_fl, "out": out_fl} - ) - self.logger.log("Exporting consensus features to pandas DataFrame...") - self.executor.run_python( - "export_consensus_feature_df", input_output={"in": out_fl[0]} - ) - # Check if adduct detection should be run. - if self.params["run-python-script"]: - # Example for a custom Python tool, which is located in src/python-tools. 
- self.executor.run_python("example", {"in": in_mzML}) + for mz in in_mzML: + stem = Path(mz).stem + iso_consensus.append(str(iso_dir / f"{stem}_iso.consensusXML")) + comet_results.append(str(comet_dir / f"{stem}_comet.idXML")) + percolator_results.append(str(perc_dir / f"{stem}_comet_perc.idXML")) + psm_filtered.append(str(psm_filter_dir / f"{stem}_comet_perc_filter.idXML")) + mapped_ids.append(str(map_dir / f"{stem}_comet_perc_filter_map.consensusXML")) + + merged_id = str(merge_dir / "ID_mapper_merge.consensusXML") + protein_id = str(protein_dir / "ID_mapper_merge_epi.consensusXML") + protein_filter = str(protein_dir / "ID_mapper_merge_epi_filter.consensusXML") + protein_resolved = str(protein_dir / "ID_mapper_merge_epi_filter_resconf.consensusXML") + # msstats_input = str(msstats_dir / "msstats_input.csv") + consensus_out = str(quant_dir / "openms_design_protein_openms.csv") + + # --- IsobaricAnalyzer --- + self.logger.log("🏷️ Running isobaric labeling analysis...") + with st.spinner("IsobaricAnalyzer"): + if not self.executor.run_topp( + "IsobaricAnalyzer", + { + "in": in_mzML, + "out": iso_consensus, + }, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… IsobaricAnalyzer complete") + + # --- CometAdapter --- + self.logger.log("πŸ”Ž Running peptide search...") + with st.spinner(f"CometAdapter ({stem})"): + comet_extra_params = {"database": str(database_fasta)} + if self.params.get("generate-decoys", True): + # Propagate decoy_string from DecoyDatabase + comet_extra_params["PeptideIndexing:decoy_string"] = decoy_string + if not self.executor.run_topp( + "CometAdapter", + { + "in": in_mzML, + "out": comet_results, + }, + comet_extra_params, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… CometAdapter complete") + + # Get fragment tolerance from CometAdapter parameters for visualization + comet_params = self.parameter_manager.get_topp_parameters("CometAdapter") + frag_tol = 
comet_params.get("fragment_mass_tolerance", 0.02) + frag_tol_is_ppm = comet_params.get("fragment_error_units", "Da") != "Da" + + # Build visualization cache for Comet results + results_dir_path = Path(self.workflow_dir, "results") + cache_dir = results_dir_path / "insight_cache" + cache_dir.mkdir(parents=True, exist_ok=True) + + # Get mzML directory + mzml_dir = Path(in_mzML[0]).parent + + # Build spectra cache (once, shared by all stages) + spectra_df = None + filename_to_index = {} + + for idxml_file in comet_results: + idxml_path = Path(idxml_file) + cache_id_prefix = idxml_path.stem + + # Parse idXML to DataFrame + id_df, spectra_data = parse_idxml(idxml_path) + + # Build spectra cache (only once) + if spectra_df is None: + filename_to_index = {Path(f).name: i for i, f in enumerate(spectra_data)} + spectra_df, filename_to_index = build_spectra_cache(mzml_dir, filename_to_index) + + # Initialize Table component (caches itself) + Table( + cache_id=f"table_{cache_id_prefix}", + data=id_df.lazy(), + cache_path=str(cache_dir), + interactivity={"file": "file_index", "spectrum": "scan_id", "identification": "id_idx"}, + column_definitions=[ + {"field": "sequence", "title": "Sequence"}, + {"field": "charge", "title": "Z", "sorter": "number"}, + {"field": "mz", "title": "m/z", "sorter": "number"}, + {"field": "rt", "title": "RT", "sorter": "number"}, + {"field": "score", "title": "Score", "sorter": "number"}, + {"field": "protein_accession", "title": "Proteins"}, + ], + initial_sort=[{"column": "score", "dir": "asc"}], + index_field="id_idx", + ) + + # Initialize Heatmap component + Heatmap( + cache_id=f"heatmap_{cache_id_prefix}", + data=id_df.lazy(), + cache_path=str(cache_dir), + x_column="rt", + y_column="mz", + intensity_column="score", + interactivity={"identification": "id_idx"}, + ) + + # Initialize SequenceView component + seq_view = SequenceView( + cache_id=f"seqview_{cache_id_prefix}", + sequence_data=id_df.lazy().select(["id_idx", "sequence", "charge", 
"file_index", "scan_id"]).rename({ + "id_idx": "sequence_id", + "charge": "precursor_charge", + }), + peaks_data=spectra_df.lazy(), + filters={ + "identification": "sequence_id", + "file": "file_index", + "spectrum": "scan_id", + }, + interactivity={"peak": "peak_id"}, + cache_path=str(cache_dir), + deconvolved=False, + annotation_config={ + "ion_types": ["b", "y"], + "neutral_losses": True, + "tolerance": frag_tol, + "tolerance_ppm": frag_tol_is_ppm, + }, + ) + + # Initialize LinePlot from SequenceView + LinePlot.from_sequence_view( + seq_view, + cache_id=f"lineplot_{cache_id_prefix}", + cache_path=str(cache_dir), + title="Annotated Spectrum", + styling={ + "unhighlightedColor": "#CCCCCC", + "highlightColor": "#E74C3C", + "selectedColor": "#F3A712", + }, + ) + + self.logger.log("βœ… Peptide search complete") + + # --- PercolatorAdapter --- + self.logger.log("πŸ“Š Running rescoring...") + with st.spinner(f"PercolatorAdapter"): + if not self.executor.run_topp( + "PercolatorAdapter", + { + "in": comet_results, + "out": percolator_results, + }, + ): + self.logger.log("Workflow stopped due to error") + return False + # Build visualization cache for Percolator results + for idxml_file in percolator_results: + idxml_path = Path(idxml_file) + cache_id_prefix = idxml_path.stem + + # Parse idXML to DataFrame + id_df, spectra_data = parse_idxml(idxml_path) + + # Initialize Table component (caches itself) + Table( + cache_id=f"table_{cache_id_prefix}", + data=id_df.lazy(), + cache_path=str(cache_dir), + interactivity={"file": "file_index", "spectrum": "scan_id", "identification": "id_idx"}, + column_definitions=[ + {"field": "sequence", "title": "Sequence"}, + {"field": "charge", "title": "Z", "sorter": "number"}, + {"field": "mz", "title": "m/z", "sorter": "number"}, + {"field": "rt", "title": "RT", "sorter": "number"}, + {"field": "score", "title": "Score", "sorter": "number"}, + {"field": "protein_accession", "title": "Proteins"}, + ], + initial_sort=[{"column": "score", 
"dir": "asc"}], + index_field="id_idx", + ) + + # Initialize Heatmap component + Heatmap( + cache_id=f"heatmap_{cache_id_prefix}", + data=id_df.lazy(), + cache_path=str(cache_dir), + x_column="rt", + y_column="mz", + intensity_column="score", + interactivity={"identification": "id_idx"}, + ) + + # Initialize SequenceView component + seq_view = SequenceView( + cache_id=f"seqview_{cache_id_prefix}", + sequence_data=id_df.lazy().select(["id_idx", "sequence", "charge", "file_index", "scan_id"]).rename({ + "id_idx": "sequence_id", + "charge": "precursor_charge", + }), + peaks_data=spectra_df.lazy(), + filters={ + "identification": "sequence_id", + "file": "file_index", + "spectrum": "scan_id", + }, + interactivity={"peak": "peak_id"}, + cache_path=str(cache_dir), + deconvolved=False, + annotation_config={ + "ion_types": ["b", "y"], + "neutral_losses": True, + "tolerance": frag_tol, + "tolerance_ppm": frag_tol_is_ppm, + }, + ) + + # Initialize LinePlot from SequenceView + LinePlot.from_sequence_view( + seq_view, + cache_id=f"lineplot_{cache_id_prefix}", + cache_path=str(cache_dir), + title="Annotated Spectrum", + styling={ + "unhighlightedColor": "#CCCCCC", + "highlightColor": "#E74C3C", + "selectedColor": "#F3A712", + }, + ) + + self.logger.log("βœ… PercolatorAdapter complete") + + # --- IDFilter --- + self.logger.log("πŸ”§ Filtering identifications...") + with st.spinner(f"IDFilter"): + if not self.executor.run_topp( + "IDFilter", + { + "in": percolator_results, + "out": psm_filtered, + }, + tool_instance_name="IDFilter-strict" + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… IDFilter-strict complete") + + # Build visualization cache for Filter results + for idxml_file in psm_filtered: + idxml_path = Path(idxml_file) + cache_id_prefix = idxml_path.stem + + # Parse idXML to DataFrame + id_df, spectra_data = parse_idxml(idxml_path) + + # Initialize Table component (caches itself) + Table( + cache_id=f"table_{cache_id_prefix}", 
+ data=id_df.lazy(), + cache_path=str(cache_dir), + interactivity={"file": "file_index", "spectrum": "scan_id", "identification": "id_idx"}, + column_definitions=[ + {"field": "sequence", "title": "Sequence"}, + {"field": "charge", "title": "Z", "sorter": "number"}, + {"field": "mz", "title": "m/z", "sorter": "number"}, + {"field": "rt", "title": "RT", "sorter": "number"}, + {"field": "score", "title": "Score", "sorter": "number"}, + {"field": "protein_accession", "title": "Proteins"}, + ], + initial_sort=[{"column": "score", "dir": "asc"}], + index_field="id_idx", + ) + + # Initialize Heatmap component + Heatmap( + cache_id=f"heatmap_{cache_id_prefix}", + data=id_df.lazy(), + cache_path=str(cache_dir), + x_column="rt", + y_column="mz", + intensity_column="score", + interactivity={"identification": "id_idx"}, + ) + + # Initialize SequenceView component + seq_view = SequenceView( + cache_id=f"seqview_{cache_id_prefix}", + sequence_data=id_df.lazy().select(["id_idx", "sequence", "charge", "file_index", "scan_id"]).rename({ + "id_idx": "sequence_id", + "charge": "precursor_charge", + }), + peaks_data=spectra_df.lazy(), + filters={ + "identification": "sequence_id", + "file": "file_index", + "spectrum": "scan_id", + }, + interactivity={"peak": "peak_id"}, + cache_path=str(cache_dir), + deconvolved=False, + annotation_config={ + "ion_types": ["b", "y"], + "neutral_losses": True, + "tolerance": frag_tol, + "tolerance_ppm": frag_tol_is_ppm, + }, + ) + + # Initialize LinePlot from SequenceView + LinePlot.from_sequence_view( + seq_view, + cache_id=f"lineplot_{cache_id_prefix}", + cache_path=str(cache_dir), + title="Annotated Spectrum", + styling={ + "unhighlightedColor": "#CCCCCC", + "highlightColor": "#E74C3C", + "selectedColor": "#F3A712", + }, + ) + + # --- IDMapper --- + self.logger.log("πŸ—ΊοΈ Mapping IDs to isobaric consensus features...") + for iso, psm, mapped in zip(iso_consensus, psm_filtered, mapped_ids): + iso_stem = Path(iso).stem + with st.spinner(f"IDMapper 
({iso_stem})"): + if not self.executor.run_topp( + "IDMapper", + { + "in": [iso], + "id": [psm], + "out": [mapped], + }, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… IDMapper complete") + + # --- FileMerger --- + self.logger.log("πŸ”— Merging mapped consensus files...") + with st.spinner("FileMerger"): + if not self.executor.run_topp( + "FileMerger", + { + "in": mapped_ids, + "out": [merged_id], + }, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… FileMerger complete") + + # --- ProteinInference --- + self.logger.log("🧩 Running protein inference...") + with st.spinner("ProteinInference"): + if not self.executor.run_topp( + "ProteinInference", + { + "in": [merged_id], + "out": [protein_id], + }, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… ProteinInference complete") + + # --- IDFilter-lenient (Protein) --- + self.logger.log("πŸ”¬ Filtering proteins...") + with st.spinner("IDFilter (Protein)"): + if not self.executor.run_topp( + "IDFilter", + { + "in": [protein_id], + "out": [protein_filter], + }, + tool_instance_name="IDFilter-lenient" + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… IDFilter-lenient (Protein) complete") + + # ================================ + # ✨ NEW: 8️⃣ IDConflictResolver (protein_filter β†’ protein_resolved) + # ================================ + self.logger.log("βš–οΈ Resolving ID conflicts...") + with st.spinner("IDConflictResolver"): + if not self.executor.run_topp( + "IDConflictResolver", + { + "in": [protein_filter], + "out": [protein_resolved], + }, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… IDConflictResolver complete") + + # ================================ + # ✨ NEW: πŸ”Ÿ ProteinQuantifier (protein_resolved β†’ consensus_out) + # ================================ + self.logger.log("πŸ“ Running 
protein quantification...") + with st.spinner("ProteinQuantifier"): + if not self.executor.run_topp( + "ProteinQuantifier", + { + "in": [protein_resolved], + "out": [consensus_out], + }, + ): + self.logger.log("Workflow stopped due to error") + return False + self.logger.log("βœ… ProteinQuantifier complete") + self.logger.log("πŸŽ‰ WORKFLOW FINISHED") @st.fragment def results(self) -> None: - @st.fragment - def show_consensus_features(): - df = pd.read_csv(file, sep="\t", index_col=0) - st.metric("number of consensus features", df.shape[0]) - c1, c2 = st.columns(2) - rows = c1.dataframe(df, selection_mode="multi-row", on_select="rerun")[ - "selection" - ]["rows"] - if rows: - df = df.iloc[rows, 4:] - fig = px.bar(df, barmode="group", labels={"value": "intensity"}) - with c2: - show_fig(fig, "consensus-feature-intensities") + st.title("πŸ“Š DDA-TMT Analysis Results") + + # Tab configuration (TMT-specific) + tabs = st.tabs([ + "πŸ” Identification", + "πŸ” Rescoring & Filter", + "πŸ“Š TMT Reporter Intensity", + "🧬 Protein Grouping", + "πŸŒ‹ Statistical Analysis" + ]) + + id_tab, filter_tab, tmt_tab, prot_tab, stat_tab = tabs + + # Helper: idXML to DataFrame (Maintain existing code) + def idxml_to_df(idxml_file): + proteins, peptides = [], [] + IdXMLFile().load(str(idxml_file), proteins, peptides) + records = [] + for pep in peptides: + rt, mz = pep.getRT(), pep.getMZ() + for h in pep.getHits(): + records.append({ + "RT": rt, "m/z": mz, "Sequence": h.getSequence().toString(), + "Charge": h.getCharge(), "Score": h.getScore(), + "Proteins": ",".join([ev.getProteinAccession() for ev in h.getPeptideEvidences()]) + }) + return pd.DataFrame(records) + + # 1. 
Identification (Comet) + with id_tab: + comet_files = sorted(Path(self.workflow_dir, "results", "comet").glob("*.idXML")) + if comet_files: + selected_file = st.selectbox("Select Identification file", comet_files, key="comet_sb") + df_comet = idxml_to_df(selected_file) + st.dataframe(df_comet, use_container_width=True) + # Scatter plot code can remain the same + + # 2. Filtering (Percolator + IDFilter) + with filter_tab: + filter_files = sorted(Path(self.workflow_dir, "results", "psm_filter").glob("*.idXML")) + if filter_files: + selected_f = st.selectbox("Select Filtered file", filter_files, key="filter_sb") + df_filter = idxml_to_df(selected_f) + st.success(f"Identified {len(df_filter)} PSMs after filtering (FDR < 0.01)") + st.dataframe(df_filter, use_container_width=True) + + # 3. TMT Reporter Intensity (IsobaricAnalyzer) + with tmt_tab: + st.subheader("TMT Reporter Ion Intensity Distribution") + # Extract channel-specific intensities from IsobaricAnalyzer consensusXML + iso_files = sorted(Path(self.workflow_dir, "results", "isobaric_consensusXML").glob("*.consensusXML")) + if iso_files: + sel_iso = st.selectbox("Select TMT result", iso_files) + # For simple visualization, it is better to use the quantitative results CSV if available + # Example distribution using the final CSV (openms_design_protein_openms.csv) + quant_file = Path(self.workflow_dir, "results", "quant", "openms_design_protein_openms.csv") + if quant_file.exists(): + df_q = pd.read_csv(quant_file) + # Filter channel columns (usually prefixed with 'Abundance_' or specific tags) + # Column names need to be verified based on the OpenMS output structure + intensity_cols = [c for c in df_q.columns if 'intensity' in c.lower() or 'abundance' in c.lower()] + if intensity_cols: + fig_box = px.box(df_q.melt(value_vars=intensity_cols), x="variable", y="value", log_y=True, + title="Log-scaled Reporter Intensity Distribution per Channel") + st.plotly_chart(fig_box) + + # 4. 
Protein Grouping & Quantification + with prot_tab: + st.subheader("🧬 Final Protein-Level Results") + final_csv = Path(self.workflow_dir, "results", "quant", "openms_design_protein_openms.csv") + + if final_csv.exists(): + df_final = pd.read_csv(final_csv) + st.info(f"Total Protein Groups: {df_final['ProteinName'].nunique() if 'ProteinName' in df_final.columns else len(df_final)}") + st.dataframe(df_final, use_container_width=True) + + # CSV Download Button + st.download_button("Download Results", df_final.to_csv(index=False), "TMT_Results.csv") else: - st.info( - "πŸ’‘ Select one ore more rows in the table to show a barplot with intensities." - ) + st.warning("Final quantification CSV not found.") - file = Path( - self.workflow_dir, "results", "feature-linking", "feature_matrix.tsv" - ) - if file.exists(): - show_consensus_features() - else: - st.warning("No consensus feature file found. Please run workflow first.") + # 5. Statistical Analysis (Volcano Plot etc.) + with stat_tab: + final_csv = Path(self.workflow_dir, "results", "quant", "openms_design_protein_openms.csv") + + if not final_csv.exists(): + st.warning("Analysis results not found. Please run the workflow first.") + return + + try: + # 1️⃣ Data loading and preprocessing + df_quant = pd.read_csv(final_csv) + + # Identify intensity (abundance) columns from TMT results + # Typically starts with 'abundance_' or 'intensity_' depending on OpenMS output format + intensity_cols = [c for c in df_quant.columns if 'abundance' in c.lower() or 'intensity' in c.lower()] + + if len(intensity_cols) < 2: + st.error("Not enough intensity columns found for comparison.") + return + + # 2️⃣ Group setup (use existing source's group_map) + # In TMT, multiple channels (samples) exist within a single file, + # so map which column belongs to which group (Control/Case). 
+ st.subheader("Group Comparison Setup") + col1, col2 = st.columns(2) + with col1: + group_a_cols = st.multiselect("Select Control Group Channels", intensity_cols, default=[intensity_cols[0]]) + with col2: + group_b_cols = st.multiselect("Select Case Group Channels", intensity_cols, default=[intensity_cols[-1]]) + + if st.button("Run Statistical Analysis"): + stats_results = [] + + for _, row in df_quant.iterrows(): + g1 = row[group_a_cols].values.astype(float) + g2 = row[group_b_cols].values.astype(float) + + # Calculate Log2 Fold Change + log2fc = np.log2(np.mean(g2) / np.mean(g1)) if np.mean(g1) > 0 else 0 + + # T-test (p-value) + _, pval = ttest_ind(g1, g2, nan_policy='omit') + + stats_results.append({ + "Protein": row.get("ProteinName", "Unknown"), + "log2FC": log2fc, + "pvalue": pval, + "-log10_pvalue": -np.log10(pval) if pval > 0 else 0 + }) + + df_stats = pd.DataFrame(stats_results) + + # 3️⃣ Volcano plot visualization + st.divider() + st.subheader("Volcano Plot") + + # Define colors to highlight significant proteins + df_stats['Significance'] = 'Normal' + df_stats.loc[(df_stats['log2FC'] > 1) & (df_stats['pvalue'] < 0.05), 'Significance'] = 'Up' + df_stats.loc[(df_stats['log2FC'] < -1) & (df_stats['pvalue'] < 0.05), 'Significance'] = 'Down' + + fig_volcano = px.scatter( + df_stats, x="log2FC", y="-log10_pvalue", + color="Significance", + hover_data=["Protein"], + color_discrete_map={'Up': 'red', 'Down': 'blue', 'Normal': 'gray'}, + title=f"Comparison: {', '.join(group_b_cols)} vs {', '.join(group_a_cols)}" + ) + + # Guidelines (p=0.05, FC=2) + fig_volcano.add_hline(y=-np.log10(0.05), line_dash="dash", line_color="black") + fig_volcano.add_vline(x=1, line_dash="dash", line_color="black") + fig_volcano.add_vline(x=-1, line_dash="dash", line_color="black") + + st.plotly_chart(fig_volcano, use_container_width=True) + st.dataframe(df_stats.sort_values("pvalue"), use_container_width=True) + + except Exception as e: + st.error(f"Error during statistical analysis: 
{e}") \ No newline at end of file diff --git a/src/common/results_helpers.py b/src/common/results_helpers.py new file mode 100644 index 0000000..8ff3110 --- /dev/null +++ b/src/common/results_helpers.py @@ -0,0 +1,483 @@ +"""Helper functions for results pages.""" +import re +import pandas as pd +import polars as pl +import numpy as np +import streamlit as st +from pathlib import Path +from scipy.stats import ttest_ind +from pyopenms import IdXMLFile, MSExperiment, MzMLFile +from src.workflow.ParameterManager import ParameterManager +from statsmodels.stats.multitest import multipletests + +def get_workflow_dir(workspace): + """Get the workflow directory path.""" + return Path(workspace, "topp-workflow") + + +def idxml_to_df(idxml_file): + """Parse idXML file and return DataFrame with peptide hits.""" + proteins = [] + peptides = [] + IdXMLFile().load(str(idxml_file), proteins, peptides) + + records = [] + for pep in peptides: + rt = pep.getRT() + mz = pep.getMZ() + for h in pep.getHits(): + protein_refs = [ev.getProteinAccession() for ev in h.getPeptideEvidences()] + records.append({ + "RT": rt, + "m/z": mz, + "Sequence": h.getSequence().toString(), + "Charge": h.getCharge(), + "Score": h.getScore(), + "Proteins": ",".join(protein_refs) if protein_refs else None, + }) + + df = pd.DataFrame(records) + if not df.empty: + df["Charge"] = df["Charge"].astype(str) + df["Charge_num"] = df["Charge"].astype(int) + return df + + +def create_psm_scatter_plot(df_plot): + """Create a scatter plot for PSM visualization.""" + import plotly.express as px + + fig = px.scatter( + df_plot, + x="RT", + y="m/z", + color="Score", + custom_data=["index", "Sequence", "Proteins"], + color_continuous_scale=["#a6cee3", "#1f78b4", "#08519c", "#08306b"], + ) + fig.update_traces( + marker=dict(size=6, opacity=0.8), + hovertemplate='Index: %{customdata[0]}
' + + 'RT: %{x:.2f}
' + + 'm/z: %{y:.4f}
' + + 'Score: %{marker.color:.3f}
' + + 'Sequence: %{customdata[1]}
' + + 'Proteins: %{customdata[2]}
' + + '' + ) + fig.update_layout( + coloraxis_colorbar=dict(title="Score"), + hovermode="closest" + ) + return fig + + +def extract_scan_from_ref(spec_ref: str) -> int: + """Extract scan number from spectrum reference string. + + Format: "controllerType=0 controllerNumber=1 scan=1234" + """ + match = re.search(r'scan=(\d+)', spec_ref) + return int(match.group(1)) if match else 0 + + +def extract_scan_number(native_id: str) -> int: + """Extract scan number from native ID.""" + match = re.search(r'scan=(\d+)', native_id) + return int(match.group(1)) if match else 0 + + +def extract_filename_from_idxml(idxml_path: Path) -> str: + """Derive mzML filename from idXML filename.""" + stem = idxml_path.stem + for suffix in ['_comet', '_per', '_filter']: + stem = stem.replace(suffix, '') + return f"{stem}.mzML" + + +def parse_idxml(idxml_path: Path) -> tuple[pl.DataFrame, list[str]]: + """Parse idXML and return DataFrame for openms_insight. + + Returns: + Tuple of (id_df, spectra_data list of source filenames) + """ + proteins = [] + peptides = [] + IdXMLFile().load(str(idxml_path), proteins, peptides) + + # Derive mzML filename from idXML filename (e.g., 02COVID_filter.idXML -> 02COVID.mzML) + spectra_data = [extract_filename_from_idxml(idxml_path)] + + # Build filename to index mapping + filename_to_index = {Path(f).name: i for i, f in enumerate(spectra_data)} + + records = [] + for pep in peptides: + # Get spectrum reference from meta value (key may be bytes or string) + spec_ref = "" + if pep.metaValueExists("spectrum_reference"): + spec_ref = pep.getMetaValue("spectrum_reference") + if isinstance(spec_ref, bytes): + spec_ref = spec_ref.decode() + scan_id = extract_scan_from_ref(spec_ref) + + # Get file index from id_merge_index or derive from filename + file_index = pep.getMetaValue("id_merge_index") if pep.metaValueExists("id_merge_index") else 0 + filename = spectra_data[file_index] if file_index < len(spectra_data) else "" + + for h in pep.getHits(): + 
records.append({ + "id_idx": len(records), + "scan_id": scan_id, + "file_index": file_index, + "filename": Path(filename).name if filename else "", + "sequence": h.getSequence().toString(), + "charge": h.getCharge(), + "mz": pep.getMZ(), + "rt": pep.getRT(), + "score": h.getScore(), + "protein_accession": ";".join([ev.getProteinAccession() for ev in h.getPeptideEvidences()]), + }) + + return pl.DataFrame(records), spectra_data + + +def build_spectra_cache(mzml_dir: Path, filename_to_index: dict) -> tuple[pl.DataFrame, dict]: + """Extract MS2 spectra from mzML files and return DataFrame. + + Args: + mzml_dir: Directory containing mzML files + filename_to_index: Dict mapping filename to file_index + + Returns: + Tuple of (spectra_df, updated filename_to_index) + """ + records = [] + peak_id = 0 + + for mzml_path in sorted(mzml_dir.glob("*.mzML")): + # Get or create file index + if mzml_path.name not in filename_to_index: + filename_to_index[mzml_path.name] = len(filename_to_index) + file_index = filename_to_index[mzml_path.name] + + exp = MSExperiment() + MzMLFile().load(str(mzml_path), exp) + + for spec in exp: + if spec.getMSLevel() != 2: + continue + scan_id = extract_scan_number(spec.getNativeID()) + mz_array, int_array = spec.get_peaks() + + for mz, intensity in zip(mz_array, int_array): + records.append({ + "peak_id": peak_id, + "file_index": file_index, + "scan_id": scan_id, + "mass": float(mz), # Use "mass" not "mz" + "intensity": float(intensity), + }) + peak_id += 1 + + return pl.DataFrame(records), filename_to_index + + +@st.cache_data +def load_abundance_data(workspace_path: str, csv_mtime: float) -> tuple | None: + """Load CSV, compute stats (log2FC, p-value), build pivot_df and expr_df. 
+ + Args: + workspace_path: Path to the workspace directory + csv_mtime: Modification time of CSV file (used as cache key) + + Returns: + Tuple of (pivot_df, expr_df, group_map) or None if data unavailable + """ + workflow_dir = get_workflow_dir(Path(workspace_path)) + quant_dir = workflow_dir / "results" / "quant_results" + + if not quant_dir.exists(): + return None + + csv_files = sorted(quant_dir.glob("*.csv")) + if not csv_files: + return None + + csv_file = csv_files[0] + + try: + df = pd.read_csv(csv_file, sep="\t", comment="#", engine="python") + except Exception: + return None + + st.write(f"Loaded quantification data from {csv_file.name} (mtime: {csv_mtime})") + + if df.empty: + return None + + # ratio column removal + df = df.loc[:, ~df.columns.str.contains('ratio', case=False)] + + # exclude_indices = st.session_state.get("tmt_exclude_indices", []) + # group_map = st.session_state.get("tmt_group_map", {}) + # Get group mapping from parameters + param_manager = ParameterManager(Path(workflow_dir)) + params = param_manager.get_parameters_from_json() + group_map = {} + for key, value in params.items(): + if key.startswith("TMT-group-") and value: + # Extract the numeric part from keys like "TMT-group-sample1" + match = re.search(r'sample(\d+)', key) + if match: + # Subtract 1 to convert to a 0-based index (0, 1, 2...). + # If your samples are already 0-based, remove the -1 adjustment. + index = str(int(match.group(1)) - 1) + group_map[index] = value + + # 1. Extract keys labeled as "skip" from group_map as integer list + exclude_indices = [ + int(k) for k, v in group_map.items() if v.lower() == "skip" + ] + + # 2. Remove "skip" entries from group_map (keep only actual group info) + group_map = { + int(k): v for k, v in group_map.items() if v.lower() != "skip" + } + + start_column_offset = 4 + + # st.write("exclude_indices:", exclude_indices) + # st.write("group_map:", group_map) + + if not group_map: + st.warning("⚠️ Group mapping information is missing. 
Please configure sample groups in the Setup page.") + return None + + if exclude_indices: + # st.write("Current columns:", df.columns.tolist()) + # st.write("Number of columns:", len(df.columns)) + # st.write("Exclude indices:", exclude_indices) + # st.write("Offset:", start_column_offset) + cols_to_drop = [df.columns[i + start_column_offset] for i in exclude_indices] + df_cleaned = df.drop(columns=cols_to_drop) + else: + df_cleaned = df.copy() + + if group_map: + # Create new row data (defaulting to empty strings) + # Create a list with the same length as the column order of df_cleaned + new_row = [""] * len(df_cleaned.columns) + new_row[0] = "Group" + + # Get the column names of the current dataframe as a list + current_cols = df_cleaned.columns.tolist() + original_cols = df.columns.tolist() + + for col_name in current_cols[start_column_offset:]: + # Check the original index position of this column + original_idx = original_cols.index(col_name) - start_column_offset + col_pos = current_cols.index(col_name) + new_row[col_pos] = group_map.get(original_idx, "NA") + + # Insert the row at the top of the dataframe + # Create a new DF and concatenate to prepend the row to existing data + group_df = pd.DataFrame([new_row], columns=df_cleaned.columns) + df_with_groups = pd.concat([group_df, df_cleaned], ignore_index=True) + + # drop_msg = f"{len(exclude_indices)} channels dropped" if exclude_indices else "No channels dropped" + # st.success(f"βœ… {drop_msg} and Group names have been inserted at the top of the data.") + + # st.write("### Data Preview with Group Information") + # st.dataframe(df_with_groups.head(10)) + + if group_map and len(set(group_map.values())) >= 2: + # Prepare data for calculation + # Extract group information from row 0 of df_with_groups (the newly added Group row) + # Actual sample data starts from the 5th column (index 4) + group_info_row = df_with_groups.iloc[0] + + # Get unique group names (excluding NA) + unique_groups = sorted([g for g in 
set(group_map.values()) if g != "NA"]) + g1_name, g2_name = unique_groups[0], unique_groups[1] + + # Extract numerical data for statistical calculation (from row 1 and column index 4 onwards) + # Convert to numeric type (to prevent calculation errors) + numeric_data = df_with_groups.iloc[1:, 4:].apply(pd.to_numeric, errors='coerce') + + # Column indexing by group + # Categorize columns based on the values in the Group row + g1_cols = [col for col in numeric_data.columns if group_info_row[col] == g1_name] + g2_cols = [col for col in numeric_data.columns if group_info_row[col] == g2_name] + + # Calculate log2FC and p-value for each row + def run_stats(row): + v1 = row[g1_cols].dropna() + v2 = row[g2_cols].dropna() + + # log2FC (Group2 / Group1) + m1, m2 = v1.mean(), v2.mean() + l2fc = np.log2(m2 / m1) if m1 > 0 and m2 > 0 else np.nan + + # p-value (T-test) + if len(v1) > 1 and len(v2) > 1: + _, pval = ttest_ind(v1, v2, equal_var=False) + else: + pval = np.nan + return pd.Series([l2fc, pval]) + + stats_results = numeric_data.apply(run_stats, axis=1) + stats_results.columns = ['log2FC', 'p-value'] + # Add Adjusted p-value (FDR) calculation + if not stats_results['p-value'].isna().all(): + # Select only rows that contain p-values + mask = stats_results['p-value'].notna() + # Apply Benjamini-Hochberg (BH) correction + _, p_adj, _, _ = multipletests(stats_results.loc[mask, 'p-value'], method='fdr_bh') + stats_results.loc[mask, 'p-adj'] = p_adj + else: + stats_results['p-adj'] = np.nan + + # Construct the final dataframe (Based on df_cleaned - excluding the group row) + # Insert calculation results into the 2nd and 3rd column positions + pivot_df = df_cleaned.copy() + pivot_df.insert(1, "log2FC", stats_results['log2FC'].values) + pivot_df.insert(2, "p-value", stats_results['p-value'].values) + pivot_df.insert(3, "p-adj", stats_results['p-adj'].values) + + # st.success(f"Analysis Complete: {g1_name} (n={len(g1_cols)}) vs {g2_name} (n={len(g2_cols)})") + + # Set the first 
column ('protein') of final_df as the index + protein_col = pivot_df.columns[0] + sample_cols = current_cols[start_column_offset:] # Identify actual sample column names + + # Select sample columns and create a matrix + expr_df = pivot_df.set_index(protein_col)[sample_cols] + + # Replace 0 with NaN (to prevent log transformation errors) + expr_df = expr_df.replace(0, np.nan) + + # Log2 transformation (data normalization) + expr_df = np.log2(expr_df + 1) + + # Remove proteins (rows) with any missing values + expr_df = expr_df.dropna() + + return pivot_df, expr_df, group_map + else: + st.warning("⚠️ At least two distinct groups are required for statistical analysis.") + else: + st.warning("⚠️ No group mapping information is set. Please check the Configure page.") + return None + + # Get group mapping from parameters + # param_manager = ParameterManager(workflow_dir) + # params = param_manager.get_parameters_from_json() + # group_map = { + # key[11:]: value # Remove "mzML-group-" prefix + # for key, value in params.items() + # if key.startswith("mzML-group-") and value + # } + + # if not group_map: + # return None + + # df["Sample"] = df["Reference"].str.replace(".mzML", "", regex=False) + # df["Group"] = df["Reference"].map(group_map) + # df = df.dropna(subset=["Group"]) + + # groups = sorted(df["Group"].unique()) + + # if len(groups) < 2: + # return None + + # group1, group2 = groups[:2] + + # # Compute statistics per protein + # stats_rows = [] + # for protein, protein_df in df.groupby("ProteinName"): + # g1_vals = protein_df[protein_df["Group"] == group1]["Intensity"].values + # g2_vals = protein_df[protein_df["Group"] == group2]["Intensity"].values + + # if len(g1_vals) < 2 or len(g2_vals) < 2: + # pval = np.nan + # else: + # _, pval = ttest_ind(g1_vals, g2_vals, equal_var=False) + + # mean_g1 = np.mean(g1_vals) if len(g1_vals) > 0 else np.nan + # mean_g2 = np.mean(g2_vals) if len(g2_vals) > 0 else np.nan + + # log2fc = np.log2(mean_g2 / mean_g1) if mean_g1 > 0 
else np.nan + + # stats_rows.append({ + # "ProteinName": protein, + # "log2FC": log2fc, + # "p-value": pval, + # }) + + # stats_df = pd.DataFrame(stats_rows) + + # if not stats_df.empty: + # mask = stats_df["p-value"].notna() + # if mask.any(): + # _, p_adj, _, _ = multipletests(stats_df.loc[mask, "p-value"], method="fdr_bh") + # stats_df.loc[mask, "p-adj"] = p_adj + # else: + # stats_df["p-adj"] = np.nan + + # # Order samples by group (group2 first, then group1) + # sample_group_df = df[["Sample", "Group"]].drop_duplicates() + # group2_samples = sample_group_df[sample_group_df["Group"] == group2]["Sample"].tolist() + # group1_samples = sample_group_df[sample_group_df["Group"] == group1]["Sample"].tolist() + # all_samples = group2_samples + group1_samples + + # # Build pivot table + # pivot_list = [] + # for protein, group_df in df.groupby("ProteinName"): + # peptides = ";".join(group_df["PeptideSequence"].unique()) + # intensity_dict = group_df.groupby("Sample")["Intensity"].sum().to_dict() + # intensity_dict_complete = { + # sample: intensity_dict.get(sample, 0) + # for sample in all_samples + # } + # row = { + # "ProteinName": protein, + # **intensity_dict_complete, + # "PeptideSequence": peptides, + # } + # pivot_list.append(row) + + # pivot_df = pd.DataFrame(pivot_list) + # pivot_df = pivot_df.merge(stats_df, on="ProteinName", how="left") + # pivot_df = pivot_df[["ProteinName", "log2FC", "p-value", "p-adj"] + all_samples + ["PeptideSequence"]] + + # # Build expression matrix (log2-transformed) + # expr_df = pivot_df.set_index("ProteinName")[all_samples] + # expr_df = expr_df.replace(0, np.nan) + # expr_df = np.log2(expr_df + 1) + # expr_df = expr_df.dropna() + + # return pivot_df, expr_df, group_map + + +def get_abundance_data(workspace: Path) -> tuple | None: + """Wrapper that handles cache key (workspace + CSV mtime). 
+ + Args: + workspace: Path to the workspace directory + + Returns: + Tuple of (pivot_df, expr_df, group_map) or None if data unavailable + """ + workflow_dir = get_workflow_dir(workspace) + quant_dir = workflow_dir / "results" / "quant_results" + + if not quant_dir.exists(): + return None + + csv_files = sorted(quant_dir.glob("*.csv")) + if not csv_files: + return None + + csv_mtime = csv_files[0].stat().st_mtime + return load_abundance_data(str(workspace), csv_mtime) From acbffa9856c3f4471367d45a927dd712c0976042 Mon Sep 17 00:00:00 2001 From: Yoo HoJun Date: Fri, 22 May 2026 13:07:50 +0900 Subject: [PATCH 09/11] feat: support no-value CLI flag parameters in UI and execution - Update `StreamlitUI` to handle `flag_parameters` and render them as selectboxes. - Persist flag parameter metadata into session state and JSON configurations. - Update `CommandExecutor` to conditionally append flags without appending values. - Add readable command logging/printing before execution. --- src/workflow/CommandExecutor.py | 61 +++++++++++++++++++++++-------- src/workflow/StreamlitUI.py | 63 ++++++++++++++++++++++++++++----- 2 files changed, 101 insertions(+), 23 deletions(-) diff --git a/src/workflow/CommandExecutor.py b/src/workflow/CommandExecutor.py index 14d28a3..4c1d433 100644 --- a/src/workflow/CommandExecutor.py +++ b/src/workflow/CommandExecutor.py @@ -21,6 +21,7 @@ class CommandExecutor: for execution. 
""" # Methods for running commands and logging + def __init__(self, workflow_dir: Path, logger: Logger, parameter_manager: ParameterManager): self.pid_dir = Path(workflow_dir, "pids") self.logger = logger @@ -268,6 +269,11 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}, tool # Load merged parameters (_defaults + user overrides) for this tool instance merged_params = self.parameter_manager.get_merged_params(params_key) + flag_map = self.parameter_manager.get_parameters_from_json().get("_flag_params", {}) + if not flag_map: + flag_map = st.session_state.get("_topp_flag_params", {}) + flag_params = set(flag_map.get(params_key, [])) + # Construct commands for each process for i in range(n_processes): command = [tool] @@ -288,28 +294,55 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}, tool command += [value[i]] # Add merged TOPP tool parameters (_defaults + user overrides) for k, v in merged_params.items(): - command += [f"-{k}"] - # Skip only empty strings (pass flag with no value) - # Note: 0 and 0.0 are valid values, so use explicit check - if v != "" and v is not None: - if isinstance(v, str) and "\n" in v: - command += v.split("\n") + if k in flag_params: + # CLI flag: include "-k" only when enabled + if isinstance(v, str): + is_enabled = v.lower() in {"true", "1", "yes", "on"} else: - command += [str(v)] + is_enabled = bool(v) + if is_enabled: + command += [f"-{k}"] + continue + # For non-flag parameters, skip entirely if empty. + # Note: 0 and 0.0 are valid values, so use explicit checks. 
+ if v == "" or v is None: + continue + command += [f"-{k}"] + if isinstance(v, str) and "\n" in v: + command += v.split("\n") + elif isinstance(v, bool): + command += [str(v).lower()] + else: + command += [str(v)] # Add custom parameters for k, v in custom_params.items(): - command += [f"-{k}"] - # Skip only empty strings (pass flag with no value) - # Note: 0 and 0.0 are valid values, so use explicit check - if v != "" and v is not None: - if isinstance(v, list): - command += [str(x) for x in v] + if k in flag_params: + if isinstance(v, str): + is_enabled = v.lower() in {"true", "1", "yes", "on"} else: - command += [str(v)] + is_enabled = bool(v) + if is_enabled: + command += [f"-{k}"] + continue + if v == "" or v is None: + continue + command += [f"-{k}"] + if isinstance(v, list): + command += [str(x) for x in v] + elif isinstance(v, bool): + command += [str(v).lower()] + else: + command += [str(v)] # Add threads parameter for TOPP tools command += ["-threads", str(threads_per_command)] commands.append(command) + for idx, cmd in enumerate(commands): + # Print list-form command joined into a single string for readability + print(f" πŸ”Ή Command {idx + 1}: {' '.join(cmd)}") + print("==========================================================\n") + + # Run command(s) if len(commands) == 1: return self.run_command(commands[0]) diff --git a/src/workflow/StreamlitUI.py b/src/workflow/StreamlitUI.py index c088096..ff3dd05 100644 --- a/src/workflow/StreamlitUI.py +++ b/src/workflow/StreamlitUI.py @@ -31,6 +31,7 @@ class StreamlitUI: """ # Methods for Streamlit UI components + def __init__(self, workflow_dir, logger, executor, parameter_manager): self.workflow_dir = workflow_dir self.logger = logger @@ -612,6 +613,7 @@ def input_TOPP( num_cols: int = 4, exclude_parameters: List[str] = [], include_parameters: List[str] = [], + flag_parameters: List[str] = [], display_tool_name: bool = True, display_subsections: bool = True, display_subsection_tabs: bool = False, @@ -628,6 
+630,8 @@ def input_TOPP( num_cols (int, optional): Number of columns to use for the layout. Defaults to 3. exclude_parameters (List[str], optional): List of parameter names to exclude from the widget. Defaults to an empty list. include_parameters (List[str], optional): List of parameter names to include in the widget. Defaults to an empty list. + flag_parameters (List[str], optional): List of parameter names that should + be treated as no-value CLI flags during command construction. display_tool_name (bool, optional): Whether to display the TOPP tool name. Defaults to True. display_subsections (bool, optional): Whether to split parameters into subsections based on the prefix. Defaults to True. display_subsection_tabs (bool, optional): Whether to display main subsections in separate tabs (if more than one main section). Defaults to False. @@ -647,6 +651,16 @@ def input_TOPP( if "_topp_tool_instance_map" not in st.session_state: st.session_state["_topp_tool_instance_map"] = {} st.session_state["_topp_tool_instance_map"][tool_instance_name] = topp_tool_name + if "_topp_flag_params" not in st.session_state: + st.session_state["_topp_flag_params"] = {} + st.session_state["_topp_flag_params"][tool_instance_name] = list(flag_parameters) + # Persist flag metadata so execution still sees it outside UI reruns/session context. + params = self.parameter_manager.get_parameters_from_json() + if "_flag_params" not in params: + params["_flag_params"] = {} + params["_flag_params"][tool_instance_name] = list(flag_parameters) + with open(self.parameter_manager.params_file, "w", encoding="utf-8") as f: + json.dump(params, f, indent=4) if not display_subsections: display_subsection_tabs = False @@ -738,6 +752,7 @@ def _matches_parameter(pattern: str, key: bytes) -> bool: ":".join(key.decode().split(":")[:-1]) ), } + p["is_flag"] = (b"flag" in param.getTags(key)) # Parameter sections and subsections as string (e.g. 
"section:subsection") if display_subsections: p["sections"] = ":".join( @@ -817,27 +832,58 @@ def display_TOPP_params(params: dict, num_cols): """Displays individual TOPP parameters in given number of columns""" cols = st.columns(num_cols) i = 0 + # st.write("--------------------------------display_TOPP_params------------------------------------") + # st.write(params) for p in params: # get key and name – use tool_instance_name in session state key key_str = p['key'].decode() if tool_instance_name != topp_tool_name: key_str = key_str.replace(f"{topp_tool_name}:1:", f"{tool_instance_name}:1:", 1) key = f"{self.parameter_manager.topp_param_prefix}{key_str}" + # st.write("--------------------------------p['key'](display_TOPP_params loop)------------------------------------") + # st.write(key) name = p["name"] try: # sometimes strings with newline, handle as list if isinstance(p["value"], str) and "\n" in p["value"]: p["value"] = p["value"].split("\n") + # no-value CLI flag parameters should be shown as checkboxes + if p.get("is_flag", False): + flag_default = p["value"] + if isinstance(flag_default, str): + flag_default = flag_default.lower() in {"true", "1", "yes", "on"} + else: + flag_default = bool(flag_default) + # Streamlit widget keys persist in session_state and can override + # updated custom_defaults. Normalize and seed key explicitly. 
+ if key in st.session_state: + current = st.session_state[key] + if isinstance(current, str): + st.session_state[key] = current.lower() in {"true", "1", "yes", "on"} + else: + st.session_state[key] = bool(current) + else: + st.session_state[key] = flag_default + cols[i].selectbox( + name, + options=[True, False], + index=0 if st.session_state[key] else 1, + format_func=lambda x: "True" if x else "False", + help=p["description"], + key=key, + ) # bools - if isinstance(p["value"], bool): - cols[i].markdown("##") - cols[i].checkbox( + elif isinstance(p["value"], bool): + bool_value = ( + (p["value"] == "true") + if type(p["value"]) == str + else p["value"] + ) + cols[i].selectbox( name, - value=( - (p["value"] == "true") - if type(p["value"]) == str - else p["value"] - ), + options=[True, False], + index=0 if bool_value else 1, + format_func=lambda x: "True" if x else "False", help=p["description"], key=key, ) @@ -922,7 +968,6 @@ def on_multiselect_change(dk=display_key, tk=key): cols[i].error(f"Error in parameter **{p['name']}**.") print('Error parsing "' + p["name"] + '": ' + str(e)) - for section, params in param_sections.items(): if tabs is None: show_subsection_header(section, display_subsections) From 62de516a9f9337131cee50ae71da750518a83ce2 Mon Sep 17 00:00:00 2001 From: Yoo HoJun Date: Fri, 22 May 2026 13:09:54 +0900 Subject: [PATCH 10/11] fix: support process termination on Windows in WorkflowManager - Use Windows `taskkill` command via subprocess to forcefully terminate process trees on Windows. - Fallback to `os.kill` with `SIGTERM` on Linux/macOS environments. - Improve exception handling and stale PID file cleanup logic during process stop. 
--- src/workflow/WorkflowManager.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/workflow/WorkflowManager.py b/src/workflow/WorkflowManager.py index a87fb33..c35ecc5 100644 --- a/src/workflow/WorkflowManager.py +++ b/src/workflow/WorkflowManager.py @@ -194,6 +194,8 @@ def _stop_local_workflow(self) -> bool: """Stop locally running workflow process""" import os import signal + import platform + import subprocess pid_dir = self.executor.pid_dir if not pid_dir.exists(): @@ -203,11 +205,17 @@ def _stop_local_workflow(self) -> bool: for pid_file in pid_dir.iterdir(): try: pid = int(pid_file.name) - os.kill(pid, signal.SIGTERM) + if platform.system() == "Windows": + subprocess.call(["taskkill", "/F", "/T", "/PID", str(pid)], + capture_output=True, text=True) + else: + os.kill(pid, signal.SIGTERM) pid_file.unlink() stopped = True - except (ValueError, ProcessLookupError, PermissionError): - pid_file.unlink() # Clean up stale PID file + except (ValueError, ProcessLookupError, PermissionError, Exception) as e: + print(f"Error stopping process {pid}: {e}") + if pid_file.exists(): + pid_file.unlink() # Clean up stale PID file # Clean up the pid directory shutil.rmtree(pid_dir, ignore_errors=True) From b32af7b40a76c6b2ca694bbd16b9e14ffa991f3e Mon Sep 17 00:00:00 2001 From: Yoo HoJun Date: Mon, 1 Jun 2026 14:16:17 +0900 Subject: [PATCH 11/11] chore: delete file status log print in results_helpers --- src/common/results_helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/common/results_helpers.py b/src/common/results_helpers.py index 8ff3110..e2e268a 100644 --- a/src/common/results_helpers.py +++ b/src/common/results_helpers.py @@ -210,8 +210,6 @@ def load_abundance_data(workspace_path: str, csv_mtime: float) -> tuple | None: df = pd.read_csv(csv_file, sep="\t", comment="#", engine="python") except Exception: return None - - st.write(f"Loaded quantification data from {csv_file.name} (mtime: {csv_mtime})") if df.empty: return 
None