Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 56 additions & 15 deletions python/cuopt_server/cuopt_server/tests/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ def cuopt_service_sync(
# Fixture and client to allow full cuopt service
# to run as a separate process for multiple tests
cuoptmain = None
# True after server has passed initial healthcheck; used to fail-fast on later crashes
_server_was_up = False
# Use module name instead of file path to ensure we use the installed package
server_script = "-m"
server_module = "cuopt_server.cuopt_service"
Expand Down Expand Up @@ -296,7 +298,21 @@ def signal_handler(signum, frame):
signal.signal(signal.SIGTERM, signal_handler)


def _exit_if_server_gone(exc):
"""Exit pytest immediately when server was up but is now unreachable (e.g. crashed)."""
global _server_was_up
if _server_was_up:
pytest.exit(
"cuOpt server stopped responding (connection error). "
"Skipping remaining server tests to reduce log noise. "
"Check server startup and dependencies (e.g. cudf/GPU).",
returncode=1,
)
raise exc


def spinup_wait():
global _server_was_up
client = RequestClient()
count = 0
result = None
Expand All @@ -309,7 +325,13 @@ def spinup_wait():
break
except Exception:
time.sleep(1)
assert result.status_code == 200
if result is None or result.status_code != 200:
pytest.exit(
"cuOpt server failed to pass healthcheck after 30s. "
"Skipping all server tests. Check server logs for startup errors (e.g. cudf/GPU).",
returncode=1,
)
_server_was_up = True


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -347,9 +369,12 @@ def poll_for_completion(self, reqId, delete=True):
cnt = 0
headers = {"Accept": "application/json"}
while True:
res = requests.get(
self.url + f"/cuopt/solution/{reqId}", headers=headers
)
try:
res = requests.get(
self.url + f"/cuopt/solution/{reqId}", headers=headers
)
except (requests.ConnectionError, requests.ConnectTimeout) as e:
_exit_if_server_gone(e)
Comment on lines +372 to +377
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

rg -nP 'requests\.(get|post|delete)\(' python/cuopt_server/cuopt_server/tests/utils/utils.py -A8 -B1

Repository: NVIDIA/cuopt

Length of output: 1909


🏁 Script executed:

cat -n python/cuopt_server/cuopt_server/tests/utils/utils.py | sed -n '360,450p'

Repository: NVIDIA/cuopt

Length of output: 3329


Add timeout parameter to all requests calls and broaden exception handling.

At lines 373–375, 386–388, 406–412, 431–433, and 439–444, requests.get(), requests.post(), and requests.delete() lack a timeout parameter, preventing read timeouts from triggering promptly. Additionally, exception handlers only catch ConnectTimeout, not the broader Timeout exception (which includes read timeouts), weakening fail-fast behavior when the server becomes unresponsive.

Add timeout to all requests calls and catch requests.Timeout instead of just requests.ConnectTimeout.

Suggested approach
 class RequestClient:
     def __init__(self, port=5555):
         self.ip = "127.0.0.1"
         self.port = port
         self.url = f"http://{self.ip}:{self.port}"
+        self.http_timeout = 10

     def poll_for_completion(self, reqId, delete=True):
@@
-                res = requests.get(
-                    self.url + f"/cuopt/solution/{reqId}", headers=headers
-                )
-            except (requests.ConnectionError, requests.ConnectTimeout) as e:
+                res = requests.get(
+                    self.url + f"/cuopt/solution/{reqId}",
+                    headers=headers,
+                    timeout=self.http_timeout,
+                )
+            except (requests.ConnectionError, requests.Timeout) as e:
                 _exit_if_server_gone(e)
@@
                 requests.delete(
-                    self.url + f"/cuopt/solution/{reqId}", headers=headers
+                    self.url + f"/cuopt/solution/{reqId}",
+                    headers=headers,
+                    timeout=self.http_timeout,
                 )
-            except (requests.ConnectionError, requests.ConnectTimeout) as e:
+            except (requests.ConnectionError, requests.Timeout) as e:
                 _exit_if_server_gone(e)
@@
             res = requests.post(
                 self.url + endpoint,
                 params=params,
                 headers=headers,
                 json=json,
                 data=data,
+                timeout=self.http_timeout,
             )
-        except (requests.ConnectionError, requests.ConnectTimeout) as e:
+        except (requests.ConnectionError, requests.Timeout) as e:
             _exit_if_server_gone(e)
@@
             return requests.get(
-                self.url + endpoint, headers=headers, json=json
+                self.url + endpoint, headers=headers, json=json, timeout=self.http_timeout
             )
-        except (requests.ConnectionError, requests.ConnectTimeout) as e:
+        except (requests.ConnectionError, requests.Timeout) as e:
             _exit_if_server_gone(e)
@@
             return requests.delete(
                 self.url + endpoint,
                 params=params,
                 headers=headers,
                 json=json,
+                timeout=self.http_timeout,
             )
-        except (requests.ConnectionError, requests.ConnectTimeout) as e:
+        except (requests.ConnectionError, requests.Timeout) as e:
             _exit_if_server_gone(e)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@python/cuopt_server/cuopt_server/tests/utils/utils.py` around lines 372 -
377, The requests calls in utils.py (e.g., the requests.get to self.url +
f"/cuopt/solution/{reqId}" that currently calls _exit_if_server_gone(e) on
exceptions) must include a timeout argument and broaden exception handling to
catch requests.Timeout instead of only requests.ConnectTimeout; update every
requests.get/requests.post/requests.delete call noted in the diff to pass a
sensible timeout (e.g., timeout=5 or a configurable value) and change the except
clauses from (requests.ConnectionError, requests.ConnectTimeout) to
(requests.ConnectionError, requests.Timeout) so read timeouts are handled and
routed to _exit_if_server_gone or the existing error path.

if "response" in res.json() or "error" in res.json():
break
time.sleep(1)
Expand All @@ -361,6 +386,8 @@ def poll_for_completion(self, reqId, delete=True):
requests.delete(
self.url + f"/cuopt/solution/{reqId}", headers=headers
)
except (requests.ConnectionError, requests.ConnectTimeout) as e:
_exit_if_server_gone(e)
except Exception:
pass
return res
Expand All @@ -375,13 +402,16 @@ def post(
block=True,
delete=True,
):
res = requests.post(
self.url + endpoint,
params=params,
headers=headers,
json=json,
data=data,
)
try:
res = requests.post(
self.url + endpoint,
params=params,
headers=headers,
json=json,
data=data,
)
except (requests.ConnectionError, requests.ConnectTimeout) as e:
_exit_if_server_gone(e)

# cuopt/cuot is already blocking, don't ever poll
if endpoint == "/cuopt/cuopt":
Expand All @@ -397,9 +427,20 @@ def post(
return self.poll_for_completion(res.json()["reqId"], delete)

def get(self, endpoint, headers=None, json=None):
return requests.get(self.url + endpoint, headers=headers, json=json)
try:
return requests.get(
self.url + endpoint, headers=headers, json=json
)
except (requests.ConnectionError, requests.ConnectTimeout) as e:
_exit_if_server_gone(e)

def delete(self, endpoint, headers=None, json=None, params=None):
return requests.delete(
self.url + endpoint, params=params, headers=headers, json=json
)
try:
return requests.delete(
self.url + endpoint,
params=params,
headers=headers,
json=json,
)
except (requests.ConnectionError, requests.ConnectTimeout) as e:
_exit_if_server_gone(e)
Loading