From 6ac710f3b09494e501dc09c8885ee1f31f9abcfc Mon Sep 17 00:00:00 2001 From: ehhuang Date: Fri, 1 Aug 2025 16:23:54 -0700 Subject: [PATCH] fix(recording): endpoint resolution (#3013) # What does this PR do? ## Test Plan --- llama_stack/testing/inference_recorder.py | 62 +- tests/integration/recordings/index.sqlite | Bin 45056 -> 45056 bytes .../recordings/responses/00ba04f74a96.json | 10 +- .../recordings/responses/0b27fd737699.json | 10 +- .../recordings/responses/0b3f2e4754ff.json | 235 + .../recordings/responses/0e8f2b001dd9.json | 56 + .../recordings/responses/10eea8c15ddc.json | 10 +- .../recordings/responses/17253d7cc667.json | 10 +- .../recordings/responses/173ecb3aab28.json | 251 + .../recordings/responses/174458ad71b2.json | 10 +- .../recordings/responses/178016edef0e.json | 10 +- .../recordings/responses/197228e26971.json | 10 +- .../recordings/responses/198ef7208389.json | 10 +- .../recordings/responses/1adfaa0e062e.json | 10 +- .../recordings/responses/1b8394f90636.json | 10 +- .../recordings/responses/211b1562d4e6.json | 10 +- .../recordings/responses/2afe3b38ca01.json | 34 +- .../recordings/responses/2d187a11704c.json | 208 +- .../recordings/responses/325a72db5755.json | 544 ++ .../recordings/responses/3c3f13cb7794.json | 30 +- .../recordings/responses/3ca695048bee.json | 104 + .../recordings/responses/40f524d1934a.json | 30 +- .../recordings/responses/48d2fb183a2a.json | 10 +- .../recordings/responses/4a3a4447b16b.json | 40 +- .../recordings/responses/50340cd4d253.json | 10 +- .../recordings/responses/545d86510a80.json | 34 +- .../recordings/responses/554de3cd986f.json | 46 +- .../recordings/responses/6906a6e71988.json | 10 +- .../recordings/responses/6cc063bbd7d3.json | 48 +- .../recordings/responses/6d35c91287e2.json | 34 +- .../recordings/responses/6f96090aa955.json | 648 +++ .../recordings/responses/6fbea1abca7c.json | 46 +- .../recordings/responses/6fe1d4fedf12.json | 4603 +++++++++++++++++ .../recordings/responses/70adef2c30c4.json | 10 +- .../recordings/responses/75d0dd9d0fa3.json | 10 +- .../recordings/responses/7b4815aba6c5.json | 46 +- .../recordings/responses/80e4404d8987.json | 28 +- .../recordings/responses/81a91f79c51d.json | 108 + .../recordings/responses/836f51dfb3c5.json | 10 +- .../recordings/responses/840fbb380b73.json | 10 +- .../recordings/responses/84cab42e1f5c.json | 974 ++-- .../recordings/responses/85594a69d74a.json | 10 +- .../recordings/responses/97d3812bfccb.json | 10 +- .../recordings/responses/97e259c0d3e5.json | 46 +- .../recordings/responses/9b812cbcb88d.json | 10 +- .../recordings/responses/9c140a29ae09.json | 34 +- .../recordings/responses/9e7a83d3d596.json | 12 +- .../recordings/responses/9fadf5a3d68f.json | 10 +- .../recordings/responses/a0c4df33879f.json | 1740 +++++++ .../recordings/responses/a4c8d19bb1eb.json | 56 + .../recordings/responses/a59d0d7c1485.json | 10 +- .../recordings/responses/a6810c23eda8.json | 94 +- .../recordings/responses/ae6835cfe70e.json | 10 +- .../recordings/responses/b91f1fb4aedb.json | 30 +- .../recordings/responses/bbd0637dce16.json | 466 +- .../recordings/responses/c9cba6f3ee38.json | 10 +- .../recordings/responses/d0ac68cbde69.json | 30 +- .../recordings/responses/d4c86ac355fb.json | 10 +- .../recordings/responses/dd226d71f844.json | 34 +- .../recordings/responses/dd9e7d5913e9.json | 12 +- .../recordings/responses/e96152610712.json | 10 +- .../recordings/responses/e9c8a0e4f0e0.json | 56 + .../recordings/responses/eee47930e3ae.json | 46 +- .../recordings/responses/ef59cbff54d0.json | 10 +- .../recordings/responses/f477c2fe1332.json | 50 +- .../recordings/responses/f70f30f54211.json | 84 + .../recordings/responses/fcdef245da95.json | 10 +- 67 files changed, 9880 insertions(+), 1409 deletions(-) create mode 100644 tests/integration/recordings/responses/0b3f2e4754ff.json create mode 100644 tests/integration/recordings/responses/0e8f2b001dd9.json create mode 100644 tests/integration/recordings/responses/173ecb3aab28.json create mode 100644 tests/integration/recordings/responses/325a72db5755.json create mode 100644 tests/integration/recordings/responses/3ca695048bee.json create mode 100644 tests/integration/recordings/responses/6f96090aa955.json create mode 100644 tests/integration/recordings/responses/6fe1d4fedf12.json create mode 100644 tests/integration/recordings/responses/81a91f79c51d.json create mode 100644 tests/integration/recordings/responses/a0c4df33879f.json create mode 100644 tests/integration/recordings/responses/a4c8d19bb1eb.json create mode 100644 tests/integration/recordings/responses/e9c8a0e4f0e0.json create mode 100644 tests/integration/recordings/responses/f70f30f54211.json diff --git a/llama_stack/testing/inference_recorder.py b/llama_stack/testing/inference_recorder.py index abfefa0ce..478f77773 100644 --- a/llama_stack/testing/inference_recorder.py +++ b/llama_stack/testing/inference_recorder.py @@ -217,55 +217,21 @@ class ResponseStorage: return cast(dict[str, Any], data) -async def _patched_inference_method(original_method, self, client_type, method_name=None, *args, **kwargs): +async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs): global _current_mode, _current_storage if _current_mode == InferenceMode.LIVE or _current_storage is None: # Normal operation return await original_method(self, *args, **kwargs) - # Get base URL and endpoint based on client type + # Get base URL based on client type if client_type == "openai": base_url = str(self._client.base_url) - - # Determine endpoint based on the method's module/class path - method_str = str(original_method) - if "chat.completions" in method_str: - endpoint = "/v1/chat/completions" - elif "embeddings" in method_str: - endpoint = "/v1/embeddings" - elif "completions" in method_str: - endpoint = "/v1/completions" - else: - # Fallback - try to guess from the self object - if hasattr(self, "_resource") and hasattr(self._resource, "_resource"): - resource_name = getattr(self._resource._resource, "_resource", "unknown") - if "chat" in str(resource_name): - endpoint = "/v1/chat/completions" - elif "embeddings" in str(resource_name): - endpoint = "/v1/embeddings" - else: - endpoint = "/v1/completions" - else: - endpoint = "/v1/completions" - elif client_type == "ollama": # Get base URL from the client (Ollama client uses host attribute) base_url = getattr(self, "host", "http://localhost:11434") if not base_url.startswith("http"): base_url = f"http://{base_url}" - - # Determine endpoint based on method name - if method_name == "generate": - endpoint = "/api/generate" - elif method_name == "chat": - endpoint = "/api/chat" - elif method_name == "embed": - endpoint = "/api/embeddings" - elif method_name == "list": - endpoint = "/api/tags" - else: - endpoint = f"/api/{method_name}" else: raise ValueError(f"Unknown client type: {client_type}") @@ -366,14 +332,18 @@ def patch_inference_clients(): # Create patched methods for OpenAI client async def patched_chat_completions_create(self, *args, **kwargs): return await _patched_inference_method( - _original_methods["chat_completions_create"], self, "openai", *args, **kwargs + _original_methods["chat_completions_create"], self, "openai", "/v1/chat/completions", *args, **kwargs ) async def patched_completions_create(self, *args, **kwargs): - return await _patched_inference_method(_original_methods["completions_create"], self, "openai", *args, **kwargs) + return await _patched_inference_method( + _original_methods["completions_create"], self, "openai", "/v1/completions", *args, **kwargs + ) async def patched_embeddings_create(self, *args, **kwargs): - return await _patched_inference_method(_original_methods["embeddings_create"], self, "openai", *args, **kwargs) + return await _patched_inference_method( + _original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs + ) # Apply OpenAI patches AsyncChatCompletions.create = patched_chat_completions_create @@ -383,30 +353,32 @@ def patch_inference_clients(): # Create patched methods for Ollama client async def patched_ollama_generate(self, *args, **kwargs): return await _patched_inference_method( - _original_methods["ollama_generate"], self, "ollama", "generate", *args, **kwargs + _original_methods["ollama_generate"], self, "ollama", "/api/generate", *args, **kwargs ) async def patched_ollama_chat(self, *args, **kwargs): return await _patched_inference_method( - _original_methods["ollama_chat"], self, "ollama", "chat", *args, **kwargs + _original_methods["ollama_chat"], self, "ollama", "/api/chat", *args, **kwargs ) async def patched_ollama_embed(self, *args, **kwargs): return await _patched_inference_method( - _original_methods["ollama_embed"], self, "ollama", "embed", *args, **kwargs + _original_methods["ollama_embed"], self, "ollama", "/api/embeddings", *args, **kwargs ) async def patched_ollama_ps(self, *args, **kwargs): - return await _patched_inference_method(_original_methods["ollama_ps"], self, "ollama", "ps", *args, **kwargs) + return await _patched_inference_method( + _original_methods["ollama_ps"], self, "ollama", "/api/ps", *args, **kwargs + ) async def patched_ollama_pull(self, *args, **kwargs): return await _patched_inference_method( - _original_methods["ollama_pull"], self, "ollama", "pull", *args, **kwargs + _original_methods["ollama_pull"], self, "ollama", "/api/pull", *args, **kwargs ) async def patched_ollama_list(self, *args, **kwargs): return await _patched_inference_method( - _original_methods["ollama_list"], self, "ollama", "list", *args, **kwargs + _original_methods["ollama_list"], self, "ollama", "/api/tags", *args, **kwargs ) # Apply Ollama patches diff --git a/tests/integration/recordings/index.sqlite b/tests/integration/recordings/index.sqlite index 2e1256d210d5818a229505ec22c6936ea3b4bf1e..72a1dae9ba02100162c1804f90ae7d35d5b62651 100644 GIT binary patch delta 4998 zcmbtX3yf6N8NPGxo%f5ktRgO2tGGPQz31H5l;SM7JQM`2;+CRvUuRL#b(UpC*Ro6z zW{ou63=C=uDaGiUC1 zzyCk~Iro2^HHXAChs3_~Gjf9<2ub`@|N1Z2M<6n#>!Eb^s32w@6fzHH|4 zktfoZ>BLt;JaSk_6ygWtEwLA34bioc!`Wq-J*T^13VCs`AV^$FQ(d5xm5u$zL8>U7!hL40>4;E%hezBnxi51gRwJT}NX0GWemg6YCMgyf8>;7MhNfA*uDQfkJ!<**@Aw*X43}vRwQa&3sxsGLj_r85%~e-3T}JhweBHED z-S#{0Uha3^)_&KD*4Y&W-^1QCRY@b zx(?2)QO__{msqOFJj$qM;AREn&%y>;Y*@eg_&&I{ZfS&>j_*5;tx~2_!@$AUJff=H z@+^;7wysj5*+}p`?kk4wsJ5k3SLHMye-<{VXv6(OHt5{aiA8K1H$l^FoRHa^JG$;N z2aO@VP6)FM+rSS~@D(Y#%5{TLS3zg-c~Jf=Y#?aEsv#R};xdm@Y8n>D`D{iMQ>9cT zF4r8(^Z#AGhMFweIg-*K7Y1m(ZI4XTMYbPf4~Z@H#Te8!1SY)eJY zb2(LXOJ{~bJ&kLM;VPZ-v$4>QH7O_ZHZ;WJP5uZ&rD-SIusbBrc1 z*%`tvA#xzLTUsZ*9@-?{CT@?d&F+j2gyu&ZvrVyS(Ib(K(YDM7nJx0-WM5`c_~lGC zT@)`)=OWjnwq-jYx3uws)MxQ0LI>lm^2qq`^lPDruq(7c5N656w779&9u^el!JjVA z_elS#LW)6sx006})$}>zI#V2%=&t3t%*N56x~FL9!i;c_+6px-su(Im4>lcx7}Ald z8sbo{`pnQ6=iDc{=PMM2Ir^Hbsg(OFK2bFfvz}s@rmyv2&T}Xu4p$Y74psWRsz=9U z;4zL#K~YW3(rjj7ND#;OT#g1|Rs+wRjJ$*86 zJ99RcepXvnAR&I2get8mZ+GmFC><@2^j-+Ts400-`Ud)%TB6dY)fr%oj?gFY`^FYg z+5-nT4u65hHXQatTV?n&474Hrof>^fAJX53Y0L1*Td=k=?1SCQHbkYHYlJ-`k=PVU)i5Er#KXL$nu_n?PEcKOb%naFZabBp z2irS_LZ;?(d2Ce;TssqeY8Jd#Xo*ObW_WggVP<9f%XfE74~x=4*nSUs+q*UL!sn5E z02WNZ`1%4K#z7tp?@hp%8c`)+CKaWz zDuV-WUeW@6-S0%D&YJ#1kE8repj?XF;jrpbRGh6n`KB2oFbea&k8(?4*lHxm@GjoT zB22ifMHDB&01nbua0CZgfN7Vv%n%_R-y)2XJB7&8k+Ie#3+8&>OAsyj!Gm8H9&d)P_fic%XCw(_7B?Al_bp8(lb?>`LD#qiQ; z&>zE`=erw@&xIaX;kH)x;<6?hgZFtJ#tqODb0C8v<0e=-P(McnT?8MW2E7qhy-?4s zc=-)*Ltpp&;LK`TZWhJ}ty1#gq>#8h{&wuQvCE>5MB~y9=@N017!Gv_9}2CJ`@;VU zFOlEK7BUAi&6TT`83%JCLqLw}(+RZOmDmGv}D#`PTP%EPWF z-+>*!{sqi=_UGlqZ+?KacT$Du19IGfSyJKdHjsxAy6k1N3>~J|Wwg{$rY?9!+JEs}dh47R8>)yq~!~+86mczB@iW z;>E^=-wd1bW;raii~9x(kBNAq?j1Yie&?EjgYtFmS2NJqbIzw=ym(B{&;)jh1lJn6|pmIY!Svy*{yBw=1&hk*qx k!Pyl&cl9P*-M1^K1w>Aadn_PTl4%xL;}v{y9#4}uS7I{*Lx delta 2333 zcmZ9Ndu$X{6o+T-ymxnY`VuLYWede3rS0y{?zW5z9Vkmx)DmNa)Jn?3((;lmVt|5l zVVCk|%i*C3f(AsWr7cWiM4+e`gM=a`@CO13CYS&MF$zJ98qe&s+m=aY&)o0)?%6Z< z&N;W~25GuM5<{F!kfJC%-V)xVWmGT&Nzr>7WaTf4I5$wv4a%9Lk-<)5y>lg<#whnu zM%k77$^+$Z<(83^_LK6AiFz+KmW7X8tPFo`@z61MmjXYI(mh0$ZI7u8>NaJINLQt> z*db;MYx#TpQtn?a!d_)7nKMiw*-h;9QWV{WBF_mnD#o(l_twVrl0wz%_LgXVzvlNv zGh&&r^7A+h+apRy)Kb)CKt|gDooedRU~gMemySAL5Q!{C<>LG@RWh@Jl}E<)M)3wQ ziXF}!Y0OUjo2vOeNNveE1*!-?&DP=T4z<5REy1W#Dr#4;4qKXgqNsuOQp)qZqz2{w3&FeGn1)VJ6grgCLE0q}+l}%SuWIg={U9G(9 zyybjN{$AD`2ONX#YixIIuS+MTXT^l@mr%o>JFx&_N++kveQVtoD6S zLCtG^K5n>hct9kt8EF{&*B1$2Q;fM%kzOs(;o4{YA_dHh#n@1ISr5YI_OP7VaP+8V z4IOQ|MGv+){-laXC&gb8&oOc4Jn0nID#wL(aMcWQI2HM<9F;YGilYrC)wrF~K506e zDW!9#P<4NsTC^%&anzo_fB!ub^ov@H2t_$QV z>p6Y94oZE6Kx(Y<#Y~j1hJDYWe3*q6A4D`93P&Sqve1lnL_^_(i83wJxF1mgyjXx4 zxF9iqGa+^08iRs4mZ17+9aKibn5F9T?BN>kx$~i*LKxi2bzlF7? z`35R;(TI?~9L;FBZjSO`pE)`XVs01;R#XsD5A%m383`v=Or}+MY!2L@k3drkiAFSU z!-WxH$XR(=AW>^sZp^?Os=zxEy(1vHD$J8y=daY{mdM0pp53d^pAE|U=${R5m}U@s zWST6<%}4bZtoxMO$Vasu9_FJz145(1P6FsRFB)&&`p&P>f{deQsHq@Kbrt*hJv_@T zU@x&D<^ba$P4ol0j=DxoHKz@{qGCJ*%eM73;XtVt7>o4zROL;m!zLW-t2|SIH5}J~ zjrY~CNlTE9g~SsSt2AB;IqMoz6>4e&M(a!8vLsH!#c5{uki?Q?TZ~gsJKX|7RAEM2eUt>r^P*RO_&s9d>h%=7Gig5=QrqT-tau;iBRG&TYo>mb1i=c83V8*p)CY ze>jcc;T|uC1DpEo*jNqUT#Qcy&*p(NfOG