#!/usr/bin/env python3
"""Test context_management API across multiple scenarios.

Scenarios 4-7 are executed as live requests against the Messages API using
the OAuth access token stored in ``~/.claude/.credentials.json``.  Scenarios
1-3 are not re-run; the summary reports them as pre-verified.
"""
import json
import os
import sys
import time
import urllib.error
import urllib.request

API_URL = 'https://api.anthropic.com/v1/messages'
MODEL = 'claude-haiku-4-5-20251001'
# Seconds to wait after each live request before sending the next one.
PAUSE_SECONDS = 2
# Scenarios 1-3 are counted as passing without being executed here.
PRE_VERIFIED = 3

creds_path = os.path.expanduser("~/.claude/.credentials.json")
# Filler used as tool_result content so the conversation is large enough to
# exceed the 100-token trigger configured in the scenarios.
large = 'X' * 5000

# Request headers, filled on first use so importing this module does no I/O.
_headers_cache = {}


def load_headers(path=creds_path):
    """Read the OAuth access token from *path* and build the request headers.

    Raises whatever ``open``/``json.load``/key lookup raise when the
    credentials file is missing or does not have the expected layout.
    """
    with open(path, encoding='utf-8') as f:
        token = json.load(f)['claudeAiOauth']['accessToken']
    return {
        'Authorization': f'Bearer {token}',
        'anthropic-version': '2023-06-01',
        'anthropic-beta': 'oauth-2025-04-20,context-management-2025-06-27,interleaved-thinking-2025-05-14',
        'content-type': 'application/json',
    }


def default_headers():
    """Return the cached request headers, loading credentials on first call."""
    if not _headers_cache:
        _headers_cache.update(load_headers())
    return _headers_cache


def parse_response(raw):
    """Decode a response body (bytes) into a dict.

    A body that is not valid JSON, or is JSON but not an object, is turned
    into ``{'error': {...}}`` so callers can keep using ``'error' in resp``
    instead of crashing on an HTML error page or similar.
    """
    try:
        parsed = json.loads(raw)
    except ValueError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    preview = raw[:200].decode('utf-8', 'replace')
    return {'error': {'type': 'invalid_response', 'message': preview}}


def api_call(body, headers=None, timeout=30):
    """POST *body* as JSON to the Messages endpoint and return the decoded reply.

    Args:
        body: JSON-serialisable request payload.
        headers: Request headers; defaults to the ones built from the
            credentials file.
        timeout: Socket timeout in seconds.

    Returns:
        The decoded JSON object.  HTTP error responses are decoded and
        returned the same way; transport failures and undecodable bodies are
        returned as ``{'error': {...}}`` rather than raised.
    """
    if headers is None:
        headers = default_headers()
    req = urllib.request.Request(
        API_URL, data=json.dumps(body).encode(), headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # The error response carries the API's JSON error description.
        try:
            raw = e.read()
        finally:
            e.close()
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses.
        return {'error': {'type': 'network_error', 'message': str(e)}}
    return parse_response(raw)


def applied_edits(response):
    """Return ``context_management.applied_edits`` from *response*, or ``[]``."""
    return response.get('context_management', {}).get('applied_edits', [])


def input_tokens(response):
    """Return ``usage.input_tokens`` from *response*, or ``None`` if absent."""
    return response.get('usage', {}).get('input_tokens')


def find_thinking_block(response):
    """Return the first content block of type ``thinking``, or ``None``."""
    blocks = response.get('content') or []
    return next((c for c in blocks if c.get('type') == 'thinking'), None)


def cleared_tokens(edits):
    """Return ``cleared_input_tokens`` of the first applied edit, else 0."""
    if not edits:
        return 0
    return edits[0].get('cleared_input_tokens') or 0


def savings_verdict(no_cm, with_cm):
    """Compare token counts without and with context management.

    Returns ``(verdict, saved)``.  When either count is ``None`` the
    comparison cannot be made and the result is ``('SKIP', None)``; otherwise
    the verdict is ``'PASS'`` only if context management saved tokens.
    """
    if no_cm is None or with_cm is None:
        return 'SKIP', None
    saved = no_cm - with_cm
    return ('PASS' if saved > 0 else 'FAIL'), saved


def tally(results, pre_verified=PRE_VERIFIED):
    """Return ``(passes, fails)``; *pre_verified* is added to the pass count."""
    outcomes = list(results.values())
    return outcomes.count('PASS') + pre_verified, outcomes.count('FAIL')


def thinking_probe_body():
    """Request used to obtain a real thinking block from the API."""
    return {
        "model": MODEL, "max_tokens": 256,
        "thinking": {"type": "enabled", "budget_tokens": 1024},
        "messages": [{"role": "user", "content": "say hi briefly"}],
    }


def combined_body(tb):
    """Scenario 4 request: clear_thinking and clear_tool_uses together.

    *tb* is a thinking block previously returned by the API; it is replayed
    in both assistant turns.
    """
    return {
        "model": MODEL, "max_tokens": 128,
        "thinking": {"type": "enabled", "budget_tokens": 1024},
        "messages": [
            {"role": "user", "content": "say hi"},
            {"role": "assistant", "content": [
                tb,
                {"type": "text", "text": "Hi!"},
                {"type": "tool_use", "id": "t1", "name": "Read", "input": {"path": "/a"}},
                {"type": "tool_use", "id": "t2", "name": "Bash", "input": {"cmd": "ls"}},
            ]},
            {"role": "user", "content": [
                {"type": "tool_result", "tool_use_id": "t1", "content": large},
                {"type": "tool_result", "tool_use_id": "t2", "content": large},
            ]},
            {"role": "assistant", "content": [tb, {"type": "text", "text": "Done."}]},
            {"role": "user", "content": "next"},
        ],
        "context_management": {"edits": [
            {"type": "clear_thinking_20251015",
             "keep": {"type": "thinking_turns", "value": 1}},
            {"type": "clear_tool_uses_20250919",
             "trigger": {"type": "input_tokens", "value": 100},
             "keep": {"type": "tool_uses", "value": 1}},
        ]},
    }


def three_tool_messages():
    """Conversation with three tool calls, shared by scenarios 5 and 6.

    Building it in one place keeps the control group identical to the
    request it is compared against.
    """
    return [
        {"role": "user", "content": "read"},
        {"role": "assistant", "content": [
            {"type": "text", "text": "Ok."},
            {"type": "tool_use", "id": "t1", "name": "Read", "input": {"path": "/a"}},
            {"type": "tool_use", "id": "t2", "name": "Bash", "input": {"cmd": "x"}},
            {"type": "tool_use", "id": "t3", "name": "Grep", "input": {"q": "y"}},
        ]},
        {"role": "user", "content": [
            {"type": "tool_result", "tool_use_id": "t1", "content": large},
            {"type": "tool_result", "tool_use_id": "t2", "content": large},
            {"type": "tool_result", "tool_use_id": "t3", "content": large},
        ]},
        {"role": "assistant", "content": [{"type": "text", "text": "Done."}]},
        {"role": "user", "content": "next"},
    ]


def clear_at_least_body():
    """Scenario 5 request: clear_tool_uses with a ``clear_at_least`` floor."""
    return {
        "model": MODEL, "max_tokens": 64,
        "messages": three_tool_messages(),
        "context_management": {"edits": [
            {"type": "clear_tool_uses_20250919",
             "trigger": {"type": "input_tokens", "value": 100},
             "keep": {"type": "tool_uses", "value": 1},
             "clear_at_least": {"type": "input_tokens", "value": 2000}},
        ]},
    }


def control_body():
    """Scenario 6 request: same conversation as scenario 5, no context_management."""
    return {
        "model": MODEL, "max_tokens": 64,
        "messages": three_tool_messages(),
    }


def clear_tool_inputs_body():
    """Scenario 7 request: clear_tool_uses with ``clear_tool_inputs`` enabled."""
    return {
        "model": MODEL, "max_tokens": 64,
        "messages": [
            {"role": "user", "content": "read"},
            {"role": "assistant", "content": [
                {"type": "text", "text": "Ok."},
                {"type": "tool_use", "id": "t1", "name": "Read",
                 "input": {"path": "/a", "extra_data": "Z" * 500}},
                {"type": "tool_use", "id": "t2", "name": "Bash",
                 "input": {"cmd": "x", "extra": "Z" * 500}},
            ]},
            {"role": "user", "content": [
                {"type": "tool_result", "tool_use_id": "t1", "content": large},
                {"type": "tool_result", "tool_use_id": "t2", "content": large},
            ]},
            {"role": "assistant", "content": [{"type": "text", "text": "Done."}]},
            {"role": "user", "content": "next"},
        ],
        "context_management": {"edits": [
            {"type": "clear_tool_uses_20250919",
             "trigger": {"type": "input_tokens", "value": 100},
             "keep": {"type": "tool_uses", "value": 1},
             "clear_tool_inputs": True},
        ]},
    }


def main():
    """Run scenarios 4-7 in order and print a PASS/FAIL summary."""
    # Load credentials before printing anything, so a missing or malformed
    # credentials file fails immediately.
    default_headers()
    results = {}

    # Step 1: Get real thinking block
    print("Getting real thinking signature...")
    r1 = api_call(thinking_probe_body())
    if 'error' in r1:
        print("Cannot get thinking:", r1['error'])
        sys.exit(1)
    tb = find_thinking_block(r1)
    if tb is None:
        print("Cannot get thinking:", "response contains no thinking block")
        sys.exit(1)
    print("OK\n")
    time.sleep(PAUSE_SECONDS)

    # Scenario 4: combined
    print("=== SCENARIO 4: combined clear_thinking + clear_tool_uses ===")
    r4 = api_call(combined_body(tb))
    if 'error' in r4:
        print("ERROR:", r4['error'])
        results['s4'] = 'FAIL'
    else:
        ae = applied_edits(r4)
        types = [e['type'] for e in ae]
        print('input_tokens:', input_tokens(r4))
        print('edit_types:', types)
        print('applied_edits:', json.dumps(ae, indent=2))
        has_thinking = 'clear_thinking_20251015' in types
        has_tools = 'clear_tool_uses_20250919' in types
        # NOTE(review): passes when either edit is reported, not both --
        # confirm whether the combined scenario should require both.
        results['s4'] = 'PASS' if (has_thinking or has_tools) else 'FAIL'
    print()
    time.sleep(PAUSE_SECONDS)

    # Scenario 5: clear_at_least
    print("=== SCENARIO 5: clear_at_least ===")
    r5 = api_call(clear_at_least_body())
    if 'error' in r5:
        print("ERROR:", r5['error'])
        results['s5'] = 'FAIL'
    else:
        ae = applied_edits(r5)
        print('input_tokens:', input_tokens(r5))
        print('applied_edits:', json.dumps(ae, indent=2))
        cleared = cleared_tokens(ae)
        results['s5'] = 'PASS' if cleared >= 2000 else 'FAIL'
        print(f'cleared={cleared} >= 2000? {results["s5"]}')
    print()
    time.sleep(PAUSE_SECONDS)

    # Scenario 6: control group
    print("=== SCENARIO 6: control group (no context_management) ===")
    r6 = api_call(control_body())
    if 'error' in r6:
        print("ERROR:", r6['error'])
        results['s6'] = 'FAIL'
    else:
        no_cm = input_tokens(r6)
        # A failed scenario 5 gives no baseline; treating it as 0 tokens
        # would make the control group pass without any real comparison.
        with_cm = None if 'error' in r5 else input_tokens(r5)
        verdict, saved = savings_verdict(no_cm, with_cm)
        if saved is None:
            print('Cannot compare: input token count unavailable '
                  'for the control or scenario 5 request')
        else:
            print(f'WITHOUT context_management: {no_cm} input_tokens')
            print(f'WITH context_management: {with_cm} input_tokens')
            print(f'Saved: {saved} tokens')
        results['s6'] = verdict
    print()
    time.sleep(PAUSE_SECONDS)

    # Scenario 7: clear_tool_inputs
    print("=== SCENARIO 7: clear_tool_inputs ===")
    r7 = api_call(clear_tool_inputs_body())
    if 'error' in r7:
        print("ERROR:", r7['error'])
        results['s7'] = 'FAIL'
    else:
        print('input_tokens:', input_tokens(r7))
        ae = applied_edits(r7)
        print('applied_edits:', json.dumps(ae, indent=2))
        results['s7'] = 'PASS' if ae else 'FAIL'
    print()

    # Summary
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print("Scenario 1: clear_tool_uses basic -> PASS (pre-verified)")
    print("Scenario 2: threshold not reached -> PASS (pre-verified)")
    print("Scenario 3: exclude_tools -> PASS (pre-verified)")
    print(f"Scenario 4: combined strategies -> {results.get('s4','SKIP')}")
    print(f"Scenario 5: clear_at_least -> {results.get('s5','SKIP')}")
    print(f"Scenario 6: control group -> {results.get('s6','SKIP')}")
    print(f"Scenario 7: clear_tool_inputs -> {results.get('s7','SKIP')}")
    total, fails = tally(results)
    print(f"\nTotal: {total} PASS / {fails} FAIL")


if __name__ == "__main__":
    main()