{
  "issues": [
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/181",
      "id": 3730632357,
      "node_id": "I_kwDOPMjOmM7eXO6l",
      "number": 181,
      "title": "Cannot run RL with Qwen3-VL series on multimodal dataset",
      "user": {
        "login": "Kun-Xiang",
        "id": 50045221,
        "node_id": "MDQ6VXNlcjUwMDQ1MjIx",
        "avatar_url": "https://avatars.githubusercontent.com/u/50045221?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/Kun-Xiang",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-12-15T14:12:53Z",
      "updated_at": "2025-12-15T19:14:32Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Hi, I want to finetune Qwen3-VL model using my dataset with RLVR algorithm and an error occurred regarding image token mismatch.\n\nHere is my code:\n```python\n\nimport asyncio\nimport random\nimport re\nfrom typing import Any, Dict, List, Sequence\n\nimport datasets\nimport requests\nimport tinker\nfrom PIL import Image\nimport io\nfrom tinker import types\nfrom tinker_cookbook import renderers, tokenizer_utils\nfrom tinker_cookbook.hyperparam_utils import get_lr\nfrom tinker_cookbook.image_processing_utils import get_image_processor\n\n# External grading helpers (with safe fallback)\ntry:\n    from mathruler.grader import extract_boxed_content, grade_answer  # type: ignore\nexcept Exception:  # pragma: no cover - runtime fallback\n    def extract_boxed_content(text: str) -> str:\n        m = re.search(r\"\\\\boxed\\{([^}]*)\\}\", text)\n        return m.group(1) if m else \"\"\n\n    def grade_answer(pred: str, gt: str) -> bool:\n        return pred.strip().upper() == gt.strip().upper()\n\n# --------------------\n# Config\n# --------------------\nBASE_MODEL = \"Qwen/Qwen3-VL-30B-A3B-Instruct\"\nBATCH_SIZE = 128          # number of unique problems per step\nGROUP_SIZE = 4          # number of rollouts per problem (GRPO-style groups)\nMAX_TOKENS = 4096\nSAVE_EVERY = 50         # steps between sampler checkpoints\nNUM_STEPS = 200\nLOG_PATH = \"/tmp/tinker-virl39k-grpo\"\nDATASET_NAME = \"Quinn777/ViRL39K\"\n# Qwen3-VL tokenizes images to patches; keep images small to meet expected token count.\n# If you still see \"Expected X tokens, got Y\" errors, reduce this further (e.g., 192).\nMAX_IMAGE_SIDE = 224  # resize longer edge to this to match expected tokens\n\n\n\ndef _to_pil_single(image_entry) -> Image.Image:\n    \"\"\"Normalize one image entry (URL / PIL / dict) to PIL.Image.\"\"\"\n    if isinstance(image_entry, Image.Image):\n        return image_entry.convert(\"RGB\")\n    if isinstance(image_entry, dict):\n        if \"image\" in image_entry 
and isinstance(image_entry[\"image\"], Image.Image):\n            return image_entry[\"image\"].convert(\"RGB\")\n        if \"url\" in image_entry and isinstance(image_entry[\"url\"], str):\n            url = image_entry[\"url\"]\n            resp = requests.get(url, timeout=15)\n            resp.raise_for_status()\n            return Image.open(io.BytesIO(resp.content)).convert(\"RGB\")\n        raise ValueError(f\"Unsupported image entry dict keys: {image_entry.keys()}\")\n    if isinstance(image_entry, str):\n        url = image_entry\n        if url.startswith(\"http://\") or url.startswith(\"https://\"):\n            resp = requests.get(url, timeout=15)\n            resp.raise_for_status()\n            return Image.open(io.BytesIO(resp.content)).convert(\"RGB\")\n        raise ValueError(f\"Invalid image URL: {url}\")\n    raise ValueError(f\"Unsupported image entry type: {type(image_entry)}\")\n\n\ndef _resize_long_edge(img: Image.Image, max_side: int) -> Image.Image:\n    \"\"\"Resize keeping aspect ratio so that longer edge == max_side.\"\"\"\n    w, h = img.size\n    if max(w, h) <= max_side:\n        return img\n    if w >= h:\n        new_w = max_side\n        new_h = int(h * max_side / w)\n    else:\n        new_h = max_side\n        new_w = int(w * max_side / h)\n    return img.resize((new_w, new_h), Image.BICUBIC)\n\n\ndef normalize_images(image_field) -> list[Image.Image]:\n    \"\"\"\n    Accepts:\n      - single image (URL/PIL/dict)\n      - list/tuple of images (HF image column may be a list of PILs)\n    Returns list of PILs.\n    \"\"\"\n    imgs = (\n        [_to_pil_single(x) for x in image_field]\n        if isinstance(image_field, (list, tuple))\n        else [_to_pil_single(image_field)]\n    )\n    return [_resize_long_edge(img, MAX_IMAGE_SIDE) for img in imgs]\n\n\ndef build_messages(question: str, pil_images: list[Image.Image]) -> list[renderers.Message]:\n    instruction = (\n        \"You first think through the reasoning process as 
an internal monologue, \"\n        \"enclosed within <think> </think> tags. Then, provide your final answer \"\n        \"enclosed within \\\\boxed{} with a single choice letter (A/B/C/D). \"\n        \"Do not add extra text after the boxed answer.\"\n    )\n    image_parts = [\n        {\"type\": \"image\", \"image\": img.convert(\"RGB\")}\n        for img in pil_images\n    ]\n    return [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                *image_parts,\n                {\"type\": \"text\", \"text\": instruction + \"\\n\\n\" + question},\n            ],\n        }\n    ]\n\n\ndef format_reward(response: str) -> float:\n    pattern = re.compile(r\"<think>.*</think>.*\\\\boxed\\{.*\\}.*\", re.DOTALL)\n    return 1.0 if re.fullmatch(pattern, response) else 0.0\n\n\ndef accuracy_reward(response: str, ground_truth: str) -> float:\n    answer = extract_boxed_content(response)\n    return 1.0 if grade_answer(answer, ground_truth) else 0.0\n\n\ndef compute_score_batch(\n    reward_inputs: List[Dict[str, Any]], format_weight: float = 0.1\n) -> List[Dict[str, float]]:\n    scores = []\n    for reward_input in reward_inputs:\n        response = re.sub(\n            r\"\\s*(<|>|/)\\s*\", r\"\\1\", reward_input[\"response\"]\n        )  # handle spaced tags\n        format_score = format_reward(response)\n        accuracy_score = accuracy_reward(response, reward_input[\"ground_truth\"])\n        scores.append(\n            {\n                \"overall\": (1 - format_weight) * accuracy_score\n                + format_weight * format_score,\n                \"format\": format_score,\n                \"accuracy\": accuracy_score,\n            }\n        )\n    return scores\n\n\ndef resolve_lr(model_name: str) -> float:\n    \"\"\"\n    Safe LR resolver for models whose configs may lack hidden_size.\n    Falls back to a sane default if the cookbook heuristic fails.\n    \"\"\"\n    try:\n        return get_lr(model_name)\n    except 
Exception as e:\n        print(f\"[warn] get_lr failed for {model_name}: {e}; fallback lr=3e-4\")\n        return 3e-4\n\n\nasync def main() -> None:\n    # Clients\n    service_client = tinker.ServiceClient()\n    training_client = service_client.create_lora_training_client(\n        base_model=BASE_MODEL, rank=32\n    )\n    sampling_client = service_client.create_sampling_client(base_model=BASE_MODEL)\n\n    # Tokenizer + renderer\n    tokenizer = tokenizer_utils.get_tokenizer(BASE_MODEL)\n    image_processor = get_image_processor(BASE_MODEL)\n    renderer = renderers.Qwen3VLRenderer(tokenizer, image_processor)\n    stop_sequences = renderer.get_stop_sequences()\n    sampling_params = types.SamplingParams(\n        max_tokens=MAX_TOKENS,\n        temperature=0.7,\n        top_p=1.0,\n        stop=stop_sequences,\n    )\n\n    # Data\n    ds = datasets.load_dataset(DATASET_NAME, split=\"train\")\n    print(f\"Loaded {len(ds)} examples from {DATASET_NAME}\")\n\n    lr = resolve_lr(BASE_MODEL)\n    print(f\"Using learning_rate={lr:.2e}\")\n\n    for step in range(1, NUM_STEPS + 1):\n        batch_examples = [ds[random.randint(0, len(ds) - 1)] for _ in range(BATCH_SIZE)]\n        data: List[tinker.Datum] = []\n        rewards_logged: List[float] = []\n\n        for ex in batch_examples:\n            question = ex[\"problem\"]\n            gold = str(ex[\"answer\"]).strip().upper()\n            image_field = ex[\"images\"]\n            pil_images = normalize_images(image_field)\n\n            prompt = renderer.build_generation_prompt(\n                build_messages(question, pil_images)\n            )\n\n            sample_resp = await sampling_client.sample_async(\n                prompt=prompt,\n                num_samples=GROUP_SIZE,\n                sampling_params=sampling_params,\n                include_prompt_logprobs=False,\n            )\n\n            group_rewards: List[float] = []\n            # Collect per-sample info\n            parsed = []\n         
   for seq in sample_resp.sequences:\n                text = renderer.parse_response(seq.tokens)[0][\"content\"]\n                scores = compute_score_batch(\n                    [{\"response\": text, \"ground_truth\": gold}]\n                )[0]\n                reward = scores[\"overall\"]\n                group_rewards.append(reward)\n                parsed.append((seq, scores))\n\n            # GRPO: center rewards within group\n            mean_r = sum(group_rewards) / len(group_rewards)\n            advantages_group = [r - mean_r for r in group_rewards]\n            rewards_logged.extend(group_rewards)\n\n            # Build Datums\n            for (seq, scores), adv in zip(parsed, advantages_group):\n                advantages = [adv] * len(seq.tokens)\n                sampling_logprobs = seq.logprobs or [0.0] * len(seq.tokens)\n                datum = tinker.Datum(\n                    model_input=prompt,\n                    loss_fn_inputs={\n                        \"target_tokens\": seq.tokens,\n                        \"advantages\": advantages,\n                        \"logprobs\": sampling_logprobs,  # sampling policy logprobs\n                    },\n                )\n                data.append(datum)\n\n        # Train step: importance_sampling with centered advantages (GRPO-style)\n        fwd_bwd_future = await training_client.forward_backward_async(\n            data, loss_fn=\"importance_sampling\"\n        )\n        optim_future = await training_client.optim_step_async(\n            types.AdamParams(learning_rate=lr)\n        )\n        _ = await fwd_bwd_future\n        _ = await optim_future\n\n        mean_reward = sum(rewards_logged) / max(1, len(rewards_logged))\n        print(f\"[step {step}] mean_reward={mean_reward:.3f} datums={len(data)}\")\n\n        if step % SAVE_EVERY == 0:\n            path = training_client.save_weights_for_sampler(name=f\"{step:05d}\").result().path\n            print(f\"Saved sampler weights to 
{path}\")\n\n\nif __name__ == \"__main__\":\n    asyncio.run(main())\n\n\n```\n\nHere is the error:\n```shell\ntinker/_base_client.py\", line 1033, in request\n    raise self._make_status_error_from_response(err.response) from None\ntinker.BadRequestError: Error code: 400 - {'detail': 'Expected 35 tokens, got 70 from image'}\n```\nI've tried small image size but not work.\n",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/181/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/180",
      "id": 3730318492,
      "node_id": "I_kwDOPMjOmM7eWCSc",
      "number": 180,
      "title": "Example of RL on MATH dataset not reproducable.",
      "user": {
        "login": "HaoquanZhang",
        "id": 151443624,
        "node_id": "U_kgDOCQbYqA",
        "avatar_url": "https://avatars.githubusercontent.com/u/151443624?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/HaoquanZhang",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-15T12:54:55Z",
      "updated_at": "2025-12-15T12:55:06Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "`python -m tinker_cookbook.recipes.math_rl.train env=math model_name=\"Qwen/Qwen3-8B\" group_size=16 groups_per_batch=64 learning_rate=2e-5 max_tokens=512`\n\nresult of step 24\n\n`\n{\"step\": 24, ... \"env/all/reward/total\": -0.0814453125, \"env/all/format\": 0.0, }\n`",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/180/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/179",
      "id": 3729202598,
      "node_id": "I_kwDOPMjOmM7eRx2m",
      "number": 179,
      "title": "Pricing",
      "user": {
        "login": "hbarnard",
        "id": 364483,
        "node_id": "MDQ6VXNlcjM2NDQ4Mw==",
        "avatar_url": "https://avatars.githubusercontent.com/u/364483?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/hbarnard",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-15T07:48:50Z",
      "updated_at": "2025-12-15T07:48:50Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "I've arrived at the tinker website and I don't see anything on pricing? I do small projects both personal and voluntary orgs in UK. Don't want to get locked in to something I/we can't afford.",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/179/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/177",
      "id": 3729151212,
      "node_id": "I_kwDOPMjOmM7eRlTs",
      "number": 177,
      "title": "Qwen3Renderer should preserve last <think> block to match HF tokenizer",
      "user": {
        "login": "thejaminator",
        "id": 30519287,
        "node_id": "MDQ6VXNlcjMwNTE5Mjg3",
        "avatar_url": "https://avatars.githubusercontent.com/u/30519287?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/thejaminator",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-12-15T07:32:29Z",
      "updated_at": "2025-12-15T07:42:32Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "With default strip_thinking_from_history=True, Qwen3Renderer removes all <think> blocks.\nBut it should preserve the last one to match the HF tokenizer\n\n\nExample:\n```\n2-TURN CONVERSATION - THINKING SHOULD BE PRESERVED:\n================================================================================\nTINKER:\n<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\nThe answer is 4.<|im_end|>\n\nHUGGINGFACE:\n<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n<think> <---- Huggingface tokenizer template preserves <think>\nLet me calculate this.\n</think>\n\nThe answer is 4.<|im_end|```",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/177/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/176",
      "id": 3729033598,
      "node_id": "I_kwDOPMjOmM7eRIl-",
      "number": 176,
      "title": "Qwen3DisableThinkingRenderer fails to add <think>\\n\\n</think>\\n\\n tokens in SFT",
      "user": {
        "login": "thejaminator",
        "id": 30519287,
        "node_id": "MDQ6VXNlcjMwNTE5Mjg3",
        "avatar_url": "https://avatars.githubusercontent.com/u/30519287?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/thejaminator",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-15T06:57:56Z",
      "updated_at": "2025-12-15T07:00:34Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "For Qwen3 hybrid models: Qwen3-8B and Qwen3-32B, to indicate non-thinking, the `<think>\\n\\n</think>\\n\\n` tokens should be added.\n\nDuring SFT this does not get added. \n\nBecause current class Qwen3DisableThinkingRenderer(Qwen3Renderer) does not override `render_message`.\n\nDemonstrative example:\n\n```\n================================================================================\nBUG: Official tinker-cookbook Qwen3DisableThinkingRenderer\n================================================================================\n\nActual output from renderer:\n<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n<think> <---- Missing the \\n\\n</think> tokens\nThe answer is 4.<|im_end|>\n\n================================================================================\nExpected output from Qwen3-8B tokenizer:\n================================================================================\n<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nThe answer is 4.<|im_end|>\n```\n\n\nReproduce:\n```python\nfrom transformers import AutoTokenizer\nfrom tinker_cookbook.renderers import Qwen3DisableThinkingRenderer, Message\n\n# Load tokenizer\nmodel_name = \"Qwen/Qwen3-8B\"\ntokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n\n# Create renderer with official buggy implementation\nrenderer = Qwen3DisableThinkingRenderer(tokenizer)\n\n# Test messages - simulating a multi-turn conversation\nmessages: list[Message] = [\n    {\"role\": \"user\", \"content\": \"What is 2+2?\"},\n    {\"role\": \"assistant\", \"content\": \"The answer is 4.\"},\n]\n\nprint(\"=\" * 80)\nprint(\"BUG: Official tinker-cookbook Qwen3DisableThinkingRenderer\")\nprint(\"=\" * 80)\n\n# Build supervised example (for training)\nmodel_input, weights = renderer.build_supervised_example(messages)\ntokens = []\nfor chunk in model_input.chunks:\n    tokens.extend(chunk.tokens)\ndecoded = 
tokenizer.decode(tokens)\n\nprint(\"\\nActual output from official renderer:\")\nprint(decoded)\n\n# Show what the official tokenizer does\n\nofficial_format = tokenizer.apply_chat_template(messages, tokenize=False, thinking=False)\n\nprint(\"\\n\" + \"=\" * 80)\nprint(\"Expected output from Qwen3-8B tokenizer:\")\nprint(\"=\" * 80)\nprint(official_format)\n```",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/176/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/170",
      "id": 3726640283,
      "node_id": "I_kwDOPMjOmM7eIASb",
      "number": 170,
      "title": "VLM Classifier sample command/code not working",
      "user": {
        "login": "rhsu-nuro",
        "id": 225059369,
        "node_id": "U_kgDODWoiKQ",
        "avatar_url": "https://avatars.githubusercontent.com/u/225059369?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/rhsu-nuro",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 4,
      "created_at": "2025-12-14T02:28:21Z",
      "updated_at": "2025-12-16T01:58:13Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Trying to run the very first command for the latest released VLM fine-tuning feature from tinker_cookbook/recipes/vlm_classifier:\n\n```\npython -m tinker_cookbook.recipes.vlm_classifier.train \\\n    experiment_dir=./vlm_classifier \\\n    wandb_project=vlm-classifier \\\n    dataset=caltech101 \\\n    renderer_name=qwen3_vl \\\n    model_name=Qwen/Qwen3-VL-30B-A3B-Instruct\n```\n\nGot me: \n\n```\ntinker.lib.retry_handler:222 [ERROR] Request failed with non-retryable error: BadRequestError: Error code: 400 - {'detail': 'Expected 63 tokens, got 70 from image'}\n```\n\nMore terminal output: \n```\ntinker.lib.retry_handler:222 [ERROR] Request failed with non-retryable error: BadRequestError: Error code: 400 - {'detail': 'Expected 63 tokens, got 70 from image'}\ntinker.lib.retry_handler:222 [ERROR] Request failed with non-retryable error: BadRequestError: Error code: 400 - {'detail': 'Expected 40 tokens, got 77 from image'}\ntinker.lib.retry_handler:222 [ERROR] Request failed with non-retryable error: BadRequestError: Error code: 400 - {'detail': 'Expected 63 tokens, got 70 from image'}\ntinker.lib.telemetry:204 [INFO] Exception logged for session ID: 0788e796-c9e5-5f41-b718-329641ca5a58\nTraceback (most recent call last):\n  File \"<frozen runpy>\", line 198, in _run_module_as_main\n  File \"<frozen runpy>\", line 88, in _run_code\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/recipes/vlm_classifier/train.py\", line 156, in <module>\n    chz.nested_entrypoint(run_experiment)\n  File \"/opt/homebrew/lib/python3.11/site-packages/chz/blueprint/_entrypoint.py\", line 39, in inner\n    return fn(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/chz/blueprint/_entrypoint.py\", line 109, in nested_entrypoint\n    return main(value)\n           ^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/recipes/vlm_classifier/train.py\", line 152, in run_experiment\n    
asyncio.run(train.main(config))\n  File \"/opt/homebrew/Cellar/python@3.11/3.11.14_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py\", line 190, in run\n    return runner.run(main)\n           ^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/Cellar/python@3.11/3.11.14_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py\", line 118, in run\n    return self._loop.run_until_complete(task)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/Cellar/python@3.11/3.11.14_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py\", line 654, in run_until_complete\n    return future.result()\n           ^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\n    return await func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/supervised/train.py\", line 349, in main\n    submitted_batch = await submit_batch(epoch_idx, batch_idx)\n                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\n    return await func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/supervised/train.py\", line 265, in submit_batch\n    eval_metrics = await run_evals(evaluators, training_client, step)\n                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\n    return await func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/supervised/train.py\", line 140, in run_evals\n    eval_metrics = await run_evaluator(evaluator)\n                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File 
\"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\n    return await func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/supervised/train.py\", line 135, in run_evaluator\n    return await evaluator(sampling_client)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/recipes/vlm_classifier/eval.py\", line 214, in __call__\n    outputs = await asyncio.gather(*async_tasks)\n              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/recipes/vlm_classifier/eval.py\", line 196, in bounded_generate_output\n    return await self.generate_output(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/Users/rhsu/bates-mcp/tinker-cookbook/tinker_cookbook/recipes/vlm_classifier/eval.py\", line 144, in generate_output\n    r: types.SampleResponse = await sampling_client.sample_async(\n                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 249, in sample_async\n    return await AwaitableConcurrentFuture(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/api_future.py\", line 132, in result_async\n    return await asyncio.wrap_future(self._future)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/telemetry.py\", line 384, in _awrapper\n    return await cast(Callable[..., Awaitable[R]], func)(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 235, in _sample_async_with_retries\n    return await self.retry_handler.execute(_sample_async)\n           
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/retry_handler.py\", line 136, in execute\n    result = await self._execute_with_retry(func, *args, **kwargs)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/retry_handler.py\", line 188, in _execute_with_retry\n    result = await func(*args, **kwargs)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 225, in _sample_async\n    return await self._sample_async_impl(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 168, in _sample_async_impl\n    untyped_future = await self.holder.execute_with_retries(\n                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/internal_client_holder.py\", line 364, in execute_with_retries\n    raise e\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/internal_client_holder.py\", line 325, in execute_with_retries\n    return await func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 148, in _send_asample_request\n    raise e\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 140, in _send_asample_request\n    return await client.sampling.asample(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/resources/sampling.py\", line 57, in asample\n    return await self._post(\n           ^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/_base_client.py\", line 1232, in post\n    return await self.request(cast_to, opts, 
stream=stream, stream_cls=stream_cls)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/opt/homebrew/lib/python3.11/site-packages/tinker/_base_client.py\", line 1033, in request\n    raise self._make_status_error_from_response(err.response) from None\ntinker.BadRequestError: Error code: 400 - {'detail': 'Expected 60 tokens, got 78 from image'}\n```",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/181/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/165",
      "id": 3726135208,
      "node_id": "I_kwDOPMjOmM7eGE-o",
      "number": 165,
      "title": "flawed tinker code for pig latin",
      "user": {
        "login": "samearle-VCConnector",
        "id": 249469544,
        "node_id": "U_kgDODt6aaA",
        "avatar_url": "https://avatars.githubusercontent.com/u/249469544?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/samearle-VCConnector",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-13T16:14:54Z",
      "updated_at": "2025-12-13T16:14:54Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "This is a classic \"Garbage In, Garbage Out\" scenario, but specifically related to pattern contention. Because the training set contains contradictory rules for how to handle consonant clusters (combinations of consonants like sp, st, tr, gl, qu), you are forcing the model to learn a rule that does not exist, leading to stochastic confusion rather than deterministic logic. Here is the breakdown of the semantic error and how it corrupts the model’s understanding of the language.1. The Semantic Error: Inconsistent Cluster Handling The flaw lies in the inconsistency between these two examples in your dataset: Example A (\"space\"): space $\\rightarrow$ ace-spay Rule Implied: Identify the consonant cluster (sp) and move the entire group to the end. (This is correct Pig Latin).Example B (\"split\"): split $\\rightarrow$ plit-say Rule Implied: Identify only the first letter (s) and move it, ignoring that p is attached to it phonetically. (This is incorrect Pig Latin; it should be it-splay).Example C (\"quantum\"): quantum $\\rightarrow$ uantum-qay Rule Implied: Treat q as a standalone consonant. (Incorrect; qu is phonetically a single unit /kw/, so it should typically be antum-quay).2. How This Trains the Model Incorrectly When an LLM (or even a simple sequence-to-sequence model) trains on this, it attempts to minimize a loss function by finding a pattern that satisfies all examples. However, you have provided a mathematical impossibility: The Ambiguity Trap The model learns that when it sees the token s followed by p:Sometimes it must move both (from learning \"space\").Sometimes it must move only the s (from learning \"split\").Because the dataset is small, the model cannot distinguish context. It doesn't know that \"split\" and \"space\" are just different words; it only sees token sequences. It will likely settle on a \"muddy\" probability distribution—perhaps giving a 50% probability to moving \"sp\" and 50% to moving \"s\".3. 
The Consequence: Failure on \"Many Such Examples \"English is dense with consonant clusters. If the model is trained on the logic of your \"split\" example, it will fail to phonetically parse a massive percentage of the English vocabulary. If you ask the model to translate \"String\": Correct Logic: Move \"str\" $\\rightarrow$ ing-stray Your Model's Logic (derived from \"split\"): It sees \"s\" and immediately moves it $\\rightarrow$ tring-say. The Semantic \"Uncanny Valley\" The output tring-say is pronounceable, but semantically broken in the target language (Pig Latin).Tring-say implies the original word was \"string\" (if adhering to the bad rule) or \"sing\" (if adhering to a different rule).This destroys the reversibility of the cypher. A language model trained this way acts as a lossy compression algorithm—you cannot reliably get back to the English input from the Pig Latin output because the rules of transformation were applied inconsistently. Summary of Failure By failing to treat consonant clusters (onset) as a cohesive unit, the model learns a character-level positional rule (\"always move index 0\") rather than a phonological rule (\"move characters until the first vowel\").This results in a model that technically \"works\" (it outputs text) but fails the semantic requirement of the specific language game it is meant to play.\n\nHere is a correction:\n\n# Create some training examples\nexamples = [\n    {\n        \"input\": \"banana split\",\n        # FIX: \"split\" now moves \"spl\" together -> \"it-splay\"\n        # Old (Bad): \"plit-say\"\n        \"output\": \"anana-bay it-splay\"\n    },\n    {\n        \"input\": \"quantum physics\",\n        # FIX: \"quantum\" treats \"qu\" as a single phonetic unit -> \"antum-quay\"\n        # Old (Bad): \"uantum-qay\"\n        \"output\": \"antum-quay ysics-phay\"\n    },\n    {\n        \"input\": \"donut shop\",\n        # \"sh\" is a digraph, moved together (Correct)\n        \"output\": \"onut-day 
op-shay\"\n    },\n    {\n        \"input\": \"pickle jar\",\n        \"output\": \"ickle-pay ar-jay\"\n    },\n    {\n        \"input\": \"space exploration\",\n        # \"sp\" moved together (Correct)\n        # \"exploration\" starts with vowel, add \"-way\" (Correct)\n        \"output\": \"ace-spay exploration-way\"\n    },\n    {\n        \"input\": \"rubber duck\",\n        \"output\": \"ubber-ray uck-day\"\n    },\n    {\n        \"input\": \"coding wizard\",\n        \"output\": \"oding-cay izard-way\"\n    },\n]\nSummary of Changes split $\\rightarrow$ it-splay:Why: The cluster is spl (three consonants). The previous version only moved s, which broke the phonetic pattern established by words like \"shop\" and \"space. \"quantum $\\rightarrow$ antum-quay: Why: In English phonology, qu acts as a single sound (/kw/). Standard Pig Latin treats qu as a block. If you only move the q (uantum-qay), the result is unpronounceable and breaks the \"starts with a vowel sound\" rule for the remainder of the word. If you use this dataset, the model will learn a consistent attention pattern: \"Attend to the beginning of the sequence, scan until the first vowel is found, and perform the swap operation.\"",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/165/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/159",
      "id": 3713409040,
      "node_id": "I_kwDOPMjOmM7dViAQ",
      "number": 159,
      "title": "Please add support for tool calling int gpt-oss renderer",
      "user": {
        "login": "Guido1Alessandro1Trevisan",
        "id": 46954188,
        "node_id": "MDQ6VXNlcjQ2OTU0MTg4",
        "avatar_url": "https://avatars.githubusercontent.com/u/46954188?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/Guido1Alessandro1Trevisan",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-10T03:27:09Z",
      "updated_at": "2025-12-10T03:27:09Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": null,
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/159/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/158",
      "id": 3710089201,
      "node_id": "I_kwDOPMjOmM7dI3fx",
      "number": 158,
      "title": "Non-retriable exception",
      "user": {
        "login": "shash42",
        "id": 32355229,
        "node_id": "MDQ6VXNlcjMyMzU1MjI5",
        "avatar_url": "https://avatars.githubusercontent.com/u/32355229?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/shash42",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-12-09T09:53:15Z",
      "updated_at": "2025-12-09T19:12:03Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "My runs keep crashing during training due to the following error from the sampling client, which only started appearing after I set kl_penalty_coef to non-zero. How can this be fixed?\n\n```\ntinker.lib.retry_handler:222 [\u001b[91mERROR\u001b[0m] Request failed with non-retryable error: RequestFailedError: Request failed: Unknown error: Failed to sample because of a non-retriable exception. Sample request id: 01ff51b5-52ae-565f-8e71-4ccd145570d5:sample:57:1155 for self.request_id='01ff51b5-52ae-565f-8e71-4ccd145570d5:sample:57:1155' and expected type self.model_cls=<class 'tinker.SampleResponse'>\ntinker.lib.telemetry:204 [\u001b[92mINFO\u001b[0m] Exception logged for session ID: 01ff51b5-52ae-565f-8e71-4ccd145570d5\nTraceback (most recent call last):\n  File \"<frozen runpy>\", line 198, in _run_module_as_main\n  File \"<frozen runpy>\", line 88, in _run_code\n  File \"tinker-cookbook/tinker_cookbook/recipes/forecasting/train.py\", line 154, in <module>\n    ...\n  File \"tinker-cookbook/tinker_cookbook/rl/train.py\", line 754, in prepare_minibatch\n    kl_penalty_metrics = await incorporate_kl_penalty(\n                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  ...\n                      ^^^^^^^^^^^^^^^^^^^^^\n  File \"tinker-cookbook/tinker/lib/python3.12/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 296, in compute_logprobs_async\n    return await AwaitableConcurrentFuture(self.compute_logprobs(prompt))  \n...\n  File \"tinker-cookbook/tinker/lib/python3.12/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 290, in _compute_logprobs_async_with_retries\n    return await self.retry_handler.execute(_compute_logprobs_async)\n    ...\n  File \"tinker-cookbook/tinker/lib/python3.12/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 280, in _compute_logprobs_async\n    sample_res = await self._sample_async_impl(\n                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File 
\"tinker-cookbook/tinker/lib/python3.12/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 181, in _sample_async_impl\n    return await _APIFuture(\n          ...\n  File \"tinker-cookbook/tinker/lib/python3.12/site-packages/tinker/lib/api_future_impl.py\", line 224, in _result_async\n    raise RequestFailedError(\ntinker.RequestFailedError: Request failed: Unknown error: Failed to sample because of a non-retriable exception. Sample request id: 01ff51b5-52ae-565f-8e71-4ccd145570d5:sample:57:1155 for self.request_id='01ff51b5-52ae-565f-8e71-4ccd145570d5:sample:57:1155' and expected type self.model_cls=<class 'tinker.SampleResponse'>\n```\n\n",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/158/reactions",
        "total_count": 1,
        "+1": 1,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/157",
      "id": 3708445035,
      "node_id": "I_kwDOPMjOmM7dCmFr",
      "number": 157,
      "title": "Typo in the docs",
      "user": {
        "login": "devndevs",
        "id": 67359053,
        "node_id": "MDQ6VXNlcjY3MzU5MDUz",
        "avatar_url": "https://avatars.githubusercontent.com/u/67359053?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/devndevs",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-08T23:32:29Z",
      "updated_at": "2025-12-08T23:32:43Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Not a real big issue, but it should say \"we also released\" as opposed to \"we also release\"\n\n<img width=\"1678\" height=\"652\" alt=\"Image\" src=\"https://github.com/user-attachments/assets/31e4cc00-4cb6-4a8a-98bb-e745768cc769\" />",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/157/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/154",
      "id": 3705776128,
      "node_id": "I_kwDOPMjOmM7c4agA",
      "number": 154,
      "title": "How to reason about concurrency limits?",
      "user": {
        "login": "shash42",
        "id": 32355229,
        "node_id": "MDQ6VXNlcjMyMzU1MjI5",
        "avatar_url": "https://avatars.githubusercontent.com/u/32355229?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/shash42",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-08T10:44:11Z",
      "updated_at": "2025-12-08T10:44:11Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Hey, thanks for the excellent API and research credits! \n\nI am running into unpredictable issues with concurrency limits. For example, a run eventually crashed with:\n\n`tinker.RequestFailedError: Request failed: Unknown error: max number of clients + cluster connections reached for self.request_id='f51abf2d-1027-5792-ac40-f303348de5ca:sample:31:1203' and expected type self.model_cls=<class 'tinker.SampleResponse'>`\n\nI also got this error earlier in the run, but they didn't lead to the job stopping (until it eventually did on step 30, though it should have run till 64). The number of concurrent requests (if I am reasoning correctly about them) should have been <= 1024, which is not crazy? Note that I am using the default `rl/train.py` train loop, without optimizations for clock cycles / pipelining, which would increase concurrency further (I guess?)\n\nI was not getting this earlier when I was developing and debugging my script earlier in the day. I could not find documentation on what the max number of clients / connections is, and how I can fix my code to work around and avoid this issue. \n\nAre there any best practices around concurrency?",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/154/reactions",
        "total_count": 2,
        "+1": 2,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/153",
      "id": 3702692167,
      "node_id": "I_kwDOPMjOmM7csplH",
      "number": 153,
      "title": "Initially running sampling_client = training_client.save_weights_and_get_sampling_client(name='pig-latin-model') doesn't actually seem to save the checkpoints",
      "user": {
        "login": "rhsu-nuro",
        "id": 225059369,
        "node_id": "U_kgDODWoiKQ",
        "avatar_url": "https://avatars.githubusercontent.com/u/225059369?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/rhsu-nuro",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-06T23:28:08Z",
      "updated_at": "2025-12-06T23:28:08Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "I was following https://tinker-docs.thinkingmachines.ai/training-sampling , and seems like while it was able to get the sampling client, the checkpoint doesn't seem to be persisted at all (tried with UI and API calling list_checkpoints, both returned nothing). \n\nThat said, I was able to break it apart separately and run: \n1. `sampling_path = training_client.save_weights_for_sampler(name=\"step_6-Richard-Hsu\").result().path`\n2. `sampling_client = service_client.create_sampling_client(model_path=sampling_path)` \n\nWhich did work. Just wanted to report that! ",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/153/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/147",
      "id": 3696283667,
      "node_id": "I_kwDOPMjOmM7cUNAT",
      "number": 147,
      "title": "Model Poisoning and Stuck API Requests",
      "user": {
        "login": "GuanxingLu",
        "id": 70553669,
        "node_id": "MDQ6VXNlcjcwNTUzNjY5",
        "avatar_url": "https://avatars.githubusercontent.com/u/70553669?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/GuanxingLu",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-12-04T19:54:45Z",
      "updated_at": "2025-12-04T19:54:45Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Thanks for your great project. I am working on my own project based on tinker API, but after period of regular training, I encountered two errors. Why did these errors happen and how can I fix them?\n\n**Error 1**:\n```\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/tinker_cookbook/rl/train.py\", line 774, in do_train_step_and_get_sampling_client\n2025-12-03 02:45:45\n    data_D, prepare_minibatch_metrics, original_data = await prepare_minibatch(\n2025-12-03 02:45:45\n                                                       ^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\n2025-12-03 02:45:45\n    return await func(*args, **kwargs)\n2025-12-03 02:45:45\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/tinker_cookbook/rl/train.py\", line 683, in prepare_minibatch\n2025-12-03 02:45:45\n    current_logprobs_D = await asyncio.gather(\n2025-12-03 02:45:45\n                         ^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 227, in compute_logprobs_async\n2025-12-03 02:45:45\n    return await AwaitableConcurrentFuture(self.compute_logprobs(prompt))\n2025-12-03 02:45:45\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/lib/public_interfaces/api_future.py\", line 37, in result_async\n2025-12-03 02:45:45\n    return await asyncio.wrap_future(self._future)\n2025-12-03 02:45:45\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/lib/telemetry.py\", line 384, in _awrapper\n2025-12-03 02:45:45\n    return 
await cast(Callable[..., Awaitable[R]], func)(*args, **kwargs)\n2025-12-03 02:45:45\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/lib/public_interfaces/sampling_client.py\", line 222, in _compute_logprobs_async_with_retries\n2025-12-03 02:45:45\n    return await self.retry_handler.execute(_compute_logprobs_async)\n2025-12-03 02:45:45\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-03 02:45:45\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/lib/retry_handler.py\", line 144, in execute\n2025-12-03 02:45:45\n    raise tinker.APIConnectionError(\n2025-12-03 02:45:45\ntinker.APIConnectionError: No progress made in 1800s. Requests appear to be stuck.\n```\n\n**Error 2**:\n```\n2025-12-04 13:49:26\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/lib/public_interfaces/training_client.py\", line 224, in _send_single_forward_backward_request\n2025-12-04 13:49:26\n    return await client.training.forward_backward(\n2025-12-04 13:49:26\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-04 13:49:26\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/resources/training.py\", line 103, in forward_backward\n2025-12-04 13:49:26\n    return await self._post(\n2025-12-04 13:49:26\n           ^^^^^^^^^^^^^^^^^\n2025-12-04 13:49:26\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/_base_client.py\", line 1232, in post\n2025-12-04 13:49:26\n    return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)\n2025-12-04 13:49:26\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n2025-12-04 13:49:26\n  File \"/mnt/disk_3/guanxing/tinker-cookbook/.venv/lib/python3.11/site-packages/tinker/_base_client.py\", line 
1033, in request\n2025-12-04 13:49:26\n    raise self._make_status_error_from_response(err.response) from None\n2025-12-04 13:49:26\ntinker.BadRequestError: Error code: 400 - {'detail': 'This model is poisoned, likely due to a previous failure. We cannot run further operations against it. Please create a new model from a checkpoint if you wish to continue.'}\n```\n\n## Environment\n- Tinker SDK: 0.4.1\n- Python: 3.11\n- Training: `tinker_cookbook/rl/train.py`\n",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/147/reactions",
        "total_count": 1,
        "+1": 1,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/136",
      "id": 3673582050,
      "node_id": "I_kwDOPMjOmM7a9mni",
      "number": 136,
      "title": "Tokenizer download fails with corrupted cache; no way to pass",
      "user": {
        "login": "moesy",
        "id": 7231851,
        "node_id": "MDQ6VXNlcjcyMzE4NTE=",
        "avatar_url": "https://avatars.githubusercontent.com/u/7231851?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/moesy",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 2,
      "created_at": "2025-11-28T04:34:40Z",
      "updated_at": "2025-12-06T18:59:01Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "## **Summary**\n\nRunning `sl_loop.py` fails during tokenizer load due to a corrupted HuggingFace cache entry (`tokenizer.json`). The HuggingFace error explicitly instructs the user to retry with `force_download=True`, but **thinking-cookbook’s `get_tokenizer()` wrapper does not expose any HF download/hub options**, making recovery impossible without manually clearing cache.\n\nThis prevents the recipe from working when huggingface cache corruption occurs.\n\nEnvironment\n\n- Command: python ./sl_loop.py\n- OS: Linux (Arch-based, Python 3.13 virtualenv)\n\nLibraries:\n```\nPackage                   Version     Editable project location\n------------------------- ----------- -----------------------------------------------\naiobotocore               2.25.2\naiohappyeyeballs          2.6.1\naiohttp                   3.13.2\naioitertools              0.13.0\naiosignal                 1.4.0\nannotated-types           0.7.0\nantlr4-python3-runtime    4.13.2\nanyio                     4.11.0\nattrs                     25.4.0\nbeautifulsoup4            4.14.2\nblobfile                  3.1.0\nbotocore                  1.40.70\ncertifi                   2025.11.12\ncharset-normalizer        3.4.4\nchess                     1.11.2\nchz                       0.4.0\nclick                     8.3.1\ndatasets                  4.4.1\ndebugpy                   1.8.17\ndill                      0.4.0\ndistro                    1.9.0\ndocstring_parser          0.17.0\nfilelock                  3.20.0\nfrozenlist                1.8.0\nfsspec                    2025.10.0\ngitdb                     4.0.12\nGitPython                 3.1.45\nh11                       0.16.0\nh2                        4.3.0\nhf-xet                    1.2.0\nhpack                     4.1.0\nhttpcore                  1.0.9\nhttpx                     0.28.1\nhuggingface-hub           0.36.0\nhyperframe                6.1.0\nidna                      3.11\nijson                     
3.4.0.post0\ninspect_ai                0.3.69\nJinja2                    3.1.6\njiter                     0.12.0\njmespath                  1.0.1\njoblib                    1.5.2\njsonlines                 4.0.0\njsonpatch                 1.33\njsonpointer               3.0.0\njsonschema                4.25.1\njsonschema-specifications 2025.9.1\nlatex2sympy2_extended     1.10.2\nlinkify-it-py             2.0.3\nlxml                      6.0.2\nmarkdown-it-py            4.0.0\nMarkupSafe                3.0.3\nmath-verify               0.8.0\nmdit-py-plugins           0.5.0\nmdurl                     0.1.2\nmmh3                      5.2.0\nmpmath                    1.3.0\nmultidict                 6.7.0\nmultiprocess              0.70.18\nnest-asyncio              1.6.0\nnetworkx                  3.6\nnltk                      3.9.2\nnumpy                     2.3.5\nnvidia-cublas-cu12        12.8.4.1\nnvidia-cuda-cupti-cu12    12.8.90\nnvidia-cuda-nvrtc-cu12    12.8.93\nnvidia-cuda-runtime-cu12  12.8.90\nnvidia-cudnn-cu12         9.10.2.21\nnvidia-cufft-cu12         11.3.3.83\nnvidia-cufile-cu12        1.13.1.3\nnvidia-curand-cu12        10.3.9.90\nnvidia-cusolver-cu12      11.7.3.90\nnvidia-cusparse-cu12      12.5.8.93\nnvidia-cusparselt-cu12    0.7.1\nnvidia-nccl-cu12          2.27.5\nnvidia-nvjitlink-cu12     12.8.93\nnvidia-nvshmem-cu12       3.3.20\nnvidia-nvtx-cu12          12.8.90\nopenai                    2.8.1\npackaging                 25.0\npandas                    2.3.3\npip                       25.2\nplatformdirs              4.5.0\npropcache                 0.4.1\nprotobuf                  6.33.1\npsutil                    7.1.3\npyarrow                   22.0.0\npycryptodomex             3.23.0\npydantic                  2.12.5\npydantic_core             2.41.5\nPygments                  2.19.2\npylatexenc                2.10\npython-dateutil           2.9.0.post0\npython-dotenv             1.2.1\npytz                      2025.2\nPyYAML             
       6.0.3\nreferencing               0.37.0\nregex                     2025.11.3\nrequests                  2.32.5\nrich                      14.2.0\nrpds-py                   0.29.0\ns3fs                      2025.10.0\nsafetensors               0.7.0\nscipy                     1.16.3\nsemver                    3.0.4\nsentry-sdk                2.46.0\nsetuptools                80.9.0\nshortuuid                 1.0.13\nsix                       1.17.0\nsmmap                     5.0.2\nsniffio                   1.3.1\nsoupsieve                 2.8\nsympy                     1.14.0\ntenacity                  9.1.2\ntermcolor                 3.2.0\nTextArena                 0.7.4\ntextual                   1.0.0\ntinker                    0.5.1\ntinker_cookbook           0.1.0\ntokenizers                0.22.1\ntorch                     2.9.1\ntqdm                      4.67.1\ntransformers              4.57.3\ntriton                    3.5.1\ntyping_extensions         4.15.0\ntyping-inspection         0.4.2\ntzdata                    2025.2\nuc-micro-py               1.0.3\nurllib3                   2.5.0\nwandb                     0.23.0\nwebsockets                15.0.1\nwrapt                     1.17.3\nxxhash                    3.6.0\nyarl                      1.22.0\nzipp                      3.23.0\n```\n\nLogs\n```\nwandb: Tracking run with wandb version 0.23.0\nwandb: Run data is saved locally in /tmp/tinker-examples/sl-loop/wandb/run-<timestamp>\nwandb: Syncing run\ntinker_cookbook.utils.ml_log: [INFO] Logging to: /tmp/tinker-examples/sl-loop\n\ntokenizer.json: 51.9MB [00:01, 6.65MB/s]\n\nTraceback (most recent call last):\n  File \"tinker_cookbook/recipes/sl_loop.py\", line 155, in <module>\n    chz.nested_entrypoint(main)\n  File \"<venv>/site-packages/chz/blueprint/_entrypoint.py\", line 39, in inner\n    return fn(*args, **kwargs)\n  File \"<venv>/site-packages/chz/blueprint/_entrypoint.py\", line 109, in nested_entrypoint\n    return main(value)\n  
File \"tinker_cookbook/recipes/sl_loop.py\", line 46, in main\n    tokenizer = get_tokenizer(config.model_name)\n  File \"tinker_cookbook/tokenizer_utils.py\", line 31, in get_tokenizer\n    return AutoTokenizer.from_pretrained(model_name, use_fast=True)\n  File \"<venv>/site-packages/transformers/models/auto/tokenization_auto.py\", line 1156, in from_pretrained\n    return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)\n  File \"<venv>/site-packages/transformers/tokenization_utils_base.py\", line 2076, in from_pretrained\n    resolved_vocab_files[file_id] = cached_file(...)\n  File \"<venv>/site-packages/transformers/utils/hub.py\", line 322, in cached_file\n    file = cached_files(...)\n  File \"<venv>/site-packages/transformers/utils/hub.py\", line 567, in cached_files\n    raise e\n  File \"<venv>/site-packages/transformers/utils/hub.py\", line 479, in cached_files\n    hf_hub_download(...)\n  File \"<venv>/site-packages/huggingface_hub/file_download.py\", line 1007, in hf_hub_download\n    return _hf_hub_download_to_cache_dir(...)\n  File \"<venv>/site-packages/huggingface_hub/file_download.py\", line 1168, in _hf_hub_download_to_cache_dir\n    _download_to_tmp_and_move(...)\n  File \"<venv>/site-packages/huggingface_hub/file_download.py\", line 1735, in _download_to_tmp_and_move\n    http_get(...)\n  File \"<venv>/site-packages/huggingface_hub/file_download.py\", line 522, in http_get\n    raise EnvironmentError(\n      OSError: Consistency check failed: file should be of size 9085698 but has size 51851870 (tokenizer.json).\n      This is usually due to network issues while downloading the file.\n      Please retry with `force_download=True`.\n\nwandb: Logs saved to /tmp/tinker-examples/sl-loop/wandb/run-<timestamp>/logs\n```\nWorkaround: manually delete cache.",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/136/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/130",
      "id": 3661193130,
      "node_id": "I_kwDOPMjOmM7aOV-q",
      "number": 130,
      "title": "suggested tweaks to tic-tac-toe example",
      "user": {
        "login": "murphyk",
        "id": 4632336,
        "node_id": "MDQ6VXNlcjQ2MzIzMzY=",
        "avatar_url": "https://avatars.githubusercontent.com/u/4632336?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/murphyk",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-11-25T01:55:10Z",
      "updated_at": "2025-12-06T19:05:05Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "The default settings in https://github.com/thinking-machines-lab/tinker-cookbook/blob/main/tinker_cookbook/recipes/multiplayer_rl/text_arena/train.py use `num_train_datapoints = 131072` with a batch size of 512, which amounts to 256 steps.  However, I found that you can get good results with many fewer steps (see figure), which is faster and cheaper. For example, running for 130 steps took me 6h (wall clock) and cost $40 (!).\n\nI suggest changing `num_train_datapoints = 20_000`  which amounts to 39 steps (which should only take ~2h).\nI also suggest showing the expected reward curve (as in example below), and explaining why a reward of 0 is good (since I assume it corresponds to always drawing, which is the expected return for player 0 under an optimal strategy).\n\n[Of course, if we just use 39 steps, we won't see the instabilities that occur later in training, which seem to be due to going off policy.]\n\nIt would also be great if you could tweak the demo to support playing against a random opponent (instead of self play), either at training or testing or both. This may simplify the code, and also shows that the learned policy is general, and doesn't always deterministically execute the same sequence of actions from the initial board state. It would be nice to also allow the user to interactively play against the learned strategy.  (My [naive attempt](https://discord.com/channels/1435768626681741364/1435768628250415258/1442238316429705247) to interact with this TwoAgentEnv failed, due to async issues.) \n\n<img width=\"559\" height=\"413\" alt=\"Image\" src=\"https://github.com/user-attachments/assets/b0b0bb25-fce9-4efc-a2e1-79b411c92252\" />\n",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/130/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/129",
      "id": 3661024957,
      "node_id": "I_kwDOPMjOmM7aNs69",
      "number": 129,
      "title": "Request for Qwen3-30B-A3B-Thinking-2507",
      "user": {
        "login": "tonghuikang",
        "id": 17105577,
        "node_id": "MDQ6VXNlcjE3MTA1NTc3",
        "avatar_url": "https://avatars.githubusercontent.com/u/17105577?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/tonghuikang",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 2,
      "created_at": "2025-11-25T00:39:03Z",
      "updated_at": "2025-12-07T04:57:30Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Thanks for sponsoring AIMO 3 - https://www.kaggle.com/competitions/ai-mathematical-olympiad-progress-prize-3/discussion/635859\n\nCould you add [Qwen3-30B-A3B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507) to the list of models that we can fine-tune?",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/129/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/128",
      "id": 3659532959,
      "node_id": "I_kwDOPMjOmM7aIAqf",
      "number": 128,
      "title": "Support for Qwen 3B 0.6B",
      "user": {
        "login": "marianokamp",
        "id": 3245189,
        "node_id": "MDQ6VXNlcjMyNDUxODk=",
        "avatar_url": "https://avatars.githubusercontent.com/u/3245189?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/marianokamp",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 4,
      "created_at": "2025-11-24T16:00:35Z",
      "updated_at": "2025-12-09T08:30:51Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "It would be great to play around with teacher/student with small and cheap models, say 8B vs 0.6B. For the student, the smallest Qwen is 4B and it would be awesome to get the 0.6B version instead. Llama 3.2 1B is not available in Europe, but is in general only available with personal registration and wait period. ",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/128/reactions",
        "total_count": 2,
        "+1": 2,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/105",
      "id": 3649252526,
      "node_id": "I_kwDOPMjOmM7Zgyyu",
      "number": 105,
      "title": "Max context window / max sequence length clarification",
      "user": {
        "login": "yifei-reducto",
        "id": 193959838,
        "node_id": "U_kgDOC4-Xng",
        "avatar_url": "https://avatars.githubusercontent.com/u/193959838?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/yifei-reducto",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 3,
      "created_at": "2025-11-20T21:54:15Z",
      "updated_at": "2025-12-15T22:25:20Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Qwen3 models support up to 256k context window. However, I think thinker currently limits the max sequence length to 32k. Here's an error message I saw when I tried to finetune Qwen3-30b-a3b-Instruct-2507 using tinker:\n`tinker.BadRequestError: Error code: 400 - {'detail': 'sequence length 38632 exceeds max sequence length 32768 for field target_tokens[4]'}`\nCould you add some clarification in docs? Also, any chance Tinker would support longer context window in the future?\nThank you for building this amazing product!",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/105/reactions",
        "total_count": 13,
        "+1": 8,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 5
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/98",
      "id": 3627551886,
      "node_id": "I_kwDOPMjOmM7YOAyO",
      "number": 98,
      "title": "HIPAA compliance",
      "user": {
        "login": "aditya-silna",
        "id": 193926150,
        "node_id": "U_kgDOC48UBg",
        "avatar_url": "https://avatars.githubusercontent.com/u/193926150?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/aditya-silna",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-11-14T23:50:46Z",
      "updated_at": "2025-11-30T03:37:23Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Hi, \n\nAre there any plans for this API to be HIPAA compliant?\n\nThanks!",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/98/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/97",
      "id": 3627504934,
      "node_id": "I_kwDOPMjOmM7YN1Um",
      "number": 97,
      "title": "Use load_dotenv or similar for env vars",
      "user": {
        "login": "aksheyd",
        "id": 131929364,
        "node_id": "U_kgDOB90VFA",
        "avatar_url": "https://avatars.githubusercontent.com/u/131929364?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/aksheyd",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-11-14T23:27:10Z",
      "updated_at": "2025-11-14T23:27:10Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Env vars like the `TINKER_API_KEY` should be auto-loaded using load_dotenv() or a similar utility to make it easier then exporting into terminal.  This may not make sense in the package but in the cookbook, I feel like it would make sense.",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/97/reactions",
        "total_count": 1,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 1
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/91",
      "id": 3619967915,
      "node_id": "I_kwDOPMjOmM7XxFOr",
      "number": 91,
      "title": "Having all the docs in a PDF file",
      "user": {
        "login": "cmirceac",
        "id": 32462732,
        "node_id": "MDQ6VXNlcjMyNDYyNzMy",
        "avatar_url": "https://avatars.githubusercontent.com/u/32462732?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/cmirceac",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-11-13T07:51:43Z",
      "updated_at": "2025-11-13T07:51:43Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "This is not an issue per se, but a feature request: give us an option to have all the docs in a PDF file, for some people having a PDF with all in one (nicely structured) place is very useful. Also a reminder when somethings changed and the PDF must be refreshed it will be a nice cherry on the top 🥇.",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/91/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/82",
      "id": 3607755802,
      "node_id": "I_kwDOPMjOmM7XCfwa",
      "number": 82,
      "title": "Can I use my own checkpoint built on a supported base model?",
      "user": {
        "login": "WaitHZ",
        "id": 98675853,
        "node_id": "U_kgDOBeGsjQ",
        "avatar_url": "https://avatars.githubusercontent.com/u/98675853?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/WaitHZ",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-11-10T12:13:13Z",
      "updated_at": "2025-11-10T12:13:13Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "I’d like to know whether it’s possible to upload and use my own checkpoint as the starting point for fine-tuning through the Tinker API. In other words, can I initialize the training with a custom ckpt file (derived from the same supported base model) instead of starting from the original base weights provided by Tinker?",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/82/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/77",
      "id": 3598320072,
      "node_id": "I_kwDOPMjOmM7WegHI",
      "number": 77,
      "title": "Expose raw logits in trainer output",
      "user": {
        "login": "thunderous77",
        "id": 93523630,
        "node_id": "U_kgDOBZMOrg",
        "avatar_url": "https://avatars.githubusercontent.com/u/93523630?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/thunderous77",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-11-07T02:07:15Z",
      "updated_at": "2025-11-27T18:21:14Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "I was wondering if you’d be open to adding an option in both the trainer and sampler modules to return the raw logits alongside the logprobs.\nThis would be really useful for experiments that need to directly manipulate or analyze logits — for example, applying controlled perturbations, studying logit sharpness, or implementing custom regularization and reward shaping methods.\nIf the internal pipeline already keeps logits before computing logprobs, it might be straightforward to expose them via a flag such as return_logits=True in both components.\nThis feature would make Tinker more flexible for advanced RLHF and off-policy research setups, where having access to logits is often essential for fine-grained diagnostics and exploration.\nThanks for considering!",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/77/reactions",
        "total_count": 3,
        "+1": 3,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/74",
      "id": 3588928470,
      "node_id": "I_kwDOPMjOmM7V6rPW",
      "number": 74,
      "title": "qwen3 tool rendering",
      "user": {
        "login": "csmith23",
        "id": 6314425,
        "node_id": "MDQ6VXNlcjYzMTQ0MjU=",
        "avatar_url": "https://avatars.githubusercontent.com/u/6314425?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/csmith23",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-11-05T02:54:49Z",
      "updated_at": "2025-11-20T20:43:03Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "I've been using tinker to finetune qwen3 30b a3b instruct on conversations involving tool use, and just wanted to bring up a discrepancy in the qwen3 renderer in the cookbook vs. the jinja templates on huggingface.\n\nIn the [jinja template](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507?chat_template=default), there are the lines\n```\n{%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n```\nwhich render tool responses with the `user` role, while in the cookbook the role is used as-is,\n```\nob_str = f\"{maybe_newline}<|im_start|>{message['role']}\\n\"\n```\nin `renderers.py`, lines 332 and 445.\n\nThe rendering of elements under the `\"tools\"` key of a conversation as part of the system prompt is also not implemented, but that seems to be the case for all renderers, not just the qwen ones",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/74/reactions",
        "total_count": 1,
        "+1": 1,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/63",
      "id": 3574248947,
      "node_id": "I_kwDOPMjOmM7VCrXz",
      "number": 63,
      "title": "wandb does not work with verifiers integration",
      "user": {
        "login": "tmabraham",
        "id": 37097934,
        "node_id": "MDQ6VXNlcjM3MDk3OTM0",
        "avatar_url": "https://avatars.githubusercontent.com/u/37097934?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/tmabraham",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-10-31T10:04:31Z",
      "updated_at": "2025-10-31T10:04:43Z",
      "closed_at": null,
      "author_association": "CONTRIBUTOR",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Running `python -m tinker_cookbook.recipes.verifiers_rl.train` errors out if wandb logging is enabled:\n\n```\nTraceback (most recent call last):\n  File \"<frozen runpy>\", line 198, in _run_module_as_main\n  File \"<frozen runpy>\", line 88, in _run_code\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/recipes/verifiers_rl/train.py\", line 184, in <module>\n    asyncio.run(cli_main(cli_config, None))\n  File \"/home/tanishq/.local/share/uv/python/cpython-3.12.11-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py\", line 195, in run\n    return runner.run(main)\n           ^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/.local/share/uv/python/cpython-3.12.11-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py\", line 118, in run\n    return self._loop.run_until_complete(task)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/.local/share/uv/python/cpython-3.12.11-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py\", line 691, in run_until_complete\n    return future.result()\n           ^^^^^^^^^^^^^^^\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/recipes/verifiers_rl/train.py\", line 179, in cli_main\n    await train.main(cfg)\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\n    return await func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/rl/train.py\", line 990, in main\n    ml_logger = ml_log.setup_logging(\n                ^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/utils/ml_log.py\", line 470, in setup_logging\n    ml_logger.log_hparams(config)\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/utils/ml_log.py\", line 349, in log_hparams\n    logger.log_hparams(config)\n  File \"/home/tanishq/tinker-cookbook/tinker_cookbook/utils/ml_log.py\", line 230, in log_hparams\n    wandb.config.update(dump_config(config))\n  File 
\"/home/tanishq/tinker-cookbook/.venv/lib/python3.12/site-packages/wandb/sdk/wandb_config.py\", line 187, in update\n    sanitized = self._update(d, allow_val_change)\n                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/tinker-cookbook/.venv/lib/python3.12/site-packages/wandb/sdk/wandb_config.py\", line 180, in _update\n    sanitized = self._sanitize_dict(\n                ^^^^^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/tinker-cookbook/.venv/lib/python3.12/site-packages/wandb/sdk/wandb_config.py\", line 267, in _sanitize_dict\n    k, v = self._sanitize(k, v, allow_val_change)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/tanishq/tinker-cookbook/.venv/lib/python3.12/site-packages/wandb/sdk/wandb_config.py\", line 288, in _sanitize\n    raise config_util.ConfigError(\nwandb.sdk.lib.config_util.ConfigError: Attempted to change value of key \"dataset_builder\" from {'vf_env': '<verifiers.envs.singleturn_env.SingleTurnEnv object at 0x7fd46752da30>',\n'groups_per_batch': 128, 'dataset_n': -1, 'dataset_seed': None} to {'vf_env': '<verifiers.envs.singleturn_env.SingleTurnEnv object at 0x7fd4677eb800>', 'groups_per_batch': 128, 'dataset_n': -1,\n 'dataset_seed': None}\nIf you really want to do this, pass allow_val_change=True to config.update()\n```\n\nIt seems the reason seems to be `VerifiersRLDatasetBuilder` is a `chz.chz` class and all the fields get saved to wandb but this includes `vf_env` the full verifiers env... 
i couldn't find any good solution to avoid this and came up with some hacky solution with Claude Code's help:\n\n```\n-@chz.chz\n class VerifiersRLDatasetBuilder(RLDatasetBuilder):\n-    vf_env: vf.Environment\n-    groups_per_batch: int\n-    dataset_n: int\n-    dataset_seed: int | None\n+    def __init__(\n+        self,\n+        vf_env: vf.Environment,\n+        groups_per_batch: int,\n+        dataset_n: int,\n+        dataset_seed: int | None,\n+    ):\n+        # Use object.__setattr__ to bypass frozen instance restrictions from parent chz class\n+        object.__setattr__(self, 'vf_env', vf_env)\n+        object.__setattr__(self, 'groups_per_batch', groups_per_batch)\n+        object.__setattr__(self, 'dataset_n', dataset_n)\n+        object.__setattr__(self, 'dataset_seed', dataset_seed)\n+\n+    def to_dict(self):\n+        \"\"\"Return a dictionary representation excluding the vf_env object.\"\"\"\n+        return {\n+            \"groups_per_batch\": self.groups_per_batch,\n+            \"dataset_n\": self.dataset_n,\n+            \"dataset_seed\": self.dataset_seed,\n+        }\n```\n\nLet me know if there may be better approaches for addressing this issue otherwise I'm happy to PR this change directly as well.\n\ncc: @willccbb ",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/63/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/57",
      "id": 3572608957,
      "node_id": "I_kwDOPMjOmM7U8a-9",
      "number": 57,
      "title": "Applying rope scaling to the model's config",
      "user": {
        "login": "mungg",
        "id": 2150951,
        "node_id": "MDQ6VXNlcjIxNTA5NTE=",
        "avatar_url": "https://avatars.githubusercontent.com/u/2150951?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/mungg",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-10-30T21:46:27Z",
      "updated_at": "2025-10-30T21:46:27Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Thank you for the awesome project!\nI’m looking for a way to train on long-context data.\nNormally, I manually modify the `config.json` of a locally saved model to apply rope scaling,\nbut since Tinker hosts the model on their server side, I couldn’t find a way to change it.\nIs there any way to perform long-context training now? ",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/57/reactions",
        "total_count": 3,
        "+1": 3,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/55",
      "id": 3572404926,
      "node_id": "I_kwDOPMjOmM7U7pK-",
      "number": 55,
      "title": "Rate Limits",
      "user": {
        "login": "isaacrobinson-maker",
        "id": 232454225,
        "node_id": "U_kgDODdr4UQ",
        "avatar_url": "https://avatars.githubusercontent.com/u/232454225?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/isaacrobinson-maker",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-10-30T20:31:57Z",
      "updated_at": "2025-12-11T19:16:52Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Could you add transparency around concurrent LoRA rate limits?",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/55/reactions",
        "total_count": 4,
        "+1": 4,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/53",
      "id": 3566787567,
      "node_id": "I_kwDOPMjOmM7UmNvv",
      "number": 53,
      "title": "On-policy Distillation Checkpoint Release",
      "user": {
        "login": "rattlesnakey",
        "id": 57869572,
        "node_id": "MDQ6VXNlcjU3ODY5NTcy",
        "avatar_url": "https://avatars.githubusercontent.com/u/57869572?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/rattlesnakey",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-10-29T16:05:46Z",
      "updated_at": "2025-10-29T16:05:46Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Thank you for offering the on-policy distillation project. I wonder if you could share the checkpoints of the Qwen-3-8B model after SFT training and the model after on-policy distillation? Thank you very much :)",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/53/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/49",
      "id": 3562457625,
      "node_id": "I_kwDOPMjOmM7UVsoZ",
      "number": 49,
      "title": "VLM models and custom tokens",
      "user": {
        "login": "deepakshankar94",
        "id": 13984593,
        "node_id": "MDQ6VXNlcjEzOTg0NTkz",
        "avatar_url": "https://avatars.githubusercontent.com/u/13984593?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/deepakshankar94",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 2,
      "created_at": "2025-10-28T16:45:24Z",
      "updated_at": "2025-12-07T01:33:19Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Are there plans to support VLM models as well. Also will custom tokens be supported in the tinker API as well ?",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/49/reactions",
        "total_count": 3,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 3
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/40",
      "id": 3553290697,
      "node_id": "I_kwDOPMjOmM7TyunJ",
      "number": 40,
      "title": "Checkpoint archive empty when downloading weights",
      "user": {
        "login": "Ro0tee",
        "id": 47998597,
        "node_id": "MDQ6VXNlcjQ3OTk4NTk3",
        "avatar_url": "https://avatars.githubusercontent.com/u/47998597?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/Ro0tee",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 0,
      "created_at": "2025-10-26T05:02:52Z",
      "updated_at": "2025-10-26T05:02:52Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Thank you for the great work.\n\nWhen I followed the example code in the [document ](https://tinker-docs.thinkingmachines.ai/download-weights) to download the weights, the file was created but appears to be empty. Its file size is 12 KB and the entire content consists of zero bytes.\n\nCould you please advise on how to resolve this issue?\n\nThanks in advance!",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/40/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/21",
      "id": 3509042659,
      "node_id": "I_kwDOPMjOmM7RJ73j",
      "number": 21,
      "title": "Incorrect `margin` calculation in DPO cookbook",
      "user": {
        "login": "abheesht17",
        "id": 31360468,
        "node_id": "MDQ6VXNlcjMxMzYwNDY4",
        "avatar_url": "https://avatars.githubusercontent.com/u/31360468?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/abheesht17",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 1,
      "created_at": "2025-10-13T08:42:57Z",
      "updated_at": "2025-10-14T04:10:20Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "We don't need the `dpo_beta` term while computing the margin here: https://github.com/thinking-machines-lab/tinker-cookbook/blob/main/tinker_cookbook/preference/train_dpo.py#L142.\n\nIt's extraneous, since we are already multiplying it in the two lines above: https://github.com/thinking-machines-lab/tinker-cookbook/blob/main/tinker_cookbook/preference/train_dpo.py#L140-L141\n\n",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/21/reactions",
        "total_count": 0,
        "+1": 0,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 0
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": [
        172
      ]
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/4",
      "id": 3474719432,
      "node_id": "I_kwDOPMjOmM7PHALI",
      "number": 4,
      "title": "Consider adding `push_to_hub()` functionality for trained models",
      "user": {
        "login": "lewtun",
        "id": 26859204,
        "node_id": "MDQ6VXNlcjI2ODU5MjA0",
        "avatar_url": "https://avatars.githubusercontent.com/u/26859204?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/lewtun",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "labels": {},
      "state": "open",
      "locked": false,
      "assignee": null,
      "assignees": {},
      "milestone": null,
      "comments": 2,
      "created_at": "2025-10-01T18:53:34Z",
      "updated_at": "2025-10-02T19:05:01Z",
      "closed_at": null,
      "author_association": "NONE",
      "type": null,
      "active_lock_reason": null,
      "sub_issues_summary": {
        "total": 0,
        "completed": 0,
        "percent_completed": 0
      },
      "issue_dependencies_summary": {
        "blocked_by": 0,
        "total_blocked_by": 0,
        "blocking": 0,
        "total_blocking": 0
      },
      "body": "Hello Thinking Machines team, congrats on the release of Tinker and the wonderful docs and clean API!\n\nI was wondering if you'd be open to adding a `push_to_hub()` functionality to Tinker, where users can choose to push their model checkpoints to the Hugging Face Hub (either mid-training or afterwards)?\n\nIf your models are implemented as `transformers` models, this should be pretty straightforward and you can check out e.g. how we do this in TRL [here](https://github.com/huggingface/trl/blob/e086f073cf6dee30acc2d3fe357db21e1901c2be/trl/trainer/base_trainer.py#L33) to also supply a pretty model card, complete with metadata about the dataset lineage, how to run the model etc. \n\nOne nice aspect of this is that you can add e.g. a `tinker` tag and banner and get visibility on the Hugging Face Hub for models trained with Tinker.  (TRL [example](https://huggingface.co/trl-lib/Qwen3-4B-LoRA) below)\n\n<img width=\"777\" height=\"779\" alt=\"Image\" src=\"https://github.com/user-attachments/assets/392fddbc-37c6-49bc-9373-c42b6e2b87d1\" />\n\nIf this is a feature that you're interested in supporting, please let me know if you need any pointers!",
      "closed_by": null,
      "reactions": {
        "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/4/reactions",
        "total_count": 18,
        "+1": 12,
        "-1": 0,
        "laugh": 0,
        "hooray": 0,
        "confused": 0,
        "heart": 0,
        "rocket": 0,
        "eyes": 6
      },
      "performed_via_github_app": null,
      "state_reason": null,
      "linked_prs": []
    }
  ],
  "pulls": [
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/182",
      "id": 3104488596,
      "node_id": "PR_kwDOPMjOmM65CryU",
      "number": 182,
      "state": "open",
      "locked": false,
      "title": "Revise visualization example in training-sampling.mdx",
      "user": {
        "login": "TonAldo48",
        "id": 36107616,
        "node_id": "MDQ6VXNlcjM2MTA3NjE2",
        "avatar_url": "https://avatars.githubusercontent.com/u/36107616?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/TonAldo48",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "Updated the visualization example in training-sampling.mdx to include correct input and target pairs.",
      "created_at": "2025-12-16T00:58:47Z",
      "updated_at": "2025-12-16T01:00:48Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "e6c88bd9d97ba3fb1f307cf9cc372c406347ef39",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "TonAldo48:patch-1",
        "ref": "patch-1",
        "sha": "f6c52108102fa83845b1c72bb1df0ccbf7846253",
        "user": {
          "login": "TonAldo48",
          "id": 36107616,
          "node_id": "MDQ6VXNlcjM2MTA3NjE2",
          "avatar_url": "https://avatars.githubusercontent.com/u/36107616?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/TonAldo48",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1117173030,
          "node_id": "R_kgDOQpa1Jg",
          "name": "tinker-cookbook",
          "full_name": "TonAldo48/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "TonAldo48",
            "id": 36107616,
            "node_id": "MDQ6VXNlcjM2MTA3NjE2",
            "avatar_url": "https://avatars.githubusercontent.com/u/36107616?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/TonAldo48",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/TonAldo48/tinker-cookbook",
          "created_at": "2025-12-15T23:56:35Z",
          "updated_at": "2025-12-15T23:56:35Z",
          "pushed_at": "2025-12-16T00:55:57Z",
          "homepage": null,
          "size": 1224,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "1d30325041c77cbf711f9527afca70394c651e52",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/182"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/182"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/182"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/182/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/182/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/182/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/f6c52108102fa83845b1c72bb1df0ccbf7846253"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/178",
      "id": 3101369646,
      "node_id": "PR_kwDOPMjOmM642yUu",
      "number": 178,
      "state": "open",
      "locked": false,
      "title": "Fix Qwen3Renderer stripping out last <think>, and Qwen3DisableThinkingRenderer not adding <think>\\n\\n</think>",
      "user": {
        "login": "thejaminator",
        "id": 30519287,
        "node_id": "MDQ6VXNlcjMwNTE5Mjg3",
        "avatar_url": "https://avatars.githubusercontent.com/u/30519287?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/thejaminator",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "Suggested fix for  https://github.com/thinking-machines-lab/tinker-cookbook/issues/177 and https://github.com/thinking-machines-lab/tinker-cookbook/issues/176",
      "created_at": "2025-12-15T07:35:36Z",
      "updated_at": "2025-12-15T09:01:55Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "cff9d3fa44706efeaca2371c09e7aad653dfa206",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "thejaminator:fix-qwen3-disable-thinking-renderer",
        "ref": "fix-qwen3-disable-thinking-renderer",
        "sha": "9209ba634c8b42963430a52f6b92e3a8766b3d46",
        "user": {
          "login": "thejaminator",
          "id": 30519287,
          "node_id": "MDQ6VXNlcjMwNTE5Mjg3",
          "avatar_url": "https://avatars.githubusercontent.com/u/30519287?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thejaminator",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1116642309,
          "node_id": "R_kgDOQo6cBQ",
          "name": "tinker-cookbook",
          "full_name": "thejaminator/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thejaminator",
            "id": 30519287,
            "node_id": "MDQ6VXNlcjMwNTE5Mjg3",
            "avatar_url": "https://avatars.githubusercontent.com/u/30519287?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thejaminator",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/thejaminator/tinker-cookbook",
          "created_at": "2025-12-15T07:07:40Z",
          "updated_at": "2025-12-15T07:07:40Z",
          "pushed_at": "2025-12-15T09:01:17Z",
          "homepage": null,
          "size": 1259,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "1d30325041c77cbf711f9527afca70394c651e52",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/178"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/178"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/178"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/178/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/178/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/178/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/9209ba634c8b42963430a52f6b92e3a8766b3d46"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/172",
      "id": 3099258562,
      "node_id": "PR_kwDOPMjOmM64uu7C",
      "number": 172,
      "state": "open",
      "locked": false,
      "title": "fix: correct margin calculation in DPO training",
      "user": {
        "login": "leejianwoo-collab",
        "id": 248585198,
        "node_id": "U_kgDODtEb7g",
        "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/leejianwoo-collab",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "fix: correct margin calculation in DPO training\r\n\r\nRemove redundant dpo_beta multiplication in margin computation.\r\nThe chosen_rewards and rejected_rewards already include the dpo_beta\r\nfactor, so margin should not multiply by dpo_beta again.\r\n\r\nThis fix only affects logging metrics and does not impact training.\r\n\r\nFixes #21",
      "created_at": "2025-12-14T02:54:22Z",
      "updated_at": "2025-12-14T02:54:22Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "63994c8060593fb3b03669ae9dac59cc0dc2c1cd",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "leejianwoo-collab:tinker_issue03",
        "ref": "tinker_issue03",
        "sha": "cbfb9d078e78b231baebdce185d262c08d88fe2e",
        "user": {
          "login": "leejianwoo-collab",
          "id": 248585198,
          "node_id": "U_kgDODtEb7g",
          "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/leejianwoo-collab",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1115775500,
          "node_id": "R_kgDOQoFiDA",
          "name": "tinker-cookbook_woo",
          "full_name": "leejianwoo-collab/tinker-cookbook_woo",
          "private": false,
          "owner": {
            "login": "leejianwoo-collab",
            "id": 248585198,
            "node_id": "U_kgDODtEb7g",
            "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/leejianwoo-collab",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/leejianwoo-collab/tinker-cookbook_woo",
          "created_at": "2025-12-13T14:28:36Z",
          "updated_at": "2025-12-13T14:28:36Z",
          "pushed_at": "2025-12-14T02:54:01Z",
          "homepage": null,
          "size": 931,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "281c82fc83e22317c4aa2b3dbcbb805a511ac573",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/172"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/172"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/172"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/172/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/172/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/172/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/cbfb9d078e78b231baebdce185d262c08d88fe2e"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": [
        2,
        21
      ]
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/171",
      "id": 3099244781,
      "node_id": "PR_kwDOPMjOmM64urjt",
      "number": 171,
      "state": "open",
      "locked": false,
      "title": "fix: fix grammar in Search-R1 README",
      "user": {
        "login": "leejianwoo-collab",
        "id": 248585198,
        "node_id": "U_kgDODtEb7g",
        "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/leejianwoo-collab",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "Change \"also releases\" to \"also released\" to use correct past tense\r\nwhen describing already published research paper and data.\r\n\r\nFixes  #157 ",
      "created_at": "2025-12-14T02:33:10Z",
      "updated_at": "2025-12-14T02:33:10Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "8df625c74dfbe01419d418ac991c894540c420ea",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "leejianwoo-collab:tinker_iss02",
        "ref": "tinker_iss02",
        "sha": "80afacf6fd7f1d83c7cacb9bf91cb96aef6bbb55",
        "user": {
          "login": "leejianwoo-collab",
          "id": 248585198,
          "node_id": "U_kgDODtEb7g",
          "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/leejianwoo-collab",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1115775500,
          "node_id": "R_kgDOQoFiDA",
          "name": "tinker-cookbook_woo",
          "full_name": "leejianwoo-collab/tinker-cookbook_woo",
          "private": false,
          "owner": {
            "login": "leejianwoo-collab",
            "id": 248585198,
            "node_id": "U_kgDODtEb7g",
            "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/leejianwoo-collab",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/leejianwoo-collab/tinker-cookbook_woo",
          "created_at": "2025-12-13T14:28:36Z",
          "updated_at": "2025-12-13T14:28:36Z",
          "pushed_at": "2025-12-14T02:54:01Z",
          "homepage": null,
          "size": 931,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "281c82fc83e22317c4aa2b3dbcbb805a511ac573",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/171"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/171"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/171"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/171/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/171/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/171/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/80afacf6fd7f1d83c7cacb9bf91cb96aef6bbb55"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": [
        1
      ]
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/169",
      "id": 3099241462,
      "node_id": "PR_kwDOPMjOmM64uqv2",
      "number": 169,
      "state": "open",
      "locked": false,
      "title": "fix: correct pig latin training examples for proper consonant cluster…",
      "user": {
        "login": "leejianwoo-collab",
        "id": 248585198,
        "node_id": "U_kgDODtEb7g",
        "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/leejianwoo-collab",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "fix: #165 \r\ncorrect pig latin training examples for proper consonant cluster handling\r\n\r\n- Fix \"split\" → \"it-splay\" (move entire \"spl\" consonant cluster)\r\n- Fix \"quantum\" → \"antum-quay\" (treat \"qu\" as single phonetic unit)\r\n- Resolves inconsistent phonological rules in training data\r\n- Addresses Issue #165: flawed tinker code for pig latin\r\n\r\nThis ensures the model learns consistent linguistic patterns instead of\r\ncontradictory character-level rules that lead to training confusion.",
      "created_at": "2025-12-14T02:25:23Z",
      "updated_at": "2025-12-14T02:27:04Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "f32d85470ae19d358ab9863405d1561ebbb2c497",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "leejianwoo-collab:tinker_issue01",
        "ref": "tinker_issue01",
        "sha": "f920010f8674710a66d82904363b474f7b752907",
        "user": {
          "login": "leejianwoo-collab",
          "id": 248585198,
          "node_id": "U_kgDODtEb7g",
          "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/leejianwoo-collab",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1115775500,
          "node_id": "R_kgDOQoFiDA",
          "name": "tinker-cookbook_woo",
          "full_name": "leejianwoo-collab/tinker-cookbook_woo",
          "private": false,
          "owner": {
            "login": "leejianwoo-collab",
            "id": 248585198,
            "node_id": "U_kgDODtEb7g",
            "avatar_url": "https://avatars.githubusercontent.com/u/248585198?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/leejianwoo-collab",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/leejianwoo-collab/tinker-cookbook_woo",
          "created_at": "2025-12-13T14:28:36Z",
          "updated_at": "2025-12-13T14:28:36Z",
          "pushed_at": "2025-12-14T02:54:01Z",
          "homepage": null,
          "size": 931,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "281c82fc83e22317c4aa2b3dbcbb805a511ac573",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/169"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/169"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/169"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/169/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/169/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/169/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/f920010f8674710a66d82904363b474f7b752907"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": [
        1
      ]
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/168",
      "id": 3099184353,
      "node_id": "PR_kwDOPMjOmM64uczh",
      "number": 168,
      "state": "open",
      "locked": false,
      "title": "fix(wandb): allow config value changes on resume",
      "user": {
        "login": "piotrgoral",
        "id": 44177888,
        "node_id": "MDQ6VXNlcjQ0MTc3ODg4",
        "avatar_url": "https://avatars.githubusercontent.com/u/44177888?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/piotrgoral",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "I faced wandb error when I tried to run `python -m tinker_cookbook.recipes.preference.shorter.train`\r\nAlso it's related to the https://github.com/thinking-machines-lab/tinker-cookbook/issues/63\r\n\r\nFix mighr be `allow_val_change=True`\r\n\r\n> wandb: ERROR Attempted to change value of key \"dataset_builder\" from {'comparison_builder': {'swap': False}, 'batch_size': 32, 'policy_renderer_name': 'qwen3_instruct', 'policy_model_name': 'Qwen/Qwen3-4B-Instruct-2507', 'tournament_pattern': 'ALL_PAIRS_BOTH_WAYS', 'group_size': 16, 'content_preprocessor': None} to {'comparison_builder': {'swap': False}, 'batch_size': 32, 'policy_renderer_name': 'qwen3_instruct', 'policy_model_name': 'Qwen/Qwen3-4B-Instruct-2507', 'tournament_pattern': 'ALL_PAIRS_BOTH_WAYS', 'group_size': 16, 'content_preprocessor': None, 'preference_model_builder': {}}\r\n> wandb: ERROR If you really want to do this, pass allow_val_change=True to config.update()\r\n> Traceback (most recent call last):\r\n>   File \"<frozen runpy>\", line 198, in _run_module_as_main\r\n>   File \"<frozen runpy>\", line 88, in _run_code\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/recipes/preference/shorter/train.py\", line 48, in <module>\r\n>     main()\r\n>     ~~~~^^\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/recipes/preference/shorter/train.py\", line 44, in main\r\n>     asyncio.run(train.main(config))\r\n>     ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^\r\n>   File \"/Users/piotr/miniconda3/lib/python3.13/asyncio/runners.py\", line 195, in run\r\n>     return runner.run(main)\r\n>            ~~~~~~~~~~^^^^^^\r\n>   File \"/Users/piotr/miniconda3/lib/python3.13/asyncio/runners.py\", line 118, in run\r\n>     return self._loop.run_until_complete(task)\r\n>            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^\r\n>   File \"/Users/piotr/miniconda3/lib/python3.13/asyncio/base_events.py\", line 725, in run_until_complete\r\n>     return 
future.result()\r\n>            ~~~~~~~~~~~~~^^\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/utils/trace.py\", line 332, in async_wrapper\r\n>     return await func(*args, **kwargs)\r\n>            ^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/rl/train.py\", line 1102, in main\r\n>     ml_logger = ml_log.setup_logging(\r\n>         log_dir=cfg.log_path,\r\n>     ...<2 lines>...\r\n>         wandb_name=cfg.wandb_name,\r\n>     )\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/utils/ml_log.py\", line 470, in setup_logging\r\n>     ml_logger.log_hparams(config)\r\n>     ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/utils/ml_log.py\", line 349, in log_hparams\r\n>     logger.log_hparams(config)\r\n>     ~~~~~~~~~~~~~~~~~~^^^^^^^^\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/tinker_cookbook/utils/ml_log.py\", line 230, in log_hparams\r\n>     wandb.config.update(dump_config(config))\r\n>     ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/.venv/lib/python3.13/site-packages/wandb/sdk/wandb_config.py\", line 187, in update\r\n>     sanitized = self._update(d, allow_val_change)\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/.venv/lib/python3.13/site-packages/wandb/sdk/wandb_config.py\", line 180, in _update\r\n>     sanitized = self._sanitize_dict(\r\n>         parsed_dict, allow_val_change, ignore_keys=locked_keys\r\n>     )\r\n>   File \"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/.venv/lib/python3.13/site-packages/wandb/sdk/wandb_config.py\", line 267, in _sanitize_dict\r\n>     k, v = self._sanitize(k, v, allow_val_change)\r\n>            ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^\r\n>   File 
\"/Users/piotr/Desktop/explore/tinker/tinker-cookbook/.venv/lib/python3.13/site-packages/wandb/sdk/wandb_config.py\", line 288, in _sanitize\r\n>     raise config_util.ConfigError(\r\n>     ...<4 lines>...\r\n>     )\r\n> wandb.sdk.lib.config_util.ConfigError: Attempted to change value of key \"dataset_builder\" from {'comparison_builder': {'swap': False}, 'batch_size': 32, 'policy_renderer_name': 'qwen3_instruct', 'policy_model_name': 'Qwen/Qwen3-4B-Instruct-2507', 'tournament_pattern': 'ALL_PAIRS_BOTH_WAYS', 'group_size': 16, 'content_preprocessor': None} to {'comparison_builder': {'swap': False}, 'batch_size': 32, 'policy_renderer_name': 'qwen3_instruct', 'policy_model_name': 'Qwen/Qwen3-4B-Instruct-2507', 'tournament_pattern': 'ALL_PAIRS_BOTH_WAYS', 'group_size': 16, 'content_preprocessor': None, 'preference_model_builder': {}}\r\n> If you really want to do this, pass allow_val_change=True to config.update()\r\n> wandb: \r\n> wandb: 🚀 View run tinker-examples/shorter at: \r\n> wandb: Find logs at: ../../../../../../tmp/tinker-examples/shorter/1765671682/wandb/run-20251213_192123-9sjlhye7/logs",
      "created_at": "2025-12-14T00:50:35Z",
      "updated_at": "2025-12-14T00:50:35Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "2ed8aff3ae14136e402a7bcd9eb35de81ccca7c1",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "piotrgoral:fix/wandb-config-allow-val-change",
        "ref": "fix/wandb-config-allow-val-change",
        "sha": "02e96a4977f2e26fa5224918f615edf0a8b09401",
        "user": {
          "login": "piotrgoral",
          "id": 44177888,
          "node_id": "MDQ6VXNlcjQ0MTc3ODg4",
          "avatar_url": "https://avatars.githubusercontent.com/u/44177888?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/piotrgoral",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1115988461,
          "node_id": "R_kgDOQoSh7Q",
          "name": "tinker-cookbook",
          "full_name": "piotrgoral/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "piotrgoral",
            "id": 44177888,
            "node_id": "MDQ6VXNlcjQ0MTc3ODg4",
            "avatar_url": "https://avatars.githubusercontent.com/u/44177888?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/piotrgoral",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/piotrgoral/tinker-cookbook",
          "created_at": "2025-12-14T00:46:41Z",
          "updated_at": "2025-12-14T00:46:41Z",
          "pushed_at": "2025-12-14T00:47:24Z",
          "homepage": null,
          "size": 932,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "281c82fc83e22317c4aa2b3dbcbb805a511ac573",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/168"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/168"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/168"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/168/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/168/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/168/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/02e96a4977f2e26fa5224918f615edf0a8b09401"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/166",
      "id": 3098916750,
      "node_id": "PR_kwDOPMjOmM64tbeO",
      "number": 166,
      "state": "open",
      "locked": false,
      "title": "[Cookbook Recipe of Using Rubric]",
      "user": {
        "login": "ruiqi-zhong",
        "id": 22912421,
        "node_id": "MDQ6VXNlcjIyOTEyNDIx",
        "avatar_url": "https://avatars.githubusercontent.com/u/22912421?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/ruiqi-zhong",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "# Rubric-based Grading for LLMs\r\n\r\n- [`data.py`](./data.py) contains the definition for the datapoint class. Each datapoint consists of a conversation prefix and a list of rubric items.\r\n- [`generate_data.py`](./generate_data.py) generates some example datapoint if you want to run our demo on addition.\r\n- [`env.py`](./env.py) determines what each rollout will do. It will let the policy read the prefix, generate a response, ask a grader LLM to grade based on a list of rubric items, and finally provide a reward by summing the response of each grader.\r\n- [`train.py`](./train.py) allows you train LLMs on any dataset saved in our format (specified in `data.py`). The default script will train on the addition task, whose data is generated by `generate_data.py`.\r\n- [`prometheus_experimental.py`](./prometheus_experimental.py) contains a script to train the LLMs based on the rubrics from the [`prometheus-eval/Feedback-Collection`](https://huggingface.co/datasets/prometheus-eval/Feedback-Collection/viewer/default/train?row=0&views%5B%5D=train) dataset. It is experimental though -- even though the reward goes up, there is no guarantee that the model is actually better. We hope our script serves as a starting point, and more research is needed.\r\n\r\n\r\n## A simple example of using a grader LLM with rubrics\r\n\r\nWe show how to use rubric-based LLM to provide reward on an addition task. E.g.\r\n\r\n```\r\n**User**: What's 233 + 100?\r\n**Assistant**: 333\r\n```\r\n\r\nUsually, this could be graded by matching the number to the ground truth 333 without needing an LLLM. However, for pedagogical purposes we will grade the response using a language model with rubric. I.e. 
We will ask a language mode \"Does the assistant answer 333?\"\r\n\r\n### Generate an example dataset\r\n\r\nTo run this, first generate a dataset:\r\n\r\n```\r\npython -m tinker_cookbook.recipes.rubric.generate_data\r\n```\r\n\r\nThen you will see two `jsonl` file generated, one for training, one for testing. For example, if you look into ` tinker_cookbook/example_data/example_rubric_train.jsonl`, each datapoint consists of\r\n- a convo (the conversation prefix that the policy sees)\r\n- rubric_items: a list of rubric items that specify what is a good item, how the grader should format the response, and how the grading result should be extracted.\r\n\r\n```\r\n{\r\n  \"convo\": [\r\n    {\r\n      \"role\": \"user\",\r\n      \"content\": \"What is 4 + 5?\"\r\n    },\r\n    {\r\n      \"role\": \"assistant\",\r\n      \"content\": \"9\"\r\n    },\r\n    {\r\n      \"role\": \"user\",\r\n      \"content\": \"What is 122 + 12?\"\r\n    }\r\n  ],\r\n  \"rubric_items\": [\r\n    {\r\n      \"rubric_str\": \"Does the chatbot correctly gets the answer 134?\",\r\n      \"extraction_regex\": \"<score>(.*)</score>\",\r\n      \"grader_output_format_instruction\": \"Please output your score between 0 and 1 wrapped in <score> ... 
</score>\"\r\n    }\r\n  ]\r\n}\r\n```\r\n\r\n### Debugging and Printing What Happens During Rollouts\r\n\r\nRun\r\n```\r\npython -m tinker_cookbook.recipes.rubric.debug_env\r\n```\r\n\r\nYou can see the message that the policy sees, its response, the grader input, and the grader output.\r\n\r\n<img width=\"1168\" height=\"771\" alt=\"image\" src=\"https://github.com/user-attachments/assets/9f4e3c89-f21e-49b0-96d6-e2f27bd21b43\" />\r\n\r\n\r\n### An example training run\r\n\r\nTo train the LLM to add with a rubric-based LLM, run\r\n```\r\npython -m tinker_cookbook.recipes.rubric.train\r\n```\r\n\r\nYou can see the reward quickly goes up.\r\n\r\n<img width=\"705\" height=\"279\" alt=\"image\" src=\"https://github.com/user-attachments/assets/2f825805-20a7-4cf3-8d06-55d5e9a98098\" />\r\n\r\n### A more realistic dataset\r\n\r\nWe take the `prometheus-eval/Feedback-Collection` dataset from [hugingface](https://huggingface.co/datasets/prometheus-eval/Feedback-Collection/), which contains rubric to grade general chat responses. Run the following to kick off training:\r\n\r\n```\r\npython -m tinker_cookbook.recipes.rubric.prometheus_experimental\r\n```\r\n\r\nWe can see that the reward climbs up steadily.\r\n\r\n<img width=\"1086\" height=\"514\" alt=\"image\" src=\"https://github.com/user-attachments/assets/8877ea6c-b9ea-46da-b995-046bbd3e7c80\" />\r\n\r\nNote that this training recipe is experimental -- to make the performance better we may need to fine-tune the grader LLM as well. We hope our code serves as a starting for you to improve rubric-based grading for training LLMs!\r\n",
      "created_at": "2025-12-13T18:11:25Z",
      "updated_at": "2025-12-14T00:11:04Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "891be75a1a003362c5935dfa259e6189c761281f",
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {
        "0": {
          "login": "joschu",
          "id": 144178,
          "node_id": "MDQ6VXNlcjE0NDE3OA==",
          "avatar_url": "https://avatars.githubusercontent.com/u/144178?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/joschu",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "1": {
          "login": "rafailovtm",
          "id": 203880604,
          "node_id": "U_kgDODCb4nA",
          "avatar_url": "https://avatars.githubusercontent.com/u/203880604?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/rafailovtm",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        }
      },
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "thinking-machines-lab:rubric_example",
        "ref": "rubric_example",
        "sha": "07a54efcc2a25b285d44a22a9e191e57a7a0eb24",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "281c82fc83e22317c4aa2b3dbcbb805a511ac573",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/166"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/166"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/166"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/166/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/166/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/166/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/07a54efcc2a25b285d44a22a9e191e57a7a0eb24"
        }
      },
      "author_association": "CONTRIBUTOR",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/164",
      "id": 3097878725,
      "node_id": "PR_kwDOPMjOmM64peDF",
      "number": 164,
      "state": "open",
      "locked": false,
      "title": "Add pillow to pyproject.toml dependencies",
      "user": {
        "login": "Copilot",
        "id": 198982749,
        "node_id": "BOT_kgDOC9w8XQ",
        "avatar_url": "https://avatars.githubusercontent.com/in/1143301?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/Copilot",
        "type": "Bot",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "Multiple core modules import `PIL` but `pillow` wasn't declared as a dependency, causing import failures for fresh installs.\n\n### Changes\n- Added `pillow` to the dependencies list in `pyproject.toml`\n\n### Affected modules\n- `tinker_cookbook/image_processing_utils.py`\n- `tinker_cookbook/renderers.py`\n- `tinker_cookbook/recipes/vlm_classifier/data.py`\n- `tinker_cookbook/recipes/vlm_classifier/eval.py`\n\n<!-- START COPILOT CODING AGENT SUFFIX -->\n\n\n\n<!-- START COPILOT ORIGINAL PROMPT -->\n\n\n\n<details>\n\n<summary>Original prompt</summary>\n\n> Do we need to add pillow to pyproject in tinker-cookbook after the PR I landed? if so let's do it\n\n\n</details>\n\n\n\n<!-- START COPILOT CODING AGENT TIPS -->\n---\n\n💬 We'd love your input! Share your thoughts on Copilot coding agent in our [2 minute survey](https://gh.io/copilot-coding-agent-survey).\n",
      "created_at": "2025-12-12T21:55:16Z",
      "updated_at": "2025-12-12T22:17:34Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": "490b50c0603fb7aeb656f6cae9ff1ae405c08a4b",
      "assignee": null,
      "assignees": {
        "0": {
          "login": "rowanz",
          "id": 1411527,
          "node_id": "MDQ6VXNlcjE0MTE1Mjc=",
          "avatar_url": "https://avatars.githubusercontent.com/u/1411527?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/rowanz",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "1": {
          "login": "Copilot",
          "id": 198982749,
          "node_id": "BOT_kgDOC9w8XQ",
          "avatar_url": "https://avatars.githubusercontent.com/in/1143301?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/Copilot",
          "type": "Bot",
          "user_view_type": "public",
          "site_admin": false
        }
      },
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "thinking-machines-lab:copilot/add-pillow-to-pyproject",
        "ref": "copilot/add-pillow-to-pyproject",
        "sha": "2cbad7f20333a0bfcfc17d124abec0ca83d7c010",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "3b4d2f49076061770abbdd5242251f9ed0215b53",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/164"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/164"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/164"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/164/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/164/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/164/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/2cbad7f20333a0bfcfc17d124abec0ca83d7c010"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/160",
      "id": 3090282805,
      "node_id": "PR_kwDOPMjOmM64Mfk1",
      "number": 160,
      "state": "open",
      "locked": false,
      "title": "feat(rl): Filter incomplete trajectories that hit max_tokens limit",
      "user": {
        "login": "EvanZhuang",
        "id": 20547465,
        "node_id": "MDQ6VXNlcjIwNTQ3NDY1",
        "avatar_url": "https://avatars.githubusercontent.com/u/20547465?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/EvanZhuang",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "### Description\r\nWhen training with RL, trajectories that terminate due to hitting the `max_tokens` limit (rather than naturally completing via stop sequence) can introduce noise into training. This PR:\r\n\r\n1. Tracks the `stop_reason` from the Tinker API through the data pipeline\r\n2. Adds a configurable filter to exclude incomplete trajectories from training\r\n3. Logs metrics and warnings when trajectories are filtered\r\n\r\n**Disabled by default** - set `filter_incomplete_trajectories=true` to enable.\r\n\r\n## Changes\r\n\r\n### Files Modified (5 files)\r\n\r\n1. **`tinker_cookbook/completers.py`**\r\n   - Added `StopReason` type alias (`Literal[\"length\", \"stop\"]`)\r\n   - Added `stop_reason` field to `TokensWithLogprobs` dataclass (default `\"stop\"` for backward compatibility)\r\n   - Added `is_complete` property that returns `True` if generation hit stop sequence\r\n   - Updated `TinkerTokenCompleter` to capture and propagate `stop_reason` from the Tinker API\r\n\r\n2. **`tinker_cookbook/rl/types.py`**\r\n   - Added `is_complete: bool = True` field to `Transition` dataclass\r\n\r\n3. **`tinker_cookbook/rl/rollouts.py`**\r\n   - Updated `do_single_rollout()` to pass `is_complete=ac_with_logprobs.is_complete` when creating `Transition`\r\n\r\n4. **`tinker_cookbook/rl/data_processing.py`**\r\n   - Added `filter_incomplete_trajectories()` function that:\r\n     - Filters out any trajectory where any transition hit `max_tokens`\r\n     - Returns filtered groups + statistics dict for metrics\r\n     - Logs warning when trajectories are filtered\r\n\r\n5. **`tinker_cookbook/rl/train.py`**\r\n   - Added `filter_incomplete_trajectories: bool = False` config option\r\n   - Updated `prepare_minibatch()` to filter incomplete trajectories when enabled\r\n   - Added handling for cases where all trajectories are filtered\r\n\r\n## Usage\r\n\r\nThe filtering is **disabled by default** (`filter_incomplete_trajectories=False`). 
To enable:\r\n\r\n```python\r\n# In your training config\r\nconfig = Config(\r\n    # ... other config ...\r\n    filter_incomplete_trajectories=True,  # Enable filtering\r\n)\r\n```\r\n\r\n## Backward Compatibility\r\n\r\nAll changes are backward compatible:\r\n- `stop_reason` defaults to `\"stop\"` (assumes complete)\r\n- `is_complete` defaults to `True` on `Transition`\r\n- `filter_incomplete_trajectories` config defaults to `False`",
      "created_at": "2025-12-10T18:01:34Z",
      "updated_at": "2025-12-10T18:07:26Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": null,
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "EvanZhuang:add_stop_reason_in_rollout_generation",
        "ref": "add_stop_reason_in_rollout_generation",
        "sha": "11885f569f96d40186659802b0f8c938938bc564",
        "user": {
          "login": "EvanZhuang",
          "id": 20547465,
          "node_id": "MDQ6VXNlcjIwNTQ3NDY1",
          "avatar_url": "https://avatars.githubusercontent.com/u/20547465?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/EvanZhuang",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1113410390,
          "node_id": "R_kgDOQl1LVg",
          "name": "tinker-cookbook",
          "full_name": "EvanZhuang/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "EvanZhuang",
            "id": 20547465,
            "node_id": "MDQ6VXNlcjIwNTQ3NDY1",
            "avatar_url": "https://avatars.githubusercontent.com/u/20547465?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/EvanZhuang",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/EvanZhuang/tinker-cookbook",
          "created_at": "2025-12-10T00:05:09Z",
          "updated_at": "2025-12-10T00:05:09Z",
          "pushed_at": "2025-12-10T18:07:25Z",
          "homepage": null,
          "size": 851,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": null,
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "989f84926245b227634797b8eac46abe232f9c24",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/160"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/160"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/160"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/160/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/160/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/160/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/11885f569f96d40186659802b0f8c938938bc564"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/156",
      "id": 3083141673,
      "node_id": "PR_kwDOPMjOmM63xQIp",
      "number": 156,
      "state": "open",
      "locked": false,
      "title": "Add loss_fn_config",
      "user": {
        "login": "maitchison",
        "id": 4619344,
        "node_id": "MDQ6VXNlcjQ2MTkzNDQ=",
        "avatar_url": "https://avatars.githubusercontent.com/u/4619344?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/maitchison",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "### Motivation\r\n\r\nSome of the [loss functions](https://tinker-docs.thinkingmachines.ai/losses) have configuration settings that are currently not accessible. \r\n\r\nThis PR adds an optional `loss_fn_config` the RL and distillation training steps and forward_backward calls. \r\n\r\nSFT is hardcoded to `cross_entropy` and so was not updated.\r\n\r\n### Changes\r\n* Added optional `loss_fn_config` to `rl.train.Config`\r\n* For any training script that have a `loss_fn`, I've also added `loss_fn_config`.\r\n* Added explicit argument names to some function calls.",
      "created_at": "2025-12-08T21:22:41Z",
      "updated_at": "2025-12-08T21:24:21Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": null,
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "maitchison:matthew/add-loss-fn",
        "ref": "matthew/add-loss-fn",
        "sha": "42537062e2aadff7702f0f0669fea35da5d6ab9d",
        "user": {
          "login": "maitchison",
          "id": 4619344,
          "node_id": "MDQ6VXNlcjQ2MTkzNDQ=",
          "avatar_url": "https://avatars.githubusercontent.com/u/4619344?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/maitchison",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1104173097,
          "node_id": "R_kgDOQdBYKQ",
          "name": "tinker-cookbook",
          "full_name": "maitchison/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "maitchison",
            "id": 4619344,
            "node_id": "MDQ6VXNlcjQ2MTkzNDQ=",
            "avatar_url": "https://avatars.githubusercontent.com/u/4619344?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/maitchison",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/maitchison/tinker-cookbook",
          "created_at": "2025-11-25T21:22:21Z",
          "updated_at": "2025-12-08T20:41:59Z",
          "pushed_at": "2025-12-08T21:12:32Z",
          "homepage": null,
          "size": 872,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": "Python",
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "989f84926245b227634797b8eac46abe232f9c24",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/156"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/156"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/156"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/156/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/156/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/156/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/42537062e2aadff7702f0f0669fea35da5d6ab9d"
        }
      },
      "author_association": "CONTRIBUTOR",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/107",
      "id": 3035457259,
      "node_id": "PR_kwDOPMjOmM607Wbr",
      "number": 107,
      "state": "open",
      "locked": false,
      "title": "[tinker-cookbook] rl: avoid hanging in async runs when we run out of data",
      "user": {
        "login": "kennyyu",
        "id": 602219,
        "node_id": "MDQ6VXNlcjYwMjIxOQ==",
        "avatar_url": "https://avatars.githubusercontent.com/u/602219?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/kennyyu",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "Previously, on async RL runs, we can hang in shutdown if we run out of data. This fixes it to ensure proper shutdown and that all data in queues are drained when the dataloader loop terminates:\r\n\r\nOverview of the shutdown workflow:\r\n1. dataloader terminates, enqueues sentinel values in the trajectory worker queues\r\n2. trajectory worker sees sentinel values and terminates\r\n3. last trajectory worker enqueues a sentinel value to terminate the training loop\r\n4. training loop sees a sentinel value and terminates, and signals the evals loop\r\n5. evals loop is signaled and terminates",
      "created_at": "2025-11-22T01:22:15Z",
      "updated_at": "2025-11-23T06:38:25Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": null,
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {
        "0": {
          "login": "joschu",
          "id": 144178,
          "node_id": "MDQ6VXNlcjE0NDE3OA==",
          "avatar_url": "https://avatars.githubusercontent.com/u/144178?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/joschu",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "1": {
          "login": "Tiiiger",
          "id": 19514537,
          "node_id": "MDQ6VXNlcjE5NTE0NTM3",
          "avatar_url": "https://avatars.githubusercontent.com/u/19514537?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/Tiiiger",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "2": {
          "login": "ruiqi-zhong",
          "id": 22912421,
          "node_id": "MDQ6VXNlcjIyOTEyNDIx",
          "avatar_url": "https://avatars.githubusercontent.com/u/22912421?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/ruiqi-zhong",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        }
      },
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "thinking-machines-lab:kenny/finish_async_hang_squash",
        "ref": "kenny/finish_async_hang_squash",
        "sha": "4f5f60d01172a6332e4bd3de55b20a5c6dfa2b69",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "14923ca90b549dd4f3c76181d94eeb07419477bf",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/107"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/107"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/107"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/107/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/107/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/107/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/4f5f60d01172a6332e4bd3de55b20a5c6dfa2b69"
        }
      },
      "author_association": "CONTRIBUTOR",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/79",
      "id": 2993417184,
      "node_id": "PR_kwDOPMjOmM6ya-vg",
      "number": 79,
      "state": "open",
      "locked": false,
      "title": "make deepseekv3 renderer work with system messages, add renderer that forces thinking",
      "user": {
        "login": "joschu",
        "id": 144178,
        "node_id": "MDQ6VXNlcjE0NDE3OA==",
        "avatar_url": "https://avatars.githubusercontent.com/u/144178?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/joschu",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": null,
      "created_at": "2025-11-10T02:53:46Z",
      "updated_at": "2025-11-11T01:08:57Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": null,
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "thinking-machines-lab:ds-renderer-sys-msg",
        "ref": "ds-renderer-sys-msg",
        "sha": "c7659f6f64ff06f6b915a941c79eb88bc75de641",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "5d08be6d130596b7bedd02197861c41fa81ea436",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/79"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/79"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/79"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/79/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/79/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/79/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/c7659f6f64ff06f6b915a941c79eb88bc75de641"
        }
      },
      "author_association": "COLLABORATOR",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    },
    {
      "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/36",
      "id": 2940310556,
      "node_id": "PR_kwDOPMjOmM6vQZQc",
      "number": 36,
      "state": "open",
      "locked": false,
      "title": "Integrate with GEM",
      "user": {
        "login": "lkevinzc",
        "id": 38581401,
        "node_id": "MDQ6VXNlcjM4NTgxNDAx",
        "avatar_url": "https://avatars.githubusercontent.com/u/38581401?v=4",
        "gravatar_id": "",
        "url": "https://api.github.com/users/lkevinzc",
        "type": "User",
        "user_view_type": "public",
        "site_admin": false
      },
      "body": "Hi tinker maintainers,\r\n\r\nThanks for the great tool! I would like to contribute the integration of Tinker x GEM (a collection of diverse environments for LLM RL) in the PR. We provide training recipes for both using tinker-cookbook and using tinker directly.",
      "created_at": "2025-10-23T12:34:21Z",
      "updated_at": "2025-11-05T15:01:08Z",
      "closed_at": null,
      "merged_at": null,
      "merge_commit_sha": null,
      "assignee": null,
      "assignees": {},
      "requested_reviewers": {},
      "requested_teams": {},
      "labels": {},
      "milestone": null,
      "draft": false,
      "head": {
        "label": "lkevinzc:main",
        "ref": "main",
        "sha": "b6b029ef7cd4d0fe8b6cdf2505580b0c95dade94",
        "user": {
          "login": "lkevinzc",
          "id": 38581401,
          "node_id": "MDQ6VXNlcjM4NTgxNDAx",
          "avatar_url": "https://avatars.githubusercontent.com/u/38581401?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/lkevinzc",
          "type": "User",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1081796426,
          "node_id": "R_kgDOQHrnSg",
          "name": "tinker-cookbook",
          "full_name": "lkevinzc/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "lkevinzc",
            "id": 38581401,
            "node_id": "MDQ6VXNlcjM4NTgxNDAx",
            "avatar_url": "https://avatars.githubusercontent.com/u/38581401?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/lkevinzc",
            "type": "User",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": true,
          "url": "https://api.github.com/repos/lkevinzc/tinker-cookbook",
          "created_at": "2025-10-23T09:52:30Z",
          "updated_at": "2025-11-05T15:01:09Z",
          "pushed_at": "2025-11-05T15:01:06Z",
          "homepage": null,
          "size": 833,
          "stargazers_count": 0,
          "watchers_count": 0,
          "language": "Python",
          "has_issues": false,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": false,
          "forks_count": 0,
          "archived": false,
          "disabled": false,
          "open_issues_count": 0,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 0,
          "open_issues": 0,
          "watchers": 0,
          "default_branch": "main"
        }
      },
      "base": {
        "label": "thinking-machines-lab:main",
        "ref": "main",
        "sha": "6e01614fd46883704b6569387ff43ea40eef844a",
        "user": {
          "login": "thinking-machines-lab",
          "id": 220441404,
          "node_id": "O_kgDODSOrPA",
          "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
          "gravatar_id": "",
          "url": "https://api.github.com/users/thinking-machines-lab",
          "type": "Organization",
          "user_view_type": "public",
          "site_admin": false
        },
        "repo": {
          "id": 1019793048,
          "node_id": "R_kgDOPMjOmA",
          "name": "tinker-cookbook",
          "full_name": "thinking-machines-lab/tinker-cookbook",
          "private": false,
          "owner": {
            "login": "thinking-machines-lab",
            "id": 220441404,
            "node_id": "O_kgDODSOrPA",
            "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
            "gravatar_id": "",
            "url": "https://api.github.com/users/thinking-machines-lab",
            "type": "Organization",
            "user_view_type": "public",
            "site_admin": false
          },
          "description": "Post-training with Tinker",
          "fork": false,
          "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
          "created_at": "2025-07-14T22:10:00Z",
          "updated_at": "2025-12-16T03:13:15Z",
          "pushed_at": "2025-12-14T20:47:56Z",
          "homepage": null,
          "size": 1524,
          "stargazers_count": 2519,
          "watchers_count": 2519,
          "language": "Python",
          "has_issues": true,
          "has_projects": true,
          "has_downloads": true,
          "has_wiki": true,
          "has_pages": false,
          "has_discussions": true,
          "forks_count": 237,
          "archived": false,
          "disabled": false,
          "open_issues_count": 45,
          "license": {
            "key": "apache-2.0",
            "name": "Apache License 2.0",
            "spdx_id": "Apache-2.0",
            "url": "https://api.github.com/licenses/apache-2.0",
            "node_id": "MDc6TGljZW5zZTI="
          },
          "allow_forking": true,
          "is_template": false,
          "web_commit_signoff_required": false,
          "topics": {},
          "visibility": "public",
          "forks": 237,
          "open_issues": 45,
          "watchers": 2519,
          "default_branch": "main"
        }
      },
      "_links": {
        "self": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/36"
        },
        "html": {
          "href": "https://github.com/thinking-machines-lab/tinker-cookbook/pull/36"
        },
        "issue": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/36"
        },
        "comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/issues/36/comments"
        },
        "review_comments": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/36/comments"
        },
        "review_comment": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/comments{/number}"
        },
        "commits": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/pulls/36/commits"
        },
        "statuses": {
          "href": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook/statuses/b6b029ef7cd4d0fe8b6cdf2505580b0c95dade94"
        }
      },
      "author_association": "NONE",
      "auto_merge": null,
      "active_lock_reason": null,
      "linked_issues": []
    }
  ],
  "discussions": [],
  "details": {
    "id": 1019793048,
    "node_id": "R_kgDOPMjOmA",
    "name": "tinker-cookbook",
    "full_name": "thinking-machines-lab/tinker-cookbook",
    "private": false,
    "owner": {
      "login": "thinking-machines-lab",
      "id": 220441404,
      "node_id": "O_kgDODSOrPA",
      "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/thinking-machines-lab",
      "type": "Organization",
      "user_view_type": "public",
      "site_admin": false
    },
    "description": "Post-training with Tinker",
    "fork": false,
    "url": "https://api.github.com/repos/thinking-machines-lab/tinker-cookbook",
    "created_at": "2025-07-14T22:10:00Z",
    "updated_at": "2025-12-16T03:13:15Z",
    "pushed_at": "2025-12-14T20:47:56Z",
    "homepage": null,
    "size": 1524,
    "stargazers_count": 2519,
    "watchers_count": 2519,
    "language": "Python",
    "has_issues": true,
    "has_projects": true,
    "has_downloads": true,
    "has_wiki": true,
    "has_pages": false,
    "has_discussions": true,
    "forks_count": 237,
    "archived": false,
    "disabled": false,
    "open_issues_count": 45,
    "license": {
      "key": "apache-2.0",
      "name": "Apache License 2.0",
      "spdx_id": "Apache-2.0",
      "url": "https://api.github.com/licenses/apache-2.0",
      "node_id": "MDc6TGljZW5zZTI="
    },
    "allow_forking": true,
    "is_template": false,
    "web_commit_signoff_required": false,
    "topics": {},
    "visibility": "public",
    "forks": 237,
    "open_issues": 45,
    "watchers": 2519,
    "default_branch": "main",
    "permissions": {
      "admin": false,
      "maintain": false,
      "push": false,
      "triage": false,
      "pull": true
    },
    "temp_clone_token": "",
    "custom_properties": {},
    "organization": {
      "login": "thinking-machines-lab",
      "id": 220441404,
      "node_id": "O_kgDODSOrPA",
      "avatar_url": "https://avatars.githubusercontent.com/u/220441404?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/thinking-machines-lab",
      "type": "Organization",
      "user_view_type": "public",
      "site_admin": false
    },
    "network_count": 237,
    "subscribers_count": 25
  },
  "lastFetched": 1765854961373
}