AttributeError: 'DynamicCache' object has no attribute 'get_max_length'. Did you mean: 'get_seq_length'?

#16
by Ariphael - opened

System Info:

  • Remote Modal Container
  • Platform: debian-slim
  • Python version: 3.12
  • GPU type: NVIDIA H100
  • transformers 4.53.2
  • image for container created using this code:

```python
image = (
    modal.Image.debian_slim(python_version="3.12")
    .run_commands("pip install --upgrade pip")
    .pip_install("torch", "transformers>=4.40.0", "accelerate", "fastapi[standard]", "sentencepiece", "bitsandbytes")
    .run_commands("mkdir -p /models")
)
```

Task:
Run the "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" model on a remote CUDA-enabled H100 GPU

The following error persists even after upgrading pip and installing the latest transformers release. The custom modeling code in this repo still calls `get_max_length()`, which has been removed from recent transformers versions (the AttributeError below is raised inside the repo's own `modeling_deepseek.py`).
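If editing the downloaded `modeling_deepseek.py` is inconvenient (Hugging Face re-downloads remote code into the cache), one workaround is a small compatibility shim applied before calling `generate()`. This is only a sketch: it assumes that the old `DynamicCache.get_max_length()` returned `None` (a dynamic cache has no fixed maximum length), which is how earlier transformers releases behaved, and the `ensure_get_max_length` helper name is my own:

```python
def ensure_get_max_length(cache_cls):
    """Re-attach the removed ``get_max_length()`` to a cache class.

    Earlier transformers releases defined ``DynamicCache.get_max_length()``
    and returned ``None`` for dynamic caches (they have no fixed maximum);
    newer releases removed the method, which breaks remote code such as
    ``modeling_deepseek.py``. Restoring it lets that code run unchanged.
    """
    if not hasattr(cache_cls, "get_max_length"):
        # Re-create the old behavior: no fixed maximum -> None.
        cache_cls.get_max_length = lambda self: None
    return cache_cls
```

Applied as `ensure_get_max_length(transformers.cache_utils.DynamicCache)` before the `generate()` call, the model's remote code should find the method again without any file edits.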

Error:

Traceback (most recent call last):
  File "/pkg/modal/_runtime/container_io_manager.py", line 772, in handle_input_exception
    yield
  File "/pkg/modal/_container_entrypoint.py", line 222, in run_input_async
    value = await res
            ^^^^^^^^^
  File "/root/llm/modal_app.py", line 87, in inference
    outputs = self.model["generator"].generate(
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 2625, in generate
    result = self._sample(
             ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 3599, in _sample
    model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/.cache/huggingface/modules/transformers_modules/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct/e434a23f91ba5b4923cf6c9d9a238eb4a08e3a11/modeling_deepseek.py", line 1728, in prepare_inputs_for_generation
    max_cache_length = past_key_values.get_max_length()
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'DynamicCache' object has no attribute 'get_max_length'. Did you mean: 'get_seq_length'?
Traceback (most recent call last):
  File "/pkg/modal/_runtime/container_io_manager.py", line 772, in handle_input_exception
    yield
  File "/pkg/modal/_container_entrypoint.py", line 205, in run_input_async
    async for value in res:
  File "/pkg/modal/_runtime/asgi.py", line 226, in fn
    app_task.result()  # consume/raise exceptions if there are any!
    ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.12/site-packages/starlette/middleware/errors.py", line 164, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.12/site-packages/starlette/middleware/cors.py", line 85, in __call__
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 716, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 736, in app
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 290, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 78, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 75, in app
    response = await f(request)
               ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/fastapi/routing.py", line 302, in app
    raw_response = await run_endpoint_function(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/fastapi/routing.py", line 215, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/starlette/concurrency.py", line 38, in run_in_threadpool
    return await anyio.to_thread.run_sync(func)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 2470, in run_sync_in_worker_thread
    return await future
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 967, in run
    result = context.run(func, *args)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/llm/modal_app.py", line 131, in run_generator_inference
    output = modalGenatorValidator.inference.remote(messages)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/pkg/modal/_object.py", line 285, in wrapped
    return await method(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/pkg/modal/_functions.py", line 1570, in remote
    return await self._call_function(args, kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/pkg/modal/_functions.py", line 1525, in _call_function
    return await invocation.run_function()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/pkg/modal/_functions.py", line 285, in run_function
    return await _process_result(item.result, item.data_format, self.stub, self.client)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/pkg/modal/_utils/function_utils.py", line 506, in _process_result
    raise exc_with_hints(exc)
  File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/root/llm/modal_app.py", line 87, in inference
  File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/usr/local/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
  File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 2625, in generate
  File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 3599, in _sample
  File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/root/.cache/huggingface/modules/transformers_modules/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct/e434a23f91ba5b4923cf6c9d9a238eb4a08e3a11/modeling_deepseek.py", line 1728, in prepare_inputs_for_generation
AttributeError: 'DynamicCache' object has no attribute 'get_max_length'

I was able to fix it by replacing `get_max_length()` with `get_seq_length() - 1` in the cached `modeling_deepseek.py`.
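For anyone applying the same fix, the change in the cached `modeling_deepseek.py` (line 1728 per the traceback) would look roughly like the diff below. Note that in older transformers releases `DynamicCache.get_max_length()` returned `None`, so assigning `None` directly should also preserve the original semantics:

```diff
-        max_cache_length = past_key_values.get_max_length()
+        max_cache_length = past_key_values.get_seq_length() - 1
```

Keep in mind that this file lives under `~/.cache/huggingface/modules/transformers_modules/...`, so the edit may be overwritten if the remote code is re-downloaded.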
