AttributeError: 'DynamicCache' object has no attribute 'get_max_length'. Did you mean: 'get_seq_length'?
#16 opened by Ariphael
System Info:
- Remote Modal Container
- Platform: debian-slim
- Python version: 3.12
- GPU type: NVIDIA H100
- transformers: 4.53.2
- Container image created with:
import modal

image = (
    modal.Image.debian_slim(python_version="3.12")
    .run_commands("pip install --upgrade pip")
    .pip_install("torch", "transformers>=4.40.0", "accelerate", "fastapi[standard]", "sentencepiece", "bitsandbytes")
    .run_commands("mkdir -p /models")
)
Task:
Run the "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" model on a remote CUDA-enabled H100 GPU
The following error persists even after ensuring that the latest versions of transformers and pip are installed. It appears the modeling code in this repo still calls get_max_length(), which no longer exists on DynamicCache in recent transformers releases.
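For context, here is a minimal repro sketch of how the model is loaded and called. The model id and traceback are from this report, but MODEL_ID usage, dtype, and the generation arguments are my simplification, not the exact modal_app.py code; the failure happens as soon as generate() starts sampling.

```python
# Minimal repro sketch (illustrative, not the exact modal_app.py code).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,  # pulls the custom modeling_deepseek.py from the Hub
)

messages = [{"role": "user", "content": "Write a quicksort function in Python."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# On transformers 4.53.2 this raises:
#   AttributeError: 'DynamicCache' object has no attribute 'get_max_length'
# because the model's custom prepare_inputs_for_generation still calls
# past_key_values.get_max_length().
outputs = model.generate(input_ids, max_new_tokens=256)
print(tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True))
```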
Error:
Traceback (most recent call last):
File "/pkg/modal/_runtime/container_io_manager.py", line 772, in handle_input_exception
yield
File "/pkg/modal/_container_entrypoint.py", line 222, in run_input_async
value = await res
^^^^^^^^^
File "/root/llm/modal_app.py", line 87, in inference
outputs = self.model["generator"].generate(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 2625, in generate
result = self._sample(
^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 3599, in _sample
model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/.cache/huggingface/modules/transformers_modules/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct/e434a23f91ba5b4923cf6c9d9a238eb4a08e3a11/modeling_deepseek.py", line 1728, in prepare_inputs_for_generation
max_cache_length = past_key_values.get_max_length()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'DynamicCache' object has no attribute 'get_max_length'. Did you mean: 'get_seq_length'?
Traceback (most recent call last):
File "/pkg/modal/_runtime/container_io_manager.py", line 772, in handle_input_exception
yield
File "/pkg/modal/_container_entrypoint.py", line 205, in run_input_async
async for value in res:
File "/pkg/modal/_runtime/asgi.py", line 226, in fn
app_task.result() # consume/raise exceptions if there are any!
^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/applications.py", line 113, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/middleware/errors.py", line 186, in __call__
raise exc
File "/usr/local/lib/python3.12/site-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/usr/local/lib/python3.12/site-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 716, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 736, in app
await route.handle(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 290, in handle
await self.app(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 78, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/usr/local/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/usr/local/lib/python3.12/site-packages/starlette/routing.py", line 75, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/fastapi/routing.py", line 302, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/fastapi/routing.py", line 215, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/starlette/concurrency.py", line 38, in run_in_threadpool
return await anyio.to_thread.run_sync(func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 2470, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 967, in run
result = context.run(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/llm/modal_app.py", line 131, in run_generator_inference
output = modalGenatorValidator.inference.remote(messages)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/pkg/modal/_object.py", line 285, in wrapped
return await method(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/pkg/modal/_functions.py", line 1570, in remote
return await self._call_function(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/pkg/modal/_functions.py", line 1525, in _call_function
return await invocation.run_function()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/pkg/modal/_functions.py", line 285, in run_function
return await _process_result(item.result, item.data_format, self.stub, self.client)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/pkg/modal/_utils/function_utils.py", line 506, in _process_result
raise exc_with_hints(exc)
File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/root/llm/modal_app.py", line 87, in inference
File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/usr/local/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 2625, in generate
File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/usr/local/lib/python3.12/site-packages/transformers/generation/utils.py", line 3599, in _sample
File "<ta-01K0N5C94X71R8EYC4AK213MK9>:/root/.cache/huggingface/modules/transformers_modules/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct/e434a23f91ba5b4923cf6c9d9a238eb4a08e3a11/modeling_deepseek.py", line 1728, in prepare_inputs_for_generation
AttributeError: 'DynamicCache' object has no attribute 'get_max_length'
I was able to fix it by replacing get_max_length() with get_seq_length() - 1 in the cached modeling_deepseek.py.
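Concretely, the edit is in prepare_inputs_for_generation (around line 1728 per the traceback). Here is a sketch of what I changed, plus an untested version-tolerant variant; get_max_cache_shape() is what I believe newer transformers releases provide as the replacement, so treat that part as an assumption:

```python
# modeling_deepseek.py, prepare_inputs_for_generation (around line 1728).

# Before -- fails on transformers 4.53.x, where DynamicCache no longer has get_max_length():
#     max_cache_length = past_key_values.get_max_length()

# What worked for me (get_seq_length() is the method the error message suggests):
max_cache_length = past_key_values.get_seq_length() - 1

# Untested alternative that should tolerate both old and new transformers versions
# (assumes get_max_cache_shape() exists on newer Cache classes; for DynamicCache it
# returns None, matching what get_max_length() used to return):
if hasattr(past_key_values, "get_max_cache_shape"):
    max_cache_length = past_key_values.get_max_cache_shape()
elif hasattr(past_key_values, "get_max_length"):
    max_cache_length = past_key_values.get_max_length()
else:
    max_cache_length = None
```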