commit 3d564acbcd
Author: frob
Date:   2025-10-07 13:55:05 +03:00 (committed via GitHub)
3 changed files with 9 additions and 1 deletion


@@ -156,7 +156,7 @@ PARAMETER <parameter> <parametervalue>
 | temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
 | seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
 | stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
-| num_predict | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation) | int | num_predict 42 |
+| num_predict | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation, -2 = fill context) | int | num_predict 42 |
 | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
 | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
 | min_p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 |
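For illustration (not part of the commit), a minimal Modelfile sketch using the newly documented `-2` value; the `FROM` line is a placeholder model name:

```
FROM llama3
# num_predict -2 asks the runner to keep generating until the context window is full.
PARAMETER num_predict -2
```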


@@ -384,6 +384,10 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 			s.removeSequence(seqIdx, llm.DoneReasonLength)
 			continue
 		}
+		if seq.numPredict == -2 && len(seq.cache.Inputs) >= s.cache.numCtx {
+			s.removeSequence(seqIdx, llm.DoneReasonLength)
+			continue
+		}
 		for i, input := range seq.inputs {
 			if len(seq.cache.Inputs)+len(seq.pendingInputs)+1 > s.cache.numCtx {


@@ -486,6 +486,10 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, err error)
 			nextBatch.seqs[seqIdx] = nil
 			continue
 		}
+		if seq.numPredict == -2 && int32(len(seq.cache.Inputs)) >= s.cache.numCtx {
+			s.removeSequence(seqIdx, llm.DoneReasonLength)
+			continue
+		}
 		if !s.cache.enabled {
 			seq.inputs = append(seq.cache.Inputs, seq.inputs...)
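Both runner hunks add the same guard: when `num_predict` is `-2` ("fill context"), the sequence is terminated with `DoneReasonLength` once its cached inputs occupy the whole context window; the second hunk differs only in casting the length to `int32` to match that runner's `numCtx` type. A minimal, self-contained sketch of the stop rule, with hypothetical names standing in for the runner's internals:

```go
// A minimal sketch of the fill-context stop rule; all names here are
// illustrative stand-ins, not the runner's actual types.
package main

import "fmt"

// fillContext mirrors the num_predict -2 sentinel from the docs table.
const fillContext = -2

// shouldStop reports whether a sequence must stop generating: either its
// token budget ran out, or num_predict is -2 and the cached inputs already
// fill the numCtx-token context window.
func shouldStop(numPredict, numPredicted, cachedInputs, numCtx int) bool {
	if numPredict > 0 && numPredicted >= numPredict {
		return true // positive budget exhausted (done reason: length)
	}
	return numPredict == fillContext && cachedInputs >= numCtx
}

func main() {
	fmt.Println(shouldStop(fillContext, 0, 2048, 2048)) // true: context is full
	fmt.Println(shouldStop(fillContext, 0, 100, 2048))  // false: room remains
	fmt.Println(shouldStop(-1, 500, 2048, 2048))        // false: -1 = unlimited
}
```

In the commit itself the guard sits directly beside the existing length check, so budgeted and fill-context sequences end through the same `removeSequence(seqIdx, llm.DoneReasonLength)` path.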