llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)
This commit is contained in:
		
							
								
								
									
										120
									
								
								llama/llama.cpp/common/sampling.cpp
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										120
									
								
								llama/llama.cpp/common/sampling.cpp
									
									
									
									
										vendored
									
									
								
							| @@ -113,7 +113,10 @@ struct common_sampler { | ||||
|     void set_logits(struct llama_context * ctx, int idx) { | ||||
|         const auto * logits = llama_get_logits_ith(ctx, idx); | ||||
|  | ||||
|         const int n_vocab = llama_n_vocab(llama_get_model(ctx)); | ||||
|         const llama_model * model = llama_get_model(ctx); | ||||
|         const llama_vocab * vocab = llama_model_get_vocab(model); | ||||
|  | ||||
|         const int n_vocab = llama_vocab_n_tokens(vocab); | ||||
|  | ||||
|         cur.resize(n_vocab); | ||||
|  | ||||
| @@ -131,24 +134,47 @@ std::string common_params_sampling::print() const { | ||||
|     snprintf(result, sizeof(result), | ||||
|             "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n" | ||||
|             "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n" | ||||
|             "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n" | ||||
|             "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n" | ||||
|             "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f", | ||||
|             penalty_last_n, penalty_repeat, penalty_freq, penalty_present, | ||||
|             dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n, | ||||
|             top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp, | ||||
|             top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp, | ||||
|             mirostat, mirostat_eta, mirostat_tau); | ||||
|  | ||||
|     return std::string(result); | ||||
| } | ||||
|  | ||||
| struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params) { | ||||
|     const llama_vocab * vocab = llama_model_get_vocab(model); | ||||
|  | ||||
|     llama_sampler_chain_params lparams = llama_sampler_chain_default_params(); | ||||
|  | ||||
|     lparams.no_perf = params.no_perf; | ||||
|  | ||||
|     struct llama_sampler * grmr; | ||||
|     if (params.grammar.compare(0, 11, "%llguidance") == 0) { | ||||
| #ifdef LLAMA_USE_LLGUIDANCE | ||||
|         grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str()); | ||||
| #else | ||||
|         GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled"); | ||||
| #endif // LLAMA_USE_LLGUIDANCE | ||||
|     } else { | ||||
|         std::vector<const char *> trigger_words; | ||||
|         trigger_words.reserve(params.grammar_trigger_words.size()); | ||||
|         for (const auto & str : params.grammar_trigger_words) { | ||||
|             trigger_words.push_back(str.word.c_str()); | ||||
|         } | ||||
|  | ||||
|         grmr = params.grammar_lazy | ||||
|              ? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root", | ||||
|                                                trigger_words.data(), trigger_words.size(), | ||||
|                                                params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size()) | ||||
|              :      llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"); | ||||
|     } | ||||
|  | ||||
|     auto * result = new common_sampler { | ||||
|         /* .params = */ params, | ||||
|         /* .grmr   = */ llama_sampler_init_grammar(model, params.grammar.c_str(), "root"), | ||||
|         /* .grmr   = */ grmr, | ||||
|         /* .chain  = */ llama_sampler_chain_init(lparams), | ||||
|         /* .prev   = */ ring_buffer<llama_token>(std::max(32, params.n_prev)), | ||||
|         /* .cur    = */ {}, | ||||
| @@ -157,56 +183,62 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co | ||||
|  | ||||
|     llama_sampler_chain_add(result->chain, | ||||
|             llama_sampler_init_logit_bias( | ||||
|                 llama_n_vocab(model), | ||||
|                 llama_vocab_n_tokens(vocab), | ||||
|                 params.logit_bias.size(), | ||||
|                 params.logit_bias.data())); | ||||
|  | ||||
|     if (params.mirostat == 0) { | ||||
|         for (const auto & cnstr : params.samplers) { | ||||
|             switch (cnstr) { | ||||
|                 case COMMON_SAMPLER_TYPE_DRY: | ||||
|                     { | ||||
|                         std::vector<const char *> c_breakers; | ||||
|                         c_breakers.reserve(params.dry_sequence_breakers.size()); | ||||
|                         for (const auto & str : params.dry_sequence_breakers) { | ||||
|                             c_breakers.push_back(str.c_str()); | ||||
|                         } | ||||
|         if (params.top_n_sigma >= 0) { | ||||
|             llama_sampler_chain_add(result->chain, llama_sampler_init_top_k        (params.top_k)); | ||||
|             llama_sampler_chain_add(result->chain, llama_sampler_init_temp         (params.temp)); | ||||
|             llama_sampler_chain_add(result->chain, llama_sampler_init_top_n_sigma  (params.top_n_sigma)); | ||||
|         } else { | ||||
|             for (const auto & cnstr : params.samplers) { | ||||
|                 switch (cnstr) { | ||||
|                     case COMMON_SAMPLER_TYPE_DRY: | ||||
|                         { | ||||
|                             std::vector<const char *> c_breakers; | ||||
|                             c_breakers.reserve(params.dry_sequence_breakers.size()); | ||||
|                             for (const auto & str : params.dry_sequence_breakers) { | ||||
|                                 c_breakers.push_back(str.c_str()); | ||||
|                             } | ||||
|  | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_dry      (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); | ||||
|                     } | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_TOP_K: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_TOP_P: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_MIN_P: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_XTC: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_TYPICAL_P: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_TEMPERATURE: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_INFILL: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (model)); | ||||
|                     break; | ||||
|                 case COMMON_SAMPLER_TYPE_PENALTIES: | ||||
|                     llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); | ||||
|                     break; | ||||
|                 default: | ||||
|                     GGML_ASSERT(false && "unknown sampler type"); | ||||
|                             llama_sampler_chain_add(result->chain, llama_sampler_init_dry      (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); | ||||
|                         } | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_TOP_K: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_TOP_P: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_MIN_P: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_XTC: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_TYPICAL_P: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_TEMPERATURE: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_INFILL: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (vocab)); | ||||
|                         break; | ||||
|                     case COMMON_SAMPLER_TYPE_PENALTIES: | ||||
|                         llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); | ||||
|                         break; | ||||
|                     default: | ||||
|                         GGML_ASSERT(false && "unknown sampler type"); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); | ||||
|     } else if (params.mirostat == 1) { | ||||
|         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); | ||||
|         llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100)); | ||||
|         llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100)); | ||||
|     } else if (params.mirostat == 2) { | ||||
|         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); | ||||
|         llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta)); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jeffrey Morgan
					Jeffrey Morgan