mirror of https://github.com/ollama/ollama.git

Fixed startup sequence to report model loading

parent bd54b08261
commit c942e4a07b
@@ -2726,7 +2726,7 @@ static json format_detokenized_response(std::string content)
 static void log_server_request(const httplib::Request &req, const httplib::Response &res)
 {
     // skip GH copilot requests when using default port
-    if (req.path == "/v1/health" || req.path == "/v1/completions")
+    if (req.path == "/health" || req.path == "/v1/health" || req.path == "/v1/completions")
     {
         return;
     }
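With /health now skipped by the request logger alongside /v1/health, a client can poll the route cheaply while waiting for the server. A minimal probe sketch, assuming cpp-httplib and a server on localhost:8080 (host and port are illustrative, not from the commit):

    // Poll /health until the server reports it is ready.
    #include <chrono>
    #include <thread>
    #include "httplib.h"

    int main() {
        httplib::Client cli("localhost", 8080);
        for (;;) {
            auto res = cli.Get("/health");
            if (res && res->status == 200) break;   // model loaded, server ready
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
        }
        return 0;
    }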
@@ -3053,6 +3053,26 @@ int main(int argc, char **argv) {
         log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
     }
 
+    if (sparams.n_threads_http < 1) {
+        // +2 threads for monitoring endpoints
+        sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
+    }
+    log_data["n_threads_http"] =  std::to_string(sparams.n_threads_http);
+    svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
+
+    LOG_INFO("HTTP server listening", log_data);
+    // run the HTTP server in a thread - see comment below
+    std::thread t([&]()
+            {
+                if (!svr.listen_after_bind())
+                {
+                    state.store(SERVER_STATE_ERROR);
+                    return 1;
+                }
+
+                return 0;
+            });
+
     // load the model
     if (!llama.load_model(params))
     {
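This hunk is the heart of the fix: the listener thread now starts before llama.load_model(), so monitoring endpoints can answer while the model is still loading. The same block is deleted from its old position after the load in the hunk below, so the code is relocated, not new. A self-contained sketch of how a /health handler can report that state, assuming SERVER_STATE_LOADING_MODEL and SERVER_STATE_READY values alongside the SERVER_STATE_ERROR visible in the diff (the JSON bodies are also assumptions):

    #include <atomic>
    #include "httplib.h"

    // Assumed state values; only SERVER_STATE_ERROR appears in the diff.
    enum server_state { SERVER_STATE_LOADING_MODEL, SERVER_STATE_READY, SERVER_STATE_ERROR };

    static std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};

    static void register_health(httplib::Server &svr) {
        svr.Get("/health", [](const httplib::Request &, httplib::Response &res) {
            switch (state.load()) {
            case SERVER_STATE_READY:
                res.status = 200;
                res.set_content(R"({"status": "ok"})", "application/json");
                break;
            case SERVER_STATE_LOADING_MODEL:
                res.status = 503;   // still inside load_model()
                res.set_content(R"({"status": "loading model"})", "application/json");
                break;
            default:                // SERVER_STATE_ERROR
                res.status = 500;
                res.set_content(R"({"status": "error"})", "application/json");
                break;
            }
        });
    }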
@@ -3257,26 +3277,6 @@ int main(int argc, char **argv) {
     }*/
     //);
 
-    if (sparams.n_threads_http < 1) {
-        // +2 threads for monitoring endpoints
-        sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
-    }
-    log_data["n_threads_http"] =  std::to_string(sparams.n_threads_http);
-    svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
-
-    LOG_INFO("HTTP server listening", log_data);
-    // run the HTTP server in a thread - see comment below
-    std::thread t([&]()
-            {
-                if (!svr.listen_after_bind())
-                {
-                    state.store(SERVER_STATE_ERROR);
-                    return 1;
-                }
-
-                return 0;
-            });
-
     llama.queue_tasks.on_new_task(std::bind(
         &llama_server_context::process_single_task, &llama, std::placeholders::_1));
     llama.queue_tasks.on_finish_multitask(std::bind(
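The hunk above only deletes the relocated block from its old position after the model load. For reference, a standalone sketch of the pattern the commit adopts (bind and listen first, run the slow initialization afterwards), using cpp-httplib, with a sleep standing in for load_model(); names and port are illustrative:

    #include <atomic>
    #include <chrono>
    #include <thread>
    #include "httplib.h"

    int main() {
        std::atomic<bool> ready{false};
        httplib::Server svr;

        // Health endpoint answers immediately, even during initialization.
        svr.Get("/health", [&](const httplib::Request &, httplib::Response &res) {
            res.status = ready.load() ? 200 : 503;
        });

        // Start serving before the expensive init, as the commit does.
        if (!svr.bind_to_port("0.0.0.0", 8080)) return 1;
        std::thread t([&] { svr.listen_after_bind(); });

        std::this_thread::sleep_for(std::chrono::seconds(5));  // stand-in for load_model()
        ready.store(true);

        t.join();
        return 0;
    }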