mirror of https://github.com/ollama/ollama.git
				
				
				
			Fixed startup sequence to report model loading
This commit is contained in:
		
							parent
							
								
									bd54b08261
								
							
						
					
					
						commit
						c942e4a07b
					
				|  | @@ -2726,7 +2726,7 @@ static json format_detokenized_response(std::string content) | |||
| static void log_server_request(const httplib::Request &req, const httplib::Response &res) | ||||
| { | ||||
|     // skip GH copilot requests when using default port
 | ||||
|     if (req.path == "/v1/health" || req.path == "/v1/completions") | ||||
|     if (req.path == "/health" || req.path == "/v1/health" || req.path == "/v1/completions") | ||||
|     { | ||||
|         return; | ||||
|     } | ||||
|  | @@ -3053,6 +3053,26 @@ int main(int argc, char **argv) { | |||
|         log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded"; | ||||
|     } | ||||
| 
 | ||||
|     if (sparams.n_threads_http < 1) { | ||||
|         // +2 threads for monitoring endpoints
 | ||||
|         sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1); | ||||
|     } | ||||
|     log_data["n_threads_http"] =  std::to_string(sparams.n_threads_http); | ||||
|     svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); }; | ||||
| 
 | ||||
|     LOG_INFO("HTTP server listening", log_data); | ||||
|     // run the HTTP server in a thread - see comment below
 | ||||
|     std::thread t([&]() | ||||
|             { | ||||
|                 if (!svr.listen_after_bind()) | ||||
|                 { | ||||
|                     state.store(SERVER_STATE_ERROR); | ||||
|                     return 1; | ||||
|                 } | ||||
| 
 | ||||
|                 return 0; | ||||
|             }); | ||||
| 
 | ||||
|     // load the model
 | ||||
|     if (!llama.load_model(params)) | ||||
|     { | ||||
|  | @@ -3257,26 +3277,6 @@ int main(int argc, char **argv) { | |||
|     }*/ | ||||
|     //);
 | ||||
| 
 | ||||
|     if (sparams.n_threads_http < 1) { | ||||
|         // +2 threads for monitoring endpoints
 | ||||
|         sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1); | ||||
|     } | ||||
|     log_data["n_threads_http"] =  std::to_string(sparams.n_threads_http); | ||||
|     svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); }; | ||||
| 
 | ||||
|     LOG_INFO("HTTP server listening", log_data); | ||||
|     // run the HTTP server in a thread - see comment below
 | ||||
|     std::thread t([&]() | ||||
|             { | ||||
|                 if (!svr.listen_after_bind()) | ||||
|                 { | ||||
|                     state.store(SERVER_STATE_ERROR); | ||||
|                     return 1; | ||||
|                 } | ||||
| 
 | ||||
|                 return 0; | ||||
|             }); | ||||
| 
 | ||||
|     llama.queue_tasks.on_new_task(std::bind( | ||||
|         &llama_server_context::process_single_task, &llama, std::placeholders::_1)); | ||||
|     llama.queue_tasks.on_finish_multitask(std::bind( | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue