source: src/Fragmentation/Automation/FragmentScheduler.cpp@ fb255d

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults AddingActions_SaveParseParticleParameters Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_ParticleName_to_Atom Adding_StructOpt_integration_tests AtomFragments Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 Candidate_v1.7.0 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator CombiningParticlePotentialParsing Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_BoundInBox_CenterInBox_MoleculeActions Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_PopActions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion FragmentAction_writes_AtomFragments FragmentMolecule_checks_bonddegrees GeometryObjects Gui_Fixes Gui_displays_atomic_force_velocity ImplicitCharges IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest 
IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix MoreRobust_FragmentAutomation ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PdbParser_setsAtomName PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks Rewrite_FitPartialCharges RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg Switchable_LogView ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps TremoloParser_setsAtomName Ubuntu_1604_changes stable
Last change on this file since fb255d was e8f397, checked in by Frederik Heber <heber@…>, 13 years ago

PoolWorker is now truly listening.

  • Property mode set to 100644
File size: 17.0 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2011 University of Bonn. All rights reserved.
5 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
6 */
7
8/*
9 * \file FragmentScheduler.cpp
10 *
11 * This file strongly follows the Serialization example from the boost::asio
12 * library (see server.cpp)
13 *
14 * Created on: Oct 19, 2011
15 * Author: heber
16 */
17
18// include config.h
19#ifdef HAVE_CONFIG_H
20#include <config.h>
21#endif
22
23// boost asio needs specific operator new
24#include <boost/asio.hpp>
25
26#include "CodePatterns/MemDebug.hpp"
27
28#include <boost/bind.hpp>
29#include <boost/lexical_cast.hpp>
30#include <iostream>
31#include <vector>
32#include "Connection.hpp" // Must come before boost/serialization headers.
33#include <boost/serialization/vector.hpp>
34#include "CodePatterns/Info.hpp"
35#include "CodePatterns/Log.hpp"
36#include "Controller/Commands/EnrollInPoolOperation.hpp"
37#include "Jobs/MPQCCommandJob.hpp"
38#include "Jobs/SystemCommandJob.hpp"
39#include "JobId.hpp"
40
41#include "FragmentScheduler.hpp"
42
43FragmentJob::ptr FragmentScheduler::WorkerListener_t::NoJob(new SystemCommandJob(std::string("/bin/true"), std::string("dosomething"), JobId::NoJob));
44
45/** Helper function to enforce binding of FragmentWorker to possible derived
46 * FragmentJob classes.
47 */
48void dummyInit() {
49 SystemCommandJob("/bin/false", "something", JobId::IllegalJob);
50 MPQCCommandJob("nofile", JobId::IllegalJob);
51}
52
53/** Constructor of class FragmentScheduler.
54 *
55 * We setup both acceptors to accept connections from workers and Controller.
56 *
57 * \param io_service io_service of the asynchronous communications
58 * \param workerport port to listen for worker connections
59 * \param controllerport port to listen for controller connections.
60 */
61FragmentScheduler::FragmentScheduler(boost::asio::io_service& io_service, unsigned short workerport, unsigned short controllerport) :
62 WorkerListener(io_service, workerport, JobsQueue, pool,
63 boost::bind(&FragmentScheduler::sendJobToWorker, boost::ref(*this), _1, _2)),
64 ControllerListener(io_service, controllerport, JobsQueue,
65 boost::bind(&Listener::initiateSocket, boost::ref(WorkerListener))),
66 connection(io_service),
67 sendJobOp(connection)
68{
69 Info info(__FUNCTION__);
70
71 // listen for controller
72 ControllerListener.initiateSocket();
73
74 // only initiate socket if jobs are already present
75 if (JobsQueue.isJobPresent()) {
76 WorkerListener.initiateSocket();
77 }
78}
79
80/** Handle a new worker connection.
81 *
82 * We store the given address in the pool.
83 *
84 * \param e error code if something went wrong
85 * \param conn reference with the connection
86 */
87void FragmentScheduler::WorkerListener_t::handle_Accept(const boost::system::error_code& e, connection_ptr conn)
88{
89 Info info(__FUNCTION__);
90 if (!e)
91 {
92 // Successfully accepted a new connection.
93 // read address
94 conn->async_read(address,
95 boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReadAddress, this,
96 boost::asio::placeholders::error, conn));
97 }
98 else
99 {
100 // An error occurred. Log it and return. Since we are not starting a new
101 // accept operation the io_service will run out of work to do and the
102 // server will exit.
103 Exitflag = ErrorFlag;
104 ELOG(0, e.message());
105 }
106}
107
108/** Handle having received Worker's address
109 *
110 * \param e error code if something went wrong
111 * \param conn reference with the connection
112 */
113void FragmentScheduler::WorkerListener_t::handle_ReadAddress(const boost::system::error_code& e, connection_ptr conn)
114{
115 Info info(__FUNCTION__);
116 if (!e)
117 {
118 // Successfully accepted a new connection.
119 // read address
120 conn->async_read(choice,
121 boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReadChoice, this,
122 boost::asio::placeholders::error, conn));
123 }
124 else
125 {
126 // An error occurred. Log it and return. Since we are not starting a new
127 // accept operation the io_service will run out of work to do and the
128 // server will exit.
129 Exitflag = ErrorFlag;
130 ELOG(0, e.message());
131 }
132}
133
134/** Controller callback function to read the choice for next operation.
135 *
136 * \param e error code if something went wrong
137 * \param conn reference with the connection
138 */
139void FragmentScheduler::WorkerListener_t::handle_ReadChoice(const boost::system::error_code& e, connection_ptr conn)
140{
141 Info info(__FUNCTION__);
142 if (!e)
143 {
144 LOG(1, "INFO: Received request for operation " << choice << ".");
145 // switch over the desired choice read previously
146 switch(choice) {
147 case NoWorkerOperation:
148 {
149 ELOG(1, "WorkerListener_t::handle_ReadChoice() - called with NoOperation.");
150 break;
151 }
152 case EnrollInPool:
153 {
154 if (pool.presentInPool(address)) {
155 ELOG(1, "INFO: worker "+toString(address)+" is already contained in pool.");
156 enum EnrollInPoolOperation::EnrollFlag flag = EnrollInPoolOperation::Fail;
157 conn->async_write(flag,
158 boost::bind(&FragmentScheduler::WorkerListener_t::handle_enrolled, this,
159 boost::asio::placeholders::error, conn));
160 } else {
161 // insert as its new worker
162 LOG(1, "INFO: Adding " << address << " to pool ...");
163 pool.addWorker(address);
164 enum EnrollInPoolOperation::EnrollFlag flag = EnrollInPoolOperation::Success;
165 conn->async_write(flag,
166 boost::bind(&FragmentScheduler::WorkerListener_t::handle_enrolled, this,
167 boost::asio::placeholders::error, conn));
168 break;
169 }
170 case SendResult:
171 {
172 if (pool.presentInPool(address)) {
173 // check whether its priority is busy_priority
174 if (pool.isWorkerBusy(address)) {
175 conn->async_read(result,
176 boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReceiveResultFromWorker, this,
177 boost::asio::placeholders::error, conn));
178 } else {
179 ELOG(1, "Worker " << address << " trying to send result who is not marked as busy.");
180 conn->async_read(result,
181 boost::bind(&FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker, this,
182 boost::asio::placeholders::error, conn));
183 }
184 } else {
185 ELOG(1, "Worker " << address << " trying to send result who is not in pool.");
186 conn->async_read(result,
187 boost::bind(&FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker, this,
188 boost::asio::placeholders::error, conn));
189 }
190 break;
191 }
192 case RemoveFromPool:
193 {
194 if (pool.presentInPool(address)) {
195 // removing present worker
196 pool.removeWorker(address);
197 } else {
198 ELOG(1, "Shutting down Worker " << address << " not contained in pool.");
199 }
200 break;
201 }
202 default:
203 Exitflag = ErrorFlag;
204 ELOG(1, "WorkerListener_t::handle_ReadChoice() - called with no valid choice.");
205 break;
206 }
207 }
208 // restore NoOperation choice such that choice is not read twice
209 choice = NoWorkerOperation;
210 }
211 else
212 {
213 // An error occurred. Log it and return. Since we are not starting a new
214 // accept operation the io_service will run out of work to do and the
215 // server will exit.
216 Exitflag = ErrorFlag;
217 ELOG(0, e.message());
218 }
219
220 if (JobsQueue.isJobPresent()) {
221 // Start an accept operation for a new Connection only when there
222 // are still jobs present
223 initiateSocket();
224 }
225}
226
227
228/** Callback function when new worker has enrolled.
229 *
230 * \param e error code if something went wrong
231 * \param conn reference with the connection
232 */
233void FragmentScheduler::WorkerListener_t::handle_enrolled(const boost::system::error_code& e, connection_ptr conn)
234{
235 Info info(__FUNCTION__);
236 if (!e)
237 {
238 if (JobsQueue.isJobPresent()) {
239 FragmentJob::ptr job = JobsQueue.popJob();
240 callback_sendJobToWorker(pool.getNextIdleWorker(), job);
241 }
242 }
243 else
244 {
245 // An error occurred. Log it and return. Since we are not starting a new
246 // accept operation the io_service will run out of work to do and the
247 // server will exit.
248 Exitflag = ErrorFlag;
249 ELOG(0, e.message());
250 }
251}
252
253/** Callback function when result has been received.
254 *
255 * \param e error code if something went wrong
256 * \param conn reference with the connection
257 */
258void FragmentScheduler::WorkerListener_t::handle_ReceiveResultFromWorker(const boost::system::error_code& e, connection_ptr conn)
259{
260 Info info(__FUNCTION__);
261 LOG(1, "INFO: Received result for job #" << result->getId() << " ...");
262
263 // and push into queue
264 ASSERT(result->getId() != (JobId_t)JobId::NoJob,
265 "WorkerListener_t::handle_ReceiveResultFromWorker() - result received has NoJob id.");
266 ASSERT(result->getId() != (JobId_t)JobId::IllegalJob,
267 "WorkerListener_t::handle_ReceiveResultFromWorker() - result received has IllegalJob id.");
268 // place id into expected
269 if ((result->getId() != (JobId_t)JobId::NoJob) && (result->getId() != (JobId_t)JobId::IllegalJob))
270 JobsQueue.pushResult(result);
271
272 // mark as idle
273 pool.unmarkWorkerBusy(address);
274
275 // erase result
276 result.reset();
277 LOG(1, "INFO: JobsQueue has " << JobsQueue.getDoneJobs() << " results.");
278
279 // send out new job if present
280 if (JobsQueue.isJobPresent()) {
281 FragmentJob::ptr job = JobsQueue.popJob();
282 callback_sendJobToWorker(pool.getNextIdleWorker(), job);
283 }
284}
285
286/** Callback function when result has been received.
287 *
288 * \param e error code if something went wrong
289 * \param conn reference with the connection
290 */
291void FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker(const boost::system::error_code& e, connection_ptr conn)
292{
293 Info info(__FUNCTION__);
294 // nothing to do
295 LOG(1, "INFO: Rejecting result for job #" << result->getId() << ", placing back into queue.");
296
297 JobsQueue.resubmitJob(result->getId());
298
299 LOG(1, "INFO: JobsQueue has " << JobsQueue.getDoneJobs() << " results.");
300}
301
302
303/** Handle a new controller connection.
304 *
305 * \sa handle_ReceiveJobs()
306 * \sa handle_CheckResultState()
307 * \sa handle_SendResults()
308 *
309 * \param e error code if something went wrong
310 * \param conn reference with the connection
311 */
312void FragmentScheduler::ControllerListener_t::handle_Accept(const boost::system::error_code& e, connection_ptr conn)
313{
314 Info info(__FUNCTION__);
315 if (!e)
316 {
317 conn->async_read(choice,
318 boost::bind(&FragmentScheduler::ControllerListener_t::handle_ReadChoice, this,
319 boost::asio::placeholders::error, conn));
320 }
321 else
322 {
323 // An error occurred. Log it and return. Since we are not starting a new
324 // accept operation the io_service will run out of work to do and the
325 // server will exit.
326 Exitflag = ErrorFlag;
327 ELOG(0, e.message());
328 }
329}
330
331/** Controller callback function to read the choice for next operation.
332 *
333 * \param e error code if something went wrong
334 * \param conn reference with the connection
335 */
336void FragmentScheduler::ControllerListener_t::handle_ReadChoice(const boost::system::error_code& e, connection_ptr conn)
337{
338 Info info(__FUNCTION__);
339 if (!e)
340 {
341 bool LaunchNewAcceptor = true;
342 LOG(1, "INFO: Received request for operation " << choice << ".");
343 // switch over the desired choice read previously
344 switch(choice) {
345 case NoControllerOperation:
346 {
347 ELOG(1, "ControllerListener_t::handle_ReadChoice() - called with NoOperation.");
348 break;
349 }
350 case GetNextJobId:
351 {
352 const JobId_t nextid = globalId.getNextId();
353 LOG(1, "INFO: Sending next available job id " << nextid << " to controller ...");
354 conn->async_write(nextid,
355 boost::bind(&FragmentScheduler::ControllerListener_t::handle_GetNextJobIdState, this,
356 boost::asio::placeholders::error, conn));
357 break;
358 }
359 case ReceiveJobs:
360 {
361 // The connection::async_write() function will automatically
362 // serialize the data structure for us.
363 LOG(1, "INFO: Receiving bunch of jobs from a controller ...");
364 conn->async_read(jobs,
365 boost::bind(&FragmentScheduler::ControllerListener_t::handle_ReceiveJobs, this,
366 boost::asio::placeholders::error, conn));
367 break;
368 }
369 case CheckState:
370 {
371 // first update number
372 jobInfo[0] = JobsQueue.getPresentJobs();
373 jobInfo[1] = JobsQueue.getDoneJobs();
374 // now we accept connections to check for state of calculations
375 LOG(1, "INFO: Sending state that "+toString(jobInfo[0])+" jobs are present and "+toString(jobInfo[1])+" jobs are done to controller ...");
376 conn->async_write(jobInfo,
377 boost::bind(&FragmentScheduler::ControllerListener_t::handle_CheckResultState, this,
378 boost::asio::placeholders::error, conn));
379 break;
380 }
381 case SendResults:
382 {
383 const std::vector<FragmentResult::ptr> results = JobsQueue.getAllResults();
384 // ... or we give the results
385 LOG(1, "INFO: Sending "+toString(results.size())+" results to controller ...");
386 conn->async_write(results,
387 boost::bind(&FragmentScheduler::ControllerListener_t::handle_SendResults, this,
388 boost::asio::placeholders::error, conn));
389 break;
390 }
391 case ShutdownControllerSocket:
392 {
393 LOG(1, "INFO: Received shutdown from controller ...");
394 // only allow for shutdown when there are no more jobs in the queue
395 if (!JobsQueue.isJobPresent()) {
396 LaunchNewAcceptor = false;
397 } else {
398 ELOG(2, "There are still jobs waiting in the queue.");
399 }
400 break;
401 }
402 default:
403 Exitflag = ErrorFlag;
404 ELOG(1, "ControllerListener_t::handle_ReadChoice() - called with no valid choice.");
405 break;
406 }
407 // restore NoControllerOperation choice such that choice is not read twice
408 choice = NoControllerOperation;
409
410 if (LaunchNewAcceptor) {
411 LOG(1, "Launching new acceptor on socket.");
412 // Start an accept operation for a new Connection.
413 initiateSocket();
414 }
415 }
416 else
417 {
418 // An error occurred. Log it and return. Since we are not starting a new
419 // accept operation the io_service will run out of work to do and the
420 // server will exit.
421 Exitflag = ErrorFlag;
422 ELOG(0, e.message());
423 }
424}
425
426/** Controller callback function when job has been sent.
427 *
428 * We check here whether the worker socket is accepting, if there
429 * have been no jobs we re-activate it, as it is shut down after
430 * last job.
431 *
432 * \param e error code if something went wrong
433 * \param conn reference with the connection
434 */
435void FragmentScheduler::ControllerListener_t::handle_ReceiveJobs(const boost::system::error_code& e, connection_ptr conn)
436{
437 Info info(__FUNCTION__);
438 bool need_initiateSocket = !JobsQueue.isJobPresent();
439
440 // jobs are received, hence place in JobsQueue
441 if (!jobs.empty()) {
442 LOG(1, "INFO: Pushing " << jobs.size() << " jobs into queue.");
443 JobsQueue.pushJobs(jobs);
444 }
445
446 jobs.clear();
447
448 // initiate socket if we had no jobs before
449 if (need_initiateSocket)
450 initiateWorkerSocket();
451}
452
453/** Controller callback function when checking on state of results.
454 *
455 * \param e error code if something went wrong
456 * \param conn reference with the connection
457 */
458void FragmentScheduler::ControllerListener_t::handle_CheckResultState(const boost::system::error_code& e, connection_ptr conn)
459{
460 Info info(__FUNCTION__);
461 // do nothing
462 LOG(1, "INFO: Sent that " << jobInfo << " jobs are (scheduled, done).");
463}
464
465/** Controller callback function when checking on state of results.
466 *
467 * \param e error code if something went wrong
468 * \param conn reference with the connection
469 */
470void FragmentScheduler::ControllerListener_t::handle_GetNextJobIdState(const boost::system::error_code& e, connection_ptr conn)
471{
472 Info info(__FUNCTION__);
473 // do nothing
474 LOG(1, "INFO: Sent next available job id.");
475}
476
477/** Controller callback function when result has been received.
478 *
479 * \param e error code if something went wrong
480 * \param conn reference with the connection
481 */
482void FragmentScheduler::ControllerListener_t::handle_SendResults(const boost::system::error_code& e, connection_ptr conn)
483{
484 Info info(__FUNCTION__);
485 // do nothing
486 LOG(1, "INFO: Results have been sent.");
487}
488
489
490/** Helper function to send a job to worker.
491 *
492 * Note that we do not set the worker as busy. We simply send it the job.
493 *
494 * @param address address of worker
495 * @param job job to send
496 */
497void FragmentScheduler::sendJobToWorker(const WorkerAddress &address, FragmentJob::ptr &job)
498{
499 ASSERT( pool.isWorkerBusy(address),
500 "FragmentScheduler::sendJobToWorker() - Worker "+toString(address)+" is not marked as busy.");
501 LOG(1, "INFO: Sending job " << job->getId() << " to worker " << address << ".");
502 sendJobOp.setJob(job);
503 sendJobOp(address.host, address.service);
504}
505
506///** Helper function to shutdown a single worker.
507// *
508// * We send NoJob to indicate shutdown
509// *
510// * @param address of worker to shutdown
511// */
512//void FragmentScheduler::shutdownWorker(const WorkerAddress &address)
513//{
514// sendJobToWorker(address, NoJob);
515//}
516//
517///** Sends shutdown to all current workers in the pool.
518// *
519// */
520//void FragmentScheduler::removeAllWorkers()
521//{
522// // give all workers shutdown signal
523// while (pool.presentIdleWorkers()) {
524// const WorkerAddress address = pool.getNextIdleWorker();
525// shutdownWorker(address);
526// }
527//}
Note: See TracBrowser for help on using the repository browser.