/*
 * Project: MoleCuilder
 * Description: creates and alters molecular systems
 * Copyright (C)  2011 University of Bonn. All rights reserved.
 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
 */

/*
 * \file FragmentScheduler.cpp
 *
 * This file strongly follows the Serialization example from the boost::asio
 * library (see server.cpp)
 *
 *  Created on: Oct 19, 2011
 *      Author: heber
 */
| 17 | 
 | 
|---|
| 18 | // include config.h
 | 
|---|
| 19 | #ifdef HAVE_CONFIG_H
 | 
|---|
| 20 | #include <config.h>
 | 
|---|
| 21 | #endif
 | 
|---|
| 22 | 
 | 
|---|
| 23 | // boost asio needs specific operator new
 | 
|---|
| 24 | #include <boost/asio.hpp>
 | 
|---|
| 25 | 
 | 
|---|
| 26 | #include "CodePatterns/MemDebug.hpp"
 | 
|---|
| 27 | 
 | 
|---|
| 28 | #include <boost/bind.hpp>
 | 
|---|
| 29 | #include <boost/lexical_cast.hpp>
 | 
|---|
| 30 | #include <iostream>
 | 
|---|
| 31 | #include <vector>
 | 
|---|
| 32 | #include "Connection.hpp" // Must come before boost/serialization headers.
 | 
|---|
| 33 | #include <boost/serialization/vector.hpp>
 | 
|---|
| 34 | #include "CodePatterns/Info.hpp"
 | 
|---|
| 35 | #include "CodePatterns/Log.hpp"
 | 
|---|
| 36 | #include "CodePatterns/Observer/Notification.hpp"
 | 
|---|
| 37 | #include "ControllerChoices.hpp"
 | 
|---|
| 38 | #include "Operations/Workers/EnrollInPoolOperation.hpp"
 | 
|---|
| 39 | #include "Jobs/MPQCCommandJob.hpp"
 | 
|---|
| 40 | #include "Jobs/SystemCommandJob.hpp"
 | 
|---|
| 41 | #include "JobId.hpp"
 | 
|---|
| 42 | 
 | 
|---|
| 43 | #include "FragmentScheduler.hpp"
 | 
|---|
| 44 | 
 | 
|---|
/** Helper function to enforce binding of FragmentWorker to possible derived
 * FragmentJob classes.
 *
 * Each statement constructs and immediately discards a temporary of a derived
 * job type; the point is only to reference the constructors so the derived
 * classes are bound/linked in — the instances themselves are never used.
 */
void dummyInit() {
  SystemCommandJob("/bin/false", "something", JobId::IllegalJob);
  MPQCCommandJob("nofile", JobId::IllegalJob);
}
| 52 | 
 | 
|---|
/** Constructor of class FragmentScheduler.
 *
 * We setup both acceptors to accept connections from workers and Controller.
 * We also sign on to the worker pool's idle channel and the job queue's
 * "job added" channel so that recieveNotification() can match idle workers
 * with waiting jobs.
 *
 * \param io_service io_service of the asynchronous communications
 * \param workerport port to listen for worker connections
 * \param controllerport port to listen for controller connections.
 */
FragmentScheduler::FragmentScheduler(boost::asio::io_service& _io_service, unsigned short workerport, unsigned short controllerport) :
    Observer("FragmentScheduler"),
    io_service(_io_service),
    // the worker listener hands each (address, job) pair to sendJobToWorker()
    WorkerListener(_io_service, workerport, JobsQueue, pool,
        boost::bind(&FragmentScheduler::sendJobToWorker, boost::ref(*this), _1, _2)),
    // the controller listener may trigger a full server shutdown()
    ControllerListener(_io_service, controllerport, JobsQueue,
        boost::bind(&FragmentScheduler::shutdown, boost::ref(*this))),
    connection(_io_service),
    sendJobOp(connection),
    shutdownWorkerOp(connection)
{
  Info info(__FUNCTION__);

  // sign on to idle workers and present jobs
  pool.signOn(this, WorkerPool::WorkerIdle);
  JobsQueue.signOn(this, FragmentQueue::JobAdded);

  // listen for controller
  ControllerListener.initiateSocket();

  // listen for workers
  WorkerListener.initiateSocket();
}
| 84 | 
 | 
|---|
/** Destructor of class FragmentScheduler.
 *
 * Signs off from the two notification channels subscribed to in the
 * constructor (worker-idle and job-added).
 */
FragmentScheduler::~FragmentScheduler()
{
  // sign off
  pool.signOff(this, WorkerPool::WorkerIdle);
  JobsQueue.signOff(this, FragmentQueue::JobAdded);
}
| 91 | 
 | 
|---|
| 92 | /** Handle a new worker connection.
 | 
|---|
| 93 |  *
 | 
|---|
| 94 |  * We store the given address in the pool.
 | 
|---|
| 95 |  *
 | 
|---|
| 96 |  * \param e error code if something went wrong
 | 
|---|
| 97 |  * \param conn reference with the connection
 | 
|---|
| 98 |  */
 | 
|---|
| 99 | void FragmentScheduler::WorkerListener_t::handle_Accept(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 100 | {
 | 
|---|
| 101 |   Info info(__FUNCTION__);
 | 
|---|
| 102 |   if (!e)
 | 
|---|
| 103 |   {
 | 
|---|
| 104 |     // Successfully accepted a new connection.
 | 
|---|
| 105 |     // read address
 | 
|---|
| 106 |     conn->async_read(address,
 | 
|---|
| 107 |       boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReadAddress, this,
 | 
|---|
| 108 |       boost::asio::placeholders::error, conn));
 | 
|---|
| 109 |   }
 | 
|---|
| 110 |   else
 | 
|---|
| 111 |   {
 | 
|---|
| 112 |     // An error occurred. Log it and return. Since we are not starting a new
 | 
|---|
| 113 |     // accept operation the io_service will run out of work to do and the
 | 
|---|
| 114 |     // server will exit.
 | 
|---|
| 115 |     Exitflag = ErrorFlag;
 | 
|---|
| 116 |     ELOG(0, e.message());
 | 
|---|
| 117 |   }
 | 
|---|
| 118 | }
 | 
|---|
| 119 | 
 | 
|---|
| 120 | /** Handle having received Worker's address
 | 
|---|
| 121 |  *
 | 
|---|
| 122 |  * \param e error code if something went wrong
 | 
|---|
| 123 |  * \param conn reference with the connection
 | 
|---|
| 124 |  */
 | 
|---|
| 125 | void FragmentScheduler::WorkerListener_t::handle_ReadAddress(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 126 | {
 | 
|---|
| 127 |   Info info(__FUNCTION__);
 | 
|---|
| 128 |   if (!e)
 | 
|---|
| 129 |   {
 | 
|---|
| 130 |     // Successfully accepted a new connection.
 | 
|---|
| 131 |     // read address
 | 
|---|
| 132 |     conn->async_read(choice,
 | 
|---|
| 133 |       boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReadChoice, this,
 | 
|---|
| 134 |       boost::asio::placeholders::error, conn));
 | 
|---|
| 135 |   }
 | 
|---|
| 136 |   else
 | 
|---|
| 137 |   {
 | 
|---|
| 138 |     // An error occurred. Log it and return. Since we are not starting a new
 | 
|---|
| 139 |     // accept operation the io_service will run out of work to do and the
 | 
|---|
| 140 |     // server will exit.
 | 
|---|
| 141 |     Exitflag = ErrorFlag;
 | 
|---|
| 142 |     ELOG(0, e.message());
 | 
|---|
| 143 |   }
 | 
|---|
| 144 | }
 | 
|---|
| 145 | 
 | 
|---|
| 146 | /** Controller callback function to read the choice for next operation.
 | 
|---|
| 147 |  *
 | 
|---|
| 148 |  * \param e error code if something went wrong
 | 
|---|
| 149 |  * \param conn reference with the connection
 | 
|---|
| 150 |  */
 | 
|---|
| 151 | void FragmentScheduler::WorkerListener_t::handle_ReadChoice(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 152 | {
 | 
|---|
| 153 |   Info info(__FUNCTION__);
 | 
|---|
| 154 |   if (!e)
 | 
|---|
| 155 |   {
 | 
|---|
| 156 |     LOG(1, "INFO: Received request for operation " << choice << ".");
 | 
|---|
| 157 |     // switch over the desired choice read previously
 | 
|---|
| 158 |     switch(choice) {
 | 
|---|
| 159 |       case NoWorkerOperation:
 | 
|---|
| 160 |       {
 | 
|---|
| 161 |         ELOG(1, "WorkerListener_t::handle_ReadChoice() - called with NoOperation.");
 | 
|---|
| 162 |         break;
 | 
|---|
| 163 |       }
 | 
|---|
| 164 |       case EnrollInPool:
 | 
|---|
| 165 |       {
 | 
|---|
| 166 |         if (pool.presentInPool(address)) {
 | 
|---|
| 167 |           ELOG(1, "INFO: worker "+toString(address)+" is already contained in pool.");
 | 
|---|
| 168 |           enum EnrollInPoolOperation::EnrollFlag flag = EnrollInPoolOperation::Fail;
 | 
|---|
| 169 |           conn->async_write(flag,
 | 
|---|
| 170 |             boost::bind(&FragmentScheduler::WorkerListener_t::handle_enrolled, this,
 | 
|---|
| 171 |             boost::asio::placeholders::error, conn));
 | 
|---|
| 172 |         } else {
 | 
|---|
| 173 |           // insert as its new worker
 | 
|---|
| 174 |           LOG(1, "INFO: Adding " << address << " to pool ...");
 | 
|---|
| 175 |           pool.addWorker(address);
 | 
|---|
| 176 |           enum EnrollInPoolOperation::EnrollFlag flag = EnrollInPoolOperation::Success;
 | 
|---|
| 177 |           conn->async_write(flag,
 | 
|---|
| 178 |             boost::bind(&FragmentScheduler::WorkerListener_t::handle_enrolled, this,
 | 
|---|
| 179 |             boost::asio::placeholders::error, conn));
 | 
|---|
| 180 |         break;
 | 
|---|
| 181 |       }
 | 
|---|
| 182 |       case SendResult:
 | 
|---|
| 183 |       {
 | 
|---|
| 184 |         if (pool.presentInPool(address)) {
 | 
|---|
| 185 |           // check whether its priority is busy_priority
 | 
|---|
| 186 |           if (pool.isWorkerBusy(address)) {
 | 
|---|
| 187 |             conn->async_read(result,
 | 
|---|
| 188 |               boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReceiveResultFromWorker, this,
 | 
|---|
| 189 |               boost::asio::placeholders::error, conn));
 | 
|---|
| 190 |           } else {
 | 
|---|
| 191 |             ELOG(1, "Worker " << address << " trying to send result who is not marked as busy.");
 | 
|---|
| 192 |             conn->async_read(result,
 | 
|---|
| 193 |               boost::bind(&FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker, this,
 | 
|---|
| 194 |               boost::asio::placeholders::error, conn));
 | 
|---|
| 195 |           }
 | 
|---|
| 196 |         } else {
 | 
|---|
| 197 |           ELOG(1, "Worker " << address << " trying to send result who is not in pool.");
 | 
|---|
| 198 |           conn->async_read(result,
 | 
|---|
| 199 |             boost::bind(&FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker, this,
 | 
|---|
| 200 |             boost::asio::placeholders::error, conn));
 | 
|---|
| 201 |         }
 | 
|---|
| 202 |         break;
 | 
|---|
| 203 |       }
 | 
|---|
| 204 |       case RemoveFromPool:
 | 
|---|
| 205 |       {
 | 
|---|
| 206 |         if (pool.presentInPool(address)) {
 | 
|---|
| 207 |           // removing present worker
 | 
|---|
| 208 |           pool.removeWorker(address);
 | 
|---|
| 209 |         } else {
 | 
|---|
| 210 |           ELOG(1, "Shutting down Worker " << address << " not contained in pool.");
 | 
|---|
| 211 |         }
 | 
|---|
| 212 |         break;
 | 
|---|
| 213 |       }
 | 
|---|
| 214 |       default:
 | 
|---|
| 215 |         Exitflag = ErrorFlag;
 | 
|---|
| 216 |         ELOG(1, "WorkerListener_t::handle_ReadChoice() - called with no valid choice.");
 | 
|---|
| 217 |         break;
 | 
|---|
| 218 |       }
 | 
|---|
| 219 |     }
 | 
|---|
| 220 |     // restore NoOperation choice such that choice is not read twice
 | 
|---|
| 221 |     choice = NoWorkerOperation;
 | 
|---|
| 222 | 
 | 
|---|
| 223 |     initiateSocket();
 | 
|---|
| 224 |   }
 | 
|---|
| 225 |   else
 | 
|---|
| 226 |   {
 | 
|---|
| 227 |     // An error occurred. Log it and return. Since we are not starting a new
 | 
|---|
| 228 |     // accept operation the io_service will run out of work to do and the
 | 
|---|
| 229 |     // server will exit.
 | 
|---|
| 230 |     Exitflag = ErrorFlag;
 | 
|---|
| 231 |     ELOG(0, e.message());
 | 
|---|
| 232 |   }
 | 
|---|
| 233 | }
 | 
|---|
| 234 | 
 | 
|---|
| 235 | 
 | 
|---|
| 236 | /** Callback function when new worker has enrolled.
 | 
|---|
| 237 |  *
 | 
|---|
| 238 |  * \param e error code if something went wrong
 | 
|---|
| 239 |  * \param conn reference with the connection
 | 
|---|
| 240 |  */
 | 
|---|
| 241 | void FragmentScheduler::WorkerListener_t::handle_enrolled(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 242 | {
 | 
|---|
| 243 |   Info info(__FUNCTION__);
 | 
|---|
| 244 |   if (e)
 | 
|---|
| 245 |   {
 | 
|---|
| 246 |     // An error occurred. Log it and return. Since we are not starting a new
 | 
|---|
| 247 |     // accept operation the io_service will run out of work to do and the
 | 
|---|
| 248 |     // server will exit.
 | 
|---|
| 249 |     Exitflag = ErrorFlag;
 | 
|---|
| 250 |     ELOG(0, e.message());
 | 
|---|
| 251 |   }
 | 
|---|
| 252 | }
 | 
|---|
| 253 | 
 | 
|---|
| 254 | /** Callback function when result has been received.
 | 
|---|
| 255 |  *
 | 
|---|
| 256 |  * \param e error code if something went wrong
 | 
|---|
| 257 |  * \param conn reference with the connection
 | 
|---|
| 258 |  */
 | 
|---|
| 259 | void FragmentScheduler::WorkerListener_t::handle_ReceiveResultFromWorker(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 260 | {
 | 
|---|
| 261 |   Info info(__FUNCTION__);
 | 
|---|
| 262 |   LOG(1, "INFO: Received result for job #" << result->getId() << " ...");
 | 
|---|
| 263 | 
 | 
|---|
| 264 |   // and push into queue
 | 
|---|
| 265 |   ASSERT(result->getId() != (JobId_t)JobId::NoJob,
 | 
|---|
| 266 |       "WorkerListener_t::handle_ReceiveResultFromWorker() - result received has NoJob id.");
 | 
|---|
| 267 |   ASSERT(result->getId() != (JobId_t)JobId::IllegalJob,
 | 
|---|
| 268 |       "WorkerListener_t::handle_ReceiveResultFromWorker() - result received has IllegalJob id.");
 | 
|---|
| 269 |   // place id into expected
 | 
|---|
| 270 |   if ((result->getId() != (JobId_t)JobId::NoJob) && (result->getId() != (JobId_t)JobId::IllegalJob))
 | 
|---|
| 271 |     JobsQueue.pushResult(result);
 | 
|---|
| 272 | 
 | 
|---|
| 273 |   // mark as idle
 | 
|---|
| 274 |   pool.unmarkWorkerBusy(address);
 | 
|---|
| 275 | 
 | 
|---|
| 276 |   // erase result
 | 
|---|
| 277 |   result.reset();
 | 
|---|
| 278 |   LOG(1, "INFO: JobsQueue has " << JobsQueue.getDoneJobs() << " results.");
 | 
|---|
| 279 | }
 | 
|---|
| 280 | 
 | 
|---|
| 281 | /** Callback function when result has been received.
 | 
|---|
| 282 |  *
 | 
|---|
| 283 |  * \param e error code if something went wrong
 | 
|---|
| 284 |  * \param conn reference with the connection
 | 
|---|
| 285 |  */
 | 
|---|
| 286 | void FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 287 | {
 | 
|---|
| 288 |   Info info(__FUNCTION__);
 | 
|---|
| 289 |   // nothing to do
 | 
|---|
| 290 |   LOG(1, "INFO: Rejecting result for job #" << result->getId() << ", placing back into queue.");
 | 
|---|
| 291 | 
 | 
|---|
| 292 |   JobsQueue.resubmitJob(result->getId());
 | 
|---|
| 293 | 
 | 
|---|
| 294 |   LOG(1, "INFO: JobsQueue has " << JobsQueue.getDoneJobs() << " results.");
 | 
|---|
| 295 | }
 | 
|---|
| 296 | 
 | 
|---|
| 297 | 
 | 
|---|
| 298 | /** Handle a new controller connection.
 | 
|---|
| 299 |  *
 | 
|---|
| 300 |  * \sa handle_ReceiveJobs()
 | 
|---|
| 301 |  * \sa handle_CheckResultState()
 | 
|---|
| 302 |  * \sa handle_SendResults()
 | 
|---|
| 303 |  *
 | 
|---|
| 304 |  * \param e error code if something went wrong
 | 
|---|
| 305 |  * \param conn reference with the connection
 | 
|---|
| 306 |  */
 | 
|---|
| 307 | void FragmentScheduler::ControllerListener_t::handle_Accept(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 308 | {
 | 
|---|
| 309 |   Info info(__FUNCTION__);
 | 
|---|
| 310 |   if (!e)
 | 
|---|
| 311 |   {
 | 
|---|
| 312 |     conn->async_read(choice,
 | 
|---|
| 313 |       boost::bind(&FragmentScheduler::ControllerListener_t::handle_ReadChoice, this,
 | 
|---|
| 314 |       boost::asio::placeholders::error, conn));
 | 
|---|
| 315 |   }
 | 
|---|
| 316 |   else
 | 
|---|
| 317 |   {
 | 
|---|
| 318 |     // An error occurred. Log it and return. Since we are not starting a new
 | 
|---|
| 319 |     // accept operation the io_service will run out of work to do and the
 | 
|---|
| 320 |     // server will exit.
 | 
|---|
| 321 |     Exitflag = ErrorFlag;
 | 
|---|
| 322 |     ELOG(0, e.message());
 | 
|---|
| 323 |   }
 | 
|---|
| 324 | }
 | 
|---|
| 325 | 
 | 
|---|
| 326 | /** Controller callback function to read the choice for next operation.
 | 
|---|
| 327 |  *
 | 
|---|
| 328 |  * \param e error code if something went wrong
 | 
|---|
| 329 |  * \param conn reference with the connection
 | 
|---|
| 330 |  */
 | 
|---|
| 331 | void FragmentScheduler::ControllerListener_t::handle_ReadChoice(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 332 | {
 | 
|---|
| 333 |   Info info(__FUNCTION__);
 | 
|---|
| 334 |   if (!e)
 | 
|---|
| 335 |   {
 | 
|---|
| 336 |     bool LaunchNewAcceptor = true;
 | 
|---|
| 337 |     LOG(1, "INFO: Received request for operation " << choice << ".");
 | 
|---|
| 338 |     // switch over the desired choice read previously
 | 
|---|
| 339 |     switch(choice) {
 | 
|---|
| 340 |     case NoControllerOperation:
 | 
|---|
| 341 |     {
 | 
|---|
| 342 |       ELOG(1, "ControllerListener_t::handle_ReadChoice() - called with NoOperation.");
 | 
|---|
| 343 |       break;
 | 
|---|
| 344 |     }
 | 
|---|
| 345 |     case GetNextJobId:
 | 
|---|
| 346 |     {
 | 
|---|
| 347 |       const JobId_t nextid = globalId.getNextId();
 | 
|---|
| 348 |       LOG(1, "INFO: Sending next available job id " << nextid << " to controller ...");
 | 
|---|
| 349 |       conn->async_write(nextid,
 | 
|---|
| 350 |         boost::bind(&FragmentScheduler::ControllerListener_t::handle_GetNextJobIdState, this,
 | 
|---|
| 351 |         boost::asio::placeholders::error, conn));
 | 
|---|
| 352 |       break;
 | 
|---|
| 353 |     }
 | 
|---|
| 354 |     case SendJobs:
 | 
|---|
| 355 |     {
 | 
|---|
| 356 |       // The connection::async_write() function will automatically
 | 
|---|
| 357 |       // serialize the data structure for us.
 | 
|---|
| 358 |       LOG(1, "INFO: Receiving bunch of jobs from a controller ...");
 | 
|---|
| 359 |       conn->async_read(jobs,
 | 
|---|
| 360 |         boost::bind(&FragmentScheduler::ControllerListener_t::handle_ReceiveJobs, this,
 | 
|---|
| 361 |         boost::asio::placeholders::error, conn));
 | 
|---|
| 362 |       break;
 | 
|---|
| 363 |     }
 | 
|---|
| 364 |     case CheckState:
 | 
|---|
| 365 |     {
 | 
|---|
| 366 |       // first update number
 | 
|---|
| 367 |       jobInfo[0] = JobsQueue.getPresentJobs();
 | 
|---|
| 368 |       jobInfo[1] = JobsQueue.getDoneJobs();
 | 
|---|
| 369 |       // now we accept connections to check for state of calculations
 | 
|---|
| 370 |       LOG(1, "INFO: Sending state that "+toString(jobInfo[0])+" jobs are present and "+toString(jobInfo[1])+" jobs are done to controller ...");
 | 
|---|
| 371 |       conn->async_write(jobInfo,
 | 
|---|
| 372 |         boost::bind(&FragmentScheduler::ControllerListener_t::handle_CheckResultState, this,
 | 
|---|
| 373 |         boost::asio::placeholders::error, conn));
 | 
|---|
| 374 |       break;
 | 
|---|
| 375 |     }
 | 
|---|
| 376 |     case ReceiveResults:
 | 
|---|
| 377 |     {
 | 
|---|
| 378 |       const std::vector<FragmentResult::ptr> results = JobsQueue.getAllResults();
 | 
|---|
| 379 |       // ... or we give the results
 | 
|---|
| 380 |       LOG(1, "INFO: Sending "+toString(results.size())+" results to controller ...");
 | 
|---|
| 381 |       conn->async_write(results,
 | 
|---|
| 382 |         boost::bind(&FragmentScheduler::ControllerListener_t::handle_SendResults, this,
 | 
|---|
| 383 |         boost::asio::placeholders::error, conn));
 | 
|---|
| 384 |       break;
 | 
|---|
| 385 |     }
 | 
|---|
| 386 |     case ShutdownControllerSocket:
 | 
|---|
| 387 |     {
 | 
|---|
| 388 |       LOG(1, "INFO: Received shutdown from controller ...");
 | 
|---|
| 389 |       // only allow for shutdown when there are no more jobs in the queue
 | 
|---|
| 390 |       if (!JobsQueue.isJobPresent()) {
 | 
|---|
| 391 |         LaunchNewAcceptor = false;
 | 
|---|
| 392 |       } else {
 | 
|---|
| 393 |         ELOG(2, "There are still jobs waiting in the queue.");
 | 
|---|
| 394 |       }
 | 
|---|
| 395 |       break;
 | 
|---|
| 396 |     }
 | 
|---|
| 397 |     default:
 | 
|---|
| 398 |       Exitflag = ErrorFlag;
 | 
|---|
| 399 |       ELOG(1, "ControllerListener_t::handle_ReadChoice() - called with no valid choice.");
 | 
|---|
| 400 |       break;
 | 
|---|
| 401 |     }
 | 
|---|
| 402 |     // restore NoControllerOperation choice such that choice is not read twice
 | 
|---|
| 403 |     choice = NoControllerOperation;
 | 
|---|
| 404 | 
 | 
|---|
| 405 |     if (LaunchNewAcceptor) {
 | 
|---|
| 406 |       LOG(1, "Launching new acceptor on socket.");
 | 
|---|
| 407 |       // Start an accept operation for a new Connection.
 | 
|---|
| 408 |       initiateSocket();
 | 
|---|
| 409 |     } else {
 | 
|---|
| 410 |       // we shutdown? Hence, also shutdown controller
 | 
|---|
| 411 |       shutdownAllSockets();
 | 
|---|
| 412 |     }
 | 
|---|
| 413 |   }
 | 
|---|
| 414 |   else
 | 
|---|
| 415 |   {
 | 
|---|
| 416 |     // An error occurred. Log it and return. Since we are not starting a new
 | 
|---|
| 417 |     // accept operation the io_service will run out of work to do and the
 | 
|---|
| 418 |     // server will exit.
 | 
|---|
| 419 |     Exitflag = ErrorFlag;
 | 
|---|
| 420 |     ELOG(0, e.message());
 | 
|---|
| 421 |   }
 | 
|---|
| 422 | }
 | 
|---|
| 423 | 
 | 
|---|
| 424 | /** Controller callback function when job has been sent.
 | 
|---|
| 425 |  *
 | 
|---|
| 426 |  * We check here whether the worker socket is accepting, if there
 | 
|---|
| 427 |  * have been no jobs we re-activate it, as it is shut down after
 | 
|---|
| 428 |  * last job.
 | 
|---|
| 429 |  *
 | 
|---|
| 430 |  * \param e error code if something went wrong
 | 
|---|
| 431 |  * \param conn reference with the connection
 | 
|---|
| 432 |  */
 | 
|---|
| 433 | void FragmentScheduler::ControllerListener_t::handle_ReceiveJobs(const boost::system::error_code& e, connection_ptr conn)
 | 
|---|
| 434 | {
 | 
|---|
| 435 |   Info info(__FUNCTION__);
 | 
|---|
| 436 |   // jobs are received, hence place in JobsQueue
 | 
|---|
| 437 |   if (!jobs.empty()) {
 | 
|---|
| 438 |     LOG(1, "INFO: Pushing " << jobs.size() << " jobs into queue.");
 | 
|---|
| 439 |     JobsQueue.pushJobs(jobs);
 | 
|---|
| 440 |   }
 | 
|---|
| 441 |   jobs.clear();
 | 
|---|
| 442 | }
 | 
|---|
| 443 | 
 | 
|---|
/** Controller callback function when checking on state of results.
 *
 * The jobInfo array was already written out in handle_ReadChoice(); this
 * handler only logs completion of that write.
 *
 * \param e error code if something went wrong
 * \param conn reference with the connection
 */
void FragmentScheduler::ControllerListener_t::handle_CheckResultState(const boost::system::error_code& e, connection_ptr conn)
{
  Info info(__FUNCTION__);
  // do nothing
  LOG(1, "INFO: Sent that " << jobInfo << " jobs are (scheduled, done).");
}
| 455 | 
 | 
|---|
/** Controller callback function after the next job id has been sent.
 *
 * The id itself was written out in handle_ReadChoice(); this handler only
 * logs completion of that write.
 *
 * \param e error code if something went wrong
 * \param conn reference with the connection
 */
void FragmentScheduler::ControllerListener_t::handle_GetNextJobIdState(const boost::system::error_code& e, connection_ptr conn)
{
  Info info(__FUNCTION__);
  // do nothing
  LOG(1, "INFO: Sent next available job id.");
}
| 467 | 
 | 
|---|
/** Controller callback function when results have been sent.
 *
 * The result vector was written out in handle_ReadChoice(); this handler
 * only logs completion of that write.
 *
 * \param e error code if something went wrong
 * \param conn reference with the connection
 */
void FragmentScheduler::ControllerListener_t::handle_SendResults(const boost::system::error_code& e, connection_ptr conn)
{
  Info info(__FUNCTION__);
  // do nothing
  LOG(1, "INFO: Results have been sent.");
}
| 479 | 
 | 
|---|
| 480 | 
 | 
|---|
/** Helper function to send a job to worker.
 *
 * Note that we do not set the worker as busy. We simply send it the job.
 * As the ASSERT below shows, the caller must already have marked the worker
 * as busy before calling this function.
 *
 * @param address address of worker
 * @param job job to send
 */
void FragmentScheduler::sendJobToWorker(const WorkerAddress &address, FragmentJob::ptr &job)
{
  ASSERT( pool.isWorkerBusy(address),
      "FragmentScheduler::sendJobToWorker() - Worker "+toString(address)+" is not marked as busy.");
  LOG(1, "INFO: Sending job " << job->getId() << " to worker " << address << ".");
  // hand the job to the send operation and fire it at the worker's endpoint
  sendJobOp.setJob(job);
  sendJobOp(address.host, address.service);
}
| 496 | 
 | 
|---|
| 497 | /** Helper function to shutdown a single worker.
 | 
|---|
| 498 |  *
 | 
|---|
| 499 |  * We send NoJob to indicate shutdown
 | 
|---|
| 500 |  *
 | 
|---|
| 501 |  * @param address of worker to shutdown
 | 
|---|
| 502 |  */
 | 
|---|
| 503 | void FragmentScheduler::shutdownWorker(const WorkerAddress &address)
 | 
|---|
| 504 | {
 | 
|---|
| 505 |   ASSERT( !pool.isWorkerBusy(address),
 | 
|---|
| 506 |       "FragmentScheduler::sendJobToWorker() - Worker "+toString(address)+" is already busy.");
 | 
|---|
| 507 |   LOG(2, "INFO: Shutting down worker " << address << "...");
 | 
|---|
| 508 |   shutdownWorkerOp(address.host, address.service);
 | 
|---|
| 509 | }
 | 
|---|
| 510 | 
 | 
|---|
| 511 | /** Sends shutdown to all current workers in the pool.
 | 
|---|
| 512 |  *
 | 
|---|
| 513 |  */
 | 
|---|
| 514 | void FragmentScheduler::removeAllWorkers()
 | 
|---|
| 515 | {
 | 
|---|
| 516 |   LOG(2, "INFO: Shutting down workers ...");
 | 
|---|
| 517 | 
 | 
|---|
| 518 |   // \todo We have to wait here until all workers are done
 | 
|---|
| 519 |   // first, sign off such that no new jobs are given to workers
 | 
|---|
| 520 |   pool.signOff(this, WorkerPool::WorkerIdle);
 | 
|---|
| 521 |   while (pool.hasBusyWorkers())
 | 
|---|
| 522 |     ;
 | 
|---|
| 523 | 
 | 
|---|
| 524 |   // give all workers shutdown signal
 | 
|---|
| 525 |   for (WorkerPool::Idle_Queue_t::const_iterator iter = pool.begin_idle(); iter != pool.end_idle(); ++iter) {
 | 
|---|
| 526 |     const WorkerAddress address = iter->second;
 | 
|---|
| 527 |     shutdownWorker(address);
 | 
|---|
| 528 |   }
 | 
|---|
| 529 |   pool.removeAllWorkers();
 | 
|---|
| 530 | }
 | 
|---|
| 531 | 
 | 
|---|
/** Helper function to shutdown the server properly.
 *
 * Shuts down all workers, closes both listener sockets, and finally stops
 * the io_service, which lets the server's event loop return.
 *
 * \todo one should idle here until all workers have returned from
 * calculating stuff (or workers need to still listen while the are
 * calculating which is probably better).
 *
 */
void FragmentScheduler::shutdown()
{
  LOG(1, "INFO: Shutting all down ...");

  /// Remove all workers
  removeAllWorkers();

  /// close the worker listener's socket
  WorkerListener.closeSocket();

  /// close the controller listener's socket
  ControllerListener.closeSocket();

  /// finally, stop the io_service
  io_service.stop();
}
| 555 | 
 | 
|---|
| 556 | /** Internal helper to send the next available job to the next idle worker.
 | 
|---|
| 557 |  *
 | 
|---|
| 558 |  */
 | 
|---|
| 559 | void FragmentScheduler::sendAvailableJobToNextIdleWorker()
 | 
|---|
| 560 | {
 | 
|---|
| 561 |   const WorkerAddress address = pool.getNextIdleWorker();
 | 
|---|
| 562 |   FragmentJob::ptr job = JobsQueue.popJob();
 | 
|---|
| 563 |   sendJobToWorker(address, job);
 | 
|---|
| 564 | }
 | 
|---|
| 565 | 
 | 
|---|
/** General update callback of the Observer interface.
 *
 * We only ever register for specific notification channels (see the
 * constructor), so a general update must never arrive here.
 *
 * \param publisher the Observable that triggered the update
 */
void FragmentScheduler::update(Observable *publisher)
{
  ASSERT(0, "FragmentScheduler::update() - we are not signed on for global updates.");
}
| 570 | 
 | 
|---|
| 571 | void FragmentScheduler::recieveNotification(Observable *publisher, Notification_ptr notification)
 | 
|---|
| 572 | {
 | 
|---|
| 573 |   // we have an idle worker
 | 
|---|
| 574 |   if ((publisher == &pool) && (notification->getChannelNo() == WorkerPool::WorkerIdle)) {
 | 
|---|
| 575 |     LOG(1, "INFO: We are notified of an idle worker.");
 | 
|---|
| 576 |     // are jobs available?
 | 
|---|
| 577 |     if (JobsQueue.isJobPresent()) {
 | 
|---|
| 578 |       sendAvailableJobToNextIdleWorker();
 | 
|---|
| 579 |     }
 | 
|---|
| 580 |   }
 | 
|---|
| 581 | 
 | 
|---|
| 582 |   // we have new jobs
 | 
|---|
| 583 |   if ((publisher == &JobsQueue) && (notification->getChannelNo() == FragmentQueue::JobAdded)) {
 | 
|---|
| 584 |     LOG(1, "INFO: We are notified of a new job.");
 | 
|---|
| 585 |     // check for idle workers
 | 
|---|
| 586 |     if (pool.presentIdleWorkers()) {
 | 
|---|
| 587 |       sendAvailableJobToNextIdleWorker();
 | 
|---|
| 588 |     }
 | 
|---|
| 589 |   }
 | 
|---|
| 590 | }
 | 
|---|
| 591 | 
 | 
|---|
/** Observer callback when an observed subject is destroyed.
 *
 * Intentionally empty — no per-subject state is held that would need
 * releasing here.
 *
 * \param publisher the Observable being destroyed
 */
void FragmentScheduler::subjectKilled(Observable *publisher)
{}