tgstation-server 6.14.0
The /tg/station 13 server suite
Loading...
Searching...
No Matches
WatchdogBase.cs
Go to the documentation of this file.
1using System;
4using System.Linq;
7
9
10using Prometheus;
11
12using Serilog.Context;
13
30
32{
36#pragma warning disable CA1506 // TODO: Decomplexify
38 {
41
43 public uint? ClientCount { get; private set; }
44
46 public DateTimeOffset? LaunchTime => GetActiveController()?.LaunchTime;
47
50 {
51 get => status;
52 protected set
53 {
55 status = value;
56 Logger.LogTrace("Status set from {oldStatus} to {status}", oldStatus, status);
57 }
58 }
59
62
64 public abstract bool AlphaIsActive { get; }
65
68
70 public DreamDaemonLaunchParameters? LastLaunchParameters { get; protected set; }
71
73 public Models.CompileJob? ActiveCompileJob => GetActiveController()?.CompileJob;
74
76 public abstract RebootState? RebootState { get; }
77
82
86 protected ILogger<WatchdogBase> Logger { get; }
87
91 protected IChatManager Chat { get; }
92
97
101 protected IDmbFactory DmbFactory { get; }
102
106 protected IAsyncDelayer AsyncDelayer { get; }
107
111 protected IIOManager GameIOManager { get; }
112
117
122
127
132
137
142
147
152
157
162
167
172
176 volatile TaskCompletionSource activeParametersUpdated;
177
182
187
192
197
202
207
227 protected WatchdogBase(
228 IChatManager chat,
229 ISessionControllerFactory sessionControllerFactory,
230 IDmbFactory dmbFactory,
233 IServerControl serverControl,
234 IAsyncDelayer asyncDelayer,
238 IMetricFactory metricFactory,
239 IIOManager gameIOManager,
242 Api.Models.Instance metadata,
243 bool autoStart)
244 {
245 Chat = chat ?? throw new ArgumentNullException(nameof(chat));
246 SessionControllerFactory = sessionControllerFactory ?? throw new ArgumentNullException(nameof(sessionControllerFactory));
247 DmbFactory = dmbFactory ?? throw new ArgumentNullException(nameof(dmbFactory));
250 AsyncDelayer = asyncDelayer ?? throw new ArgumentNullException(nameof(asyncDelayer));
254 ArgumentNullException.ThrowIfNull(metricFactory);
255 GameIOManager = gameIOManager ?? throw new ArgumentNullException(nameof(gameIOManager));
256 Logger = logger ?? throw new ArgumentNullException(nameof(logger));
260
261 ArgumentNullException.ThrowIfNull(serverControl);
262
263 watchdogStatusMetric = metricFactory.CreateGauge(
264 "tgs_watchdog_status",
265 $"TGS Watchdog status: {(int)WatchdogStatus.Offline} = Offline, {(int)WatchdogStatus.Online} = Online, {(int)WatchdogStatus.Restoring} = Restoring, {(int)WatchdogStatus.DelayedRestart} = Delayed Restart");
266 cpuUsageMetric = metricFactory.CreateGauge("tgs_game_cpu_usage", "Estimated total CPU usage time for the game process from 0-1");
267 ramUsageMetric = metricFactory.CreateGauge("tgs_game_ram_usage", "Total used bytes of private memory for the game process");
268
269 chat.RegisterCommandHandler(this);
270
272 releaseServers = false;
273 activeParametersUpdated = new TaskCompletionSource();
274
275 restartRegistration = serverControl.RegisterForRestart(this);
276 try
277 {
280 }
281 catch
282 {
283 restartRegistration.Dispose();
284 synchronizationSemaphore?.Dispose();
285 throw;
286 }
287
288 Logger.LogTrace("Created watchdog");
289 }
290
293 {
294 Logger.LogTrace("Disposing...");
295 synchronizationSemaphore.Dispose();
296 restartRegistration.Dispose();
297
300 monitorCts?.Dispose();
301
302 disposed = true;
303 }
304
307 {
309 {
313 if (!currentEngine.HasValue)
314 return false;
315
316 bool match = launchParameters.CanApplyWithoutReboot(currentLaunchParameters, currentEngine.Value);
317 if (match || Status == WatchdogStatus.Offline || Status == WatchdogStatus.DelayedRestart)
318 return false;
319
320 var oldTcs = Interlocked.Exchange(ref activeParametersUpdated, new TaskCompletionSource());
321 oldTcs.SetResult();
322 }
323
324 return true;
325 }
326
329 {
331 {
335 if (Status != WatchdogStatus.Online || activeServer == null)
336 return new MessageContent
337 {
338 Text = "TGS: Server offline!",
339 };
340
342
343 if (commandResult == null)
344 return new MessageContent
345 {
346 Text = "TGS: Bad topic exchange!",
347 };
348
349 if (commandResult == null)
350 return new MessageContent
351 {
352 Text = "TGS: Bad topic response!",
353 };
354
356 {
357 Text = commandResult.CommandResponse?.Text ?? commandResult.CommandResponseMessage,
358 Embed = commandResult.CommandResponse?.Embed,
359 };
360
361 if (commandResponse.Text == null && commandResponse.Embed == null)
362 {
363 commandResponse.Text = "TGS: Command processed but no DMAPI response returned!";
364 }
365
367
368 return commandResponse;
369 }
370 }
371
373 public async ValueTask Launch(CancellationToken cancellationToken)
374 {
375 if (Status != WatchdogStatus.Offline)
376 throw new JobException(ErrorCode.WatchdogRunning);
378 await LaunchNoLock(true, true, true, null, cancellationToken);
379 }
380
382 public virtual async ValueTask ResetRebootState(CancellationToken cancellationToken)
383 {
385 {
386 if (Status == WatchdogStatus.Offline)
387 return;
389 if (toClear != null)
390 await toClear.SetRebootState(Session.RebootState.Normal, cancellationToken);
391 }
392 }
393
395 public async ValueTask Restart(bool graceful, CancellationToken cancellationToken)
396 {
397 if (Status == WatchdogStatus.Offline)
398 throw new JobException(ErrorCode.WatchdogNotRunning);
399
400 Logger.LogTrace("Begin Restart. Graceful: {gracefulFlag}", graceful);
402 {
403 if (!graceful)
404 {
405 Chat.QueueWatchdogMessage("Manual restart triggered...");
407 await LaunchNoLock(true, false, true, null, cancellationToken);
408 return;
409 }
410
412 if (toReboot != null
413 && !await toReboot.SetRebootState(Session.RebootState.Restart, cancellationToken))
414 Logger.LogWarning("Unable to send reboot state change event!");
415 }
416 }
417
419 public async Task StartAsync(CancellationToken cancellationToken)
420 {
422 var reattaching = reattachInfo != null;
423 if (!autoStart && !reattaching)
424 return;
425
426 var job = Models.Job.Create(
428 ? JobCode.StartupWatchdogReattach
429 : JobCode.StartupWatchdogLaunch,
430 null,
431 metadata,
432 DreamDaemonRights.Shutdown);
434 job,
435 async (core, databaseContextFactory, paramJob, progressFunction, ct) =>
436 {
437 if (core?.Watchdog != this)
439
441 await LaunchNoLock(true, true, true, reattachInfo, ct);
442
444 },
446 }
447
449 public async Task StopAsync(CancellationToken cancellationToken) =>
451
458
461 {
463 {
465
466 if (Status != WatchdogStatus.Offline)
467 {
468 Logger.LogDebug("Waiting for server to gracefully shut down.");
470 }
471 else
472 Logger.LogTrace("Graceful shutdown requested but server is already offline.");
473
474 return;
475 }
476
477 releaseServers = true;
478 if (Status == WatchdogStatus.Online)
479 Chat.QueueWatchdogMessage("Detaching...");
480 else
481 Logger.LogTrace("Not sending detach chat message as status is: {status}", Status);
482 }
483
485 public abstract ValueTask InstanceRenamed(string newInstanceName, CancellationToken cancellationToken);
486
493
495 public async ValueTask<bool> Broadcast(string message, CancellationToken cancellationToken)
496 {
497 ArgumentNullException.ThrowIfNull(message);
498
500 if (activeServer == null)
501 {
502 Logger.LogInformation("Attempted broadcast failed, no active server!");
503 return false;
504 }
505
506 if (!activeServer.DMApiAvailable)
507 {
508 Logger.LogInformation("Attempted broadcast failed, no DMAPI!");
509 return false;
510 }
511
512 var minimumRequiredVersion = new Version(5, 7, 0);
513 if (activeServer.DMApiVersion < minimumRequiredVersion)
514 {
515 Logger.LogInformation(
516 "Attempted broadcast failed, insufficient interop version: {interopVersion}. Requires {minimumRequiredVersion}!",
517 activeServer.DMApiVersion,
519 return false;
520 }
521
522 Logger.LogInformation("Broadcasting: {message}", message);
523
524 var response = await activeServer.SendCommand(
527
528 return response != null && response.ErrorMessage == null;
529 }
530
532 public void RunMetricsScrape()
533 {
537 cpuUsageMetric.Set(controller?.MeasureProcessorTimeDelta() ?? 0);
538 }
539
542 {
544
545 // Method explicitly implemented to prevent accidental calls when this.eventConsumer should be used.
547
548 // Server may have ended
549 if (activeServer == null)
550 return;
551
553 var result = await activeServer.SendCommand(
556
558 }
559
562 => throw new NotSupportedException("Watchdogs do not support custom events!");
563
572
583 bool startMonitor,
584 bool announce,
585 bool announceFailure,
587 CancellationToken cancellationToken)
588 {
589 Logger.LogTrace("Begin LaunchImplNoLock");
590 if (startMonitor && Status != WatchdogStatus.Offline)
591 throw new JobException(ErrorCode.WatchdogRunning);
592
593 if (reattachInfo == null && !DmbFactory.DmbAvailable)
594 throw new JobException(ErrorCode.WatchdogCompileJobCorrupted);
595
596 // this is necessary, the monitor could be in its sleep loop trying to restart, if so cancel THAT monitor and start our own with blackjack and hookers
597 var eventTask = ValueTask.CompletedTask;
598 if (announce)
599 {
601 reattachInfo == null
602 ? "Launching..."
603 : "Reattaching..."); // simple announce
604 if (reattachInfo == null)
605 eventTask = HandleEventImpl(EventType.WatchdogLaunch, Enumerable.Empty<string>(), false, cancellationToken);
606 }
607
608 // since neither server is running, this is safe to do
611 ClientCount = null;
612
613 try
614 {
616 }
618 {
619 Logger.LogTrace(ex, "Controller initialization cancelled!");
620 throw;
621 }
622 catch (Exception e)
623 {
624 Logger.LogWarning(e, "Failed to start watchdog!");
627 {
629 if (announceFailure)
630 Chat.QueueWatchdogMessage("Startup failed!");
631 }
632
634 throw;
635 }
636 finally
637 {
638 // finish the chat task that's in flight
639 try
640 {
642 }
644 {
645 Logger.LogTrace(ex, "Announcement task canceled!");
646 }
647 }
648
649 Logger.LogInformation("Controller(s) initialized successfully");
650
651 if (startMonitor)
652 {
655 }
656 }
657
663 {
664 Logger.LogTrace("StopMonitor");
665 if (monitorTask == null)
666 return false;
667 var wasRunning = !monitorTask.IsCompleted;
668 monitorCts!.Cancel();
670 Logger.LogTrace("Stopped Monitor");
671 monitorCts.Dispose();
672 monitorTask = null;
673 monitorCts = null;
674 return wasRunning;
675 }
676
685 {
686 var launchResult = await controller.LaunchResult.WaitAsync(cancellationToken);
687
688 // Dead sessions won't trigger this
689 if (launchResult.ExitCode.HasValue) // you killed us ray...
690 throw new JobException(
691 ErrorCode.WatchdogStartupFailed,
692 new JobException($"{serverName} failed to start: {launchResult}"));
693 if (!launchResult.StartupTime.HasValue)
694 throw new JobException(
695 ErrorCode.WatchdogStartupTimeout,
696 new JobException($"{serverName} timed out on startup: {ActiveLaunchParameters.StartupTimeout!.Value}s"));
697 }
698
705 {
706 // we lost the server, just restart entirely
707 // DCT: Operation must always run
708 await DisposeAndNullControllers(CancellationToken.None);
709 ClientCount = null;
710 const string FailReattachMessage = "Unable to properly reattach to server! Restarting watchdog...";
711 Logger.LogWarning(FailReattachMessage);
712
714 await InitController(ValueTask.CompletedTask, null, cancellationToken);
715 }
716
722
738
744
753 CancellationToken cancellationToken);
754
761 protected async ValueTask BeforeApplyDmb(Models.CompileJob newCompileJob, CancellationToken cancellationToken)
762 {
764 {
765 Logger.LogTrace("Same compile job, not sending deployment event");
766 return;
767 }
768
770 metadata,
772
774 EventType.DeploymentActivation,
775 new List<string?>
776 {
777 GameIOManager.ResolvePath(newCompileJob.DirectoryName!.Value.ToString()),
778 },
779 false,
781
782 try
783 {
785 }
786 catch (Exception ex)
787 {
788 Logger.LogWarning(ex, "Failed to apply remote deployment!");
789 }
790
792 }
793
803 {
804 try
805 {
806 var sessionEventTask = relayToSession ? ((IEventConsumer)this).HandleEvent(eventType, parameters, false, cancellationToken) : ValueTask.CompletedTask;
811 }
812 catch (JobException ex)
813 {
814 Logger.LogError(ex, "Suppressing exception triggered by event!");
815 }
816 }
817
824 {
825 Logger.LogTrace("Monitor restart!");
826
828
829 for (var retryAttempts = 1; ; ++retryAttempts)
830 {
831 Status = WatchdogStatus.Restoring;
834 try
835 {
836 // use LaunchNoLock without announcements or restarting the monitor
837 await LaunchNoLock(false, false, false, null, cancellationToken);
838 Status = WatchdogStatus.Online;
839 Logger.LogDebug("Relaunch successful, resuming monitor...");
840 return;
841 }
843 {
845 }
846
847 Logger.LogWarning(launchException, "Failed to automatically restart the watchdog! Attempt: {attemptNumber}", retryAttempts);
848 Status = WatchdogStatus.DelayedRestart;
849
850 var retryDelay = Math.Min(
851 Convert.ToInt32(
852 Math.Pow(2, retryAttempts)),
853 TimeSpan.FromHours(1).TotalSeconds); // max of one hour, increasing by a power of 2 each time
854
856 $"Failed to restart (Attempt: {retryAttempts}), retrying in {retryDelay}s...");
857
859 TimeSpan.FromSeconds(retryDelay),
861 }
862 }
863
870 {
872
875 {
876 Logger.LogDebug("Found new CompileJob without waiting");
877 return;
878 }
879
881 }
882
888#pragma warning disable CA1502
890 {
891 Logger.LogTrace("Entered MonitorLifetimes");
892 Status = WatchdogStatus.Online;
893 using var cancellationTokenLoggingRegistration = cancellationToken.Register(() => Logger.LogTrace("Monitor cancellationToken triggered"));
894
895 // this function is responsible for calling HandleMonitorWakeup when necessary and maintaining the MonitorState
896 try
897 {
899 Task? activeServerLifetime = null,
900 activeServerReboot = null,
901 activeServerStartup = null,
902 serverPrimed = null,
904 newDmbAvailable = null,
905 healthCheck = null;
907 var ranInitialDmbCheck = false;
908 for (ulong iteration = 1; nextAction != MonitorAction.Exit; ++iteration)
910 {
911 var nextMonitorWakeupTcs = new TaskCompletionSource();
912 try
913 {
914 Logger.LogTrace("Iteration {iteration} of monitor loop", iteration);
915 nextAction = MonitorAction.Continue;
916
918
920 {
923 {
924 if (sameController && oldTask?.IsCompleted == true)
925 return;
926
928 }
929
930 controller!.RebootGate = nextMonitorWakeupTcs.Task;
931
936
937 if (!sameController)
939
943 () =>
944 {
948 ranInitialDmbCheck = true;
949 return result;
950 });
951 }
952
953 if (controller != null)
954 {
956
961 : Task.Delay(
962 TimeSpan.FromSeconds(healthCheckSeconds),
964
965 // cancel waiting if requested
966 var toWaitOn = Task.WhenAny(
973 serverPrimed!);
974
975 // wait for something to happen
977 }
978 else
979 {
980 Logger.LogError("Controller was null on monitor wakeup! Attempting restart...");
981 nextAction = MonitorAction.Restart; // excuse me wtf?
982 }
983
984 cancellationToken.ThrowIfCancellationRequested();
985 Logger.LogTrace("Monitor activated");
986
987 // always run HandleMonitorWakeup from the context of the semaphore lock
989 {
990 // Set this sooner so chat sends don't hold us up
992 Status = WatchdogStatus.Restoring;
993
994 // multiple things may have happened, handle them one at a time
996 {
997 MonitorActivationReason activationReason = default; // this will always be assigned before being used
998
1000 {
1001 var taskCompleted = task?.IsCompleted == true;
1002 task = null;
1003 if (nextAction == MonitorAction.Skip)
1004 nextAction = MonitorAction.Continue;
1005 else if (taskCompleted)
1006 {
1008 return true;
1009 }
1010
1011 return false;
1012 }
1013
1014 // process the tasks in this order and call HandleMonitorWakeup for each depending on the new monitorState
1022
1024
1025 if (!anyActivation)
1027 else
1028 {
1029 Logger.LogTrace("Reason: {activationReason}", activationReason);
1030 if (activationReason == MonitorActivationReason.HealthCheck)
1033 else
1037 }
1038 }
1039 }
1040
1041 Logger.LogTrace("Next monitor action is to {nextAction}", nextAction);
1042
1043 // Restart if requested
1044 if (nextAction == MonitorAction.Restart)
1045 {
1047 nextAction = MonitorAction.Continue;
1048 }
1049 }
1051 {
1052 // really, this should NEVER happen
1053 Logger.LogError(
1054 e,
1055 "Monitor crashed! Iteration: {iteration}",
1056 iteration);
1057
1059 ? "Recovering"
1060 : "Shutting down";
1062 $"Monitor crashed, this should NEVER happen! Please report this, full details in logs! {nextActionMessage}. Error: {e.Message}");
1063
1064 if (disposed)
1066 else if (nextAction != MonitorAction.Exit)
1067 {
1068 if (GetActiveController()?.Lifetime.IsCompleted != true)
1070 else
1071 Logger.LogDebug("Server seems to be okay, not restarting");
1072 nextAction = MonitorAction.Continue;
1073 }
1074 }
1075 finally
1076 {
1077 nextMonitorWakeupTcs.SetResult();
1078 }
1079 }
1080 }
1082 {
1083 // stop signal
1084 Logger.LogDebug("Monitor cancelled");
1085
1086 if (releaseServers)
1087 {
1088 Logger.LogTrace("Detaching server...");
1090 if (controller != null)
1091 await controller.Release();
1092 else
1093 Logger.LogError("Controller was null on monitor shutdown!");
1094 }
1095 }
1096
1097 // DCT: Operation must always run
1098 await DisposeAndNullControllers(CancellationToken.None);
1099 Status = WatchdogStatus.Offline;
1100
1101 Logger.LogTrace("Monitor exiting...");
1102 }
1103#pragma warning restore CA1502
1104
1113 {
1114 if (Status == WatchdogStatus.Offline)
1115 return;
1116 if (!graceful)
1117 {
1120 ? EventType.WatchdogDetach
1121 : EventType.WatchdogShutdown,
1122 Enumerable.Empty<string>(),
1125
1126 if (announce)
1127 Chat.QueueWatchdogMessage("Shutting down...");
1128
1130
1132
1133 LastLaunchParameters = null;
1134 return;
1135 }
1136
1137 // merely set the reboot state
1139 if (toKill != null)
1140 {
1141 await toKill.SetRebootState(Session.RebootState.Shutdown, cancellationToken);
1142 Logger.LogTrace("Graceful termination requested");
1143 }
1144 else
1145 Logger.LogTrace("Could not gracefully terminate as there is no active controller!");
1146 }
1147
1154 {
1155 Logger.LogTrace("Sending health check to active server...");
1157 if (activeServer == null)
1158 return MonitorAction.Restart; // uhhhh???
1159
1161
1162 var shouldShutdown = activeServer.RebootState == Session.RebootState.Shutdown;
1163 if (response == null)
1164 {
1165 switch (++healthChecksMissed)
1166 {
1167 case 1:
1168 Logger.LogDebug("DEFCON 4: Game server missed first health check!");
1169 break;
1170 case 2:
1171 const string message2 = "DEFCON 3: Game server has missed 2 health checks!";
1172 Logger.LogInformation(message2);
1174 break;
1175 case 3:
1177 ? "shutdown"
1178 : "be restarted";
1179 const string logTemplate1 = "DEFCON 2: Game server has missed 3 health checks! If it does not respond to the next one, the watchdog will {actionToTake}!";
1180 Logger.LogWarning(logTemplate1, actionToTake);
1182 logTemplate1.Replace(
1183 "{actionToTake}",
1185 StringComparison.Ordinal));
1186 break;
1187 case 4:
1189 ? "Shutting down due to graceful termination request"
1190 : "Restarting";
1191 const string logTemplate2 = "DEFCON 1: Four health checks have been missed! {actionTaken}...";
1192 Logger.LogWarning(logTemplate2, actionTaken);
1194 logTemplate2.Replace(
1195 "{actionTaken}",
1197 StringComparison.Ordinal));
1198
1200 {
1201 Logger.LogDebug("DumpOnHealthCheckRestart enabled.");
1202 try
1203 {
1205 }
1206 catch (JobException ex)
1207 {
1208 Logger.LogWarning(ex, "Creating dump failed!");
1209 }
1210 catch (Win32Exception ex)
1211 {
1212 Logger.LogWarning(ex, "Creating dump failed!");
1213 }
1214 }
1215 else
1216 Logger.LogTrace("DumpOnHealthCheckRestart disabled.");
1217
1220 default:
1221 Logger.LogError("Invalid health checks missed count: {healthChecksMissed}", healthChecksMissed);
1222 break;
1223 }
1224 }
1225 else
1226 {
1228 ClientCount = response.ClientCount;
1229 }
1230
1231 return MonitorAction.Continue;
1232 }
1233
1239 {
1240 if (result?.ChatResponses != null)
1241 {
1243 foreach (var response in result.ChatResponses
1244 .Where(response =>
1245 {
1246 if (response.ChannelIds == null)
1247 {
1248 if (!warnedMissingChannelIds)
1249 {
1250 Logger.LogWarning("DMAPI response contains null channelIds!");
1251 warnedMissingChannelIds = true;
1252 }
1253
1254 return false;
1255 }
1256
1257 return true;
1258 }))
1260 response,
1261 response.ChannelIds!
1262 .Select(channelIdString =>
1263 {
1265 return (ulong?)channelId;
1266 else
1267 Logger.LogWarning("Could not parse chat response channel ID: {channelID}", channelIdString);
1268
1269 return null;
1270 })
1272 .Select(nullableChannelId => nullableChannelId!.Value));
1273 }
1274 }
1275
1282 {
1283 const string DumpDirectory = "ProcessDumps";
1284
1285 var session = GetActiveController();
1286 if (session?.Lifetime.IsCompleted != false)
1287 throw new JobException(ErrorCode.GameServerOffline);
1288
1289 var dumpFileExtension = session.DumpFileExtension;
1290
1291 var dumpFileNameTemplate = diagnosticsIOManager.ResolvePath(
1292 diagnosticsIOManager.ConcatPath(
1294 $"DreamDaemon-{DateTimeOffset.UtcNow.ToFileStamp()}"));
1295
1296 var dumpFileName = $"{dumpFileNameTemplate}{dumpFileExtension}";
1297 var iteration = 0;
1298 while (await diagnosticsIOManager.FileExists(dumpFileName, cancellationToken))
1299 dumpFileName = $"{dumpFileNameTemplate} ({++iteration}){dumpFileExtension}";
1300
1301 if (iteration == 0)
1302 await diagnosticsIOManager.CreateDirectory(DumpDirectory, cancellationToken);
1303
1304 if (session.Lifetime.IsCompleted)
1305 throw new JobException(ErrorCode.GameServerOffline);
1306
1307 Logger.LogInformation("Dumping session to {dumpFileName}...", dumpFileName);
1308 await session.CreateDump(dumpFileName, ActiveLaunchParameters.Minidumps!.Value, cancellationToken);
1309 }
1310 }
1311}
virtual long? Id
The ID of the entity.
Definition EntityId.cs:14
Metadata about a server instance.
Definition Instance.cs:9
uint? HealthCheckSeconds
The number of seconds between each watchdog health check. 0 disables.
bool? DumpOnHealthCheckRestart
If a process core dump should be created prior to restarting the watchdog due to health check failure...
Extension methods for the ValueTask and ValueTask<TResult> classes.
static async ValueTask WhenAll(IEnumerable< ValueTask > tasks)
Fully await a given list of tasks.
Represents a tgs_chat_user datum.
Definition ChatUser.cs:12
bool DmbAvailable
If LockNextDmb will succeed.
Definition DmbFactory.cs:42
Task OnNewerDmb
Get a Task that completes when the result of a call to LockNextDmb will be different than the previou...
Definition DmbFactory.cs:32
async ValueTask< CompileJob?> LatestCompileJob()
Gets the latest CompileJob. A ValueTask<TResult> resulting in the latest CompileJob or null if none ar...
const string DifferentCoreExceptionMessage
Message for the InvalidOperationException if ever a job starts on a different IInstanceCore than the ...
Definition Instance.cs:36
Represents a message to send to a chat provider.
Represents a chat command to be handled by DD.
Data structure for TopicCommandType.EventNotification requests.
static TopicParameters CreateBroadcastParameters(string broadcastMessage)
Initializes a new instance of the TopicParameters class.
Parameters necessary for duplicating a ISessionController session.
async ValueTask< ReattachInformation?> Load(CancellationToken cancellationToken)
Load a saved ReattachInformation. A ValueTask<TResult> resulting in the stored ReattachInformation if ...
ValueTask Clear(CancellationToken cancellationToken)
Clear any stored ReattachInformation. A ValueTask representing the running operation.
async ValueTask HandleRestart(Version? updateVersion, bool handlerMayDelayShutdownWithExtremelyLongRunningTasks, CancellationToken cancellationToken)
Handle a restart of the server. A ValueTask representing the running operation.
async ValueTask< bool > Broadcast(string message, CancellationToken cancellationToken)
Send a broadcast message to the DMAPI. A ValueTask<TResult> resulting in true if the broadcast succee...
async ValueTask< MonitorAction > HandleHealthCheck(CancellationToken cancellationToken)
Handles a watchdog health check.
ISessionController? GetActiveController()
Get the active ISessionController.
readonly Gauge cpuUsageMetric
Active session CPU usage as a metric.
readonly IJobManager jobManager
The IJobManager for the WatchdogBase.
ILogger< WatchdogBase > Logger
The ILogger for the WatchdogBase.
bool releaseServers
If the servers should be released instead of shutdown.
async ValueTask ReattachFailure(CancellationToken cancellationToken)
Call from InitController(ValueTask, ReattachInformation, CancellationToken) when a reattach operation...
async ValueTask CreateDumpNoLock(CancellationToken cancellationToken)
Attempt to create a process dump for the game server. Requires a lock on synchronizationSemaphore.
Models.CompileJob? ActiveCompileJob
Retrieves the Models.CompileJob currently running on the server.
DreamDaemonLaunchParameters? LastLaunchParameters
The DreamDaemonLaunchParameters the active server is using. This may not be the exact same as ActiveLa...
DateTimeOffset? LaunchTime
When the current server execution was started.
WatchdogBase(IChatManager chat, ISessionControllerFactory sessionControllerFactory, IDmbFactory dmbFactory, ISessionPersistor sessionPersistor, IJobManager jobManager, IServerControl serverControl, IAsyncDelayer asyncDelayer, IIOManager diagnosticsIOManager, IEventConsumer eventConsumer, IRemoteDeploymentManagerFactory remoteDeploymentManagerFactory, IMetricFactory metricFactory, IIOManager gameIOManager, ILogger< WatchdogBase > logger, DreamDaemonLaunchParameters initialLaunchParameters, Api.Models.Instance metadata, bool autoStart)
Initializes a new instance of the WatchdogBase class.
readonly bool autoStart
If the WatchdogBase should LaunchNoLock(bool, bool, bool, ReattachInformation, CancellationToken) in ...
async ValueTask Terminate(bool graceful, CancellationToken cancellationToken)
Stops the watchdog. A ValueTask representing the running operation.
async ValueTask CheckLaunchResult(ISessionController controller, string serverName, CancellationToken cancellationToken)
Check the LaunchResult of a given controller for errors and throw a JobException if any are detected...
readonly Api.Models.Instance metadata
The Api.Models.Instance for the WatchdogBase.
bool AlphaIsActive
If the alpha server is the active server.
readonly IEventConsumer eventConsumer
The IEventConsumer that is not the WatchdogBase.
bool disposed
If the WatchdogBase has been DisposeAsync'd.
readonly SemaphoreSlim controllerDisposeSemaphore
SemaphoreSlim used for DisposeAndNullControllers.
WatchdogStatus Status
The current WatchdogStatus.
ValueTask< MonitorAction > HandleMonitorWakeup(MonitorActivationReason activationReason, CancellationToken cancellationToken)
Handles the actions to take when the monitor has to "wake up".
CancellationTokenSource? monitorCts
The CancellationTokenSource for the monitor loop.
Task? monitorTask
The Task running the monitor loop.
void RunMetricsScrape()
Callback to update transient metrics.
readonly IRemoteDeploymentManagerFactory remoteDeploymentManagerFactory
The IRemoteDeploymentManagerFactory for the WatchdogBase.
readonly SemaphoreSlim synchronizationSemaphore
The SemaphoreSlim for the WatchdogBase.
async ValueTask Restart(bool graceful, CancellationToken cancellationToken)
Restarts the watchdog. A ValueTask representing the running operation.
long? MemoryUsage
Gets the memory usage of the game server in bytes.
readonly IIOManager diagnosticsIOManager
The IIOManager pointing to the Diagnostics directory.
async ValueTask< MessageContent > HandleChatCommand(string commandName, string arguments, ChatUser sender, CancellationToken cancellationToken)
Handle a chat command. A ValueTask<TResult> resulting in the MessageContent text to send back.
ValueTask InitController(ValueTask eventTask, ReattachInformation? reattachInfo, CancellationToken cancellationToken)
Starts all ISessionControllers.
async ValueTask CreateDump(CancellationToken cancellationToken)
Attempt to create a process dump for DreamDaemon. A ValueTask representing the running operation.
IIOManager GameIOManager
The IIOManager for the WatchdogBase pointing to the Game directory.
DreamDaemonLaunchParameters ActiveLaunchParameters
The DreamDaemonLaunchParameters to be applied.
uint? ClientCount
Last known client count queried from the DMAPI. Requires health checks to be enabled to populate.
async ValueTask BeforeApplyDmb(Models.CompileJob newCompileJob, CancellationToken cancellationToken)
To be called before a given newCompileJob goes live.
async ValueTask< bool > StopMonitor()
Stops MonitorLifetimes(CancellationToken). Doesn't kill the servers.
async ValueTask MonitorRestart(CancellationToken cancellationToken)
Attempt to restart the monitor from scratch.
async ValueTask Launch(CancellationToken cancellationToken)
Start the IWatchdog. A ValueTask representing the running operation.
async Task InitialCheckDmbUpdated(CompileJob currentCompileJob)
Check for a new IDmbProvider.
async ValueTask LaunchNoLock(bool startMonitor, bool announce, bool announceFailure, ReattachInformation? reattachInfo, CancellationToken cancellationToken)
Launches the watchdog.
async ValueTask DisposeAndNullControllers(CancellationToken cancellationToken)
Wrapper for DisposeAndNullControllersImpl under a locked context.
ValueTask DisposeAndNullControllersImpl()
Call IDisposable.Dispose and null the fields for all ISessionControllers.
async ValueTask TerminateNoLock(bool graceful, bool announce, CancellationToken cancellationToken)
Implementation of Terminate(bool, CancellationToken). Does not lock synchronizationSemaphore.
async ValueTask< bool > ChangeSettings(DreamDaemonLaunchParameters launchParameters, CancellationToken cancellationToken)
Changes the ActiveLaunchParameters. If currently running, may trigger a graceful restart....
void HandleChatResponses(TopicResponse? result)
Handle any TopicResponse.ChatResponses in a given topic result.
virtual async ValueTask ResetRebootState(CancellationToken cancellationToken)
Cancels pending graceful actions. A ValueTask representing the running operation.
async Task MonitorLifetimes(CancellationToken cancellationToken)
The main loop of the watchdog. Asynchronously waits for events to occur and then responds to them.
long? SessionId
An incrementing ID for representing current server execution.
async Task StartAsync(CancellationToken cancellationToken)
ValueTask InstanceRenamed(string newInstanceName, CancellationToken cancellationToken)
Called when the owning Instance is renamed. A ValueTask representing the running operation.
readonly Gauge ramUsageMetric
MemoryUsage as a metric.
int healthChecksMissed
The number of health checks missed.
readonly Gauge watchdogStatusMetric
The Status as a metric.
IChatManager Chat
The IChatManager for the WatchdogBase.
async Task StopAsync(CancellationToken cancellationToken)
readonly IRestartRegistration restartRegistration
The IRestartRegistration for the WatchdogBase.
async ValueTask HandleEventImpl(EventType eventType, IEnumerable< string > parameters, bool relayToSession, CancellationToken cancellationToken)
Handle a given eventType without re-throwing errors.
WatchdogStatus status
Backing field for Status.
volatile TaskCompletionSource activeParametersUpdated
TaskCompletionSource that completes when ActiveLaunchParameters are changed and we are running.
Operation exceptions thrown from the context of a Models.Job.
async ValueTask Delay(TimeSpan timeSpan, CancellationToken cancellationToken)
Create a Task that completes after a given timeSpan. A ValueTask representing the running operation.
static async ValueTask< SemaphoreSlimContext > Lock(SemaphoreSlim semaphore, CancellationToken cancellationToken, ILogger? logger=null)
Asynchronously locks a semaphore.
Helpers for manipulating the Serilog.Context.LogContext.
const string WatchdogMonitorIterationContextProperty
The Serilog.Context.LogContext property name for the ID of the watchdog monitor iteration currently b...
For managing connected chat services.
void QueueWatchdogMessage(string message)
Queue a chat message to configured watchdog channels.
void RegisterCommandHandler(ICustomCommandHandler customCommandHandler)
Registers a customCommandHandler to use.
ValueTask UpdateTrackingContexts(CancellationToken cancellationToken)
Force an update with the active channels on all active IChatTrackingContexts.
void QueueMessage(MessageContent message, IEnumerable< ulong > channelIds)
Queue a chat message to a given set of channelIds.
Handles Commands.ICommands that map to those defined in an IChatTrackingContext.
IRemoteDeploymentManager CreateRemoteDeploymentManager(Api.Models.Instance metadata, RemoteGitProvider remoteGitProvider)
Creates an IRemoteDeploymentManager for a given remoteGitProvider.
Consumes EventTypes and takes the appropriate actions.
ValueTask? HandleCustomEvent(string eventName, IEnumerable< string?> parameters, CancellationToken cancellationToken)
Handles a given custom event.
ValueTask HandleEvent(EventType eventType, IEnumerable< string?> parameters, bool deploymentPipeline, CancellationToken cancellationToken)
Handle a given eventType.
Handles communication with a DreamDaemon IProcess.
Models.CompileJob CompileJob
Gets the CompileJob associated with the ISessionController.
ReattachInformation ReattachInformation
Gets the Session.ReattachInformation associated with the ISessionController.
EngineVersion EngineVersion
Gets the Api.Models.EngineVersion associated with the ISessionController.
Handles saving and loading ReattachInformation.
Runs and monitors the twin server controllers.
Definition IWatchdog.cs:16
Represents the lifetime of a IRestartHandler registration.
Represents a service that may take an updated Host assembly and run it, stopping the current assembly...
IRestartRegistration RegisterForRestart(IRestartHandler handler)
Register a given handler to run before stopping the server for a restart.
Interface for using filesystems.
Definition IIOManager.cs:13
Manages the runtime of Jobs.
ValueTask RegisterOperation(Job job, JobEntrypoint operation, CancellationToken cancellationToken)
Registers a given Job and begins running it.
long? MemoryUsage
Gets the process' memory usage in bytes.
DateTimeOffset? LaunchTime
When the process was started.
ErrorCode
Types of Response.ErrorMessageResponses that the API may return.
Definition ErrorCode.cs:12
JobCode
The different types of Response.JobResponse.
Definition JobCode.cs:9
WatchdogStatus
The current status of the watchdog.
@ List
User may list files if the Models.Instance allows it.
DreamDaemonRights
Rights for managing DreamDaemon.
EventType
Types of events. Mirror in tgs.dm. Prefer last listed name for script.
Definition EventType.cs:7
RebootState
Represents the action to take when /world/Reboot() is called.
Definition RebootState.cs:7
MonitorAction
The action for the monitor loop to take when control is returned to it.
MonitorActivationReason
Reasons for the monitor to wake up.