tgstation-server 6.14.0
The /tg/station 13 server suite
Loading...
Searching...
No Matches
AdvancedWatchdog.cs
Go to the documentation of this file.
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Threading;
5using System.Threading.Tasks;
6
7using Microsoft.Extensions.Logging;
8
9using Prometheus;
10
22
24{
29 {
33 protected SwappableDmbProvider? ActiveSwappable { get; private set; }
34
39
43 readonly List<Task> deploymentCleanupTasks;
44
49
53 volatile TaskCompletionSource? deploymentCleanupGate;
54
76 IChatManager chat,
77 ISessionControllerFactory sessionControllerFactory,
78 IDmbFactory dmbFactory,
79 ISessionPersistor sessionPersistor,
81 IServerControl serverControl,
82 IAsyncDelayer asyncDelayer,
86 IMetricFactory metricFactory,
87 IIOManager gameIOManager,
88 IFilesystemLinkFactory linkFactory,
89 ILogger<AdvancedWatchdog> logger,
90 DreamDaemonLaunchParameters initialLaunchParameters,
91 Api.Models.Instance instance,
92 bool autoStart)
93 : base(
94 chat,
95 sessionControllerFactory,
96 dmbFactory,
97 sessionPersistor,
99 serverControl,
100 asyncDelayer,
104 metricFactory,
105 gameIOManager,
106 logger,
107 initialLaunchParameters,
108 instance,
109 autoStart)
110 {
111 try
112 {
113 LinkFactory = linkFactory ?? throw new ArgumentNullException(nameof(linkFactory));
114
115 deploymentCleanupTasks = new List<Task>();
116 }
117 catch
118 {
119 // Async dispose is for if we have controllers running, not the case here
120 var disposeTask = DisposeAsync();
121 Debug.Assert(disposeTask.IsCompleted, "This should always be true during construction!");
122 disposeTask.GetAwaiter().GetResult();
123
124 throw;
125 }
126 }
127
/// <inheritdoc />
/// <remarks>
/// In addition to the base behaviour this clears <see cref="ActiveSwappable"/>, disposes and clears
/// <see cref="pendingSwappable"/>, and waits for every queued deployment cleanup task to finish.
/// </remarks>
protected sealed override async ValueTask DisposeAndNullControllersImpl()
{
    await base.DisposeAndNullControllersImpl();

    // If we reach this point, we can guarantee PrepServerForLaunch will be called before starting again.
    ActiveSwappable = null;

    if (pendingSwappable != null)
    {
        // A staged deployment that never went live must be released before the reference is dropped,
        // mirroring how HandleNewDmbAvailable disposes a superseded pendingSwappable.
        await pendingSwappable.DisposeAsync();
        pendingSwappable = null;
    }

    // Blocking drain: wait for all cleanup tasks, not only the ones that already completed.
    await DrainDeploymentCleanupTasks(true);
}
144
/// <inheritdoc />
/// <remarks>
/// When a <see cref="pendingSwappable"/> is staged, this makes it the live deployment: the .dmb is swapped
/// (unless already swapped), the session controller's provider is replaced, and the previous deployment is
/// handed to a background cleanup task that releases it once the next cleanup gate fires or the startup
/// timeout elapses. Control then passes to the base implementation.
/// </remarks>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="ValueTask{TResult}"/> resulting in the <see cref="MonitorAction"/> to take.</returns>
protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(CancellationToken cancellationToken)
{
    if (pendingSwappable != null)
    {
        var needToSwap = !pendingSwappable.Swapped;
        var controller = Server!;
        if (needToSwap)
        {
            // IMPORTANT: THE SESSIONCONTROLLER SHOULD STILL BE PROCESSING THE BRIDGE REQUEST SO WE KNOW DD IS SLEEPING
            // OTHERWISE, IT COULD RETURN TO /world/Reboot() TOO EARLY AND LOAD THE WRONG .DMB
            if (!controller.ProcessingRebootBridgeRequest)
            {
                // integration test logging will catch this
                Logger.LogError(
                    "The reboot bridge request completed before the watchdog could suspend the server! This can lead to buggy DreamDaemon behaviour and should be reported! To ensure stability, we will need to hard reboot the server");
                return MonitorAction.Restart;
            }

            // DCT: Not necessary
            // Only peek at completion; we must not wait here while the server sits in its reboot request.
            if (!pendingSwappable.FinishActivationPreparation(CancellationToken.None).IsCompleted)
            {
                // rare pokemon
                Logger.LogInformation("Deployed .dme is not ready to swap, delaying until next reboot!");
                Chat.QueueWatchdogMessage("The pending deployment was not ready to be activated this reboot. It will be applied at the next one.");
                return MonitorAction.Continue;
            }
        }

        // Started now, awaited at the end so it overlaps with the swap.
        var updateTask = BeforeApplyDmb(pendingSwappable.CompileJob, cancellationToken);
        if (needToSwap)
        {
            await PerformDmbSwap(pendingSwappable, cancellationToken);
        }

        var currentCompileJobId = controller.ReattachInformation.Dmb.CompileJob.Id;

        // Non-blocking: only reaps cleanup tasks that have already completed.
        await DrainDeploymentCleanupTasks(false);

        // Assigned under the lock below, before CleanupLingeringDeployment is invoked.
        IAsyncDisposable lingeringDeployment;
        var localDeploymentCleanupGate = new TaskCompletionSource();
        async Task CleanupLingeringDeployment()
        {
            var lingeringDeploymentExpirySeconds = ActiveLaunchParameters.StartupTimeout!.Value;
            Logger.LogDebug(
                "Holding old deployment {compileJobId} for up to {expiry} seconds...",
                currentCompileJobId,
                lingeringDeploymentExpirySeconds);

            // DCT: A cancel firing here can result in us leaving a dmbprovider undisposed, localDeploymentCleanupGate will always fire in that case
            var timeout = AsyncDelayer.Delay(TimeSpan.FromSeconds(lingeringDeploymentExpirySeconds), CancellationToken.None).AsTask();

            var completedTask = await Task.WhenAny(
                localDeploymentCleanupGate.Task,
                timeout);

            var timedOut = completedTask == timeout;
            Logger.Log(
                timedOut
                    ? LogLevel.Warning
                    : LogLevel.Trace,
                "Releasing old deployment {compileJobId}{afterTimeout}",
                currentCompileJobId,
                timedOut
                    ? " due to timeout!"
                    : "...");

            await lingeringDeployment.DisposeAsync();
        }

        // Publish the new gate and fire the one it replaces so the previous cleanup task can proceed.
        var oldDeploymentCleanupGate = Interlocked.Exchange(ref deploymentCleanupGate, localDeploymentCleanupGate);
        oldDeploymentCleanupGate?.TrySetResult();

        Logger.LogTrace("Replacing activeSwappable with pendingSwappable...");

        // deploymentCleanupTasks is also mutated by DrainDeploymentCleanupTasks, so guard it with its lock.
        lock (deploymentCleanupTasks)
        {
            lingeringDeployment = controller.ReplaceDmbProvider(pendingSwappable);
            deploymentCleanupTasks.Add(
                CleanupLingeringDeployment());
        }

        ActiveSwappable = pendingSwappable;
        pendingSwappable = null;

        await SessionPersistor.Update(controller.ReattachInformation, cancellationToken);
        await updateTask;
    }
    else
    {
        Logger.LogTrace("Nothing to do as pendingSwappable is null.");
    }

    return await base.HandleNormalReboot(cancellationToken);
}
236
/// <inheritdoc />
/// <remarks>
/// Stages the next compile job as <see cref="pendingSwappable"/> when it can be swapped in place.
/// Falls back to the base implementation when the provider cannot be swapped, or when the engine version
/// or .dme name differs from the active compile job.
/// </remarks>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="ValueTask"/> representing the running operation.</returns>
protected sealed override async ValueTask HandleNewDmbAvailable(CancellationToken cancellationToken)
{
    IDmbProvider compileJobProvider = DmbFactory.LockNextDmb("AdvancedWatchdog next compile job preload");
    bool canSeamlesslySwap = CanUseSwappableDmbProvider(compileJobProvider);
    if (canSeamlesslySwap)
    {
        if (compileJobProvider.CompileJob.EngineVersion != ActiveCompileJob!.EngineVersion)
        {
            // have to do a graceful restart
            Logger.LogDebug(
                "Not swapping to new compile job {compileJobId} as it uses a different engine version ({newEngineVersion}) than what is currently active {oldEngineVersion}.",
                compileJobProvider.CompileJob.Id,
                compileJobProvider.CompileJob.EngineVersion,
                ActiveCompileJob.EngineVersion);
            canSeamlesslySwap = false;
        }
        else if (compileJobProvider.CompileJob.DmeName != ActiveCompileJob.DmeName)
        {
            // All three template placeholders need a value; the last one is the active .dme name.
            Logger.LogDebug(
                "Not swapping to new compile job {compileJobId} as it uses a different .dmb name ({newDmbName}) than what is currently active {oldDmbName}.",
                compileJobProvider.CompileJob.Id,
                compileJobProvider.CompileJob.DmeName,
                ActiveCompileJob.DmeName);
            canSeamlesslySwap = false;
        }
    }

    if (!canSeamlesslySwap)
    {
        // Release the lock taken above before deferring to the base handler.
        Logger.LogDebug("Queueing graceful restart instead...");
        await compileJobProvider.DisposeAsync();
        await base.HandleNewDmbAvailable(cancellationToken);
        return;
    }

    SwappableDmbProvider? swappableProvider = null;
    try
    {
        swappableProvider = CreateSwappableDmbProvider(compileJobProvider);
        if (ActiveCompileJob!.DMApiVersion == null)
        {
            Logger.LogWarning("Active compile job has no DMAPI! Commencing immediate .dmb swap. Note this behavior is known to be buggy in some DM code contexts. See https://github.com/tgstation/tgstation-server/issues/1550");
            await PerformDmbSwap(swappableProvider, cancellationToken);
        }
    }
    catch (Exception ex)
    {
        Logger.LogError(ex, "Exception while swapping");

        // Dispose the wrapper if it was created, otherwise the raw provider.
        IDmbProvider providerToDispose = swappableProvider ?? compileJobProvider;
        await providerToDispose.DisposeAsync();
        throw;
    }

    // Any previously staged deployment is superseded by this one.
    await (pendingSwappable?.DisposeAsync() ?? ValueTask.CompletedTask);
    pendingSwappable = swappableProvider;
}
292
/// <inheritdoc />
/// <remarks>
/// When <paramref name="dmbToUse"/> supports swapping, it is wrapped in a <see cref="SwappableDmbProvider"/>,
/// stored as <see cref="ActiveSwappable"/>, linked via <see cref="InitialLink(CancellationToken)"/> and returned.
/// Otherwise <paramref name="dmbToUse"/> is returned unchanged.
/// </remarks>
/// <param name="dmbToUse">The <see cref="IDmbProvider"/> the server is about to be launched with.</param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="ValueTask{TResult}"/> resulting in the <see cref="IDmbProvider"/> to launch with.</returns>
/// <exception cref="InvalidOperationException">If <see cref="ActiveSwappable"/> or <see cref="pendingSwappable"/> is not <see langword="null"/> on entry.</exception>
protected sealed override async ValueTask<IDmbProvider> PrepServerForLaunch(IDmbProvider dmbToUse, CancellationToken cancellationToken)
{
    if (ActiveSwappable != null)
    {
        throw new InvalidOperationException("Expected activeSwappable to be null!");
    }

    if (pendingSwappable != null)
    {
        throw new InvalidOperationException("Expected pendingSwappable to be null!");
    }

    Logger.LogTrace("Prep for server launch");
    if (!CanUseSwappableDmbProvider(dmbToUse))
    {
        return dmbToUse;
    }

    // InitialLink dereferences ActiveSwappable, so it has to be populated before the call.
    ActiveSwappable = CreateSwappableDmbProvider(dmbToUse);
    try
    {
        await InitialLink(cancellationToken);
    }
    catch (Exception ex)
    {
        // We won't worry about disposing activeSwappable here as we can't dispose dmbToUse here.
        Logger.LogTrace(ex, "Initial link error, nulling ActiveSwappable");
        ActiveSwappable = null;
        throw;
    }

    return ActiveSwappable;
}
320
/// <summary>
/// Set the ReattachInformation.InitialDmb for the BasicWatchdog.Server.
/// </summary>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="ValueTask"/> representing the running operation.</returns>
326 protected abstract ValueTask ApplyInitialDmb(CancellationToken cancellationToken);
327
334
/// <inheritdoc />
/// <remarks>Runs <see cref="ApplyInitialDmb(CancellationToken)"/> to completion before the base implementation.</remarks>
protected override async ValueTask SessionStartupPersist(CancellationToken cancellationToken)
{
    // Step 1: apply the initial .dmb.
    ValueTask applyInitialDmb = ApplyInitialDmb(cancellationToken);
    await applyInitialDmb;

    // Step 2: only then let the base class do its part.
    ValueTask basePersist = base.SessionStartupPersist(cancellationToken);
    await basePersist;
}
341
/// <inheritdoc />
/// <remarks>
/// After the base handler finishes, an <see cref="MonitorActivationReason.ActiveServerStartup"/> wakeup
/// also triggers a non-blocking drain of the deployment cleanup tasks.
/// </remarks>
protected override async ValueTask<MonitorAction> HandleMonitorWakeup(MonitorActivationReason reason, CancellationToken cancellationToken)
{
    MonitorAction nextAction = await base.HandleMonitorWakeup(reason, cancellationToken);

    bool serverJustStarted = reason == MonitorActivationReason.ActiveServerStartup;
    if (serverJustStarted)
    {
        await DrainDeploymentCleanupTasks(false);
    }

    return nextAction;
}
351
/// <summary>
/// If the <see cref="SwappableDmbProvider"/> feature of the <see cref="AdvancedWatchdog"/> can be used with a given <paramref name="dmbProvider"/>.
/// </summary>
/// <param name="dmbProvider">The <see cref="IDmbProvider"/> to check.</param>
/// <returns><see langword="true"/> if the engine of <paramref name="dmbProvider"/> is <see cref="EngineType.Byond"/>, <see langword="false"/> otherwise.</returns>
bool CanUseSwappableDmbProvider(IDmbProvider dmbProvider)
{
    if (dmbProvider.EngineVersion.Engine != EngineType.Byond)
    {
        Logger.LogDebug("Not using SwappableDmbProvider for engine type {engineType}", dmbProvider.EngineVersion.Engine);
        return false;
    }

    return true;
}
367
/// <summary>
/// Create the initial link to the live game directory using <see cref="ActiveSwappable"/>.
/// </summary>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="ValueTask"/> representing the running operation.</returns>
async ValueTask InitialLink(CancellationToken cancellationToken)
{
    // Preparation has to be finished before the provider may be made active.
    Task preparation = ActiveSwappable!.FinishActivationPreparation(cancellationToken);
    await preparation;

    Logger.LogTrace("Linking compile job...");

    ValueTask activation = ActiveSwappable.MakeActive(cancellationToken);
    await activation;
}
379
/// <summary>
/// Suspends the server process, makes <paramref name="newProvider"/> active, then resumes the process.
/// </summary>
/// <param name="newProvider">The <see cref="SwappableDmbProvider"/> to activate.</param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="ValueTask"/> representing the running operation.</returns>
async ValueTask PerformDmbSwap(SwappableDmbProvider newProvider, CancellationToken cancellationToken)
{
    Logger.LogDebug("Swapping to compile job {id}...", newProvider.CompileJob.Id);

    await newProvider.FinishActivationPreparation(cancellationToken);

    var session = Server!;
    var processSuspended = false;
    try
    {
        session.SuspendProcess();
        processSuspended = true;
    }
    catch (Exception ex)
    {
        // A failed suspend is logged and the swap is still attempted.
        Logger.LogWarning(ex, "Exception while suspending server!");
    }

    try
    {
        Logger.LogTrace("Making new provider {id} active...", newProvider.CompileJob.Id);
        await newProvider.MakeActive(cancellationToken);
    }
    finally
    {
        // Let this throw hard if it fails
        if (processSuspended)
        {
            session.ResumeProcess();
        }
    }
}
416
/// <summary>
/// Asynchronously drain <see cref="deploymentCleanupTasks"/>.
/// </summary>
/// <param name="blocking">
/// If <see langword="true"/>, every queued task is removed and waited on.
/// If <see langword="false"/>, only tasks that have already completed are removed.
/// </param>
/// <returns>A <see cref="Task"/> that completes when all removed tasks have completed.</returns>
Task DrainDeploymentCleanupTasks(bool blocking)
{
    Logger.LogTrace("DrainDeploymentCleanupTasks...");

    // Take and fire the current gate (if any) so a cleanup task waiting on it can proceed.
    var localDeploymentCleanupGate = Interlocked.Exchange(ref deploymentCleanupGate, null);
    localDeploymentCleanupGate?.TrySetResult();

    List<Task> localDeploymentCleanupTasks;

    // The list is also appended to by HandleNormalReboot, so it is only touched under its lock.
    lock (deploymentCleanupTasks)
    {
        var totalActiveTasks = deploymentCleanupTasks.Count;
        localDeploymentCleanupTasks = new List<Task>(totalActiveTasks);

        // Walk backwards so RemoveAt does not shift the indices still to be visited.
        for (var i = totalActiveTasks - 1; i >= 0; --i)
        {
            var currentTask = deploymentCleanupTasks[i];
            if (!blocking && !currentTask.IsCompleted)
            {
                continue;
            }

            localDeploymentCleanupTasks.Add(currentTask);
            deploymentCleanupTasks.RemoveAt(i);
        }
    }

    return Task.WhenAll(localDeploymentCleanupTasks);
}
446 }
447}
virtual long? Id
The ID of the entity.
Definition EntityId.cs:14
string? DmeName
The .dme file used for compilation.
Definition CompileJob.cs:17
IDmbProvider LockNextDmb(string reason, [CallerFilePath] string? callerFile=null, [CallerLineNumber] int callerLine=default)
Gets the next IDmbProvider. DmbAvailable is a precondition. Returns a new IDmbProvider.
A IDmbProvider that uses filesystem links to change directory structure underneath the server process...
Task FinishActivationPreparation(CancellationToken cancellationToken)
Should be awaited before calling MakeActive(CancellationToken) to ensure the SwappableDmbProvider is...
bool Swapped
If MakeActive(CancellationToken) has been run.
ValueTask MakeActive(CancellationToken cancellationToken)
Make the SwappableDmbProvider active by replacing the live link with our CompileJob.
ValueTask Update(ReattachInformation reattachInformation, CancellationToken cancellationToken)
Update some reattachInformation. Returns a ValueTask representing the running operation.
A IWatchdog that, instead of killing servers for updates, uses the wonders of filesystem links to swa...
readonly List< Task > deploymentCleanupTasks
List<T> of Tasks that are waiting to clean up old deployments.
override async ValueTask< IDmbProvider > PrepServerForLaunch(IDmbProvider dmbToUse, CancellationToken cancellationToken)
Prepare the server to launch a new instance with the WatchdogBase.ActiveLaunchParameters and a given ...
bool CanUseSwappableDmbProvider(IDmbProvider dmbProvider)
If the SwappableDmbProvider feature of the AdvancedWatchdog can be used with a given dmbProvider.
SwappableDmbProvider? pendingSwappable
The active SwappableDmbProvider for WatchdogBase.ActiveLaunchParameters.
async ValueTask InitialLink(CancellationToken cancellationToken)
Create the initial link to the live game directory using ActiveSwappable.
SwappableDmbProvider? ActiveSwappable
The SwappableDmbProvider for WatchdogBase.LastLaunchParameters.
override async ValueTask< MonitorAction > HandleNormalReboot(CancellationToken cancellationToken)
Handler for MonitorActivationReason.ActiveServerRebooted when the RebootState is RebootState....
Task DrainDeploymentCleanupTasks(bool blocking)
Asynchronously drain deploymentCleanupTasks.
SwappableDmbProvider CreateSwappableDmbProvider(IDmbProvider dmbProvider)
Create a SwappableDmbProvider for a given dmbProvider.
override async ValueTask SessionStartupPersist(CancellationToken cancellationToken)
Called to save the current Server into the WatchdogBase.SessionPersistor when initially launched....
override async ValueTask HandleNewDmbAvailable(CancellationToken cancellationToken)
Handler for MonitorActivationReason.NewDmbAvailable. Returns a ValueTask representing the running operation.
IFilesystemLinkFactory LinkFactory
The IFilesystemLinkFactory for the AdvancedWatchdog.
volatile TaskCompletionSource? deploymentCleanupGate
The TaskCompletionSource representing the cleanup of an unused IDmbProvider.
async ValueTask PerformDmbSwap(SwappableDmbProvider newProvider, CancellationToken cancellationToken)
Suspends the BasicWatchdog.Server and calls SwappableDmbProvider.MakeActive(CancellationToken) on a n...
ValueTask ApplyInitialDmb(CancellationToken cancellationToken)
Set the ReattachInformation.InitialDmb for the BasicWatchdog.Server.
AdvancedWatchdog(IChatManager chat, ISessionControllerFactory sessionControllerFactory, IDmbFactory dmbFactory, ISessionPersistor sessionPersistor, IJobManager jobManager, IServerControl serverControl, IAsyncDelayer asyncDelayer, IIOManager diagnosticsIOManager, IEventConsumer eventConsumer, IRemoteDeploymentManagerFactory remoteDeploymentManagerFactory, IMetricFactory metricFactory, IIOManager gameIOManager, IFilesystemLinkFactory linkFactory, ILogger< AdvancedWatchdog > logger, DreamDaemonLaunchParameters initialLaunchParameters, Api.Models.Instance instance, bool autoStart)
Initializes a new instance of the AdvancedWatchdog class.
override async ValueTask< MonitorAction > HandleMonitorWakeup(MonitorActivationReason reason, CancellationToken cancellationToken)
readonly IJobManager jobManager
The IJobManager for the WatchdogBase.
ILogger< WatchdogBase > Logger
The ILogger for the WatchdogBase.
Models.CompileJob? ActiveCompileJob
Retrieves the Models.CompileJob currently running on the server.
readonly bool autoStart
If the WatchdogBase should LaunchNoLock(bool, bool, bool, ReattachInformation, CancellationToken) in ...
readonly IEventConsumer eventConsumer
The IEventConsumer that is not the WatchdogBase.
readonly IRemoteDeploymentManagerFactory remoteDeploymentManagerFactory
The IRemoteDeploymentManagerFactory for the WatchdogBase.
readonly IIOManager diagnosticsIOManager
The IIOManager pointing to the Diagnostics directory.
DreamDaemonLaunchParameters ActiveLaunchParameters
The DreamDaemonLaunchParameters to be applied.
async ValueTask BeforeApplyDmb(Models.CompileJob newCompileJob, CancellationToken cancellationToken)
To be called before a given newCompileJob goes live.
IChatManager Chat
The IChatManager for the WatchdogBase.
ValueTask Restart()
Restarts the Host. Returns a ValueTask representing the running operation.
async ValueTask Delay(TimeSpan timeSpan, CancellationToken cancellationToken)
Create a Task that completes after a given timeSpan. Returns a ValueTask representing the running operation.
For managing connected chat services.
void QueueWatchdogMessage(string message)
Queue a chat message to configured watchdog channels.
Provides absolute paths to the latest compiled .dmbs.
EngineVersion EngineVersion
The Api.Models.EngineVersion used to build the .dmb.
Models.CompileJob CompileJob
The CompileJob of the .dmb.
Consumes EventTypes and takes the appropriate actions.
Handles saving and loading ReattachInformation.
Represents a service that may take an updated Host assembly and run it, stopping the current assembly...
Interface for using filesystems.
Definition IIOManager.cs:13
Manages the runtime of Jobs.
EngineType
The type of engine the codebase is using.
Definition EngineType.cs:7
MonitorAction
The action for the monitor loop to take when control is returned to it.
MonitorActivationReason
Reasons for the monitor to wake up.