1
2
3
4 package main
5
6 import (
7 "flag"
8 "fmt"
9 "io"
10 "os"
11 "os/exec"
12 "path/filepath"
13 "syscall"
14 "time"
15
16 "github.com/cenkalti/backoff/v4"
17 "github.com/containerd/cgroups"
18 cgroupstats "github.com/containerd/cgroups/stats/v1"
19 oci "github.com/opencontainers/runtime-spec/specs-go"
20 "github.com/pkg/errors"
21 "github.com/sirupsen/logrus"
22 "go.opencensus.io/trace"
23
24 "github.com/Microsoft/hcsshim/internal/guest/bridge"
25 "github.com/Microsoft/hcsshim/internal/guest/kmsg"
26 "github.com/Microsoft/hcsshim/internal/guest/runtime/hcsv2"
27 "github.com/Microsoft/hcsshim/internal/guest/runtime/runc"
28 "github.com/Microsoft/hcsshim/internal/guest/transport"
29 "github.com/Microsoft/hcsshim/internal/guestpath"
30 "github.com/Microsoft/hcsshim/internal/log"
31 "github.com/Microsoft/hcsshim/internal/oc"
32 "github.com/Microsoft/hcsshim/pkg/securitypolicy"
33 )
34
35 func memoryLogFormat(metrics *cgroupstats.Metrics) logrus.Fields {
36 return logrus.Fields{
37 "memoryUsage": metrics.Memory.Usage.Usage,
38 "memoryUsageMax": metrics.Memory.Usage.Max,
39 "memoryUsageLimit": metrics.Memory.Usage.Limit,
40 "swapUsage": metrics.Memory.Swap.Usage,
41 "swapUsageMax": metrics.Memory.Swap.Max,
42 "swapUsageLimit": metrics.Memory.Swap.Limit,
43 "kernelUsage": metrics.Memory.Kernel.Usage,
44 "kernelUsageMax": metrics.Memory.Kernel.Max,
45 "kernelUsageLimit": metrics.Memory.Kernel.Limit,
46 }
47 }
48
49 func readMemoryEvents(startTime time.Time, efdFile *os.File, cgName string, threshold int64, cg cgroups.Cgroup) {
50
51
52 count := 0
53 buf := make([]byte, 8)
54 for {
55 if _, err := efdFile.Read(buf); err != nil {
56 logrus.WithError(err).WithField("cgroup", cgName).Error("failed to read from eventfd")
57 return
58 }
59
60
61
62
63 _, err := os.Lstat(fmt.Sprintf("/sys/fs/cgroup/memory%s/cgroup.event_control", cgName))
64 if os.IsNotExist(err) {
65 return
66 }
67
68 count++
69 msg := "memory usage for cgroup exceeded threshold"
70 entry := logrus.WithFields(logrus.Fields{
71 "gcsStartTime": startTime,
72 "time": time.Now(),
73 "cgroup": cgName,
74 "thresholdBytes": threshold,
75 "count": count,
76 })
77
78
79 time.Sleep(time.Second)
80 metrics, err := cg.Stat(cgroups.IgnoreNotExist)
81 if err != nil {
82
83
84
85 entry.WithError(err).Error(msg)
86 } else {
87 entry.WithFields(memoryLogFormat(metrics)).Warn(msg)
88 }
89 }
90 }
91
92
93
94
95
96 func runWithRestartMonitor(arg0 string, args ...string) {
97 backoffSettings := backoff.NewExponentialBackOff()
98
99
100 backoffSettings.MaxInterval = time.Minute * 10
101 for {
102 command := exec.Command(arg0, args...)
103 if err := command.Run(); err != nil {
104 logrus.WithFields(logrus.Fields{
105 "error": err,
106 "command": command.Args,
107 }).Warn("restart monitor: run command returns error")
108 }
109 backOffTime := backoffSettings.NextBackOff()
110
111 time.Sleep(backOffTime)
112 }
113 }
114
115
116
117
118
119
120 func startTimeSyncService() error {
121 ptpClassDir, err := os.Open("/sys/class/ptp")
122 if err != nil {
123 return errors.Wrap(err, "failed to open PTP class directory")
124 }
125
126 ptpDirList, err := ptpClassDir.Readdirnames(-1)
127 if err != nil {
128 return errors.Wrap(err, "failed to list PTP class directory")
129 }
130
131 var ptpDirPath string
132 found := false
133
134 expectedClockName := "hyperv\n"
135 for _, ptpDirPath = range ptpDirList {
136 clockNameFilePath := filepath.Join(ptpClassDir.Name(), ptpDirPath, "clock_name")
137 buf, err := os.ReadFile(clockNameFilePath)
138 if err != nil && !os.IsNotExist(err) {
139 return errors.Wrapf(err, "failed to read clock name file at %s", clockNameFilePath)
140 }
141
142 if string(buf) == expectedClockName {
143 found = true
144 break
145 }
146 }
147
148 if !found {
149 return errors.Errorf("no PTP device found with name \"%s\"", expectedClockName)
150 }
151
152
153 ptpDevPath := filepath.Join("/dev", filepath.Base(ptpDirPath))
154
155 chronydConfigString := fmt.Sprintf("refclock PHC %s poll 3 dpoll -2 offset 0 stratum 2\nmakestep 0.1 -1\n", ptpDevPath)
156 chronydConfPath := "/tmp/chronyd.conf"
157 err = os.WriteFile(chronydConfPath, []byte(chronydConfigString), 0644)
158 if err != nil {
159 return errors.Wrapf(err, "failed to create chronyd conf file %s", chronydConfPath)
160 }
161
162
163
164
165 go runWithRestartMonitor("chronyd", "-n", "-f", chronydConfPath)
166 return nil
167 }
168
169 func main() {
170 startTime := time.Now()
171 logLevel := flag.String("loglevel",
172 "debug",
173 "Logging Level: debug, info, warning, error, fatal, panic.")
174 coreDumpLoc := flag.String("core-dump-location",
175 "",
176 "The location/format where process core dumps will be written to.")
177 kmsgLogLevel := flag.Uint("kmsgLogLevel",
178 uint(kmsg.Warning),
179 "Log all kmsg entries with a priority less than or equal to the supplied level.")
180 logFile := flag.String("logfile",
181 "",
182 "Logging Target: An optional file name/path. Omit for console output.")
183 logFormat := flag.String("log-format", "text", "Logging Format: text or json")
184 useInOutErr := flag.Bool("use-inouterr",
185 false,
186 "If true use stdin/stdout for bridge communication and stderr for logging")
187 v4 := flag.Bool("v4", false, "enable the v4 protocol support and v2 schema")
188 rootMemReserveBytes := flag.Uint64("root-mem-reserve-bytes",
189 75*1024*1024,
190 "the amount of memory reserved for the orchestration, the rest will be assigned to containers")
191 gcsMemLimitBytes := flag.Uint64("gcs-mem-limit-bytes",
192 50*1024*1024,
193 "the maximum amount of memory the gcs can use")
194 disableTimeSync := flag.Bool("disable-time-sync",
195 false,
196 "If true do not run chronyd time synchronization service inside the UVM")
197 scrubLogs := flag.Bool("scrub-logs", false, "If true, scrub potentially sensitive information from logging")
198 initialPolicyStance := flag.String("initial-policy-stance",
199 "allow",
200 "Stance: allow, deny.")
201
202 flag.Usage = func() {
203 fmt.Fprintf(os.Stderr, "\nUsage of %s:\n", os.Args[0])
204 flag.PrintDefaults()
205 fmt.Fprintf(os.Stderr, "Examples:\n")
206 fmt.Fprintf(os.Stderr, " %s -loglevel=debug -logfile=/run/gcs/gcs.log\n", os.Args[0])
207 fmt.Fprintf(os.Stderr, " %s -loglevel=info -logfile=stdout\n", os.Args[0])
208 }
209
210 flag.Parse()
211
212
213 if *v4 {
214 trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()})
215 trace.RegisterExporter(&oc.LogrusExporter{})
216 }
217
218 logrus.AddHook(log.NewHook())
219
220 var logWriter *os.File
221 if *logFile != "" {
222 logFileHandle, err := os.OpenFile(*logFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
223 if err != nil {
224 logrus.WithFields(logrus.Fields{
225 "path": *logFile,
226 logrus.ErrorKey: err,
227 }).Fatal("failed to create log file")
228 }
229 logWriter = logFileHandle
230 } else {
231
232 logWriter = os.Stderr
233 }
234
235
236 var initialEnforcer securitypolicy.SecurityPolicyEnforcer
237 switch *initialPolicyStance {
238 case "allow":
239 initialEnforcer = &securitypolicy.OpenDoorSecurityPolicyEnforcer{}
240 logrus.SetOutput(logWriter)
241 case "deny":
242 initialEnforcer = &securitypolicy.ClosedDoorSecurityPolicyEnforcer{}
243 logrus.SetOutput(io.Discard)
244 default:
245 logrus.WithFields(logrus.Fields{
246 "initial-policy-stance": *initialPolicyStance,
247 }).Fatal("unknown initial-policy-stance")
248 }
249
250 switch *logFormat {
251 case "text":
252
253 case "json":
254 logrus.SetFormatter(&logrus.JSONFormatter{
255 TimestampFormat: time.RFC3339Nano,
256 })
257 default:
258 logrus.WithFields(logrus.Fields{
259 "log-format": *logFormat,
260 }).Fatal("unknown log-format")
261 }
262
263 level, err := logrus.ParseLevel(*logLevel)
264 if err != nil {
265 logrus.Fatal(err)
266 }
267
268 logrus.SetLevel(level)
269
270 log.SetScrubbing(*scrubLogs)
271
272 baseLogPath := guestpath.LCOWRootPrefixInUVM
273
274 logrus.Info("GCS started")
275
276
277
278
279 if *coreDumpLoc != "" {
280 if err := os.WriteFile(
281 "/proc/sys/kernel/core_pattern",
282 []byte(*coreDumpLoc),
283 0644,
284 ); err != nil {
285 logrus.WithError(err).Fatal("failed to set core dump location")
286 }
287 }
288
289
290 go kmsg.ReadForever(kmsg.LogLevel(*kmsgLogLevel))
291
292 tport := &transport.VsockTransport{}
293 rtime, err := runc.NewRuntime(baseLogPath)
294 if err != nil {
295 logrus.WithError(err).Fatal("failed to initialize new runc runtime")
296 }
297 mux := bridge.NewBridgeMux()
298 b := bridge.Bridge{
299 Handler: mux,
300 EnableV4: *v4,
301 }
302 h := hcsv2.NewHost(rtime, tport, initialEnforcer, logWriter)
303 b.AssignHandlers(mux, h)
304
305 var bridgeIn io.ReadCloser
306 var bridgeOut io.WriteCloser
307 if *useInOutErr {
308 bridgeIn = os.Stdin
309 bridgeOut = os.Stdout
310 } else {
311 const commandPort uint32 = 0x40000000
312 bridgeCon, err := tport.Dial(commandPort)
313 if err != nil {
314 logrus.WithFields(logrus.Fields{
315 "port": commandPort,
316 logrus.ErrorKey: err,
317 }).Fatal("failed to dial host vsock connection")
318 }
319 bridgeIn = bridgeCon
320 bridgeOut = bridgeCon
321 }
322
323
324
325
326
327
328
329
330
331 if err := os.WriteFile("/sys/fs/cgroup/memory/memory.use_hierarchy", []byte("1"), 0644); err != nil {
332 logrus.WithError(err).Fatal("failed to enable hierarchy support for root cgroup")
333 }
334
335
336
337
338
339
340 sinfo := syscall.Sysinfo_t{}
341 if err := syscall.Sysinfo(&sinfo); err != nil {
342 logrus.WithError(err).Fatal("failed to get sys info")
343 }
344 containersLimit := int64(sinfo.Totalram - *rootMemReserveBytes)
345 containersControl, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/containers"), &oci.LinuxResources{
346 Memory: &oci.LinuxMemory{
347 Limit: &containersLimit,
348 },
349 })
350 if err != nil {
351 logrus.WithError(err).Fatal("failed to create containers cgroup")
352 }
353 defer containersControl.Delete()
354
355 gcsControl, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/gcs"), &oci.LinuxResources{})
356 if err != nil {
357 logrus.WithError(err).Fatal("failed to create gcs cgroup")
358 }
359 defer gcsControl.Delete()
360 if err := gcsControl.Add(cgroups.Process{Pid: os.Getpid()}); err != nil {
361 logrus.WithError(err).Fatal("failed add gcs pid to gcs cgroup")
362 }
363
364 event := cgroups.MemoryThresholdEvent(*gcsMemLimitBytes, false)
365 gefd, err := gcsControl.RegisterMemoryEvent(event)
366 if err != nil {
367 logrus.WithError(err).Fatal("failed to register memory threshold for gcs cgroup")
368 }
369 gefdFile := os.NewFile(gefd, "gefd")
370 defer gefdFile.Close()
371
372 oom, err := containersControl.OOMEventFD()
373 if err != nil {
374 logrus.WithError(err).Fatal("failed to retrieve the container cgroups oom eventfd")
375 }
376 oomFile := os.NewFile(oom, "cefd")
377 defer oomFile.Close()
378
379
380 if !(*disableTimeSync) {
381 if err = startTimeSyncService(); err != nil {
382 logrus.WithError(err).Fatal("failed to start time synchronization service")
383 }
384 }
385
386 go readMemoryEvents(startTime, gefdFile, "/gcs", int64(*gcsMemLimitBytes), gcsControl)
387 go readMemoryEvents(startTime, oomFile, "/containers", containersLimit, containersControl)
388 err = b.ListenAndServe(bridgeIn, bridgeOut)
389 if err != nil {
390 logrus.WithFields(logrus.Fields{
391 logrus.ErrorKey: err,
392 }).Fatal("failed to serve gcs service")
393 }
394 }
395
View as plain text