Bump github.com/containerd/containerd from 1.6.4 to 1.6.6

Bumps [github.com/containerd/containerd](https://github.com/containerd/containerd) from 1.6.4 to 1.6.6.
- [Release notes](https://github.com/containerd/containerd/releases)
- [Changelog](https://github.com/containerd/containerd/blob/main/RELEASES.md)
- [Commits](https://github.com/containerd/containerd/compare/v1.6.4...v1.6.6)

---
updated-dependencies:
- dependency-name: github.com/containerd/containerd
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Authored by dependabot[bot] on 2022-07-08 14:48:41 +00:00; committed by Alex Ellis
parent 5cedf28929
commit 6dbc33d045
20 changed files with 1399 additions and 62 deletions

go.mod

@@ -6,8 +6,8 @@ require (
github.com/alexellis/go-execute v0.5.0
github.com/alexellis/k3sup v0.0.0-20220105194923-e2bb18116d36
github.com/compose-spec/compose-go v0.0.0-20200528042322-36d8ce368e05
-github.com/containerd/containerd v1.6.4
-github.com/containerd/go-cni v1.1.5
+github.com/containerd/containerd v1.6.6
+github.com/containerd/go-cni v1.1.6
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
github.com/docker/cli v0.0.0-20191105005515-99c5edceb48d
github.com/docker/distribution v2.8.1+incompatible
@@ -31,7 +31,7 @@ require (
require (
github.com/Microsoft/go-winio v0.5.1 // indirect
-github.com/Microsoft/hcsshim v0.9.2 // indirect
+github.com/Microsoft/hcsshim v0.9.3 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/containerd/cgroups v1.0.3 // indirect
@@ -39,7 +39,7 @@ require (
github.com/containerd/fifo v1.0.0 // indirect
github.com/containerd/ttrpc v1.1.0 // indirect
github.com/containerd/typeurl v1.0.2 // indirect
-github.com/containernetworking/cni v1.1.0 // indirect
+github.com/containernetworking/cni v1.1.1 // indirect
github.com/docker/go-connections v0.4.0 // indirect
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
github.com/gogo/googleapis v1.4.0 // indirect
@@ -58,7 +58,7 @@ require (
github.com/moby/sys/signal v0.6.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
-github.com/opencontainers/runc v1.1.1 // indirect
+github.com/opencontainers/runc v1.1.2 // indirect
github.com/opencontainers/selinux v1.10.1 // indirect
github.com/prometheus/client_golang v1.12.2 // indirect
github.com/prometheus/client_model v0.2.0 // indirect

go.sum

@@ -66,8 +66,8 @@ github.com/Microsoft/hcsshim v0.8.14/go.mod h1:NtVKoYxQuTLx6gEq0L96c9Ju4JbRJ4nY2
github.com/Microsoft/hcsshim v0.8.15/go.mod h1:x38A4YbHbdxJtc0sF6oIz+RG0npwSCAvn69iY6URG00=
github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600=
github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4=
-github.com/Microsoft/hcsshim v0.9.2 h1:wB06W5aYFfUB3IvootYAY2WnOmIdgPGfqSI6tufQNnY=
-github.com/Microsoft/hcsshim v0.9.2/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc=
+github.com/Microsoft/hcsshim v0.9.3 h1:k371PzBuRrz2b+ebGuI2nVgVhgsVX60jMfSw80NECxo=
+github.com/Microsoft/hcsshim v0.9.3/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc=
github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU=
github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
@@ -177,8 +177,8 @@ github.com/containerd/containerd v1.5.0-beta.4/go.mod h1:GmdgZd2zA2GYIBZ0w09Zvgq
github.com/containerd/containerd v1.5.0-rc.0/go.mod h1:V/IXoMqNGgBlabz3tHD2TWDoTJseu1FGOKuoA4nNb2s=
github.com/containerd/containerd v1.5.1/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g=
github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0QMhscqVp1AR9c=
-github.com/containerd/containerd v1.6.4 h1:SEDZBp10mhCp+hkO3Njz/YhGrI7ah3edNcUlRdUPOgg=
-github.com/containerd/containerd v1.6.4/go.mod h1:oWOqbuJUZmOVafhA0lj2NAXbiO1u7F0K5l1bUgdyo94=
+github.com/containerd/containerd v1.6.6 h1:xJNPhbrmz8xAMDNoVjHy9YHtWwEQNS+CDkcIRh7t8Y0=
+github.com/containerd/containerd v1.6.6/go.mod h1:ZoP1geJldzCVY3Tonoz7b1IXk8rIX0Nltt5QE4OMNk0=
github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
github.com/containerd/continuity v0.0.0-20191127005431-f65d91d395eb/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
@@ -197,8 +197,8 @@ github.com/containerd/fifo v1.0.0 h1:6PirWBr9/L7GDamKr+XM0IeUFXu5mf3M/BPpH9gaLBU
github.com/containerd/fifo v1.0.0/go.mod h1:ocF/ME1SX5b1AOlWi9r677YJmCPSwwWnQ9O123vzpE4=
github.com/containerd/go-cni v1.0.1/go.mod h1:+vUpYxKvAF72G9i1WoDOiPGRtQpqsNW/ZHtSlv++smU=
github.com/containerd/go-cni v1.0.2/go.mod h1:nrNABBHzu0ZwCug9Ije8hL2xBCYh/pjfMb1aZGrrohk=
-github.com/containerd/go-cni v1.1.5 h1:WUkuZ6kDkmECdd/qEPetq1czj6ivDkAoaOpL8yv8vO8=
-github.com/containerd/go-cni v1.1.5/go.mod h1:Rf2ZrMycr1El589IyuRzn7RkfdRZVKaFGaxSDHVAjj0=
+github.com/containerd/go-cni v1.1.6 h1:el5WPymG5nRRLQF1EfB97FWob4Tdc8INg8RZMaXWZlo=
+github.com/containerd/go-cni v1.1.6/go.mod h1:BWtoWl5ghVymxu6MBjg79W9NZrCRyHIdUtk4cauMe34=
github.com/containerd/go-runc v0.0.0-20180907222934-5a6d9f37cfa3/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0=
github.com/containerd/go-runc v0.0.0-20190911050354-e029b79d8cda/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0=
github.com/containerd/go-runc v0.0.0-20200220073739-7016d3ce2328/go.mod h1:PpyHrqVs8FTi9vpyHwPwiNEGaACDxT/N/pLcvMSRA9g=
@@ -232,8 +232,8 @@ github.com/containerd/zfs v1.0.0/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNR
github.com/containernetworking/cni v0.7.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY=
github.com/containernetworking/cni v0.8.0/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY=
github.com/containernetworking/cni v0.8.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY=
-github.com/containernetworking/cni v1.1.0 h1:T00oIz4hef+/p9gpRZa57SnIN+QnbmAHBjbxaOSFo9U=
-github.com/containernetworking/cni v1.1.0/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw=
+github.com/containernetworking/cni v1.1.1 h1:ky20T7c0MvKvbMOwS/FrlbNwjEoqJEUUYfsL4b0mc4k=
+github.com/containernetworking/cni v1.1.1/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw=
github.com/containernetworking/plugins v0.8.6/go.mod h1:qnw5mN19D8fIwkqW7oHHYDHVlzhJpcY6TQxn/fUyDDM=
github.com/containernetworking/plugins v0.9.1/go.mod h1:xP/idU2ldlzN6m4p5LmGiwRDjeJr6FLK6vuiUwoH7P8=
github.com/containers/ocicrypt v1.0.1/go.mod h1:MeJDzk1RJHv89LjsH0Sp5KTY3ZYkjXO/C+bKAeWFIrc=
@@ -634,8 +634,8 @@ github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h
github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0=
github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0=
-github.com/opencontainers/runc v1.1.1 h1:PJ9DSs2sVwE0iVr++pAHE6QkS9tzcVWozlPifdwMgrU=
-github.com/opencontainers/runc v1.1.1/go.mod h1:Tj1hFw6eFWp/o33uxGf5yF2BX5yz2Z6iptFpuvbbKqc=
+github.com/opencontainers/runc v1.1.2 h1:2VSZwLx5k/BfsBxMMipG/LYUnmqOD/BPkIVgQUcTlLw=
+github.com/opencontainers/runc v1.1.2/go.mod h1:Tj1hFw6eFWp/o33uxGf5yF2BX5yz2Z6iptFpuvbbKqc=
github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=


@@ -4,17 +4,22 @@ import (
"context"
"encoding/json"
"errors"
+"fmt"
"strings"
"sync"
"syscall"
+"time"
"github.com/Microsoft/hcsshim/internal/cow"
"github.com/Microsoft/hcsshim/internal/hcs/schema1"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
+"github.com/Microsoft/hcsshim/internal/jobobject"
"github.com/Microsoft/hcsshim/internal/log"
+"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/internal/timeout"
"github.com/Microsoft/hcsshim/internal/vmcompute"
+"github.com/sirupsen/logrus"
"go.opencensus.io/trace"
)
@@ -28,7 +33,8 @@ type System struct {
waitBlock chan struct{}
waitError error
exitError error
-os, typ string
+os, typ, owner string
+startTime time.Time
}
func newSystem(id string) *System {
@@ -38,6 +44,11 @@ func newSystem(id string) *System {
}
}
+// Implementation detail for silo naming; this should NOT be relied upon very heavily.
+func siloNameFmt(containerID string) string {
+return fmt.Sprintf(`\Container_%s`, containerID)
+}
// CreateComputeSystem creates a new compute system with the given configuration but does not start it.
func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface interface{}) (_ *System, err error) {
operation := "hcs::CreateComputeSystem"
@@ -127,6 +138,7 @@ func (computeSystem *System) getCachedProperties(ctx context.Context) error {
}
computeSystem.typ = strings.ToLower(props.SystemType)
computeSystem.os = strings.ToLower(props.RuntimeOSType)
+computeSystem.owner = strings.ToLower(props.Owner)
if computeSystem.os == "" && computeSystem.typ == "container" {
// Pre-RS5 HCS did not return the OS, but it only supported containers
// that ran Windows.
@@ -195,7 +207,7 @@ func (computeSystem *System) Start(ctx context.Context) (err error) {
if err != nil {
return makeSystemError(computeSystem, operation, err, events)
}
+computeSystem.startTime = time.Now()
return nil
}
@@ -324,11 +336,115 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr
return properties, nil
}
// PropertiesV2 returns the requested container properties targeting a V2 schema container.
func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) {
computeSystem.handleLock.RLock()
defer computeSystem.handleLock.RUnlock()
// queryInProc handles querying for container properties without reaching out to HCS. `props`
// will be updated to contain any data returned from the queries present in `types`. Any properties
// that failed to be queried are tallied up and returned as the first return value. Failures on
// query are NOT considered errors; the only failure case for this method is if the container's job object
// cannot be opened.
func (computeSystem *System) queryInProc(ctx context.Context, props *hcsschema.Properties, types []hcsschema.PropertyType) ([]hcsschema.PropertyType, error) {
// In the future we can make use of some new functionality in the HCS that allows you
// to pass a job object for HCS to use for the container. Currently, the only way we'll
// be able to open the job/silo is if we're running as SYSTEM.
jobOptions := &jobobject.Options{
UseNTVariant: true,
Name: siloNameFmt(computeSystem.id),
}
job, err := jobobject.Open(ctx, jobOptions)
if err != nil {
return nil, err
}
defer job.Close()
var fallbackQueryTypes []hcsschema.PropertyType
for _, propType := range types {
switch propType {
case hcsschema.PTStatistics:
// Handle a bad caller asking for the same type twice. No use in re-querying if this is
// filled in already.
if props.Statistics == nil {
props.Statistics, err = computeSystem.statisticsInProc(job)
if err != nil {
log.G(ctx).WithError(err).Warn("failed to get statistics in-proc")
fallbackQueryTypes = append(fallbackQueryTypes, propType)
}
}
default:
fallbackQueryTypes = append(fallbackQueryTypes, propType)
}
}
return fallbackQueryTypes, nil
}
// statisticsInProc emulates what HCS does to grab statistics for a given container with a small
// change to make grabbing the private working set total much more efficient.
func (computeSystem *System) statisticsInProc(job *jobobject.JobObject) (*hcsschema.Statistics, error) {
// Take the timestamp for these stats before we grab them, to match HCS behavior
timestamp := time.Now()
memInfo, err := job.QueryMemoryStats()
if err != nil {
return nil, err
}
processorInfo, err := job.QueryProcessorStats()
if err != nil {
return nil, err
}
storageInfo, err := job.QueryStorageStats()
if err != nil {
return nil, err
}
// This calculates the private working set more efficiently than HCS does. HCS calls NtQuerySystemInformation
// with the class SystemProcessInformation, which returns an array containing system information for *every*
// process running on the machine. It then grabs the pids that are running in the container, filters down
// the entries in the array to only what's running in that silo, and starts tallying up the total. This doesn't
// scale well, as performance degrades the more processes are running on the machine in general, not
// just in the container. All of the additional information besides the WorkingSetPrivateSize field is also
// ignored, making it wasted work to fetch.
//
// HCS only lets you grab statistics in an all-or-nothing fashion, so we can't just grab the private
// working set ourselves and ask for everything else separately. The optimization we can make here is
// to open the silo ourselves and do the same queries for the rest of the info, as well as calculating
// the private working set in a more efficient manner by:
//
// 1. Find the pids running in the silo
// 2. Get a process handle for every process (only need PROCESS_QUERY_LIMITED_INFORMATION access)
// 3. Call NtQueryInformationProcess on each process with the class ProcessVmCounters
// 4. Tally up the total using the field PrivateWorkingSetSize in VM_COUNTERS_EX2.
privateWorkingSet, err := job.QueryPrivateWorkingSet()
if err != nil {
return nil, err
}
return &hcsschema.Statistics{
Timestamp: timestamp,
ContainerStartTime: computeSystem.startTime,
Uptime100ns: uint64(time.Since(computeSystem.startTime).Nanoseconds()) / 100,
Memory: &hcsschema.MemoryStats{
MemoryUsageCommitBytes: memInfo.JobMemory,
MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed,
MemoryUsagePrivateWorkingSetBytes: privateWorkingSet,
},
Processor: &hcsschema.ProcessorStats{
RuntimeKernel100ns: uint64(processorInfo.TotalKernelTime),
RuntimeUser100ns: uint64(processorInfo.TotalUserTime),
TotalRuntime100ns: uint64(processorInfo.TotalKernelTime + processorInfo.TotalUserTime),
},
Storage: &hcsschema.StorageStats{
ReadCountNormalized: uint64(storageInfo.ReadStats.IoCount),
ReadSizeBytes: storageInfo.ReadStats.TotalSize,
WriteCountNormalized: uint64(storageInfo.WriteStats.IoCount),
WriteSizeBytes: storageInfo.WriteStats.TotalSize,
},
}, nil
}
// hcsPropertiesV2Query is a helper to make a HcsGetComputeSystemProperties call using the V2 schema property types.
func (computeSystem *System) hcsPropertiesV2Query(ctx context.Context, types []hcsschema.PropertyType) (*hcsschema.Properties, error) {
operation := "hcs::System::PropertiesV2"
queryBytes, err := json.Marshal(hcsschema.PropertyQuery{PropertyTypes: types})
@@ -345,12 +461,66 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem
if propertiesJSON == "" {
return nil, ErrUnexpectedValue
}
-properties := &hcsschema.Properties{}
-if err := json.Unmarshal([]byte(propertiesJSON), properties); err != nil {
+props := &hcsschema.Properties{}
+if err := json.Unmarshal([]byte(propertiesJSON), props); err != nil {
return nil, makeSystemError(computeSystem, operation, err, nil)
}
-return properties, nil
+return props, nil
}
// PropertiesV2 returns the requested compute system's properties targeting a V2 schema compute system.
func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (_ *hcsschema.Properties, err error) {
computeSystem.handleLock.RLock()
defer computeSystem.handleLock.RUnlock()
// Let HCS tally up the total for VM based queries instead of querying ourselves.
if computeSystem.typ != "container" {
return computeSystem.hcsPropertiesV2Query(ctx, types)
}
// Define a starter Properties struct with the default fields returned from every
// query. Owner is only returned from Statistics but it's harmless to include.
properties := &hcsschema.Properties{
Id: computeSystem.id,
SystemType: computeSystem.typ,
RuntimeOsType: computeSystem.os,
Owner: computeSystem.owner,
}
logEntry := log.G(ctx)
// First let's try to query in-process without reaching out to HCS. If any of the queries fail
// we'll take note and fall back to querying HCS for the failed types.
fallbackTypes, err := computeSystem.queryInProc(ctx, properties, types)
if err == nil && len(fallbackTypes) == 0 {
return properties, nil
} else if err != nil {
logEntry = logEntry.WithError(fmt.Errorf("failed to query compute system properties in-proc: %w", err))
fallbackTypes = types
}
logEntry.WithFields(logrus.Fields{
logfields.ContainerID: computeSystem.id,
"propertyTypes": fallbackTypes,
}).Info("falling back to HCS for property type queries")
hcsProperties, err := computeSystem.hcsPropertiesV2Query(ctx, fallbackTypes)
if err != nil {
return nil, err
}
// Now add in anything that we might have successfully queried in process.
if properties.Statistics != nil {
hcsProperties.Statistics = properties.Statistics
hcsProperties.Owner = properties.Owner
}
// For future support for querying processlist in-proc as well.
if properties.ProcessList != nil {
hcsProperties.ProcessList = properties.ProcessList
}
return hcsProperties, nil
}
// Pause pauses the execution of the computeSystem. This feature is not enabled in TP5.
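
For context, a minimal sketch of the in-proc path above: open the container's silo by the name format siloNameFmt documents and query the job object directly, returning nothing when the open fails, which is the point at which PropertiesV2 falls back to hcsPropertiesV2Query. This is an illustrative helper, not code from the change; it assumes it lives inside the hcsshim module (the packages are internal), that the process runs as SYSTEM per the queryInProc comment, and statsInProcOrNil/containerID are invented names.

package hcsexample // hypothetical; must live inside hcsshim to import internal packages

import (
	"context"
	"fmt"

	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
	"github.com/Microsoft/hcsshim/internal/jobobject"
)

// statsInProcOrNil (invented name) opens the container's silo and queries it
// directly, returning nil when the job object cannot be opened.
func statsInProcOrNil(ctx context.Context, containerID string) *hcsschema.Statistics {
	job, err := jobobject.Open(ctx, &jobobject.Options{
		UseNTVariant: true,                                      // open via NtOpenJobObject
		Name:         fmt.Sprintf(`\Container_%s`, containerID), // mirrors siloNameFmt above
	})
	if err != nil {
		return nil // e.g. not running as SYSTEM, or no such silo
	}
	defer job.Close()

	memInfo, err := job.QueryMemoryStats()
	if err != nil {
		return nil
	}
	return &hcsschema.Statistics{
		Memory: &hcsschema.MemoryStats{
			MemoryUsageCommitBytes:     memInfo.JobMemory,
			MemoryUsageCommitPeakBytes: memInfo.PeakJobMemoryUsed,
		},
	}
}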


@@ -21,10 +21,11 @@ const (
)
type NatPolicy struct {
-Type PolicyType `json:"Type"`
-Protocol string `json:",omitempty"`
-InternalPort uint16 `json:",omitempty"`
-ExternalPort uint16 `json:",omitempty"`
+Type PolicyType `json:"Type"`
+Protocol string `json:",omitempty"`
+InternalPort uint16 `json:",omitempty"`
+ExternalPort uint16 `json:",omitempty"`
+ExternalPortReserved bool `json:",omitempty"`
}
}
type QosPolicy struct {
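
Because every field except Type carries json:",omitempty", the new ExternalPortReserved flag is omitted from serialized policies unless it is set, keeping existing payloads byte-identical. A small sketch, with invented values, assuming hns.PolicyType is a string type (as its use here suggests) and the internal-package caveat:

p := hns.NatPolicy{Type: "NAT", InternalPort: 80, ExternalPort: 8080}
b, _ := json.Marshal(p)
fmt.Println(string(b)) // {"Type":"NAT","InternalPort":80,"ExternalPort":8080} (no ExternalPortReserved key)

p.ExternalPortReserved = true
b, _ = json.Marshal(p)
fmt.Println(string(b)) // now includes "ExternalPortReserved":true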


@@ -0,0 +1,111 @@
package jobobject
import (
"context"
"fmt"
"sync"
"unsafe"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/queue"
"github.com/Microsoft/hcsshim/internal/winapi"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
var (
ioInitOnce sync.Once
initIOErr error
// Global iocp handle that will be re-used for every job object
ioCompletionPort windows.Handle
// Mapping of job handle to queue to place notifications in.
jobMap sync.Map
)
// MsgAllProcessesExited is a type representing a message that every process in a job has exited.
type MsgAllProcessesExited struct{}
// MsgUnimplemented represents a message that we are aware of, but that isn't implemented currently.
// This should not be treated as an error.
type MsgUnimplemented struct{}
// pollIOCP polls the io completion port forever.
func pollIOCP(ctx context.Context, iocpHandle windows.Handle) {
var (
overlapped uintptr
code uint32
key uintptr
)
for {
err := windows.GetQueuedCompletionStatus(iocpHandle, &code, &key, (**windows.Overlapped)(unsafe.Pointer(&overlapped)), windows.INFINITE)
if err != nil {
log.G(ctx).WithError(err).Error("failed to poll for job object message")
continue
}
if val, ok := jobMap.Load(key); ok {
msq, ok := val.(*queue.MessageQueue)
if !ok {
log.G(ctx).WithField("value", msq).Warn("encountered non queue type in job map")
continue
}
notification, err := parseMessage(code, overlapped)
if err != nil {
log.G(ctx).WithFields(logrus.Fields{
"code": code,
"overlapped": overlapped,
}).Warn("failed to parse job object message")
continue
}
if err := msq.Write(notification); err == queue.ErrQueueClosed {
// Write will only return an error when the queue is closed.
// The only time a queue would ever be closed is when we call `Close` on
// the job it belongs to which also removes it from the jobMap, so something
// went wrong here. We can't return as this is reading messages for all jobs
// so just log it and move on.
log.G(ctx).WithFields(logrus.Fields{
"code": code,
"overlapped": overlapped,
}).Warn("tried to write to a closed queue")
continue
}
} else {
log.G(ctx).Warn("received a message for a job not present in the mapping")
}
}
}
func parseMessage(code uint32, overlapped uintptr) (interface{}, error) {
// Check code and parse out relevant information related to that notification
// that we care about. For now all we handle is the message that all processes
// in the job have exited.
switch code {
case winapi.JOB_OBJECT_MSG_ACTIVE_PROCESS_ZERO:
return MsgAllProcessesExited{}, nil
// The remaining messages are listed for completeness, and to ensure that falling
// into the default case really means we received a code we don't know how to handle.
case winapi.JOB_OBJECT_MSG_END_OF_JOB_TIME:
case winapi.JOB_OBJECT_MSG_END_OF_PROCESS_TIME:
case winapi.JOB_OBJECT_MSG_ACTIVE_PROCESS_LIMIT:
case winapi.JOB_OBJECT_MSG_NEW_PROCESS:
case winapi.JOB_OBJECT_MSG_EXIT_PROCESS:
case winapi.JOB_OBJECT_MSG_ABNORMAL_EXIT_PROCESS:
case winapi.JOB_OBJECT_MSG_PROCESS_MEMORY_LIMIT:
case winapi.JOB_OBJECT_MSG_JOB_MEMORY_LIMIT:
case winapi.JOB_OBJECT_MSG_NOTIFICATION_LIMIT:
default:
return nil, fmt.Errorf("unknown job notification type: %d", code)
}
return MsgUnimplemented{}, nil
}
// attachIOCP assigns an IO completion port to get notified of events for the
// registered job object.
func attachIOCP(job windows.Handle, iocp windows.Handle) error {
info := winapi.JOBOBJECT_ASSOCIATE_COMPLETION_PORT{
CompletionKey: job,
CompletionPort: iocp,
}
_, err := windows.SetInformationJobObject(job, windows.JobObjectAssociateCompletionPortInformation, uintptr(unsafe.Pointer(&info)), uint32(unsafe.Sizeof(info)))
return err
}
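
A sketch of how a caller consumes these notifications, assuming it is compiled within hcsshim (the package is internal); waitForJobExit, the job name, and the pid list are invented. Create registers the job's handle as the completion key on the shared IOCP; PollNotification then drains the per-job queue that pollIOCP writes into:

import (
	"context"

	"github.com/Microsoft/hcsshim/internal/jobobject"
)

// waitForJobExit (invented) blocks until every process in the job has exited.
func waitForJobExit(ctx context.Context, pids []uint32) error {
	job, err := jobobject.Create(ctx, &jobobject.Options{
		Name:          `\ExampleJob`, // hypothetical name
		Notifications: true,          // registers the job with the shared IOCP above
	})
	if err != nil {
		return err
	}
	defer job.Close()

	for _, pid := range pids {
		if err := job.Assign(pid); err != nil {
			return err
		}
	}

	for {
		msg, err := job.PollNotification()
		if err != nil {
			return err // ErrNotRegistered, or the queue was closed with the job
		}
		switch msg.(type) {
		case jobobject.MsgAllProcessesExited:
			return nil // JOB_OBJECT_MSG_ACTIVE_PROCESS_ZERO
		case jobobject.MsgUnimplemented:
			// known but unhandled notification type; deliberately not an error
		}
	}
}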


@@ -0,0 +1,499 @@
package jobobject
import (
"context"
"errors"
"fmt"
"sync"
"unsafe"
"github.com/Microsoft/hcsshim/internal/queue"
"github.com/Microsoft/hcsshim/internal/winapi"
"golang.org/x/sys/windows"
)
// This file provides higher level constructs for the win32 job object API.
// Most of the core creation and management functions are already present in "golang.org/x/sys/windows"
// (CreateJobObject, AssignProcessToJobObject, etc.) as well as most of the limit information
// structs and associated limit flags. Whatever is not present from the job object API
// in golang.org/x/sys/windows is located in /internal/winapi.
//
// https://docs.microsoft.com/en-us/windows/win32/procthread/job-objects
// JobObject is a high level wrapper around a Windows job object. Holds a handle to
// the job, a queue to receive iocp notifications about the lifecycle
// of the job and a mutex for synchronized handle access.
type JobObject struct {
handle windows.Handle
mq *queue.MessageQueue
handleLock sync.RWMutex
}
// JobLimits represents the resource constraints that can be applied to a job object.
type JobLimits struct {
CPULimit uint32
CPUWeight uint32
MemoryLimitInBytes uint64
MaxIOPS int64
MaxBandwidth int64
}
type CPURateControlType uint32
const (
WeightBased CPURateControlType = iota
RateBased
)
// Processor resource controls
const (
cpuLimitMin = 1
cpuLimitMax = 10000
cpuWeightMin = 1
cpuWeightMax = 9
)
var (
ErrAlreadyClosed = errors.New("the handle has already been closed")
ErrNotRegistered = errors.New("job is not registered to receive notifications")
)
// Options represents the set of configurable options when making or opening a job object.
type Options struct {
// `Name` specifies the name of the job object if a named job object is desired.
Name string
// `Notifications` specifies if the job will be registered to receive notifications.
// Defaults to false.
Notifications bool
// `UseNTVariant` specifies if we should use the `Nt` variant of Open/CreateJobObject.
// Defaults to false.
UseNTVariant bool
}
// Create creates a job object.
//
// If options.Name is an empty string, the job will not be assigned a name.
//
// If options.Notifications is not enabled, `PollNotification` will return immediately with error `ErrNotRegistered`.
//
// If `options` is nil, use default option values.
//
// Returns a JobObject structure and an error if there is one.
func Create(ctx context.Context, options *Options) (_ *JobObject, err error) {
if options == nil {
options = &Options{}
}
var jobName *winapi.UnicodeString
if options.Name != "" {
jobName, err = winapi.NewUnicodeString(options.Name)
if err != nil {
return nil, err
}
}
var jobHandle windows.Handle
if options.UseNTVariant {
oa := winapi.ObjectAttributes{
Length: unsafe.Sizeof(winapi.ObjectAttributes{}),
ObjectName: jobName,
Attributes: 0,
}
status := winapi.NtCreateJobObject(&jobHandle, winapi.JOB_OBJECT_ALL_ACCESS, &oa)
if status != 0 {
return nil, winapi.RtlNtStatusToDosError(status)
}
} else {
var jobNameBuf *uint16
if jobName != nil && jobName.Buffer != nil {
jobNameBuf = jobName.Buffer
}
jobHandle, err = windows.CreateJobObject(nil, jobNameBuf)
if err != nil {
return nil, err
}
}
defer func() {
if err != nil {
windows.Close(jobHandle)
}
}()
job := &JobObject{
handle: jobHandle,
}
// If the IOCP we'll be using to receive messages for all jobs hasn't been
// created, create it and start polling.
if options.Notifications {
mq, err := setupNotifications(ctx, job)
if err != nil {
return nil, err
}
job.mq = mq
}
return job, nil
}
// Open opens an existing job object with name provided in `options`. If no name is provided
// return an error since we need to know what job object to open.
//
// If options.Notifications is false, `PollNotification` will return immediately with error `ErrNotRegistered`.
//
// Returns a JobObject structure and an error if there is one.
func Open(ctx context.Context, options *Options) (_ *JobObject, err error) {
if options == nil || options.Name == "" {
return nil, errors.New("no job object name specified to open")
}
unicodeJobName, err := winapi.NewUnicodeString(options.Name)
if err != nil {
return nil, err
}
var jobHandle windows.Handle
if options != nil && options.UseNTVariant {
oa := winapi.ObjectAttributes{
Length: unsafe.Sizeof(winapi.ObjectAttributes{}),
ObjectName: unicodeJobName,
Attributes: 0,
}
status := winapi.NtOpenJobObject(&jobHandle, winapi.JOB_OBJECT_ALL_ACCESS, &oa)
if status != 0 {
return nil, winapi.RtlNtStatusToDosError(status)
}
} else {
jobHandle, err = winapi.OpenJobObject(winapi.JOB_OBJECT_ALL_ACCESS, false, unicodeJobName.Buffer)
if err != nil {
return nil, err
}
}
defer func() {
if err != nil {
windows.Close(jobHandle)
}
}()
job := &JobObject{
handle: jobHandle,
}
// If the IOCP we'll be using to receive messages for all jobs hasn't been
// created, create it and start polling.
if options != nil && options.Notifications {
mq, err := setupNotifications(ctx, job)
if err != nil {
return nil, err
}
job.mq = mq
}
return job, nil
}
// setupNotifications is a helper to set up notifications when creating or opening a job object
func setupNotifications(ctx context.Context, job *JobObject) (*queue.MessageQueue, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
ioInitOnce.Do(func() {
h, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0xffffffff)
if err != nil {
initIOErr = err
return
}
ioCompletionPort = h
go pollIOCP(ctx, h)
})
if initIOErr != nil {
return nil, initIOErr
}
mq := queue.NewMessageQueue()
jobMap.Store(uintptr(job.handle), mq)
if err := attachIOCP(job.handle, ioCompletionPort); err != nil {
jobMap.Delete(uintptr(job.handle))
return nil, fmt.Errorf("failed to attach job to IO completion port: %w", err)
}
return mq, nil
}
// PollNotification will poll for a job object notification. This call should only be called once
// per job (ideally in a goroutine loop) and will block if there is not a notification ready.
// This call will return immediately with error `ErrNotRegistered` if the job was not registered
// to receive notifications during `Create`. Internally, messages will be queued and there
// is no worry of messages being dropped.
func (job *JobObject) PollNotification() (interface{}, error) {
if job.mq == nil {
return nil, ErrNotRegistered
}
return job.mq.ReadOrWait()
}
// UpdateProcThreadAttribute updates the passed in ProcThreadAttributeList to contain what is necessary to
// launch a process in a job at creation time. This can be used to avoid having to call Assign() after a process
// has already started running.
func (job *JobObject) UpdateProcThreadAttribute(attrList *windows.ProcThreadAttributeListContainer) error {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
if err := attrList.Update(
winapi.PROC_THREAD_ATTRIBUTE_JOB_LIST,
unsafe.Pointer(&job.handle),
unsafe.Sizeof(job.handle),
); err != nil {
return fmt.Errorf("failed to update proc thread attributes for job object: %w", err)
}
return nil
}
// Close closes the job object handle.
func (job *JobObject) Close() error {
job.handleLock.Lock()
defer job.handleLock.Unlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
if err := windows.Close(job.handle); err != nil {
return err
}
if job.mq != nil {
job.mq.Close()
}
// The handle is now invalid, so if the map entry to receive notifications for this job
// still exists, remove it so we stop receiving notifications.
if _, ok := jobMap.Load(uintptr(job.handle)); ok {
jobMap.Delete(uintptr(job.handle))
}
job.handle = 0
return nil
}
// Assign assigns a process to the job object.
func (job *JobObject) Assign(pid uint32) error {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
if pid == 0 {
return errors.New("invalid pid: 0")
}
hProc, err := windows.OpenProcess(winapi.PROCESS_ALL_ACCESS, true, pid)
if err != nil {
return err
}
defer windows.Close(hProc)
return windows.AssignProcessToJobObject(job.handle, hProc)
}
// Terminate terminates the job, essentially calling TerminateProcess on every process in
// the job.
func (job *JobObject) Terminate(exitCode uint32) error {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
return windows.TerminateJobObject(job.handle, exitCode)
}
// Pids returns all of the process IDs in the job object.
func (job *JobObject) Pids() ([]uint32, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
info := winapi.JOBOBJECT_BASIC_PROCESS_ID_LIST{}
err := winapi.QueryInformationJobObject(
job.handle,
winapi.JobObjectBasicProcessIdList,
uintptr(unsafe.Pointer(&info)),
uint32(unsafe.Sizeof(info)),
nil,
)
// If the query succeeded, there is either only one process or no processes in
// the job. Any other case results in ERROR_MORE_DATA. If info.NumberOfProcessIdsInList
// is 1, just return that; otherwise return an empty slice.
if err == nil {
if info.NumberOfProcessIdsInList == 1 {
return []uint32{uint32(info.ProcessIdList[0])}, nil
}
// Return an empty slice instead of nil to play well with the caller.
// Do not return an error if no processes are running inside the job.
return []uint32{}, nil
}
if err != winapi.ERROR_MORE_DATA {
return nil, fmt.Errorf("failed initial query for PIDs in job object: %w", err)
}
jobBasicProcessIDListSize := unsafe.Sizeof(info) + (unsafe.Sizeof(info.ProcessIdList[0]) * uintptr(info.NumberOfAssignedProcesses-1))
buf := make([]byte, jobBasicProcessIDListSize)
if err = winapi.QueryInformationJobObject(
job.handle,
winapi.JobObjectBasicProcessIdList,
uintptr(unsafe.Pointer(&buf[0])),
uint32(len(buf)),
nil,
); err != nil {
return nil, fmt.Errorf("failed to query for PIDs in job object: %w", err)
}
bufInfo := (*winapi.JOBOBJECT_BASIC_PROCESS_ID_LIST)(unsafe.Pointer(&buf[0]))
pids := make([]uint32, bufInfo.NumberOfProcessIdsInList)
for i, bufPid := range bufInfo.AllPids() {
pids[i] = uint32(bufPid)
}
return pids, nil
}
// QueryMemoryStats gets the memory stats for the job object.
func (job *JobObject) QueryMemoryStats() (*winapi.JOBOBJECT_MEMORY_USAGE_INFORMATION, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
info := winapi.JOBOBJECT_MEMORY_USAGE_INFORMATION{}
if err := winapi.QueryInformationJobObject(
job.handle,
winapi.JobObjectMemoryUsageInformation,
uintptr(unsafe.Pointer(&info)),
uint32(unsafe.Sizeof(info)),
nil,
); err != nil {
return nil, fmt.Errorf("failed to query for job object memory stats: %w", err)
}
return &info, nil
}
// QueryProcessorStats gets the processor stats for the job object.
func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_INFORMATION, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
info := winapi.JOBOBJECT_BASIC_ACCOUNTING_INFORMATION{}
if err := winapi.QueryInformationJobObject(
job.handle,
winapi.JobObjectBasicAccountingInformation,
uintptr(unsafe.Pointer(&info)),
uint32(unsafe.Sizeof(info)),
nil,
); err != nil {
return nil, fmt.Errorf("failed to query for job object process stats: %w", err)
}
return &info, nil
}
// QueryStorageStats gets the storage (I/O) stats for the job object.
func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
info := winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION{
ControlFlags: winapi.JOBOBJECT_IO_ATTRIBUTION_CONTROL_ENABLE,
}
if err := winapi.QueryInformationJobObject(
job.handle,
winapi.JobObjectIoAttribution,
uintptr(unsafe.Pointer(&info)),
uint32(unsafe.Sizeof(info)),
nil,
); err != nil {
return nil, fmt.Errorf("failed to query for job object storage stats: %w", err)
}
return &info, nil
}
// QueryPrivateWorkingSet returns the private working set size for the job. This is calculated by adding up the
// private working set for every process running in the job.
func (job *JobObject) QueryPrivateWorkingSet() (uint64, error) {
pids, err := job.Pids()
if err != nil {
return 0, err
}
openAndQueryWorkingSet := func(pid uint32) (uint64, error) {
h, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, pid)
if err != nil {
// Skip to the next pid if OpenProcess fails to return a valid handle. This handles a
// case where one of the pids in the job exited before we could open a handle to it.
return 0, nil
}
defer func() {
_ = windows.Close(h)
}()
// Check if the process is actually running in the job still. There's a small chance
// that the process could have exited and had its pid re-used between grabbing the pids
// in the job and opening the handle to it above.
var inJob int32
if err := winapi.IsProcessInJob(h, job.handle, &inJob); err != nil {
// This shouldn't fail unless we have incorrect access rights, which we control
// here, so it's best to error out if this fails.
return 0, err
}
// Don't report stats for this process as it's not running in the job. This shouldn't be
// an error condition though.
if inJob == 0 {
return 0, nil
}
var vmCounters winapi.VM_COUNTERS_EX2
status := winapi.NtQueryInformationProcess(
h,
winapi.ProcessVmCounters,
uintptr(unsafe.Pointer(&vmCounters)),
uint32(unsafe.Sizeof(vmCounters)),
nil,
)
if !winapi.NTSuccess(status) {
return 0, fmt.Errorf("failed to query information for process: %w", winapi.RtlNtStatusToDosError(status))
}
return uint64(vmCounters.PrivateWorkingSetSize), nil
}
var jobWorkingSetSize uint64
for _, pid := range pids {
workingSet, err := openAndQueryWorkingSet(pid)
if err != nil {
return 0, err
}
jobWorkingSetSize += workingSet
}
return jobWorkingSetSize, nil
}
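
Pulling the API above together, a hedged usage sketch; the same internal-package caveat applies, confine is an invented name, and pid is assumed to be a process we can open with PROCESS_ALL_ACCESS, as Assign requires:

import (
	"context"
	"fmt"

	"github.com/Microsoft/hcsshim/internal/jobobject"
)

// confine (invented) places a process in a fresh job and reports basic stats.
func confine(ctx context.Context, pid uint32) error {
	job, err := jobobject.Create(ctx, &jobobject.Options{})
	if err != nil {
		return err
	}
	defer job.Close()

	// Tear everything down when the last handle to the job is closed.
	if err := job.SetTerminateOnLastHandleClose(); err != nil {
		return err
	}
	if err := job.Assign(pid); err != nil {
		return err
	}

	pids, err := job.Pids() // the two-step ERROR_MORE_DATA query shown above
	if err != nil {
		return err
	}
	private, err := job.QueryPrivateWorkingSet() // steps 1-4 from statisticsInProc
	if err != nil {
		return err
	}
	fmt.Printf("%d processes, %d bytes private working set\n", len(pids), private)
	return nil
}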


@@ -0,0 +1,315 @@
package jobobject
import (
"errors"
"fmt"
"unsafe"
"github.com/Microsoft/hcsshim/internal/winapi"
"golang.org/x/sys/windows"
)
const (
memoryLimitMax uint64 = 0xffffffffffffffff
)
func isFlagSet(flag, controlFlags uint32) bool {
return (flag & controlFlags) == flag
}
// SetResourceLimits sets resource limits on the job object (cpu, memory, storage).
func (job *JobObject) SetResourceLimits(limits *JobLimits) error {
// Go through and check what limits were specified and apply them to the job.
if limits.MemoryLimitInBytes != 0 {
if err := job.SetMemoryLimit(limits.MemoryLimitInBytes); err != nil {
return fmt.Errorf("failed to set job object memory limit: %w", err)
}
}
if limits.CPULimit != 0 {
if err := job.SetCPULimit(RateBased, limits.CPULimit); err != nil {
return fmt.Errorf("failed to set job object cpu limit: %w", err)
}
} else if limits.CPUWeight != 0 {
if err := job.SetCPULimit(WeightBased, limits.CPUWeight); err != nil {
return fmt.Errorf("failed to set job object cpu limit: %w", err)
}
}
if limits.MaxBandwidth != 0 || limits.MaxIOPS != 0 {
if err := job.SetIOLimit(limits.MaxBandwidth, limits.MaxIOPS); err != nil {
return fmt.Errorf("failed to set io limit on job object: %w", err)
}
}
return nil
}
// SetTerminateOnLastHandleClose sets the job object flag that specifies that the job should
// terminate all processes in the job when the last open handle to it is closed.
func (job *JobObject) SetTerminateOnLastHandleClose() error {
info, err := job.getExtendedInformation()
if err != nil {
return err
}
info.BasicLimitInformation.LimitFlags |= windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
return job.setExtendedInformation(info)
}
// SetMemoryLimit sets the memory limit of the job object based on the given `memoryLimitInBytes`.
func (job *JobObject) SetMemoryLimit(memoryLimitInBytes uint64) error {
if memoryLimitInBytes >= memoryLimitMax {
return errors.New("memory limit specified exceeds the max size")
}
info, err := job.getExtendedInformation()
if err != nil {
return err
}
info.JobMemoryLimit = uintptr(memoryLimitInBytes)
info.BasicLimitInformation.LimitFlags |= windows.JOB_OBJECT_LIMIT_JOB_MEMORY
return job.setExtendedInformation(info)
}
// GetMemoryLimit gets the memory limit in bytes of the job object.
func (job *JobObject) GetMemoryLimit() (uint64, error) {
info, err := job.getExtendedInformation()
if err != nil {
return 0, err
}
return uint64(info.JobMemoryLimit), nil
}
// SetCPULimit sets the CPU limit depending on the specified `CPURateControlType` to
// `rateControlValue` for the job object.
func (job *JobObject) SetCPULimit(rateControlType CPURateControlType, rateControlValue uint32) error {
cpuInfo, err := job.getCPURateControlInformation()
if err != nil {
return err
}
switch rateControlType {
case WeightBased:
if rateControlValue < cpuWeightMin || rateControlValue > cpuWeightMax {
return fmt.Errorf("processor weight value of `%d` is invalid", rateControlValue)
}
cpuInfo.ControlFlags |= winapi.JOB_OBJECT_CPU_RATE_CONTROL_ENABLE | winapi.JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED
cpuInfo.Value = rateControlValue
case RateBased:
if rateControlValue < cpuLimitMin || rateControlValue > cpuLimitMax {
return fmt.Errorf("processor rate of `%d` is invalid", rateControlValue)
}
cpuInfo.ControlFlags |= winapi.JOB_OBJECT_CPU_RATE_CONTROL_ENABLE | winapi.JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP
cpuInfo.Value = rateControlValue
default:
return errors.New("invalid job object cpu rate control type")
}
return job.setCPURateControlInfo(cpuInfo)
}
// GetCPULimit gets the cpu limits for the job object.
// `rateControlType` is used to indicate what type of cpu limit to query for.
func (job *JobObject) GetCPULimit(rateControlType CPURateControlType) (uint32, error) {
info, err := job.getCPURateControlInformation()
if err != nil {
return 0, err
}
if !isFlagSet(winapi.JOB_OBJECT_CPU_RATE_CONTROL_ENABLE, info.ControlFlags) {
return 0, errors.New("the job does not have cpu rate control enabled")
}
switch rateControlType {
case WeightBased:
if !isFlagSet(winapi.JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED, info.ControlFlags) {
return 0, errors.New("cannot get cpu weight for job object without cpu weight option set")
}
case RateBased:
if !isFlagSet(winapi.JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP, info.ControlFlags) {
return 0, errors.New("cannot get cpu rate hard cap for job object without cpu rate hard cap option set")
}
default:
return 0, errors.New("invalid job object cpu rate control type")
}
return info.Value, nil
}
// SetCPUAffinity sets the processor affinity for the job object.
// The affinity is passed in as a bitmask.
func (job *JobObject) SetCPUAffinity(affinityBitMask uint64) error {
info, err := job.getExtendedInformation()
if err != nil {
return err
}
info.BasicLimitInformation.LimitFlags |= uint32(windows.JOB_OBJECT_LIMIT_AFFINITY)
info.BasicLimitInformation.Affinity = uintptr(affinityBitMask)
return job.setExtendedInformation(info)
}
// GetCPUAffinity gets the processor affinity for the job object.
// The returned affinity is a bitmask.
func (job *JobObject) GetCPUAffinity() (uint64, error) {
info, err := job.getExtendedInformation()
if err != nil {
return 0, err
}
return uint64(info.BasicLimitInformation.Affinity), nil
}
// SetIOLimit sets the IO limits specified on the job object.
func (job *JobObject) SetIOLimit(maxBandwidth, maxIOPS int64) error {
ioInfo, err := job.getIOLimit()
if err != nil {
return err
}
ioInfo.ControlFlags |= winapi.JOB_OBJECT_IO_RATE_CONTROL_ENABLE
if maxBandwidth != 0 {
ioInfo.MaxBandwidth = maxBandwidth
}
if maxIOPS != 0 {
ioInfo.MaxIops = maxIOPS
}
return job.setIORateControlInfo(ioInfo)
}
// GetIOMaxBandwidthLimit gets the max bandwidth for the job object.
func (job *JobObject) GetIOMaxBandwidthLimit() (int64, error) {
info, err := job.getIOLimit()
if err != nil {
return 0, err
}
return info.MaxBandwidth, nil
}
// GetIOMaxIopsLimit gets the max iops for the job object.
func (job *JobObject) GetIOMaxIopsLimit() (int64, error) {
info, err := job.getIOLimit()
if err != nil {
return 0, err
}
return info.MaxIops, nil
}
// Helper function for getting a job object's extended information.
func (job *JobObject) getExtendedInformation() (*windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
info := windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION{}
if err := winapi.QueryInformationJobObject(
job.handle,
windows.JobObjectExtendedLimitInformation,
uintptr(unsafe.Pointer(&info)),
uint32(unsafe.Sizeof(info)),
nil,
); err != nil {
return nil, fmt.Errorf("query %v returned error: %w", info, err)
}
return &info, nil
}
// Helper function for getting a job object's CPU rate control information.
func (job *JobObject) getCPURateControlInformation() (*winapi.JOBOBJECT_CPU_RATE_CONTROL_INFORMATION, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
info := winapi.JOBOBJECT_CPU_RATE_CONTROL_INFORMATION{}
if err := winapi.QueryInformationJobObject(
job.handle,
windows.JobObjectCpuRateControlInformation,
uintptr(unsafe.Pointer(&info)),
uint32(unsafe.Sizeof(info)),
nil,
); err != nil {
return nil, fmt.Errorf("query %v returned error: %w", info, err)
}
return &info, nil
}
// Helper function for setting a job object's extended information.
func (job *JobObject) setExtendedInformation(info *windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION) error {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
if _, err := windows.SetInformationJobObject(
job.handle,
windows.JobObjectExtendedLimitInformation,
uintptr(unsafe.Pointer(info)),
uint32(unsafe.Sizeof(*info)),
); err != nil {
return fmt.Errorf("failed to set Extended info %v on job object: %w", info, err)
}
return nil
}
// Helper function for querying job handle for IO limit information.
func (job *JobObject) getIOLimit() (*winapi.JOBOBJECT_IO_RATE_CONTROL_INFORMATION, error) {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return nil, ErrAlreadyClosed
}
ioInfo := &winapi.JOBOBJECT_IO_RATE_CONTROL_INFORMATION{}
var blockCount uint32 = 1
if _, err := winapi.QueryIoRateControlInformationJobObject(
job.handle,
nil,
&ioInfo,
&blockCount,
); err != nil {
return nil, fmt.Errorf("query %v returned error: %w", ioInfo, err)
}
if !isFlagSet(winapi.JOB_OBJECT_IO_RATE_CONTROL_ENABLE, ioInfo.ControlFlags) {
return nil, fmt.Errorf("query %v cannot get IO limits for job object without IO rate control option set", ioInfo)
}
return ioInfo, nil
}
// Helper function for setting a job object's IO rate control information.
func (job *JobObject) setIORateControlInfo(ioInfo *winapi.JOBOBJECT_IO_RATE_CONTROL_INFORMATION) error {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
if _, err := winapi.SetIoRateControlInformationJobObject(job.handle, ioInfo); err != nil {
return fmt.Errorf("failed to set IO limit info %v on job object: %w", ioInfo, err)
}
return nil
}
// Helper function for setting a job object's CPU rate control information.
func (job *JobObject) setCPURateControlInfo(cpuInfo *winapi.JOBOBJECT_CPU_RATE_CONTROL_INFORMATION) error {
job.handleLock.RLock()
defer job.handleLock.RUnlock()
if job.handle == 0 {
return ErrAlreadyClosed
}
if _, err := windows.SetInformationJobObject(
job.handle,
windows.JobObjectCpuRateControlInformation,
uintptr(unsafe.Pointer(cpuInfo)),
uint32(unsafe.Sizeof(cpuInfo)),
); err != nil {
return fmt.Errorf("failed to set cpu limit info %v on job object: %w", cpuInfo, err)
}
return nil
}
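
A sketch of applying these controls through SetResourceLimits from earlier in the package. The values are invented; the reading of CPULimit's 1..10000 range as percent multiplied by 100 is an assumption based on the constants above, and SetResourceLimits prefers CPULimit over CPUWeight when both are set:

// capJob (invented) applies CPU, memory, and IO caps in one call.
func capJob(job *jobobject.JobObject) error {
	return job.SetResourceLimits(&jobobject.JobLimits{
		CPULimit:           5000,              // hard cap; 50% if 1..10000 maps to percent*100
		MemoryLimitInBytes: 512 * 1024 * 1024, // JOB_OBJECT_LIMIT_JOB_MEMORY, 512 MiB
		MaxIOPS:            1000,              // enables the IO rate control path
	})
}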


@@ -0,0 +1,111 @@
package queue
import (
"errors"
"sync"
)
var (
ErrQueueClosed = errors.New("the queue is closed for reading and writing")
ErrQueueEmpty = errors.New("the queue is empty")
)
// MessageQueue represents a thread-safe message queue used to retrieve or
// write messages.
type MessageQueue struct {
m *sync.RWMutex
c *sync.Cond
messages []interface{}
closed bool
}
// NewMessageQueue returns a new MessageQueue.
func NewMessageQueue() *MessageQueue {
m := &sync.RWMutex{}
return &MessageQueue{
m: m,
c: sync.NewCond(m),
messages: []interface{}{},
}
}
// Write writes `msg` to the queue.
func (mq *MessageQueue) Write(msg interface{}) error {
mq.m.Lock()
defer mq.m.Unlock()
if mq.closed {
return ErrQueueClosed
}
mq.messages = append(mq.messages, msg)
// Signal a waiter that there is now a value available in the queue.
mq.c.Signal()
return nil
}
// Read will read a value from the queue if available, otherwise return an error.
func (mq *MessageQueue) Read() (interface{}, error) {
mq.m.Lock()
defer mq.m.Unlock()
if mq.closed {
return nil, ErrQueueClosed
}
if mq.isEmpty() {
return nil, ErrQueueEmpty
}
val := mq.messages[0]
mq.messages[0] = nil
mq.messages = mq.messages[1:]
return val, nil
}
// ReadOrWait will read a value from the queue if available; otherwise it waits for a
// value to become available. It blocks until something is written or the queue is closed.
func (mq *MessageQueue) ReadOrWait() (interface{}, error) {
mq.m.Lock()
if mq.closed {
mq.m.Unlock()
return nil, ErrQueueClosed
}
if mq.isEmpty() {
for !mq.closed && mq.isEmpty() {
mq.c.Wait()
}
mq.m.Unlock()
return mq.Read()
}
val := mq.messages[0]
mq.messages[0] = nil
mq.messages = mq.messages[1:]
mq.m.Unlock()
return val, nil
}
// IsEmpty reports whether the queue is empty
func (mq *MessageQueue) IsEmpty() bool {
mq.m.RLock()
defer mq.m.RUnlock()
return len(mq.messages) == 0
}
// Nonexported empty check that doesn't lock so we can call this in Read and Write.
func (mq *MessageQueue) isEmpty() bool {
return len(mq.messages) == 0
}
// Close closes the queue for future writes or reads. Any attempts to read or write from the
// queue after close will return ErrQueueClosed. This is safe to call multiple times.
func (mq *MessageQueue) Close() {
mq.m.Lock()
defer mq.m.Unlock()
// Already closed
if mq.closed {
return
}
mq.messages = nil
mq.closed = true
// If there's anybody currently waiting on a value from ReadOrWait, we need to
// broadcast so the read(s) can return ErrQueueClosed.
mq.c.Broadcast()
}
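
A standalone sketch of the queue's intended use; the reader/writer split is invented, and the import only compiles inside hcsshim since the package is internal. The queue is a small unbounded alternative to a channel whose reader can distinguish "empty" from "closed":

package main

import (
	"fmt"

	"github.com/Microsoft/hcsshim/internal/queue" // internal: compiles only inside hcsshim
)

func main() {
	mq := queue.NewMessageQueue()

	go func() {
		for i := 0; i < 3; i++ {
			_ = mq.Write(i) // Write only fails once the queue is closed
		}
		mq.Close() // note: Close drops anything still unread
	}()

	for {
		msg, err := mq.ReadOrWait() // blocks until a write or Close
		if err == queue.ErrQueueClosed {
			break // may arrive before all three reads if Close wins the race
		}
		fmt.Println(msg)
	}
}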


@@ -1,3 +0,0 @@
-package winapi
-//sys GetQueuedCompletionStatus(cphandle windows.Handle, qty *uint32, key *uintptr, overlapped **windows.Overlapped, timeout uint32) (err error)


@@ -24,7 +24,10 @@ const (
// Access rights for creating or opening job objects.
//
// https://docs.microsoft.com/en-us/windows/win32/procthread/job-object-security-and-access-rights
-const JOB_OBJECT_ALL_ACCESS = 0x1F001F
+const (
+JOB_OBJECT_QUERY = 0x0004
+JOB_OBJECT_ALL_ACCESS = 0x1F001F
+)
// IO limit flags
//
@@ -93,7 +96,7 @@ type JOBOBJECT_BASIC_PROCESS_ID_LIST struct {
// AllPids returns all the process Ids in the job object.
func (p *JOBOBJECT_BASIC_PROCESS_ID_LIST) AllPids() []uintptr {
-return (*[(1 << 27) - 1]uintptr)(unsafe.Pointer(&p.ProcessIdList[0]))[:p.NumberOfProcessIdsInList]
+return (*[(1 << 27) - 1]uintptr)(unsafe.Pointer(&p.ProcessIdList[0]))[:p.NumberOfProcessIdsInList:p.NumberOfProcessIdsInList]
}
// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_basic_accounting_information
@@ -162,7 +165,7 @@ type JOBOBJECT_ASSOCIATE_COMPLETION_PORT struct {
// PBOOL Result
// );
//
-//sys IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *bool) (err error) = kernel32.IsProcessInJob
+//sys IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *int32) (err error) = kernel32.IsProcessInJob
// BOOL QueryInformationJobObject(
// HANDLE hJob,
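
An aside on the AllPids change above: the added third index is a full slice expression, which caps the returned slice's capacity at NumberOfProcessIdsInList so a caller's append reallocates rather than writing into the rest of the ProcessIdList buffer. A standalone miniature (values invented):

package main

import "fmt"

func main() {
	buf := []uintptr{1, 2, 3, 4}

	loose := buf[:2] // len 2, cap 4: append writes into buf's backing array
	loose = append(loose, 99)
	fmt.Println(buf) // [1 2 99 4]

	capped := buf[:2:2] // len 2, cap 2: the new AllPids shape
	capped = append(capped, 77)
	fmt.Println(buf, capped) // [1 2 99 4] [1 2 77]: buf untouched this time
}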


@@ -6,3 +6,60 @@ const (
PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE = 0x20016
PROC_THREAD_ATTRIBUTE_JOB_LIST = 0x2000D
)
// ProcessVmCounters corresponds to the _VM_COUNTERS_EX and _VM_COUNTERS_EX2 structures.
const ProcessVmCounters = 3
// __kernel_entry NTSTATUS NtQueryInformationProcess(
// [in] HANDLE ProcessHandle,
// [in] PROCESSINFOCLASS ProcessInformationClass,
// [out] PVOID ProcessInformation,
// [in] ULONG ProcessInformationLength,
// [out, optional] PULONG ReturnLength
// );
//
//sys NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo uintptr, processInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQueryInformationProcess
// typedef struct _VM_COUNTERS_EX
// {
// SIZE_T PeakVirtualSize;
// SIZE_T VirtualSize;
// ULONG PageFaultCount;
// SIZE_T PeakWorkingSetSize;
// SIZE_T WorkingSetSize;
// SIZE_T QuotaPeakPagedPoolUsage;
// SIZE_T QuotaPagedPoolUsage;
// SIZE_T QuotaPeakNonPagedPoolUsage;
// SIZE_T QuotaNonPagedPoolUsage;
// SIZE_T PagefileUsage;
// SIZE_T PeakPagefileUsage;
// SIZE_T PrivateUsage;
// } VM_COUNTERS_EX, *PVM_COUNTERS_EX;
//
type VM_COUNTERS_EX struct {
PeakVirtualSize uintptr
VirtualSize uintptr
PageFaultCount uint32
PeakWorkingSetSize uintptr
WorkingSetSize uintptr
QuotaPeakPagedPoolUsage uintptr
QuotaPagedPoolUsage uintptr
QuotaPeakNonPagedPoolUsage uintptr
QuotaNonPagedPoolUsage uintptr
PagefileUsage uintptr
PeakPagefileUsage uintptr
PrivateUsage uintptr
}
// typedef struct _VM_COUNTERS_EX2
// {
// VM_COUNTERS_EX CountersEx;
// SIZE_T PrivateWorkingSetSize;
// SIZE_T SharedCommitUsage;
// } VM_COUNTERS_EX2, *PVM_COUNTERS_EX2;
//
type VM_COUNTERS_EX2 struct {
CountersEx VM_COUNTERS_EX
PrivateWorkingSetSize uintptr
SharedCommitUsage uintptr
}


@@ -2,4 +2,4 @@
// be thought of as an extension to golang.org/x/sys/windows.
package winapi
-//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go console.go system.go net.go path.go thread.go iocp.go jobobject.go logon.go memory.go process.go processor.go devices.go filesystem.go errors.go
+//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go user.go console.go system.go net.go path.go thread.go jobobject.go logon.go memory.go process.go processor.go devices.go filesystem.go errors.go


@@ -50,7 +50,6 @@ var (
procSetJobCompartmentId = modiphlpapi.NewProc("SetJobCompartmentId")
procSearchPathW = modkernel32.NewProc("SearchPathW")
procCreateRemoteThread = modkernel32.NewProc("CreateRemoteThread")
-procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus")
procIsProcessInJob = modkernel32.NewProc("IsProcessInJob")
procQueryInformationJobObject = modkernel32.NewProc("QueryInformationJobObject")
procQueryInformationJobObject = modkernel32.NewProc("QueryInformationJobObject")
procOpenJobObjectW = modkernel32.NewProc("OpenJobObjectW")
@@ -61,6 +60,7 @@ var (
procLogonUserW = modadvapi32.NewProc("LogonUserW")
procLocalAlloc = modkernel32.NewProc("LocalAlloc")
procLocalFree = modkernel32.NewProc("LocalFree")
+procNtQueryInformationProcess = modntdll.NewProc("NtQueryInformationProcess")
procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount")
procCM_Get_Device_ID_List_SizeA = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA")
procCM_Get_Device_ID_ListA = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA")
@@ -140,19 +140,7 @@ func CreateRemoteThread(process windows.Handle, sa *windows.SecurityAttributes,
return
}
-func GetQueuedCompletionStatus(cphandle windows.Handle, qty *uint32, key *uintptr, overlapped **windows.Overlapped, timeout uint32) (err error) {
-r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(cphandle), uintptr(unsafe.Pointer(qty)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(overlapped)), uintptr(timeout), 0)
-if r1 == 0 {
-if e1 != 0 {
-err = errnoErr(e1)
-} else {
-err = syscall.EINVAL
-}
-}
-return
-}
-func IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *bool) (err error) {
+func IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *int32) (err error) {
r1, _, e1 := syscall.Syscall(procIsProcessInJob.Addr(), 3, uintptr(procHandle), uintptr(jobHandle), uintptr(unsafe.Pointer(result)))
if r1 == 0 {
if e1 != 0 {
@@ -256,6 +244,12 @@ func LocalFree(ptr uintptr) {
return
}
+func NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo uintptr, processInfoLength uint32, returnLength *uint32) (status uint32) {
+r0, _, _ := syscall.Syscall6(procNtQueryInformationProcess.Addr(), 5, uintptr(processHandle), uintptr(processInfoClass), uintptr(processInfo), uintptr(processInfoLength), uintptr(unsafe.Pointer(returnLength)), 0)
+status = uint32(r0)
+return
+}
func GetActiveProcessorCount(groupNumber uint16) (amount uint32) {
r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0)
amount = uint32(r0)


@@ -91,7 +91,7 @@ EOF
config.vm.provision "install-golang", type: "shell", run: "once" do |sh|
sh.upload_path = "/tmp/vagrant-install-golang"
sh.env = {
-'GO_VERSION': ENV['GO_VERSION'] || "1.17.9",
+'GO_VERSION': ENV['GO_VERSION'] || "1.17.11",
}
sh.inline = <<~SHELL
#!/usr/bin/env bash
@@ -101,6 +101,7 @@ EOF
GOPATH=\\$HOME/go
PATH=\\$GOPATH/bin:\\$PATH
export GOPATH PATH
+git config --global --add safe.directory /vagrant
EOF
source /etc/profile.d/sh.local
SHELL
@@ -264,7 +265,7 @@ EOF
fi
trap cleanup EXIT
ctr version
-critest --parallel=$(nproc) --report-dir="${REPORT_DIR}" --ginkgo.skip='HostIpc is true'
+critest --parallel=$[$(nproc)+2] --ginkgo.skip='HostIpc is true' --report-dir="${REPORT_DIR}"
SHELL
end


@@ -31,6 +31,7 @@ import (
"time"
"github.com/containerd/containerd/log"
+"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/continuity/fs"
)
)
@@ -380,6 +381,10 @@ func createTarFile(ctx context.Context, path, extractDir string, hdr *tar.Header
// Lchown is not supported on Windows.
if runtime.GOOS != "windows" {
if err := os.Lchown(path, hdr.Uid, hdr.Gid); err != nil {
+err = fmt.Errorf("failed to Lchown %q for UID %d, GID %d: %w", path, hdr.Uid, hdr.Gid, err)
+if errors.Is(err, syscall.EINVAL) && userns.RunningInUserNS() {
+err = fmt.Errorf("%w (Hint: try increasing the number of subordinate IDs in /etc/subuid and /etc/subgid)", err)
+}
return err
}
}
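
The hint above works because fmt.Errorf with %w preserves the error chain, so errors.Is still finds syscall.EINVAL even though the check runs on the already-wrapped error. A standalone demonstration (path and IDs invented):

package main

import (
	"errors"
	"fmt"
	"syscall"
)

func main() {
	err := error(syscall.EINVAL) // pretend os.Lchown failed this way
	err = fmt.Errorf("failed to Lchown %q for UID %d, GID %d: %w", "/some/path", 1000, 1000, err)
	fmt.Println(errors.Is(err, syscall.EINVAL)) // true: %w kept the chain intact
}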


@@ -23,7 +23,7 @@ var (
Package = "github.com/containerd/containerd"
// Version holds the complete version number. Filled in at linking time.
-Version = "1.6.4+unknown"
+Version = "1.6.6+unknown"
// Revision is filled with the VCS (e.g. git) revision being used to build
// the program at linking time.


@@ -31,12 +31,10 @@ help: ## this help
test: ## run tests, except integration tests and tests that require root
$(Q)go test -v -race $(EXTRA_TESTFLAGS) -count=1 ./...
-integration: ## run integration test
+integration: bin/integration.test ## run integration test
$(Q)bin/integration.test -test.v -test.count=1 -test.root $(EXTRA_TESTFLAGS) -test.parallel $(TESTFLAGS_PARALLEL)
-FORCE:
-bin/integration.test: FORCE ## build integration test binary into bin
+bin/integration.test: ## build integration test binary into bin
$(Q)cd ./integration && go test -race -c . -o ../bin/integration.test
clean: ## clean up binaries


@@ -33,6 +33,8 @@ import (
type CNI interface {
// Setup sets up the network for the namespace
Setup(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error)
+// SetupSerially sets up each of the network interfaces for the namespace in serial
+SetupSerially(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error)
// Remove tears down the network of the namespace.
Remove(ctx context.Context, id string, path string, opts ...NamespaceOpts) error
// Check checks if the network is still in desired state
@@ -165,6 +167,34 @@ func (c *libcni) Setup(ctx context.Context, id string, path string, opts ...Name
return c.createResult(result)
}
// SetupSerially sets up the network in the namespace serially and returns a Result
func (c *libcni) SetupSerially(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error) {
if err := c.Status(); err != nil {
return nil, err
}
ns, err := newNamespace(id, path, opts...)
if err != nil {
return nil, err
}
result, err := c.attachNetworksSerially(ctx, ns)
if err != nil {
return nil, err
}
return c.createResult(result)
}
func (c *libcni) attachNetworksSerially(ctx context.Context, ns *Namespace) ([]*types100.Result, error) {
var results []*types100.Result
for _, network := range c.Networks() {
r, err := network.Attach(ctx, ns)
if err != nil {
return nil, err
}
results = append(results, r)
}
return results, nil
}
type asynchAttachResult struct {
index int
res *types100.Result
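
A hedged consumer-side sketch of the new entry point; the load options follow go-cni's documented usage, while attachSerially and the conf-dir contents are assumptions:

import (
	"context"

	gocni "github.com/containerd/go-cni"
)

func attachSerially(ctx context.Context, id, netnsPath string) (*gocni.Result, error) {
	l, err := gocni.New() // default plugin and config directories assumed
	if err != nil {
		return nil, err
	}
	if err := l.Load(gocni.WithLoNetwork, gocni.WithDefaultConf); err != nil {
		return nil, err
	}
	// Unlike Setup, which attaches networks concurrently, SetupSerially walks
	// them one at a time in config order (see attachNetworksSerially above),
	// which matters when interface ordering must be deterministic.
	return l.SetupSerially(ctx, id, netnsPath)
}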


@@ -16,6 +16,7 @@ package invoke
import (
"context"
"encoding/json"
+"fmt"
"os"
@@ -33,6 +34,43 @@ type Exec interface {
Decode(jsonBytes []byte) (version.PluginInfo, error)
}
// A plugin must return a result in the same version as specified in the netconf; but
// for backwards-compatibility reasons, if the result version is empty, use the
// config version (rather than the technically correct 0.1.0).
// https://github.com/containernetworking/cni/issues/895
func fixupResultVersion(netconf, result []byte) (string, []byte, error) {
versionDecoder := &version.ConfigDecoder{}
confVersion, err := versionDecoder.Decode(netconf)
if err != nil {
return "", nil, err
}
var rawResult map[string]interface{}
if err := json.Unmarshal(result, &rawResult); err != nil {
return "", nil, fmt.Errorf("failed to unmarshal raw result: %w", err)
}
// Manually decode Result version; we need to know whether its cniVersion
// is empty, while built-in decoders (correctly) substitute 0.1.0 for an
// empty version per the CNI spec.
if resultVerRaw, ok := rawResult["cniVersion"]; ok {
resultVer, ok := resultVerRaw.(string)
if ok && resultVer != "" {
return resultVer, result, nil
}
}
// If the cniVersion is not present or empty, assume the result is
// the same CNI spec version as the config
rawResult["cniVersion"] = confVersion
newBytes, err := json.Marshal(rawResult)
if err != nil {
return "", nil, fmt.Errorf("failed to remarshal fixed result: %w", err)
}
return confVersion, newBytes, nil
}
// For example, a testcase could pass an instance of the following fakeExec
// object to ExecPluginWithResult() to verify the incoming stdin and environment
// and provide a tailored response:
@@ -84,7 +122,12 @@ func ExecPluginWithResult(ctx context.Context, pluginPath string, netconf []byte
return nil, err
}
-return create.CreateFromBytes(stdoutBytes)
+resultVersion, fixedBytes, err := fixupResultVersion(netconf, stdoutBytes)
+if err != nil {
+return nil, err
+}
+return create.Create(resultVersion, fixedBytes)
}
func ExecPluginWithoutResult(ctx context.Context, pluginPath string, netconf []byte, args CNIArgs, exec Exec) error {
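
A self-contained demonstration of the fixup semantics above, using plain encoding/json rather than the library's internals (all values invented):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	netconf := []byte(`{"cniVersion":"0.4.0","name":"mynet","type":"bridge"}`)
	result := []byte(`{"ips":[{"address":"10.0.0.2/24"}]}`) // plugin omitted cniVersion

	var conf, res map[string]interface{}
	_ = json.Unmarshal(netconf, &conf)
	_ = json.Unmarshal(result, &res)

	// Same rule as fixupResultVersion: an empty or missing result version
	// inherits the config's version instead of the spec-default 0.1.0.
	if v, _ := res["cniVersion"].(string); v == "" {
		res["cniVersion"] = conf["cniVersion"]
	}
	out, _ := json.Marshal(res)
	fmt.Println(string(out)) // {"cniVersion":"0.4.0","ips":[{"address":"10.0.0.2/24"}]}
}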

vendor/modules.txt (generated, vendored)

@@ -5,7 +5,7 @@ github.com/Microsoft/go-winio/backuptar
github.com/Microsoft/go-winio/pkg/guid
github.com/Microsoft/go-winio/pkg/security
github.com/Microsoft/go-winio/vhd
-# github.com/Microsoft/hcsshim v0.9.2
+# github.com/Microsoft/hcsshim v0.9.3
## explicit; go 1.13
github.com/Microsoft/hcsshim
github.com/Microsoft/hcsshim/computestorage
@@ -16,11 +16,13 @@ github.com/Microsoft/hcsshim/internal/hcs/schema2
github.com/Microsoft/hcsshim/internal/hcserror
github.com/Microsoft/hcsshim/internal/hns
github.com/Microsoft/hcsshim/internal/interop
+github.com/Microsoft/hcsshim/internal/jobobject
github.com/Microsoft/hcsshim/internal/log
github.com/Microsoft/hcsshim/internal/logfields
github.com/Microsoft/hcsshim/internal/longpath
github.com/Microsoft/hcsshim/internal/mergemaps
github.com/Microsoft/hcsshim/internal/oc
+github.com/Microsoft/hcsshim/internal/queue
github.com/Microsoft/hcsshim/internal/safefile
github.com/Microsoft/hcsshim/internal/timeout
github.com/Microsoft/hcsshim/internal/vmcompute
@@ -51,7 +53,7 @@ github.com/compose-spec/compose-go/types
# github.com/containerd/cgroups v1.0.3
## explicit; go 1.16
github.com/containerd/cgroups/stats/v1
-# github.com/containerd/containerd v1.6.4
+# github.com/containerd/containerd v1.6.6
## explicit; go 1.17
github.com/containerd/containerd
github.com/containerd/containerd/api/services/containers/v1
@@ -120,7 +122,7 @@ github.com/containerd/continuity/sysx
# github.com/containerd/fifo v1.0.0
## explicit; go 1.13
github.com/containerd/fifo
-# github.com/containerd/go-cni v1.1.5
+# github.com/containerd/go-cni v1.1.6
## explicit; go 1.17
github.com/containerd/go-cni
# github.com/containerd/ttrpc v1.1.0
@@ -129,7 +131,7 @@ github.com/containerd/ttrpc
# github.com/containerd/typeurl v1.0.2
## explicit; go 1.13
github.com/containerd/typeurl
-# github.com/containernetworking/cni v1.1.0
+# github.com/containernetworking/cni v1.1.1
## explicit; go 1.14
github.com/containernetworking/cni/libcni
github.com/containernetworking/cni/pkg/invoke
@@ -241,7 +243,7 @@ github.com/opencontainers/go-digest
github.com/opencontainers/image-spec/identity
github.com/opencontainers/image-spec/specs-go
github.com/opencontainers/image-spec/specs-go/v1
-# github.com/opencontainers/runc v1.1.1
+# github.com/opencontainers/runc v1.1.2
## explicit; go 1.16
github.com/opencontainers/runc/libcontainer/user
# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417