Skip to content

Commit 2178fde

Browse files
saad-alijdef
authored andcommitted
Introduce concept of topology to CSI spec.
1 parent 118bc19 commit 2178fde

File tree

3 files changed

+1117
-265
lines changed

3 files changed

+1117
-265
lines changed

csi.proto

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ message PluginCapability {
126126
// attempt to invoke the REQUIRED ControllerService RPCs, as well
127127
// as specific RPCs as indicated by ControllerGetCapabilities.
128128
CONTROLLER_SERVICE = 1;
129+
130+
// ACCESSIBILITY_CONSTRAINTS indicates that the volumes for this
131+
// plugin may not be equally accessible by all nodes in the
132+
// cluster. The CO MUST use the topology information returned by
133+
// CreateVolumeResponse along with the topology information
134+
// returned by NodeGetInfo to ensure that a given volume is
135+
// accessible from a given node when scheduling workloads.
136+
ACCESSIBILITY_CONSTRAINTS = 2;
129137
}
130138
Type type = 1;
131139
}
@@ -212,6 +220,19 @@ message CreateVolumeRequest {
212220
// If specified, the new volume will be pre-populated with data from
213221
// this source. This field is OPTIONAL.
214222
VolumeContentSource volume_content_source = 6;
223+
224+
// Specifies where (regions, zones, racks, etc.) the provisioned
225+
// volume MUST be accessible from.
226+
// An SP SHALL advertise the requirements for topological
227+
// accessibility information in documentation. COs SHALL only specify
228+
// topological accessibility information supported by the SP.
229+
// This field is OPTIONAL.
230+
// This field SHALL NOT be specified unless the SP has the
231+
// ACCESSIBILITY_CONSTRAINTS plugin capability.
232+
// If this field is not specified and the SP has the
233+
// ACCESSIBILITY_CONSTRAINTS plugin capability, the SP MAY choose
234+
// where the provisioned volume is accessible from.
235+
TopologyRequirement accessibility_requirements = 7;
215236
}
216237

217238
// Specifies what source the volume will be created from. One of the
@@ -340,6 +361,194 @@ message Volume {
340361
// pre-populated with data from the specified source.
341362
// This field is OPTIONAL.
342363
VolumeContentSource content_source = 4;
364+
365+
// Specifies where (regions, zones, racks, etc.) the provisioned
366+
// volume is accessible from.
367+
// A plugin that returns this field MUST also set the
368+
// ACCESSIBILITY_CONSTRAINTS plugin capability.
369+
// An SP MAY specify multiple topologies to indicate the volume is
370+
// accessible from multiple locations.
371+
// COs MAY use this information along with the topology information
372+
// returned by NodeGetInfo to ensure that a given volume is accessible
373+
// from a given node when scheduling workloads.
374+
// This field is OPTIONAL. If it is not specified, the CO MAY assume
375+
// the volume is equally accessible from all nodes in the cluster and
376+
// MAY schedule workloads referencing the volume on any available
377+
// node.
378+
//
379+
// Example 1:
380+
// accessible_topology = {"region": "R1", "zone": "Z2"}
381+
// Indicates a volume accessible only from the "region" "R1" and the
382+
// "zone" "Z2".
383+
//
384+
// Example 2:
385+
// accessible_topology =
386+
// {"region": "R1", "zone": "Z2"},
387+
// {"region": "R1", "zone": "Z3"}
388+
// Indicates a volume accessible from both "zone" "Z2" and "zone" "Z3"
389+
// in the "region" "R1".
390+
repeated Topology accessible_topology = 5;
391+
}
392+
393+
message TopologyRequirement {
394+
// Specifies the list of topologies the provisioned volume MUST be
395+
// accessible from.
396+
// This field is OPTIONAL. If TopologyRequirement is specified either
397+
// requisite or preferred or both MUST be specified.
398+
//
399+
// If requisite is specified, the provisioned volume MUST be
400+
// accessible from at least one of the requisite topologies.
401+
//
402+
// Given
403+
// x = number of topologies provisioned volume is accessible from
404+
// n = number of requisite topologies
405+
// The CO MUST ensure n >= 1. The SP MUST ensure x >= 1.
406+
// If x==n, then the SP MUST make the provisioned volume available to
407+
// all topologies from the list of requisite topologies. If it is
408+
// unable to do so, the SP MUST fail the CreateVolume call.
409+
// For example, if a volume should be accessible from a single zone,
410+
// and requisite =
411+
// {"region": "R1", "zone": "Z2"}
412+
// then the provisioned volume MUST be accessible from the "region"
413+
// "R1" and the "zone" "Z2".
414+
// Similarly, if a volume should be accessible from two zones, and
415+
// requisite =
416+
// {"region": "R1", "zone": "Z2"},
417+
// {"region": "R1", "zone": "Z3"}
418+
// then the provisioned volume MUST be accessible from the "region"
419+
// "R1" and both "zone" "Z2" and "zone" "Z3".
420+
//
421+
// If x<n, then the SP SHALL choose x unique topologies from the list
422+
// of requisite topologies. If it is unable to do so, the SP MUST fail
423+
// the CreateVolume call.
424+
// For example, if a volume should be accessible from a single zone,
425+
// and requisite =
426+
// {"region": "R1", "zone": "Z2"},
427+
// {"region": "R1", "zone": "Z3"}
428+
// then the SP may choose to make the provisioned volume available in
429+
// either the "zone" "Z2" or the "zone" "Z3" in the "region" "R1".
430+
// Similarly, if a volume should be accessible from two zones, and
431+
// requisite =
432+
// {"region": "R1", "zone": "Z2"},
433+
// {"region": "R1", "zone": "Z3"},
434+
// {"region": "R1", "zone": "Z4"}
435+
// then the provisioned volume MUST be accessible from any combination
436+
// of two unique topologies: e.g. "R1/Z2" and "R1/Z3", or "R1/Z2" and
437+
// "R1/Z4", or "R1/Z3" and "R1/Z4".
438+
//
439+
// If x>n, then the SP MUST make the provisioned volume available from
440+
// all topologies from the list of requisite topologies and MAY choose
441+
// the remaining x-n unique topologies from the list of all possible
442+
// topologies. If it is unable to do so, the SP MUST fail the
443+
// CreateVolume call.
444+
// For example, if a volume should be accessible from two zones, and
445+
// requisite =
446+
// {"region": "R1", "zone": "Z2"}
447+
// then the provisioned volume MUST be accessible from the "region"
448+
// "R1" and the "zone" "Z2" and the SP may select the second zone
449+
// independently, e.g. "R1/Z4".
450+
repeated Topology requisite = 1;
451+
452+
// Specifies the list of topologies the CO would prefer the volume to
453+
// be provisioned in.
454+
//
455+
// This field is OPTIONAL. If TopologyRequirement is specified either
456+
// requisite or preferred or both MUST be specified.
457+
//
458+
// An SP MUST attempt to make the provisioned volume available using
459+
// the preferred topologies in order from first to last.
460+
//
461+
// If requisite is specified, all topologies in preferred list MUST
462+
// also be present in the list of requisite topologies.
463+
//
464+
// If the SP is unable to make the provisioned volume available
465+
// from any of the preferred topologies, the SP MAY choose a topology
466+
// from the list of requisite topologies.
467+
// If the list of requisite topologies is not specified, then the SP
468+
// MAY choose from the list of all possible topologies.
469+
// If the list of requisite topologies is specified and the SP is
470+
// unable to make the provisioned volume available from any of the
471+
// requisite topologies it MUST fail the CreateVolume call.
472+
//
473+
// Example 1:
474+
// Given a volume should be accessible from a single zone, and
475+
// requisite =
476+
// {"region": "R1", "zone": "Z2"},
477+
// {"region": "R1", "zone": "Z3"}
478+
// preferred =
479+
// {"region": "R1", "zone": "Z3"}
480+
// then the SP SHOULD first attempt to make the provisioned volume
481+
// available from "zone" "Z3" in the "region" "R1" and fall back to
482+
// "zone" "Z2" in the "region" "R1" if that is not possible.
483+
//
484+
// Example 2:
485+
// Given a volume should be accessible from a single zone, and
486+
// requisite =
487+
// {"region": "R1", "zone": "Z2"},
488+
// {"region": "R1", "zone": "Z3"},
489+
// {"region": "R1", "zone": "Z4"},
490+
// {"region": "R1", "zone": "Z5"}
491+
// preferred =
492+
// {"region": "R1", "zone": "Z4"},
493+
// {"region": "R1", "zone": "Z2"}
494+
// then the SP SHOULD first attempt to make the provisioned volume
495+
// accessible from "zone" "Z4" in the "region" "R1" and fall back to
496+
// "zone" "Z2" in the "region" "R1" if that is not possible. If that
497+
// is not possible, the SP may choose between either the "zone"
498+
// "Z3" or "Z5" in the "region" "R1".
499+
//
500+
// Example 3:
501+
// Given a volume should be accessible from TWO zones (because an
502+
// opaque parameter in CreateVolumeRequest, for example, specifies
503+
// the volume is accessible from two zones, aka synchronously
504+
// replicated), and
505+
// requisite =
506+
// {"region": "R1", "zone": "Z2"},
507+
// {"region": "R1", "zone": "Z3"},
508+
// {"region": "R1", "zone": "Z4"},
509+
// {"region": "R1", "zone": "Z5"}
510+
// preferred =
511+
// {"region": "R1", "zone": "Z5"},
512+
// {"region": "R1", "zone": "Z3"}
513+
// then the SP SHOULD first attempt to make the provisioned volume
514+
// accessible from the combination of the two "zones" "Z5" and "Z3" in
515+
// the "region" "R1". If that's not possible, it should fall back to
516+
// a combination of "Z5" and other possibilities from the list of
517+
// requisite. If that's not possible, it should fall back to a
518+
// combination of "Z3" and other possibilities from the list of
519+
// requisite. If that's not possible, it should fall back to a
520+
// combination of other possibilities from the list of requisite.
521+
repeated Topology preferred = 2;
522+
}
523+
524+
// Topology is a map of topological domains to topological segments.
525+
// A topological domain is a sub-division of a cluster, like "region",
526+
// "zone", "rack", etc.
527+
// A topological segment is a specific instance of a topological domain,
528+
// like "zone3", "rack3", etc.
529+
// For example {"com.company/zone": "Z1", "com.company/rack": "R3"}
530+
// Valid keys have two segments: an optional prefix and name, separated
531+
// by a slash (/), for example: "com.company.example/zone".
532+
// The key name segment is required. The prefix is optional.
533+
// Both the key name and the prefix MUST each be 63 characters or less,
534+
// begin and end with an alphanumeric character ([a-z0-9A-Z]) and
535+
// contain only dashes (-), underscores (_), dots (.), or alphanumerics
536+
// in between, for example "zone".
537+
// The key prefix MUST follow reverse domain name notation format
538+
// (https://en.wikipedia.org/wiki/Reverse_domain_name_notation).
539+
// The key prefix SHOULD include the plugin's host company name and/or
540+
// the plugin name, to minimize the possibility of collisions with keys
541+
// from other plugins.
542+
// If a key prefix is specified, it MUST be identical across all
543+
// topology keys returned by the SP (across all RPCs).
544+
// Keys MUST be case-insensitive. Meaning the keys "Zone" and "zone"
545+
// MUST NOT both exist.
546+
// Each value (topological segment) MUST contain 1 or more strings.
547+
// Each string MUST be 63 characters or less and begin and end with an
548+
// alphanumeric character with '-', '_', '.', or alphanumerics in
549+
// between.
550+
message Topology {
551+
map<string, string> segments = 1;
343552
}
344553
message DeleteVolumeRequest {
345554
// The ID of the volume to be deprovisioned.
@@ -424,6 +633,14 @@ message ValidateVolumeCapabilitiesRequest {
424633
// Attributes of the volume to check. This field is OPTIONAL and MUST
425634
// match the attributes of the Volume identified by `volume_id`.
426635
map<string, string> volume_attributes = 3;
636+
637+
// Specifies where (regions, zones, racks, etc.) the caller believes
638+
// the volume is accessible from.
639+
// A caller MAY specify multiple topologies to indicate they believe
640+
// the volume to be accessible from multiple locations.
641+
// This field is OPTIONAL. This field SHALL NOT be set unless the
642+
// plugin advertises the ACCESSIBILITY_CONSTRAINTS capability.
643+
repeated Topology accessible_topology = 4;
427644
}
428645

429646
message ValidateVolumeCapabilitiesResponse {
@@ -482,6 +699,14 @@ message GetCapacityRequest {
482699
// specific `parameters`. These are the same `parameters` the CO will
483700
// use in `CreateVolumeRequest`. This field is OPTIONAL.
484701
map<string, string> parameters = 2;
702+
703+
// If specified, the Plugin SHALL report the capacity of the storage
704+
// that can be used to provision volumes accessible in the specified
705+
// `accessible_topology`. This is the same as the
706+
// `accessible_topology` the SP returns in a `CreateVolumeResponse`.
707+
// This field is OPTIONAL. This field SHALL NOT be set unless the
708+
// plugin advertises the ACCESSIBILITY_CONSTRAINTS capability.
709+
Topology accessible_topology = 3;
485710
}
486711

487712
message GetCapacityResponse {
@@ -846,4 +1071,23 @@ message NodeGetInfoResponse {
8461071
// plugin MUST NOT set negative values here.
8471072
// This field is OPTIONAL.
8481073
int64 max_volumes_per_node = 2;
1074+
1075+
// Specifies where (regions, zones, racks, etc.) the node is
1076+
// accessible from.
1077+
// A plugin that returns this field MUST also set the
1078+
// ACCESSIBILITY_CONSTRAINTS plugin capability.
1079+
// COs MAY use this information along with the topology information
1080+
// returned in CreateVolumeResponse to ensure that a given volume is
1081+
// accessible from a given node when scheduling workloads.
1082+
// This field is OPTIONAL. If it is not specified, the CO MAY assume
1083+
// the node is not subject to any topological constraint, and MAY
1084+
// schedule workloads that reference any volume V, such that there are
1085+
// no topological constraints declared for V.
1086+
//
1087+
// Example 1:
1088+
// accessible_topology =
1089+
// {"region": "R1", "zone": "Z2"}
1090+
// Indicates the node exists within the "region" "R1" and the "zone"
1091+
// "Z2".
1092+
Topology accessible_topology = 3;
8491093
}

0 commit comments

Comments
 (0)