<?xml version="1.0" encoding="utf-8"?>
<?xml-model href="rfc7991bis.rnc"?>  <!-- Required for schema
      validation and schema-aware editing --> 
<!-- <?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?> --> 
<!-- This third-party XSLT can be enabled for direct transformations
in XML processors, including most browsers --> 

<!DOCTYPE rfc [
  <!ENTITY filename "draft-ietf-sfc-nsh-ecn-support-17">
  <!ENTITY nbsp    "&#160;">
  <!ENTITY zwsp   "&#8203;">
  <!ENTITY nbhy   "&#8209;">
  <!ENTITY wj     "&#8288;">
]>
<!-- If further character entities are required then they should be
added to the DOCTYPE above. Use of an external entity file is not
recommended. --> 
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>

<rfc
  xmlns:xi="http://www.w3.org/2001/XInclude"
  category="std"
  docName="&filename;"
  ipr="trust200902"
  updates=""
  submissionType="IETF"
  xml:lang="en"
  version="3">
<!-- 
    * docName should be the name of your draft * category should be
    one of std, bcp, info, exp, historic * ipr should be one of
    trust200902, noModificationTrust200902, noDerivativesTrust200902,
    pre5378Trust200902 * updates can be an RFC number as NNNN *
    obsoletes can be an RFC number as NNNN
-->


<!-- ____________________FRONT_MATTER____________________ -->
<front>
   <title abbrev="NSH ECN &amp; Congestion Feedback">Explicit
   Congestion Notification (ECN) and Congestion Feedback Using the
   Network Service Header (NSH) and IPFIX</title>
   <!--  The abbreviated title is required if the full title is
        longer than 39 characters --> 

   <seriesInfo name="Internet-Draft"
               value="&filename;"/>

   <author fullname="Donald E. Eastlake, 3rd" initials="D."
           surname="Eastlake">
     <organization>Independent</organization>
     <address>
       <postal>
         <street>2386 Panoramic Circle</street>
         <city>Apopka</city>
         <region>Florida</region>
         <code>32703</code>
         <country>USA</country>
       </postal>        
       <phone>+1-508-333-2270</phone>
       <email>d3e3e3@gmail.com</email>
     </address>
   </author>

   <author fullname="Bob Briscoe" initials="B."
           surname="Briscoe">
     <organization>Independent</organization>
     <address>
       <postal>
         <country>UK</country>
       </postal>
       <email>ietf@bobbriscoe.net</email>
       <uri>http://bobbriscoe.net/</uri>
     </address>
   </author>

   <author fullname="Yizhou Li" initials="Y."
           surname="Li">
     <organization>Huawei Technologies</organization>
     <address>
       <postal>
         <street>101 Software Avenue</street>
         <city>Nanjing</city>
         <region>Jiangsu</region>
         <code>210012</code>
         <country>China</country>
       </postal>
       <phone>+86-25-56624584</phone>
       <email>zhuangshunwan@huawei.com</email>
     </address>
   </author>

   <author fullname="Andrew G. Malis" initials="A."
           surname="Malis">
     <organization>Malis Consulting</organization>
     <address>
       <postal>
         <country>USA</country>
       </postal>
       <email>agmalis@gmail.com</email>
     </address>
   </author>

   <author fullname="Xinpeng Wei" initials="X."
           surname="Wei">
     <organization>Huawei Technologies</organization>
     <address>
       <postal>
         <street>Beiqing Rd. Z-park No.156, Haidian District</street>
         <city>Beijing</city>
         <code>100095</code>
         <country>China</country>
       </postal>
       <email>weixinpeng@huawei.com</email>
     </address>
   </author>

   <date year="2026" month="3" day="23"/>

   <area>Routing</area>
   <workgroup>SFC Working Group</workgroup>
   <!-- "Internet Engineering Task Force" is fine for individual
        submissions.  If this element is not present, the default is
        "Network Working Group", which is used by the RFC Editor as a
        nod to the history of the RFC Series. --> 

   <keyword>NSH</keyword>
   <keyword>ECN</keyword>
   <keyword>SFC</keyword>
   <keyword>congestion</keyword>
   <!-- Multiple keywords are allowed.  Keywords are incorporated
        into HTML output files for use by search engines. --> 

<abstract>
<t>Explicit Congestion Notification (ECN) allows a forwarding element
to notify downstream devices of the onset of congestion without having
to drop packets. Coupled with a means to feed information about
congestion back to upstream nodes, this can improve network efficiency
through better congestion control, frequently without packet
drops. This document specifies ECN and congestion feedback support
within a Service Function Chaining (SFC) enabled domain through use of
the Network Service Header (NSH, RFC 8300) and IP Flow Information
Export (IPFIX, RFC 7011) protocol.</t>
</abstract>
 
</front>


<!-- ____________________MIDDLE_MATTER____________________ -->
<middle>
    
<section>  <!-- 1. -->
  <name>Introduction</name>

<t>Explicit Congestion Notification (ECN <xref target="RFC3168"/>)
allows a forwarding element to notify downstream nodes of the onset of
congestion without having to drop packets. Coupled with a means to
feed information about congestion back to upstream nodes, this can
improve network efficiency through better congestion control,
frequently without packet drops. This document specifies ECN and
congestion feedback support within a Service Function Chaining (SFC
<xref target="RFC7665"/>) enabled domain through use of the Network
Service Header (NSH <xref target="RFC8300"/>) and IP Flow Information
Export (IPFIX <xref target="RFC7011"/>) protocol.</t>

<t>This document requires that all ingress and egress nodes of the SFC
domain, for the flows to which these techniques are applied, implement
ECN and that ingress and egress nodes are coordinated in that they
implement the ingress and egress procedures herein specified including
IPFIX between the ingress and egress nodes. While congestion management
will be the most effective if all interior nodes of the SFC enabled
domain transited by those flows implement ECN, some benefit is
obtained even if some of those nodes do not implement ECN. Congestion
at any interior bottleneck where ECN marking is not implemented will
be unmanaged.</t>

<t>The solution specified in this document is not suitable for
portions of a network within which there are paths passing through
areas under different administrative control or where the ingress and
egress nodes of that network portion are not coordinated.</t>

<t>The following subsections provide background information on NSH,
ECN, congestion feedback through IPFIX, and terminology used in this
document.</t>

<section>  <!-- 1.1 -->
  <name>NSH Background</name>

<t>The Service Function Chaining (SFC <xref target="RFC7665"/>)
architecture calls for the encapsulation of traffic within a service
function chaining domain with a Network Service Header (NSH <xref
target="RFC8300"/>) added by a "Classifier" (ingress node) on entry to
the domain with the NSH being removed on egress from the domain at the
egress node. The NSH is used to control the path of a packet in the
SFC domain.</t>


<figure anchor="SFCpath">
  <name>Example SFC Forwarding Nodes Path</name>
    <artwork type="ascii-art" align="center">
      <![CDATA[
        |
        v
   +----------+
. .|Classifier|. . . . . . . . . . . . . .
.  +----------+                          .
.       |          +----+                .
.       |        --+ SF |     Service    .
.       |       /  +----+     Function   .
.       v    ---              Chaining   .
.    +-----+/       +----+    domain     .
.    | SFF |--------+ SF |               .
.    +-----+\       +----+               .
.       |    ---                         .
.       |       \  +----+                .
.       |        --+ SF |                .
.       v          +----+                .
.    +-----+                 +----+      .
.    | SFF |-----------------+ SF |      .
.    +-----+                 +----+      .
.       |          +----+                .
.       |        --+ SF |                .
.       |       /  +----+                .
.       v    ---                         .
.    +-----+/       +----+               .
.    | SFF |--------+ SF |               .
.    +-----+\       +----+               .
.       |    ---                         .
.       |       \  +----+                .
.       |        --+ SF |                .
.       v          +----+                .
.    +------+                            .
. . .|Egress|. . . . . . . . . . . . . . .
     +------+
        |
        v
      ]]>
   </artwork>
</figure>

<t><xref target="SFCpath"/> shows an SFC enabled domain for the
purpose of illustrating the use of the NSH. Traffic passes through a
sequence of Service Function Forwarders (SFFs) each of which sends the
traffic to one or more Service Functions (SFs). Each SF performs some
operation on the traffic, for example firewalling or Network Address
Translation (NAT) or load balancing, and then returns the traffic to
the SFF from which it was received.</t>

<t>Logically, during the transit of each SFF, the outer transport
header that got the packet to the SFF is stripped (see <xref target=
"encapsulation"/>), the SFF decides on the next forwarding step,
either adding a new outer transport header or, if the SFF is the
egress/end, removing the NSH header. The outer transport headers added
may be different in different regions of the SFC enabled domain. For
example, IP could be used for some SFF-to-SFF communication and MPLS
used for other SFF-to-SFF communication.</t>

</section>

<section>  <!-- 1.2 -->
  <name>ECN Background</name>

<t>Explicit Congestion Notification (ECN <xref target="RFC3168"/>)
allows a forwarding element (such as a router or a Service Function
Forwarder (SFF) or Service Function (SF)) to notify downstream nodes
of the onset of congestion without having to drop packets. This can be
used as an element in active queue management (AQM) <xref
target="RFC7567"/> to improve network efficiency through better
traffic control without packet drops. The forwarding element can
explicitly mark some packets in an ECN field instead of dropping the
packet. For example, a two-bit field is available for ECN marking in
IP headers <xref target="RFC3168"/>.</t>

</section>

<section anchor="TunnelActions">  <!-- 1.3 -->
  <name>Tunnel Congestion Feedback Background</name>

<t>Tunnels are widely deployed in various networks including data
center networks, enterprise networks, and the public Internet. A
tunnel consists of ingress, egress, and a set of intermediate nodes
including routers.  Tunnel Congestion Feedback (<xref
target="TunnelFeedback"/>) is a building block for congestion
mitigation methods. It supports feedback of congestion information
from an egress node to an ingress node. This document treats paths in
the SFC enabled domain as tunnels with the initial Classifier node
being the ingress; however, the tunnel congestion feedback facilities
specified in this document MAY be used in contexts other than SFC.</t>

<t>Any action by a tunnel ingress to reduce congestion needs to allow
sufficient time for the end-to-end congestion control loop to respond
first, for instance by the ingress taking a smoothed average of the
level of congestion signaled by feedback from the tunnel egress or
delaying any action for at least the worst case end-to-end round-trip
time (for example, 200 milliseconds). Otherwise, the system could
become unstable.</t>

<t>Examples of actions that can be taken by an ingress node when it
has knowledge of downstream congestion include those listed below.
Details of implementing these traffic control methods, beyond those
given here, are outside the scope of this document.</t>

<dl>

 <dt>(1)</dt><dd>Traffic throttling (policing), where the downstream
 traffic flowing out of the ingress node is limited to reduce or
 eliminate congestion.</dd>

 <dt>(2)</dt><dd>Upstream congestion feedback, where the ingress node
 sends messages indicating congestion upstream to or towards the
 ultimate traffic source, a function that can throttle traffic
 generation/transmission.</dd>

 <dt>(3)</dt><dd>Traffic re-direction, where the ingress node
 configures the NSH of some future traffic so that it avoids congested
 paths. Great care must be taken with this option to avoid (a)
 significant re-ordering of traffic in flows that it is desirable to
 keep in order due to end-to-end requirements or due to a stateful SF
 and (b) oscillation/instability in traffic paths due to alternate
 congestion of previously idle paths and the idling of previously
 congested paths. For example, it is preferable to classify traffic
 into flows of a sufficiently coarse granularity that the flows are
 long lived and to use a stable path per flow, sending only newly
 appearing flows on apparently uncongested paths rather than changing
 the path for any already existing flow.</dd>

</dl>

<t><xref target= "feedback"/> shows an example path from an original
sender to a final receiver passing through a chain of service
functions between the ingress and egress of an SFC enabled domain. The
path is likely to pass through other network nodes outside the SFC
enabled domain (not shown) before entering that domain and after
leaving that domain.</t>

<t><xref target= "feedback"/> shows typical congestion feedback that
would be expected from the final receiver to the origin sender, which
controls the load the origin sender directs to elements on the
path. The figure also shows the congestion feedback from the egress to
the ingress of the SFC enabled domain that is described in this
document, to control or balance load within that domain.</t>

<figure anchor="feedback">
  <name>Congestion Feedback across an SFC enabled Domain</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
 .:= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = :.
_||_                 End-to-End Congestion Feedback              ||
\  /                                                             ||
 \/                                                              ||
 __                Inner Transport Header and Payload            __
|  | ->- - - - - - - - - - - - - - ->- - - - - -- - - - - - ->- |  |
|  |                                                            |  |
|  |       .:= = = = = = = = = = = = = = = = = = = = = =:.      |  |
|  |      _||_         Tunnel Congestion Feedback       ||      |  |
|  |      \  /                                          ||      |  |
|  |       \/                                           ||      |  |
|  |       __                    NSH                    __      |  |
|  |      |  |-------------------------->--------------|  |     |  |
|  |. . . |  |      ___         ___           ___      |  |. . .|  |
|  |      |  | OT1 |   |  OT4  |   |  . . .  |   | OTn |  |     |  |
|  |      |  |-->--|SFF|--->---|SFF|         |SFF|-->--|  |     |  |
|__|      |__|     |___|       |___|         |___|     |__|     |__|
origin    SFC       | ^         | ^                    SFC     final
sender   domain  OT2| |OT3   OT6| |OT7                domain   rcvr
         ingress    v |         v |                   egress
                   +---+       +---+                   SFF
                   |SF |       |SF |
                   +---+       +---+
]]>
   </artwork>
</figure>

<t>SFC enabled Domain congestion feedback in <xref target=
"feedback"/> is shown within the context of an end-to-end congestion
feedback loop. Also shown is the encapsulated layering of NSH headers
within a series of outer transport headers (OT1, OT2, ... OTn).</t>

<t><xref target= "feedback"/> is simplified as there might be multiple
egress nodes and some of them may be final receivers for particular
packets. (See <xref target="complex"/>.)</t>

</section>

<section>
   <name>Conventions Used in This Document</name>

<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
"OPTIONAL" in this document are to be interpreted as described in BCP
14 <xref target="RFC2119"/> <xref target="RFC8174"/> when, and only
when, they appear in all capitals, as shown here.</t>

<t>Acronyms:</t>

<dl>

<dt>AQM -</dt><dd>Active Queue Management <xref target="RFC7567"/></dd>

<dt>CE -</dt><dd>Congestion Experienced <xref target="RFC3168"/></dd>

<dt>DDoS -</dt><dd>Distributed Denial of Service</dd>

<dt>downstream -</dt><dd>The direction from ingress to egress</dd>

<dt>ECN -</dt><dd>Explicit Congestion Notification <xref target="RFC3168"/></dd>

<dt>ECT -</dt><dd>ECN Capable Transport <xref target="RFC3168"/></dd>

<dt>IPFIX -</dt><dd>IP Flow Information Export <xref
target="RFC7011"/></dd>

<dt>Not-ECT -</dt><dd>Not ECN-Capable Transport <xref
target="RFC3168"/></dd>

<dt>NSH -</dt><dd>Network Service Header <xref target="RFC8300"/></dd>

<dt>SF -</dt><dd>Service Function <xref target="RFC7665"/></dd>

<dt>SFC -</dt><dd>Service Function Chaining <xref
target="RFC7665"/></dd>

<dt>SFF -</dt><dd>Service Function Forwarder <xref target="RFC7665"/>
- A type of node that forwards based on the NSH.</dd>

<dt>SPI -</dt><dd>Service Path Identifier</dd>

<dt>TLV -</dt><dd>Type Length Value</dd>

<dt>upstream -</dt><dd>The direction from egress to ingress</dd>
  
</dl>

 </section>
 
</section>

<section> <!-- 2. -->
  <name>The NSH ECN Field</name>

<t>The NSH is used to encapsulate traffic and control its subsequent
path (see Section 2 of <xref target="RFC8300"/>). The NSH also
provides for optional metadata inclusion, as shown in <xref
target="encapsulation"/>.</t>

<figure anchor="encapsulation">
  <name>Data Encapsulation with the NSH</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
+-----------------------------------+
|   Outer Transport Header          |
+-----------------------------------+
|   Network Service Header (NSH)    |
| +------------------------------+  |
| | Base Header                  |  |
| +------------------------------+  |
| | Service Path Header          |  |
| +------------------------------+  |
| | Metadata (Context Header(s)) |  |
| +------------------------------+  |
+-----------------------------------+
| Original Packet / Frame / Payload |
+-----------------------------------+
]]>
   </artwork>
</figure>

<t>This document assigns two currently unused bits (indicated by "U")
in the NSH Base Header (Section 2.2 of <xref target="RFC8300"/>) for
the purpose of ECN indication as shown in <xref
target="NSHupdate"/>.</t>

<figure anchor="NSHupdate">
  <name>Updated NSH Base Header</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Ver|O|U|    TTL    |   Length  |U|U|U|U|MD Type| Next Protocol |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
                                 ^ ^
                                 | |
                              +-------+
                              |NSH ECN|
                              | field |
                              +-------+
]]>
   </artwork>
</figure>

<t>RFC Editor NOTE: The above figure should be adjusted based on the
bits actually assigned by IANA (see <xref target="IANA"/>) and this
note deleted.</t>

<t><xref target="ECNpoints"/> shows the meaning of the code points in
the NSH ECN field.  These have the same meaning as the ECN field code
points in the IPv4 or IPv6 header as defined in Section 23.1 of <xref
target="RFC3168"/>.</t>

<table anchor="ECNpoints" align="center">
  <name>ECN Field Code Points</name>
  <thead>
<tr><th>Binary</th><th  align="center">Name</th><th
align="center">Meaning</th></tr> 
  </thead>
  <tbody>
<tr><td  align="center">00</td><td>Not-ECT</td><td>Not ECN-Capable
Transport</td></tr> 
<tr><td  align="center">01</td><td>ECT(1)</td><td>ECN-Capable
Transport</td></tr> 
<tr><td  align="center">10</td><td>ECT(0)</td><td>ECN-Capable
Transport</td></tr> 
<tr><td  align="center">11</td><td>CE</td><td>Congestion
Experienced</td></tr> 
  </tbody>
</table>

</section> <!-- 2. -->
  
<section anchor="ECNNSH">  <!-- 3. -->
  <name>ECN Support in the NSH</name>

<t>This section describes the required behavior to support ECN using
the NSH. There are two aspects to ECN support:</t>

<ol>

  <li>ECN propagation during ingress or egress;</li>
  <li>ECN marking during congestion at bottlenecks.</li>
  
</ol>

<t>While this section covers all combinations of ECN-aware and
ECN-unaware, it is expected that in most cases the NSH domain will be
uniform so that, if this document is applicable, all SFFs will support
ECN; however, some SFs might not support ECN.</t>

<t>ECN Propagation:</t>

  <t indent="3">The specification of ECN tunneling <xref
  target="RFC6040"/> explains that an ingress must not propagate ECN
  support into an encapsulating header unless the egress supports
  correct onward propagation of the ECN field during decapsulation. We
  define Compliant ECN Decapsulation here as decapsulation compliant
  with either <xref target="RFC6040"/> or an earlier compatible
  equivalent (<xref target="RFC4301"/>, or the full functionality mode
  of <xref target="RFC3168"/>).</t>

  <t indent="3">The procedures in <xref target="TransitProcedures"/>
  ensure that each ingress of the transport links within the SFC
  enabled domain does not propagate ECN support into the encapsulating
  outer transport header unless the corresponding egress of that link
  supports Compliant ECN Decapsulation.</t>

  <t indent="3"><xref target="EgressProcedures"/> requires that all
  the egress nodes of the SFC enabled domain that continue to
  propagate a packet support Compliant ECN Decapsulation in
  conjunction with tunnel congestion feedback; otherwise the scheme in
  this document will not work. (An SFC domain may have nodes that
  terminate packets and thus are logically "egress" nodes but for
  which further propagation of ECN is meaningless.)</t>

<t>ECN Marking:</t>

  <t indent="3">At transit nodes the marking behavior specified in
  <xref target="TransitProcedures"/> is recommended and if not
  implemented at such transit nodes, there may be unmanaged
  congestion.</t>

  <t indent="3">Detection of congestion will be most effective if ECN
  marking is supported by all potential bottlenecks inside the domain
  in which NSH is being used to route traffic as well as at the
  ingress and egress.  Nodes that do not support ECN marking, or that
  support AQM but not ECN, will naturally use drop to relieve
  congestion.  The gap in the end-to-end packet sequence will be
  detected as congestion by the final receiving endpoint, but not by
  the NSH egress (see <xref target="feedback"/>).</t>

 <section>  <!-- 3.1 -->
   <name>At The Ingress</name>

<t>When the ingress/Classifier encapsulates an incoming packet with an
NSH, it MUST set the NSH ECN field using the "Normal mode" specified
in <xref target="RFC6040"/> (e.g., copied from the incoming IP
header).</t>

<t>Then, if the resulting NSH ECN field is Not-ECT, the ingress SHOULD
set it to ECT(0). This indicates that, even though the end-to-end
transport is not ECN-capable, the egress and ingress of the SFC
enabled domain are acting as an ECN-capable transport. This approach
supports and is interoperable with all known variants of ECN,
including the experimental L4S capability <xref target="RFC8311"/>
<xref target="ecnL4S"/>. This "faked ECT" marking at the ingress is
necessary for ECN to measure congestion within the SFC domain. It only
affects marking within the SFC domain and is undone for packets that
pass through an SFC domain egress.</t>

<t>Packets arriving at the ingress might not use IP. If the protocol
of arriving packets supports an ECN field similar to IP, for example
MPLS <xref target="RFC5129"/>, the procedures for IP packets can be
used. If arriving packets do not support an ECN field similar to IP,
they MUST be treated as if they are Not-ECT IP packets.</t>

<t>Then, as the NSH encapsulated packet is further encapsulated with a
transport header, if ECN marking is available for that transport (as
it is for IP <xref target="RFC3168"/> and MPLS <xref
target="RFC5129"/>), the ECN field of the transport header MUST be set
using the "Normal mode" specified in <xref target="RFC6040"/> (i.e.,
copied from the NSH ECN field).</t>

<t>A summary of these normative steps is given in <xref
target="ECNingress"/>.</t>

<table anchor="ECNingress" align="center">
  <name>Setting of ECN fields by an Ingress/Classifier</name>
  <thead>
<tr><th align="center">Incoming Header (also equal to departing Inner
Header)</th> <th align="center">Departing NSH and Outer
Headers</th></tr>
  </thead>
  <tbody>
<tr><td align="center">Not-ECT</td><td align="center">ECT(0)</td></tr>
<tr><td align="center">ECT(0)</td><td align="center">ECT(0)</td></tr>
<tr><td align="center">ECT(1)</td><td align="center">ECT(1)</td></tr>
<tr><td align="center">CE</td><td align="center">CE</td></tr>
  </tbody>
</table>

<t>The requirements in this section apply to all ingress nodes for the
domain in which an NSH is being used to steer traffic.</t>

 </section>
 
 <section>
   <name>At Transit Nodes</name>

<t>This section describes the behavior at nodes that forward based on
the NSH such as SFF and other forwarding nodes such as IP
routers. <xref target="transit"/> shows a packet on the wire between
forwarding nodes.</t>
 
<figure anchor="transit">
  <name>Packet in Transit</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
+-----------------+
|   Outer Header  |
+-----------------+
|       NSH       |
+-----------------+
|   Inner Header  |
+-----------------+
|     Payload     |
+-----------------+
]]>
   </artwork>
</figure>

<t>There can be nodes implementing firewall, DDoS, or similar
functions that conditionally discard packets. When they do discard a
packet, they are an egress node (see <xref
target="EgressProcedures"/>), not a transit node.</t>
 
 <section anchor="TransitProcedures">  <!-- 3.2.1 -->
   <name>At NSH Transit Nodes</name>

<t>When a packet is received at an NSH based forwarding node such as
an SFF, say N1, the outer transport encapsulation is removed and its
ECN marking SHOULD be combined into the NSH ECN marking as specified
in <xref target="RFC6040"/>. If this is not done, any congestion
encountered at non-NSH transit nodes between N1 and the previous
upstream NSH based forwarding node will be lost and not transmitted
downstream.</t>

<t>The NSH forwarding node SHOULD use a recognized AQM algorithm <xref
target="RFC7567"/> to detect congestion. If the NSH ECN field
indicates ECT, it will probabilistically set the NSH ECN field to the
Congestion Experienced (CE) value or, in cases of extreme congestion,
drop the packet.</t>

<t>When the NSH encapsulated packet is further encapsulated for
transmission to the next SFF or SF, ECN marking behavior depends on
whether or not the node that will decapsulate the outer header
supports Compliant ECN Decapsulation (see <xref target="ECNNSH"/>). If
it does, then the encapsulating node propagates the NSH ECN field to
this outer encapsulation using the "Normal Mode" of ECN encapsulation
<xref target="RFC6040"/> (the ECN field is copied). If it does not,
then the encapsulating node MUST clear ECN in the outer encapsulation
to Not-ECT (the "Compatibility Mode" of <xref
target="RFC6040"/>).</t>

 </section>
 
 <section>  <!-- 3.2.2 -->
   <name>At an SF/Proxy</name>

<t>If the SF is NSH and ECN-aware, the processing is essentially the
same at the SF as at an SFF as discussed in <xref
target="TransitProcedures"/> (except in the case where the SF
terminates the packet's path).</t>

<t>If the SF is NSH-aware but ECN-unaware, then the SFF transmitting
the packet to the SF will use Compatibility Mode. Congestion
encountered in the SFF to SF and SF to SFF paths or internal to the SF
will be unmanaged.</t>

<t>If the SF is not NSH-aware, then an NSH proxy will be between the
SFF and the SF to avoid exposure of the NSH-ignorant SF to NSHs as
shown in <xref target="proxy"/>. This is described in Section 4.6 of
<xref target="RFC7665"/>.  The SF and proxy together look to the SFF
like an NSH-aware SF. The behavior at the proxy and SF in this case is
as below:</t>

  <t indent="3">If such a proxy is not ECN-aware, then congestion in
  the entire path from SFF to proxy to SF back to proxy to SFF will be
  unmanaged.</t>
  
<figure anchor="proxy">
  <name>Proxy for NSH Un-aware SF</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
     |
     v
+----------+                   +---------+
|          |     +-------+     |   NSH   |
|   SFF    +---->|  NSH  +---->|un-aware |
|(Service  |     | aware |     |   SF    |
| Function |<----+ proxy |<----+(Service |
|Forwarder)|     +-------+     |Function)|
+----------+                   +---------+
     |
     v
]]>
   </artwork>
</figure>

  <t indent="3">If the proxy is ECN-aware, the proxy uses an AQM to
  indicate congestion within the proxy in the NSH that it returns to
  the SFF.  The outer header used for the proxy-to-SF path uses Normal
  Mode.  The outer header used for the proxy-to-SFF path uses Normal
  Mode based copying of the NSH ECN field to the outer header. Thus
  congestion in the proxy will be managed.</t>

  <t indent="3">Congestion in the SF will be managed only if the SF is
  ECN-aware and implements an AQM.</t>

 </section>

 <section>  <!-- 3.2.3 -->
   <name>At Other Forwarding Nodes</name>

<t>Other forwarding nodes, that is non-NSH forwarding nodes between
NSH forwarding nodes, such as IP or label switched routers, bridges,
or other devices, might also contain potential bottlenecks. If so,
they SHOULD implement an AQM algorithm to update the ECN marking in
the outer transport header as specified in <xref
target="RFC3168"/>.</t>

 </section>
</section>
 
<section anchor="EgressProcedures">  <!-- 3.3 -->
  <name>At Egress/End</name>

<t>At an SFC enabled domain egress node, first any actions are taken
based on Congestion Experienced or other values of ECN marking, such
as accumulating statistics to send back to the ingress (see <xref
target="TunnelFeedback"/>) or for other uses.</t>

<t>There can be nodes implementing firewall, DDoS, or similar
functions that then discard the packet. If the packet is so discarded,
no further actions are needed.</t>

<t>If the packet is to be propagated and is carried inside the NSH as
encapsulated IP, then when the NSH is removed the NSH ECN field MUST
be combined with the IP ECN field as specified in <xref
target="Merger"/> that was extracted from Section 3.2 of <xref
target="RFC6040"/>.  This requirement applies to all egress nodes for
the domain in which an NSH is being used to route traffic.</t>

<table anchor="Merger" align="center">
  <name>Egress ECN Fields Merger (Source <xref
  target="RFC6040"/>)</name>
  <thead>
    
<tr><th rowspan="2">Arriving Inner Header</th>
<th align="center" colspan="4">Arriving Outer Header</th></tr>
    
<tr><th>Not-ECT</th><th align="right">ECT(0)</th>
<th align="right">ECT(1)</th><th align="right">CE</th></tr>

  </thead>
  <tbody>
    
<tr><td align="right">Not-ECT</td><td align="right">Not-ECT</td>
<td align="right">Not-ECT</td><td align="right">Not-ECT</td>
<td align="right">&lt;drop&gt;</td></tr>

<tr><td align="right">ECT(0)</td><td align="right">ECT(0)</td><td
align="right">ECT(0)</td><td align="right">ECT(0)</td><td
align="right">CE</td></tr>

<tr><td align="right">ECT(1)</td><td align="right">ECT(1)</td><td
align="right">ECT(1)</td><td align="right">ECT(1)</td><td
align="right">CE</td></tr>

<tr><td align="right">CE</td><td align="right">CE</td><td
align="right">CE</td><td align="right">CE</td><td
align="right">CE</td></tr>

  </tbody>
</table>

<t>All the egress nodes of the SFC enabled domain that can propagate
NSH encapsulated packets MUST support Compliant ECN Decapsulation as
specified in this section. If this is not the case, the scheme
described in this document will not work.</t>

 </section>
 
 <section anchor="complex">  <!-- 3.4 -->
   <name>Congestion Statistics and More Complex Cases</name>

<t>The SFC specification permits an SF to absorb packets and to
generate new packets as well as simply processing and returning the
packets it receives to an SFF.  Such actions might appear to be packet
loss due to congestion or might mask the loss of packets by generating
additional packets.</t>

<t>The closer a particular application of SFC is to a simple tunnel
with a single ingress and egress, the simpler it is to accurately use
the techniques in this document. Where there is a single ingress but
multiple egress nodes (where a node that discards a packet counts as
an egress) these techniques can still work well if all egress nodes
feed back congestion information to that ingress. Multiple ingress
nodes are a substantial complication, but similar techniques may still
work in some cases if multiple physical ingress nodes can coordinate
to act as one logical ingress node; methods for such coordination are
beyond the scope of this document. Use of the techniques in this
document for a flow with multiple egress and uncoordinated ingress
nodes is NOT RECOMMENDED, although there might be some cases where
these techniques could be elements in some sort of beneficial scheme;
such schemes are beyond the scope of this document.</t>

<t>The tunnel congestion feedback approach (<xref
target="TunnelFeedback"/>) can detect congestion in several ways. One
way detects traffic loss by counting payload packets and bytes in at
the ingress and counting them out at the egress. This does not work
unless nodes conserve the number of payload packets and/or
bytes. Therefore, it will not be possible to accurately detect packet
loss using this technique if traffic volume, as measured by the metric
in use (packets or bytes), is not conserved by the service function
chain processing that traffic.</t>

<t>Nonetheless, if a bottleneck supports ECN marking, it will be
possible to detect the high level of CE markings that are associated
with congestion at that bottleneck by looking at the ratio of
CE-marked to non-CE-marked packets. However, it will not be possible to
detect any congestion based on ECN marking, whether slight or severe,
if it occurs at a bottleneck that does not support ECN marking.</t>

 </section>
</section>

<section anchor="TunnelFeedback">  <!-- 4. -->
  <name>Tunnel Congestion Feedback Support</name>

<t>The collection and storage of congestion information at an egress
can be useful for later analysis and MAY be used without the feedback
mechanisms specified in this section. However, if congestion
information is not fed back to a point which can act to reduce
congestion, it will not be useful in real time. Such congestion
feedback to the ingress enables the ingress to take actions such as
those listed in <xref target="TunnelActions"/>.</t>

<t>IP Flow Information Export (IPFIX <xref target="RFC7011"/>)
provides a standard for communicating traffic flow statistics. As
extended by this document, IPFIX messages from the egress to the
ingress are used to communicate the extent of congestion between an
ingress and egress based on ECN marking in the NSH and traffic
statistics. Each egress MUST be able to identify the relevant ingress
for a packet based on information in the packet such as the SPI or the
Ingress Network Node Information Context Header <xref
target="RFC9263"/>.</t>

<section>  <!-- 4.1 -->
  <name>Congestion Level Measurements</name>

<t>The congestion level measurements are based on ECN marking in the
NSH and packet drop detection. In particular, congestion information
includes at least one of the following:</t>

<ul>
<li>cumulative byte counts of packets with each type of outer/inner
header ECN marking combination,</li>
<li>the ratio of CE-marked packets to all packets, and</li>
<li>the ratio of dropped packets to all packets.</li>
</ul>

<t>All IPFIX messages are time stamped <xref target="RFC7011"/>. So,
for example, it is possible to compute rates of packets or packets
with various ECN labeling from two IPFIX messages that have cumulative
counts and time stamps. An earlier count and time can be subtracted from
a later count and time to give the time interval and count during that
interval.</t>

<t>If the congestion level is low enough, the packets are marked as CE
instead of being dropped, and then the congestion level can be
calculated according to the ratio of CE-marked packets. If the
congestion level is so high that ECT packets will be dropped, then the
packet loss ratio can be calculated by comparing total packets
entering ingress and total packets arriving at egress over the same
span of packets. Note that a node that discards packets for firewall,
DDoS, or similar reasons counts as an egress.  If packet loss, other
than such deliberate discard, is detected, then it can be assumed that
severe congestion has occurred.</t>

<t>Faked ECN-Capable Transport (ECT) is used at the ingress to defer
packet loss to the egress. The basic idea of faked ECT is that, when
encapsulating packets, the ingress first marks the tunnel outer header
according to <xref target="RFC6040"/>, and then re-marks the outer
header of Not-ECT packets as ECT. (ECT(0) and ECT(1) are treated as
the same.)  In this case, the NSH is treated as the tunnel outer
header because it will be present for the entire SFC enabled domain
transit while transport headers may change. Thus, as transmitted by
the ingress node, there will be one of three combinations of outer
header ECN field and inner header ECN field as follows: CE|CE,
ECT|N-ECT, and ECT|ECT (in the format of outer-ECN|inner-ECN); when
decapsulating packets at the egress, <xref target="RFC6040"/> defined
decapsulation behavior is used, and according to <xref
target="RFC6040"/>, the packets marked as CE|N-ECT will be dropped.
Faked-ECT is used to shift some drops to the egress in order to allow
the egress to calculate the CE-marked packet counts and ratio more
precisely.</t>

<t>The ingress encapsulates packets and marks their outer header
according to faked ECT as described above. The ingress cumulatively
counts packet bytes for three types of ECN combination (CE|CE,
ECT|N-ECT, and ECT|ECT) and then the ingress regularly sends a message
with the cumulative byte counts of each type of ECN combination to the
egress.</t>

<t>When each message arrives at the egress, the following two steps
occur: (1) the egress calculates the ratio of CE-marked packets; (2)
the egress cumulatively counts packet bytes coming from the ingress
and adds its own byte counts of each type of ECN combination (CE|CE,
ECT|N-ECT, CE|N-ECT, CE|ECT, and ECT|ECT) to the message for the
ingress to calculate packet loss. The egress feeds back the CE-marked
packet ratio, packet loss ratio, byte counts information, and the like
to the ingress as requested for evaluating congestion level in the
tunnel.</t>

<t>The egress calculates the CE-marked packet ratio by counting
packets with different ECN markings. The CE-marked packet ratio can be
used as an indication of tunnel load level. For example, the
tunnelEcnCEMarkedRatio field (specified below) indicates the fraction
of traffic that has been marked in the ECN field of the NSH as
Congestion Experienced (CE).  It is assumed that nodes between the
ingress and egress will not drop packets biased towards certain ECN
codepoints, so calculation of the CE-marked packet ratio is not
affected by packet drop.</t>

<t>The fraction of packets dropped is calculated by comparing the
traffic volumes at the ingress and the egress.</t>

<t>In the case of multiple egresses, the ingress can combine their
reports. Statistics of number of packets or bytes can simply be added.
Statistics of percentage or ratio of particular ECN marking can be
averaged with reports from different egresses weighted by the number
of packets processed by that egress.</t>

<t>The statistics can be at the granularity of all traffic from the
ingress to the egress to learn about the overall congestion status of
the path between the ingress and the egress or at the granularity of
an individual customer's traffic or a specific set of flows to learn
about their congestion contribution.</t>

</section>

<section> <!-- 4.2 -->
  <name>Congestion Information Delivery</name>

<t>As described above, the tunnel ingress sends a message containing
cumulative byte counts of packets of each type of ECN marking to the
tunnel egress, and the tunnel egress feeds back messages to the
ingress with at least one of the following: cumulative byte counts of
packets of each type of ECN combination, the ratio of CE-marked
packets to all packets, and/or the ratio of dropped packets to all
packets. It is possible for these messages to contribute to
congestion. This section specifies how the messages are conveyed.</t>

<t>IPFIX recommends, but does not require, use of SCTP <xref
target="RFC9260"/> in partial reliability mode <xref
target="RFC3758"/> for the transport of its messages.  This mode
allows loss of some packets, which is tolerable because IPFIX
communicates cumulative statistics. IPFIX over SCTP over IP SHOULD be
used directly where there is IP connectivity between the ingress and
egress; however, there might be different transport protocols or
address spaces used in different regions of an SFC enabled domain that
block such direct IP connectivity. The NSH provides the general method
of routing traffic within an SFC enabled domain so the encapsulation
of the required IPFIX traffic in NSH MUST be implemented and, when IP
connectivity is not available, IPFIX over NSH, as specified in <xref
target="IFIXoverNSH"/>, SHOULD be used along with configuration of
appropriate SFC paths for the IPFIX over NSH traffic. Other methods
MAY be used in particular SFC domains which support them, such as
IPFIX over MPLS.</t>

<t>IPFIX messages could travel along the same path as network data
traffic. In any case, an IPFIX message packet may get lost in case of
network congestion. Even though the missing information could be
recovered because of the use of cumulative counts, IPFIX messages
SHOULD be transmitted at a higher priority than users' traffic flows
to improve the promptness of congestion information feedback.</t>

<t>The ingress node can do congestion management at different
granularity which means both the overall aggregate congestion level
and congestion level contributed by certain traffic flows could be
measured for different congestion management purposes. For example, if
the ingress only wants to limit congestion volume caused by certain
traffic flows, such as UDP-based traffic, then congestion volume for
that traffic can be fed back; or if the ingress is doing overall
congestion management, the aggregated congestion volume can be fed
back.</t>

<t>When sending IPFIX messages from ingress to egress, the ingress
acts as IPFIX exporter and the egress acts as IPFIX collector. When
feeding back congestion level information from egress to ingress, the
egress acts as IPFIX exporter and ingress acts as IPFIX collector.</t>

<t>The combination of congestion level measurement and congestion
information delivery procedures is as follows:</t>

<ul>

<li>The ingress node determines the IPFIX template record to be used.
The template record can be pre-configured or determined at runtime.
The content of the template record will be determined according to the
granularity of congestion management; if the ingress wants to limit
congestion volume contributed by specific traffic flows then the
elements such as source IP address, destination IP address, flow ID,
and CE-marked packet volume of the flows, etc., will be included in
the template record.</li>

<li>Metering at the ingress measures traffic volume according to the
template record chosen and then the measurement records are sent to
the egress.</li>

<li>Metering on the egress measures congestion level information
according to the template record which, in simple cases, SHOULD be the
same as the template record sent by the ingress (see <xref
target="complex"/>).</li>

<li>The egress sends its measurement records together with the
measurement records of the ingress back to the ingress.</li>

</ul>

</section>

<section>  <!-- 4.3 -->
  <name>IPFIX Extensions</name>

<t>This section specifies the new IPFIX Information Elements
needed. It conforms to <xref target="RFC7013"/>.</t>

<section>
  <name>nshServicePathID</name>

<t>In order to identify SFC flows, so that congestion can be measured
and reported at that granularity, it is necessary for IPFIX to be able
to classify traffic based on the Service Path Identifier (SPI) field
of the NSH <xref target="RFC8300"/>. Thus, an NSH Service Path Identifier
(nshServicePathID) IPFIX Information Element <xref target="RFC7012"/> is
specified.</t>

<t indent="3">Name: nshServicePathID</t>

<t indent="3">Description: Network Service Header <xref
target="RFC8300"/> Service Path Identifier.  This is a 24-bit value
which is left justified in the Information Element. The low order byte
MUST be sent as zero and ignored on receipt.</t>

<t indent="3">Abstract Data Type: unsigned32</t>

<t indent="3">Data Type Semantics: identifier</t>

<t indent="3">ElementId: TBD0</t>

<t indent="3">Status: current</t>

</section>

<section>
  <name>tunnelEcnCeCeByteTotalCount</name>

<t indent="3">Description: The total number of bytes of incoming
packets with the CE|CE ECN marking combination at the Observation
Point since the Metering Process (re-)initialization for this
Observation Point.</t>

<t indent="3">Abstract Data Type: unsigned64</t>

<t indent="3">Data Type Semantics: totalCounter</t>

<t indent="3">ElementId: TBD1</t>

<t indent="3">Status: current</t>

<t indent="3">Units: bytes</t>

</section>

<section>
  <name>tunnelEcnEctNectByteTotalCount</name>

<t indent="3">Description: The total number of bytes of incoming
packets with the ECT|N-ECT ECN marking combination (ECT(0) and ECT(1)
are treated the same as each other) at the Observation Point since the
Metering Process (re-)initialization for this Observation Point.</t>

<t indent="3">Abstract Data Type: unsigned64</t>

<t indent="3">Data Type Semantics: totalCounter</t>

<t indent="3">ElementId: TBD2</t>

<t indent="3">Status: current</t>

<t indent="3">Units: bytes</t>

</section>

<section>
  <name>tunnelEcnCeNectByteTotalCount</name>

<t indent="3">Description: The total number of bytes of incoming
packets with the CE|N-ECT ECN marking combination at the Observation
Point since the Metering Process (re-)initialization for this
Observation Point.</t>

<t indent="3">Abstract Data Type: unsigned64</t>

<t indent="3">Data Type Semantics: totalCounter</t>

<t indent="3">ElementId: TBD3</t>

<t indent="3">Status: current</t>

<t indent="3">Units: bytes</t>

</section>

<section>
  <name>tunnelEcnCeEctByteTotalCount</name>

<t indent="3">Description: The total number of bytes of incoming
packets with the CE|ECT ECN marking combination (ECT(0) and ECT(1) are
treated the same as each other) at the Observation Point since the
Metering Process (re-)initialization for this Observation Point.</t>

<t indent="3">Abstract Data Type: unsigned64</t>

<t indent="3">Data Type Semantics: totalCounter</t>

<t indent="3">ElementId: TBD4</t>

<t indent="3">Status: current</t>

<t indent="3">Units: bytes</t>

</section>

<section>
  <name>tunnelEcnEctEctByteTotalCount</name>

<t indent="3">Description: The total number of bytes of incoming
packets with the ECT|ECT ECN marking combination (ECT(0) and ECT(1)
are treated the same as each other) at the Observation Point since the
Metering Process (re-)initialization for this Observation Point.</t>

<t indent="3">Abstract Data Type: unsigned64</t>

<t indent="3">Data Type Semantics: totalCounter</t>

<t indent="3">ElementId: TBD5</t>

<t indent="3">Status: current</t>

<t indent="3">Units: bytes</t>

</section>

<section>
  <name>tunnelEcnCEMarkedRatio</name>

<t indent="3">Description: The ratio of packets that are CE-marked to
packets that are not CE-marked at the Observation Point.</t>

<t indent="3">Abstract Data Type: float32</t>

<t indent="3">ElementId: TBD6</t>

<t indent="3">Status: current</t>

</section>
</section>

<section anchor="IFIXoverNSH">  <!-- 4.4 -->
  <name>IPFIX over NSH</name>

<t>Encapsulating IPFIX messages with an NSH can be an effective method
for transporting such messages within an SFC enabled domain. This is
particularly the case if different outer transport protocols are used
in different parts of such a domain, for example IP in one part and
MPLS in another part.</t>

<t>This is accomplished by setting the Next Protocol field in the NSH
Base Header <xref target="RFC8300"/> to the value TBD7 and placing the
IPFIX message immediately after the NSH (including after any NSH
Metadata). See <xref target="IPFIXnsh"/>.</t>

<figure anchor="IPFIXnsh">
  <name>IPFIX over NSH</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Ver|O|U|    TTL    |   Length  |ECN|U|U|MD Type|Next Proto=TBD7|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|          Service Path Identifier (SPI)        | Service Index |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/   Optional MD (Metadata)                                      /
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|   IPFIX Message                                               |
]]>
   </artwork>
</figure>

</section>
</section>

<section> <!-- 5. -->
  <name>Example of Use</name>

<t>This section provides an example of the solution described in this
document.</t>

<t>First, IPFIX template records are exchanged between ingress and
egress to negotiate the format of the data records to be exchanged.
The example here is to measure the congestion level for the overall
tunnel caused by all the traffic. After the negotiation is finished,
the ingress sends in-band messages to the egress containing the number
of each kind of ECN-marked packets (i.e., CE|CE, ECT|N-ECT and
ECT|ECT) received before it sent the IPFIX message.</t>

<t>After the egress receives the IPFIX message, the egress calculates
the CE-marked packet ratio and counts the number of different kinds of
ECN-marking packets received before it received that message. Then the
egress sends a feedback IPFIX message containing the counts together
with the information in the ingress's message back to the ingress.</t>

<t><xref target="example1"/> to <xref target="example4"/> below
illustrate the procedure between ingress and egress.</t>

<figure anchor="example1">
  <name>Template Record Sent from Egress to Ingress</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
+---------------------------------+----------------------+
|Set ID=2                              Length=40         |
|---------------------------------|----------------------|
|Template ID=256                       Field Count=8     |
|---------------------------------|----------------------|
|tunnelEcnCeCeByteTotalCount           Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnEctNectByteTotalCount        Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnEctEctByteTotalCount         Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnCeNectByteTotalCount         Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnCeEctByteTotalCount          Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnCEMarkedRatio                Field Length=4    |
+---------------------------------+----------------------+
]]>
   </artwork>
</figure>

<t></t>

<figure anchor="example2">
  <name>Template Record Sent from Ingress to Egress</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
+---------------------------------+----------------------+
|Set ID=2                              Length=28         |
|---------------------------------|----------------------|
|Template ID=257                       Field Count=3     |
|---------------------------------|----------------------|
|tunnelEcnCeCeByteTotalCount           Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnEctNectByteTotalCount        Field Length=8    |
|---------------------------------|----------------------|
|tunnelEcnEctEctByteTotalCount         Field Length=8    |
+---------------------------------+----------------------+
]]>
   </artwork>
</figure>

<t></t>

<figure anchor="example3">
  <name>Traffic Flow Between Ingress and Egress</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
+-------+                                        +-------+
|       |  +-+ +-+ +-+  +-+ +-+ +-+  +-+         |       |
|       |  |P| |P| |M|  |P| |P| |P|  |M|         |       |
|       |  +-+ +-+ +-+  +-+ +-+ +-+  +-+         |       |
|       |--------------------------------------->|       |
|       |                                        |       |
|ingress|                                        |egress |
|       |            +-+             +-+         |       |
|       |            |M|             |M|         |       |
|       |            +-+             +-+         |       |
|       |<---------------------------------------|       |
|       |                                        |       |
+-------+                                        +-------+

            +-+
            |M| : IPFIX Message Packet
            +-+

            +-+
            |P| : User Data Packet
            +-+
]]>
   </artwork>
</figure>

<t></t>

<figure anchor="example4">
  <name>IPFIX Messages Exchanged Between Ingress and Egress</name>
    <artwork type="ascii-art" align="center">
<![CDATA[
                 SetID=257, Length=28
+-------+             A1                    +-------+
|       |             B1                    |       |
|       |             C1                    |       |
|       |  ----------------------------->   |       |
|       |                                   |       |
|       |                                   |       |
|       |        SetID=256, Length=72       |       |
|       |             A1                    |       |
|       |             B1                    |       |
|ingress|             C1                    |egress |
|       |             A2                    |       |
|       |             B2                    |       |
|       |             C2                    |       |
|       |             D                     |       |
|       |             E                     |       |
|       |             R                     |       |
|       |    <----------------------------  |       |
|       |                                   |       |
+-------+                                   +-------+
]]>
   </artwork>
</figure>

<t>The following provides an example of how the tunnel congestion
level can be calculated (see <xref target="example4"/>):</t>

  <t indent="3">The congestion level could be divided into two
  categories: (1) slight congestion (no packets dropped); (2) serious
  congestion (packets are being dropped).</t>

  <t indent="3">For slight congestion, the congestion level is
  indicated by the ratio of CE-marked packets:</t>

    <t indent="6">R = ce_marked_ratio = ce_marked / total_egress</t>

  <t indent="3">For serious congestion, the congestion level is
  indicated as the volume of traffic loss:</t>

    <t indent="6">total_ingress = (A1 + B1 + C1)</t>

    <t indent="6">total_egress = (A2 + B2 + C2 + D + E)</t>

    <t indent="6">volume_loss = (total_ingress - total_egress)</t>

</section>

<section anchor="IANA">  <!-- 6. -->
  <name>IANA Considerations</name>

 <t>The following subsections provide IANA assignment
 considerations.</t>

 <section> <!-- 6.1 -->
   <name>SFC NSH Header ECN Bits</name>

 <t>IANA is requested to assign two contiguous bits in the NSH Base
 Header Bits registry for ECN (bits 16 and 17 suggested) and note this
 assignment as follows:</t>

 <table><thead>
   <tr><th align="center">Bit</th><th>Description</th><th
   align="center">Reference</th></tr>
 </thead>
 <tbody>
   <tr><td>tbd(16-17)</td><td align="center">NSH ECN</td><td>[this
   document]</td></tr>
 </tbody></table>
 
 </section>
 <section><!-- 6.2 -->
   <name>SFC NSH Next Protocol Value</name>
 
 <t>IANA is requested to assign a next protocol value in the NSH Next
 Protocol Registry, as follows:</t>

 <table><thead>
   <tr><th align="center">Next Protocol</th><th>Description</th><th
   align="center">Reference</th></tr>
 </thead>
 <tbody>
   <tr><td align="center">TBD7</td><td
   align="center">IPFIX</td><td>[this document]</td></tr>
 </tbody></table>

 </section>

<section>  <!-- 6.3 -->
  <name>IPFIX Information Element IDs</name>

 <t>IANA is requested to assign seven IPFIX Information Element IDs as
 follows:</t>

<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD0</dd>
<dt>Name:</dt><dd>nshServicePathID</dd>
<dt>Data Type:</dt><dd>unsigned32</dd>
<dt>Data Type Semantics:</dt><dd>identifier</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The Network Service Header <xref
target="RFC8300"/> Service Path Identifier.</dd>
</dl>
       
<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD1</dd>
<dt>Name:</dt><dd>tunnelEcnCeCeByteTotalCount</dd>
<dt>Data Type:</dt><dd>unsigned64</dd>
<dt>Data Type Semantics:</dt><dd>totalCounter</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The total number of bytes of incoming packets
with the CE|CE ECN marking combination at the Observation Point since
the Metering Process (re-)initialization for this Observation
Point.</dd>
<dt>Units:</dt><dd>octets</dd>
</dl>

<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD2</dd>
<dt>Name:</dt><dd>tunnelEcnEctNectByteTotalCount</dd>
<dt>Data Type:</dt><dd>unsigned64</dd>
<dt>Data Type Semantics:</dt><dd>totalCounter</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The total number of bytes of incoming packets
with the ECT|N-ECT ECN marking combination at the Observation Point
since the Metering Process (re-)initialization for this Observation
Point.</dd>
<dt>Units:</dt><dd>octets</dd>
</dl>

<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD3</dd>
<dt>Name:</dt><dd>tunnelEcnCeNectByteTotalCount</dd>
<dt>Data Type:</dt><dd>unsigned64</dd>
<dt>Data Type Semantics:</dt><dd>totalCounter</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The total number of bytes of incoming packets
with the CE|N-ECT ECN marking combination at the Observation Point
since the Metering Process (re-)initialization for this Observation
Point.</dd>
<dt>Units:</dt><dd>octets</dd>
</dl>

<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD4</dd>
<dt>Name:</dt><dd>tunnelEcnCeEctByteTotalCount</dd>
<dt>Data Type:</dt><dd>unsigned64</dd>
<dt>Data Type Semantics:</dt><dd>totalCounter</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The total number of bytes of incoming packets
with the CE|ECT ECN marking combination at the Observation Point since
the Metering Process (re-)initialization for this Observation
Point.</dd>
<dt>Units:</dt><dd>octets</dd>
</dl>

<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD5</dd>
<dt>Name:</dt><dd>tunnelEcnEctEctByteTotalCount</dd>
<dt>Data Type:</dt><dd>unsigned64</dd>
<dt>Data Type Semantics:</dt><dd>totalCounter</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The total number of bytes of incoming packets
with the ECT|ECT ECN marking combination at the Observation Point
since the Metering Process (re-)initialization for this Observation
Point.</dd>
<dt>Units:</dt><dd>octets</dd>
</dl>

<dl spacing="compact">
<dt>ElementID:</dt><dd>TBD6</dd>
<dt>Name:</dt><dd>tunnelEcnCEMarkedRatio</dd>
<dt>Data Type:</dt><dd>float32</dd>
<dt>Status:</dt><dd>current</dd>
<dt>Description:</dt><dd>The ratio of CE-marked packets to
non-CE-marked packets at the Observation Point.</dd>
</dl>

</section>
</section>

<section anchor="security">  <!-- 7. -->
  <name>Security Considerations</name>

 <t>For general NSH security considerations, see <xref
 target="RFC8300"/>.</t>

 <t>For security considerations concerning ECN signaling tampering,
 see <xref target="RFC3168"/>. For security considerations concerning
 ECN and encapsulation, see <xref target="RFC6040"/>.</t>

 <t>For general IPFIX security considerations, see <xref
 target="RFC7011"/>. False congestion feedback could cause throttling
 or rerouting. If deployed in an untrusted environment, the signaling
 traffic between ingress and egress can be protected utilizing the
 security mechanisms provided by IPFIX (see Section 11 in <xref
 target="RFC7011"/>).  The tunnel endpoints (the ingress and egress
 for an SFC enabled domain) are assumed to be in the same
 administrative domain, so they will trust each other.</t>

</section>

<section anchor="privacy"> <!-- 8. -->
  <name>Privacy Considerations</name>

 <t>It is important to assure unified administrative control and
 protection against external observation of the SFC domain in which
 the solution presented in this draft is deployed.</t>

 <t>The NSH Service Path Identifier (SPI) and associated metadata can
 be stable identifiers for classes of traffic. When combined with
 IPFIX-communicated congestion and traffic statistics, these
 identifiers can enable correlation of traffic characteristics over
 time. This could allow an observer to infer tenant traffic patterns,
 service usage, or behavior, especially if metadata includes tenant-
 or flow-specific identifiers. In aggregate, it could reveal capacity
 limits, bottleneck locations, peak load times, traffic engineering
 policies, or the like.  Implementations and deployments SHOULD limit
 the inclusion of identifying metadata to what is operationally
 necessary.</t>

</section>

</middle>

<!-- ____________________BACK_MATTER____________________ -->
<back>

<references>
  <name>Normative References</name>

<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.2119.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.3168.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.3758.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.5129.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.6040.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.7011.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.7013.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.7567.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.8174.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.8300.xml"/>

</references>
 
<references>
  <name>Informative References</name>

  <reference anchor="ecnL4S" target="draft-ietf-tsvwg-ecn-l4s-id">
    <front>
      <title>Identifying Modified Explicit Congestion Notification
      (ECN) Semantics for Ultra-Low Queuing Delay (L4S)</title>
      <author initials="K." surname="De Schepper"/>
      <author initials="B." surname="Briscoe"/>
    </front>
    <seriesInfo name="work in" value="Progress"/>
  </reference>
  
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.4301.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.7012.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.7665.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.8311.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.9260.xml"/>
<xi:include
    href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.9263.xml"/>

</references>


<section anchor="Acknowledgements" numbered="false">
  <name>Acknowledgements</name>

  <t>Most of the material on Tunnel Congestion Feedback was originally
  in draft-ietf-tsvwg-tunnel-congestion-feedback. After discussion
  with the authors of that draft, the authors of this draft, and the
  Chairs of the TSVWG and SFC Working Groups, the Tunnel Congestion
  Feedback draft was merged into this draft.</t>
  
 <t>The authors wish to thank the following for their comments,
   suggestions, and reviews:</t>

 <t indent="3">David Black, Mohamed Boucadair, Sami Boutros, Anthony
 Chan, Lingli Deng, Liang Geng, Joel Halpern, Jake Holland, John
 Kaippallimalil, Tal Mizrahi, Vincent Roca, Lei Zhu.</t>

</section>
 
</back>

</rfc>
