Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers ipv6_fragmentation.c Source File

ipv6_fragmentation.c

00001 /*
00002  * Copyright (c) 2015-2017, Arm Limited and affiliates.
00003  * SPDX-License-Identifier: Apache-2.0
00004  *
00005  * Licensed under the Apache License, Version 2.0 (the "License");
00006  * you may not use this file except in compliance with the License.
00007  * You may obtain a copy of the License at
00008  *
00009  *     http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 /* IPv6 fragmentation and defragmentation
00018  *
00019  * (Could fairly easily be modified to also do IPv4)
00020  *
00021  * References:
00022  *
00023  * RFC  815   IP Datagram Reassembly Algorithms
00024  * RFC 2460   Internet Protocol, Version 6 (IPv6) Specification
00025  * RFC 3168   The Addition of Explicit Congestion Notification (ECN) to IP
00026  * RFC 5722   Handling of Overlapping IPv6 Fragments
00027  * RFC 6040   Tunnelling of Explicit Congestion Notification
00028  * RFC 6145   IP/ICMP Translation Algorithm [sections on Path MTU]
00029  * RFC 6660   Encoding Three Pre-Congestion Notification (PCN) States in the
00030  *            IP Header Using a Single Diffserv Codepoint (DSCP)
00031  * RFC 6946   Processing of IPv6 "Atomic" Fragments
00032  * RFC 7112   Implications of Oversized IPv6 Header Chains
00033  */
00034 #include "nsconfig.h"
00035 #include "ns_types.h"
00036 #include "ns_list.h"
00037 #include "ns_trace.h"
00038 #include "common_functions.h"
00039 #include "nsdynmemLIB.h"
00040 #include <string.h>
00041 #include "ns_trace.h"
00042 #include "Core/include/socket.h"
00043 #include "NWK_INTERFACE/Include/protocol.h"
00044 #include "Common_Protocols/ip.h"
00045 #include "Common_Protocols/ipv6.h"
00046 #include "Common_Protocols/icmpv6.h"
00047 #include "Common_Protocols/ipv6_fragmentation.h"
00048 
00049 #include "NWK_INTERFACE/Include/protocol_stats.h"
00050 
00051 #define TRACE_GROUP "Ifrg"
00052 
00053 /*                         FRAGMENT REASSEMBLY
00054  *
00055  * Allow fragment RX to be disabled for really constrained systems.
00056  * This would violate RFC 2460 and RFC 6434 - all IPv6 nodes must be able to
00057  * process fragment headers and reassemble 1500-octet datagrams.
00058  */
00059 #ifndef NO_IP_FRAGMENT_RX
00060 
/* Current reassembly size limit (MRU) in octets. Within this file it is only
 * modified by ipv6_frag_set_mru(), which keeps it a multiple of 8. */
static uint16_t ipv6_frag_mru = IPV6_FRAG_MRU;
00062 
/* State for one datagram that is in the process of being reassembled.
 * Entries live on frag_list and are matched by (source, destination, id). */
typedef struct ip_fragmented_datagram {
    uint8_t age;                /* Seconds accumulated by ipv6_frag_timer(); dropped once above FRAG_TTL */
    bool discard;               /* Set to ignore all future fragments (and not send Time Exceeded) */
    bool had_last;              /* Set once the final (M=0) fragment has been placed */
    int8_t ecn;                 /* Combined ECN codepoint so far; IP_ECN__ILLEGAL after a forbidden combination */
    uint32_t id;                /* Identification value from the Fragment header */
    uint16_t fragmentable;      /* Offset in buf->buf[] of fragmentable part */
    uint16_t first_hole;        /* Offset of first hole (relative to fragmentable part); 0xffff means no holes */
    buffer_t *buf;              /* Reassembly buffer; its src_sa/dst_sa are also used to match fragments */
    ns_list_link_t link;        /* Linkage for frag_list */
} ip_fragmented_datagram_t;
00074 
/* We reassemble into the datagram buffer in basically the style of RFC 815 */
/* A 6-byte hole descriptor is placed directly in buffer holes */
/* We link them by buffer offset (relative to start of fragmentable section) */
/* Note the possible need to align means we can't use more than 7 bytes */
typedef struct hole {
    uint16_t first;     /* Offset of the first missing octet */
    uint16_t last;      /* Offset of the last missing octet; 0xffff for the open-ended trailing hole */
    uint16_t next;      /* Offset of the next hole in the list; 0xffff terminates the list */
} hole_t;
00084 
00085 /* Given the offset of a hole in the datagram buffer, return an aligned pointer
00086  * to put a hole_t in it. We assume a "normal" platform requiring 2-byte
00087  * alignment for hole_t, and letting us manipulate uintptr_t in the conventional
00088  * fashion.
00089  */
00090 static hole_t *hole_pointer(const ip_fragmented_datagram_t *dgram, uint16_t offset)
00091 {
00092     uintptr_t ptr = (uintptr_t)(dgram->buf->buf + dgram->fragmentable + offset);
00093 
00094     return (hole_t *)((ptr + 1) & ~(uintptr_t) 1);
00095 }
00096 
/* All datagrams currently being reassembled. New entries are added at the
 * start, so the last entry is the one that was created longest ago. */
static NS_LIST_DEFINE(frag_list, ip_fragmented_datagram_t, link);

/* Maximum time to hold fragments in seconds (compared against the uint8_t age field) */
#define FRAG_TTL 60

/* How many partially-assembled datagrams we will hold */
#define MAX_FRAG_DATAGRAMS 4

/* Dummy negative ECN value used during assembly */
#define IP_ECN__ILLEGAL (-1)
00107 
00108 /* RFC 5722 - discard already-received *and future* fragments */
00109 static void invalidate_datagram(ip_fragmented_datagram_t *dgram)
00110 {
00111     // Would like to free the buffer here, but it contains the
00112     // source and destination address we need to match the datagram entry.
00113     dgram->discard = true;
00114 }
00115 
00116 static void free_datagram(ip_fragmented_datagram_t *dgram)
00117 {
00118     ns_list_remove(&frag_list, dgram);
00119     if (dgram->buf) {
00120         buffer_free(dgram->buf);
00121     }
00122     ns_dyn_mem_free(dgram);
00123 }
00124 
00125 /* We would be in trouble if last fragment is < 8 bytes, and we didn't have
00126  * room for the hole descriptor. Avoid a problem by ensuring that we always
00127  * allocate a multiple-of-8 reassembly buffer.
00128  */
00129 uint16_t ipv6_frag_set_mru(uint16_t frag_mru)
00130 {
00131     frag_mru = (frag_mru + 7) &~ UINT16_C(7);
00132     if (frag_mru < IPV6_MIN_FRAG_MRU) {
00133         frag_mru = (IPV6_MIN_FRAG_MRU + 7) &~ UINT16_C(7);
00134     }
00135     if (ipv6_frag_mru != frag_mru) {
00136         /* I don't want to worry about the complications of changing MRU while
00137          * we've got ongoing reassembly. Simplest just to drop any pending.
00138          */
00139         ns_list_foreach_safe(ip_fragmented_datagram_t, dgram, &frag_list) {
00140             free_datagram(dgram);
00141         }
00142         ipv6_frag_mru = frag_mru;
00143     }
00144     return ipv6_frag_mru;
00145 }
00146 
00147 void ipv6_frag_timer(uint8_t secs)
00148 {
00149     ns_list_foreach_safe(ip_fragmented_datagram_t, dgram, &frag_list) {
00150         if ((dgram->age += secs) > FRAG_TTL) {
00151             uint16_t first_hole = dgram->first_hole;
00152             /* If we've received the first fragment, can send "time exceeded" */
00153             if (first_hole != 0 && !dgram->discard) {
00154                 /* Take as much as we've got, up to first hole; icmpv6_error will limit to min MTU */
00155                 dgram->buf->buf_end = dgram->fragmentable + first_hole;
00156                 /* Fill in IP header length */
00157                 common_write_16_bit(buffer_data_length(dgram->buf) - 40, buffer_data_pointer(dgram->buf) + 4);
00158 
00159                 buffer_t *err = icmpv6_error(dgram->buf, NULL, ICMPV6_TYPE_ERROR_TIME_EXCEEDED, ICMPV6_CODE_TME_EXCD_FRG_REASS_TME_EXCD, 0);
00160                 protocol_push(err);
00161                 dgram->buf = NULL;
00162             }
00163             free_datagram(dgram);
00164         }
00165     }
00166 }
00167 
00168 static void delete_hole(ip_fragmented_datagram_t *dgram, uint16_t hole, uint16_t *prev_ptr)
00169 {
00170     hole_t *hole_ptr = hole_pointer(dgram, hole);
00171 
00172     *prev_ptr = hole_ptr->next;
00173 }
00174 
00175 static hole_t *create_hole(ip_fragmented_datagram_t *dgram, uint16_t first, uint16_t last, uint16_t *prev_ptr)
00176 {
00177     hole_t *hole_ptr = hole_pointer(dgram, first);
00178     hole_ptr->first = first;
00179     hole_ptr->last = last;
00180     hole_ptr->next = *prev_ptr;
00181 
00182     *prev_ptr = first;
00183     return hole_ptr;
00184 }
00185 
00186 static ip_fragmented_datagram_t *ip_frag_dgram_lookup(buffer_t *buf, uint32_t id, uint16_t unfrag_len)
00187 {
00188     int_fast8_t count = 0;
00189     ns_list_foreach(ip_fragmented_datagram_t, dgram, &frag_list) {
00190         if (id == dgram->id &&
00191                 addr_ipv6_equal(buf->src_sa .address , dgram->buf->src_sa.address) &&
00192                 addr_ipv6_equal(buf->dst_sa .address , dgram->buf->dst_sa.address)) {
00193             return dgram;
00194         }
00195         count++;
00196     }
00197 
00198     /* Not found - create one */
00199     if (count >= MAX_FRAG_DATAGRAMS) {
00200         free_datagram(ns_list_get_last(&frag_list));
00201     }
00202 
00203     ip_fragmented_datagram_t *new_dgram = ns_dyn_mem_temporary_alloc(sizeof(ip_fragmented_datagram_t));
00204     if (!new_dgram) {
00205         return NULL;
00206     }
00207 
00208     /* We track payload holes as per RFC 815, roughly, and reserve header
00209      * room in front, based on the unfragmentable size of the first-received
00210      * fragment.
00211      *
00212      * So initial state is:
00213      *
00214      * buf_ptr -> default buffer headroom + first-received-fragment header size
00215      * fragmentable = buf_end = buf_ptr = offset of where fragments are assembled.
00216      *
00217      * When we receive the first (0-offset) fragment, we move down buf_ptr to
00218      * put in its header, and when we receive the final (M=0) fragment, we
00219      * set buf_end accordingly.
00220      *
00221      * Two odd cases to worry about:
00222      *
00223      *   1) First fragment is not received first, and has a larger
00224      *      header than our first-received fragment. In this case, we
00225      *      shuffle data if required when we get that first fragment.
00226      *      (Actual shuffle will normally be avoided by buffer headroom slack).
00227      *   2) First fragment is not received first, and has a smaller
00228      *      header than our first-received fragment, meaning an IPV6_MRU-sized
00229      *      datagram may have more fragmented payload than we expected. Avoid
00230      *      a problem in this case by allocating a bigger-than-IPV6_MRU buffer
00231      *      if first-received fragment has extension headers.
00232      */
00233     new_dgram->buf = buffer_get(unfrag_len + ipv6_frag_mru - 40);
00234     if (!new_dgram->buf) {
00235         ns_dyn_mem_free(new_dgram);
00236         return NULL;
00237     }
00238 
00239     new_dgram->fragmentable = new_dgram->buf->buf_end = new_dgram->buf->buf_ptr += unfrag_len;
00240     new_dgram->first_hole = 0xffff;
00241     create_hole(new_dgram, 0, 0xffff, &new_dgram->first_hole);
00242 
00243     new_dgram->buf->src_sa = buf->src_sa ;
00244     new_dgram->buf->dst_sa = buf->dst_sa ;
00245     new_dgram->id = id;
00246     new_dgram->age = 0;
00247     new_dgram->discard = false;
00248     new_dgram->had_last = false;
00249     new_dgram->ecn = buf->options .traffic_class  & IP_TCLASS_ECN_MASK;
00250     ns_list_add_to_start(&frag_list, new_dgram);
00251 
00252     return new_dgram;
00253 }
00254 
/*
 * 4x4 combination array implementing the ECN combination rules from RFC 3168.
 *
 * Indexed as [ECN accumulated so far][ECN of newly received fragment]; the
 * result is the new accumulated ECN, or IP_ECN__ILLEGAL if the datagram must
 * be dropped. The table is symmetric, so the index order does not affect the
 * result.
 *
 * Summary visualisation:      N10C
 *                            +----
 *                           N|NNN-
 *                           1|N11C
 *                           0|N10C
 *                           C|-CCC
 *
 * Key: N = Not-ECT, 1 = ECT(1), 0 = ECT(0), C = CE, - = illegal (drop).
 *
 * Each of the 16 entries, with justification:
 */
static const int8_t frag_ecn_combination[4][4] = {
    // We MUST preserve the ECN codepoint when all fragments match.
    [IP_ECN_NOT_ECT][IP_ECN_NOT_ECT] = IP_ECN_NOT_ECT,
    [IP_ECN_ECT_0  ][IP_ECN_ECT_0  ] = IP_ECN_ECT_0,
    [IP_ECN_ECT_1  ][IP_ECN_ECT_1  ] = IP_ECN_ECT_1,
    [IP_ECN_CE     ][IP_ECN_CE     ] = IP_ECN_CE,

    // We MUST set CE if any fragment has CE...
    [IP_ECN_CE     ][IP_ECN_ECT_0  ] = IP_ECN_CE,
    [IP_ECN_CE     ][IP_ECN_ECT_1  ] = IP_ECN_CE,
    [IP_ECN_ECT_0  ][IP_ECN_CE     ] = IP_ECN_CE,
    [IP_ECN_ECT_1  ][IP_ECN_CE     ] = IP_ECN_CE,

    // ...except we MUST drop the packet if we see CE + Not-ECT.
    [IP_ECN_CE     ][IP_ECN_NOT_ECT] = IP_ECN__ILLEGAL,
    [IP_ECN_NOT_ECT][IP_ECN_CE     ] = IP_ECN__ILLEGAL,

    // For the remaining cases, RFC 3168 leaves us free to do anything.
    // To make the above CE+Not-ECT rule work in all delivery orders, with
    // intervening ECT fragments, Not-ECT overrides ECT.
    [IP_ECN_NOT_ECT][IP_ECN_ECT_0  ] = IP_ECN_NOT_ECT,
    [IP_ECN_NOT_ECT][IP_ECN_ECT_1  ] = IP_ECN_NOT_ECT,
    [IP_ECN_ECT_0  ][IP_ECN_NOT_ECT] = IP_ECN_NOT_ECT,
    [IP_ECN_ECT_1  ][IP_ECN_NOT_ECT] = IP_ECN_NOT_ECT,

    // Last two cases - RFC 3168 doesn't specify, but we follow the
    // model of RFC 6040 and RFC 6660 which for tunnelling make ECT(1)
    // take priority, as it can be used as a mild congestion indication.
    [IP_ECN_ECT_0  ][IP_ECN_ECT_1  ] = IP_ECN_ECT_1,
    [IP_ECN_ECT_1  ][IP_ECN_ECT_0  ] = IP_ECN_ECT_1
};
00298 
00299 /*
00300  * RFC 2460 notes:
00301  *
00302  *  fragment packets:
00303  *
00304  *  +------------------+--------+--------------+
00305  *  |  Unfragmentable  |Fragment|    first     |
00306  *  |       Part       | Header |   fragment   |
00307  *  +------------------+--------+--------------+
00308  *
00309  *  +------------------+--------+--------------+
00310  *  |  Unfragmentable  |Fragment|    second    |
00311  *  |       Part       | Header |   fragment   |
00312  *  +------------------+--------+--------------+
00313  *                        o
00314  *                        o
00315  *                        o
00316  *  +------------------+--------+----------+
00317  *  |  Unfragmentable  |Fragment|   last   |
00318  *  |       Part       | Header | fragment |
00319  *  +------------------+--------+----------+
00320  *
00321  *  reassembled original packet:
00322  *
00323  *  +------------------+----------------------//------------------------+
00324  *  |  Unfragmentable  |                 Fragmentable                   |
00325  *  |       Part       |                     Part                       |
00326  *  +------------------+----------------------//------------------------+
00327  *
00328  *    The following rules govern reassembly:
00329  *
00330  *     An original packet is reassembled only from fragment packets that
00331  *     have the same Source Address, Destination Address, and Fragment
00332  *     Identification.
00333  *
00334  *     The Unfragmentable Part of the reassembled packet consists of all
00335  *     headers up to, but not including, the Fragment header of the first
00336  *     fragment packet (that is, the packet whose Fragment Offset is
00337  *     zero), with the following two changes:
00338  *
00339  *        The Next Header field of the last header of the Unfragmentable
00340  *        Part is obtained from the Next Header field of the first
00341  *        fragment's Fragment header.
00342  *
00343  *        The Payload Length of the reassembled packet is computed from
00344  *        the length of the Unfragmentable Part and the length and offset
00345  *        of the last fragment.
00346  *
00347  *   Fragment Header
00348  *
00349  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
00350  *  |  Next Header  |   Reserved    |      Fragment Offset    |Res|M|
00351  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
00352  *  |                         Identification                        |
00353  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
00354  *
00355  *  Fragment Offset      13-bit unsigned integer.  The offset, in 8-octet
00356  *                       units, of the data following this header,
00357  *                       relative to the start of the Fragmentable Part
00358  *                       of the original packet.
00359  *
00360  *  M flag               1 = more fragments; 0 = last fragment.
00361  */
00362 
00363 /* On entry: frag_hdr -> fragment header
00364  *           nh_ptr -> Next Header octet in previous header
00365  *           payload_length = length of remaining data, including this header
00366  *           buffer data pointers describe entire IP fragment packet
00367  *           buffer src/dst filled in
00368  * Returns: Either reassembled packet (B_DIR_UP | B_TO_IPV6_FWD)
00369  *          or ICMP error response (B_DIR_DOWN | B_TO_ICMP)
00370  *          or NULL (fragment accepted, reassembly in progress)
00371  */
00372 buffer_t *ipv6_frag_up(buffer_t *frag_buf, const uint8_t *frag_hdr, uint8_t *nh_ptr, uint16_t payload_length)
00373 {
00374     if (payload_length <= 8) {
00375         return icmpv6_error(frag_buf, NULL, ICMPV6_TYPE_ERROR_PARAMETER_PROBLEM, ICMPV6_CODE_PARAM_PRB_HDR_ERR, 4);
00376     }
00377 
00378     payload_length -= 8;
00379 
00380     uint8_t *ip_hdr = buffer_data_pointer(frag_buf);
00381     uint16_t unfrag_len = frag_hdr - ip_hdr;
00382     uint16_t fragment_first = common_read_16_bit(frag_hdr + 2) & 0xFFF8;
00383     uint16_t fragment_last = fragment_first + payload_length - 1;
00384     bool more = frag_hdr[3] & 1;
00385 
00386     /* All fragments apart from last must be multiples of 8 */
00387     if (more && (payload_length & 7)) {
00388         return icmpv6_error(frag_buf, NULL, ICMPV6_TYPE_ERROR_PARAMETER_PROBLEM, ICMPV6_CODE_PARAM_PRB_HDR_ERR, 4);
00389     }
00390 
00391     /* Check we don't overflow 16-bit size */
00392     if (fragment_last < fragment_first) {
00393         return icmpv6_error(frag_buf, NULL, ICMPV6_TYPE_ERROR_PARAMETER_PROBLEM, ICMPV6_CODE_PARAM_PRB_HDR_ERR, frag_hdr + 2 - ip_hdr);
00394     }
00395 
00396     if (fragment_first == 0) {
00397         /* Replace "Next Header" byte in previous header */
00398         *nh_ptr = frag_hdr[0];
00399 
00400         if (!more) {
00401             /* Atomic fragment handling - strip out the fragment header.
00402              * See RFC 6946, which says that we require a special case for atomic
00403              * fragments:
00404              *
00405              *   A host that receives an IPv6 packet that includes a Fragment
00406              *   Header with the "Fragment Offset" equal to 0 and the "M" flag
00407              *   equal to 0 MUST process that packet in isolation from any other
00408              *   packets/fragments, even if such packets/fragments contain the same
00409              *   set {IPv6 Source Address, IPv6 Destination Address, Fragment
00410              *   Identification}.
00411              *
00412              * (Conceivably, we could just skip the header and keep parsing,
00413              * but this keeps it consistent with real fragments).
00414              */
00415 
00416             /* Move unfragmentable part up, eliminating fragment header */
00417             memmove(ip_hdr + 8, ip_hdr, unfrag_len);
00418             ip_hdr = buffer_data_strip_header(frag_buf, 8);
00419 
00420             /* Reduce Payload Length in IP header */
00421             uint16_t len = common_read_16_bit(ip_hdr + 4);
00422             common_write_16_bit(len - 8, ip_hdr + 4);
00423 
00424             frag_buf->offset  = unfrag_len;
00425             frag_buf->options .ip_extflags  |= IPEXT_FRAGMENT;
00426             frag_buf->info  = (buffer_info_t)(B_DIR_UP | B_TO_IPV6_FWD | B_FROM_IPV6_FWD);
00427             return frag_buf;
00428         }
00429     }
00430 
00431     /* Adjust buffer pointer to point to fragment data. ip_ptr remains
00432      * pointing at IP header, which we need for first fragment. */
00433     buffer_data_pointer_set(frag_buf, frag_hdr + 8);
00434 
00435     /* Locate or create datagram assembly buffer */
00436     uint32_t id = common_read_32_bit(frag_hdr + 4);
00437     ip_fragmented_datagram_t *dgram = ip_frag_dgram_lookup(frag_buf, id, unfrag_len);
00438     if (!dgram || dgram->discard) {
00439         protocol_stats_update(STATS_IP_RX_DROP, 1);
00440         return buffer_free(frag_buf);
00441     }
00442 
00443     buffer_t *dgram_buf = dgram->buf ;
00444 
00445     /* Length checks. For predictability, best to ensure we always try to
00446      * respect IPV6_MRU as a hard limit, which means a bit of care. */
00447     uint16_t limit;
00448     if (dgram_buf->buf_ptr  == dgram->fragmentable) {
00449         /* Haven't yet got final header size - good enough to do rough check;
00450          * we have enough buffer to fit MRU - min IP header size */
00451         limit = ipv6_frag_mru - 40;
00452     } else {
00453         /* We do know final header size, so can do precise MRU check */
00454         limit = ipv6_frag_mru - (dgram->fragmentable - dgram_buf->buf_ptr );
00455     }
00456     /* Make sure we have room for following data, and hence a hole descriptor */
00457     if (more) {
00458         limit -= 8;
00459     }
00460 
00461     if (fragment_last >= limit) {
00462         /* Fragment would make datagram exceed MRU */
00463         tr_warn("Datagram size %u too big", fragment_last + 1);
00464 fail:
00465         invalidate_datagram(dgram);
00466         protocol_stats_update(STATS_IP_RX_DROP, 1);
00467         return buffer_free(frag_buf);
00468     }
00469 
00470     /* Hole-filling algorithm, basically as per RFC815, but with added
00471      * checks for overlap (RFC 5722). We keep the hole list sorted to aid this,
00472      * (and Time Exceeded messages) - something RFC 815 doesn't strictly require.
00473      */
00474     uint16_t hole_off = dgram->first_hole;
00475     uint16_t *prev_ptr = &dgram->first_hole;
00476     bool okay = false;
00477     do {
00478         hole_t *hole = hole_pointer(dgram, hole_off);
00479         uint_fast16_t hole_first = hole->first;
00480         uint_fast16_t hole_last = hole->last;
00481 
00482         /* Fragment is beyond this hole - move to next (RFC 815 step 2) */
00483         if (fragment_first > hole_last) {
00484             prev_ptr = &hole->next;
00485             hole_off = hole->next;
00486             continue;
00487         }
00488 
00489         /* RFC 815 step 3 would have us check for fragment_last < hole_first,
00490          * and skipping, but we don't need/want to do that - it's covered by
00491          * the next check.
00492          */
00493 
00494         /* Unlike RFC 815, we now check for any overlap (RFC 5722) */
00495         if (fragment_first < hole_first || fragment_last > hole_last) {
00496             break;
00497         }
00498 
00499         /* Unhook this hole from the hole list (RFC 815 step 4) */
00500         delete_hole(dgram, hole_off, prev_ptr);
00501         hole = NULL;
00502 
00503         /* Create a new hole in front if necessary (RFC 815 step 5) */
00504         if (fragment_first > hole_first) {
00505             prev_ptr = &create_hole(dgram, hole_first, fragment_first - 1, prev_ptr)->next;
00506         }
00507 
00508         if (more) {
00509             /* Create a following hole if necessary (RFC 815 step 6) */
00510             if (fragment_last < hole_last) {
00511                 create_hole(dgram, fragment_last + 1, hole_last, prev_ptr);
00512             }
00513         } else {
00514             /* If we already have some later data, it's broken. */
00515             if (hole_last != 0xffff) {
00516                 break;
00517             }
00518             dgram->had_last = true;
00519         }
00520 
00521         /* Update end of buffer, if this is the last-placed fragment so far */
00522         if (hole_last == 0xffff) {
00523             dgram_buf->buf_end  = dgram->fragmentable + fragment_last + 1;
00524         }
00525 
00526         /* Unlike RFC 815, we're now done. We don't allow overlaps, so we finish
00527          * as soon as we identify one hole that it entirely or partially fills */
00528         okay = true;
00529         break;
00530     } while (hole_off != 0xffff);
00531 
00532     /* If /any/ reassembly problems - overlaps etc - abandon the datagram */
00533     if (!okay) {
00534         tr_warn("Reassembly error");
00535         goto fail;
00536     }
00537 
00538     /* Hole list updated, can now copy in the fragment data */
00539     memcpy(dgram_buf->buf  + dgram->fragmentable + fragment_first, buffer_data_pointer(frag_buf), fragment_last + 1 - fragment_first);
00540 
00541     /* Combine the "improper security" flags, so reassembled buffer's flag is set if any fragment wasn't secure */
00542     /* XXX should have some sort of overall "merge buffer metadata" routine handling this and whatever else */
00543     dgram_buf->options .ll_security_bypass_rx  |= frag_buf->options .ll_security_bypass_rx ;
00544 
00545     /* Combine the ECN field */
00546     dgram->ecn = frag_ecn_combination[dgram->ecn][frag_buf->options .traffic_class  & IP_TCLASS_ECN_MASK];
00547     if (dgram->ecn == IP_ECN__ILLEGAL) {
00548         tr_warn("Illegal ECN");
00549         goto fail;
00550     }
00551 
00552     /* Overlap checks above ensure first-packet processing only happens once */
00553     if (fragment_first == 0) {
00554         /* Now know final header size, so repeat MRU check */
00555         uint16_t frag_so_far = dgram_buf->buf_end  - dgram->fragmentable;
00556         if (!dgram->had_last) {
00557             /* This fudge factor represents our expectation of more data, and
00558              * also makes sure we memmove the trailing hole descriptor. */
00559             frag_so_far += 8;
00560         }
00561         if (unfrag_len + frag_so_far > ipv6_frag_mru) {
00562             tr_warn("Datagram size %u too big", unfrag_len + frag_so_far);
00563             goto fail;
00564         }
00565 
00566         if (dgram_buf->buf_ptr  < unfrag_len) {
00567             /* Didn't reserve enough space for header. Shuffle data up into what will be final position */
00568             /* We know we have buffer room, thanks to previous checks against IPV6_MRU */
00569             uint16_t new_frag_offset = dgram_buf->size  - ipv6_frag_mru + unfrag_len;
00570             memmove(dgram_buf->buf  + new_frag_offset, dgram_buf->buf  + dgram->fragmentable, frag_so_far);
00571             dgram->buf->buf_ptr = dgram->fragmentable = new_frag_offset;
00572         }
00573 
00574         /* Move the start pointer, and copy the header */
00575         memcpy(buffer_data_reserve_header(dgram_buf, unfrag_len), ip_hdr, unfrag_len);
00576 
00577         /* Clone the buffer header from this first fragment, preserving only size + pointers */
00578         /* Also the security flag, already merged above */
00579         bool buf_security = dgram_buf->options .ll_security_bypass_rx ;
00580         buffer_copy_metadata(dgram_buf, frag_buf, true);
00581         dgram_buf->options .ll_security_bypass_rx  = buf_security;
00582         /* Mark position of fragment header - allows skipping previous headers */
00583         dgram_buf->offset  = unfrag_len;
00584         dgram_buf->options .ip_extflags  |= IPEXT_FRAGMENT;
00585     }
00586 
00587     /* Free the original fragment buffer - we've extracted its juice */
00588     buffer_free(frag_buf);
00589 
00590     /* Thanks to David Clark, completion check is now simple */
00591     if (dgram->first_hole != 0xffff) {
00592         /* Not yet complete - processing finished on this fragment */
00593         return NULL;
00594     }
00595 
00596     /* First 8 bytes of the IP header, currently from the first fragment,
00597      * that we need to patch:
00598      * .               .               .               .               .
00599      * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
00600      * |Version|   DSCP    |ECN|           Flow Label                  |
00601      * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
00602      * |         Payload Length        |  Next Header  |   Hop Limit   |
00603      * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
00604      */
00605 
00606     /* Fill in the combined ECN - 2 bits in the middle of the second byte */
00607     buffer_data_pointer(dgram_buf)[1] &= ~(3 << 4);
00608     buffer_data_pointer(dgram_buf)[1] |= (dgram->ecn << 4);
00609 
00610     /* Fill in final IP header length */
00611     common_write_16_bit(buffer_data_length(dgram_buf) - 40, buffer_data_pointer(dgram_buf) + 4);
00612 
00613     /* We've completed the datagram. Free the assembly structures (but not the buffer!) */
00614     dgram->buf = NULL;
00615     free_datagram(dgram);
00616 
00617     /* Send on the completed datagram */
00618     dgram_buf->info  = (buffer_info_t)(B_DIR_UP | B_TO_IPV6_FWD | B_FROM_IPV6_FWD);
00619     return dgram_buf;
00620 }
00621 #endif /* NO_IP_FRAGMENT_RX */
00622 
00623 
00624 /*                         FRAGMENT CREATION
00625  *
00626  * Allow fragment TX to be disabled for constrained systems.
00627  * This would violate RFC 6434, which says all IPv6 nodes must be able to
00628  * generate fragment headers. (Even if our only link has the minimum 1280-byte
00629  * MTU, we may still need to insert a fragment header).
00630  */
00631 #ifndef NO_IP_FRAGMENT_TX
/* Add a Fragment header to an outgoing datagram, splitting it into several
 * fragment packets if it is too large to be sent as one atomic fragment.
 *
 * On entry: dgram_buf data pointers describe the complete IPv6 datagram
 *           (assumed well-formed, as it was generated locally).
 * Returns: the datagram itself, with an atomic fragment header inserted,
 *          when it is no longer than IPV6_MIN_LINK_MTU - 8;
 *          otherwise NULL - on success the fragments have been handed to
 *          protocol_push() and the original reported as SOCKET_TX_DONE, on
 *          failure nothing is sent and the original is reported as
 *          SOCKET_NO_RAM.
 *
 * dgram_buf is always consumed unless it is the return value.
 */
buffer_t *ipv6_frag_down(buffer_t *dgram_buf)
{
    uint8_t *ip_ptr = buffer_data_pointer(dgram_buf);
    uint16_t pmtu = ipv6_mtu(dgram_buf);
    uint8_t *frag_hdr;
    buffer_list_t frags_list = NS_LIST_INIT(frags_list);
    /* The destination entry supplies the per-destination fragment ID counter */
    ipv6_destination_t *dest = ipv6_destination_lookup_or_create(dgram_buf->dst_sa .address , dgram_buf->interface ->id);
    if (!dest) {
        return buffer_free(dgram_buf);
    }

    /* Skip over HbH and Routing headers to reach fragmentable part. Assume
     * packet well-formed (we created it...).
     */
    uint8_t *nh_ptr = &ip_ptr[6];
    uint8_t nh = *nh_ptr;
    uint8_t *fragmentable = ip_ptr + 40;
    while (nh == IPV6_NH_HOP_BY_HOP || nh == IPV6_NH_ROUTING) {
        nh_ptr = &fragmentable[0];
        nh = *nh_ptr;
        /* Extension header length octet counts 8-octet units beyond the first 8 */
        fragmentable += (fragmentable[1] + 1) * 8;
    }
    uint16_t unfrag_len = fragmentable - ip_ptr;
    uint16_t fragmentable_len = buffer_data_end(dgram_buf) - fragmentable;

    /* Patch the original in place: nh keeps the displaced Next Header value
     * for the Fragment header, and every copy of the unfragmentable part
     * made below then already chains to the Fragment header. */
    *nh_ptr = IPV6_NH_FRAGMENT;

    /* Special case for atomic fragments (caused by a small PMTU) */
    /* Note that we DO have the option of actually fragmenting and obeying
     * a small PMTU, which would avoid this special case.
     */
    if (buffer_data_length(dgram_buf) <= IPV6_MIN_LINK_MTU - 8) {
        dgram_buf = buffer_headroom(dgram_buf, 8);
        if (!dgram_buf) {
            return NULL;
        }

        /* Move unfragmentable section back 8 bytes; increase IP length field */
        ip_ptr = buffer_data_reserve_header(dgram_buf, 8);
        memmove(ip_ptr, ip_ptr + 8, unfrag_len);
        common_write_16_bit(common_read_16_bit(ip_ptr + 4) + 8, ip_ptr + 4);

        /* Write atomic fragment header into the gap */
        frag_hdr = ip_ptr + unfrag_len;
        frag_hdr[0] = nh;
        frag_hdr[1] = 0;
        /* Offset 0, M flag clear */
        common_write_16_bit(0, frag_hdr + 2);
        common_write_32_bit(++dest->fragment_id, frag_hdr + 4);
        return dgram_buf;
    }

    /* We won't fragment below minimum MTU. (Although we could...) */
    if (pmtu < IPV6_MIN_LINK_MTU) {
        pmtu = IPV6_MIN_LINK_MTU;
    }

    /* Check for silly situation - can't fit any fragment data (8 for fragment
     * header, 8 for minimum fragment payload) */
    if (unfrag_len + 8 + 8 > pmtu) {
        goto failed;
    }

    /* One ID shared by all fragments of this datagram */
    ++dest->fragment_id;

    /* RFC 7112 requires the entire header chain to be in the first fragment. */
    /* We don't explicitly check for this, but it would be spectacularly unlikely. */
    /* I think it would require a super-sized routing header */

    /* This is much simpler (more simplistic?) than the 6LoWPAN fragmentation,
     * which relies on co-operation with lower layers to ensure it works one
     * fragment at a time. We make all the fragments in one go, meaning higher
     * overhead, but IP fragmentation should be pretty rare - we don't need
     * to optimise this.
     */
    for (uint16_t frag_offset = 0; fragmentable_len;) {
        /* How much going in this packet? */
        uint16_t frag_len = (pmtu - unfrag_len - 8);
        if (fragmentable_len > frag_len) {
            /* Not the last fragment, so data length must be a multiple of 8 */
            frag_len &= ~7;
        } else {
            frag_len = fragmentable_len;
        }

        buffer_t *frag_buf = buffer_get(unfrag_len + 8 + frag_len);
        if (!frag_buf) {
            goto failed;
        }

        /* Clone the buffer header, apart from size+ptr */
        buffer_copy_metadata(frag_buf, dgram_buf, false);

        /* We splat the socket, so no upper-layer callbacks from the fragments */
        buffer_socket_set(frag_buf, NULL);

        /* Construct the new packet contents */
        buffer_data_length_set(frag_buf, unfrag_len + 8 + frag_len);
        uint8_t *ptr = buffer_data_pointer(frag_buf);
        /* Unfragmentable part */
        memcpy(ptr, ip_ptr, unfrag_len);
        /* Adjust length in IP header */
        common_write_16_bit(unfrag_len - 40 + 8 + frag_len, ptr + 4);
        /* Fragment header */
        frag_hdr = ptr + unfrag_len;
        frag_hdr[0] = nh;
        frag_hdr[1] = 0;
        /* Byte offset is a multiple of 8, so it sits directly in the offset
         * field; bit 0 is the M flag, set unless this fragment takes all
         * the remaining data */
        common_write_16_bit(frag_offset | (frag_len != fragmentable_len), frag_hdr + 2);
        common_write_32_bit(dest->fragment_id, frag_hdr + 4);
        /* Fragment data */
        memcpy(frag_hdr + 8, fragmentable + frag_offset, frag_len);
        fragmentable_len -= frag_len;
        frag_offset += frag_len;

        /* Add to our fragment list */
        ns_list_add_to_start(&frags_list, frag_buf);
    }

    /* Now have a list of fragment buffers - report "success" to the socket */
    /* (TCP may save the dgram payload here? It strips off headers, so okay...) */
    socket_tx_buffer_event_and_free(dgram_buf, SOCKET_TX_DONE);

    /* Push the fragments. Backwards, as it happens, but who cares? */
    ns_list_foreach_safe(buffer_t, f, &frags_list) {
        ns_list_remove(&frags_list, f);
        protocol_push(f);
    }

    return NULL;

failed:
    /* Failed to allocate a buffer - no point sending any fragments if we
     * can't send all.
     * (Also reached when the unfragmentable part leaves no room for any
     * fragment data; that case is reported as SOCKET_NO_RAM too.)
     */
    ns_list_foreach_safe(buffer_t, f, &frags_list) {
        ns_list_remove(&frags_list, f);
        buffer_free(f);
    }

    socket_tx_buffer_event_and_free(dgram_buf, SOCKET_NO_RAM);
    return NULL;
}
00772 #endif /* NO_IP_FRAGMENT_TX */