WAND Trace processing  4.0.5
format_dpdk.h
1 #ifndef LIBTRACE_FORMAT_DPDK_H_
2 #define LIBTRACE_FORMAT_DPDK_H_
3 
4 #include <libtrace.h>
5 #include "libtrace_int.h"
6 
7 /* We can deal with any minor differences by checking the RTE VERSION.
8  * DPDK typically backports some fixes (usually for building against
9  * newer kernels) to older versions of DPDK.
10  *
11  * These get released with the rX suffix. The following macros were added
12  * in these new releases.
13  *
14  * Below this is a log of the versions that required changes to the libtrace
15  * code (that we still attempt to support).
16  *
17  * DPDK 16.04 or newer is recommended.
18  * However 1.6 and newer are still likely supported.
19  */
20 #include <rte_eal.h>
21 #include <rte_version.h>
22 #ifndef RTE_VERSION_NUM /* fallback, used only when the DPDK headers included above do not define it */
23 # define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d)) /* packs a.b.c.d at bit offsets 24/16/8/0 so versions compare as plain integers */
24 #endif
25 #ifndef RTE_VER_PATCH_RELEASE /* fallback: a missing patch-release number is treated as 0 */
26 # define RTE_VER_PATCH_RELEASE 0
27 #endif
28 #ifndef RTE_VERSION /* fallback: assemble RTE_VERSION from the individual RTE_VER_* components */
29 # define RTE_VERSION RTE_VERSION_NUM(RTE_VER_MAJOR,RTE_VER_MINOR, \
30  RTE_VER_PATCH_LEVEL, RTE_VER_PATCH_RELEASE)
31 #endif
32 
33 /* 1.6.0r2 :
34  * rte_eal_pci_set_blacklist() is removed.
35  * device_list is renamed to pci_device_list.
36  * In the 1.7.0 release rte_eal_pci_probe is called by rte_eal_init,
37  * so we apply the whitelist before calling rte_eal_init.
38  * This also works correctly with DPDK 1.6.0r2.
39  *
40  * Replaced by:
41  * rte_devargs (we can simply whitelist)
42  */
43 #if RTE_VERSION <= RTE_VERSION_NUM(1, 6, 0, 1) /* i.e. 1.6.0r1 and older */
44 # define DPDK_USE_BLACKLIST 1
45 #else /* 1.6.0r2 and newer */
46 # define DPDK_USE_BLACKLIST 0
47 #endif
48 
49 /*
50  * 1.7.0 :
51  * rte_pmd_init_all is removed.
52  *
53  * Replaced by:
54  * Nothing, it is no longer needed.
55  */
56 #if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 0) /* releases before 1.7.0 */
57 # define DPDK_USE_PMD_INIT 1
58 #else /* 1.7.0 and newer, where rte_pmd_init_all no longer exists */
59 # define DPDK_USE_PMD_INIT 0
60 #endif
61 
62 /* 1.7.0-rc3 :
63  *
64  * Since 1.7.0-rc3 rte_eal_pci_probe is called as part of rte_eal_init.
65  * Somewhere between 1.7 and 1.8 calling it twice broke, so we must not call
66  * it a second time on those releases.
67  */
68 #if RTE_VERSION < RTE_VERSION_NUM(1, 7, 0, 3) /* before 1.7.0-rc3: rte_eal_init does not probe */
69 # define DPDK_USE_PCI_PROBE 1
70 #else /* 1.7.0-rc3 and newer: rte_eal_init already probes */
71 # define DPDK_USE_PCI_PROBE 0
72 #endif
73 
74 /* 1.8.0-rc1 :
75  * LOG LEVEL is a command line option which overrides what
76  * we previously set it to.
77  */
78 #if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 1) /* 1.8.0-rc1 and newer */
79 # define DPDK_USE_LOG_LEVEL 1
80 #else /* older releases */
81 # define DPDK_USE_LOG_LEVEL 0
82 #endif
83 
84 /* 1.8.0-rc2
85  * rx/tx_conf thresholds can be set to NULL in rte_eth_rx/tx_queue_setup;
86  * this uses the default values, which are better tuned per device.
87  * See issue #26
88  */
89 #if RTE_VERSION >= RTE_VERSION_NUM(1, 8, 0, 2) /* 1.8.0-rc2 and newer */
90 # define DPDK_USE_NULL_QUEUE_CONFIG 1
91 #else /* older releases */
92 # define DPDK_USE_NULL_QUEUE_CONFIG 0
93 #endif
94 
95 /* 2.0.0-rc1
96  * Unifies RSS hash between cards
97  */
98 #if RTE_VERSION >= RTE_VERSION_NUM(2, 0, 0, 1) /* 2.0.0-rc1 and newer: IP, UDP, TCP and SCTP flags */
99 # define RX_RSS_FLAGS (ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP | \
100  ETH_RSS_SCTP)
101 #else /* older releases: separate IPv4/IPv6 flags, with no SCTP entry in this list */
102 # define RX_RSS_FLAGS (ETH_RSS_IPV4_UDP | ETH_RSS_IPV6 | ETH_RSS_IPV4 | \
103  ETH_RSS_IPV4_TCP | ETH_RSS_IPV6_TCP |\
104  ETH_RSS_IPV6_UDP)
105 #endif
106 
107 /* v16.07-rc1 - deprecated
108  * rte_mempool_avail_count to replace rte_mempool_count
109  * rte_mempool_in_use_count to replace rte_mempool_free_count
110  */
111 #if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1) /* before 16.07-rc1: the new names expand to the old functions */
112 #define rte_mempool_avail_count rte_mempool_count
113 #define rte_mempool_in_use_count rte_mempool_free_count
114 #endif
115 
116 /* 17.05-rc1 deprecated, 17.08 removed
117  * rte_set_log_level -> rte_log_set_global_level
118  */
119 #if RTE_VERSION < RTE_VERSION_NUM(17, 5, 0, 1) /* before 17.05-rc1: the new name expands to the old function */
120 #define rte_log_set_global_level rte_set_log_level
121 #endif
122 
123 /* 17.11-rc1 increases port size from 8 to 16 bits;
124  * portid_t is the integer type of matching width for the DPDK in use. */
125 #if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 1) /* 17.11-rc1 and newer */
126 typedef uint16_t portid_t;
127 #else /* older releases */
128 typedef uint8_t portid_t;
129 #endif
130 
131 
132 #include <rte_per_lcore.h>
133 #include <rte_debug.h>
134 #include <rte_errno.h>
135 #include <rte_common.h>
136 #include <rte_log.h>
137 #include <rte_memcpy.h>
138 #include <rte_prefetch.h>
139 #include <rte_branch_prediction.h>
140 #include <rte_pci.h>
141 #include <rte_ether.h>
142 #include <rte_ethdev.h>
143 #include <rte_ring.h>
144 #include <rte_mempool.h>
145 #include <rte_mbuf.h>
146 #include <rte_launch.h>
147 #include <rte_lcore.h>
148 #include <rte_per_lcore.h>
149 #include <rte_cycles.h>
150 #include <pthread.h>
151 #ifdef __FreeBSD__
152 #include <pthread_np.h>
153 #endif
154 
155 
156 /* 16.04-rc3 ETH_LINK_SPEED_X are replaced with ETH_SPEED_NUM_X.
157  * ETH_LINK_SPEED_ are reused as flags, ugly.
158  * We use the new names in this code.
159  */
160 #ifndef ETH_SPEED_NUM_1G /* new names absent: map them onto the old ETH_LINK_SPEED_* constants */
161  #define ETH_SPEED_NUM_1G ETH_LINK_SPEED_1000
162  #define ETH_SPEED_NUM_10G ETH_LINK_SPEED_10G
163  #define ETH_SPEED_NUM_20G ETH_LINK_SPEED_20G
164  #define ETH_SPEED_NUM_40G ETH_LINK_SPEED_40G
165 #endif
166 
167 /* The default size of memory buffers to use - This is the max size of standard
168  * ethernet packet less the size of the MAC CHECKSUM, rounded up to the
169  * next power of 2, plus the RTE_PKTMBUF_HEADROOM. */
170 #define RX_MBUF_SIZE (2048 + RTE_PKTMBUF_HEADROOM)
171 
172 /* The minimum number of memory buffers per queue tx or rx. Based on
173  * the requirement of the memory pool with 128 per thread buffers, needing
174  * at least 128*1.5 = 192 buffers. Our code allocates 128*2 to be safe.
175  */
176 #define MIN_NB_BUF 128
177 
178 /* Number of receive memory buffers to use.
179  * By default this is limited by the driver to 4k and must be a multiple of 128.
180  * A modification can be made to the driver to remove this limit.
181  * This can be increased in the driver and here.
182  * Should be at least MIN_NB_BUF.
183  * We choose 2K rather than 4K because it enables the usage of sse vector
184  * drivers which are significantly faster than using the larger buffer.
185  */
186 #define NB_RX_MBUF (4096/2) /* = 2048 */
187 
188 /* Number of send memory buffers to use.
189  * The same limits apply as for NB_RX_MBUF.
190  */
191 #define NB_TX_MBUF 1024
192 
193 /* The size of the PCI blacklist needs to be big enough to contain
194  * every PCI device address (listed by lspci every bus:device.function tuple).
195  */
196 #define BLACK_LIST_SIZE 50
197 
198 /* The maximum number of characters the mempool name can be */
199 #define MEMPOOL_NAME_LEN 20
200 
201 /* For single threaded libtrace we read packets as a batch/burst
202  * this is the maximum size of said burst */
203 #define BURST_SIZE 32
204 
205 
206 /* ~~~~~~~~~~~~~~~~~~~~~~ Advanced settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~
207  * THESE MAY REQUIRE MODIFICATIONS TO INTEL DPDK
208  *
209  * Make sure you understand what these are doing before enabling them.
210  * They might make traces incompatible with other builds etc.
211  *
212  * These are also included to show how to do some things which aren't
213  * obvious in the DPDK documentation.
214  */
215 
216 /* Print verbose messages to stderr */
217 #define DEBUG 0
218 
219 /* Use clock_gettime() for nanosecond resolution rather than gettimeofday().
220  * Only turn this on if you know clock_gettime is a vsyscall on your system,
221  * otherwise it could be a large overhead. gettimeofday() should be a
222  * vsyscall too; if it is not, you should seriously consider updating your
223  * kernel.
224  */
225 #ifdef HAVE_CLOCK_GETTIME
226 /* clock_gettime is available: 1 prefers it over gettimeofday() */
227 #define USE_CLOCK_GETTIME 1
228 #else
229 /* HAVE_CLOCK_GETTIME is not defined: DON'T CHANGE THIS !!! */
230 #define USE_CLOCK_GETTIME 0
231 #endif
232 
233 /* This is fairly safe to turn on - currently there appears to be a 'bug'
234  * in DPDK that will remove the checksum by making the packet appear 4bytes
235  * smaller than what it really is. Most formats don't include the checksum
236  * hence writing out a port such as int: ring: and dpdk: assumes there
237  * is no checksum and will attempt to write the checksum as part of the
238  * packet
239  */
240 #define GET_MAC_CRC_CHECKSUM 0
241 
242 /* This requires a modification of the pmd drivers (inside Intel DPDK)
243  * TODO this requires updating (packet sizes are wrong TS most likely also)
244  */
245 #define HAS_HW_TIMESTAMPS_82580 0
246 
247 #if HAS_HW_TIMESTAMPS_82580
248 # define TS_NBITS_82580 40 /* number of valid bits in the 82580 hardware timestamp */
249 /* The maximum skew on the +ve or -ve side: half of the 2^TS_NBITS_82580 range */
250 # define MAXSKEW_82580 ((uint64_t) (.5 * (double)(1ull<<TS_NBITS_82580)))
251 #define WITHIN_VARIANCE(v1,v2,var) (((v1) - (var) < (v2)) && ((v1) + (var) > (v2))) /* true when v2 lies strictly between v1 - var and v1 + var; v1 and var are each evaluated twice, and with unsigned operands v1 - var wraps when var > v1 */
252 #endif
253 
254 /* As per the Intel 82580 specification. Note a mismatch in the 82580
255  * datasheet: it states ts is stored in Big Endian, however it's actually Little Endian */
257  uint64_t reserved;
258  uint64_t timestamp; /* Little Endian only lower 40 bits are valid */
259 };
260 
261 enum paused_state { /* NOTE(review): presumably the start/pause lifecycle of a DPDK input - confirm against the code that uses it */
262  DPDK_NEVER_STARTED, /* first enumerator, so its value is 0 */
263  DPDK_RUNNING,
264  DPDK_PAUSED,
265 };
266 
268 {
269  uint16_t queue_id;
270  uint64_t ts_last_sys; /* System timestamp of our most recent packet in nanoseconds */
271  struct rte_mempool *mempool;
272  int lcore;
273 #if HAS_HW_TIMESTAMPS_82580
274  /* Timestamping only relevant to RX */
275  uint64_t ts_first_sys; /* System timestamp of the first packet in nanoseconds */
276  uint32_t wrap_count; /* Number of times the NIC clock has wrapped around completely */
277 #endif
278 } ALIGN_STRUCT(CACHE_LINE_SIZE);
279 
280 #if HAS_HW_TIMESTAMPS_82580 /* positional initialiser, in field order: queue_id, ts_last_sys, mempool, lcore, ts_first_sys, wrap_count */
281 #define DPDK_EMPTY_STREAM {-1, 0, NULL, -1, 0, 0}
282 #else /* without hardware timestamps only: queue_id, ts_last_sys, mempool, lcore */
283 #define DPDK_EMPTY_STREAM {-1, 0, NULL, -1}
284 #endif /* queue_id is declared uint16_t, so its -1 is stored as 65535; lcore is a plain int and keeps -1 */
285 
286 typedef struct dpdk_per_stream_t dpdk_per_stream_t;
287 
288 
/* Functions exported by the DPDK format. Only the declarations appear in this
 * header; what each function does is defined elsewhere.
 * NOTE(review): judging by the names alone, these cover event polling,
 * (parallel) start, configuration, initialisation, pause, teardown, packet
 * reading, per-thread registration, statistics and framing length - confirm
 * against the definitions before relying on this summary. */
289 libtrace_eventobj_t dpdk_trace_event(libtrace_t *trace,
290  libtrace_packet_t *packet);
291 int dpdk_pstart_input (libtrace_t *libtrace);
292 int dpdk_start_input (libtrace_t *libtrace);
293 int dpdk_config_input (libtrace_t *libtrace,
294  trace_option_t option, void *data);
295 int dpdk_init_input (libtrace_t *libtrace);
296 int dpdk_pause_input(libtrace_t * libtrace);
297 int dpdk_fin_input(libtrace_t * libtrace);
298 int dpdk_read_packet (libtrace_t *libtrace, libtrace_packet_t *packet);
299 int dpdk_pregister_thread(libtrace_t *libtrace, libtrace_thread_t *t,
300  bool reading);
301 void dpdk_punregister_thread(libtrace_t *libtrace, libtrace_thread_t *t);
302 void dpdk_get_stats(libtrace_t *trace, libtrace_stat_t *stats);
303 int dpdk_get_framing_length (const libtrace_packet_t *packet) ;
304 int dpdk_read_packet_stream (libtrace_t *libtrace,
305  dpdk_per_stream_t *stream,
307  struct rte_mbuf* pkts_burst[],
308  size_t nb_packets);
309 int dpdk_prepare_packet(libtrace_t *libtrace,
310  libtrace_packet_t *packet, void *buffer,
311  libtrace_rt_types_t rt_type, uint32_t flags);
312 #endif
Trace file processing library header.
A libtrace input trace.
Definition: libtrace_int.h:306
Definition: format_dpdk.h:256
trace_option_t
Valid configuration options for input traces.
Definition: libtrace.h:1306
The libtrace packet structure.
Definition: libtrace.h:543
Information of this thread.
Definition: libtrace_int.h:198
libtrace_rt_types_t
RT protocol packet types.
Definition: libtrace.h:406
Structure returned by libtrace_event explaining what the current event is.
Definition: libtrace.h:1870
Header file containing definitions for structures and functions that are internal.
Definition: message_queue.h:35
Statistic counters are cumulative from the time the trace is started.
Definition: libtrace.h:1432
Flags.
Definition: dagformat.h:81
Definition: format_dpdk.h:267