@comment{Paper Citations}

@inproceedings{liu-asplos2023,
  author    = {Liu, Jiawei and Lin, Jinkun and Ruffy, Fabian and Tan, Cheng and
               Li, Jinyang and Panda, Aurojit and Zhang, Lingming},
  title     = {{NNSmith}: Generating Diverse and Valid Test Cases for Deep
               Learning Compilers},
  booktitle = {Proceedings of the 27th ACM International Conference on
               Architectural Support for Programming Languages and Operating
               Systems},
  year      = {2023},
  month     = mar,
  publisher = {Association for Computing Machinery},
  abstract  = {Deep-learning (DL) compilers such as TVM and TensorRT are
               increasingly being used to optimize deep neural network (DNN)
               models to meet performance, resource utilization and other
               requirements. Bugs in these compilers can result in models whose
               semantics differ from the original ones, producing incorrect
               results that corrupt the correctness of downstream applications.
               However, finding bugs in these compilers is challenging due to
               their complexity. In this work, we propose a new fuzz testing
               approach for finding bugs in deep-learning compilers. Our core
               approach consists of (i) generating diverse yet valid DNN test
               models that can exercise a large part of the compiler's
               transformation logic using light-weight operator specifications;
               (ii) performing gradient-based search to find model inputs that
               avoid any floating-point exceptional values during model
               execution, reducing the chance of missed bugs or false alarms;
               and (iii) using differential testing to identify bugs. We
               implemented this approach in NNSmith which has found 72 new bugs
               for TVM, TensorRT, ONNXRuntime, and PyTorch to date. Of these 58
               have been confirmed and 51 have been fixed by their respective
               project maintainers.},
}

@inproceedings{berg-hotnets2021,
  author    = {Berg, Jessica and Ruffy, Fabian and Nguyen, Khanh and Yang,
               Nicholas and Kim, Taegyun and Sivaraman, Anirudh and Netravali,
               Ravi and Narayana, Srinivas},
  title     = {{Snicket}: Query-Driven Distributed Tracing},
  booktitle = {Proceedings of the Twentieth ACM Workshop on Hot Topics in
               Networks},
  year      = {2021},
  month     = nov,
  publisher = {Association for Computing Machinery},
  abstract  = {Increasing application complexity has caused applications to be
               refactored into smaller components known as microservices that
               communicate with each other using RPCs. Distributed tracing has
               emerged as an important debugging tool for such
               microservice-based applications. Distributed tracing follows the
               journey of a user request from its starting point at the
               application's front-end, through RPC calls made by the front-end
               to different microservices recursively, all the way until a
               response is constructed and sent back to the user. To reduce
               storage costs, distributed tracing systems sample traces before
               collecting them for subsequent querying, affecting the accuracy
               of queries on the collected traces. We propose an alternative
               system, Snicket, that tightly integrates querying and collection
               of traces. Snicket takes as input a database-style streaming
               query that expresses the analysis the developer wants to perform
               on the trace data. This query is compiled into a distributed
               collection of microservice extensions that run as
               ``bumps-in-the-wire,'' intercepting RPC requests and responses
               as they flow into and out of microservices. This collection of
               extensions implements the query, performing early filtering and
               computation on the traces to reduce the amount of stored data in
               a query-specific manner. We show that Snicket is expressive in
               the queries it can support and can update queries fast enough
               for interactive use.},
}

@inproceedings{ruffy-osdi2020,
  author    = {Ruffy, Fabian and Wang, Tao and Sivaraman, Anirudh},
  title     = {Gauntlet: Finding Bugs in Compilers for Programmable Packet
               Processing},
  booktitle = {14th {USENIX} Symposium on Operating Systems Design and
               Implementation ({OSDI} 20)},
  year      = {2020},
  month     = nov,
  publisher = {{USENIX} Association},
  abstract  = {Programmable packet-processing devices such as programmable
               switches and network interface cards are becoming mainstream.
               These devices are configured in a domain-specific language such
               as P4, using a compiler to translate packet-processing programs
               into instructions for different targets. As networks with
               programmable devices become widespread, it is critical that
               these compilers be dependable. This paper considers the problem
               of finding bugs in compilers for packet processing in the
               context of P4-16. We introduce domain-specific techniques to
               induce both abnormal termination of the compiler (crash bugs)
               and miscompilation (semantic bugs). We apply these techniques to
               (1) the open-source P4 compiler (P4C) infrastructure, which
               serves as a common base for different P4 back ends; (2) the P4
               back end for the P4 reference software switch; and (3) the P4
               back end for the Barefoot Tofino switch. Across the 3 platforms,
               over 8 months of bug finding, our tool Gauntlet detected 96 new
               and distinct bugs (62 crash and 34 semantic), which we confirmed
               with the respective compiler developers. 54 have been fixed (31
               crash and 23 semantic); the remaining have been assigned to a
               developer. Our bug-finding efforts also led to 6 P4 specification
               changes. We have open sourced Gauntlet and it now runs within
               P4C's continuous integration pipeline.},
  internal-note = {Author list restored from the OSDI 20 publication record and
               a URL lost after "open sourced Gauntlet at" was dropped --
               verify both against the published paper.},
}

@inproceedings{wang-hotcloud2020,
  author    = {Wang, Tao and Zhu, Hang and Ruffy, Fabian and Jin, Xin and
               Sivaraman, Anirudh and Ports, Dan and Panda, Aurojit},
  title     = {Multitenancy for Fast and Programmable Networks in the Cloud},
  booktitle = {12th {USENIX} Workshop on Hot Topics in Cloud Computing
               (HotCloud 20)},
  year      = {2020},
  address   = {Virtual Conference},
  publisher = {{USENIX} Association},
  abstract  = {Fast and programmable network devices are now readily available,
               both in the form of programmable switches and smart
               network-interface cards. Going forward, we envision that these
               devices will be widely deployed in the networks of cloud
               providers (e.g., AWS, Azure, and GCP) and exposed as a
               programmable surface for cloud customers---similar to how cloud
               customers can today rent CPUs, GPUs, FPGAs, and ML accelerators.
               Making this vision a reality requires us to develop a mechanism
               to share the resources of a programmable network device across
               multiple cloud tenants. In other words, we need to provide
               multitenancy on these devices. In this position paper, we design
               compile and run-time approaches to multitenancy. We present
               preliminary results showing that our design provides both
               efficient resource utilization and isolation of tenant programs
               from each other.},
}

@misc{ruffy-arxiv2020,
  author        = {Ruffy, Fabian and Chahal, Karanbir},
  title         = {The State of Knowledge Distillation for Classification},
  year          = {2019},
  eprint        = {1912.10850},
  archiveprefix = {arXiv},
  abstract      = {We survey various knowledge distillation (KD) strategies for
                   simple classification tasks and implement a set of
                   techniques that claim state-of-the-art accuracy. Our
                   experiments using standardized model architectures, fixed
                   compute budgets, and consistent training schedules indicate
                   that many of these distillation results are hard to
                   reproduce. This is especially apparent with methods using
                   some form of feature distillation. Further examination
                   reveals a lack of generalizability where these techniques
                   may only succeed for specific architectures and training
                   settings. We observe that appropriately tuned classical
                   distillation in combination with a data augmentation
                   training scheme gives an orthogonal improvement over other
                   techniques. We validate this approach and open-source our
                   code.},
}

@inproceedings{mustard-hotcloud2019,
  author    = {Mustard, Craig and Ruffy, Fabian and Gakhokidze, Anny and
               Beschastnikh, Ivan and Fedorova, Alexandra},
  title     = {Jumpgate: In-Network Processing as a Service for Data Analytics},
  booktitle = {11th {USENIX} Workshop on Hot Topics in Cloud Computing
               (HotCloud 19)},
  year      = {2019},
  address   = {Renton, WA},
  publisher = {{USENIX} Association},
  abstract  = {In-network processing, where data is processed by
               special-purpose devices as it passes over the network, is
               showing great promise at improving application performance, in
               particular for data analytics tasks. However, analytics and
               in-network processing are not yet integrated and widely
               deployed. This paper presents a vision for providing in-network
               processing as a service to data analytics frameworks, and
               outlines benefits, remaining challenges, and our current
               research directions towards realizing this vision.},
}

 title={Iroko: A Framework to Prototype Reinforcement Learning for Data Center
 Traffic Control},
 author={Ruffy, Fabian and Przystupa, Michael and Beschastnikh, Ivan},
 booktitle={Neural Information Processing Systems 2018},
 abstract = {
  Recent networking research has identified that data-driven congestion control
  (CC) can be more efficient than traditional CC in TCP. Deep reinforcement
  learning (RL), in particular, has the potential to learn optimal network
  policies. However, RL suffers from instability and over-fitting, deficiencies
  which so far render it unacceptable for use in datacenter networks.
  In this paper, we analyze the requirements for RL to succeed in the datacenter
  context. We present a new emulator, Iroko, which we developed to support
  different network topologies, congestion control algorithms, and deployment
  scenarios. Iroko interfaces with the OpenAI gym toolkit, which allows for fast
  and fair evaluation of different RL and traditional CC algorithms under the
  same conditions. We present initial benchmarks on three deep RL algorithms
  compared to TCP New Vegas and DCTCP. Our results show that these algorithms
  are able to learn a CC policy which exceeds the performance of TCP New Vegas
  on a dumbbell and fat-tree topology. We make our emulator open-source and
  publicly available:

 title={Linux Network Programming with P4},
 author={Tu, William and Ruffy, Fabian and Budiu, Mihai},
 booktitle={Linux Plumbers Conference 2018},
 abstract = {
  P4 is a domain-specific language for implementing network data-planes. The P4
  abstraction allows programmers to write network protocols in a generalized
  fashion, without needing to know the configuration specifics of the targeted
  data-plane. The extended Berkeley Packet Filter (eBPF) is a safe virtual
  machine for executing sand-boxed programs in the Linux kernel. eBPF, and its
  extension the eXpress Data Path (XDP), effectively serve as programmable
  data-planes of the kernel. P4C-XDP is a project combining the performance of
  XDP with the generality and usability of P4. In this document, we describe
  how P4 can be translated into eBPF/XDP. We review the fundamental limitations
  of both technologies, analyze the performance of several generated XDP
  programs, and discuss problems we have faced while working on this new

 title={VNF chain allocation and management at data center scale},
 author={Kodirov, Nodir and Bayless, Sam and Ruffy, Fabian and Beschastnikh,
  Ivan and Hoos, Holger H and Hu, Alan J},
 booktitle={Proceedings of the 2018 Symposium on Architectures for Networking
  and Communications Systems},
 abstract = {
  Recent advances in network function virtualization have prompted the research
  community to consider data-center-scale deployments. However, existing tools,
  such as E2 and SOL, limit VNF chain allocation to rack-scale and provide
  limited support for management of allocated chains. We define a narrow API to
  let data center tenants and operators allocate and manage arbitrary VNF chain
  topologies, and we introduce NetPack, a new stochastic placement algorithm,
  to implement this API at data-center-scale. We prototyped the resulting
  system, dubbed Daisy, using the Sonata platform. In data-center-scale
  simulations on realistic scenarios and topologies that are orders of magnitude
  larger than prior work, we achieve in all cases an allocation density within
  96% of a recently introduced, theoretically complete, constraint-solver-based
  placement engine, while being 82x faster on average. In detailed emulation
  with real packet traces, we find that Daisy performs each of our six API
  calls with at most one second of throughput drop.

 title={VNF chain abstraction for cloud service providers},
 author={Kodirov, Nodir and Bayless, Sam and Ruffy, Fabian and Beschastnikh,
  Ivan and Hoos, Holger H and Hu, Alan J},
 booktitle={Proceedings of the 2018 Symposium on Architectures for Networking
  and Communications Systems},
 abstract = {
  We propose a VNF chain abstraction to decouple a tenant's view of the VNF
  chain from the cloud provider's implementation. We motivate the benefits of
  such an abstraction for the cloud provider as well as the tenants, and outline
  the challenges a cloud provider needs to address to make the chain abstraction
  practical. We describe the design requirements and report on our initial

 title={A STRIDE-based Security Architecture for Software-Defined Networking},
 author={Ruffy, Fabian and Hommel, Wolfgang and von Eye, Felix},
 booktitle={Proceedings of the Fifteenth International Conference on Networks},
 abstract = {While the novelty of Software-Defined Networking (SDN) - the
  separation of network control and data planes - is appealing and simple enough
  to foster massive vendor support, the resulting impact on the security of
  communication networks infrastructures and their management may be tremendous.
  The paradigm change affects the entire networking architecture. It involves new
  IP-based management communication protocols, and introduces newly engineered,
  potentially immature and vulnerable implementations in both network components
  and SDN controllers. In this paper, the well-known STRIDE threat model is
  applied to the generic SDN concepts as a basis for the design of a secure SDN
  architecture. The key elements are presented in detail along with a discussion
  of potentially fundamental security flaws in the current SDN concepts.