Paper Citations

% HotNets 2024 workshop paper describing Flay, an incremental partial evaluator for P4 programs.
@inproceedings{ruffy-hotnets2024,
  author    = {Ruffy, Fabian and Wang, Zhanghan and Antichi, Gianni and Panda, Aurojit and Sivaraman, Anirudh},
  title     = {Incremental Specialization of Network Programs},
  booktitle = {Proceedings of the Twenty-Third ACM Workshop on Hot Topics in Networks},
  publisher = {Association for Computing Machinery},
  year      = {2024},
  month     = nov,
  abstract  = {Programmable network devices process packets using limited time and space. Consequently, much effort has been spent making network programs run as efficiently as possible. One promising line of work focuses on specializing the implementation of a network program to a particular---presumed constant---control-plane configuration. However, while some parts of the control plane configurations are constant for long periods of time, others change frequently, and in bursts (e.g., due to routing table updates).

Thus, any approach that specializes a network program with respect to control-plane configurations must be incremental: it should be able to tell quickly whether a new control-plane update will affect the program's implementation and recompile the program only when its implementation must change. We describe several benefits of such an approach, including reducing resource use on line-rate pipelines and improving the memory footprint of packet classification. We explore our ideas with a prototype, Flay, an incremental partial evaluator that optimizes P4 programs by treating control-plane entries as constant. Flay can reduce resources in implementations of Tofino programs and determine in 100s of milliseconds whether a control-plane update will change a program's implementation. We conclude by outlining several avenues for future work.
},
}

% SIGCOMM 2023 paper on P4Testgen.
% The booktitle carries only the proceedings name; the year lives in the year field.
% Braces in the title keep tool and language names from being lowercased by sentence-casing styles.
@inproceedings{ruffy-sigcomm2023,
  author    = {Ruffy, Fabian and Liu, Jed and Kotikalapudi, Prathima and Havel, Vojtěch and Tavante, Hanneli and Sherwood, Rob and Dubina, Vladislav and Peschanenko, Volodymyr and Sivaraman, Anirudh and Foster, Nate},
  title     = {{P4Testgen}: An Extensible Test Oracle For {P4}},
  booktitle = {Proceedings of the ACM SIGCOMM 2023 Conference},
  publisher = {Association for Computing Machinery},
  year      = {2023},
  month     = sep,
  abstract  = {We present P4Testgen, a test oracle for the P4-16. P4Testgen supports automatic test generation for any P4 target and is designed to be extensible to many P4 targets. It models the complete semantics of the target's packet-processing pipeline including the P4 language, architectures and externs, and target-specific extensions. To handle non-deterministic behaviors and complex externs (e.g., checksums and hash functions), P4Testgen uses taint tracking and concolic execution. It also provides path selection strategies that reduce the number of tests required to achieve full coverage. We have instantiated P4Testgen for the V1model, eBPF, PNA, and Tofino P4 architectures. Each extension required effort commensurate with the complexity of the target. We validated the tests generated by P4Testgen by running them across the entire P4C test suite as well as the programs supplied with the Tofino P4 Studio. Using the tool, we have also confirmed 25 bugs in mature, production toolchains for BMv2 and Tofino.},
}

% ASPLOS 2023 paper on NNSmith.
% The tool name is braced in the title so sentence-casing styles keep its capitalisation.
@inproceedings{liu-asplos2023,
  author    = {Liu, Jiawei and Lin, Jinkun and Ruffy, Fabian and Tan, Cheng and Li, Jinyang and Panda, Aurojit and Zhang, Lingming},
  title     = {{NNSmith}: Generating Diverse and Valid Test Cases for Deep Learning Compilers},
  booktitle = {Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems},
  publisher = {Association for Computing Machinery},
  year      = {2023},
  month     = mar,
  abstract  = {Deep-learning (DL) compilers such as TVM and TensorRT are increasingly being used to optimize deep neural network (DNN) models to meet performance, resource utilization and other requirements. Bugs in these compilers can result in models whose semantics differ from the original ones, producing incorrect results that corrupt the correctness of downstream applications. However, finding bugs in these compilers is challenging due to their complexity. In this work, we propose a new fuzz testing approach for finding bugs in deep-learning compilers. Our core approach consists of (i) generating diverse yet valid DNN test models that can exercise a large part of the compiler’s transformation logic using light-weight operator specifications; (ii) performing gradient-based search to find model inputs that avoid any floating-point exceptional values during model execution, reducing the chance of missed bugs or false alarms; and (iii) using differential testing to identify bugs. We implemented this approach in NNSmith which has found 72 new bugs for TVM, TensorRT, ONNXRuntime, and PyTorch to date. Of these 58 have been confirmed and 51 have been fixed by their respective project maintainers.},
}

% HotNets 2021 workshop paper on Snicket (query-driven distributed tracing).
@inproceedings{berg-hotnets2021,
  author    = {Berg, Jessica and Ruffy, Fabian and Nguyen, Khanh and Yang, Nicholas and Kim, Taegyun and Sivaraman, Anirudh and Netravali, Ravi and Narayana, Srinivas},
  title     = {Snicket: Query-Driven Distributed Tracing},
  booktitle = {Proceedings of the Twentieth ACM Workshop on Hot Topics in Networks},
  publisher = {Association for Computing Machinery},
  year      = {2021},
  month     = nov,
  abstract  = {Increasing application complexity has caused applications to be refactored into smaller components known as microservices that communicate with each other using RPCs. Distributed tracing has emerged as an important debugging tool for such microservice-based applications. Distributed tracing follows the journey of a user request from its starting point at the application's front-end, through RPC calls made by the front-end to different microservices recursively, all the way until a response is constructed and sent back to the user. To reduce storage costs, distributed tracing systems sample traces before collecting them for subsequent querying, affecting the accuracy of queries on the collected traces. We propose an alternative system, Snicket, that tightly integrates querying and collection of traces. Snicket takes as input a database-style streaming query that expresses the analysis the developer wants to perform on the trace data. This query is compiled into a distributed collection of microservice extensions that run as "bumps-in-the-wire," intercepting RPC requests and responses as they flow into and out of microservices. This collection of extensions implements the query, performing early filtering and computation on the traces to reduce the amount of stored data in a query-specific manner. We show that Snicket is expressive in the queries it can support and can update queries fast enough for interactive use.},
}

% OSDI 2020 paper on Gauntlet, a bug-finding tool for P4 compilers.
@inproceedings{ruffy-osdi2020,
  author    = {Ruffy, Fabian and Wang, Tao and Sivaraman, Anirudh},
  title     = {Gauntlet: Finding Bugs in Compilers for Programmable Packet Processing},
  booktitle = {14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20)},
  publisher = {{USENIX} Association},
  year      = {2020},
  month     = nov,
  abstract  = {
  Programmable packet-processing devices such as programmable switches and network interface cards are becoming mainstream. These devices are configured in a domain-specific language such as P4, using a compiler to translate packet-processing programs into instructions for different targets. As networks with programmable devices become widespread, it is critical that these compilers be dependable. This paper considers the problem of finding bugs in compilers for packet processing in the context of P4-16. We introduce domain-specific techniques to induce both abnormal termination of the compiler (crash bugs) and miscompilation (semantic bugs). We apply these techniques to (1) the opensource P4 compiler (P4C) infrastructure, which serves as a common base for different P4 back ends; (2) the P4 back end for the P4 reference software switch; and (3) the P4 back end for the Barefoot Tofino switch. Across the 3 platforms, over 8 months of bug finding, our tool Gauntlet detected 96 new and distinct bugs (62 crash and 34 semantic), which we confirmed with the respective compiler developers. 54 have been fixed (31 crash and 23 semantic); the remaining have been assigned to a developer. Our bug-finding efforts also led to 6 P4 specification changes. We have open sourced Gauntlet at p4gauntlet.github.io and it now runs within P4C’s continuous integration pipeline.},
}

% HotCloud 2020 position paper on multitenancy for programmable network devices.
% Author names use the "Last, First" form, matching the other entries in this file.
@inproceedings{wang-hotcloud2020,
  author    = {Wang, Tao and Zhu, Hang and Ruffy, Fabian and Jin, Xin and Sivaraman, Anirudh and Ports, Dan and Panda, Aurojit},
  title     = {Multitenancy for Fast and Programmable Networks in the Cloud},
  booktitle = {12th {USENIX} Workshop on Hot Topics in Cloud Computing (HotCloud 20)},
  publisher = {{USENIX} Association},
  address   = {Virtual Conference},
  year      = {2020},
  abstract  = {
  Fast and programmable network devices are now readily available, both in the form of programmable switches and smart network-interface cards. Going forward, we envision that these devices will be widely deployed in the networks of cloud providers (e.g., AWS, Azure, and GCP) and exposed as a programmable surface for cloud customers—similar to how cloud customers can today rent CPUs, GPUs, FPGAs, and ML accelerators. Making this vision a reality requires us to develop a mechanism to share the resources of a programmable network device across multiple cloud tenants. In other words, we need to provide multitenancy on these devices. In this position paper, we design compile and run-time approaches to multitenancy. We present preliminary results showing that our design provides both efficient resource utilization and isolation of tenant programs from each other.},
}

% arXiv preprint surveying knowledge distillation for classification.
% Typed as an article because the venue is carried in the journal field,
% which conference-paper entries do not use. The citation key is unchanged.
@article{ruffy-arxiv2020,
  author   = {Ruffy, Fabian and Chahal, Karanbir},
  title    = {The State of Knowledge Distillation for Classification},
  journal  = {arXiv preprint arXiv:1912.10850},
  year     = {2019},
  abstract = {
  We survey various knowledge distillation (KD) strategies for simple classification tasks and implement a set of techniques that claim state-of-the-art accuracy. Our experiments using standardized model architectures, fixed compute budgets, and consistent training schedules indicate that many of these distillation results are hard to reproduce. This is especially apparent with methods using some form of feature distillation. Further examination reveals a lack of generalizability where these techniques may only succeed for specific architectures and training settings. We observe that appropriately tuned classical distillation in combination with a data augmentation training scheme gives an orthogonal improvement over other techniques. We validate this approach and open-source our code.},
}


% HotCloud 2019 vision paper on Jumpgate (in-network processing as a service).
% Author names use the "Last, First" form, matching the other entries in this file.
@inproceedings{mustard-hotcloud2019,
  author    = {Mustard, Craig and Ruffy, Fabian and Gakhokidze, Anny and Beschastnikh, Ivan and Fedorova, Alexandra},
  title     = {Jumpgate: In-Network Processing as a Service for Data Analytics},
  booktitle = {11th {USENIX} Workshop on Hot Topics in Cloud Computing (HotCloud 19)},
  publisher = {{USENIX} Association},
  address   = {Renton, WA},
  year      = {2019},
  abstract  = {
  In-network processing, where data is processed by special-purpose devices as it passes over the network, is showing great promise at improving application performance, in particular for data analytics tasks. However, analytics and in-network processing are not yet integrated and widely deployed. This paper presents a vision for providing in-network processing as a service to data analytics frameworks, and outlines benefits, remaining challenges, and our current research directions towards realizing this vision.},
}


% Iroko paper (emulator for reinforcement-learning-based congestion control).
% Typed as a conference paper because the venue is carried in the booktitle field,
% which article entries ignore. The citation key is unchanged.
@inproceedings{ruffy-nips2018,
  author    = {Ruffy, Fabian and Przystupa, Michael and Beschastnikh, Ivan},
  title     = {Iroko: A Framework to Prototype Reinforcement Learning for Data Center Traffic Control},
  booktitle = {Neural Information Processing Systems 2018},
  year      = {2018},
  abstract  = {
  Recent networking research has identified that data-driven congestion control (CC) can be more efficient than traditional CC in TCP. Deep reinforcement learning (RL), in particular, has the potential to learn optimal network policies. However, RL suffers from instability and over-fitting, deficiencies which so far render it unacceptable for use in datacenter networks. In this paper, we analyze the requirements for RL to succeed in the datacenter context. We present a new emulator, Iroko, which we developed to support different network topologies, congestion control algorithms, and deployment scenarios. Iroko interfaces with the OpenAI gym toolkit, which allows for fast and fair evaluation of different RL and traditional CC algorithms under the same conditions. We present initial benchmarks on three deep RL algorithms compared to TCP New Vegas and DCTCP. Our results show that these algorithms are able to learn a CC policy which exceeds the performance of TCP New Vegas on a dumbbell and fat-tree topology. We make our emulator open-source and publicly available: https://github.com/dcgym/iroko.},
}

% Linux Plumbers Conference 2018 paper on translating P4 into eBPF/XDP (P4C-XDP).
% Typed as a conference paper because the venue is carried in the booktitle field,
% which article entries ignore. Braces in the title protect proper names from
% being lowercased by sentence-casing styles.
@inproceedings{tu-lpc2018,
  author    = {Tu, William and Ruffy, Fabian and Budiu, Mihai},
  title     = {{Linux} Network Programming with {P4}},
  booktitle = {Linux Plumbers Conference 2018},
  year      = {2018},
  abstract  = {
  P4 is a domain-specific language for implementing network data-planes. The P4 abstraction allows programmers to write network protocols in a generalized fashion, without needing to know the configuration specifics of the targeted data-plane. The extended Berkeley Packet Filter (eBPF) is a safe virtual machine for executing sand-boxed programs in the Linux kernel. eBPF, and its extension the eXpress Data Path (XDP), effectively serve as programmable data-planes of the kernel. P4C-XDP is a project combining the performance of XDP with the generality and usability of P4. In this document, we describe how P4 can be translated into eBPF/XDP. We review the fundamental limitations of both technologies, analyze the performance of several generated XDP programs, and discuss problems we have faced while working on this new technology.},
}

% ANCS 2018 paper on data-center-scale VNF chain allocation (Daisy / NetPack).
% The acronym in the title is braced so sentence-casing styles do not lowercase it.
% The percent sign in the abstract is escaped so it cannot act as a LaTeX comment
% character if a style prints the abstract.
@inproceedings{kodirov-ancs2018,
  author       = {Kodirov, Nodir and Bayless, Sam and Ruffy, Fabian and Beschastnikh, Ivan and Hoos, Holger H and Hu, Alan J},
  title        = {{VNF} chain allocation and management at data center scale},
  booktitle    = {Proceedings of the 2018 Symposium on Architectures for Networking and Communications Systems},
  pages        = {125--140},
  year         = {2018},
  organization = {ACM},
  abstract     = {
  Recent advances in network function virtualization have prompted the research community to consider data-center-scale deployments. However, existing tools, such as E2 and SOL, limit VNF chain allocation to rack-scale and provide limited support for management of allocated chains. We define a narrow API to let data center tenants and operators allocate and manage arbitrary VNF chain topologies, and we introduce NetPack, a new stochastic placement algorithm, to implement this API at data-center-scale. We prototyped the resulting system, dubbed Daisy, using the Sonata platform. In data-center-scale simulations on realistic scenarios and topologies that are orders of magnitude larger than prior work, we achieve in all cases an allocation density within 96\% of a recently introduced, theoretically complete, constraint-solver-based placement engine, while being 82x faster on average. In detailed emulation with real packet traces, we find that Daisy performs each of our six API calls with at most one second of throughput drop.},
}

% ANCS 2018 two-page entry proposing a VNF chain abstraction for cloud providers.
% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@inproceedings{kodirov-poster-ancs2018,
  author       = {Kodirov, Nodir and Bayless, Sam and Ruffy, Fabian and Beschastnikh, Ivan and Hoos, Holger H and Hu, Alan J},
  title        = {{VNF} chain abstraction for cloud service providers},
  booktitle    = {Proceedings of the 2018 Symposium on Architectures for Networking and Communications Systems},
  pages        = {165--166},
  year         = {2018},
  organization = {ACM},
  abstract     = {
  We propose a VNF chain abstraction to decouple a tenant's view of the VNF chain from the cloud provider's implementation. We motivate the benefits of such an abstraction for the cloud provider as well as the tenants, and outline the challenges a cloud provider needs to address to make the chain abstraction practical. We describe the design requirements and report on our initial prototype.},
}

% ICN 2016 paper applying the STRIDE threat model to software-defined networking.
% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@inproceedings{ruffy-icn2016,
  author       = {Ruffy, Fabian and Hommel, Wolfgang and von Eye, Felix},
  title        = {A {STRIDE}-based Security Architecture for Software-Defined Networking},
  booktitle    = {Proceedings of the Fifteenth International Conference on Networks},
  pages        = {95--101},
  year         = {2016},
  organization = {IARIA},
  abstract     = {While the novelty of Software-Defined Networking (SDN) - the separation of network control and data planes - is appealing and simple enough to foster massive vendor support, the resulting impact on the security of communication networks infrastructures and their management may be tremendous. The paradigm change affects the entire networking architecture. It involves new IP-based management communication protocols, and introduces newly engineered, potentially immature and vulnerable implementations in both network components and SDN controllers. In this paper, the well-known STRIDE threat model is applied to the generic SDN concepts as a basis for the design of a secure SDN architecture. The key elements are presented in detail along with a discussion of potentially fundamental security flaws in the current SDN concepts.},
}